Full-text search v1
This commit is contained in:
@@ -17,6 +17,10 @@
|
|||||||
</nav>
|
</nav>
|
||||||
<form class="search-form" action="/" method="get">
|
<form class="search-form" action="/" method="get">
|
||||||
<input class="search-input" type="search" name="q" value="{{.Query}}" placeholder="Search folders…" autofocus />
|
<input class="search-input" type="search" name="q" value="{{.Query}}" placeholder="Search folders…" autofocus />
|
||||||
|
<label class="search-toggle muted" title="Also search page contents">
|
||||||
|
<input type="checkbox" name="full" value="1" {{if .Full}}checked{{end}} />
|
||||||
|
full-text
|
||||||
|
</label>
|
||||||
<button class="btn" type="submit">GO</button>
|
<button class="btn" type="submit">GO</button>
|
||||||
</form>
|
</form>
|
||||||
</header>
|
</header>
|
||||||
@@ -27,9 +31,14 @@
|
|||||||
<div class="listing-header">{{len .Results}} match{{if ne (len .Results) 1}}es{{end}} for “{{.Query}}”</div>
|
<div class="listing-header">{{len .Results}} match{{if ne (len .Results) 1}}es{{end}} for “{{.Query}}”</div>
|
||||||
{{range .Results}}
|
{{range .Results}}
|
||||||
<div class="listing-item">
|
<div class="listing-item">
|
||||||
|
<div class="search-result">
|
||||||
|
<div class="search-result-row">
|
||||||
<a href="{{.URL}}">{{.Name}}</a>
|
<a href="{{.URL}}">{{.Name}}</a>
|
||||||
<span class="meta">{{.Path}}</span>
|
<span class="meta">{{.Path}}</span>
|
||||||
</div>
|
</div>
|
||||||
|
{{if .Snippet}}<div class="search-snippet muted">{{.Snippet}}</div>{{end}}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
{{end}}
|
{{end}}
|
||||||
</div>
|
</div>
|
||||||
{{else}}
|
{{else}}
|
||||||
|
|||||||
@@ -379,6 +379,38 @@ textarea {
|
|||||||
.search-input:focus {
|
.search-input:focus {
|
||||||
border-color: var(--primary-hover);
|
border-color: var(--primary-hover);
|
||||||
}
|
}
|
||||||
|
/* Checkbox + caption shown next to the search input; kept on one line. */
.search-toggle {
    white-space: nowrap;
    display: flex;
    gap: 0.25rem;
    align-items: center;
}

/* One search hit: title row stacked above an optional snippet. */
.search-result {
    flex: 1;
    min-width: 0; /* allow the column to shrink so children can ellipsize */
    display: flex;
    flex-direction: column;
    gap: 0.2rem;
}

/* Title row: link on the left, path metadata on the right. */
.search-result-row {
    min-width: 0;
    display: flex;
    gap: 0.75rem;
    align-items: center;
}

/* The link takes the remaining width and is cut off with an ellipsis. */
.search-result-row a {
    flex: 1;
    white-space: nowrap;
    overflow: hidden;
    text-overflow: ellipsis;
}

/* Single-line body excerpt under the title row. */
.search-snippet {
    font-size: 0.8rem;
    line-height: 1.4;
    white-space: nowrap;
    overflow: hidden;
    text-overflow: ellipsis;
}
|
||||||
|
|
||||||
/* === Muted text === */
|
/* === Muted text === */
|
||||||
.muted {
|
.muted {
|
||||||
|
|||||||
@@ -5,14 +5,17 @@ import (
|
|||||||
"io/fs"
|
"io/fs"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
"unicode"
|
||||||
)
|
)
|
||||||
|
|
||||||
var searchTmpl = template.Must(template.New("search.html").ParseFS(assets, "assets/search.html"))
|
var searchTmpl = template.Must(template.New("search.html").ParseFS(assets, "assets/search.html"))
|
||||||
|
|
||||||
// Match ranks. Lower is better.
|
// Match ranks. Lower is better. Used for folder-name search (Phase 1).
|
||||||
const (
|
const (
|
||||||
rankExact = 0
|
rankExact = 0
|
||||||
rankPrefix = 1
|
rankPrefix = 1
|
||||||
@@ -24,23 +27,34 @@ type searchResult struct {
|
|||||||
Name string
|
Name string
|
||||||
URL string
|
URL string
|
||||||
Path string
|
Path string
|
||||||
Rank int
|
Rank int // Phase 1 only
|
||||||
|
Score int // Phase 2: number of query tokens that hit
|
||||||
|
NameHit bool // Phase 2: at least one hit came from the folder name
|
||||||
|
Snippet string // Phase 2: ~100 chars around first body hit
|
||||||
}
|
}
|
||||||
|
|
||||||
type searchPageData struct {
|
type searchPageData struct {
|
||||||
Query string
|
Query string
|
||||||
|
Full bool
|
||||||
Results []searchResult
|
Results []searchResult
|
||||||
}
|
}
|
||||||
|
|
||||||
// handleSearch walks the wiki root and renders a search results page for the
|
// handleSearch walks the wiki root and renders a search results page for the
|
||||||
// query in r.URL.Query().Get("q"). Only invoked when path is "/" and "q" is
|
// query in r.URL.Query().Get("q"). Only invoked when path is "/" and "q" is
|
||||||
// present.
|
// present. With ?full=1 it also scans index.md bodies (Phase 2).
|
||||||
func (h *handler) handleSearch(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleSearch(w http.ResponseWriter, r *http.Request) {
|
||||||
query := strings.TrimSpace(r.URL.Query().Get("q"))
|
query := strings.TrimSpace(r.URL.Query().Get("q"))
|
||||||
results := searchFolders(h.root, query)
|
full := r.URL.Query().Get("full") == "1"
|
||||||
|
|
||||||
|
var results []searchResult
|
||||||
|
if full {
|
||||||
|
results = searchFull(h.root, query)
|
||||||
|
} else {
|
||||||
|
results = searchFolders(h.root, query)
|
||||||
|
}
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||||
if err := searchTmpl.Execute(w, searchPageData{Query: query, Results: results}); err != nil {
|
if err := searchTmpl.Execute(w, searchPageData{Query: query, Full: full, Results: results}); err != nil {
|
||||||
log.Printf("search template error: %v", err)
|
log.Printf("search template error: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -57,28 +71,19 @@ func searchFolders(root, query string) []searchResult {
|
|||||||
maxDist = 3
|
maxDist = 3
|
||||||
}
|
}
|
||||||
|
|
||||||
// Resolve symlinks so WalkDir descends into the real tree even when the
|
walkRoot := resolveWalkRoot(root)
|
||||||
// configured wiki root is itself a symlink (as on the deployed NAS).
|
|
||||||
walkRoot, err := filepath.EvalSymlinks(root)
|
|
||||||
if err != nil {
|
|
||||||
walkRoot = root
|
|
||||||
}
|
|
||||||
|
|
||||||
var results []searchResult
|
var results []searchResult
|
||||||
_ = filepath.WalkDir(walkRoot, func(fsPath string, d fs.DirEntry, err error) error {
|
_ = filepath.WalkDir(walkRoot, func(fsPath string, d fs.DirEntry, err error) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
name := d.Name()
|
if skip, walkErr := hiddenSkip(fsPath, walkRoot, d); skip {
|
||||||
if strings.HasPrefix(name, ".") {
|
return walkErr
|
||||||
if d.IsDir() && fsPath != root {
|
|
||||||
return filepath.SkipDir
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
if !d.IsDir() || fsPath == walkRoot {
|
if !d.IsDir() || fsPath == walkRoot {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
name := d.Name()
|
||||||
rank, ok := matchRank(strings.ToLower(name), q, maxDist)
|
rank, ok := matchRank(strings.ToLower(name), q, maxDist)
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil
|
return nil
|
||||||
@@ -87,10 +92,9 @@ func searchFolders(root, query string) []searchResult {
|
|||||||
if relErr != nil {
|
if relErr != nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
urlPath := "/" + filepath.ToSlash(rel) + "/"
|
|
||||||
results = append(results, searchResult{
|
results = append(results, searchResult{
|
||||||
Name: name,
|
Name: name,
|
||||||
URL: urlPath,
|
URL: "/" + filepath.ToSlash(rel) + "/",
|
||||||
Path: filepath.ToSlash(rel),
|
Path: filepath.ToSlash(rel),
|
||||||
Rank: rank,
|
Rank: rank,
|
||||||
})
|
})
|
||||||
@@ -106,6 +110,105 @@ func searchFolders(root, query string) []searchResult {
|
|||||||
return results
|
return results
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// searchFull walks root and scores each directory by how many whitespace-split
|
||||||
|
// query tokens hit a word in either the folder name or its index.md body.
|
||||||
|
// A word "hits" a token via case-insensitive equality or Levenshtein ≤ 2.
|
||||||
|
// Folder-name hits break score ties above content-only hits.
|
||||||
|
func searchFull(root, query string) []searchResult {
|
||||||
|
if query == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
qTokens := tokenize(query)
|
||||||
|
if len(qTokens) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
walkRoot := resolveWalkRoot(root)
|
||||||
|
var results []searchResult
|
||||||
|
_ = filepath.WalkDir(walkRoot, func(fsPath string, d fs.DirEntry, err error) error {
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if skip, walkErr := hiddenSkip(fsPath, walkRoot, d); skip {
|
||||||
|
return walkErr
|
||||||
|
}
|
||||||
|
if !d.IsDir() || fsPath == walkRoot {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
name := d.Name()
|
||||||
|
body, _ := os.ReadFile(filepath.Join(fsPath, "index.md"))
|
||||||
|
|
||||||
|
nameWords := tokenize(name)
|
||||||
|
bodyStr := string(body)
|
||||||
|
bodyLower := strings.ToLower(bodyStr)
|
||||||
|
bodyWords := tokenize(bodyLower)
|
||||||
|
|
||||||
|
score := 0
|
||||||
|
nameHit := false
|
||||||
|
for _, qt := range qTokens {
|
||||||
|
inName := tokenInWords(qt, nameWords)
|
||||||
|
inBody := tokenInWords(qt, bodyWords)
|
||||||
|
if inName || inBody {
|
||||||
|
score++
|
||||||
|
}
|
||||||
|
if inName {
|
||||||
|
nameHit = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if score == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
rel, relErr := filepath.Rel(walkRoot, fsPath)
|
||||||
|
if relErr != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
results = append(results, searchResult{
|
||||||
|
Name: name,
|
||||||
|
URL: "/" + filepath.ToSlash(rel) + "/",
|
||||||
|
Path: filepath.ToSlash(rel),
|
||||||
|
Score: score,
|
||||||
|
NameHit: nameHit,
|
||||||
|
Snippet: makeSnippet(bodyStr, bodyLower, qTokens),
|
||||||
|
})
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
sort.SliceStable(results, func(i, j int) bool {
|
||||||
|
if results[i].Score != results[j].Score {
|
||||||
|
return results[i].Score > results[j].Score
|
||||||
|
}
|
||||||
|
if results[i].NameHit != results[j].NameHit {
|
||||||
|
return results[i].NameHit
|
||||||
|
}
|
||||||
|
return strings.ToLower(results[i].Name) < strings.ToLower(results[j].Name)
|
||||||
|
})
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveWalkRoot returns root with all symlinks resolved, so that
// filepath.WalkDir descends into the real tree even when the configured wiki
// root is itself a symlink (as on the NAS). If resolution fails, root is
// returned unchanged.
func resolveWalkRoot(root string) string {
	resolved, err := filepath.EvalSymlinks(root)
	if err != nil {
		return root
	}
	return resolved
}
|
||||||
|
|
||||||
|
// hiddenSkip decides what a WalkDir callback should do with an entry whose
// name starts with a dot. It returns (skipped, walkErr):
//
//   - skipped == false: the entry is not hidden; process it normally.
//   - skipped == true: the caller should `return walkErr` straight away.
//     walkErr is filepath.SkipDir for a hidden directory other than the walk
//     root (pruning its subtree) and nil otherwise (hidden file, or the walk
//     root itself, which must still be descended into).
func hiddenSkip(fsPath, walkRoot string, d fs.DirEntry) (bool, error) {
	switch {
	case !strings.HasPrefix(d.Name(), "."):
		return false, nil
	case fsPath != walkRoot && d.IsDir():
		return true, filepath.SkipDir
	default:
		return true, nil
	}
}
|
||||||
|
|
||||||
// matchRank returns the best (lowest) rank for which name matches q, or
|
// matchRank returns the best (lowest) rank for which name matches q, or
|
||||||
// (0, false) if no rule matches. Inputs are expected to be lowercased.
|
// (0, false) if no rule matches. Inputs are expected to be lowercased.
|
||||||
func matchRank(name, q string, maxDist int) (int, bool) {
|
func matchRank(name, q string, maxDist int) (int, bool) {
|
||||||
@@ -124,6 +227,86 @@ func matchRank(name, q string, maxDist int) (int, bool) {
|
|||||||
return 0, false
|
return 0, false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// tokenize splits s into lowercase word tokens. A token is a maximal run of
// letters, digits and combining marks; every other rune is a separator.
//
// Combining marks are kept as part of the word so that accented characters
// stored in decomposed form (base letter followed by a combining diacritic,
// e.g. "u" + U+0308) stay inside their word instead of cutting it in two.
// Precomposed characters such as "ö" are letters and are unaffected.
//
// It returns nil when s contains no word characters.
func tokenize(s string) []string {
	fields := strings.FieldsFunc(s, func(r rune) bool {
		return !(unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsMark(r))
	})
	if len(fields) == 0 {
		return nil
	}
	for i, f := range fields {
		fields[i] = strings.ToLower(f)
	}
	return fields
}
|
||||||
|
|
||||||
|
// tokenInWords reports whether qt matches any word exactly or within
|
||||||
|
// Levenshtein distance 2. qt and words must already be lowercase.
|
||||||
|
func tokenInWords(qt string, words []string) bool {
|
||||||
|
for _, w := range words {
|
||||||
|
if w == qt {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if levenshtein(w, qt) <= 2 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// snippetWS matches runs of whitespace, which are collapsed to one space in
// snippets.
var snippetWS = regexp.MustCompile(`\s+`)

// makeSnippet returns ~100 bytes of body around the earliest substring match
// of any query token, with whitespace runs collapsed and "…" added on each
// side that was cut. It returns "" when no token appears as a substring (a
// token may have hit only via Levenshtein, with no exact span to quote).
//
// bodyLower must be strings.ToLower(body). Tokens are located in bodyLower,
// but the quoted text is taken from body; because lowercasing can change the
// byte length of a string, the match offset is translated back to body
// before slicing.
func makeSnippet(body, bodyLower string, tokens []string) string {
	pos := -1
	for _, t := range tokens {
		if i := strings.Index(bodyLower, t); i >= 0 && (pos < 0 || i < pos) {
			pos = i
		}
	}
	if pos < 0 {
		return ""
	}
	pos = snippetOffset(body, pos)

	start := pos - 50
	if start < 0 {
		start = 0
	}
	end := pos + 50
	if end > len(body) {
		end = len(body)
	}
	// Widen the window so it never starts or ends inside a multi-byte rune.
	for start > 0 && body[start]&0xC0 == 0x80 {
		start--
	}
	for end < len(body) && body[end]&0xC0 == 0x80 {
		end++
	}

	s := strings.TrimSpace(snippetWS.ReplaceAllString(body[start:end], " "))
	if start > 0 {
		s = "…" + s
	}
	if end < len(body) {
		s += "…"
	}
	return s
}

// snippetOffset maps lowerPos, a byte offset into strings.ToLower(body), to
// the byte offset of the same rune in body. The result is always within
// [0, len(body)], so it is safe to slice with even if the two strings do not
// line up.
func snippetOffset(body string, lowerPos int) int {
	lowerOff := 0
	for i, r := range body {
		if lowerOff >= lowerPos {
			return i
		}
		if r < 0x80 {
			lowerOff++ // ASCII lowercases to a single byte
			continue
		}
		// Mirrors strings.ToLower: each rune becomes unicode.ToLower(r), and
		// an invalid byte becomes U+FFFD (three bytes).
		lowerOff += len(string(unicode.ToLower(r)))
	}
	return len(body)
}
|
||||||
|
|
||||||
// levenshtein returns the edit distance between a and b. Operates on runes so
|
// levenshtein returns the edit distance between a and b. Operates on runes so
|
||||||
// multi-byte characters count as one edit.
|
// multi-byte characters count as one edit.
|
||||||
func levenshtein(a, b string) int {
|
func levenshtein(a, b string) int {
|
||||||
|
|||||||
Reference in New Issue
Block a user