diff --git a/assets/search.html b/assets/search.html index 17da7ed..d126801 100644 --- a/assets/search.html +++ b/assets/search.html @@ -17,6 +17,10 @@
+
@@ -27,8 +31,13 @@
{{len .Results}} match{{if ne (len .Results) 1}}es{{end}} for “{{.Query}}”
{{range .Results}}
- {{.Name}} - {{.Path}} +
+
+ {{.Name}} + {{.Path}} +
+ {{if .Snippet}}
{{.Snippet}}
{{end}} +
{{end}} diff --git a/assets/style.css b/assets/style.css index e3f1dbd..b66d0df 100644 --- a/assets/style.css +++ b/assets/style.css @@ -379,6 +379,38 @@ textarea { .search-input:focus { border-color: var(--primary-hover); } +.search-toggle { + display: flex; + align-items: center; + gap: 0.25rem; + white-space: nowrap; +} +.search-result { + display: flex; + flex-direction: column; + gap: 0.2rem; + flex: 1; + min-width: 0; +} +.search-result-row { + display: flex; + align-items: center; + gap: 0.75rem; + min-width: 0; +} +.search-result-row a { + flex: 1; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} +.search-snippet { + font-size: 0.8rem; + line-height: 1.4; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} /* === Muted text === */ .muted { diff --git a/search.go b/search.go index 0360e92..45af473 100644 --- a/search.go +++ b/search.go @@ -5,14 +5,17 @@ import ( "io/fs" "log" "net/http" + "os" "path/filepath" + "regexp" "sort" "strings" + "unicode" ) var searchTmpl = template.Must(template.New("search.html").ParseFS(assets, "assets/search.html")) -// Match ranks. Lower is better. +// Match ranks. Lower is better. Used for folder-name search (Phase 1). const ( rankExact = 0 rankPrefix = 1 @@ -21,26 +24,37 @@ const ( ) type searchResult struct { - Name string - URL string - Path string - Rank int + Name string + URL string + Path string + Rank int // Phase 1 only + Score int // Phase 2: number of query tokens that hit + NameHit bool // Phase 2: at least one hit came from the folder name + Snippet string // Phase 2: ~100 chars around first body hit } type searchPageData struct { Query string + Full bool Results []searchResult } // handleSearch walks the wiki root and renders a search results page for the // query in r.URL.Query().Get("q"). Only invoked when path is "/" and "q" is -// present. +// present. With ?full=1 it also scans index.md bodies (Phase 2). 
func (h *handler) handleSearch(w http.ResponseWriter, r *http.Request) { query := strings.TrimSpace(r.URL.Query().Get("q")) - results := searchFolders(h.root, query) + full := r.URL.Query().Get("full") == "1" + + var results []searchResult + if full { + results = searchFull(h.root, query) + } else { + results = searchFolders(h.root, query) + } w.Header().Set("Content-Type", "text/html; charset=utf-8") - if err := searchTmpl.Execute(w, searchPageData{Query: query, Results: results}); err != nil { + if err := searchTmpl.Execute(w, searchPageData{Query: query, Full: full, Results: results}); err != nil { log.Printf("search template error: %v", err) } } @@ -57,28 +71,19 @@ func searchFolders(root, query string) []searchResult { maxDist = 3 } - // Resolve symlinks so WalkDir descends into the real tree even when the - // configured wiki root is itself a symlink (as on the deployed NAS). - walkRoot, err := filepath.EvalSymlinks(root) - if err != nil { - walkRoot = root - } - + walkRoot := resolveWalkRoot(root) var results []searchResult _ = filepath.WalkDir(walkRoot, func(fsPath string, d fs.DirEntry, err error) error { if err != nil { return nil } - name := d.Name() - if strings.HasPrefix(name, ".") { - if d.IsDir() && fsPath != root { - return filepath.SkipDir - } - return nil + if skip, walkErr := hiddenSkip(fsPath, walkRoot, d); skip { + return walkErr } if !d.IsDir() || fsPath == walkRoot { return nil } + name := d.Name() rank, ok := matchRank(strings.ToLower(name), q, maxDist) if !ok { return nil @@ -87,10 +92,9 @@ func searchFolders(root, query string) []searchResult { if relErr != nil { return nil } - urlPath := "/" + filepath.ToSlash(rel) + "/" results = append(results, searchResult{ Name: name, - URL: urlPath, + URL: "/" + filepath.ToSlash(rel) + "/", Path: filepath.ToSlash(rel), Rank: rank, }) @@ -106,6 +110,105 @@ func searchFolders(root, query string) []searchResult { return results } +// searchFull walks root and scores each directory by how many 
whitespace-split +// query tokens hit a word in either the folder name or its index.md body. +// A word "hits" a token via case-insensitive equality or Levenshtein ≤ 2. +// Folder-name hits break score ties above content-only hits. +func searchFull(root, query string) []searchResult { + if query == "" { + return nil + } + qTokens := tokenize(query) + if len(qTokens) == 0 { + return nil + } + + walkRoot := resolveWalkRoot(root) + var results []searchResult + _ = filepath.WalkDir(walkRoot, func(fsPath string, d fs.DirEntry, err error) error { + if err != nil { + return nil + } + if skip, walkErr := hiddenSkip(fsPath, walkRoot, d); skip { + return walkErr + } + if !d.IsDir() || fsPath == walkRoot { + return nil + } + name := d.Name() + body, _ := os.ReadFile(filepath.Join(fsPath, "index.md")) + + nameWords := tokenize(name) + bodyStr := string(body) + bodyLower := strings.ToLower(bodyStr) + bodyWords := tokenize(bodyLower) + + score := 0 + nameHit := false + for _, qt := range qTokens { + inName := tokenInWords(qt, nameWords) + inBody := tokenInWords(qt, bodyWords) + if inName || inBody { + score++ + } + if inName { + nameHit = true + } + } + if score == 0 { + return nil + } + + rel, relErr := filepath.Rel(walkRoot, fsPath) + if relErr != nil { + return nil + } + results = append(results, searchResult{ + Name: name, + URL: "/" + filepath.ToSlash(rel) + "/", + Path: filepath.ToSlash(rel), + Score: score, + NameHit: nameHit, + Snippet: makeSnippet(bodyStr, bodyLower, qTokens), + }) + return nil + }) + + sort.SliceStable(results, func(i, j int) bool { + if results[i].Score != results[j].Score { + return results[i].Score > results[j].Score + } + if results[i].NameHit != results[j].NameHit { + return results[i].NameHit + } + return strings.ToLower(results[i].Name) < strings.ToLower(results[j].Name) + }) + return results +} + +// resolveWalkRoot resolves symlinks so WalkDir descends into the real tree +// even when the configured wiki root is itself a symlink (as on the 
NAS). +func resolveWalkRoot(root string) string { + if r, err := filepath.EvalSymlinks(root); err == nil { + return r + } + return root +} + +// hiddenSkip handles dotfile/dot-dir entries during a WalkDir. It returns +// (skipped, walkErr): skipped=true means the caller should `return walkErr` +// to either prune the subtree (hidden dir) or move past the entry (hidden +// file). When skipped=false the entry should be processed normally. +func hiddenSkip(fsPath, walkRoot string, d fs.DirEntry) (bool, error) { + if !strings.HasPrefix(d.Name(), ".") { + return false, nil + } + if d.IsDir() && fsPath != walkRoot { + return true, filepath.SkipDir + } + return true, nil +} + // matchRank returns the best (lowest) rank for which name matches q, or // (0, false) if no rule matches. Inputs are expected to be lowercased. func matchRank(name, q string, maxDist int) (int, bool) { @@ -124,6 +227,86 @@ func matchRank(name, q string, maxDist int) (int, bool) { return 0, false } +// tokenize splits s into lowercase word tokens, breaking on any rune that is +// not a letter or digit. Unicode-aware so umlauts etc. survive intact. +func tokenize(s string) []string { + var tokens []string + var b strings.Builder + for _, r := range s { + if unicode.IsLetter(r) || unicode.IsDigit(r) { + b.WriteRune(unicode.ToLower(r)) + continue + } + if b.Len() > 0 { + tokens = append(tokens, b.String()) + b.Reset() + } + } + if b.Len() > 0 { + tokens = append(tokens, b.String()) + } + return tokens +} + +// tokenInWords reports whether qt matches any word exactly or within +// Levenshtein distance 2. qt and words must already be lowercase. +func tokenInWords(qt string, words []string) bool { + for _, w := range words { + if w == qt { + return true + } + if levenshtein(w, qt) <= 2 { + return true + } + } + return false +} + +var snippetWS = regexp.MustCompile(`\s+`) + +// makeSnippet returns ~100 characters of body around the earliest substring +// match of any query token. 
// Falls back to empty when no token appears as a substring (a token may have
// hit only via Levenshtein, with no exact span to quote).
//
// bodyLower is expected to be strings.ToLower(body) and tokens to be
// lowercase. The hit is located in bodyLower, but the snippet is cut from
// body so the original casing is shown. Lowercasing does not preserve byte
// lengths (U+212A KELVIN SIGN shrinks from three bytes to one, and every
// invalid UTF-8 byte is replaced by a three-byte U+FFFD), so the offset found
// in bodyLower is translated back into body before slicing. Using it directly
// could quote the wrong region or index past the end of body and panic.
//
// The window is 50 bytes on each side of the start of the hit, widened to
// rune boundaries. Runs of white space are collapsed to single spaces, and an
// ellipsis marks each side on which body was truncated.
func makeSnippet(body, bodyLower string, tokens []string) string {
	const radius = 50 // bytes of context kept on each side of the hit

	// Earliest substring hit of any token, as an offset into bodyLower.
	lowerPos := -1
	for _, t := range tokens {
		if i := strings.Index(bodyLower, t); i >= 0 && (lowerPos < 0 || i < lowerPos) {
			lowerPos = i
		}
	}
	if lowerPos < 0 {
		return ""
	}
	pos := lowerOffsetToOrig(body, lowerPos)

	start := pos - radius
	if start < 0 {
		start = 0
	}
	end := pos + radius
	if end > len(body) {
		end = len(body)
	}
	// Step off UTF-8 continuation bytes so no multi-byte rune is cut in half.
	for start > 0 && body[start]&0xC0 == 0x80 {
		start--
	}
	for end < len(body) && body[end]&0xC0 == 0x80 {
		end++
	}

	// Fields+Join collapses every white-space run to one space and trims both
	// ends in a single pass.
	s := strings.Join(strings.Fields(body[start:end]), " ")
	if start > 0 {
		s = "…" + s
	}
	if end < len(body) {
		s += "…"
	}
	return s
}

// lowerOffsetToOrig translates a byte offset into strings.ToLower(body) back
// to the byte offset of the corresponding rune in body. Offsets at or beyond
// the end of the lowered text map to len(body).
func lowerOffsetToOrig(body string, lowerPos int) int {
	lowered := 0 // bytes of lowercase text produced by body[:i]
	for i, r := range body {
		if lowered >= lowerPos {
			return i
		}
		// An invalid byte ranges as U+FFFD, which is exactly what
		// strings.ToLower writes for it (three bytes).
		lowered += len(string(unicode.ToLower(r)))
	}
	return len(body)
}