diff --git a/assets/search.html b/assets/search.html
index 17da7ed..d126801 100644
--- a/assets/search.html
+++ b/assets/search.html
@@ -17,6 +17,10 @@
@@ -27,8 +31,13 @@
{{range .Results}}
-
{{.Name}}
-
{{.Path}}
+
+
+ {{if .Snippet}}
{{.Snippet}}
{{end}}
+
{{end}}
diff --git a/assets/style.css b/assets/style.css
index e3f1dbd..b66d0df 100644
--- a/assets/style.css
+++ b/assets/style.css
@@ -379,6 +379,38 @@ textarea {
.search-input:focus {
border-color: var(--primary-hover);
}
+.search-toggle { /* one-line flex row: children vertically centred, 0.25rem apart, text never wraps */
+ display: flex;
+ align-items: center;
+ gap: 0.25rem;
+ white-space: nowrap;
+}
+.search-result { /* vertical stack with 0.2rem between children; flex: 1 presumes a flex parent (not visible here) */
+ display: flex;
+ flex-direction: column;
+ gap: 0.2rem;
+ flex: 1;
+ min-width: 0; /* allow shrinking below content width so the ellipsis rules on descendants can apply */
+}
+.search-result-row { /* horizontal row: children vertically centred, 0.75rem apart */
+ display: flex;
+ align-items: center;
+ gap: 0.75rem;
+ min-width: 0; /* same shrink allowance as .search-result */
+}
+.search-result-row a { /* link takes the free width of the row; overlong text is cut to one line with an ellipsis */
+ flex: 1;
+ overflow: hidden;
+ text-overflow: ellipsis;
+ white-space: nowrap;
+}
+.search-snippet { /* small single-line text, truncated with an ellipsis when too long */
+ font-size: 0.8rem;
+ line-height: 1.4;
+ overflow: hidden;
+ text-overflow: ellipsis;
+ white-space: nowrap;
+}
/* === Muted text === */
.muted {
diff --git a/search.go b/search.go
index 0360e92..45af473 100644
--- a/search.go
+++ b/search.go
@@ -5,14 +5,17 @@ import (
"io/fs"
"log"
"net/http"
+ "os"
"path/filepath"
+ "regexp"
"sort"
"strings"
+ "unicode"
)
var searchTmpl = template.Must(template.New("search.html").ParseFS(assets, "assets/search.html"))
-// Match ranks. Lower is better.
+// Match ranks. Lower is better. Used for folder-name search (Phase 1).
const (
rankExact = 0
rankPrefix = 1
@@ -21,26 +24,37 @@ const (
)
type searchResult struct {
- Name string
- URL string
- Path string
- Rank int
+ Name string // directory base name as returned by d.Name()
+ URL string // "/" + slash-separated path relative to the walk root + "/"
+ Path string // slash-separated path relative to the walk root
+ Rank int // Phase 1 only
+ Score int // Phase 2: number of query tokens that hit
+ NameHit bool // Phase 2: at least one hit came from the folder name
+ Snippet string // Phase 2: excerpt around the earliest substring match in index.md; empty when no token occurs literally
}
type searchPageData struct {
Query string
+ Full bool // true when the request carried full=1, i.e. index.md bodies were searched as well
Results []searchResult
}
// handleSearch walks the wiki root and renders a search results page for the
// query in r.URL.Query().Get("q"). Only invoked when path is "/" and "q" is
-// present.
+// present. With ?full=1 it also scans index.md bodies (Phase 2).
func (h *handler) handleSearch(w http.ResponseWriter, r *http.Request) {
query := strings.TrimSpace(r.URL.Query().Get("q"))
- results := searchFolders(h.root, query)
+ full := r.URL.Query().Get("full") == "1" // only the exact value "1" switches to the full search
+
+ var results []searchResult
+ if full {
+ results = searchFull(h.root, query) // folder names plus index.md bodies
+ } else {
+ results = searchFolders(h.root, query) // folder names only
+ }
w.Header().Set("Content-Type", "text/html; charset=utf-8")
- if err := searchTmpl.Execute(w, searchPageData{Query: query, Results: results}); err != nil {
+ if err := searchTmpl.Execute(w, searchPageData{Query: query, Full: full, Results: results}); err != nil { // a template failure is only logged, no error response is sent
log.Printf("search template error: %v", err)
}
}
@@ -57,28 +71,19 @@ func searchFolders(root, query string) []searchResult {
maxDist = 3
}
- // Resolve symlinks so WalkDir descends into the real tree even when the
- // configured wiki root is itself a symlink (as on the deployed NAS).
- walkRoot, err := filepath.EvalSymlinks(root)
- if err != nil {
- walkRoot = root
- }
-
+ walkRoot := resolveWalkRoot(root)
var results []searchResult
_ = filepath.WalkDir(walkRoot, func(fsPath string, d fs.DirEntry, err error) error {
if err != nil {
return nil
}
- name := d.Name()
- if strings.HasPrefix(name, ".") {
- if d.IsDir() && fsPath != root {
- return filepath.SkipDir
- }
- return nil
+ if skip, walkErr := hiddenSkip(fsPath, walkRoot, d); skip {
+ return walkErr
}
if !d.IsDir() || fsPath == walkRoot {
return nil
}
+ name := d.Name()
rank, ok := matchRank(strings.ToLower(name), q, maxDist)
if !ok {
return nil
@@ -87,10 +92,9 @@ func searchFolders(root, query string) []searchResult {
if relErr != nil {
return nil
}
- urlPath := "/" + filepath.ToSlash(rel) + "/"
results = append(results, searchResult{
Name: name,
- URL: urlPath,
+ URL: "/" + filepath.ToSlash(rel) + "/",
Path: filepath.ToSlash(rel),
Rank: rank,
})
@@ -106,6 +110,105 @@ func searchFolders(root, query string) []searchResult {
return results
}
+// searchFull performs the Phase 2 search: every non-hidden directory below
+// root is scored by the number of query tokens that hit a word of its folder
+// name or of its index.md body. Whether a token hits a word is decided by
+// tokenInWords (exact match or Levenshtein distance of at most 2).
+//
+// Results are ordered by descending score; on equal scores a directory with a
+// folder-name hit sorts first, and remaining ties are broken by the
+// lowercased folder name. A query that yields no tokens returns nil.
+func searchFull(root, query string) []searchResult {
+	// tokenize("") yields no tokens, so the empty query is covered here too.
+	terms := tokenize(query)
+	if len(terms) == 0 {
+		return nil
+	}
+
+	base := resolveWalkRoot(root)
+	var found []searchResult
+	visit := func(dir string, entry fs.DirEntry, walkErr error) error {
+		if walkErr != nil {
+			// Unreadable entries are skipped instead of aborting the walk.
+			return nil
+		}
+		skipped, verdict := hiddenSkip(dir, base, entry)
+		if skipped {
+			return verdict
+		}
+		if dir == base || !entry.IsDir() {
+			return nil
+		}
+
+		folder := entry.Name()
+		// The read error is dropped on purpose: without a readable index.md
+		// the body is empty and the folder is matched on its name alone.
+		raw, _ := os.ReadFile(filepath.Join(dir, "index.md"))
+		text := string(raw)
+		lowered := strings.ToLower(text)
+		nameWords, bodyWords := tokenize(folder), tokenize(lowered)
+
+		hits, hitName := 0, false
+		for _, term := range terms {
+			switch {
+			case tokenInWords(term, nameWords):
+				hitName = true
+				hits++
+			case tokenInWords(term, bodyWords):
+				hits++
+			}
+		}
+		if hits == 0 {
+			return nil
+		}
+
+		rel, err := filepath.Rel(base, dir)
+		if err != nil {
+			return nil
+		}
+		slashed := filepath.ToSlash(rel)
+		found = append(found, searchResult{
+			Name:    folder,
+			URL:     "/" + slashed + "/",
+			Path:    slashed,
+			Score:   hits,
+			NameHit: hitName,
+			Snippet: makeSnippet(text, lowered, terms),
+		})
+		return nil
+	}
+	// visit only ever returns nil or filepath.SkipDir, so the value returned
+	// by WalkDir carries no information.
+	_ = filepath.WalkDir(base, visit)
+
+	sort.SliceStable(found, func(a, b int) bool {
+		x, y := found[a], found[b]
+		switch {
+		case x.Score != y.Score:
+			return x.Score > y.Score
+		case x.NameHit != y.NameHit:
+			return x.NameHit
+		default:
+			return strings.ToLower(x.Name) < strings.ToLower(y.Name)
+		}
+	})
+	return found
+}
+
+// resolveWalkRoot returns root with all symlinks evaluated, so that a
+// subsequent WalkDir descends into the real tree even when the configured
+// wiki root is itself a symlink (as on the NAS). If the symlinks cannot be
+// evaluated, root is returned unchanged.
+func resolveWalkRoot(root string) string {
+	resolved, err := filepath.EvalSymlinks(root)
+	if err != nil {
+		return root
+	}
+	return resolved
+}
+
+// hiddenSkip decides what a WalkDir callback should do with an entry whose
+// name starts with a dot. It returns (skipped, walkErr):
+//
+//   - (false, nil): the entry is not hidden and should be processed normally;
+//   - (true, filepath.SkipDir): a hidden directory other than walkRoot, whose
+//     subtree is to be pruned;
+//   - (true, nil): a hidden file, or a walkRoot whose own name starts with a
+//     dot; nothing is pruned.
+//
+// When skipped is true the caller is expected to `return walkErr`.
+func hiddenSkip(fsPath, walkRoot string, d fs.DirEntry) (bool, error) {
+	switch {
+	case !strings.HasPrefix(d.Name(), "."):
+		return false, nil
+	case d.IsDir() && fsPath != walkRoot:
+		return true, filepath.SkipDir
+	default:
+		return true, nil
+	}
+}
+
// matchRank returns the best (lowest) rank for which name matches q, or
// (0, false) if no rule matches. Inputs are expected to be lowercased.
func matchRank(name, q string, maxDist int) (int, bool) {
@@ -124,6 +227,86 @@ func matchRank(name, q string, maxDist int) (int, bool) {
return 0, false
}
+// tokenize splits s into lowercase word tokens. A token is a maximal run of
+// letters and digits (unicode.IsLetter / unicode.IsDigit); any other rune
+// ends the current token and is dropped. Combining marks (unicode.IsMark)
+// that follow a letter or digit stay attached to the token, so decomposed
+// (NFD) text such as "u" + U+0308 is not cut in the middle of a word. No
+// normalisation is performed: composed and decomposed spellings produce
+// different tokens. Returns nil when s contains no letter or digit.
+func tokenize(s string) []string {
+	var tokens []string
+	var b strings.Builder
+	// flush appends the token collected so far, if any, and starts a new one.
+	flush := func() {
+		if b.Len() > 0 {
+			tokens = append(tokens, b.String())
+			b.Reset()
+		}
+	}
+	for _, r := range s {
+		switch {
+		case unicode.IsLetter(r) || unicode.IsDigit(r):
+			b.WriteRune(unicode.ToLower(r))
+		case unicode.IsMark(r) && b.Len() > 0:
+			// A mark modifies the preceding character; treating it as a
+			// separator would split the word in two.
+			b.WriteRune(r)
+		default:
+			flush()
+		}
+	}
+	flush()
+	return tokens
+}
+
+// tokenInWords reports whether any element of words equals qt or lies within
+// a Levenshtein distance of 2 from it. Strings are compared as given, so
+// callers must pass qt and words already lowercased.
+//
+// NOTE(review): the distance limit is fixed, so a token of one or two runes
+// is matched by every word of at most two runes — confirm this is intended.
+func tokenInWords(qt string, words []string) bool {
+	const maxEdits = 2
+	for i := range words {
+		// The equality test is a cheap shortcut ahead of the distance computation.
+		if words[i] == qt || levenshtein(words[i], qt) <= maxEdits {
+			return true
+		}
+	}
+	return false
+}
+
+// snippetWS matches runs of whitespace; makeSnippet collapses each run to a
+// single space.
+var snippetWS = regexp.MustCompile(`\s+`)
+
+// makeSnippet returns roughly 100 bytes of body centred on the earliest
+// substring match of any query token, with whitespace runs collapsed and "…"
+// added on every side where text was cut off. It returns "" when no token
+// occurs as a substring of bodyLower (a token may have hit only via
+// Levenshtein, leaving no exact span to quote).
+//
+// bodyLower must be strings.ToLower(body) and tokens must be lowercase.
+// Lowercasing can change the byte length of a rune and replaces invalid
+// UTF-8 bytes by U+FFFD, so the offset found in bodyLower is translated back
+// to the corresponding offset in body before slicing. Using it directly
+// could quote the wrong region or index past the end of body.
+func makeSnippet(body, bodyLower string, tokens []string) string {
+	const radius = 50 // bytes kept on each side of the match start
+
+	lowerPos := -1
+	for _, t := range tokens {
+		if i := strings.Index(bodyLower, t); i >= 0 && (lowerPos < 0 || i < lowerPos) {
+			lowerPos = i
+		}
+	}
+	if lowerPos < 0 {
+		return ""
+	}
+
+	// Walk body rune by rune while counting the bytes its lowercased form
+	// occupies, until the lowercased offset of the match is reached. If the
+	// inputs do not line up, pos stays clamped to len(body).
+	pos, consumed := len(body), 0
+	for i, r := range body {
+		if consumed >= lowerPos {
+			pos = i
+			break
+		}
+		consumed += len(string(unicode.ToLower(r)))
+	}
+
+	start := pos - radius
+	if start < 0 {
+		start = 0
+	}
+	end := pos + radius
+	if end > len(body) {
+		end = len(body)
+	}
+	// Move both cuts off UTF-8 continuation bytes (10xxxxxx) so the window
+	// never splits a multi-byte character.
+	for start > 0 && body[start]&0xC0 == 0x80 {
+		start--
+	}
+	for end < len(body) && body[end]&0xC0 == 0x80 {
+		end++
+	}
+
+	s := strings.TrimSpace(snippetWS.ReplaceAllString(body[start:end], " "))
+	if start > 0 {
+		s = "…" + s
+	}
+	if end < len(body) {
+		s += "…"
+	}
+	return s
+}
+
// levenshtein returns the edit distance between a and b. Operates on runes so
// multi-byte characters count as one edit.
func levenshtein(a, b string) int {