diff --git a/assets/search.html b/assets/search.html
index cc31320..e90ebe9 100644
--- a/assets/search.html
+++ b/assets/search.html
@@ -9,7 +9,6 @@
{{.Name}}
/{{.Path}}
- {{if .Snippet}}{{.Snippet}}
{{end}}
{{end}}
{{else}}
diff --git a/search.go b/search.go
index 67a75ee..7fb74b1 100644
--- a/search.go
+++ b/search.go
@@ -4,21 +4,17 @@ import (
"io/fs"
"log"
"net/http"
- "os"
"path/filepath"
- "regexp"
"sort"
"strings"
"unicode"
)
type searchResult struct {
- Name string
- URL string
- Path string
- Score int // number of query tokens that hit
- NameHit bool // at least one hit came from the folder name
- Snippet string // ~300 chars around first body hit, or page stub for name-only hits
+ Name string // entry name of the matched directory (d.Name() during the walk)
+ URL string // "/" + slash-separated path relative to the walk root + "/"
+ Path string // slash-separated path relative to the walk root
+ Score int // relevance computed by scoreName; higher sorts first
}
type searchPageData struct {
@@ -52,15 +48,15 @@ func (h *handler) handleSearch(w http.ResponseWriter, r *http.Request) {
}
}
-// searchWiki walks root and scores each directory by how many whitespace-split
-// query tokens hit a word in either the folder name or its index.md body.
-// A word "hits" a token via case-insensitive equality or Levenshtein ≤ 2.
-// Folder-name hits break score ties above content-only hits.
+// searchWiki walks root and scores each directory by how well the folder name
+// matches the query. Page contents are not searched. Results sort by score
+// (exact name matches first), then shallower depth, then name (case-insensitive).
func searchWiki(root, query string) []searchResult {
if query == "" {
return nil
}
- qTokens := tokenize(query)
+ qLower := strings.ToLower(query)
+ qTokens := tokenize(qLower)
if len(qTokens) == 0 {
return nil
}
@@ -78,40 +74,19 @@ func searchWiki(root, query string) []searchResult {
return nil
}
name := d.Name()
- body, _ := os.ReadFile(filepath.Join(fsPath, "index.md"))
-
- nameWords := tokenize(name)
- bodyStr := string(body)
- bodyLower := strings.ToLower(bodyStr)
- bodyWords := tokenize(bodyLower)
-
- score := 0
- nameHit := false
- for _, qt := range qTokens {
- inName := tokenInWords(qt, nameWords)
- inBody := tokenInWords(qt, bodyWords)
- if inName || inBody {
- score++
- }
- if inName {
- nameHit = true
- }
- }
+ score := scoreName(strings.ToLower(name), qLower, qTokens)
if score == 0 {
return nil
}
-
rel, relErr := filepath.Rel(walkRoot, fsPath)
if relErr != nil {
return nil
}
results = append(results, searchResult{
- Name: name,
- URL: "/" + filepath.ToSlash(rel) + "/",
- Path: filepath.ToSlash(rel),
- Score: score,
- NameHit: nameHit,
- Snippet: makeSnippet(bodyStr, bodyLower, qTokens),
+ Name: name,
+ URL: "/" + filepath.ToSlash(rel) + "/",
+ Path: filepath.ToSlash(rel),
+ Score: score,
})
return nil
})
@@ -120,14 +95,52 @@ func searchWiki(root, query string) []searchResult {
if results[i].Score != results[j].Score {
return results[i].Score > results[j].Score
}
- if results[i].NameHit != results[j].NameHit {
- return results[i].NameHit
+ di, dj := strings.Count(results[i].Path, "/"), strings.Count(results[j].Path, "/")
+ if di != dj {
+ return di < dj
}
return strings.ToLower(results[i].Name) < strings.ToLower(results[j].Name)
})
return results
}
+// scoreName scores how well nameLower matches the query (the caller lowercases
+// both). A whole-name match returns 1000; otherwise the score is the sum of
+// each token's best tier over the words in the name, and 0 means no match.
+// Word position does not matter; the caller breaks ties by nesting depth.
+func scoreName(nameLower, qLower string, qTokens []string) int {
+ if nameLower == qLower {
+ return 1000 // whole-name match; exceeds the per-token total unless 10+ tokens hit exactly
+ }
+ score := 0
+ nameWords := tokenize(nameLower)
+ for _, qt := range qTokens {
+ best := 0 // best tier for this token across all name words
+ for _, w := range nameWords {
+ switch { // cases run strongest-first; only the first true case applies
+ case w == qt: // exact word
+ if best < 100 {
+ best = 100
+ }
+ case strings.HasPrefix(w, qt): // word starts with the token
+ if best < 50 {
+ best = 50
+ }
+ case strings.Contains(w, qt): // token appears inside the word
+ if best < 20 {
+ best = 20
+ }
+ case levenshtein(w, qt) <= 2: // fuzzy: edit distance of at most 2
+ if best < 5 {
+ best = 5
+ }
+ }
+ }
+ score += best // adds 0 when no word matched this token
+ }
+ return score
+}
+
// resolveWalkRoot resolves symlinks so WalkDir descends into the real tree
// even when the configured wiki root is itself a symlink (as on the NAS).
func resolveWalkRoot(root string) string {
@@ -172,117 +185,6 @@ func tokenize(s string) []string {
return tokens
}
-// tokenInWords reports whether qt matches any word exactly or within
-// Levenshtein distance 2. qt and words must already be lowercase.
-func tokenInWords(qt string, words []string) bool {
- for _, w := range words {
- if w == qt {
- return true
- }
- if levenshtein(w, qt) <= 2 {
- return true
- }
- }
- return false
-}
-
-var snippetWS = regexp.MustCompile(`\s+`)
-
-const snippetWindow = 300
-
-// makeSnippet returns ~300 characters of body around the earliest substring
-// match of any query token. When no token has an exact substring span (e.g.
-// matched only via Levenshtein, or the hit was folder-name-only), it falls
-// back to the first ~300 chars of the body with the leading heading stripped.
-// Returns "" only when the body itself is empty.
-func makeSnippet(body, bodyLower string, tokens []string) string {
- pos := -1
- for _, t := range tokens {
- i := strings.Index(bodyLower, t)
- if i < 0 {
- continue
- }
- if pos < 0 || i < pos {
- pos = i
- }
- }
- if pos < 0 {
- return makeStub(body)
- }
-
- half := snippetWindow / 2
- start := pos - half
- if start < 0 {
- start = 0
- }
- end := pos + half
- if end > len(body) {
- end = len(body)
- }
- start, end = expandToWordBoundaries(body, start, end)
- out := snippetWS.ReplaceAllString(body[start:end], " ")
- out = strings.TrimSpace(out)
- if start > 0 {
- out = "…" + out
- }
- if end < len(body) {
- out = out + "…"
- }
- return out
-}
-
-// makeStub returns ~snippetWindow chars from the start of body, with the
-// leading "# Heading" line stripped. Returns "" for an empty body.
-func makeStub(body string) string {
- stripped := string(stripFirstHeading([]byte(body)))
- stripped = strings.TrimSpace(stripped)
- if stripped == "" {
- return ""
- }
- end := snippetWindow
- if end > len(stripped) {
- end = len(stripped)
- }
- _, end = expandToWordBoundaries(stripped, 0, end)
- out := snippetWS.ReplaceAllString(stripped[:end], " ")
- out = strings.TrimSpace(out)
- if end < len(stripped) {
- out = out + "…"
- }
- return out
-}
-
-// expandToWordBoundaries adjusts start/end so they don't split a word and
-// don't fall in the middle of a UTF-8 sequence. start moves forward past
-// any partial word at the beginning; end moves backward to the previous
-// word boundary.
-func expandToWordBoundaries(s string, start, end int) (int, int) {
- for start > 0 && start < len(s) && s[start]&0xC0 == 0x80 {
- start--
- }
- for end < len(s) && s[end]&0xC0 == 0x80 {
- end++
- }
- if start > 0 && start < len(s) && isWordByte(s[start-1]) && isWordByte(s[start]) {
- for start < end && isWordByte(s[start]) {
- start++
- }
- }
- if end < len(s) && isWordByte(s[end-1]) && isWordByte(s[end]) {
- for end > start && isWordByte(s[end-1]) {
- end--
- }
- }
- return start, end
-}
-
-func isWordByte(b byte) bool {
- if b&0x80 != 0 {
- return true // assume any multibyte char is part of a word
- }
- return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || (b >= '0' && b <= '9')
-}
-
// levenshtein returns the edit distance between a and b. Operates on runes so
// multi-byte characters count as one edit.
func levenshtein(a, b string) int {