Update search function

This commit is contained in:
2026-05-04 10:03:53 +02:00
parent be035bf478
commit 99579ba7e5
2 changed files with 54 additions and 153 deletions
-1
View File
@@ -9,7 +9,6 @@
<article class="search-card"> <article class="search-card">
<a href="{{.URL}}">{{.Name}}</a> <a href="{{.URL}}">{{.Name}}</a>
<div class="muted">/{{.Path}}</div> <div class="muted">/{{.Path}}</div>
{{if .Snippet}}<div>{{.Snippet}}</div>{{end}}
</article> </article>
{{end}} {{end}}
{{else}} {{else}}
+54 -152
View File
@@ -4,21 +4,17 @@ import (
"io/fs" "io/fs"
"log" "log"
"net/http" "net/http"
"os"
"path/filepath" "path/filepath"
"regexp"
"sort" "sort"
"strings" "strings"
"unicode" "unicode"
) )
type searchResult struct { type searchResult struct {
Name string Name string
URL string URL string
Path string Path string
Score int // number of query tokens that hit Score int
NameHit bool // at least one hit came from the folder name
Snippet string // ~300 chars around first body hit, or page stub for name-only hits
} }
type searchPageData struct { type searchPageData struct {
@@ -52,15 +48,15 @@ func (h *handler) handleSearch(w http.ResponseWriter, r *http.Request) {
} }
} }
// searchWiki walks root and scores each directory by how many whitespace-split // searchWiki walks root and scores each directory by how well the folder name
// query tokens hit a word in either the folder name or its index.md body. // matches the query. Page contents are not searched. Higher score = more
// A word "hits" a token via case-insensitive equality or Levenshtein ≤ 2. // relevant; exact matches rank first.
// Folder-name hits break score ties above content-only hits.
func searchWiki(root, query string) []searchResult { func searchWiki(root, query string) []searchResult {
if query == "" { if query == "" {
return nil return nil
} }
qTokens := tokenize(query) qLower := strings.ToLower(query)
qTokens := tokenize(qLower)
if len(qTokens) == 0 { if len(qTokens) == 0 {
return nil return nil
} }
@@ -78,40 +74,19 @@ func searchWiki(root, query string) []searchResult {
return nil return nil
} }
name := d.Name() name := d.Name()
body, _ := os.ReadFile(filepath.Join(fsPath, "index.md")) score := scoreName(strings.ToLower(name), qLower, qTokens)
nameWords := tokenize(name)
bodyStr := string(body)
bodyLower := strings.ToLower(bodyStr)
bodyWords := tokenize(bodyLower)
score := 0
nameHit := false
for _, qt := range qTokens {
inName := tokenInWords(qt, nameWords)
inBody := tokenInWords(qt, bodyWords)
if inName || inBody {
score++
}
if inName {
nameHit = true
}
}
if score == 0 { if score == 0 {
return nil return nil
} }
rel, relErr := filepath.Rel(walkRoot, fsPath) rel, relErr := filepath.Rel(walkRoot, fsPath)
if relErr != nil { if relErr != nil {
return nil return nil
} }
results = append(results, searchResult{ results = append(results, searchResult{
Name: name, Name: name,
URL: "/" + filepath.ToSlash(rel) + "/", URL: "/" + filepath.ToSlash(rel) + "/",
Path: filepath.ToSlash(rel), Path: filepath.ToSlash(rel),
Score: score, Score: score,
NameHit: nameHit,
Snippet: makeSnippet(bodyStr, bodyLower, qTokens),
}) })
return nil return nil
}) })
@@ -120,14 +95,52 @@ func searchWiki(root, query string) []searchResult {
if results[i].Score != results[j].Score { if results[i].Score != results[j].Score {
return results[i].Score > results[j].Score return results[i].Score > results[j].Score
} }
if results[i].NameHit != results[j].NameHit { di, dj := strings.Count(results[i].Path, "/"), strings.Count(results[j].Path, "/")
return results[i].NameHit if di != dj {
return di < dj
} }
return strings.ToLower(results[i].Name) < strings.ToLower(results[j].Name) return strings.ToLower(results[i].Name) < strings.ToLower(results[j].Name)
}) })
return results return results
} }
// scoreName ranks how well nameLower matches the query; 0 means no match.
//
// nameLower and qLower are the lowercased folder name and the lowercased full
// query, and qTokens are the query's tokens. A whole-name exact match returns
// 1000. Otherwise the score is the sum, over the query tokens, of each token's
// best match against any word of the name:
//
//	100  the word equals the token
//	 50  the word starts with the token
//	 20  the word contains the token
//	  5  the word is within Levenshtein distance 2 of the token
//
// Position within the name does not matter — nesting depth is the tiebreaker,
// applied by the caller.
func scoreName(nameLower, qLower string, qTokens []string) int {
	if nameLower == qLower {
		return 1000
	}
	nameWords := tokenize(nameLower)
	score := 0
	for _, qt := range qTokens {
		best := 0
	words:
		for _, w := range nameWords {
			switch {
			case w == qt:
				// Nothing outranks an exact word match, so stop scanning.
				best = 100
				break words
			case strings.HasPrefix(w, qt):
				if best < 50 {
					best = 50
				}
			case strings.Contains(w, qt):
				if best < 20 {
					best = 20
				}
			case best < 5 && levenshtein(w, qt) <= 2:
				// Edit distance is the costly check; it only runs while this
				// token has no match yet, since it cannot raise a higher tier.
				best = 5
			}
		}
		score += best
	}
	return score
}
// resolveWalkRoot resolves symlinks so WalkDir descends into the real tree // resolveWalkRoot resolves symlinks so WalkDir descends into the real tree
// even when the configured wiki root is itself a symlink (as on the NAS). // even when the configured wiki root is itself a symlink (as on the NAS).
func resolveWalkRoot(root string) string { func resolveWalkRoot(root string) string {
@@ -172,117 +185,6 @@ func tokenize(s string) []string {
return tokens return tokens
} }
// tokenInWords reports whether any entry of words matches qt, where a match
// is either equality or a Levenshtein distance of at most 2. Both qt and the
// entries of words are expected to be lowercase already.
func tokenInWords(qt string, words []string) bool {
	for i := range words {
		// The equality test is a cheap fast path ahead of the edit distance.
		if words[i] == qt || levenshtein(words[i], qt) <= 2 {
			return true
		}
	}
	return false
}
// snippetWS matches a run of one or more whitespace characters; the snippet
// builders use it to collapse each run into a single space.
var snippetWS = regexp.MustCompile(`\s+`)
// snippetWindow is the target snippet size. It is applied to string byte
// offsets, so it is a byte count rather than a rune count.
const snippetWindow = 300
// makeSnippet returns roughly snippetWindow bytes of body centred on the
// earliest substring occurrence of any query token.
//
// body is the page text and bodyLower its lowercased form (the form the
// tokens are searched in); tokens are the lowercase query tokens. When no
// token occurs as a substring (e.g. it matched only via Levenshtein, or the
// hit was folder-name-only), the result is makeStub(body). Otherwise the
// window is trimmed to word boundaries, whitespace runs are collapsed to
// single spaces, and "…" is added on each side that was cut.
func makeSnippet(body, bodyLower string, tokens []string) string {
	pos := -1
	for _, t := range tokens {
		i := strings.Index(bodyLower, t)
		if i < 0 {
			continue
		}
		if pos < 0 || i < pos {
			pos = i
		}
	}
	if pos < 0 {
		return makeStub(body)
	}
	// pos is a byte offset into bodyLower but is used to slice body.
	// Lowercasing can change a character's encoded length, so the offset may
	// lie past the end of body; clamp it so start can never exceed end below
	// (which would panic when slicing).
	if pos > len(body) {
		pos = len(body)
	}
	half := snippetWindow / 2
	start := pos - half
	if start < 0 {
		start = 0
	}
	end := pos + half
	if end > len(body) {
		end = len(body)
	}
	start, end = expandToWordBoundaries(body, start, end)
	out := snippetWS.ReplaceAllString(body[start:end], " ")
	out = strings.TrimSpace(out)
	if start > 0 {
		out = "…" + out
	}
	if end < len(body) {
		out = out + "…"
	}
	return out
}
// makeStub builds a short preview from the beginning of body. The text is
// first passed through stripFirstHeading (defined elsewhere; expected to drop
// the leading "# Heading" line) and trimmed. At most snippetWindow bytes are
// kept, cut back to a word boundary, with whitespace runs collapsed to single
// spaces and "…" appended when text was cut off. Returns "" when nothing
// remains after stripping and trimming.
func makeStub(body string) string {
	text := strings.TrimSpace(string(stripFirstHeading([]byte(body))))
	if text == "" {
		return ""
	}
	limit := len(text)
	if limit > snippetWindow {
		limit = snippetWindow
	}
	// Only the end offset matters here; the stub always starts at byte 0.
	_, cut := expandToWordBoundaries(text, 0, limit)
	stub := strings.TrimSpace(snippetWS.ReplaceAllString(text[:cut], " "))
	if cut < len(text) {
		return stub + "…"
	}
	return stub
}
// expandToWordBoundaries adjusts the byte offsets start and end into s so
// that s[start:end] neither begins or ends inside a UTF-8 sequence nor splits
// a word. Despite the name, the word adjustment narrows the window.
//
// First, start backs up and end advances off any UTF-8 continuation byte.
// Then, if start falls inside a word, it moves forward past the rest of that
// word; if end falls inside a word, it moves backward to where that word
// begins. Neither offset crosses the other. Word membership is decided by
// isWordByte.
func expandToWordBoundaries(s string, start, end int) (int, int) {
	// 10xxxxxx marks a UTF-8 continuation byte.
	for start > 0 && start < len(s) && s[start]&0xC0 == 0x80 {
		start--
	}
	for end < len(s) && s[end]&0xC0 == 0x80 {
		end++
	}
	if start > 0 && start < len(s) && isWordByte(s[start-1]) && isWordByte(s[start]) {
		for start < end && isWordByte(s[start]) {
			start++
		}
	}
	// end > 0 keeps s[end-1] in range when the window ends at offset 0.
	if end > 0 && end < len(s) && isWordByte(s[end-1]) && isWordByte(s[end]) {
		for end > start && isWordByte(s[end-1]) {
			end--
		}
	}
	return start, end
}
// isWordByte reports whether b counts as part of a word: an ASCII letter or
// digit, or any byte with the high bit set. High-bit bytes belong to a
// multi-byte UTF-8 sequence, which is assumed to be part of a word.
func isWordByte(b byte) bool {
	switch {
	case b >= 0x80:
		return true
	case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z', '0' <= b && b <= '9':
		return true
	default:
		return false
	}
}
// levenshtein returns the edit distance between a and b. Operates on runes so // levenshtein returns the edit distance between a and b. Operates on runes so
// multi-byte characters count as one edit. // multi-byte characters count as one edit.
func levenshtein(a, b string) int { func levenshtein(a, b string) int {