Update search function

2026-05-04 10:03:53 +02:00
parent c016dcabaa
commit 72edf7b258
2 changed files with 54 additions and 153 deletions
@@ -9,7 +9,6 @@
    <article class="search-card">
        <a href="{{.URL}}">{{.Name}}</a>
        <div class="muted">/{{.Path}}</div>
        {{if .Snippet}}<div>{{.Snippet}}</div>{{end}}
    </article>
    {{end}}
    {{else}}
@@ -4,21 +4,17 @@ import (
 	"io/fs"
 	"log"
 	"net/http"
 	"os"
 	"path/filepath"
 	"regexp"
 	"sort"
 	"strings"
 	"unicode"
 )
 type searchResult struct {
-	Name    string
+	Name  string
-	URL     string
+	URL   string
-	Path    string
+	Path  string
-	Score   int    // number of query tokens that hit
+	Score int
 	NameHit bool   // at least one hit came from the folder name
 	Snippet string // ~300 chars around first body hit, or page stub for name-only hits
 }
 type searchPageData struct {
@@ -52,15 +48,15 @@ func (h *handler) handleSearch(w http.ResponseWriter, r *http.Request) {
 	}
 }
-// searchWiki walks root and scores each directory by how many whitespace-split
+// searchWiki walks root and scores each directory by how well the folder name
-// query tokens hit a word in either the folder name or its index.md body.
+// matches the query. Page contents are not searched. Higher score = more
-// A word "hits" a token via case-insensitive equality or Levenshtein ≤ 2.
+// relevant; exact matches rank first.
 // Score ties go to the shallower path, then to the alphabetically earlier name.
 func searchWiki(root, query string) []searchResult {
 	if query == "" {
 		return nil
 	}
-	qTokens := tokenize(query)
+	qLower := strings.ToLower(query)
 	qTokens := tokenize(qLower)
 	if len(qTokens) == 0 {
 		return nil
 	}
@@ -78,40 +74,19 @@ func searchWiki(root, query string) []searchResult {
 			return nil
 		}
 		name := d.Name()
-		body, _ := os.ReadFile(filepath.Join(fsPath, "index.md"))
+		score := scoreName(strings.ToLower(name), qLower, qTokens)
 		nameWords := tokenize(name)
 		bodyStr := string(body)
 		bodyLower := strings.ToLower(bodyStr)
 		bodyWords := tokenize(bodyLower)
 		score := 0
 		nameHit := false
 		for _, qt := range qTokens {
 			inName := tokenInWords(qt, nameWords)
 			inBody := tokenInWords(qt, bodyWords)
 			if inName || inBody {
 				score++
 			}
 			if inName {
 				nameHit = true
 			}
 		}
 		if score == 0 {
 			return nil
 		}
 		rel, relErr := filepath.Rel(walkRoot, fsPath)
 		if relErr != nil {
 			return nil
 		}
 		results = append(results, searchResult{
-			Name:    name,
+			Name:  name,
-			URL:     "/" + filepath.ToSlash(rel) + "/",
+			URL:   "/" + filepath.ToSlash(rel) + "/",
-			Path:    filepath.ToSlash(rel),
+			Path:  filepath.ToSlash(rel),
-			Score:   score,
+			Score: score,
 			NameHit: nameHit,
 			Snippet: makeSnippet(bodyStr, bodyLower, qTokens),
 		})
 		return nil
 	})
@@ -120,14 +95,52 @@ func searchWiki(root, query string) []searchResult {
 		if results[i].Score != results[j].Score {
 			return results[i].Score > results[j].Score
 		}
-		if results[i].NameHit != results[j].NameHit {
+		di, dj := strings.Count(results[i].Path, "/"), strings.Count(results[j].Path, "/")
-			return results[i].NameHit
+		if di != dj {
 			return di < dj
 		}
 		return strings.ToLower(results[i].Name) < strings.ToLower(results[j].Name)
 	})
 	return results
 }
 // scoreName ranks how well nameLower matches the query and returns a
 // non-negative relevance score; 0 means nothing matched.
 //
 // nameLower and qLower are compared as given, so both must already be
 // lowercased; qTokens is the tokenized form of the query. A whole-name exact
 // match returns 1000. Otherwise the score is the sum, over the query tokens,
 // of each token's best match against the words of the name:
 //
 //	100  a word equals the token
 //	 50  a word starts with the token
 //	 20  a word contains the token
 //	  5  a word is within Levenshtein distance 2 of the token
 //
 // A fuzzy (distance) match must also leave at least one character of the
 // token untouched: the distance has to be smaller than the token's rune
 // count. Without that rule a one- or two-letter token "matches" every other
 // one- or two-letter word, because two edits can rewrite it completely.
 //
 // Position within the name does not matter — nesting depth is the
 // tiebreaker, applied by the caller.
 func scoreName(nameLower, qLower string, qTokens []string) int {
 	if nameLower == qLower {
 		return 1000
 	}
 	const (
 		exactScore    = 100
 		prefixScore   = 50
 		containsScore = 20
 		fuzzyScore    = 5
 		maxFuzzyDist  = 2
 	)
 	nameWords := tokenize(nameLower)
 	score := 0
 	for _, qt := range qTokens {
 		// Rune count, not byte count: the distance is measured in runes.
 		qtRunes := len([]rune(qt))
 		best := 0
 	words:
 		for _, w := range nameWords {
 			switch {
 			case w == qt:
 				// Nothing can beat an exact word hit; stop scanning.
 				best = exactScore
 				break words
 			case strings.HasPrefix(w, qt):
 				if best < prefixScore {
 					best = prefixScore
 				}
 			case strings.Contains(w, qt):
 				if best < containsScore {
 					best = containsScore
 				}
 			case best < fuzzyScore:
 				// Only pay for the edit distance when it could still
 				// improve this token's score.
 				if d := levenshtein(w, qt); d <= maxFuzzyDist && d < qtRunes {
 					best = fuzzyScore
 				}
 			}
 		}
 		score += best
 	}
 	return score
 }
 // resolveWalkRoot resolves symlinks so WalkDir descends into the real tree
 // even when the configured wiki root is itself a symlink (as on the NAS).
 func resolveWalkRoot(root string) string {
@@ -172,117 +185,6 @@ func tokenize(s string) []string {
 	return tokens
 }
 // tokenInWords reports whether at least one entry of words equals qt or lies
 // within Levenshtein distance 2 of it. Comparison is done on the strings as
 // given, so qt and words must already be lowercase.
 func tokenInWords(qt string, words []string) bool {
 	for i := range words {
 		// Equality is checked first so the edit distance is only computed
 		// for words that are not an exact hit.
 		if candidate := words[i]; candidate == qt || levenshtein(candidate, qt) <= 2 {
 			return true
 		}
 	}
 	return false
 }
 // snippetWS matches a run of one or more whitespace characters. The snippet
 // builders use it to collapse each run into a single space.
 var snippetWS = regexp.MustCompile(`\s+`)
 // snippetWindow is the target snippet size. It is applied to byte offsets of
 // the body, so the resulting snippet is roughly this many bytes long.
 const snippetWindow = 300
 // makeSnippet returns roughly snippetWindow bytes of body centred on the
 // earliest substring match of any query token.
 //
 // body is the original text and bodyLower its lowercased form; tokens are
 // searched in bodyLower and must therefore be lowercase too. When no token
 // occurs as a substring (e.g. it matched only via Levenshtein, or the hit was
 // folder-name-only), the result is makeStub(body).
 //
 // The window is trimmed to word boundaries, whitespace runs are collapsed to
 // single spaces, and "…" is added on each side that was cut off.
 //
 // The match offset comes from bodyLower but is used to slice body. The two
 // can differ in byte length, because lowercasing may change a rune's UTF-8
 // width, so the offset is clamped to body's length. In that case the window
 // may be slightly off-centre, but the slice is always in range.
 func makeSnippet(body, bodyLower string, tokens []string) string {
 	pos := -1
 	for _, t := range tokens {
 		if i := strings.Index(bodyLower, t); i >= 0 && (pos < 0 || i < pos) {
 			pos = i
 		}
 	}
 	if pos < 0 {
 		return makeStub(body)
 	}
 	// Guard against bodyLower being longer than body; without this, start
 	// could end up past end and the slice below would panic.
 	if pos > len(body) {
 		pos = len(body)
 	}
 	half := snippetWindow / 2
 	start := pos - half
 	if start < 0 {
 		start = 0
 	}
 	end := pos + half
 	if end > len(body) {
 		end = len(body)
 	}
 	start, end = expandToWordBoundaries(body, start, end)
 	out := strings.TrimSpace(snippetWS.ReplaceAllString(body[start:end], " "))
 	if start > 0 {
 		out = "…" + out
 	}
 	if end < len(body) {
 		out += "…"
 	}
 	return out
 }
 // makeStub builds a short preview from the beginning of body. The first
 // heading is removed via stripFirstHeading, the remainder is trimmed, cut to
 // at most about snippetWindow bytes on a word boundary, and whitespace runs
 // are collapsed to single spaces. A trailing "…" marks a preview that does
 // not reach the end of the text. An empty result is returned when nothing is
 // left after stripping and trimming.
 func makeStub(body string) string {
 	text := strings.TrimSpace(string(stripFirstHeading([]byte(body))))
 	if len(text) == 0 {
 		return ""
 	}
 	limit := len(text)
 	if limit > snippetWindow {
 		limit = snippetWindow
 	}
 	// Only the end of the window can move; the start is pinned at 0.
 	_, cut := expandToWordBoundaries(text, 0, limit)
 	stub := strings.TrimSpace(snippetWS.ReplaceAllString(text[:cut], " "))
 	if cut < len(text) {
 		return stub + "…"
 	}
 	return stub
 }
 // expandToWordBoundaries adjusts the byte window [start, end) of s so that
 // it neither starts nor ends in the middle of a UTF-8 sequence and, where
 // possible, does not split a word.
 //
 // First both edges are aligned to rune boundaries: start moves backward and
 // end moves forward off any continuation byte. Then a partial word at the
 // beginning is skipped (start moves forward) and a partial word at the end
 // is dropped (end moves backward), using isWordByte to decide what belongs
 // to a word.
 //
 // If dropping a partial word would leave the window empty — the window lies
 // entirely inside one long word, or in text without word separators — that
 // edge keeps its rune-aligned position instead, so a non-empty input window
 // never collapses to nothing.
 func expandToWordBoundaries(s string, start, end int) (int, int) {
 	// Step off UTF-8 continuation bytes (10xxxxxx).
 	for start > 0 && start < len(s) && s[start]&0xC0 == 0x80 {
 		start--
 	}
 	for end < len(s) && s[end]&0xC0 == 0x80 {
 		end++
 	}
 	if start > 0 && start < len(s) && isWordByte(s[start-1]) && isWordByte(s[start]) {
 		trimmed := start
 		for trimmed < end && isWordByte(s[trimmed]) {
 			trimmed++
 		}
 		// Accept the trim only if something is left after it.
 		if trimmed < end {
 			start = trimmed
 		}
 	}
 	// end > 0 keeps s[end-1] in range for a window that ends at offset 0.
 	if end > 0 && end < len(s) && isWordByte(s[end-1]) && isWordByte(s[end]) {
 		trimmed := end
 		for trimmed > start && isWordByte(s[trimmed-1]) {
 			trimmed--
 		}
 		if trimmed > start {
 			end = trimmed
 		}
 	}
 	return start, end
 }
 // isWordByte reports whether b counts as part of a word: an ASCII letter or
 // digit, or any byte with the high bit set. High-bit bytes belong to
 // multibyte UTF-8 sequences, which are all assumed to be word characters.
 func isWordByte(b byte) bool {
 	switch {
 	case b >= 0x80:
 		// Lead or continuation byte of a multibyte character.
 		return true
 	case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z', '0' <= b && b <= '9':
 		return true
 	default:
 		return false
 	}
 }
 // levenshtein returns the edit distance between a and b. Operates on runes so
 // multi-byte characters count as one edit.
 func levenshtein(a, b string) int {