package main
import (
"html/template"
"io/fs"
"log"
"net/http"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"unicode"
)
// searchTmpl is the search results page template. It is parsed once, at
// package initialisation, from "assets/search.html" inside the assets file
// system (declared elsewhere in the package; presumably an embedded FS).
// template.Must panics at start-up if the file is missing or does not parse.
var searchTmpl = template.Must(template.New("search.html").ParseFS(assets, "assets/search.html"))
// Ranks assigned to folder-name matches (Phase 1 search), listed from best to
// worst: a lower value sorts earlier in the results.
const (
	rankExact     = iota // name equals the query
	rankPrefix           // name starts with the query
	rankSubstring        // name contains the query
	rankFuzzy            // name is within the allowed edit distance of the query
)
// searchResult describes one directory matched by a search, in the form handed
// to the search template. Folder-name search (Phase 1) fills Rank; full search
// (Phase 2) fills Score, NameHit and Snippet instead.
type searchResult struct {
// Name is the directory's final path segment exactly as it appears on disk.
Name string
// URL is the slash-separated path relative to the wiki root, wrapped in a
// leading and a trailing "/".
URL string
// Path is the same relative, slash-separated path without the surrounding
// slashes.
Path string
Rank int // Phase 1 only: one of the rank* constants, lower is better
Score int // Phase 2: number of query tokens that hit
NameHit bool // Phase 2: at least one hit came from the folder name
Snippet string // Phase 2: ~100 chars around first body hit
}
// searchPageData is the value passed to searchTmpl when a results page is
// rendered.
type searchPageData struct {
// Query is the whitespace-trimmed "q" parameter of the request.
Query string
// Full reports whether the request asked for full search via full=1.
Full bool
// Results holds the matches in display order; it is nil when the query was
// empty or nothing matched.
Results []searchResult
}
// handleSearch answers a search request: it reads the "q" query parameter
// (trimmed of surrounding whitespace), searches the wiki tree under h.root and
// writes the rendered results page to w.
//
// By default only folder names are searched (Phase 1). With ?full=1 the
// index.md bodies are scanned as well (Phase 2). According to the original
// author's note, the router only dispatches here for path "/" when "q" is
// present.
//
// A template execution failure is logged, not returned; by then part of the
// page may already have been written to w.
func (h *handler) handleSearch(w http.ResponseWriter, r *http.Request) {
	params := r.URL.Query()
	page := searchPageData{
		Query: strings.TrimSpace(params.Get("q")),
		Full:  params.Get("full") == "1",
	}

	// Both search modes share one signature, so pick the function up front.
	search := searchFolders
	if page.Full {
		search = searchFull
	}
	page.Results = search(h.root, page.Query)

	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	if err := searchTmpl.Execute(w, page); err != nil {
		log.Printf("search template error: %v", err)
	}
}
// searchFolders performs the folder-name search (Phase 1). It walks the tree
// below root, skipping hidden entries, and collects every directory whose
// final path segment matches query case-insensitively by equality, prefix,
// substring or edit distance. The allowed edit distance is 2, or 3 when the
// query is longer than six runes.
//
// Results are ordered by rank (best first) and then by lower-cased name; ties
// beyond that keep walk order. An empty query yields nil. Entries that cannot
// be read are skipped silently.
func searchFolders(root, query string) []searchResult {
	if query == "" {
		return nil
	}

	needle := strings.ToLower(query)
	// Longer queries tolerate one extra typo.
	tolerance := 2
	if runes := []rune(needle); len(runes) > 6 {
		tolerance = 3
	}

	base := resolveWalkRoot(root)
	var hits []searchResult

	visit := func(p string, entry fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			// Unreadable entry: ignore it and keep walking.
			return nil
		}
		if skipped, ret := hiddenSkip(p, base, entry); skipped {
			return ret
		}
		if p == base || !entry.IsDir() {
			return nil
		}

		folder := entry.Name()
		rank, matched := matchRank(strings.ToLower(folder), needle, tolerance)
		if !matched {
			return nil
		}
		relPath, err := filepath.Rel(base, p)
		if err != nil {
			return nil
		}
		slashed := filepath.ToSlash(relPath)
		hits = append(hits, searchResult{
			Name: folder,
			URL:  "/" + slashed + "/",
			Path: slashed,
			Rank: rank,
		})
		return nil
	}
	// Discarded on purpose: the callback swallows per-entry errors itself.
	_ = filepath.WalkDir(base, visit)

	sort.SliceStable(hits, func(a, b int) bool {
		x, y := hits[a], hits[b]
		if x.Rank == y.Rank {
			return strings.ToLower(x.Name) < strings.ToLower(y.Name)
		}
		return x.Rank < y.Rank
	})
	return hits
}
// searchFull performs the full search (Phase 2). It walks the tree below root,
// skipping hidden entries, and scores each directory by how many query tokens
// hit a word in either the folder name or the body of its index.md.
//
// The query is split with tokenize, i.e. on every rune that is not a letter
// or digit, not just on whitespace. A word "hits" a token via
// case-insensitive equality or Levenshtein distance <= 2 (see tokenInWords).
// Directories with no hit are omitted.
//
// Results are ordered by score (highest first); among equal scores,
// directories with at least one folder-name hit come before content-only
// hits, then by lower-cased name. Returns nil when the query is empty or
// contains no tokens. Entries that cannot be read are skipped silently.
func searchFull(root, query string) []searchResult {
	if query == "" {
		return nil
	}
	qTokens := tokenize(query)
	if len(qTokens) == 0 {
		return nil
	}
	walkRoot := resolveWalkRoot(root)
	var results []searchResult
	// The WalkDir result is discarded on purpose: the callback swallows
	// per-entry errors itself.
	_ = filepath.WalkDir(walkRoot, func(fsPath string, d fs.DirEntry, err error) error {
		if err != nil {
			return nil
		}
		if skip, walkErr := hiddenSkip(fsPath, walkRoot, d); skip {
			return walkErr
		}
		if !d.IsDir() || fsPath == walkRoot {
			return nil
		}
		name := d.Name()
		// A missing or unreadable index.md is not an error here: the body is
		// then empty and the directory is scored on its name alone.
		body, _ := os.ReadFile(filepath.Join(fsPath, "index.md"))
		bodyStr := string(body)
		bodyLower := strings.ToLower(bodyStr)
		nameWords := tokenize(name)
		bodyWords := tokenize(bodyLower)
		score := 0
		nameHit := false
		for _, qt := range qTokens {
			// Check the (short) folder name first and only fall back to the
			// fuzzy scan of the whole body when the name did not hit; the
			// body result cannot change the outcome once the name matched.
			switch {
			case tokenInWords(qt, nameWords):
				score++
				nameHit = true
			case tokenInWords(qt, bodyWords):
				score++
			}
		}
		if score == 0 {
			return nil
		}
		rel, relErr := filepath.Rel(walkRoot, fsPath)
		if relErr != nil {
			return nil
		}
		relSlash := filepath.ToSlash(rel)
		results = append(results, searchResult{
			Name:    name,
			URL:     "/" + relSlash + "/",
			Path:    relSlash,
			Score:   score,
			NameHit: nameHit,
			Snippet: makeSnippet(bodyStr, bodyLower, qTokens),
		})
		return nil
	})
	sort.SliceStable(results, func(i, j int) bool {
		if results[i].Score != results[j].Score {
			return results[i].Score > results[j].Score
		}
		if results[i].NameHit != results[j].NameHit {
			return results[i].NameHit
		}
		return strings.ToLower(results[i].Name) < strings.ToLower(results[j].Name)
	})
	return results
}
// resolveWalkRoot returns root with all symbolic links resolved, so that
// WalkDir descends into the real tree even when the configured wiki root is
// itself a symlink (as on the NAS). If resolution fails, root is returned
// unchanged.
func resolveWalkRoot(root string) string {
	resolved, err := filepath.EvalSymlinks(root)
	if err != nil {
		return root
	}
	return resolved
}
// hiddenSkip decides what a WalkDir callback should do with an entry whose
// name starts with a dot. It returns (skipped, walkErr):
//
//   - (false, nil): the entry is not hidden; process it normally.
//   - (true, filepath.SkipDir): hidden directory other than the walk root;
//     returning walkErr prunes its whole subtree.
//   - (true, nil): hidden file, or the walk root itself having a dot name;
//     returning walkErr moves past the entry without pruning.
func hiddenSkip(fsPath, walkRoot string, d fs.DirEntry) (bool, error) {
	switch {
	case !strings.HasPrefix(d.Name(), "."):
		return false, nil
	case d.IsDir() && fsPath != walkRoot:
		return true, filepath.SkipDir
	default:
		return true, nil
	}
}
// matchRank classifies how name matches q and returns the best (lowest)
// applicable rank together with true, or (0, false) when nothing applies.
// The rules are tried from strongest to weakest: equality, prefix, substring,
// then Levenshtein distance of at most maxDist. Both strings are expected to
// be lower-cased by the caller.
func matchRank(name, q string, maxDist int) (int, bool) {
	switch {
	case name == q:
		return rankExact, true
	case strings.HasPrefix(name, q):
		return rankPrefix, true
	case strings.Contains(name, q):
		return rankSubstring, true
	case levenshtein(name, q) <= maxDist:
		return rankFuzzy, true
	}
	return 0, false
}
// tokenize breaks s into lower-cased words. Every rune that is neither a
// Unicode letter nor a Unicode digit acts as a separator, so precomposed
// accented letters such as umlauts stay inside their word. It returns nil
// when s contains no letters or digits.
func tokenize(s string) []string {
	isSeparator := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
	}
	words := strings.FieldsFunc(s, isSeparator)
	if len(words) == 0 {
		return nil
	}
	for i, w := range words {
		words[i] = strings.ToLower(w)
	}
	return words
}
// tokenInWords reports whether the query token qt hits any entry of words,
// where a hit is either an exact match or a Levenshtein distance of at most 2.
// Both qt and words must already be lower-cased. An empty words slice never
// hits.
func tokenInWords(qt string, words []string) bool {
	const maxEdits = 2
	for i := range words {
		// The equality test is a cheap fast path ahead of the edit-distance
		// computation.
		if words[i] == qt || levenshtein(words[i], qt) <= maxEdits {
			return true
		}
	}
	return false
}
// snippetWS matches runs of whitespace so a snippet can be flattened onto a
// single line.
var snippetWS = regexp.MustCompile(`\s+`)

// makeSnippet returns roughly 100 bytes of body around the earliest substring
// match of any query token: up to 50 bytes before the start of the match and
// up to 50 bytes from it, widened so that no UTF-8 sequence is cut in half.
// Whitespace runs are collapsed to single spaces, and "…" is added on each
// side that was truncated.
//
// bodyLower must be strings.ToLower(body); tokens must be lower-case. The
// match is located in bodyLower and its offset is then translated back into
// body, because lower-casing can change the byte length of the text.
//
// Returns "" when no token appears as a substring (a token may have hit only
// via Levenshtein, with no exact span to quote).
func makeSnippet(body, bodyLower string, tokens []string) string {
	lowerPos := -1
	for _, t := range tokens {
		i := strings.Index(bodyLower, t)
		if i < 0 {
			continue
		}
		if lowerPos < 0 || i < lowerPos {
			lowerPos = i
		}
	}
	if lowerPos < 0 {
		return ""
	}

	// Offsets in bodyLower are not valid in body when lower-casing changed
	// byte lengths; using them directly could slice out of range.
	pos := snippetOrigOffset(body, lowerPos)

	start := pos - 50
	if start < 0 {
		start = 0
	}
	end := pos + 50
	if end > len(body) {
		end = len(body)
	}
	// Move both edges off UTF-8 continuation bytes (10xxxxxx).
	for start > 0 && body[start]&0xC0 == 0x80 {
		start--
	}
	for end < len(body) && body[end]&0xC0 == 0x80 {
		end++
	}

	s := snippetWS.ReplaceAllString(body[start:end], " ")
	s = strings.TrimSpace(s)
	if start > 0 {
		s = "…" + s
	}
	if end < len(body) {
		s = s + "…"
	}
	return s
}

// snippetOrigOffset translates lowerPos, a byte offset into
// strings.ToLower(body), into the byte offset of the same rune in body. The
// result is always within [0, len(body)].
func snippetOrigOffset(body string, lowerPos int) int {
	lowered := 0
	for i, r := range body {
		if lowered >= lowerPos {
			return i
		}
		// Mirrors strings.ToLower: each rune maps to unicode.ToLower(r), and
		// an invalid byte (seen here as U+FFFD) becomes a 3-byte U+FFFD.
		lowered += len(string(unicode.ToLower(r)))
	}
	return len(body)
}
// levenshtein computes the edit distance between a and b: the minimum number
// of single-rune insertions, deletions and substitutions that turn one into
// the other. Both strings are compared rune by rune, so a multi-byte
// character counts as a single edit.
//
// Only two rows of the dynamic-programming table are kept at any time.
func levenshtein(a, b string) int {
	src, dst := []rune(a), []rune(b)
	switch {
	case len(src) == 0:
		return len(dst)
	case len(dst) == 0:
		return len(src)
	}

	// above is the previous table row, row is the one being filled in.
	above := make([]int, len(dst)+1)
	row := make([]int, len(dst)+1)
	for col := range above {
		above[col] = col
	}

	for i, sr := range src {
		row[0] = i + 1
		for j, dr := range dst {
			substitute := above[j]
			if sr != dr {
				substitute++
			}
			remove := above[j+1] + 1
			insert := row[j] + 1
			row[j+1] = min3(remove, insert, substitute)
		}
		above, row = row, above
	}
	// After the final swap the last completed row lives in above.
	return above[len(dst)]
}
// min3 returns the smallest of its three arguments.
func min3(a, b, c int) int {
	if b < a {
		a = b
	}
	if c < a {
		a = c
	}
	return a
}