Files

424 lines
11 KiB
Go

package main
import (
"encoding/json"
"io/fs"
"log"
"net/http"
"path/filepath"
"sort"
"strings"
"sync"
"time"
"unicode"
)
type searchResult struct {
Name string
URL string
Path string
Score int
}
type searchPageData struct {
Title string
ParentURL string
EditMode bool
Query string
Results []searchResult
IndexBuiltAt time.Time
RenderMS int64
}
// folderEntry is a single indexed directory: its forward-slash relative path
// plus pre-tokenized basename so the per-query scoring loop avoids redoing
// the lowercasing and tokenization on every keystroke.
type folderEntry struct {
Path string
NameLower string
NameTokens []string
}
// folderIndex holds the in-memory directory index used by search. Writers
// always replace the entries slice wholesale so a reader that snapshots the
// header under RLock can score without holding the lock.
var folderIndex struct {
sync.RWMutex
entries []folderEntry
builtAt time.Time
buildMu sync.Mutex
ready chan struct{}
}
func init() {
folderIndex.ready = make(chan struct{})
}
// handleSearch renders the search results page for the query in
// r.URL.Query().Get("q"). Only invoked when path is "/" and "q" is present.
func (h *handler) handleSearch(w http.ResponseWriter, r *http.Request) {
query := strings.TrimSpace(r.URL.Query().Get("q"))
results, builtAt := searchWiki(query)
title := "Search"
if query != "" {
title = "Search: " + query
}
data := searchPageData{
Title: title,
ParentURL: "/",
Query: query,
Results: results,
IndexBuiltAt: builtAt,
}
w.Header().Set("Content-Type", "text/html; charset=utf-8")
data.RenderMS = elapsedMS(r)
if err := searchTmpl.ExecuteTemplate(w, "layout", data); err != nil {
log.Printf("search template error: %v", err)
}
}
// searchWiki scores the cached folder index against query. Blocks on the
// initial build so the very first request after startup serves correct
// results rather than an empty list. Returns the snapshot's builtAt so the
// UI can show how fresh the index is.
func searchWiki(query string) ([]searchResult, time.Time) {
<-folderIndex.ready
folderIndex.RLock()
entries := folderIndex.entries
builtAt := folderIndex.builtAt
folderIndex.RUnlock()
if query == "" {
return nil, builtAt
}
qLower := strings.ToLower(query)
qTokens := tokenize(qLower)
if len(qTokens) == 0 {
return nil, builtAt
}
var results []searchResult
for _, e := range entries {
score := scoreName(e.NameLower, e.NameTokens, qLower, qTokens)
if score == 0 {
continue
}
results = append(results, searchResult{
Name: filepath.Base(e.Path),
URL: "/" + e.Path + "/",
Path: e.Path,
Score: score,
})
}
sort.SliceStable(results, func(i, j int) bool {
if results[i].Score != results[j].Score {
return results[i].Score > results[j].Score
}
di, dj := strings.Count(results[i].Path, "/"), strings.Count(results[j].Path, "/")
if di != dj {
return di < dj
}
return strings.ToLower(results[i].Name) < strings.ToLower(results[j].Name)
})
return results, builtAt
}
// scoreName ranks how well nameLower matches the query. Whole-name exact
// match dominates; otherwise score is the sum of each token's best match
// against the words in the name. nameTokens is precomputed by the index.
func scoreName(nameLower string, nameTokens []string, qLower string, qTokens []string) int {
if nameLower == qLower {
return 1000
}
score := 0
for _, qt := range qTokens {
best := 0
for _, w := range nameTokens {
switch {
case w == qt:
if best < 100 {
best = 100
}
case strings.HasPrefix(w, qt):
if best < 50 {
best = 50
}
case strings.Contains(w, qt):
if best < 20 {
best = 20
}
case levenshtein(w, qt) <= 2:
if best < 5 {
best = 5
}
}
}
score += best
}
return score
}
// handleSearchSuggest serves the JSON typeahead for the header dropdown and
// the editor's link picker. Caps results at 5; reports total so the UI can
// surface a "show all" footer when more matches exist. Empty/whitespace query
// is a no-op (200 with empty results), not a 400 — every keystroke fires this.
func (h *handler) handleSearchSuggest(w http.ResponseWriter, r *http.Request) {
if !h.checkAuth(w, r) {
return
}
query := strings.TrimSpace(r.URL.Query().Get("q"))
type suggestResult struct {
Name string `json:"name"`
Path string `json:"path"`
URL string `json:"url"`
}
type suggestResp struct {
Query string `json:"query"`
Results []suggestResult `json:"results"`
Total int `json:"total"`
}
resp := suggestResp{Query: query, Results: []suggestResult{}}
if query != "" {
all, _ := searchWiki(query)
resp.Total = len(all)
limit := 5
if len(all) < limit {
limit = len(all)
}
for i := 0; i < limit; i++ {
resp.Results = append(resp.Results, suggestResult{
Name: all[i].Name,
Path: all[i].Path,
URL: all[i].URL,
})
}
}
w.Header().Set("Content-Type", "application/json; charset=utf-8")
if err := json.NewEncoder(w).Encode(resp); err != nil {
log.Printf("search suggest encode error: %v", err)
}
}
// handleReindex rebuilds the folder index synchronously and returns 204.
// The frontend reloads the page on success. Serialized via buildMu so a
// double-click waits rather than running two walks in parallel.
func (h *handler) handleReindex(w http.ResponseWriter, r *http.Request) {
if !h.checkAuth(w, r) {
return
}
if r.Method != http.MethodPost {
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
return
}
rebuildFolderIndex(h.root)
w.WriteHeader(http.StatusNoContent)
}
// buildFolderIndex walks root and returns a fresh slice of folder entries.
// Hidden directories (`.git`, `.thumbs`, …) are pruned; the root itself is
// excluded since it cannot be a search match.
func buildFolderIndex(root string) []folderEntry {
walkRoot := resolveWalkRoot(root)
var entries []folderEntry
_ = filepath.WalkDir(walkRoot, func(fsPath string, d fs.DirEntry, err error) error {
if err != nil {
return nil
}
if skip, walkErr := hiddenSkip(fsPath, walkRoot, d); skip {
return walkErr
}
if !d.IsDir() || fsPath == walkRoot {
return nil
}
rel, relErr := filepath.Rel(walkRoot, fsPath)
if relErr != nil {
return nil
}
entries = append(entries, newFolderEntry(filepath.ToSlash(rel)))
return nil
})
return entries
}
// newFolderEntry builds a folderEntry from a forward-slash relative path,
// computing the lowercased basename and its tokens once so search scoring
// doesn't have to redo it per query.
func newFolderEntry(relPath string) folderEntry {
name := relPath
if i := strings.LastIndex(relPath, "/"); i >= 0 {
name = relPath[i+1:]
}
nameLower := strings.ToLower(name)
return folderEntry{
Path: relPath,
NameLower: nameLower,
NameTokens: tokenize(nameLower),
}
}
// rebuildFolderIndex walks root and replaces the index entries atomically.
// buildMu serializes overlapping rebuilds (manual + ticker + startup) so
// the WalkDir cost is paid once even under contention.
func rebuildFolderIndex(root string) {
folderIndex.buildMu.Lock()
defer folderIndex.buildMu.Unlock()
entries := buildFolderIndex(root)
folderIndex.Lock()
folderIndex.entries = entries
folderIndex.builtAt = time.Now()
folderIndex.Unlock()
}
// folderIndexAdd appends relPath as a new entry. No-op for empty/root paths.
func folderIndexAdd(relPath string) {
relPath = strings.Trim(relPath, "/")
if relPath == "" {
return
}
folderIndex.Lock()
folderIndex.entries = append(folderIndex.entries, newFolderEntry(relPath))
folderIndex.Unlock()
}
// folderIndexRemoveSubtree drops the entry at relPath plus every descendant.
// Replaces the slice rather than mutating in place so any in-flight search
// reader keeps a valid snapshot.
func folderIndexRemoveSubtree(relPath string) {
relPath = strings.Trim(relPath, "/")
if relPath == "" {
return
}
prefix := relPath + "/"
folderIndex.Lock()
defer folderIndex.Unlock()
old := folderIndex.entries
out := make([]folderEntry, 0, len(old))
for _, e := range old {
if e.Path == relPath || strings.HasPrefix(e.Path, prefix) {
continue
}
out = append(out, e)
}
folderIndex.entries = out
}
// folderIndexRenameSubtree rewrites the path prefix for every entry under
// oldRel. The renamed root entry's basename may have changed so its
// NameLower/NameTokens are recomputed; descendants keep their basenames.
func folderIndexRenameSubtree(oldRel, newRel string) {
oldRel = strings.Trim(oldRel, "/")
newRel = strings.Trim(newRel, "/")
if oldRel == "" || newRel == "" {
return
}
oldPrefix := oldRel + "/"
folderIndex.Lock()
defer folderIndex.Unlock()
old := folderIndex.entries
out := make([]folderEntry, len(old))
for i, e := range old {
switch {
case e.Path == oldRel:
out[i] = newFolderEntry(newRel)
case strings.HasPrefix(e.Path, oldPrefix):
out[i] = folderEntry{
Path: newRel + "/" + strings.TrimPrefix(e.Path, oldPrefix),
NameLower: e.NameLower,
NameTokens: e.NameTokens,
}
default:
out[i] = e
}
}
folderIndex.entries = out
}
// resolveWalkRoot resolves symlinks so WalkDir descends into the real tree
// even when the configured wiki root is itself a symlink (as on the NAS).
func resolveWalkRoot(root string) string {
if r, err := filepath.EvalSymlinks(root); err == nil {
return r
}
return root
}
// hiddenSkip handles dotfile/dot-dir entries during a WalkDir. It returns
// (skipped, walkErr): skipped=true means the caller should `return walkErr`
// to either prune the subtree (hidden dir) or move past the entry (hidden
// file). When skipped=false the entry should be processed normally.
func hiddenSkip(fsPath, walkRoot string, d fs.DirEntry) (bool, error) {
if !strings.HasPrefix(d.Name(), ".") {
return false, nil
}
if d.IsDir() && fsPath != walkRoot {
return true, filepath.SkipDir
}
return true, nil
}
// tokenize splits s into lowercase word tokens, breaking on any rune that is
// not a letter or digit. Unicode-aware so umlauts etc. survive intact.
func tokenize(s string) []string {
var tokens []string
var b strings.Builder
for _, r := range s {
if unicode.IsLetter(r) || unicode.IsDigit(r) {
b.WriteRune(unicode.ToLower(r))
continue
}
if b.Len() > 0 {
tokens = append(tokens, b.String())
b.Reset()
}
}
if b.Len() > 0 {
tokens = append(tokens, b.String())
}
return tokens
}
// levenshtein returns the edit distance between a and b. Operates on runes so
// multi-byte characters count as one edit.
func levenshtein(a, b string) int {
ar, br := []rune(a), []rune(b)
if len(ar) == 0 {
return len(br)
}
if len(br) == 0 {
return len(ar)
}
prev := make([]int, len(br)+1)
curr := make([]int, len(br)+1)
for j := range prev {
prev[j] = j
}
for i := 1; i <= len(ar); i++ {
curr[0] = i
for j := 1; j <= len(br); j++ {
cost := 1
if ar[i-1] == br[j-1] {
cost = 0
}
del := prev[j] + 1
ins := curr[j-1] + 1
sub := prev[j-1] + cost
curr[j] = min3(del, ins, sub)
}
prev, curr = curr, prev
}
return prev[len(br)]
}
func min3(a, b, c int) int {
m := a
if b < m {
m = b
}
if c < m {
m = c
}
return m
}