Refactor Layout and improve search
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"html/template"
|
||||
"io/fs"
|
||||
"log"
|
||||
"net/http"
|
||||
@@ -13,108 +12,51 @@ import (
|
||||
"unicode"
|
||||
)
|
||||
|
||||
var searchTmpl = template.Must(template.New("search.html").ParseFS(assets, "assets/search.html"))
|
||||
|
||||
// Match ranks for folder-name search (Phase 1), listed best-first: the lower
// the value, the stronger the match.
const (
	rankExact = iota
	rankPrefix
	rankSubstring
	rankFuzzy
)
|
||||
|
||||
type searchResult struct {
|
||||
Name string
|
||||
URL string
|
||||
Path string
|
||||
Rank int // Phase 1 only
|
||||
Score int // Phase 2: number of query tokens that hit
|
||||
NameHit bool // Phase 2: at least one hit came from the folder name
|
||||
Snippet string // Phase 2: ~100 chars around first body hit
|
||||
Score int // number of query tokens that hit
|
||||
NameHit bool // at least one hit came from the folder name
|
||||
Snippet string // ~300 chars around first body hit, or page stub for name-only hits
|
||||
}
|
||||
|
||||
type searchPageData struct {
|
||||
Query string
|
||||
Full bool
|
||||
Results []searchResult
|
||||
Title string
|
||||
Crumbs []crumb
|
||||
EditMode bool
|
||||
Query string
|
||||
Results []searchResult
|
||||
}
|
||||
|
||||
// handleSearch walks the wiki root and renders a search results page for the
|
||||
// query in r.URL.Query().Get("q"). Only invoked when path is "/" and "q" is
|
||||
// present. With ?full=1 it also scans index.md bodies (Phase 2).
|
||||
// present.
|
||||
func (h *handler) handleSearch(w http.ResponseWriter, r *http.Request) {
|
||||
query := strings.TrimSpace(r.URL.Query().Get("q"))
|
||||
full := r.URL.Query().Get("full") == "1"
|
||||
results := searchWiki(h.root, query)
|
||||
|
||||
var results []searchResult
|
||||
if full {
|
||||
results = searchFull(h.root, query)
|
||||
} else {
|
||||
results = searchFolders(h.root, query)
|
||||
title := "Search"
|
||||
if query != "" {
|
||||
title = "Search: " + query
|
||||
}
|
||||
data := searchPageData{
|
||||
Title: title,
|
||||
Crumbs: []crumb{{Name: "search", URL: "/?q=" + query}},
|
||||
Query: query,
|
||||
Results: results,
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||
if err := searchTmpl.Execute(w, searchPageData{Query: query, Full: full, Results: results}); err != nil {
|
||||
if err := searchTmpl.ExecuteTemplate(w, "layout", data); err != nil {
|
||||
log.Printf("search template error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// searchFolders walks root and returns directories whose final path segment
|
||||
// matches the query, ranked best-first. Returns nil for an empty query.
|
||||
func searchFolders(root, query string) []searchResult {
|
||||
if query == "" {
|
||||
return nil
|
||||
}
|
||||
q := strings.ToLower(query)
|
||||
maxDist := 2
|
||||
if len([]rune(q)) > 6 {
|
||||
maxDist = 3
|
||||
}
|
||||
|
||||
walkRoot := resolveWalkRoot(root)
|
||||
var results []searchResult
|
||||
_ = filepath.WalkDir(walkRoot, func(fsPath string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
if skip, walkErr := hiddenSkip(fsPath, walkRoot, d); skip {
|
||||
return walkErr
|
||||
}
|
||||
if !d.IsDir() || fsPath == walkRoot {
|
||||
return nil
|
||||
}
|
||||
name := d.Name()
|
||||
rank, ok := matchRank(strings.ToLower(name), q, maxDist)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
rel, relErr := filepath.Rel(walkRoot, fsPath)
|
||||
if relErr != nil {
|
||||
return nil
|
||||
}
|
||||
results = append(results, searchResult{
|
||||
Name: name,
|
||||
URL: "/" + filepath.ToSlash(rel) + "/",
|
||||
Path: filepath.ToSlash(rel),
|
||||
Rank: rank,
|
||||
})
|
||||
return nil
|
||||
})
|
||||
|
||||
sort.SliceStable(results, func(i, j int) bool {
|
||||
if results[i].Rank != results[j].Rank {
|
||||
return results[i].Rank < results[j].Rank
|
||||
}
|
||||
return strings.ToLower(results[i].Name) < strings.ToLower(results[j].Name)
|
||||
})
|
||||
return results
|
||||
}
|
||||
|
||||
// searchFull walks root and scores each directory by how many whitespace-split
|
||||
// searchWiki walks root and scores each directory by how many whitespace-split
|
||||
// query tokens hit a word in either the folder name or its index.md body.
|
||||
// A word "hits" a token via case-insensitive equality or Levenshtein ≤ 2.
|
||||
// Folder-name hits break score ties above content-only hits.
|
||||
func searchFull(root, query string) []searchResult {
|
||||
func searchWiki(root, query string) []searchResult {
|
||||
if query == "" {
|
||||
return nil
|
||||
}
|
||||
@@ -209,24 +151,6 @@ func hiddenSkip(fsPath, walkRoot string, d fs.DirEntry) (bool, error) {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// matchRank returns the best (lowest) rank for which name matches q, or
|
||||
// (0, false) if no rule matches. Inputs are expected to be lowercased.
|
||||
func matchRank(name, q string, maxDist int) (int, bool) {
|
||||
if name == q {
|
||||
return rankExact, true
|
||||
}
|
||||
if strings.HasPrefix(name, q) {
|
||||
return rankPrefix, true
|
||||
}
|
||||
if strings.Contains(name, q) {
|
||||
return rankSubstring, true
|
||||
}
|
||||
if levenshtein(name, q) <= maxDist {
|
||||
return rankFuzzy, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// tokenize splits s into lowercase word tokens, breaking on any rune that is
|
||||
// not a letter or digit. Unicode-aware so umlauts etc. survive intact.
|
||||
func tokenize(s string) []string {
|
||||
@@ -264,10 +188,13 @@ func tokenInWords(qt string, words []string) bool {
|
||||
|
||||
var snippetWS = regexp.MustCompile(`\s+`)
|
||||
|
||||
// makeSnippet returns ~100 characters of body around the earliest substring
|
||||
// match of any query token. Falls back to empty when no token appears as a
|
||||
// substring (a token may have hit only via Levenshtein, with no exact span to
|
||||
// quote).
|
||||
const snippetWindow = 300
|
||||
|
||||
// makeSnippet returns ~300 characters of body around the earliest substring
|
||||
// match of any query token. When no token has an exact substring span (e.g.
|
||||
// matched only via Levenshtein, or the hit was folder-name-only), it falls
|
||||
// back to the first ~300 chars of the body with the leading heading stripped.
|
||||
// Returns "" only when the body itself is empty.
|
||||
func makeSnippet(body, bodyLower string, tokens []string) string {
|
||||
pos := -1
|
||||
for _, t := range tokens {
|
||||
@@ -280,31 +207,80 @@ func makeSnippet(body, bodyLower string, tokens []string) string {
|
||||
}
|
||||
}
|
||||
if pos < 0 {
|
||||
return ""
|
||||
return makeStub(body)
|
||||
}
|
||||
start := pos - 50
|
||||
|
||||
half := snippetWindow / 2
|
||||
start := pos - half
|
||||
if start < 0 {
|
||||
start = 0
|
||||
}
|
||||
end := pos + 50
|
||||
end := pos + half
|
||||
if end > len(body) {
|
||||
end = len(body)
|
||||
}
|
||||
for start > 0 && body[start]&0xC0 == 0x80 {
|
||||
start--
|
||||
}
|
||||
for end < len(body) && body[end]&0xC0 == 0x80 {
|
||||
end++
|
||||
}
|
||||
s := snippetWS.ReplaceAllString(body[start:end], " ")
|
||||
s = strings.TrimSpace(s)
|
||||
start, end = expandToWordBoundaries(body, start, end)
|
||||
out := snippetWS.ReplaceAllString(body[start:end], " ")
|
||||
out = strings.TrimSpace(out)
|
||||
if start > 0 {
|
||||
s = "…" + s
|
||||
out = "…" + out
|
||||
}
|
||||
if end < len(body) {
|
||||
s = s + "…"
|
||||
out = out + "…"
|
||||
}
|
||||
return s
|
||||
return out
|
||||
}
|
||||
|
||||
// makeStub returns ~snippetWindow chars from the start of body, with the
|
||||
// leading "# Heading" line stripped. Returns "" for an empty body.
|
||||
func makeStub(body string) string {
|
||||
stripped := string(stripFirstHeading([]byte(body)))
|
||||
stripped = strings.TrimSpace(stripped)
|
||||
if stripped == "" {
|
||||
return ""
|
||||
}
|
||||
end := snippetWindow
|
||||
if end > len(stripped) {
|
||||
end = len(stripped)
|
||||
}
|
||||
_, end = expandToWordBoundaries(stripped, 0, end)
|
||||
out := snippetWS.ReplaceAllString(stripped[:end], " ")
|
||||
out = strings.TrimSpace(out)
|
||||
if end < len(stripped) {
|
||||
out = out + "…"
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// expandToWordBoundaries adjusts the byte window [start, end) of s so that it
// neither begins nor ends inside a UTF-8 sequence or inside a word. Despite
// the name, word alignment shrinks the window: start moves forward past a
// partial word at the beginning, and end moves backward to the previous word
// boundary.
//
// Out-of-range bounds are clamped to [0, len(s)], so the returned pair is
// always safe to use as s[start:end]. If dropping a partial word would leave
// nothing (the window lies inside one unbroken word), that bound is left at
// its rune-aligned position instead, so a non-empty window never collapses to
// an empty one.
func expandToWordBoundaries(s string, start, end int) (int, int) {
	// Clamp first so every index below is valid.
	if start < 0 {
		start = 0
	}
	if end > len(s) {
		end = len(s)
	}
	if start > end {
		start = end
	}

	// Step off UTF-8 continuation bytes (10xxxxxx): start backs up to the
	// lead byte, end advances past the rest of the sequence.
	for start > 0 && start < len(s) && s[start]&0xC0 == 0x80 {
		start--
	}
	for end < len(s) && s[end]&0xC0 == 0x80 {
		end++
	}
	runeStart, runeEnd := start, end

	// start sits mid-word when the bytes on both sides of it are word bytes.
	if start > 0 && start < end && isWordByte(s[start-1]) && isWordByte(s[start]) {
		for start < end && isWordByte(s[start]) {
			start++
		}
		if start == end {
			// The whole window is one word; keep the partial word rather
			// than return nothing.
			start = runeStart
		}
	}

	// Same test for end; "end > start" also guarantees s[end-1] exists.
	if end > start && end < len(s) && isWordByte(s[end-1]) && isWordByte(s[end]) {
		for end > start && isWordByte(s[end-1]) {
			end--
		}
		if end == start {
			end = runeEnd
		}
	}
	return start, end
}

// isWordByte reports whether b is an ASCII letter or digit, or any byte of a
// multibyte UTF-8 sequence. Non-ASCII characters are assumed to belong to
// words; they are not classified individually.
func isWordByte(b byte) bool {
	if b&0x80 != 0 {
		return true // assume any multibyte char is part of a word
	}
	return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || (b >= '0' && b <= '9')
}
|
||||
|
||||
// levenshtein returns the edit distance between a and b. Operates on runes so
|
||||
|
||||
Reference in New Issue
Block a user