fix download encoding, redo config passing

This commit is contained in:
Cyberes 2023-12-11 15:29:34 -07:00
parent 4b9c1ba91a
commit 634f3eb8ea
39 changed files with 319 additions and 350 deletions

View File

@ -1,16 +1,3 @@
TODO: add a "last modified" to "sort"
in <https://chub-archive.evulid.cc/api/file/list?path=/chub.ai/characters&page=1&limit=50&sort=folders>
TODO: add an admin endpoint to fetch the last n modified files. Maybe store files update time in elasticsearch?
TODO: fix the 3 loading placeholders
TODO: <https://github.com/victorspringer/http-cache>
TODO: fix encoding on https://chub-archive.evulid.cc/api/file/download?path=/other/takeout/part1.md
TODO: fix /api/file/download when an item is in the cache but does not exist on the disk
# crazy-file-server # crazy-file-server
*A heavy-duty web file browser for CRAZY files.* *A heavy-duty web file browser for CRAZY files.*
@ -28,23 +15,18 @@ files stored in a very complicated directory tree in just 5 minutes.
## Features ## Features
- Automated cache management - Automated cache management. Fill the cache when the server starts, or as requests come in.
- Optionally fill the cache on server start, or as requests come in.
- Watch for changes or scan interval.
- File browsing API. - File browsing API.
- Download API. - Download API.
- Restrict certain files and directories from the download API to prevent users from downloading your entire 100GB+ - Restrict certain files and directories from the download API to prevent users from downloading your entire 100GB+
dataset. dataset.
- Frontend-agnostic design. You can have it serve a simple web interface or just act as a JSON API and serve files. - Frontend-agnostic design.
- Simple resources. The resources for the frontend aren't compiled into the binary which allows you to modify or even - Basic searching or Elasticsearch integration.
replace it.
- Basic searching.
- Elasticsearch integration (to do).
## Install ## Install
1. Install Go. 1. Install Go.
2. Download the binary or do `cd src && go mod tidy && go build`. 2. Download the binary or do `cd src && go mod tidy && go build`
## Use ## Use
@ -54,8 +36,7 @@ files stored in a very complicated directory tree in just 5 minutes.
By default, it looks for your config in the same directory as the executable: `./config.yml` or `./config.yaml`. By default, it looks for your config in the same directory as the executable: `./config.yml` or `./config.yaml`.
If you're using initial cache and have tons of files to scan you'll need at least 5GB of RAM and will have to wait 10 or If you're using initial cache and have tons of files to scan you'll need at least 5GB of RAM and will have to wait 10 or
so minutes for it to traverse the directory structure. CrazyFS is heavily threaded so you'll want at least an 8-core so minutes for it to traverse the directory structure. CrazyFS is heavily threaded, so you'll want at least an 8-core
machine. machine.
The search endpoint searches through the cached files. If they aren't cached, they won't be found. Enable pre-cache at CrazyFS works great with an HTTP cache in front of it.
startup to cache everything.

View File

@ -10,12 +10,12 @@ import (
) )
func NewItem(fullPath string, info os.FileInfo) *Item { func NewItem(fullPath string, info os.FileInfo) *Item {
if !strings.HasPrefix(fullPath, config.RootDir) { if !strings.HasPrefix(fullPath, config.GetConfig().RootDir) {
// Retard check // Retard check
log.Fatalf("NewItem was not passed an absolute path. The path must start with the RootDir: %s", fullPath) log.Fatalf("NewItem was not passed an absolute path. The path must start with the RootDir: %s", fullPath)
} }
if config.CachePrintNew { if config.GetConfig().CachePrintNew {
log.Debugf("CACHE - new: %s", fullPath) log.Debugf("CACHE - new: %s", fullPath)
} }
@ -31,6 +31,7 @@ func NewItem(fullPath string, info os.FileInfo) *Item {
} }
var mimeType string var mimeType string
var encoding string
var ext string var ext string
var err error var err error
if !info.IsDir() { if !info.IsDir() {
@ -40,17 +41,26 @@ func NewItem(fullPath string, info os.FileInfo) *Item {
} else { } else {
mimePath = fullPath mimePath = fullPath
} }
if config.CrawlerParseMIME {
if config.GetConfig().CrawlerParseMIME {
_, mimeType, ext, err = file.GetMimeType(mimePath, true, &info) _, mimeType, ext, err = file.GetMimeType(mimePath, true, &info)
} else { } else {
_, mimeType, ext, err = file.GetMimeType(mimePath, false, &info) _, mimeType, ext, err = file.GetMimeType(mimePath, false, &info)
} }
if config.GetConfig().CrawlerParseEncoding {
encoding, err = file.DetectFileEncoding(fullPath)
if err != nil {
log.Warnf("ITEM - Error detecting file encoding of file %s - %v", fullPath, err)
encoding = "utf-8" // fall back to utf-8
}
}
if os.IsNotExist(err) { if os.IsNotExist(err) {
log.Warnf("Path does not exist: %s", fullPath) log.Warnf("Path does not exist: %s", fullPath)
return nil return nil
} else if err != nil { } else if err != nil {
log.Warnf("Error detecting MIME type: %v", err) log.Warnf("Error detecting MIME type of file %s - %v", fullPath, err)
} }
} }
@ -74,7 +84,8 @@ func NewItem(fullPath string, info os.FileInfo) *Item {
IsSymlink: info.Mode()&os.ModeSymlink != 0, IsSymlink: info.Mode()&os.ModeSymlink != 0,
Cached: time.Now().UnixNano() / int64(time.Millisecond), // Set the created time to now in milliseconds Cached: time.Now().UnixNano() / int64(time.Millisecond), // Set the created time to now in milliseconds
Children: make([]string, 0), Children: make([]string, 0),
Type: mimeTypePtr, MimeType: mimeTypePtr,
Encoding: &encoding,
} }
} }
@ -87,7 +98,8 @@ type Item struct {
Mode uint32 `json:"mode"` Mode uint32 `json:"mode"`
IsDir bool `json:"isDir"` IsDir bool `json:"isDir"`
IsSymlink bool `json:"isSymlink"` IsSymlink bool `json:"isSymlink"`
Type *string `json:"type"` MimeType *string `json:"type"`
Encoding *string `json:"encoding"`
Children []string `json:"children"` Children []string `json:"children"`
Content string `json:"content,omitempty"` Content string `json:"content,omitempty"`
Cached int64 `json:"cached"` Cached int64 `json:"cached"`
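The new `Encoding` field is populated by `file.DetectFileEncoding`, whose implementation is not part of this diff. As a rough idea of what such a helper could look like — an assumption, not the commit's actual code — here is a sketch that samples the first kilobyte of the file and guesses the charset with golang.org/x/net/html/charset:

```go
package file

import (
	"errors"
	"io"
	"os"

	"golang.org/x/net/html/charset"
)

// DetectFileEncoding guesses a file's text encoding from its first KiB.
// Illustrative sketch only; the helper in this commit may work differently.
func DetectFileEncoding(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	buf := make([]byte, 1024)
	n, err := f.Read(buf)
	if err != nil && !errors.Is(err, io.EOF) {
		return "", err
	}

	// DetermineEncoding never errors; its third return value only reports
	// how confident the guess is.
	_, name, _ := charset.DetermineEncoding(buf[:n], "")
	return name, nil
}
```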

View File

@ -43,7 +43,7 @@ func NewResponseItem(cacheItem *CacheItem.Item, sharedCache *lru.Cache[string, *
IsSymlink: cacheItem.IsSymlink, IsSymlink: cacheItem.IsSymlink,
Cached: cacheItem.Cached, Cached: cacheItem.Cached,
Children: make([]*CacheItem.Item, len(cacheItem.Children)), Children: make([]*CacheItem.Item, len(cacheItem.Children)),
Type: cacheItem.Type, Type: cacheItem.MimeType,
} }
// Grab the children from the cache and add them to this new item // Grab the children from the cache and add them to this new item
@ -59,7 +59,7 @@ func NewResponseItem(cacheItem *CacheItem.Item, sharedCache *lru.Cache[string, *
log.Debugf("CRAWLER - %s not in cache, crawling", child) log.Debugf("CRAWLER - %s not in cache, crawling", child)
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache) dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
item, err := dc.CrawlNoRecursion(filepath.Join(config.RootDir, child)) item, err := dc.CrawlNoRecursion(filepath.Join(config.GetConfig().RootDir, child))
if err != nil { if err != nil {
log.Errorf("NewResponseItem - CrawlNoRecursion - %s", err) log.Errorf("NewResponseItem - CrawlNoRecursion - %s", err)
@ -82,7 +82,7 @@ func NewResponseItem(cacheItem *CacheItem.Item, sharedCache *lru.Cache[string, *
IsSymlink: childItem.IsSymlink, IsSymlink: childItem.IsSymlink,
Cached: childItem.Cached, Cached: childItem.Cached,
Children: nil, Children: nil,
Type: childItem.Type, MimeType: childItem.MimeType,
} }
children = append(children, copiedChildItem) children = append(children, copiedChildItem)
} }

View File

@ -11,9 +11,9 @@ import (
"net/http" "net/http"
) )
func AdminCacheInfo(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Item]) { func AdminCacheInfo(w http.ResponseWriter, r *http.Request, sharedCache *lru.Cache[string, *CacheItem.Item]) {
auth := r.URL.Query().Get("auth") auth := r.URL.Query().Get("auth")
if auth == "" || auth != cfg.HttpAdminKey { if auth == "" || auth != config.GetConfig().HttpAdminKey {
helpers.Return403Msg("access denied", w) helpers.Return403Msg("access denied", w)
return return
} }
@ -22,7 +22,7 @@ func AdminCacheInfo(w http.ResponseWriter, r *http.Request, cfg *config.Config,
response := map[string]interface{}{ response := map[string]interface{}{
"cache_size": cacheLen, "cache_size": cacheLen,
"cache_max": cfg.CacheSize, "cache_max": config.GetConfig().CacheSize,
"crawls_running": DirectoryCrawler.GetGlobalActiveCrawls(), "crawls_running": DirectoryCrawler.GetGlobalActiveCrawls(),
"active_workers": DirectoryCrawler.ActiveWorkers, "active_workers": DirectoryCrawler.ActiveWorkers,
"busy_workers": DirectoryCrawler.ActiveWalks, "busy_workers": DirectoryCrawler.ActiveWalks,

View File

@ -11,7 +11,7 @@ import (
"net/http" "net/http"
) )
func AdminReCache(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Item]) { func AdminReCache(w http.ResponseWriter, r *http.Request, sharedCache *lru.Cache[string, *CacheItem.Item]) {
if r.Method != http.MethodPost { if r.Method != http.MethodPost {
helpers.Return400Msg("this is a POST endpoint", w) helpers.Return400Msg("this is a POST endpoint", w)
return return
@ -26,7 +26,7 @@ func AdminReCache(w http.ResponseWriter, r *http.Request, cfg *config.Config, sh
} }
auth := requestBody["auth"] auth := requestBody["auth"]
if auth == "" || auth != cfg.HttpAdminKey { if auth == "" || auth != config.GetConfig().HttpAdminKey {
helpers.Return403Msg("access denied", w) helpers.Return403Msg("access denied", w)
return return
} }

View File

@ -5,21 +5,18 @@ import (
"crazyfs/api/helpers" "crazyfs/api/helpers"
"crazyfs/config" "crazyfs/config"
"crazyfs/file" "crazyfs/file"
"crazyfs/logging"
"fmt" "fmt"
lru "github.com/hashicorp/golang-lru/v2" lru "github.com/hashicorp/golang-lru/v2"
"net/http" "net/http"
"strings" "strings"
) )
func Download(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Item]) { func Download(w http.ResponseWriter, r *http.Request, sharedCache *lru.Cache[string, *CacheItem.Item]) {
if helpers.CheckInitialCrawl() { if helpers.CheckInitialCrawl() {
helpers.HandleRejectDuringInitialCrawl(w) helpers.HandleRejectDuringInitialCrawl(w)
return return
} }
log := logging.GetLogger()
pathArg := r.URL.Query().Get("path") pathArg := r.URL.Query().Get("path")
if pathArg == "" { if pathArg == "" {
helpers.Return400Msg("missing path", w) helpers.Return400Msg("missing path", w)
@ -48,7 +45,7 @@ func Download(w http.ResponseWriter, r *http.Request, cfg *config.Config, shared
} }
// Multiple files, zip them // Multiple files, zip them
helpers.ZipHandlerCompressMultiple(cleanPaths, w, r, cfg, sharedCache) helpers.ZipHandlerCompressMultiple(cleanPaths, w, r, sharedCache)
return return
} }
@ -71,45 +68,22 @@ func Download(w http.ResponseWriter, r *http.Request, cfg *config.Config, shared
// Try to get the data from the cache // Try to get the data from the cache
item, found := sharedCache.Get(relPath) item, found := sharedCache.Get(relPath)
if !found { if !found {
item = helpers.HandleFileNotFound(relPath, fullPath, sharedCache, cfg, w) item = helpers.HandleFileNotFound(relPath, fullPath, sharedCache, w)
} }
if item == nil { if item == nil {
// The errors have already been handled in handleFileNotFound() so we're good to just exit // The errors have already been handled in handleFileNotFound() so we're good to just exit
return return
} }
if cfg.HttpAPIDlCacheControl > 0 { if config.GetConfig().HttpAPIDlCacheControl > 0 {
w.Header().Set("Cache-Control", fmt.Sprintf("public, max-age=%d, must-revalidate", cfg.HttpAPIDlCacheControl)) w.Header().Set("Cache-Control", fmt.Sprintf("public, max-age=%d, must-revalidate", config.GetConfig().HttpAPIDlCacheControl))
} else { } else {
w.Header().Set("Cache-Control", "no-store") w.Header().Set("Cache-Control", "no-store")
} }
if !item.IsDir { if !item.IsDir {
// Get the MIME type of the file
var fileExists bool
var mimeType string
var err error
if item.Type == nil {
fileExists, mimeType, _, err = file.GetMimeType(fullPath, true, nil)
if !fileExists {
helpers.Return400Msg("file not found", w)
}
if err != nil {
log.Warnf("Error detecting MIME type: %v", err)
helpers.Return500Msg(w)
return
}
// GetMimeType() returns an empty string if it was a directory
if mimeType != "" {
// Update the CacheItem's MIME in the sharedCache
item.Type = &mimeType
sharedCache.Add(relPath, item)
}
}
// https://stackoverflow.com/a/57994289
// Only files can have inline disposition, zip archives cannot // Only files can have inline disposition, zip archives cannot
// https://stackoverflow.com/a/57994289
contentDownload := r.URL.Query().Get("download") contentDownload := r.URL.Query().Get("download")
var disposition string var disposition string
if contentDownload != "" { if contentDownload != "" {
@ -119,8 +93,53 @@ func Download(w http.ResponseWriter, r *http.Request, cfg *config.Config, shared
} }
w.Header().Set("Content-Disposition", fmt.Sprintf(`%s; filename="%s"`, disposition, item.Name)) w.Header().Set("Content-Disposition", fmt.Sprintf(`%s; filename="%s"`, disposition, item.Name))
w.Header().Set("Content-Type", mimeType) // Set the content type to the MIME type of the file // Get the MIME type of the file
http.ServeFile(w, r, fullPath) // Send the file to the client var mimeType string
var err error
if item.MimeType == nil { // only if the MIME type of this item has not been set yet
_, mimeType, _, err = file.GetMimeType(fullPath, true, nil)
if err != nil {
log.Errorf("Error detecting MIME type: %v", err)
} else if mimeType != "" {
// GetMimeType() returns an empty string if it was a directory.
// Update the CacheItem's MIME in the sharedCache.
item.MimeType = &mimeType
sharedCache.Add(relPath, item)
} else {
log.Errorf("Download.go failed to match a condition when checking a file's MIME - %s", fullPath)
helpers.Return500Msg(w)
}
} else {
mimeType = *item.MimeType
}
// Get the encoding of this file
var encoding string
encoding = "utf-8" // fall back to utf-8
if item.Encoding == nil || *item.Encoding == "" { // only if the encoding of this item has not been set yet
encoding, err = file.DetectFileEncoding(fullPath)
if err != nil {
log.Warnf("Error detecting file encoding: %v", err)
} else {
// Update the object in the cache.
item.Encoding = &encoding
}
} else {
encoding = *item.Encoding
}
if config.GetConfig().HTTPNoMimeSniffHeader {
w.Header().Set("X-Content-Type-Options", "nosniff")
mimeType = file.CastTextMimes(mimeType)
}
// If we were able to find the MIME type and the encoding of the file, set the Content-Type header.
if mimeType != "" && encoding != "" {
w.Header().Set("Content-Type", mimeType+"; charset="+encoding)
}
// Send the file to the client.
http.ServeFile(w, r, fullPath)
} else { } else {
// Stream archive of the directory here // Stream archive of the directory here
w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s.zip"`, item.Name)) w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s.zip"`, item.Name))

View File

@ -4,7 +4,6 @@ import (
"crazyfs/CacheItem" "crazyfs/CacheItem"
"crazyfs/cache" "crazyfs/cache"
"crazyfs/cache/DirectoryCrawler" "crazyfs/cache/DirectoryCrawler"
"crazyfs/config"
"encoding/json" "encoding/json"
lru "github.com/hashicorp/golang-lru/v2" lru "github.com/hashicorp/golang-lru/v2"
"net/http" "net/http"
@ -12,7 +11,7 @@ import (
// TODO: show the time the initial crawl started // TODO: show the time the initial crawl started
func HealthCheck(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Item]) { func HealthCheck(w http.ResponseWriter, r *http.Request, sharedCache *lru.Cache[string, *CacheItem.Item]) {
//log := logging.GetLogger() //log := logging.GetLogger()
response := map[string]interface{}{} response := map[string]interface{}{}

View File

@ -12,7 +12,7 @@ import (
"strconv" "strconv"
) )
func ListDir(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Item]) { func ListDir(w http.ResponseWriter, r *http.Request, sharedCache *lru.Cache[string, *CacheItem.Item]) {
if helpers.CheckInitialCrawl() { if helpers.CheckInitialCrawl() {
helpers.HandleRejectDuringInitialCrawl(w) helpers.HandleRejectDuringInitialCrawl(w)
return return
@ -49,7 +49,7 @@ func ListDir(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedC
// Try to get the data from the cache // Try to get the data from the cache
cacheItem, found := sharedCache.Get(relPath) cacheItem, found := sharedCache.Get(relPath)
if !found { if !found {
cacheItem = helpers.HandleFileNotFound(relPath, fullPath, sharedCache, cfg, w) cacheItem = helpers.HandleFileNotFound(relPath, fullPath, sharedCache, w)
} }
if cacheItem == nil { if cacheItem == nil {
return // The errors have already been handled in handleFileNotFound() so we're good to just exit return // The errors have already been handled in handleFileNotFound() so we're good to just exit
@ -61,12 +61,12 @@ func ListDir(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedC
// Get the MIME type of the file if the 'mime' argument is present // Get the MIME type of the file if the 'mime' argument is present
mime := r.URL.Query().Get("mime") mime := r.URL.Query().Get("mime")
if mime != "" { if mime != "" {
if item.IsDir && !cfg.HttpAllowDirMimeParse { if item.IsDir && !config.GetConfig().HttpAllowDirMimeParse {
helpers.Return403Msg("not allowed to analyze the mime of directories", w) helpers.Return403Msg("not allowed to analyze the mime of directories", w)
return return
} else { } else {
// Only update the mime in the cache if it hasn't been set already. // Only update the mime in the cache if it hasn't been set already.
// TODO: need to make sure that when a re-crawl is triggered, the Type is set back to nil // TODO: need to make sure that when a re-crawl is triggered, the MimeType is set back to nil
if item.Type == nil { if item.Type == nil {
fileExists, mimeType, ext, err := file.GetMimeType(fullPath, true, nil) fileExists, mimeType, ext, err := file.GetMimeType(fullPath, true, nil)
if !fileExists { if !fileExists {
@ -78,7 +78,7 @@ func ListDir(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedC
return return
} }
// Update the original cached CacheItem's MIME in the sharedCache // Update the original cached CacheItem's MIME in the sharedCache
cacheItem.Type = &mimeType cacheItem.MimeType = &mimeType
cacheItem.Extension = &ext cacheItem.Extension = &ext
sharedCache.Add(relPath, cacheItem) // take the address of CacheItem sharedCache.Add(relPath, cacheItem) // take the address of CacheItem
} }

View File

@ -6,6 +6,7 @@ import (
"crazyfs/cache" "crazyfs/cache"
"crazyfs/config" "crazyfs/config"
"crazyfs/elastic" "crazyfs/elastic"
"crazyfs/logging"
"encoding/json" "encoding/json"
lru "github.com/hashicorp/golang-lru/v2" lru "github.com/hashicorp/golang-lru/v2"
"net/http" "net/http"
@ -15,7 +16,7 @@ import (
"time" "time"
) )
func SearchFile(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Item]) { func SearchFile(w http.ResponseWriter, r *http.Request, sharedCache *lru.Cache[string, *CacheItem.Item]) {
if helpers.CheckInitialCrawl() { if helpers.CheckInitialCrawl() {
helpers.HandleRejectDuringInitialCrawl(w) helpers.HandleRejectDuringInitialCrawl(w)
return return
@ -27,7 +28,7 @@ func SearchFile(w http.ResponseWriter, r *http.Request, cfg *config.Config, shar
return return
} }
if !cfg.ElasticsearchEnable { if !config.GetConfig().ElasticsearchEnable {
// If we aren't using Elastic, convert the query to lowercase to reduce the complication. // If we aren't using Elastic, convert the query to lowercase to reduce the complication.
queryString = strings.ToLower(queryString) queryString = strings.ToLower(queryString)
} }
@ -68,11 +69,11 @@ func SearchFile(w http.ResponseWriter, r *http.Request, cfg *config.Config, shar
var results []*CacheItem.Item var results []*CacheItem.Item
results = make([]*CacheItem.Item, 0) results = make([]*CacheItem.Item, 0)
if cfg.ElasticsearchEnable { if config.GetConfig().ElasticsearchEnable {
// Perform the Elasticsearch query // Perform the Elasticsearch query
resp, err := elastic.Search(queryString, excludeElements, cfg) resp, err := elastic.Search(queryString, excludeElements)
if err != nil { if err != nil {
log.Errorf("SEARCH - Failed to perform Elasticsearch query: %s", err) log.Errorf(`SEARCH - Failed to perform Elasticsearch query "%s" - %s`, queryString, err)
helpers.Return500Msg(w) helpers.Return500Msg(w)
return return
} }
@ -81,7 +82,7 @@ func SearchFile(w http.ResponseWriter, r *http.Request, cfg *config.Config, shar
var respData map[string]interface{} var respData map[string]interface{}
err = json.NewDecoder(resp.Body).Decode(&respData) err = json.NewDecoder(resp.Body).Decode(&respData)
if err != nil { if err != nil {
log.Errorf("SEARCH - Failed to parse Elasticsearch response: %s", err) log.Errorf(`SEARCH - Failed to parse Elasticsearch response for query "%s" - %s`, queryString, err)
helpers.Return500Msg(w) helpers.Return500Msg(w)
return return
} }
@ -128,7 +129,7 @@ func SearchFile(w http.ResponseWriter, r *http.Request, cfg *config.Config, shar
Mode: uint32(itemSource["mode"].(float64)), Mode: uint32(itemSource["mode"].(float64)),
IsDir: itemSource["isDir"].(bool), IsDir: itemSource["isDir"].(bool),
IsSymlink: itemSource["isSymlink"].(bool), IsSymlink: itemSource["isSymlink"].(bool),
Type: itemType, MimeType: itemType,
Cached: int64(itemSource["cached"].(float64)), Cached: int64(itemSource["cached"].(float64)),
} }
items[i] = item items[i] = item
@ -142,7 +143,7 @@ func SearchFile(w http.ResponseWriter, r *http.Request, cfg *config.Config, shar
results = append(results, items...) results = append(results, items...)
} }
} else { } else {
results = cache.SearchLRU(queryString, excludeElements, limitResults, sharedCache, cfg) results = cache.SearchLRU(queryString, excludeElements, limitResults, sharedCache)
} }
if folderSorting == "folders" { if folderSorting == "folders" {
@ -152,12 +153,14 @@ func SearchFile(w http.ResponseWriter, r *http.Request, cfg *config.Config, shar
} }
searchDuration := time.Since(searchStart).Round(time.Second) searchDuration := time.Since(searchStart).Round(time.Second)
log.Infof("SEARCH - completed in %s and returned %d items", searchDuration, len(results)) log.Debugf(`SEARCH - %s - Query: "%s" - Results: %d - Elapsed: %d`, logging.GetRealIP(r), queryString, len(results), searchDuration)
w.Header().Set("Cache-Control", "no-store") w.Header().Set("Cache-Control", "no-store")
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
err := json.NewEncoder(w).Encode(map[string]interface{}{ err := json.NewEncoder(w).Encode(map[string]interface{}{
"results": results, "results": results,
"numResults": len(results),
"elapsed": searchDuration,
}) })
if err != nil { if err != nil {
log.Errorf("SEARCH - Failed to serialize JSON: %s", err) log.Errorf("SEARCH - Failed to serialize JSON: %s", err)

View File

@ -22,17 +22,17 @@ import (
"strings" "strings"
) )
func Thumbnail(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Item]) { func Thumbnail(w http.ResponseWriter, r *http.Request, sharedCache *lru.Cache[string, *CacheItem.Item]) {
if cache.InitialCrawlInProgress && !cfg.HttpAllowDuringInitialCrawl { if cache.InitialCrawlInProgress && !config.GetConfig().HttpAllowDuringInitialCrawl {
helpers.HandleRejectDuringInitialCrawl(w) helpers.HandleRejectDuringInitialCrawl(w)
returnDummyPNG(w) returnDummyPNG(w)
return return
} }
log := logging.GetLogger() log := logging.GetLogger()
relPath := file.StripRootDir(filepath.Join(cfg.RootDir, r.URL.Query().Get("path"))) relPath := file.StripRootDir(filepath.Join(config.GetConfig().RootDir, r.URL.Query().Get("path")))
relPath = strings.TrimSuffix(relPath, "/") relPath = strings.TrimSuffix(relPath, "/")
fullPath := filepath.Join(cfg.RootDir, relPath) fullPath := filepath.Join(config.GetConfig().RootDir, relPath)
// Validate args before doing any operations // Validate args before doing any operations
width, err := getPositiveIntFromQuery(r, "width") width, err := getPositiveIntFromQuery(r, "width")
@ -65,7 +65,7 @@ func Thumbnail(w http.ResponseWriter, r *http.Request, cfg *config.Config, share
// Try to get the data from the cache // Try to get the data from the cache
item, found := sharedCache.Get(relPath) item, found := sharedCache.Get(relPath)
if !found { if !found {
item = helpers.HandleFileNotFound(relPath, fullPath, sharedCache, cfg, w) item = helpers.HandleFileNotFound(relPath, fullPath, sharedCache, w)
} }
if item == nil { if item == nil {
returnDummyPNG(w) returnDummyPNG(w)
@ -89,7 +89,7 @@ func Thumbnail(w http.ResponseWriter, r *http.Request, cfg *config.Config, share
return return
} }
// Update the CacheItem's MIME in the sharedCache // Update the CacheItem's MIME in the sharedCache
item.Type = &mimeType item.MimeType = &mimeType
item.Extension = &ext item.Extension = &ext
sharedCache.Add(relPath, item) sharedCache.Add(relPath, item)

View File

@ -4,7 +4,6 @@ import (
"crazyfs/CacheItem" "crazyfs/CacheItem"
"crazyfs/cache" "crazyfs/cache"
"crazyfs/cache/DirectoryCrawler" "crazyfs/cache/DirectoryCrawler"
"crazyfs/config"
"encoding/json" "encoding/json"
lru "github.com/hashicorp/golang-lru/v2" lru "github.com/hashicorp/golang-lru/v2"
"net/http" "net/http"
@ -12,7 +11,7 @@ import (
// TODO: show the time the initial crawl started // TODO: show the time the initial crawl started
func ClientHealthCheck(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Item]) { func ClientHealthCheck(w http.ResponseWriter, r *http.Request, sharedCache *lru.Cache[string, *CacheItem.Item]) {
response := map[string]interface{}{} response := map[string]interface{}{}
response["scan_running"] = DirectoryCrawler.GetGlobalActiveCrawls() > 0 response["scan_running"] = DirectoryCrawler.GetGlobalActiveCrawls() > 0

View File

@ -8,9 +8,9 @@ import (
"net/http" "net/http"
) )
func RestrictedDownloadDirectories(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Item]) { func RestrictedDownloadDirectories(w http.ResponseWriter, r *http.Request, sharedCache *lru.Cache[string, *CacheItem.Item]) {
response := map[string]interface{}{ response := map[string]interface{}{
"restricted_download_directories": config.RestrictedDownloadPaths, "restricted_download_directories": config.GetConfig().RestrictedDownloadPaths,
} }
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")

View File

@ -1,20 +1,21 @@
package helpers package helpers
import ( import (
"crazyfs/logging"
"encoding/json" "encoding/json"
"net/http" "net/http"
) )
func WriteErrorResponse(json_code, http_code int, msg string, w http.ResponseWriter) { func WriteErrorResponse(jsonCode, httpCode int, msg string, w http.ResponseWriter) {
//log := logging.GetLogger() log := logging.GetLogger()
//log.Warnln(msg) log.Warnln(msg)
w.Header().Set("Cache-Control", "no-store") w.Header().Set("Cache-Control", "no-store")
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http_code) w.WriteHeader(httpCode)
err := json.NewEncoder(w).Encode(map[string]interface{}{ err := json.NewEncoder(w).Encode(map[string]interface{}{
"code": json_code, "code": jsonCode,
"error": msg, "error": msg,
}) })
@ -24,6 +25,7 @@ func WriteErrorResponse(json_code, http_code int, msg string, w http.ResponseWri
} }
func ReturnFake404Msg(msg string, w http.ResponseWriter) { func ReturnFake404Msg(msg string, w http.ResponseWriter) {
log.Fatalf(msg)
WriteErrorResponse(404, http.StatusBadRequest, msg, w) WriteErrorResponse(404, http.StatusBadRequest, msg, w)
} }

View File

@ -14,7 +14,7 @@ import (
) )
// HandleFileNotFound if the data is not in the cache, start a new crawler // HandleFileNotFound if the data is not in the cache, start a new crawler
func HandleFileNotFound(relPath string, fullPath string, sharedCache *lru.Cache[string, *CacheItem.Item], cfg *config.Config, w http.ResponseWriter) *CacheItem.Item { func HandleFileNotFound(relPath string, fullPath string, sharedCache *lru.Cache[string, *CacheItem.Item], w http.ResponseWriter) *CacheItem.Item {
log := logging.GetLogger() log := logging.GetLogger()
//log.Fatalf("CRAWLER - %s not in cache, crawling", fullPath) //log.Fatalf("CRAWLER - %s not in cache, crawling", fullPath)
@ -84,7 +84,7 @@ func HandleFileNotFound(relPath string, fullPath string, sharedCache *lru.Cache[
Return500Msg(w) Return500Msg(w)
return nil return nil
} }
cache.CheckAndRecache(fullPath, cfg, sharedCache) cache.CheckAndRecache(fullPath, sharedCache)
return item return item
} }
@ -110,11 +110,11 @@ func Max(a, b int) int {
} }
func CheckInitialCrawl() bool { func CheckInitialCrawl() bool {
return cache.InitialCrawlInProgress && !config.HttpAllowDuringInitialCrawl return cache.InitialCrawlInProgress && !config.GetConfig().HttpAllowDuringInitialCrawl
} }
func CheckPathRestricted(relPath string) bool { func CheckPathRestricted(relPath string) bool {
for _, restrictedPath := range config.RestrictedDownloadPaths { for _, restrictedPath := range config.GetConfig().RestrictedDownloadPaths {
if restrictedPath == "" { if restrictedPath == "" {
restrictedPath = "/" restrictedPath = "/"
} }

View File

@ -2,7 +2,6 @@ package helpers
import ( import (
"crazyfs/CacheItem" "crazyfs/CacheItem"
"crazyfs/config"
"crazyfs/file" "crazyfs/file"
lru "github.com/hashicorp/golang-lru/v2" lru "github.com/hashicorp/golang-lru/v2"
kzip "github.com/klauspost/compress/zip" kzip "github.com/klauspost/compress/zip"
@ -49,7 +48,7 @@ func ZipHandlerCompress(dirPath string, w http.ResponseWriter, r *http.Request)
log.Errorf("ZIPSTREM - failed to close zipwriter: %s", err) log.Errorf("ZIPSTREM - failed to close zipwriter: %s", err)
} }
} }
func ZipHandlerCompressMultiple(paths []string, w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Item]) { func ZipHandlerCompressMultiple(paths []string, w http.ResponseWriter, r *http.Request, sharedCache *lru.Cache[string, *CacheItem.Item]) {
zipWriter := kzip.NewWriter(w) zipWriter := kzip.NewWriter(w)
// Walk through each file and add it to the zip // Walk through each file and add it to the zip
for _, fullPath := range paths { for _, fullPath := range paths {
@ -58,7 +57,7 @@ func ZipHandlerCompressMultiple(paths []string, w http.ResponseWriter, r *http.R
// Try to get the data from the cache // Try to get the data from the cache
item, found := sharedCache.Get(relPath) item, found := sharedCache.Get(relPath)
if !found { if !found {
item = HandleFileNotFound(relPath, fullPath, sharedCache, cfg, w) item = HandleFileNotFound(relPath, fullPath, sharedCache, w)
} }
if item == nil { if item == nil {
// The errors have already been handled in handleFileNotFound() so we're good to just exit // The errors have already been handled in handleFileNotFound() so we're good to just exit

View File

@ -3,7 +3,6 @@ package api
import ( import (
"crazyfs/CacheItem" "crazyfs/CacheItem"
"crazyfs/api/client" "crazyfs/api/client"
"crazyfs/config"
"crazyfs/logging" "crazyfs/logging"
"encoding/json" "encoding/json"
"fmt" "fmt"
@ -21,7 +20,7 @@ type Route struct {
type Routes []Route type Routes []Route
type AppHandler func(http.ResponseWriter, *http.Request, *config.Config, *lru.Cache[string, *CacheItem.Item]) type AppHandler func(http.ResponseWriter, *http.Request, *lru.Cache[string, *CacheItem.Item])
var routes = Routes{ var routes = Routes{
Route{ Route{
@ -104,7 +103,7 @@ func setHeaders(next http.Handler) http.Handler {
}) })
} }
func NewRouter(cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Item]) *mux.Router { func NewRouter(sharedCache *lru.Cache[string, *CacheItem.Item]) *mux.Router {
r := mux.NewRouter().StrictSlash(true) r := mux.NewRouter().StrictSlash(true)
for _, route := range routes { for _, route := range routes {
var handler http.Handler var handler http.Handler
@ -113,7 +112,7 @@ func NewRouter(cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Ite
currentRoute := route currentRoute := route
handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
currentRoute.HandlerFunc(w, r, cfg, sharedCache) currentRoute.HandlerFunc(w, r, sharedCache)
}) })
handler = setHeaders(handler) handler = setHeaders(handler)
handler = logging.LogRequest(handler) handler = logging.LogRequest(handler)
@ -139,7 +138,7 @@ func NewRouter(cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Ite
} }
func wrongMethod(expectedMethod string, next AppHandler) AppHandler { func wrongMethod(expectedMethod string, next AppHandler) AppHandler {
return func(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Item]) { return func(w http.ResponseWriter, r *http.Request, sharedCache *lru.Cache[string, *CacheItem.Item]) {
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusBadRequest) w.WriteHeader(http.StatusBadRequest)
json.NewEncoder(w).Encode(map[string]interface{}{ json.NewEncoder(w).Encode(map[string]interface{}{
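Every route now has to match the slimmed-down `AppHandler` signature and read settings through the package-level accessor rather than a `cfg` argument. A hedged example of a conforming handler after this refactor (the handler name, header, and body are illustrative, not from the commit):

```go
package api

import (
	"net/http"

	"crazyfs/CacheItem"
	"crazyfs/config"

	lru "github.com/hashicorp/golang-lru/v2"
)

// ExampleHandler shows the shape every route handler takes after this commit:
// no *config.Config parameter; configuration comes from config.GetConfig().
func ExampleHandler(w http.ResponseWriter, r *http.Request, sharedCache *lru.Cache[string, *CacheItem.Item]) {
	if config.GetConfig().ElasticsearchEnable {
		w.Header().Set("X-Search-Backend", "elasticsearch") // hypothetical header
	}
	w.WriteHeader(http.StatusOK)
}
```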

View File

@ -33,7 +33,7 @@ func (dc *DirectoryCrawler) processPath(fullPath string, info os.FileInfo) error
// This block of code ensures that the parent directory's Children field is always up-to-date with // This block of code ensures that the parent directory's Children field is always up-to-date with
// the current state of its subdirectories. It removes any old versions of the current directory // the current state of its subdirectories. It removes any old versions of the current directory
// from the parent's Children field and adds the new version. // from the parent's Children field and adds the new version.
if fullPath != config.RootDir { if fullPath != config.GetConfig().RootDir {
parentDir := filepath.Dir(fullPath) parentDir := filepath.Dir(fullPath)
strippedParentDir := file.StripRootDir(parentDir) strippedParentDir := file.StripRootDir(parentDir)
parentItem, found := dc.cache.Get(strippedParentDir) parentItem, found := dc.cache.Get(strippedParentDir)

23
src/cache/crawler.go vendored
View File

@ -17,23 +17,23 @@ func init() {
log = logging.GetLogger() log = logging.GetLogger()
} }
func StartCrawler(sharedCache *lru.Cache[string, *CacheItem.Item], cfg *config.Config) error { func StartCrawler(sharedCache *lru.Cache[string, *CacheItem.Item]) error {
var wg sync.WaitGroup var wg sync.WaitGroup
crawlerChan := make(chan struct{}, cfg.DirectoryCrawlers) crawlerChan := make(chan struct{}, config.GetConfig().DirectoryCrawlers)
go startCrawl(cfg, sharedCache, &wg, crawlerChan) go startCrawl(sharedCache, &wg, crawlerChan)
ticker := time.NewTicker(60 * time.Second) ticker := time.NewTicker(60 * time.Second)
go logCacheStatus("CACHE STATUS", ticker, sharedCache, cfg, log.Debugf) go logCacheStatus("CACHE STATUS", ticker, sharedCache, log.Debugf)
return nil return nil
} }
func startCrawl(cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Item], wg *sync.WaitGroup, crawlerChan chan struct{}) { func startCrawl(sharedCache *lru.Cache[string, *CacheItem.Item], wg *sync.WaitGroup, crawlerChan chan struct{}) {
ticker := time.NewTicker(time.Duration(cfg.CrawlModeCrawlInterval) * time.Second) ticker := time.NewTicker(time.Duration(config.GetConfig().CrawlModeCrawlInterval) * time.Second)
defer ticker.Stop() defer ticker.Stop()
time.Sleep(time.Duration(cfg.CrawlModeCrawlInterval) * time.Second) time.Sleep(time.Duration(config.GetConfig().CrawlModeCrawlInterval) * time.Second)
for range ticker.C { for range ticker.C {
crawlerChan <- struct{}{} crawlerChan <- struct{}{}
@ -43,25 +43,24 @@ func startCrawl(cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.It
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache) dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
log.Infoln("CRAWLER - Starting a crawl...") log.Infoln("CRAWLER - Starting a crawl...")
start := time.Now() start := time.Now()
err := dc.Crawl(cfg.RootDir, true) err := dc.Crawl(config.GetConfig().RootDir, true)
duration := time.Since(start).Round(time.Second) duration := time.Since(start).Round(time.Second)
if err != nil { if err != nil {
log.Warnf("CRAWLER - Crawl failed: %s", err) log.Warnf("CRAWLER - Crawl failed: %s", err)
} else { } else {
log.Infof("CRAWLER - Crawl completed in %s", duration) log.Infof("CRAWLER - Crawl completed in %s", duration)
log.Debugf("%d/%d items in the cache.", cfg.CacheSize, len(sharedCache.Keys())) log.Debugf("%d/%d items in the cache.", config.GetConfig().CacheSize, len(sharedCache.Keys()))
} }
<-crawlerChan <-crawlerChan
}() }()
} }
} }
func logCacheStatus(msg string, ticker *time.Ticker, sharedCache *lru.Cache[string, *CacheItem.Item], cfg *config.Config, logFn func(format string, args ...interface{})) { func logCacheStatus(msg string, ticker *time.Ticker, sharedCache *lru.Cache[string, *CacheItem.Item], logFn func(format string, args ...interface{})) {
defer ticker.Stop() defer ticker.Stop()
for range ticker.C { for range ticker.C {
activeWorkers := int(DirectoryCrawler.ActiveWorkers) activeWorkers := int(DirectoryCrawler.ActiveWorkers)
busyWorkers := int(DirectoryCrawler.ActiveWalks) busyWorkers := int(DirectoryCrawler.ActiveWalks)
logFn("%s - %d/%d items in the cache. Active workers: %d Active crawls: %d", msg, len(sharedCache.Keys()), cfg.CacheSize, activeWorkers, busyWorkers) logFn("%s - %d/%d items in the cache. Active workers: %d Active crawls: %d", msg, len(sharedCache.Keys()), config.GetConfig().CacheSize, activeWorkers, busyWorkers)
//fmt.Println(sharedCache.Keys())
} }
} }

View File

@ -15,18 +15,18 @@ func init() {
InitialCrawlInProgress = false InitialCrawlInProgress = false
} }
func InitialCrawl(sharedCache *lru.Cache[string, *CacheItem.Item], cfg *config.Config) { func InitialCrawl(sharedCache *lru.Cache[string, *CacheItem.Item]) {
log = logging.GetLogger() log = logging.GetLogger()
log.Infof("INITIAL CRAWL - starting the crawl for %s", config.RootDir) log.Infof("INITIAL CRAWL - starting the crawl for %s", config.GetConfig().RootDir)
ticker := time.NewTicker(3 * time.Second) ticker := time.NewTicker(3 * time.Second)
go logCacheStatus("INITIAL CRAWL", ticker, sharedCache, cfg, log.Infof) go logCacheStatus("INITIAL CRAWL", ticker, sharedCache, log.Infof)
InitialCrawlInProgress = true InitialCrawlInProgress = true
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache) dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
//start := time.Now() //start := time.Now()
err := dc.Crawl(config.RootDir, true) err := dc.Crawl(config.GetConfig().RootDir, true)
if err != nil { if err != nil {
log.Errorf("LIST - background recursive crawl failed: %s", err) log.Errorf("LIST - background recursive crawl failed: %s", err)
} }

1
src/cache/missing.go vendored Normal file
View File

@ -0,0 +1 @@
package cache

View File

@ -18,9 +18,9 @@ func InitRecacheSemaphore(limit int) {
sem = make(chan struct{}, limit) sem = make(chan struct{}, limit)
} }
func CheckAndRecache(path string, cfg *config.Config, sharedCache *lru.Cache[string, *CacheItem.Item]) { func CheckAndRecache(path string, sharedCache *lru.Cache[string, *CacheItem.Item]) {
item, found := sharedCache.Get(path) item, found := sharedCache.Get(path)
if found && time.Now().UnixNano()/int64(time.Millisecond)-item.Cached > int64(cfg.CacheTime)*60*1000 { if found && time.Now().UnixNano()/int64(time.Millisecond)-item.Cached > int64(config.GetConfig().CacheTime)*60*1000 {
log := logging.GetLogger() log := logging.GetLogger()
log.Debugf("Re-caching: %s", path) log.Debugf("Re-caching: %s", path)
sem <- struct{}{} // acquire a token sem <- struct{}{} // acquire a token

10
src/cache/search.go vendored
View File

@ -9,7 +9,7 @@ import (
"strings" "strings"
) )
func SearchLRU(queryString string, excludeElements []string, limitResults int, sharedCache *lru.Cache[string, *CacheItem.Item], cfg *config.Config) []*CacheItem.Item { func SearchLRU(queryString string, excludeElements []string, limitResults int, sharedCache *lru.Cache[string, *CacheItem.Item]) []*CacheItem.Item {
results := make([]*CacheItem.Item, 0) results := make([]*CacheItem.Item, 0)
const maxGoroutines = 100 const maxGoroutines = 100
@ -20,7 +20,7 @@ func SearchLRU(queryString string, excludeElements []string, limitResults int, s
resultsChan := make(chan *CacheItem.Item, len(sharedCache.Keys())) resultsChan := make(chan *CacheItem.Item, len(sharedCache.Keys()))
for _, key := range sharedCache.Keys() { for _, key := range sharedCache.Keys() {
searchKey(key, queryString, excludeElements, sem, resultsChan, sharedCache, cfg) searchKey(key, queryString, excludeElements, sem, resultsChan, sharedCache)
} }
// Wait for all goroutines to finish // Wait for all goroutines to finish
@ -32,7 +32,7 @@ func SearchLRU(queryString string, excludeElements []string, limitResults int, s
item := <-resultsChan item := <-resultsChan
if item != nil { if item != nil {
results = append(results, item) results = append(results, item)
if (limitResults > 0 && len(results) == limitResults) || len(results) >= cfg.ApiSearchMaxResults { if (limitResults > 0 && len(results) == limitResults) || len(results) >= config.GetConfig().ApiSearchMaxResults {
break break
} }
} }
@ -41,7 +41,7 @@ func SearchLRU(queryString string, excludeElements []string, limitResults int, s
return results return results
} }
func searchKey(key string, queryString string, excludeElements []string, sem chan struct{}, resultsChan chan *CacheItem.Item, sharedCache *lru.Cache[string, *CacheItem.Item], cfg *config.Config) { func searchKey(key string, queryString string, excludeElements []string, sem chan struct{}, resultsChan chan *CacheItem.Item, sharedCache *lru.Cache[string, *CacheItem.Item]) {
// Acquire a token // Acquire a token
sem <- struct{}{} sem <- struct{}{}
@ -87,7 +87,7 @@ func searchKey(key string, queryString string, excludeElements []string, sem cha
resultsChan <- nil resultsChan <- nil
return return
} }
if !cfg.ApiSearchShowChildren { if !config.GetConfig().ApiSearchShowChildren {
item.Children = nil // erase the children dict item.Children = nil // erase the children dict
} }
resultsChan <- &item resultsChan <- &item

101
src/cache/watcher.go vendored
View File

@ -1,101 +0,0 @@
package cache
import (
"crazyfs/CacheItem"
"crazyfs/cache/DirectoryCrawler"
"crazyfs/config"
lru "github.com/hashicorp/golang-lru/v2"
"github.com/radovskyb/watcher"
"strings"
"sync"
"time"
)
func StartWatcher(basePath string, sharedCache *lru.Cache[string, *CacheItem.Item], cfg *config.Config) (*watcher.Watcher, error) {
w := watcher.New()
var wg sync.WaitGroup
crawlerChan := make(chan struct{}, cfg.DirectoryCrawlers) // limit to cfg.DirectoryCrawlers concurrent crawlers
go func() {
for {
select {
case event := <-w.Event:
// Ignore events outside of basePath
if !strings.HasPrefix(event.Path, basePath) {
if cfg.CachePrintChanges {
log.Warnf("Ignoring file outside the base path: %s", event.Path)
}
continue
}
if event.Op == watcher.Create {
if cfg.CachePrintChanges {
log.Debugf("WATCHER - File created: %s", event.Path)
}
}
if event.Op == watcher.Write {
if cfg.CachePrintChanges {
log.Debugf("WATCHER - File modified: %s", event.Path)
}
}
if event.Op == watcher.Remove {
if cfg.CachePrintChanges {
log.Debugf("WATCHER - File removed: %s", event.Path)
}
sharedCache.Remove(event.Path) // remove the entry from the cache
continue // skip the rest of the loop for this event
}
if event.Op == watcher.Rename {
if cfg.CachePrintChanges {
log.Debugf("WATCHER- File renamed: %s", event.Path)
}
sharedCache.Remove(event.Path)
continue
}
if event.Op == watcher.Chmod {
if cfg.CachePrintChanges {
log.Debugf("WATCHER - File chmod: %s", event.Path)
}
}
crawlerChan <- struct{}{} // block if there are already 4 crawlers
wg.Add(1)
go func() {
defer wg.Done()
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
err := dc.Crawl(event.Path, true)
if err != nil {
log.Warnf("WATCHER - Crawl failed: %s", err)
}
<-crawlerChan // release
}()
case err := <-w.Error:
log.Errorf("WATCHER - %s", err)
case <-w.Closed:
return
}
}
}()
// Watch test_folder recursively for changes.
if err := w.AddRecursive(basePath); err != nil {
log.Fatalf("WATCHER RECURSIVE): %s", err)
}
go func() {
// Start the watching process - it'll check for changes every 100ms.
if err := w.Start(time.Second * time.Duration(cfg.WatchInterval)); err != nil {
log.Fatalf("WATCHER: %s", err)
}
}()
// Print the filenames of the cache entries every 5 seconds
ticker := time.NewTicker(60 * time.Second)
go func(c *lru.Cache[string, *CacheItem.Item]) {
for range ticker.C {
keys := c.Keys()
log.Debugf("%d items in the cache.", len(keys))
}
}(sharedCache)
return w, nil
}

View File

@ -6,14 +6,15 @@ import (
"strings" "strings"
) )
// The global, read-only config variable.
var cfg *Config
type Config struct { type Config struct {
RootDir string RootDir string
HTTPPort string HTTPPort string
WatchMode string
CrawlModeCrawlInterval int CrawlModeCrawlInterval int
DirectoryCrawlers int DirectoryCrawlers int
CrawlWorkers int CrawlWorkers int
WatchInterval int
CacheSize int CacheSize int
CacheTime int CacheTime int
CachePrintNew bool CachePrintNew bool
@ -21,6 +22,7 @@ type Config struct {
InitialCrawl bool InitialCrawl bool
CacheRecacheCrawlerLimit int CacheRecacheCrawlerLimit int
CrawlerParseMIME bool CrawlerParseMIME bool
CrawlerParseEncoding bool
HttpAPIListCacheControl int HttpAPIListCacheControl int
HttpAPIDlCacheControl int HttpAPIDlCacheControl int
HttpAllowDirMimeParse bool HttpAllowDirMimeParse bool
@ -42,9 +44,16 @@ type Config struct {
ElasticsearchAllowConcurrentSyncs bool ElasticsearchAllowConcurrentSyncs bool
ElasticsearchFullSyncOnStart bool ElasticsearchFullSyncOnStart bool
ElasticsearchDefaultQueryField string ElasticsearchDefaultQueryField string
HTTPRealIPHeader string
HTTPNoMimeSniffHeader bool
}
func SetConfig(configFile string) (*Config, error) {
// Only allow the config to be set once.
if cfg != nil {
panic("Config has already been set!")
} }
func LoadConfig(configFile string) (*Config, error) {
viper.SetConfigFile(configFile) viper.SetConfigFile(configFile)
viper.SetDefault("http_port", "8080") viper.SetDefault("http_port", "8080")
viper.SetDefault("watch_interval", 1) viper.SetDefault("watch_interval", 1)
@ -59,6 +68,7 @@ func LoadConfig(configFile string) (*Config, error) {
viper.SetDefault("initial_crawl", false) viper.SetDefault("initial_crawl", false)
viper.SetDefault("cache_recache_crawler_limit", 50) viper.SetDefault("cache_recache_crawler_limit", 50)
viper.SetDefault("crawler_parse_mime", false) viper.SetDefault("crawler_parse_mime", false)
viper.SetDefault("crawler_parse_encoding", false)
viper.SetDefault("http_api_list_cache_control", 600) viper.SetDefault("http_api_list_cache_control", 600)
viper.SetDefault("http_api_download_cache_control", 600) viper.SetDefault("http_api_download_cache_control", 600)
viper.SetDefault("http_allow_dir_mime_parse", true) viper.SetDefault("http_allow_dir_mime_parse", true)
@ -80,6 +90,8 @@ func LoadConfig(configFile string) (*Config, error) {
viper.SetDefault("elasticsearch_full_sync_on_start", false) viper.SetDefault("elasticsearch_full_sync_on_start", false)
viper.SetDefault("elasticsearch_query_fields", []string{"extension", "name", "path", "type", "size", "isDir"}) viper.SetDefault("elasticsearch_query_fields", []string{"extension", "name", "path", "type", "size", "isDir"})
viper.SetDefault("elasticsearch_default_query_field", "name") viper.SetDefault("elasticsearch_default_query_field", "name")
viper.SetDefault("http_real_ip_header", "X-Forwarded-For")
viper.SetDefault("http_no_mime_sniff_header", false)
err := viper.ReadInConfig() err := viper.ReadInConfig()
if err != nil { if err != nil {
@ -109,9 +121,7 @@ func LoadConfig(configFile string) (*Config, error) {
config := &Config{ config := &Config{
RootDir: rootDir, RootDir: rootDir,
HTTPPort: viper.GetString("http_port"), HTTPPort: viper.GetString("http_port"),
WatchMode: viper.GetString("watch_mode"),
CrawlModeCrawlInterval: viper.GetInt("crawl_mode_crawl_interval"), CrawlModeCrawlInterval: viper.GetInt("crawl_mode_crawl_interval"),
WatchInterval: viper.GetInt("watch_interval"),
DirectoryCrawlers: viper.GetInt("crawl_mode_crawl_interval"), DirectoryCrawlers: viper.GetInt("crawl_mode_crawl_interval"),
CrawlWorkers: viper.GetInt("crawl_workers"), CrawlWorkers: viper.GetInt("crawl_workers"),
CacheSize: viper.GetInt("cache_size"), CacheSize: viper.GetInt("cache_size"),
@ -121,6 +131,7 @@ func LoadConfig(configFile string) (*Config, error) {
InitialCrawl: viper.GetBool("initial_crawl"), InitialCrawl: viper.GetBool("initial_crawl"),
CacheRecacheCrawlerLimit: viper.GetInt("cache_recache_crawler_limit"), CacheRecacheCrawlerLimit: viper.GetInt("cache_recache_crawler_limit"),
CrawlerParseMIME: viper.GetBool("crawler_parse_mime"), CrawlerParseMIME: viper.GetBool("crawler_parse_mime"),
CrawlerParseEncoding: viper.GetBool("crawler_parse_encoding"),
HttpAPIListCacheControl: viper.GetInt("http_api_list_cache_control"), HttpAPIListCacheControl: viper.GetInt("http_api_list_cache_control"),
HttpAPIDlCacheControl: viper.GetInt("http_api_download_cache_control"), HttpAPIDlCacheControl: viper.GetInt("http_api_download_cache_control"),
HttpAllowDirMimeParse: viper.GetBool("http_allow_dir_mime_parse"), HttpAllowDirMimeParse: viper.GetBool("http_allow_dir_mime_parse"),
@ -142,10 +153,8 @@ func LoadConfig(configFile string) (*Config, error) {
ElasticsearchAllowConcurrentSyncs: viper.GetBool("elasticsearch_allow_concurrent_syncs"), ElasticsearchAllowConcurrentSyncs: viper.GetBool("elasticsearch_allow_concurrent_syncs"),
ElasticsearchFullSyncOnStart: viper.GetBool("elasticsearch_full_sync_on_start"), ElasticsearchFullSyncOnStart: viper.GetBool("elasticsearch_full_sync_on_start"),
ElasticsearchDefaultQueryField: viper.GetString("elasticsearch_default_query_field"), ElasticsearchDefaultQueryField: viper.GetString("elasticsearch_default_query_field"),
} HTTPRealIPHeader: viper.GetString("http_real_ip_header"),
HTTPNoMimeSniffHeader: viper.GetBool("http_no_mime_sniff_header"),
if config.WatchMode != "crawl" && config.WatchMode != "watch" {
return nil, errors.New("watch_mode must be 'crawl' or 'watch'")
} }
if config.CacheTime < 0 { if config.CacheTime < 0 {
@ -188,5 +197,13 @@ func LoadConfig(configFile string) (*Config, error) {
return nil, errors.New("elasticsearch_full_sync_interval must be greater than elasticsearch_sync_interval") return nil, errors.New("elasticsearch_full_sync_interval must be greater than elasticsearch_sync_interval")
} }
cfg = config
return config, nil return config, nil
} }
func GetConfig() *Config {
if cfg == nil {
panic("Config has not been set!")
}
return cfg
}
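Taken together, the new flow is: `main` calls `config.SetConfig` exactly once at startup (a second call panics), and every other package reads the shared pointer through `config.GetConfig`. A minimal sketch of that startup sequence; the config path below is a placeholder:

```go
package main

import (
	"log"

	"crazyfs/config"
)

func main() {
	// Load and validate the config exactly once; SetConfig panics if called again.
	cfg, err := config.SetConfig("./config.yml")
	if err != nil {
		log.Fatalf("Failed to load config file: %s", err)
	}

	// Any later caller anywhere in the program reads the same instance.
	log.Printf("root dir: %s, port: %s", cfg.RootDir, config.GetConfig().HTTPPort)
}
```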

View File

@ -1,13 +1,13 @@
package config package config
// Config constants
var FollowSymlinks bool var FollowSymlinks bool
var CachePrintNew bool
var RootDir string //var CachePrintNew bool
var CrawlerParseMIME bool //var RootDir string
var MaxWorkers int //var CrawlerParseMIME bool
var HttpAllowDuringInitialCrawl bool //var MaxWorkers int
var RestrictedDownloadPaths []string //var HttpAllowDuringInitialCrawl bool
var ElasticsearchEnable bool //var RestrictedDownloadPaths []string
var ElasticsearchEndpoint string //var ElasticsearchEnable bool
var ElasticsearchSyncInterval int //var ElasticsearchEndpoint string
//var ElasticsearchSyncInterval int

View File

@ -22,7 +22,6 @@ import (
) )
var log *logrus.Logger var log *logrus.Logger
var cfg *config.Config
type cliConfig struct { type cliConfig struct {
configFile string configFile string
@ -79,7 +78,7 @@ func main() {
} }
var err error var err error
cfg, err = config.LoadConfig(cliArgs.configFile) cfg, err := config.SetConfig(cliArgs.configFile)
if err != nil { if err != nil {
log.Fatalf("Failed to load config file: %s", err) log.Fatalf("Failed to load config file: %s", err)
} }
@ -89,30 +88,19 @@ func main() {
log.Fatal(err) log.Fatal(err)
} }
// Set config variables // Set global variables.
// TODO: just pass the entire cfg object config.FollowSymlinks = false // TODO: make sure this works then set it based on the config yml
config.FollowSymlinks = false
config.CachePrintNew = cfg.CachePrintNew
config.RootDir = cfg.RootDir
config.CrawlerParseMIME = cfg.CrawlerParseMIME
config.MaxWorkers = cfg.CrawlWorkers
config.HttpAllowDuringInitialCrawl = cfg.HttpAllowDuringInitialCrawl
DirectoryCrawler.JobQueueSize = cfg.WorkersJobQueueSize
config.RestrictedDownloadPaths = cfg.RestrictedDownloadPaths
config.ElasticsearchEnable = cfg.ElasticsearchEnable
config.ElasticsearchEndpoint = cfg.ElasticsearchEndpoint
config.ElasticsearchSyncInterval = cfg.ElasticsearchSyncInterval
log.Infof("Elasticsearch enabled: %t", cfg.ElasticsearchEnable) log.Infof("Elasticsearch enabled: %t", cfg.ElasticsearchEnable)
// Init global variables // Init global variables
//DirectoryCrawler.CrawlWorkerPool = DirectoryCrawler.NewWorkerPool(config.MaxWorkers) //DirectoryCrawler.CrawlWorkerPool = DirectoryCrawler.NewWorkerPool(config.MaxWorkers)
DirectoryCrawler.WorkerPool = make(chan struct{}, config.MaxWorkers) DirectoryCrawler.WorkerPool = make(chan struct{}, cfg.CrawlWorkers)
cache.InitRecacheSemaphore(cfg.CacheRecacheCrawlerLimit) cache.InitRecacheSemaphore(cfg.CacheRecacheCrawlerLimit)
// Start the webserver before doing the long crawl // Start the webserver before doing the long crawl
r := api.NewRouter(cfg, sharedCache) r := api.NewRouter(sharedCache)
//log.Fatal(http.ListenAndServe(fmt.Sprintf(":%s", cfg.HTTPPort), r)) //log.Fatal(http.ListenAndServe(fmt.Sprintf(":%s", cfg.HTTPPort), r))
go func() { go func() {
err := http.ListenAndServe(fmt.Sprintf(":%s", cfg.HTTPPort), r) err := http.ListenAndServe(fmt.Sprintf(":%s", cfg.HTTPPort), r)
@ -125,28 +113,17 @@ func main() {
if cliArgs.initialCrawl || cfg.InitialCrawl { if cliArgs.initialCrawl || cfg.InitialCrawl {
log.Infoln("Preforming initial crawl...") log.Infoln("Preforming initial crawl...")
start := time.Now() start := time.Now()
cache.InitialCrawl(sharedCache, cfg) cache.InitialCrawl(sharedCache)
duration := time.Since(start).Round(time.Second) duration := time.Since(start).Round(time.Second)
keys := sharedCache.Keys() keys := sharedCache.Keys()
log.Infof("Initial crawl completed in %s. %d items added to the cache.", duration, len(keys)) log.Infof("Initial crawl completed in %s. %d items added to the cache.", duration, len(keys))
} }
if cfg.WatchMode == "watch" { err = cache.StartCrawler(sharedCache)
log.Debugln("Starting the watcher process")
watcher, err := cache.StartWatcher(cfg.RootDir, sharedCache, cfg)
if err != nil {
log.Fatalf("Failed to start watcher process: %s", err)
}
log.Infoln("Started the watcher process")
defer watcher.Close()
} else if cfg.WatchMode == "crawl" {
//log.Debugln("Starting the crawler")
err := cache.StartCrawler(sharedCache, cfg)
if err != nil { if err != nil {
log.Fatalf("Failed to start timed crawler process: %s", err) log.Fatalf("Failed to start timed crawler process: %s", err)
} }
log.Infoln("Started the timed crawler process") log.Infoln("Started the timed crawler process")
}
if cfg.ElasticsearchEnable { if cfg.ElasticsearchEnable {
// If we fail to establish a connection to Elastic, don't kill the entire server. // If we fail to establish a connection to Elastic, don't kill the entire server.
@ -167,7 +144,7 @@ func main() {
elastic.ElasticClient = es elastic.ElasticClient = es
if cfg.ElasticsearchSyncEnable && !cliArgs.disableElasticSync { if cfg.ElasticsearchSyncEnable && !cliArgs.disableElasticSync {
go elastic.ElasticsearchThread(sharedCache, cfg) go elastic.ElasticsearchThread(sharedCache)
log.Info("Started the background Elasticsearch sync thread.") log.Info("Started the background Elasticsearch sync thread.")
} else { } else {
log.Info("The background Elasticsearch sync thread is disabled.") log.Info("The background Elasticsearch sync thread is disabled.")

View File

@ -9,51 +9,51 @@ import (
"time" "time"
) )
func ElasticsearchThread(sharedCache *lru.Cache[string, *CacheItem.Item], cfg *config.Config) { func ElasticsearchThread(sharedCache *lru.Cache[string, *CacheItem.Item]) {
createCrazyfsIndex(cfg) createCrazyfsIndex()
// Test connection to Elastic. // Test connection to Elastic.
esContents, err := getPathsFromIndex(cfg) esContents, err := getPathsFromIndex()
if err != nil { if err != nil {
logElasticConnError(err) logElasticConnError(err)
return return
} }
esSize := len(esContents) esSize := len(esContents)
log.Infof(`ELASTIC - index "%s" contains %d items.`, cfg.ElasticsearchIndex, esSize) log.Infof(`ELASTIC - index "%s" contains %d items.`, config.GetConfig().ElasticsearchIndex, esSize)
var wg sync.WaitGroup var wg sync.WaitGroup
sem := make(chan bool, cfg.ElasticsearchSyncThreads) sem := make(chan bool, config.GetConfig().ElasticsearchSyncThreads)
// Run a partial sync at startup, unless configured to run a full one. // Run a partial sync at startup, unless configured to run a full one.
syncElasticsearch(sharedCache, cfg, &wg, sem, cfg.ElasticsearchFullSyncOnStart) syncElasticsearch(sharedCache, &wg, sem, config.GetConfig().ElasticsearchFullSyncOnStart)
ticker := time.NewTicker(time.Duration(cfg.ElasticsearchSyncInterval) * time.Second) ticker := time.NewTicker(time.Duration(config.GetConfig().ElasticsearchSyncInterval) * time.Second)
fullSyncTicker := time.NewTicker(time.Duration(cfg.ElasticsearchFullSyncInterval) * time.Second) fullSyncTicker := time.NewTicker(time.Duration(config.GetConfig().ElasticsearchFullSyncInterval) * time.Second)
var mutex sync.Mutex var mutex sync.Mutex
for { for {
select { select {
case <-ticker.C: case <-ticker.C:
if !cfg.ElasticsearchAllowConcurrentSyncs { if !config.GetConfig().ElasticsearchAllowConcurrentSyncs {
mutex.Lock() mutex.Lock()
} }
syncElasticsearch(sharedCache, cfg, &wg, sem, false) syncElasticsearch(sharedCache, &wg, sem, false)
if !cfg.ElasticsearchAllowConcurrentSyncs { if !config.GetConfig().ElasticsearchAllowConcurrentSyncs {
mutex.Unlock() mutex.Unlock()
} }
case <-fullSyncTicker.C: case <-fullSyncTicker.C:
if !cfg.ElasticsearchAllowConcurrentSyncs { if !config.GetConfig().ElasticsearchAllowConcurrentSyncs {
mutex.Lock() mutex.Lock()
} }
syncElasticsearch(sharedCache, cfg, &wg, sem, true) syncElasticsearch(sharedCache, &wg, sem, true)
if !cfg.ElasticsearchAllowConcurrentSyncs { if !config.GetConfig().ElasticsearchAllowConcurrentSyncs {
mutex.Unlock() mutex.Unlock()
} }
} }
} }
} }
- func syncElasticsearch(sharedCache *lru.Cache[string, *CacheItem.Item], cfg *config.Config, wg *sync.WaitGroup, sem chan bool, fullSync bool) {
+ func syncElasticsearch(sharedCache *lru.Cache[string, *CacheItem.Item], wg *sync.WaitGroup, sem chan bool, fullSync bool) {
var syncType string
var esContents []string
if fullSync {
@ -64,7 +64,7 @@ func syncElasticsearch(sharedCache *lru.Cache[string, *CacheItem.Item], cfg *con
syncType = "refresh"
var err error
- esContents, err = getPathsFromIndex(cfg)
+ esContents, err = getPathsFromIndex()
if err != nil {
log.Errorf("ELASTIC - Failed to read the index: %s", err)
return
@ -82,14 +82,14 @@ func syncElasticsearch(sharedCache *lru.Cache[string, *CacheItem.Item], cfg *con
if !found {
log.Fatalf(`ELASTICSEARCH - Could not fetch item "%s" from the LRU cache!`, key)
} else {
- if !shouldExclude(key, cfg.ElasticsearchExcludePatterns) {
+ if !shouldExclude(key, config.GetConfig().ElasticsearchExcludePatterns) {
if fullSync {
- addToElasticsearch(cacheItem, cfg)
+ addToElasticsearch(cacheItem)
} else if !slices.Contains(esContents, key) {
- addToElasticsearch(cacheItem, cfg)
+ addToElasticsearch(cacheItem)
}
} else {
- deleteFromElasticsearch(key, cfg) // clean up
+ deleteFromElasticsearch(key) // clean up
//log.Debugf(`ELASTIC - skipping adding "%s"`, key)
}
}
@ -99,7 +99,7 @@ func syncElasticsearch(sharedCache *lru.Cache[string, *CacheItem.Item], cfg *con
wg.Wait()
log.Debugln("ELASTIC - Checking for removed items...")
- removeStaleItemsFromElasticsearch(sharedCache, cfg)
+ removeStaleItemsFromElasticsearch(sharedCache)
if fullSync {
ElasticRefreshSyncRunning = false


@ -9,7 +9,7 @@ import (
"github.com/elastic/go-elasticsearch/v8/esapi" "github.com/elastic/go-elasticsearch/v8/esapi"
) )
func addToElasticsearch(item *CacheItem.Item, cfg *config.Config) { func addToElasticsearch(item *CacheItem.Item) {
log.Debugf(`ELASTIC - Adding: "%s"`, item.Path) log.Debugf(`ELASTIC - Adding: "%s"`, item.Path)
prepareCacheItem(item) prepareCacheItem(item)
data, err := json.Marshal(item) data, err := json.Marshal(item)
@ -18,7 +18,7 @@ func addToElasticsearch(item *CacheItem.Item, cfg *config.Config) {
return
}
req := esapi.IndexRequest{
- Index: cfg.ElasticsearchIndex,
+ Index: config.GetConfig().ElasticsearchIndex,
DocumentID: encodeToBase64(item.Path),
Body: bytes.NewReader(data),
Refresh: "true",


@ -10,16 +10,16 @@ import (
"sync" "sync"
) )
func removeStaleItemsFromElasticsearch(sharedCache *lru.Cache[string, *CacheItem.Item], cfg *config.Config) { func removeStaleItemsFromElasticsearch(sharedCache *lru.Cache[string, *CacheItem.Item]) {
// Retrieve all keys from Elasticsearch // Retrieve all keys from Elasticsearch
keys, err := getPathsFromIndex(cfg) keys, err := getPathsFromIndex()
if err != nil { if err != nil {
log.Errorf("ELASTIC - Error retrieving keys from Elasticsearch: %s", err) log.Errorf("ELASTIC - Error retrieving keys from Elasticsearch: %s", err)
return return
} }
// Create a buffered channel as a semaphore // Create a buffered channel as a semaphore
sem := make(chan struct{}, cfg.ElasticsearchSyncThreads) sem := make(chan struct{}, config.GetConfig().ElasticsearchSyncThreads)
// Create a wait group to wait for all goroutines to finish // Create a wait group to wait for all goroutines to finish
var wg sync.WaitGroup var wg sync.WaitGroup
@ -41,7 +41,7 @@ func removeStaleItemsFromElasticsearch(sharedCache *lru.Cache[string, *CacheItem
if _, ok := sharedCache.Get(key); !ok {
// If a key does not exist in the LRU cache, delete it from Elasticsearch
- deleteFromElasticsearch(key, cfg)
+ deleteFromElasticsearch(key)
log.Debugf(`ELASTIC - Removed key "%s"`, key)
}
}(key)
@ -51,9 +51,9 @@ func removeStaleItemsFromElasticsearch(sharedCache *lru.Cache[string, *CacheItem
wg.Wait()
}
- func deleteFromElasticsearch(key string, cfg *config.Config) {
+ func deleteFromElasticsearch(key string) {
req := esapi.DeleteRequest{
- Index: cfg.ElasticsearchIndex,
+ Index: config.GetConfig().ElasticsearchIndex,
DocumentID: encodeToBase64(key),
}


@ -4,9 +4,9 @@ import (
"crazyfs/config" "crazyfs/config"
) )
func createCrazyfsIndex(cfg *config.Config) { func createCrazyfsIndex() {
// Check if index exists // Check if index exists
res, err := ElasticClient.Indices.Exists([]string{cfg.ElasticsearchIndex}) res, err := ElasticClient.Indices.Exists([]string{config.GetConfig().ElasticsearchIndex})
if err != nil { if err != nil {
log.Fatalf("Error checking if index exists: %s", err) log.Fatalf("Error checking if index exists: %s", err)
} }
@ -16,7 +16,7 @@ func createCrazyfsIndex(cfg *config.Config) {
if res.StatusCode == 401 {
log.Fatalln("ELASTIC - Failed to create a new index: got code 401.")
} else if res.StatusCode == 404 {
- res, err = ElasticClient.Indices.Create(cfg.ElasticsearchIndex)
+ res, err = ElasticClient.Indices.Create(config.GetConfig().ElasticsearchIndex)
if err != nil {
log.Fatalf("Error creating index: %s", err)
}
@ -26,6 +26,6 @@ func createCrazyfsIndex(cfg *config.Config) {
log.Printf("Error creating index: %s", res.String())
}
- log.Infof(`Created a new index named "%s"`, cfg.ElasticsearchIndex)
+ log.Infof(`Created a new index named "%s"`, config.GetConfig().ElasticsearchIndex)
}
}


@ -10,7 +10,7 @@ import (
"time" "time"
) )
func getPathsFromIndex(cfg *config.Config) ([]string, error) { func getPathsFromIndex() ([]string, error) {
// This may take a bit if the index is very large, so avoid calling this. // This may take a bit if the index is very large, so avoid calling this.
// Print a debug message so the user doesn't think we're frozen. // Print a debug message so the user doesn't think we're frozen.
@ -21,7 +21,7 @@ func getPathsFromIndex(cfg *config.Config) ([]string, error) {
res, err := ElasticClient.Search(
ElasticClient.Search.WithContext(context.Background()),
- ElasticClient.Search.WithIndex(cfg.ElasticsearchIndex),
+ ElasticClient.Search.WithIndex(config.GetConfig().ElasticsearchIndex),
ElasticClient.Search.WithScroll(time.Minute),
ElasticClient.Search.WithSize(1000),
)


@ -10,9 +10,7 @@ import (
"strings" "strings"
) )
func Search(query string, exclude []string, cfg *config.Config) (*esapi.Response, error) { func Search(query string, exclude []string) (*esapi.Response, error) {
log.Debugf(`ELASTIC - Query: "%s"`, query)
var excludeQuery string var excludeQuery string
if len(exclude) > 0 { if len(exclude) > 0 {
var excludeConditions []string var excludeConditions []string
@ -37,11 +35,11 @@ func Search(query string, exclude []string, cfg *config.Config) (*esapi.Response
return ElasticClient.Search(
ElasticClient.Search.WithContext(context.Background()),
- ElasticClient.Search.WithIndex(cfg.ElasticsearchIndex),
+ ElasticClient.Search.WithIndex(config.GetConfig().ElasticsearchIndex),
ElasticClient.Search.WithBody(strings.NewReader(esQuery)),
ElasticClient.Search.WithTrackTotalHits(true),
ElasticClient.Search.WithPretty(),
- ElasticClient.Search.WithSize(cfg.ApiSearchMaxResults),
+ ElasticClient.Search.WithSize(config.GetConfig().ApiSearchMaxResults),
)
}

src/file/encoding.go (new file, 36 lines)

@ -0,0 +1,36 @@
package file
import (
"github.com/saintfish/chardet"
"os"
"strings"
)
func DetectFileEncoding(filePath string) (string, error) {
file, err := os.Open(filePath)
if err != nil {
return "", err
}
defer file.Close()
bytes, err := os.ReadFile(filePath)
if err != nil {
return "", err
}
// Detect the encoding
detector := chardet.NewTextDetector()
result, err := detector.DetectBest(bytes)
if err != nil {
return "", err
}
return result.Charset, nil
}
func CastTextMimes(mimeType string) string {
if strings.HasPrefix(mimeType, "text/") {
return "text/plain"
}
return mimeType
}
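These two helpers look aimed at the download-encoding fix named in the commit title: detect a text file's charset and collapse any text/* type to text/plain. As a rough sketch of how they might be combined when writing a download response (this helper is hypothetical and not part of the commit; it assumes "net/http" is imported and that MIME detection has already run):

// Hypothetical glue code, for illustration only: set a Content-Type header
// with an explicit charset before streaming a text file to the client.
func writeTextContentType(w http.ResponseWriter, fullPath string, detectedMime string) {
	mimeType := CastTextMimes(detectedMime) // any text/* becomes text/plain
	if mimeType == "text/plain" {
		if charset, err := DetectFileEncoding(fullPath); err == nil {
			w.Header().Set("Content-Type", mimeType+"; charset="+charset)
			return
		}
	}
	w.Header().Set("Content-Type", mimeType)
}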


@ -49,7 +49,7 @@ func GetMimeType(path string, analyze bool, passedInfo *os.FileInfo) (bool, stri
if analyze {
MIME, err = mimetype.DetectFile(path)
if err != nil {
- log.Warnf("Error analyzing MIME type: %v", err)
+ log.Errorf("Error analyzing MIME type: %v", err)
return false, "", "", err
}
mimeType = MIME.String()
@ -66,10 +66,10 @@ func GetMimeType(path string, analyze bool, passedInfo *os.FileInfo) (bool, stri
}
func StripRootDir(path string) string {
- if path == "/" || path == config.RootDir || path == "" {
+ if path == "/" || path == config.GetConfig().RootDir || path == "" {
// Avoid erasing our path
return "/"
} else {
- return strings.TrimSuffix(strings.TrimPrefix(path, config.RootDir), "/")
+ return strings.TrimSuffix(strings.TrimPrefix(path, config.GetConfig().RootDir), "/")
}
}


@ -10,7 +10,7 @@ import (
// SafeJoin Clean the provided path
func SafeJoin(pathArg string) (string, error) {
- cleanPath := filepath.Join(config.RootDir, filepath.Clean(pathArg))
+ cleanPath := filepath.Join(config.GetConfig().RootDir, filepath.Clean(pathArg))
cleanPath = strings.TrimRight(cleanPath, "/")
return cleanPath, nil
}
@ -33,10 +33,10 @@ func DetectTraversal(pathArg string) (bool, error) {
}
cleanArg := filepath.Clean(pathArg)
- cleanPath := filepath.Join(config.RootDir, cleanArg)
+ cleanPath := filepath.Join(config.GetConfig().RootDir, cleanArg)
// If the path is not within the base path, return an error
- if !strings.HasPrefix(cleanPath, config.RootDir) {
+ if !strings.HasPrefix(cleanPath, config.GetConfig().RootDir) {
return true, fmt.Errorf("the full path is outside the root dir: %s", pathArg)
}


@ -13,7 +13,7 @@ require (
github.com/klauspost/compress v1.16.7
github.com/mitchellh/mapstructure v1.5.0
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646
- github.com/radovskyb/watcher v1.0.7
+ github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d
github.com/sirupsen/logrus v1.9.3
github.com/spf13/viper v1.16.0
)


@ -164,10 +164,10 @@ github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qR
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
- github.com/radovskyb/watcher v1.0.7 h1:AYePLih6dpmS32vlHfhCeli8127LzkIgwJGcwwe8tUE=
- github.com/radovskyb/watcher v1.0.7/go.mod h1:78okwvY5wPdzcb1UYnip1pvrZNIVEIh/Cm+ZuvsUYIg=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
+ github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA=
+ github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/afero v1.9.5 h1:stMpOSZFs//0Lv29HduCmli3GUfpFoF3Y1Q/aXj/wVM=


@ -1,8 +1,10 @@
package logging
import (
+ "crazyfs/config"
"net"
"net/http"
+ "strings"
)
type statusWriter struct {
@ -15,13 +17,32 @@ func (sw *statusWriter) WriteHeader(status int) {
sw.ResponseWriter.WriteHeader(status)
}
- // TODO: handle the proxy http headers
+ func GetRealIP(r *http.Request) string {
ip, _, _ := net.SplitHostPort(r.RemoteAddr) // Get the IP address without port number
// Check if the request was forwarded by a proxy
var forwarded string
if config.GetConfig().HTTPRealIPHeader == "X-Forwarded-For" {
// The X-Forwarded-For header can contain multiple IPs, use the first one
if forwarded = r.Header.Get(config.GetConfig().HTTPRealIPHeader); forwarded != "" {
split := strings.Split(forwarded, ",")
ip = strings.TrimSpace(split[0])
}
} else {
// Or just use the header the user specified, falling back to the socket address if it's empty.
if forwarded = r.Header.Get(config.GetConfig().HTTPRealIPHeader); forwarded != "" {
ip = forwarded
}
}
return ip
}
func LogRequest(handler http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
sw := statusWriter{ResponseWriter: w, status: http.StatusOK} // set default status
handler.ServeHTTP(&sw, r)
- ip, _, _ := net.SplitHostPort(r.RemoteAddr) // Get the IP address without port number
+ ip := GetRealIP(r)
log.Infof("%s - %d - %s from %s", r.Method, sw.status, r.URL.RequestURI(), ip)
})
}
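For a concrete picture of the new middleware behaviour, here is an illustrative snippet (not part of the commit) showing what GetRealIP returns for a proxied request. It assumes the loaded config sets HTTPRealIPHeader to X-Forwarded-For and that "fmt", "net/http", and "net/http/httptest" are imported alongside this package:

// Illustration only: GetRealIP should prefer the first address in X-Forwarded-For.
func ExampleGetRealIP() {
	req := httptest.NewRequest(http.MethodGet, "/api/file/list", nil)
	req.RemoteAddr = "10.0.0.5:52110"                              // the reverse proxy's address
	req.Header.Set("X-Forwarded-For", "203.0.113.7, 198.51.100.2") // client first, then intermediaries
	fmt.Println(GetRealIP(req))                                    // expected output: 203.0.113.7
}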


@ -1,5 +1,13 @@
- Track active crawls and list them on the admin page.
- Limit to one on-demand crawl per path. Don't start another if one is already running. See HandleFileNotFound().
- Add a config value to limit the number of on-demand crawls.
- Add a config value to limit the number of concurrent crawls; additional crawls get queued.
- Add an admin endpoint to fetch the last n modified files.
- Fix /api/file/download when an item is in the cache but does not exist on disk.
- Is using scroll for the Elastic query really the best way to do a real-time query?
Later:
- Add a wildcard option to restricted_download_paths to block all sub-directories
- Add a dict to each restricted_download_paths item to specify how many levels recursive the block should be applied
- - Add an endpoint to return restricted_download_paths so the frontend can block downloads for those folders
+ - Add a "last modified" option to "sort" in https://chub-archive.evulid.cc/api/file/list?path=/chub.ai/characters&page=1&limit=50&sort=folders
- - Load the config into a global variable and stop passing it as function args
- - Remove the file change watcher mode
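Most of the changes above replace the cfg function parameter with a package-level accessor, config.GetConfig(). The accessor itself is not shown in this diff; a minimal sketch of the usual pattern follows. The field list only includes names referenced in these diffs, and the setter is an assumption about how the loader publishes the value once at startup.

package config

// Sketch only; the real struct has many more fields than the ones referenced above.
type Config struct {
	RootDir                  string
	CrawlWorkers             int
	HTTPRealIPHeader         string
	ElasticsearchIndex       string
	ElasticsearchSyncThreads int
	ApiSearchMaxResults      int
}

var cfg *Config

// SetConfig is assumed to be called exactly once by the loader, before any goroutines read the config.
func SetConfig(c *Config) { cfg = c }

// GetConfig returns the process-wide configuration.
func GetConfig() *Config { return cfg }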