add new config variable, minor qol

Cyberes 2024-03-17 09:58:20 -06:00
parent 4200d4c710
commit 88fd63bfb9
9 changed files with 105 additions and 89 deletions


@@ -11,6 +11,7 @@ fi
 mkdir -p "$SCRIPT_DIR/dist"
 cd "$SCRIPT_DIR/src" || exit 1
+go mod tidy
 go build -v -trimpath -ldflags "-s -w -X main.VersionDate=$(date -u --iso-8601=minutes) -X main.Version=v$VERSION" -o "$SCRIPT_DIR/dist/crazyfs"
 if [ $? -eq 0 ]; then


@@ -48,6 +48,8 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, walkFunc func(string, os.File
         walkFunc = dc.walkRecursiveFunc
     }
+    // TODO: check if symlink and reject if it is
     //Extrapolate the name of the callback function.
     pc := reflect.ValueOf(walkFunc).Pointer()
     fn := runtime.FuncForPC(pc)

@@ -105,6 +107,9 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, walkFunc func(string, os.File
 // CrawlNoRecursion this function crawls a file or directory and does not recurse into any subdirectories. Also returns the result of the crawl.
 func (dc *DirectoryCrawler) CrawlNoRecursion(fullPath string) (*CacheItem.Item, error) {
     CacheItem.RetardCheck(fullPath)
+    // TODO: check if symlink and reject if it is
     readyToStart := dc.startCrawl(fullPath, "walkNonRecursive")
     if !readyToStart {
         return nil, errors.New(fmt.Sprintf(`rejecting crawl, already in progress for "%s"`, fullPath))
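
Both TODOs above ask for symlinks to be rejected at the crawl entry points. A minimal sketch of what such a guard could look like using only the standard library; the helper name rejectIfSymlink is invented here and is not part of this commit:

package main

import (
	"fmt"
	"os"
)

// rejectIfSymlink stats the path without following links (os.Lstat) and
// returns an error if the path itself is a symlink.
func rejectIfSymlink(fullPath string) error {
	info, err := os.Lstat(fullPath)
	if err != nil {
		return err
	}
	if info.Mode()&os.ModeSymlink != 0 {
		return fmt.Errorf(`refusing to crawl symlink: "%s"`, fullPath)
	}
	return nil
}

func main() {
	if err := rejectIfSymlink("/tmp"); err != nil {
		fmt.Println(err)
	}
}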


@@ -76,24 +76,29 @@ func NewResponseItem(cacheItem *CacheItem.Item) *ResponseItem {
     for _, child := range cacheItem.Children {
         childItem, found := SharedCache.Cache.Get(child)
-        // If the path wasn't found, do a quick crawl since the path could have been modified, since the last crawl.
-        // This also be triggered if we encounter a broken symlink. We don't check for broken symlinks when scanning
-        // because that would be an extra os.Lstat() call in processPath().
         if !found {
-            crawlRelPath := filepath.Join(config.GetConfig().RootDir, child)
-            // TODO: when does this get triggered?
-            log.Debugf(`NewResponseItem:Crawl - Not in cache, crawling: "%s" ("%s")`, child, crawlRelPath)
-            dc := DirectoryCrawler.NewDirectoryCrawler()
-            item, err := dc.CrawlNoRecursion(crawlRelPath)
-            if err != nil {
-                log.Errorf("NewResponseItem:Crawl - %s", err)
-                continue // skip this child
-            }
-            if item == nil {
-                log.Debugf(`NewResponseItem:Crawl - Not found: "%s". Likely broken symlink`, child)
-                continue
-            }
-            childItem = item // Update the `childItem` var with the newly cached item.
+            // If the path wasn't found, do a quick crawl since the path could have been modified, since the last crawl.
+            // This also be triggered if we encounter a broken symlink. We don't check for broken symlinks when scanning
+            // because that would be an extra os.Lstat() call in processPath().
+            if config.GetConfig().CrawlerCrawlMissingResponseChildren {
+                crawlRelPath := filepath.Join(config.GetConfig().RootDir, child)
+                // TODO: when does this get triggered?
+                log.Debugf(`NewResponseItem:Crawl - Not in cache, crawling: "%s" ("%s")`, child, crawlRelPath)
+                dc := DirectoryCrawler.NewDirectoryCrawler()
+                item, err := dc.CrawlNoRecursion(crawlRelPath)
+                if err != nil {
+                    log.Errorf("NewResponseItem:Crawl - %s", err)
+                    continue // skip this child
+                }
+                if item == nil {
+                    log.Debugf(`NewResponseItem:Crawl - Not found: "%s". Likely broken symlink`, child)
+                    continue
+                }
+                childItem = item // Update the `childItem` var with the newly cached item.
+            } else {
+                log.Debugf("Skipping %s due to CrawlerCrawlMissingResponseChildren", child) // TODO: remove
+                continue
+            }
         }
         if childItem != nil { // Double check


@@ -71,6 +71,8 @@ func (w *Walker) processPath(relPath string) error {
 // Walk recursively descends into subdirectories, calling the user-defined walkFn for each file or directory
 // in the tree, starting with the root directory. It is only called one place: `Walk()` in Walk.go
 func (w *Walker) Walk(relPath string, walkFn filepath.WalkFunc) error {
+    // TODO: compare with filepath.WalkDir()
     w.walkFunc = walkFn
     // Parse the beginning path.
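
Regarding the TODO above: since Go 1.16 the standard library also offers filepath.WalkDir, which hands the callback an fs.DirEntry read straight from the directory listing instead of an os.FileInfo, so it avoids an os.Lstat per visited entry. A standalone comparison sketch, not wired into this repo's Walker:

package main

import (
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
)

func main() {
	root := "."

	// filepath.Walk: the callback receives an os.FileInfo, which costs a stat per path.
	_ = filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		fmt.Println("Walk:", path, info.IsDir())
		return nil
	})

	// filepath.WalkDir (Go 1.16+): the callback receives an fs.DirEntry; the full
	// FileInfo is only loaded on demand via d.Info().
	_ = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		fmt.Println("WalkDir:", path, d.IsDir())
		return nil
	})
}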


@@ -19,7 +19,7 @@ func InitializeWorkers() {
     for n := 1; n <= config.GetConfig().DirectoryCrawlers; n++ {
         go worker()
     }
-    log.Debugf("Started %d directory crawler Workers.", config.GetConfig().DirectoryCrawlers)
+    log.Debugf("WORKERS - Started %d directory crawler Workers.", config.GetConfig().DirectoryCrawlers)
 }

 // worker processes jobs forever.

@@ -30,7 +30,7 @@ func worker() {
         atomic.AddInt32(&BusyWorkers, 1)
         err := job.Walker.processPath(job.StartPath)
         if err != nil {
-            log.Warnf("Workers - %s - %s", job.StartPath, err)
+            log.Warnf("WORKER - %s - %s", job.StartPath, err)
         }
         job.Walker.wg.Done()


@@ -58,7 +58,7 @@ func APIList(w http.ResponseWriter, r *http.Request) {
     mime := r.URL.Query().Get("mime")
     if mime != "" {
         if cacheItem.IsDir && !config.GetConfig().HttpAllowDirMimeParse {
-            helpers.Return403Msg("not allowed to analyze the mime of directories", w)
+            helpers.Return403Msg("unable to analyze the mime of directories", w)
             return
         } else {
             // Only update the mime in the cache if it hasn't been set already.

@@ -125,8 +125,8 @@ func APIList(w http.ResponseWriter, r *http.Request) {
         item.Children = append(dirs, files...)
     }
-    // Set the children to an empty array so that the JSON encoder doesn't return it as nil
-    var paginatedChildren []*ResponseItem.ResponseItem // this var is either the full CacheItem list or a paginated list depending on the query args
+    // Set the children to an empty array so that the JSON encoder doesn't return it as nil.
+    var paginatedChildren []*ResponseItem.ResponseItem // this var will be either the full CacheItem list or a paginated list depending on the query args
     if item.Children != nil {
         paginatedChildren = item.Children
     } else {
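
The comment about setting the children to an empty array reflects standard encoding/json behaviour: a nil slice marshals to JSON null, while a non-nil empty slice marshals to []. A small standalone illustration, unrelated to the crazyfs types:

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	var nilChildren []string           // nil slice
	emptyChildren := make([]string, 0) // non-nil, zero-length slice

	a, _ := json.Marshal(nilChildren)
	b, _ := json.Marshal(emptyChildren)
	fmt.Println(string(a)) // prints: null
	fmt.Println(string(b)) // prints: []
}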


@@ -10,39 +10,40 @@ import (
 var cfg *Config

 type Config struct {
     RootDir                             string
     HTTPPort                            string
     CrawlModeCrawlInterval              int
     DirectoryCrawlers                   int
     CacheSize                           int
     CacheTime                           int // TODO: does this do anything?
     CachePrintNew                       bool
     InitialCrawl                        bool
     CacheRecacheCrawlerLimit            int
     CrawlerParseMIME                    bool
     CrawlerParseEncoding                bool
+    CrawlerCrawlMissingResponseChildren bool
     HttpAPIListCacheControl             int
     HttpAPIDlCacheControl               int
     HttpAllowDirMimeParse               bool
     HttpAdminKey                        string
     HttpAllowDuringInitialCrawl         bool
     RestrictedDownloadPaths             []string
     ApiSearchMaxResults                 int
     ApiSearchShowChildren               bool
     ElasticsearchEnable                 bool
     ElasticsearchEndpoint               string
     ElasticsearchSyncEnable             bool
     ElasticsearchSyncInterval           int
     ElasticsearchFullSyncInterval       int
     ElasticsearchAPIKey                 string
     ElasticsearchIndex                  string
     ElasticsearchSyncThreads            int
     ElasticsearchExcludePatterns        []string
     ElasticsearchFullSyncOnStart        bool
     ElasticsearchDefaultQueryField      string
     HTTPRealIPHeader                    string
     HTTPNoMimeSniffHeader               bool
     HTTPAccessControlAllowOriginHeader  string
 }
func SetConfig(configFile string) (*Config, error) { func SetConfig(configFile string) (*Config, error) {
@@ -65,6 +66,7 @@ func SetConfig(configFile string) (*Config, error) {
     viper.SetDefault("cache_recache_crawler_limit", 50)
     viper.SetDefault("crawler_parse_mime", false)
     viper.SetDefault("crawler_parse_encoding", false)
+    viper.SetDefault("crawler_crawl_missing_response_children", false)
     viper.SetDefault("http_api_list_cache_control", 600)
     viper.SetDefault("http_api_download_cache_control", 600)
     viper.SetDefault("http_allow_dir_mime_parse", true)
@@ -108,39 +110,40 @@ func SetConfig(configFile string) (*Config, error) {
     }

     config := &Config{
         RootDir:                             rootDir,
         HTTPPort:                            viper.GetString("http_port"),
         CrawlModeCrawlInterval:              viper.GetInt("crawl_mode_crawl_interval"),
         DirectoryCrawlers:                   viper.GetInt("directory_crawlers"),
         CacheSize:                           viper.GetInt("cache_size"),
         CacheTime:                           viper.GetInt("cache_time"),
         CachePrintNew:                       viper.GetBool("cache_print_new"),
         InitialCrawl:                        viper.GetBool("initial_crawl"),
         CacheRecacheCrawlerLimit:            viper.GetInt("cache_recache_crawler_limit"),
         CrawlerParseMIME:                    viper.GetBool("crawler_parse_mime"),
         CrawlerParseEncoding:                viper.GetBool("crawler_parse_encoding"),
+        CrawlerCrawlMissingResponseChildren: viper.GetBool("crawler_crawl_missing_response_children"),
         HttpAPIListCacheControl:             viper.GetInt("http_api_list_cache_control"),
         HttpAPIDlCacheControl:               viper.GetInt("http_api_download_cache_control"),
         HttpAllowDirMimeParse:               viper.GetBool("http_allow_dir_mime_parse"),
         HttpAdminKey:                        viper.GetString("api_admin_key"),
         HttpAllowDuringInitialCrawl:         viper.GetBool("http_allow_during_initial_crawl"),
         RestrictedDownloadPaths:             restrictedPaths,
         ApiSearchMaxResults:                 viper.GetInt("api_search_max_results"),
         ApiSearchShowChildren:               viper.GetBool("api_search_show_children"),
         ElasticsearchEnable:                 viper.GetBool("elasticsearch_enable"),
         ElasticsearchEndpoint:               viper.GetString("elasticsearch_endpoint"),
         ElasticsearchSyncEnable:             viper.GetBool("elasticsearch_sync_enable"),
         ElasticsearchSyncInterval:           viper.GetInt("elasticsearch_sync_interval"),
         ElasticsearchFullSyncInterval:       viper.GetInt("elasticsearch_full_sync_interval"),
         ElasticsearchAPIKey:                 viper.GetString("elasticsearch_api_key"),
         ElasticsearchIndex:                  viper.GetString("elasticsearch_index"),
         ElasticsearchSyncThreads:            viper.GetInt("elasticsearch_sync_threads"),
         ElasticsearchExcludePatterns:        viper.GetStringSlice("elasticsearch_exclude_patterns"),
         ElasticsearchFullSyncOnStart:        viper.GetBool("elasticsearch_full_sync_on_start"),
         ElasticsearchDefaultQueryField:      viper.GetString("elasticsearch_default_query_field"),
         HTTPRealIPHeader:                    viper.GetString("http_real_ip_header"),
         HTTPNoMimeSniffHeader:               viper.GetBool("http_no_mime_sniff_header"),
         HTTPAccessControlAllowOriginHeader:  viper.GetString("http_access_control_allow_origin_header"),
     }

     if config.CacheTime < 0 {
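
Taken together, the config changes register a default for the new crawler_crawl_missing_response_children key, surface it as Config.CrawlerCrawlMissingResponseChildren, and NewResponseItem branches on it. A minimal standalone viper sketch of that flow; the config filename and struct below are invented for illustration and are not crazyfs code:

package main

import (
	"fmt"

	"github.com/spf13/viper"
)

type exampleConfig struct {
	CrawlerCrawlMissingResponseChildren bool
}

func main() {
	// Same default as the one added in SetConfig().
	viper.SetDefault("crawler_crawl_missing_response_children", false)

	// Hypothetical config file; crazyfs' real loading code differs.
	viper.SetConfigFile("config.yml")
	_ = viper.ReadInConfig() // fall back to the default if the file is missing

	cfg := exampleConfig{
		CrawlerCrawlMissingResponseChildren: viper.GetBool("crawler_crawl_missing_response_children"),
	}

	if cfg.CrawlerCrawlMissingResponseChildren {
		fmt.Println("children missing from the cache will be crawled on demand")
	} else {
		fmt.Println("children missing from the cache will be skipped in responses")
	}
}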


@@ -1,6 +1,6 @@
 package config

-// Various global variables.
+// Various read-only global variables.
 var FollowSymlinks bool
 var InitialCrawlElapsed int


@@ -50,6 +50,6 @@ func LogRequest(handler http.Handler) http.Handler {
         ip := GetRealIP(r)
-        log.Infof("%s - %d - %s from %s took %v", r.Method, sw.status, r.URL.RequestURI(), ip, duration)
+        log.Infof("HTTP - %s %d %s from %s took %v", r.Method, sw.status, r.URL.RequestURI(), ip, duration)
     })
 }