add new config variable, minor qol

Cyberes 2024-03-17 09:58:20 -06:00
parent 4200d4c710
commit 88fd63bfb9
9 changed files with 105 additions and 89 deletions

View File

@@ -11,6 +11,7 @@ fi
 mkdir -p "$SCRIPT_DIR/dist"
 cd "$SCRIPT_DIR/src" || exit 1
+go mod tidy
 go build -v -trimpath -ldflags "-s -w -X main.VersionDate=$(date -u --iso-8601=minutes) -X main.Version=v$VERSION" -o "$SCRIPT_DIR/dist/crazyfs"
 if [ $? -eq 0 ]; then
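For context on the `-ldflags "-X ..."` flags above: they only take effect if package `main` declares matching package-level string variables for the linker to overwrite. A minimal sketch of how that usually looks; the default values and the `printVersion` helper are illustrative, not copied from the repo:

```go
package main

// Version and VersionDate are placeholders that the build script overwrites
// at link time via -ldflags "-X main.Version=... -X main.VersionDate=...".
var (
	Version     = "dev"     // assumed default when built without the script
	VersionDate = "unknown" // assumed default when built without the script
)

// printVersion is a hypothetical helper showing how the stamped values are used.
func printVersion() {
	println("crazyfs", Version, "built", VersionDate)
}
```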

View File

@@ -48,6 +48,8 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, walkFunc func(string, os.File
 		walkFunc = dc.walkRecursiveFunc
 	}
+	// TODO: check if symlink and reject if it is
 	//Extrapolate the name of the callback function.
 	pc := reflect.ValueOf(walkFunc).Pointer()
 	fn := runtime.FuncForPC(pc)
@@ -105,6 +107,9 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, walkFunc func(string, os.File
 // CrawlNoRecursion this function crawls a file or directory and does not recurse into any subdirectories. Also returns the result of the crawl.
 func (dc *DirectoryCrawler) CrawlNoRecursion(fullPath string) (*CacheItem.Item, error) {
 	CacheItem.RetardCheck(fullPath)
+	// TODO: check if symlink and reject if it is
 	readyToStart := dc.startCrawl(fullPath, "walkNonRecursive")
 	if !readyToStart {
 		return nil, errors.New(fmt.Sprintf(`rejecting crawl, already in progress for "%s"`, fullPath))
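The two new TODOs note that crawls should reject symlinks. A hedged sketch of what that check could look like using os.Lstat; the helper name `rejectSymlink` is hypothetical and not part of the repo:

```go
import (
	"fmt"
	"os"
)

// rejectSymlink sketches the TODO above: refuse to crawl a path that is a
// symbolic link.
func rejectSymlink(fullPath string) error {
	info, err := os.Lstat(fullPath) // Lstat inspects the link itself; it does not follow it.
	if err != nil {
		return err
	}
	if info.Mode()&os.ModeSymlink != 0 {
		return fmt.Errorf("refusing to crawl symlink: %q", fullPath)
	}
	return nil
}
```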

View File

@@ -76,10 +76,11 @@ func NewResponseItem(cacheItem *CacheItem.Item) *ResponseItem {
 	for _, child := range cacheItem.Children {
 		childItem, found := SharedCache.Cache.Get(child)
+		if !found {
 			// If the path wasn't found, do a quick crawl since the path could have been modified, since the last crawl.
 			// This also be triggered if we encounter a broken symlink. We don't check for broken symlinks when scanning
 			// because that would be an extra os.Lstat() call in processPath().
-		if !found {
+			if config.GetConfig().CrawlerCrawlMissingResponseChildren {
 				crawlRelPath := filepath.Join(config.GetConfig().RootDir, child)
 				// TODO: when does this get triggered?
 				log.Debugf(`NewResponseItem:Crawl - Not in cache, crawling: "%s" ("%s")`, child, crawlRelPath)
@@ -94,6 +95,10 @@ func NewResponseItem(cacheItem *CacheItem.Item) *ResponseItem {
 					continue
 				}
 				childItem = item // Update the `childItem` var with the newly cached item.
+			} else {
+				log.Debugf("Skipping %s due to CrawlerCrawlMissingResponseChildren", child) // TODO: remove
+				continue
+			}
 		}
 		if childItem != nil { // Double check
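In plain terms: when a child path is missing from the shared cache, NewResponseItem now only re-crawls it on demand if the new `CrawlerCrawlMissingResponseChildren` flag is enabled; otherwise the child is skipped. A simplified sketch of the new control flow, with the cache and crawler calls stubbed out (`crawlChild` and `copyResponseItem` are hypothetical stand-ins, not repo functions):

```go
for _, child := range cacheItem.Children {
	childItem, found := SharedCache.Cache.Get(child)
	if !found {
		if config.GetConfig().CrawlerCrawlMissingResponseChildren {
			// Crawl the missing path on demand and use the freshly cached item.
			item, err := crawlChild(child) // stand-in for the DirectoryCrawler call
			if err != nil {
				continue
			}
			childItem = item
		} else {
			// Flag disabled: skip the missing child entirely.
			continue
		}
	}
	copyResponseItem(childItem) // stand-in for building the response entry
}
```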

View File

@@ -71,6 +71,8 @@ func (w *Walker) processPath(relPath string) error {
 // Walk recursively descends into subdirectories, calling the user-defined walkFn for each file or directory
 // in the tree, starting with the root directory. It is only called one place: `Walk()` in Walk.go
 func (w *Walker) Walk(relPath string, walkFn filepath.WalkFunc) error {
+	// TODO: compare with filepath.WalkDir()
 	w.walkFunc = walkFn
 	// Parse the beginning path.
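The new TODO asks how this custom walker compares with filepath.WalkDir. For reference, WalkDir (Go 1.16+) hands the callback an fs.DirEntry instead of an os.FileInfo, which lets it defer the full stat until the callback actually asks for it. A minimal usage sketch; the root path here is illustrative:

```go
package main

import (
	"fmt"
	"io/fs"
	"path/filepath"
)

func main() {
	// filepath.WalkDir passes each entry as an fs.DirEntry, avoiding an
	// eager Stat() per entry the way filepath.Walk does.
	err := filepath.WalkDir("/srv/files", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		fmt.Println(path, d.IsDir())
		return nil
	})
	if err != nil {
		fmt.Println("walk failed:", err)
	}
}
```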

View File

@@ -19,7 +19,7 @@ func InitializeWorkers() {
 	for n := 1; n <= config.GetConfig().DirectoryCrawlers; n++ {
 		go worker()
 	}
-	log.Debugf("Started %d directory crawler Workers.", config.GetConfig().DirectoryCrawlers)
+	log.Debugf("WORKERS - Started %d directory crawler Workers.", config.GetConfig().DirectoryCrawlers)
 }
 // worker processes jobs forever.
@@ -30,7 +30,7 @@ func worker() {
 		atomic.AddInt32(&BusyWorkers, 1)
 		err := job.Walker.processPath(job.StartPath)
 		if err != nil {
-			log.Warnf("Workers - %s - %s", job.StartPath, err)
+			log.Warnf("WORKER - %s - %s", job.StartPath, err)
 		}
 		job.Walker.wg.Done()
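For readers unfamiliar with the worker setup these log lines come from: a fixed-size pool of goroutines pulls crawl jobs from a queue and tracks how many are busy with an atomic counter. A minimal, self-contained sketch of that pattern under assumed names; the job type and pool size here are illustrative, not the repo's:

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

var busyWorkers int32 // mirrors the BusyWorkers counter in the log lines above

func worker(jobs <-chan string, wg *sync.WaitGroup) {
	for path := range jobs {
		atomic.AddInt32(&busyWorkers, 1)
		fmt.Println("WORKER - processing", path) // stand-in for job.Walker.processPath
		atomic.AddInt32(&busyWorkers, -1)
		wg.Done()
	}
}

func main() {
	jobs := make(chan string, 16)
	var wg sync.WaitGroup
	for n := 1; n <= 4; n++ { // 4 stands in for config.GetConfig().DirectoryCrawlers
		go worker(jobs, &wg)
	}
	for _, p := range []string{"/a", "/b", "/c"} {
		wg.Add(1)
		jobs <- p
	}
	wg.Wait()
	close(jobs)
}
```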

View File

@@ -58,7 +58,7 @@ func APIList(w http.ResponseWriter, r *http.Request) {
 	mime := r.URL.Query().Get("mime")
 	if mime != "" {
 		if cacheItem.IsDir && !config.GetConfig().HttpAllowDirMimeParse {
-			helpers.Return403Msg("not allowed to analyze the mime of directories", w)
+			helpers.Return403Msg("unable to analyze the mime of directories", w)
 			return
 		} else {
 			// Only update the mime in the cache if it hasn't been set already.
@@ -125,8 +125,8 @@ func APIList(w http.ResponseWriter, r *http.Request) {
 		item.Children = append(dirs, files...)
 	}
-	// Set the children to an empty array so that the JSON encoder doesn't return it as nil
-	var paginatedChildren []*ResponseItem.ResponseItem // this var is either the full CacheItem list or a paginated list depending on the query args
+	// Set the children to an empty array so that the JSON encoder doesn't return it as nil.
+	var paginatedChildren []*ResponseItem.ResponseItem // this var will be either the full CacheItem list or a paginated list depending on the query args
 	if item.Children != nil {
 		paginatedChildren = item.Children
 	} else {
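The comment about setting children to an empty array matters because encoding/json marshals a nil slice as `null` but an empty, non-nil slice as `[]`. A quick self-contained demonstration of that behavior:

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	var nilChildren []string    // nil slice
	emptyChildren := []string{} // empty, non-nil slice

	a, _ := json.Marshal(map[string][]string{"children": nilChildren})
	b, _ := json.Marshal(map[string][]string{"children": emptyChildren})
	fmt.Println(string(a)) // {"children":null}
	fmt.Println(string(b)) // {"children":[]}
}
```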

View File

@@ -21,6 +21,7 @@ type Config struct {
 	CacheRecacheCrawlerLimit            int
 	CrawlerParseMIME                    bool
 	CrawlerParseEncoding                bool
+	CrawlerCrawlMissingResponseChildren bool
 	HttpAPIListCacheControl             int
 	HttpAPIDlCacheControl               int
 	HttpAllowDirMimeParse               bool
@@ -65,6 +66,7 @@ func SetConfig(configFile string) (*Config, error) {
 	viper.SetDefault("cache_recache_crawler_limit", 50)
 	viper.SetDefault("crawler_parse_mime", false)
 	viper.SetDefault("crawler_parse_encoding", false)
+	viper.SetDefault("crawler_crawl_missing_response_children", false)
 	viper.SetDefault("http_api_list_cache_control", 600)
 	viper.SetDefault("http_api_download_cache_control", 600)
 	viper.SetDefault("http_allow_dir_mime_parse", true)
@@ -119,6 +121,7 @@ func SetConfig(configFile string) (*Config, error) {
 		CacheRecacheCrawlerLimit:            viper.GetInt("cache_recache_crawler_limit"),
 		CrawlerParseMIME:                    viper.GetBool("crawler_parse_mime"),
 		CrawlerParseEncoding:                viper.GetBool("crawler_parse_encoding"),
+		CrawlerCrawlMissingResponseChildren: viper.GetBool("crawler_crawl_missing_response_children"),
 		HttpAPIListCacheControl:             viper.GetInt("http_api_list_cache_control"),
 		HttpAPIDlCacheControl:               viper.GetInt("http_api_download_cache_control"),
 		HttpAllowDirMimeParse:               viper.GetBool("http_allow_dir_mime_parse"),
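Putting the three config hunks together: the new `crawler_crawl_missing_response_children` key defaults to false, which leaves the on-demand crawl of missing response children disabled unless explicitly enabled. A minimal sketch of how the key flows from the config file through viper into the struct; the trimmed-down struct and the example YAML line are illustrative:

```go
package main

import (
	"fmt"

	"github.com/spf13/viper"
)

// Config is a trimmed-down stand-in for the real struct; only the new field is shown.
type Config struct {
	CrawlerCrawlMissingResponseChildren bool
}

func main() {
	// Default is false: missing children are not crawled on demand.
	viper.SetDefault("crawler_crawl_missing_response_children", false)

	// A config file would override it, e.g. (hypothetical config.yml):
	//   crawler_crawl_missing_response_children: true

	cfg := Config{
		CrawlerCrawlMissingResponseChildren: viper.GetBool("crawler_crawl_missing_response_children"),
	}
	fmt.Println("crawl missing response children:", cfg.CrawlerCrawlMissingResponseChildren)
}
```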

View File

@@ -1,6 +1,6 @@
 package config
-// Various global variables.
+// Various read-only global variables.
 var FollowSymlinks bool
 var InitialCrawlElapsed int

View File

@@ -50,6 +50,6 @@ func LogRequest(handler http.Handler) http.Handler {
 		ip := GetRealIP(r)
-		log.Infof("%s - %d - %s from %s took %v", r.Method, sw.status, r.URL.RequestURI(), ip, duration)
+		log.Infof("HTTP - %s %d %s from %s took %v", r.Method, sw.status, r.URL.RequestURI(), ip, duration)
 	})
 }
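The `sw.status` reference above comes from a logging middleware that wraps the ResponseWriter to capture the status code before emitting the request line. A minimal, self-contained sketch of that pattern; the names are illustrative rather than copied from the repo, and r.RemoteAddr stands in for the GetRealIP helper:

```go
package main

import (
	"log"
	"net/http"
	"time"
)

// statusWriter records the status code written by the wrapped handler.
type statusWriter struct {
	http.ResponseWriter
	status int
}

func (w *statusWriter) WriteHeader(code int) {
	w.status = code
	w.ResponseWriter.WriteHeader(code)
}

// logRequest mirrors the middleware pattern behind the log line above.
func logRequest(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		sw := &statusWriter{ResponseWriter: w, status: http.StatusOK}
		start := time.Now()
		next.ServeHTTP(sw, r)
		log.Printf("HTTP - %s %d %s from %s took %v",
			r.Method, sw.status, r.URL.RequestURI(), r.RemoteAddr, time.Since(start))
	})
}

func main() {
	http.Handle("/", logRequest(http.FileServer(http.Dir("."))))
	log.Fatal(http.ListenAndServe(":8080", nil))
}
```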