add new config variable, minor qol
This commit is contained in:
parent
4200d4c710
commit
88fd63bfb9
1
build.sh
1
build.sh
|
@ -11,6 +11,7 @@ fi
|
||||||
mkdir -p "$SCRIPT_DIR/dist"
|
mkdir -p "$SCRIPT_DIR/dist"
|
||||||
|
|
||||||
cd "$SCRIPT_DIR/src" || exit 1
|
cd "$SCRIPT_DIR/src" || exit 1
|
||||||
|
go mod tidy
|
||||||
go build -v -trimpath -ldflags "-s -w -X main.VersionDate=$(date -u --iso-8601=minutes) -X main.Version=v$VERSION" -o "$SCRIPT_DIR/dist/crazyfs"
|
go build -v -trimpath -ldflags "-s -w -X main.VersionDate=$(date -u --iso-8601=minutes) -X main.Version=v$VERSION" -o "$SCRIPT_DIR/dist/crazyfs"
|
||||||
|
|
||||||
if [ $? -eq 0 ]; then
|
if [ $? -eq 0 ]; then
|
||||||
|
|
|
@ -48,6 +48,8 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, walkFunc func(string, os.File
|
||||||
walkFunc = dc.walkRecursiveFunc
|
walkFunc = dc.walkRecursiveFunc
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: check if symlink and reject if it is
|
||||||
|
|
||||||
//Extrapolate the name of the callback function.
|
//Extrapolate the name of the callback function.
|
||||||
pc := reflect.ValueOf(walkFunc).Pointer()
|
pc := reflect.ValueOf(walkFunc).Pointer()
|
||||||
fn := runtime.FuncForPC(pc)
|
fn := runtime.FuncForPC(pc)
|
||||||
|
@ -105,6 +107,9 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, walkFunc func(string, os.File
|
||||||
// CrawlNoRecursion crawls a file or directory without recursing into any subdirectories, and returns the result of the crawl.
|
// CrawlNoRecursion crawls a file or directory without recursing into any subdirectories, and returns the result of the crawl.
|
||||||
func (dc *DirectoryCrawler) CrawlNoRecursion(fullPath string) (*CacheItem.Item, error) {
|
func (dc *DirectoryCrawler) CrawlNoRecursion(fullPath string) (*CacheItem.Item, error) {
|
||||||
CacheItem.RetardCheck(fullPath)
|
CacheItem.RetardCheck(fullPath)
|
||||||
|
|
||||||
|
// TODO: check if symlink and reject if it is
|
||||||
|
|
||||||
readyToStart := dc.startCrawl(fullPath, "walkNonRecursive")
|
readyToStart := dc.startCrawl(fullPath, "walkNonRecursive")
|
||||||
if !readyToStart {
|
if !readyToStart {
|
||||||
return nil, errors.New(fmt.Sprintf(`rejecting crawl, already in progress for "%s"`, fullPath))
|
return nil, errors.New(fmt.Sprintf(`rejecting crawl, already in progress for "%s"`, fullPath))
|
||||||
|
|
|
@ -76,24 +76,29 @@ func NewResponseItem(cacheItem *CacheItem.Item) *ResponseItem {
|
||||||
for _, child := range cacheItem.Children {
|
for _, child := range cacheItem.Children {
|
||||||
childItem, found := SharedCache.Cache.Get(child)
|
childItem, found := SharedCache.Cache.Get(child)
|
||||||
|
|
||||||
// If the path wasn't found, do a quick crawl, since the path could have been modified since the last crawl.
|
|
||||||
// This can also be triggered if we encounter a broken symlink. We don't check for broken symlinks when scanning
|
|
||||||
// because that would be an extra os.Lstat() call in processPath().
|
|
||||||
if !found {
|
if !found {
|
||||||
crawlRelPath := filepath.Join(config.GetConfig().RootDir, child)
|
// If the path wasn't found, do a quick crawl, since the path could have been modified since the last crawl.
|
||||||
// TODO: when does this get triggered?
|
// This can also be triggered if we encounter a broken symlink. We don't check for broken symlinks when scanning
|
||||||
log.Debugf(`NewResponseItem:Crawl - Not in cache, crawling: "%s" ("%s")`, child, crawlRelPath)
|
// because that would be an extra os.Lstat() call in processPath().
|
||||||
dc := DirectoryCrawler.NewDirectoryCrawler()
|
if config.GetConfig().CrawlerCrawlMissingResponseChildren {
|
||||||
item, err := dc.CrawlNoRecursion(crawlRelPath)
|
crawlRelPath := filepath.Join(config.GetConfig().RootDir, child)
|
||||||
if err != nil {
|
// TODO: when does this get triggered?
|
||||||
log.Errorf("NewResponseItem:Crawl - %s", err)
|
log.Debugf(`NewResponseItem:Crawl - Not in cache, crawling: "%s" ("%s")`, child, crawlRelPath)
|
||||||
continue // skip this child
|
dc := DirectoryCrawler.NewDirectoryCrawler()
|
||||||
}
|
item, err := dc.CrawlNoRecursion(crawlRelPath)
|
||||||
if item == nil {
|
if err != nil {
|
||||||
log.Debugf(`NewResponseItem:Crawl - Not found: "%s". Likely broken symlink`, child)
|
log.Errorf("NewResponseItem:Crawl - %s", err)
|
||||||
|
continue // skip this child
|
||||||
|
}
|
||||||
|
if item == nil {
|
||||||
|
log.Debugf(`NewResponseItem:Crawl - Not found: "%s". Likely broken symlink`, child)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
childItem = item // Update the `childItem` var with the newly cached item.
|
||||||
|
} else {
|
||||||
|
log.Debugf("Skipping %s due to CrawlerCrawlMissingResponseChildren", child) // TODO: remove
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
childItem = item // Update the `childItem` var with the newly cached item.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if childItem != nil { // Double check
|
if childItem != nil { // Double check
|
||||||
|
|
|
@ -71,6 +71,8 @@ func (w *Walker) processPath(relPath string) error {
|
||||||
// Walk recursively descends into subdirectories, calling the user-defined walkFn for each file or directory
|
// Walk recursively descends into subdirectories, calling the user-defined walkFn for each file or directory
|
||||||
// in the tree, starting with the root directory. It is only called in one place: `Walk()` in Walk.go
|
// in the tree, starting with the root directory. It is only called in one place: `Walk()` in Walk.go
|
||||||
func (w *Walker) Walk(relPath string, walkFn filepath.WalkFunc) error {
|
func (w *Walker) Walk(relPath string, walkFn filepath.WalkFunc) error {
|
||||||
|
// TODO: compare with filepath.WalkDir()
|
||||||
|
|
||||||
w.walkFunc = walkFn
|
w.walkFunc = walkFn
|
||||||
|
|
||||||
// Parse the beginning path.
|
// Parse the beginning path.
|
||||||
|
|
|
@ -19,7 +19,7 @@ func InitializeWorkers() {
|
||||||
for n := 1; n <= config.GetConfig().DirectoryCrawlers; n++ {
|
for n := 1; n <= config.GetConfig().DirectoryCrawlers; n++ {
|
||||||
go worker()
|
go worker()
|
||||||
}
|
}
|
||||||
log.Debugf("Started %d directory crawler Workers.", config.GetConfig().DirectoryCrawlers)
|
log.Debugf("WORKERS - Started %d directory crawler Workers.", config.GetConfig().DirectoryCrawlers)
|
||||||
}
|
}
|
||||||
|
|
||||||
// worker processes jobs forever.
|
// worker processes jobs forever.
|
||||||
|
@ -30,7 +30,7 @@ func worker() {
|
||||||
atomic.AddInt32(&BusyWorkers, 1)
|
atomic.AddInt32(&BusyWorkers, 1)
|
||||||
err := job.Walker.processPath(job.StartPath)
|
err := job.Walker.processPath(job.StartPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("Workers - %s - %s", job.StartPath, err)
|
log.Warnf("WORKER - %s - %s", job.StartPath, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
job.Walker.wg.Done()
|
job.Walker.wg.Done()
|
||||||
|
|
|
@ -58,7 +58,7 @@ func APIList(w http.ResponseWriter, r *http.Request) {
|
||||||
mime := r.URL.Query().Get("mime")
|
mime := r.URL.Query().Get("mime")
|
||||||
if mime != "" {
|
if mime != "" {
|
||||||
if cacheItem.IsDir && !config.GetConfig().HttpAllowDirMimeParse {
|
if cacheItem.IsDir && !config.GetConfig().HttpAllowDirMimeParse {
|
||||||
helpers.Return403Msg("not allowed to analyze the mime of directories", w)
|
helpers.Return403Msg("unable to analyze the mime of directories", w)
|
||||||
return
|
return
|
||||||
} else {
|
} else {
|
||||||
// Only update the mime in the cache if it hasn't been set already.
|
// Only update the mime in the cache if it hasn't been set already.
|
||||||
|
@ -125,8 +125,8 @@ func APIList(w http.ResponseWriter, r *http.Request) {
|
||||||
item.Children = append(dirs, files...)
|
item.Children = append(dirs, files...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set the children to an empty array so that the JSON encoder doesn't return it as nil
|
// Set the children to an empty array so that the JSON encoder doesn't return it as nil.
|
||||||
var paginatedChildren []*ResponseItem.ResponseItem // this var is either the full CacheItem list or a paginated list depending on the query args
|
var paginatedChildren []*ResponseItem.ResponseItem // this var will be either the full CacheItem list or a paginated list depending on the query args
|
||||||
if item.Children != nil {
|
if item.Children != nil {
|
||||||
paginatedChildren = item.Children
|
paginatedChildren = item.Children
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -10,39 +10,40 @@ import (
|
||||||
var cfg *Config
|
var cfg *Config
|
||||||
|
|
||||||
type Config struct {
|
type Config struct {
|
||||||
RootDir string
|
RootDir string
|
||||||
HTTPPort string
|
HTTPPort string
|
||||||
CrawlModeCrawlInterval int
|
CrawlModeCrawlInterval int
|
||||||
DirectoryCrawlers int
|
DirectoryCrawlers int
|
||||||
CacheSize int
|
CacheSize int
|
||||||
CacheTime int // TODO: does this do anything?
|
CacheTime int // TODO: does this do anything?
|
||||||
CachePrintNew bool
|
CachePrintNew bool
|
||||||
InitialCrawl bool
|
InitialCrawl bool
|
||||||
CacheRecacheCrawlerLimit int
|
CacheRecacheCrawlerLimit int
|
||||||
CrawlerParseMIME bool
|
CrawlerParseMIME bool
|
||||||
CrawlerParseEncoding bool
|
CrawlerParseEncoding bool
|
||||||
HttpAPIListCacheControl int
|
CrawlerCrawlMissingResponseChildren bool
|
||||||
HttpAPIDlCacheControl int
|
HttpAPIListCacheControl int
|
||||||
HttpAllowDirMimeParse bool
|
HttpAPIDlCacheControl int
|
||||||
HttpAdminKey string
|
HttpAllowDirMimeParse bool
|
||||||
HttpAllowDuringInitialCrawl bool
|
HttpAdminKey string
|
||||||
RestrictedDownloadPaths []string
|
HttpAllowDuringInitialCrawl bool
|
||||||
ApiSearchMaxResults int
|
RestrictedDownloadPaths []string
|
||||||
ApiSearchShowChildren bool
|
ApiSearchMaxResults int
|
||||||
ElasticsearchEnable bool
|
ApiSearchShowChildren bool
|
||||||
ElasticsearchEndpoint string
|
ElasticsearchEnable bool
|
||||||
ElasticsearchSyncEnable bool
|
ElasticsearchEndpoint string
|
||||||
ElasticsearchSyncInterval int
|
ElasticsearchSyncEnable bool
|
||||||
ElasticsearchFullSyncInterval int
|
ElasticsearchSyncInterval int
|
||||||
ElasticsearchAPIKey string
|
ElasticsearchFullSyncInterval int
|
||||||
ElasticsearchIndex string
|
ElasticsearchAPIKey string
|
||||||
ElasticsearchSyncThreads int
|
ElasticsearchIndex string
|
||||||
ElasticsearchExcludePatterns []string
|
ElasticsearchSyncThreads int
|
||||||
ElasticsearchFullSyncOnStart bool
|
ElasticsearchExcludePatterns []string
|
||||||
ElasticsearchDefaultQueryField string
|
ElasticsearchFullSyncOnStart bool
|
||||||
HTTPRealIPHeader string
|
ElasticsearchDefaultQueryField string
|
||||||
HTTPNoMimeSniffHeader bool
|
HTTPRealIPHeader string
|
||||||
HTTPAccessControlAllowOriginHeader string
|
HTTPNoMimeSniffHeader bool
|
||||||
|
HTTPAccessControlAllowOriginHeader string
|
||||||
}
|
}
|
||||||
|
|
||||||
func SetConfig(configFile string) (*Config, error) {
|
func SetConfig(configFile string) (*Config, error) {
|
||||||
|
@ -65,6 +66,7 @@ func SetConfig(configFile string) (*Config, error) {
|
||||||
viper.SetDefault("cache_recache_crawler_limit", 50)
|
viper.SetDefault("cache_recache_crawler_limit", 50)
|
||||||
viper.SetDefault("crawler_parse_mime", false)
|
viper.SetDefault("crawler_parse_mime", false)
|
||||||
viper.SetDefault("crawler_parse_encoding", false)
|
viper.SetDefault("crawler_parse_encoding", false)
|
||||||
|
viper.SetDefault("crawler_crawl_missing_response_children", false)
|
||||||
viper.SetDefault("http_api_list_cache_control", 600)
|
viper.SetDefault("http_api_list_cache_control", 600)
|
||||||
viper.SetDefault("http_api_download_cache_control", 600)
|
viper.SetDefault("http_api_download_cache_control", 600)
|
||||||
viper.SetDefault("http_allow_dir_mime_parse", true)
|
viper.SetDefault("http_allow_dir_mime_parse", true)
|
||||||
|
@ -108,39 +110,40 @@ func SetConfig(configFile string) (*Config, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
config := &Config{
|
config := &Config{
|
||||||
RootDir: rootDir,
|
RootDir: rootDir,
|
||||||
HTTPPort: viper.GetString("http_port"),
|
HTTPPort: viper.GetString("http_port"),
|
||||||
CrawlModeCrawlInterval: viper.GetInt("crawl_mode_crawl_interval"),
|
CrawlModeCrawlInterval: viper.GetInt("crawl_mode_crawl_interval"),
|
||||||
DirectoryCrawlers: viper.GetInt("directory_crawlers"),
|
DirectoryCrawlers: viper.GetInt("directory_crawlers"),
|
||||||
CacheSize: viper.GetInt("cache_size"),
|
CacheSize: viper.GetInt("cache_size"),
|
||||||
CacheTime: viper.GetInt("cache_time"),
|
CacheTime: viper.GetInt("cache_time"),
|
||||||
CachePrintNew: viper.GetBool("cache_print_new"),
|
CachePrintNew: viper.GetBool("cache_print_new"),
|
||||||
InitialCrawl: viper.GetBool("initial_crawl"),
|
InitialCrawl: viper.GetBool("initial_crawl"),
|
||||||
CacheRecacheCrawlerLimit: viper.GetInt("cache_recache_crawler_limit"),
|
CacheRecacheCrawlerLimit: viper.GetInt("cache_recache_crawler_limit"),
|
||||||
CrawlerParseMIME: viper.GetBool("crawler_parse_mime"),
|
CrawlerParseMIME: viper.GetBool("crawler_parse_mime"),
|
||||||
CrawlerParseEncoding: viper.GetBool("crawler_parse_encoding"),
|
CrawlerParseEncoding: viper.GetBool("crawler_parse_encoding"),
|
||||||
HttpAPIListCacheControl: viper.GetInt("http_api_list_cache_control"),
|
CrawlerCrawlMissingResponseChildren: viper.GetBool("crawler_crawl_missing_response_children"),
|
||||||
HttpAPIDlCacheControl: viper.GetInt("http_api_download_cache_control"),
|
HttpAPIListCacheControl: viper.GetInt("http_api_list_cache_control"),
|
||||||
HttpAllowDirMimeParse: viper.GetBool("http_allow_dir_mime_parse"),
|
HttpAPIDlCacheControl: viper.GetInt("http_api_download_cache_control"),
|
||||||
HttpAdminKey: viper.GetString("api_admin_key"),
|
HttpAllowDirMimeParse: viper.GetBool("http_allow_dir_mime_parse"),
|
||||||
HttpAllowDuringInitialCrawl: viper.GetBool("http_allow_during_initial_crawl"),
|
HttpAdminKey: viper.GetString("api_admin_key"),
|
||||||
RestrictedDownloadPaths: restrictedPaths,
|
HttpAllowDuringInitialCrawl: viper.GetBool("http_allow_during_initial_crawl"),
|
||||||
ApiSearchMaxResults: viper.GetInt("api_search_max_results"),
|
RestrictedDownloadPaths: restrictedPaths,
|
||||||
ApiSearchShowChildren: viper.GetBool("api_search_show_children"),
|
ApiSearchMaxResults: viper.GetInt("api_search_max_results"),
|
||||||
ElasticsearchEnable: viper.GetBool("elasticsearch_enable"),
|
ApiSearchShowChildren: viper.GetBool("api_search_show_children"),
|
||||||
ElasticsearchEndpoint: viper.GetString("elasticsearch_endpoint"),
|
ElasticsearchEnable: viper.GetBool("elasticsearch_enable"),
|
||||||
ElasticsearchSyncEnable: viper.GetBool("elasticsearch_sync_enable"),
|
ElasticsearchEndpoint: viper.GetString("elasticsearch_endpoint"),
|
||||||
ElasticsearchSyncInterval: viper.GetInt("elasticsearch_sync_interval"),
|
ElasticsearchSyncEnable: viper.GetBool("elasticsearch_sync_enable"),
|
||||||
ElasticsearchFullSyncInterval: viper.GetInt("elasticsearch_full_sync_interval"),
|
ElasticsearchSyncInterval: viper.GetInt("elasticsearch_sync_interval"),
|
||||||
ElasticsearchAPIKey: viper.GetString("elasticsearch_api_key"),
|
ElasticsearchFullSyncInterval: viper.GetInt("elasticsearch_full_sync_interval"),
|
||||||
ElasticsearchIndex: viper.GetString("elasticsearch_index"),
|
ElasticsearchAPIKey: viper.GetString("elasticsearch_api_key"),
|
||||||
ElasticsearchSyncThreads: viper.GetInt("elasticsearch_sync_threads"),
|
ElasticsearchIndex: viper.GetString("elasticsearch_index"),
|
||||||
ElasticsearchExcludePatterns: viper.GetStringSlice("elasticsearch_exclude_patterns"),
|
ElasticsearchSyncThreads: viper.GetInt("elasticsearch_sync_threads"),
|
||||||
ElasticsearchFullSyncOnStart: viper.GetBool("elasticsearch_full_sync_on_start"),
|
ElasticsearchExcludePatterns: viper.GetStringSlice("elasticsearch_exclude_patterns"),
|
||||||
ElasticsearchDefaultQueryField: viper.GetString("elasticsearch_default_query_field"),
|
ElasticsearchFullSyncOnStart: viper.GetBool("elasticsearch_full_sync_on_start"),
|
||||||
HTTPRealIPHeader: viper.GetString("http_real_ip_header"),
|
ElasticsearchDefaultQueryField: viper.GetString("elasticsearch_default_query_field"),
|
||||||
HTTPNoMimeSniffHeader: viper.GetBool("http_no_mime_sniff_header"),
|
HTTPRealIPHeader: viper.GetString("http_real_ip_header"),
|
||||||
HTTPAccessControlAllowOriginHeader: viper.GetString("http_access_control_allow_origin_header"),
|
HTTPNoMimeSniffHeader: viper.GetBool("http_no_mime_sniff_header"),
|
||||||
|
HTTPAccessControlAllowOriginHeader: viper.GetString("http_access_control_allow_origin_header"),
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.CacheTime < 0 {
|
if config.CacheTime < 0 {
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
package config
|
package config
|
||||||
|
|
||||||
// Various global variables.
|
// Various read-only global variables.
|
||||||
|
|
||||||
var FollowSymlinks bool
|
var FollowSymlinks bool
|
||||||
var InitialCrawlElapsed int
|
var InitialCrawlElapsed int
|
||||||
|
|
|
@ -50,6 +50,6 @@ func LogRequest(handler http.Handler) http.Handler {
|
||||||
|
|
||||||
ip := GetRealIP(r)
|
ip := GetRealIP(r)
|
||||||
|
|
||||||
log.Infof("%s - %d - %s from %s took %v", r.Method, sw.status, r.URL.RequestURI(), ip, duration)
|
log.Infof("HTTP - %s %d %s from %s took %v", r.Method, sw.status, r.URL.RequestURI(), ip, duration)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue