make workers global, fix worker setup, clean up
This commit is contained in:
parent
7078712bc3
commit
157f80a463
|
@ -33,7 +33,7 @@ func NewItem(fullPath string, info os.FileInfo) *Item {
|
||||||
// Ignore symlinks
|
// Ignore symlinks
|
||||||
return nil
|
return nil
|
||||||
} else {
|
} else {
|
||||||
log.Warnf("NewItem - Path does not exist: %s", fullPath)
|
log.Warnf("NewItem - StartPath does not exist: %s", fullPath)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -65,7 +65,7 @@ func NewItem(fullPath string, info os.FileInfo) *Item {
|
||||||
}
|
}
|
||||||
|
|
||||||
if os.IsNotExist(err) {
|
if os.IsNotExist(err) {
|
||||||
log.Warnf("Path does not exist: %s", fullPath)
|
log.Warnf("StartPath does not exist: %s", fullPath)
|
||||||
return nil
|
return nil
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
log.Warnf("Error detecting MIME type of file %s - %v", fullPath, err)
|
log.Warnf("Error detecting MIME type of file %s - %v", fullPath, err)
|
||||||
|
|
|
@ -3,7 +3,6 @@ package api
|
||||||
import (
|
import (
|
||||||
"crazyfs/CacheItem"
|
"crazyfs/CacheItem"
|
||||||
"crazyfs/api/helpers"
|
"crazyfs/api/helpers"
|
||||||
"crazyfs/cache/DirectoryCrawler"
|
|
||||||
"crazyfs/config"
|
"crazyfs/config"
|
||||||
"crazyfs/elastic"
|
"crazyfs/elastic"
|
||||||
"crypto/sha256"
|
"crypto/sha256"
|
||||||
|
@ -30,12 +29,11 @@ func AdminCacheInfo(w http.ResponseWriter, r *http.Request, sharedCache *lru.Cac
|
||||||
cacheLen := sharedCache.Len()
|
cacheLen := sharedCache.Len()
|
||||||
|
|
||||||
response := map[string]interface{}{
|
response := map[string]interface{}{
|
||||||
"cache_size": cacheLen,
|
"cachedItems": cacheLen,
|
||||||
"cache_max": config.GetConfig().CacheSize,
|
"cacheMax": config.GetConfig().CacheSize,
|
||||||
"crawls_running": DirectoryCrawler.GetTotalActiveCrawls(),
|
"recacheCrawlLimit": config.GetConfig().CacheRecacheCrawlerLimit,
|
||||||
"busy_workers": DirectoryCrawler.BusyWorkers,
|
"newSyncRunning": elastic.ElasticRefreshSyncRunning,
|
||||||
"new_sync_running": elastic.ElasticRefreshSyncRunning,
|
"refreshSyncRunning": elastic.ElasticRefreshSyncRunning,
|
||||||
"refresh_sync_running": elastic.ElasticRefreshSyncRunning,
|
|
||||||
}
|
}
|
||||||
w.Header().Set("Cache-Control", "no-store")
|
w.Header().Set("Cache-Control", "no-store")
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
|
|
@ -27,8 +27,14 @@ func AdminCrawlsInfo(w http.ResponseWriter, r *http.Request, sharedCache *lru.Ca
|
||||||
return
|
return
|
||||||
} else {
|
} else {
|
||||||
response := map[string]interface{}{
|
response := map[string]interface{}{
|
||||||
|
"crawls": map[string]interface{}{
|
||||||
"active": DirectoryCrawler.GetActiveCrawls(),
|
"active": DirectoryCrawler.GetActiveCrawls(),
|
||||||
"finished": DirectoryCrawler.GetFinishedCrawls(),
|
"finished": DirectoryCrawler.GetFinishedCrawls(),
|
||||||
|
},
|
||||||
|
"workers": map[string]interface{}{
|
||||||
|
"busy": DirectoryCrawler.BusyWorkers,
|
||||||
|
"max": config.GetConfig().DirectoryCrawlers,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
w.Header().Set("Cache-Control", "no-store")
|
w.Header().Set("Cache-Control", "no-store")
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
|
|
@ -13,6 +13,9 @@ import (
|
||||||
// WorkerPool is a buffered channel acting as a semaphore to limit the number of active workers globally
|
// WorkerPool is a buffered channel acting as a semaphore to limit the number of active workers globally
|
||||||
var WorkerPool chan struct{}
|
var WorkerPool chan struct{}
|
||||||
|
|
||||||
|
// Jobs is a global channel that all Walker instances submit jobs to
|
||||||
|
var Jobs chan WalkJob
|
||||||
|
|
||||||
// BusyWorkers is an atomic counter for the number of active workers
|
// BusyWorkers is an atomic counter for the number of active workers
|
||||||
var BusyWorkers int32
|
var BusyWorkers int32
|
||||||
|
|
||||||
|
@ -20,15 +23,53 @@ var BusyWorkers int32
|
||||||
// to a walker function, does not point to a directory
|
// to a walker function, does not point to a directory
|
||||||
var ErrNotDir = errors.New("not a directory")
|
var ErrNotDir = errors.New("not a directory")
|
||||||
|
|
||||||
|
// WalkJob is a job that's passed to the workers.
|
||||||
|
type WalkJob struct {
|
||||||
|
StartPath string
|
||||||
|
Walker *Walker
|
||||||
|
}
|
||||||
|
|
||||||
// Walker is constructed for each Walk() function invocation
|
// Walker is constructed for each Walk() function invocation
|
||||||
type Walker struct {
|
type Walker struct {
|
||||||
wg sync.WaitGroup
|
wg sync.WaitGroup
|
||||||
jobs chan string
|
|
||||||
root string
|
root string
|
||||||
followSymlinks bool
|
followSymlinks bool
|
||||||
walkFunc filepath.WalkFunc
|
walkFunc filepath.WalkFunc
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// InitializeWorkers starts the number of workers defined by config.GetConfig().DirectoryCrawlers
|
||||||
|
func InitializeWorkers() {
|
||||||
|
WorkerPool = make(chan struct{}, config.GetConfig().DirectoryCrawlers)
|
||||||
|
Jobs = make(chan WalkJob, config.GetConfig().CacheSize)
|
||||||
|
for n := 1; n <= config.GetConfig().DirectoryCrawlers; n++ {
|
||||||
|
go worker()
|
||||||
|
}
|
||||||
|
log.Debugf("Started %d directory crawler workers.", config.GetConfig().DirectoryCrawlers)
|
||||||
|
}
|
||||||
|
|
||||||
|
// worker processes all the jobs until the jobs channel is explicitly closed
|
||||||
|
func worker() {
|
||||||
|
for job := range Jobs {
|
||||||
|
WorkerPool <- struct{}{} // acquire a worker
|
||||||
|
atomic.AddInt32(&BusyWorkers, 1) // increment the number of active workers
|
||||||
|
err := job.Walker.processPath(job.StartPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("worker - %s - %s", job.StartPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Walker.wg.Done() // decrement the WaitGroup counter
|
||||||
|
<-WorkerPool // release the worker when done
|
||||||
|
atomic.AddInt32(&BusyWorkers, -1) // decrement the number of active workers
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// addJob increments the job counter
|
||||||
|
// and pushes the path to the jobs channel
|
||||||
|
func (w *Walker) addJob(job WalkJob) {
|
||||||
|
w.wg.Add(1)
|
||||||
|
Jobs <- job
|
||||||
|
}
|
||||||
|
|
||||||
// the readDirNames function below was taken from the original
|
// the readDirNames function below was taken from the original
|
||||||
// implementation (see https://golang.org/src/path/filepath/path.go)
|
// implementation (see https://golang.org/src/path/filepath/path.go)
|
||||||
// but has sorting removed (sorting doesn't make sense
|
// but has sorting removed (sorting doesn't make sense
|
||||||
|
@ -57,8 +98,8 @@ func readDirNames(dirname string) ([]string, error) {
|
||||||
|
|
||||||
// lstat is a wrapper for os.Lstat which accepts a path
|
// lstat is a wrapper for os.Lstat which accepts a path
|
||||||
// relative to Walker.root and also follows symlinks
|
// relative to Walker.root and also follows symlinks
|
||||||
func (w *Walker) lstat(relpath string) (info os.FileInfo, err error) {
|
func (w *Walker) lstat(relPath string) (info os.FileInfo, err error) {
|
||||||
path := filepath.Join(w.root, relpath)
|
path := filepath.Join(w.root, relPath)
|
||||||
info, err = os.Lstat(path)
|
info, err = os.Lstat(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -81,82 +122,51 @@ func (w *Walker) lstat(relpath string) (info os.FileInfo, err error) {
|
||||||
|
|
||||||
// processPath processes one directory and adds
|
// processPath processes one directory and adds
|
||||||
// its subdirectories to the queue for further processing
|
// its subdirectories to the queue for further processing
|
||||||
func (w *Walker) processPath(relpath string) error {
|
func (w *Walker) processPath(relPath string) error {
|
||||||
defer w.wg.Done()
|
fullPath := filepath.Join(w.root, relPath)
|
||||||
|
names, err := readDirNames(fullPath)
|
||||||
path := filepath.Join(w.root, relpath)
|
|
||||||
names, err := readDirNames(path)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Walker - processPath - readDirNames - %s", err)
|
log.Errorf("Walker - processPath - readDirNames - %s", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, name := range names {
|
for _, name := range names {
|
||||||
subpath := filepath.Join(relpath, name)
|
subPath := filepath.Join(relPath, name)
|
||||||
info, err := w.lstat(subpath)
|
info, err := w.lstat(subPath)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("processPath - %s - %s", relpath, err)
|
log.Warnf("processPath - %s - %s", relPath, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if info == nil {
|
if info == nil {
|
||||||
log.Warnf("processPath - %s - %s", relpath, err)
|
log.Warnf("processPath - %s - %s", relPath, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
subPathFull := filepath.Join(w.root, subPath)
|
||||||
err = w.walkFunc(filepath.Join(w.root, subpath), info, err)
|
err = w.walkFunc(subPathFull, info, err)
|
||||||
if errors.Is(err, filepath.SkipDir) {
|
if errors.Is(err, filepath.SkipDir) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if info.Mode().IsDir() {
|
if info.Mode().IsDir() {
|
||||||
w.addJob(subpath)
|
w.addJob(WalkJob{
|
||||||
|
StartPath: subPath,
|
||||||
|
Walker: w,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// addJob increments the job counter
|
|
||||||
// and pushes the path to the jobs channel
|
|
||||||
func (w *Walker) addJob(path string) {
|
|
||||||
w.wg.Add(1)
|
|
||||||
select {
|
|
||||||
// try to push the job to the channel
|
|
||||||
case w.jobs <- path: // ok
|
|
||||||
default: // buffer overflow
|
|
||||||
// process job synchronously
|
|
||||||
err := w.processPath(path)
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("addJob - %s - %s", path, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// worker processes all the jobs until the jobs channel is explicitly closed
|
|
||||||
func (w *Walker) worker() {
|
|
||||||
for path := range w.jobs {
|
|
||||||
WorkerPool <- struct{}{} // acquire a worker
|
|
||||||
atomic.AddInt32(&BusyWorkers, 1) // increment the number of active workers
|
|
||||||
|
|
||||||
err := w.processPath(path)
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("worker - %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
<-WorkerPool // release the worker when done
|
|
||||||
atomic.AddInt32(&BusyWorkers, -1) // decrement the number of active workers
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Walk recursively descends into subdirectories, calling walkFn for each file or directory
|
// Walk recursively descends into subdirectories, calling walkFn for each file or directory
|
||||||
// in the tree, including the root directory.
|
// in the tree, including the root directory.
|
||||||
func (w *Walker) Walk(relpath string, walkFn filepath.WalkFunc) error {
|
func (w *Walker) Walk(relPath string, walkFn filepath.WalkFunc) error {
|
||||||
w.jobs = make(chan string, config.GetConfig().DirectoryCrawlers)
|
|
||||||
w.walkFunc = walkFn
|
w.walkFunc = walkFn
|
||||||
|
|
||||||
info, err := w.lstat(relpath)
|
fullPath := filepath.Join(w.root, relPath)
|
||||||
err = w.walkFunc(filepath.Join(w.root, relpath), info, err)
|
info, err := w.lstat(relPath)
|
||||||
|
err = w.walkFunc(fullPath, info, err)
|
||||||
if errors.Is(err, filepath.SkipDir) {
|
if errors.Is(err, filepath.SkipDir) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -165,21 +175,18 @@ func (w *Walker) Walk(relpath string, walkFn filepath.WalkFunc) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
if info == nil {
|
if info == nil {
|
||||||
return fmt.Errorf("broken symlink: %s", relpath)
|
return fmt.Errorf("broken symlink: %s", relPath)
|
||||||
}
|
}
|
||||||
|
|
||||||
if !info.Mode().IsDir() {
|
if !info.Mode().IsDir() {
|
||||||
return ErrNotDir
|
return ErrNotDir
|
||||||
}
|
}
|
||||||
|
|
||||||
// Spawn workers
|
w.addJob(WalkJob{
|
||||||
for n := 1; n <= config.GetConfig().DirectoryCrawlers; n++ {
|
StartPath: relPath,
|
||||||
go w.worker()
|
Walker: w,
|
||||||
}
|
}) // add this path as a first job
|
||||||
|
|
||||||
w.addJob(relpath) // add this path as a first job
|
|
||||||
w.wg.Wait() // wait till all paths are processed
|
w.wg.Wait() // wait till all paths are processed
|
||||||
close(w.jobs) // signal workers to close
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -49,7 +49,7 @@ func (dc *DirectoryCrawler) processPath(fullPath string, info os.FileInfo) error
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Path is a file
|
// Path is a file
|
||||||
dc.AddCacheItem(fullPath, info)
|
dc.AddCacheItem(fullPath, info)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
|
|
@ -59,8 +59,7 @@ func startCrawl(sharedCache *lru.Cache[string, *CacheItem.Item], wg *sync.WaitGr
|
||||||
func logCacheStatus(msg string, ticker *time.Ticker, sharedCache *lru.Cache[string, *CacheItem.Item], logFn func(format string, args ...interface{})) {
|
func logCacheStatus(msg string, ticker *time.Ticker, sharedCache *lru.Cache[string, *CacheItem.Item], logFn func(format string, args ...interface{})) {
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
for range ticker.C {
|
for range ticker.C {
|
||||||
activeWorkers := int(DirectoryCrawler.BusyWorkers)
|
logFn("%s - %d/%d items in the cache. Busy workers: %d, running crawls: %d",
|
||||||
runningCrawls := DirectoryCrawler.GetTotalActiveCrawls()
|
msg, len(sharedCache.Keys()), config.GetConfig().CacheSize, DirectoryCrawler.BusyWorkers, DirectoryCrawler.GetTotalActiveCrawls())
|
||||||
logFn("%s - %d/%d items in the cache. Active workers: %d Active crawls: %d", msg, len(sharedCache.Keys()), config.GetConfig().CacheSize, activeWorkers, runningCrawls)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,11 +14,9 @@ type Config struct {
|
||||||
HTTPPort string
|
HTTPPort string
|
||||||
CrawlModeCrawlInterval int
|
CrawlModeCrawlInterval int
|
||||||
DirectoryCrawlers int
|
DirectoryCrawlers int
|
||||||
CrawlWorkers int
|
|
||||||
CacheSize int
|
CacheSize int
|
||||||
CacheTime int
|
CacheTime int
|
||||||
CachePrintNew bool
|
CachePrintNew bool
|
||||||
CachePrintChanges bool
|
|
||||||
InitialCrawl bool
|
InitialCrawl bool
|
||||||
CacheRecacheCrawlerLimit int
|
CacheRecacheCrawlerLimit int
|
||||||
CrawlerParseMIME bool
|
CrawlerParseMIME bool
|
||||||
|
@ -31,7 +29,6 @@ type Config struct {
|
||||||
RestrictedDownloadPaths []string
|
RestrictedDownloadPaths []string
|
||||||
ApiSearchMaxResults int
|
ApiSearchMaxResults int
|
||||||
ApiSearchShowChildren bool
|
ApiSearchShowChildren bool
|
||||||
WorkersJobQueueSize int
|
|
||||||
ElasticsearchEnable bool
|
ElasticsearchEnable bool
|
||||||
ElasticsearchEndpoint string
|
ElasticsearchEndpoint string
|
||||||
ElasticsearchSyncEnable bool
|
ElasticsearchSyncEnable bool
|
||||||
|
@ -59,8 +56,7 @@ func SetConfig(configFile string) (*Config, error) {
|
||||||
viper.SetDefault("watch_interval", 1)
|
viper.SetDefault("watch_interval", 1)
|
||||||
viper.SetDefault("watch_mode", "crawl")
|
viper.SetDefault("watch_mode", "crawl")
|
||||||
viper.SetDefault("crawl_mode_crawl_interval", 3600)
|
viper.SetDefault("crawl_mode_crawl_interval", 3600)
|
||||||
viper.SetDefault("directory_crawlers", 4)
|
viper.SetDefault("directory_crawlers", 10)
|
||||||
viper.SetDefault("crawl_workers", 10)
|
|
||||||
viper.SetDefault("cache_size", 100000000)
|
viper.SetDefault("cache_size", 100000000)
|
||||||
viper.SetDefault("cache_time", 30)
|
viper.SetDefault("cache_time", 30)
|
||||||
viper.SetDefault("cache_print_new", false)
|
viper.SetDefault("cache_print_new", false)
|
||||||
|
@ -110,24 +106,22 @@ func SetConfig(configFile string) (*Config, error) {
|
||||||
rootDir = "/"
|
rootDir = "/"
|
||||||
}
|
}
|
||||||
|
|
||||||
workersJobQueueSizeValue := viper.GetInt("crawler_worker_job_queue_size")
|
//workersJobQueueSizeValue := viper.GetInt("crawler_worker_job_queue_size")
|
||||||
var workersJobQueueSize int
|
//var workersJobQueueSize int
|
||||||
if workersJobQueueSizeValue == 0 {
|
//if workersJobQueueSizeValue == 0 {
|
||||||
workersJobQueueSize = viper.GetInt("crawl_workers") * 100
|
// workersJobQueueSize = viper.GetInt("crawl_workers") * 100
|
||||||
} else {
|
//} else {
|
||||||
workersJobQueueSize = workersJobQueueSizeValue
|
// workersJobQueueSize = workersJobQueueSizeValue
|
||||||
}
|
//}
|
||||||
|
|
||||||
config := &Config{
|
config := &Config{
|
||||||
RootDir: rootDir,
|
RootDir: rootDir,
|
||||||
HTTPPort: viper.GetString("http_port"),
|
HTTPPort: viper.GetString("http_port"),
|
||||||
CrawlModeCrawlInterval: viper.GetInt("crawl_mode_crawl_interval"),
|
CrawlModeCrawlInterval: viper.GetInt("crawl_mode_crawl_interval"),
|
||||||
DirectoryCrawlers: viper.GetInt("crawl_mode_crawl_interval"),
|
DirectoryCrawlers: viper.GetInt("directory_crawlers"),
|
||||||
CrawlWorkers: viper.GetInt("crawl_workers"),
|
|
||||||
CacheSize: viper.GetInt("cache_size"),
|
CacheSize: viper.GetInt("cache_size"),
|
||||||
CacheTime: viper.GetInt("cache_time"),
|
CacheTime: viper.GetInt("cache_time"),
|
||||||
CachePrintNew: viper.GetBool("cache_print_new"),
|
CachePrintNew: viper.GetBool("cache_print_new"),
|
||||||
CachePrintChanges: viper.GetBool("cache_print_changes"),
|
|
||||||
InitialCrawl: viper.GetBool("initial_crawl"),
|
InitialCrawl: viper.GetBool("initial_crawl"),
|
||||||
CacheRecacheCrawlerLimit: viper.GetInt("cache_recache_crawler_limit"),
|
CacheRecacheCrawlerLimit: viper.GetInt("cache_recache_crawler_limit"),
|
||||||
CrawlerParseMIME: viper.GetBool("crawler_parse_mime"),
|
CrawlerParseMIME: viper.GetBool("crawler_parse_mime"),
|
||||||
|
@ -140,7 +134,6 @@ func SetConfig(configFile string) (*Config, error) {
|
||||||
RestrictedDownloadPaths: restrictedPaths,
|
RestrictedDownloadPaths: restrictedPaths,
|
||||||
ApiSearchMaxResults: viper.GetInt("api_search_max_results"),
|
ApiSearchMaxResults: viper.GetInt("api_search_max_results"),
|
||||||
ApiSearchShowChildren: viper.GetBool("api_search_show_children"),
|
ApiSearchShowChildren: viper.GetBool("api_search_show_children"),
|
||||||
WorkersJobQueueSize: workersJobQueueSize,
|
|
||||||
ElasticsearchEnable: viper.GetBool("elasticsearch_enable"),
|
ElasticsearchEnable: viper.GetBool("elasticsearch_enable"),
|
||||||
ElasticsearchEndpoint: viper.GetString("elasticsearch_endpoint"),
|
ElasticsearchEndpoint: viper.GetString("elasticsearch_endpoint"),
|
||||||
ElasticsearchSyncEnable: viper.GetBool("elasticsearch_sync_enable"),
|
ElasticsearchSyncEnable: viper.GetBool("elasticsearch_sync_enable"),
|
||||||
|
@ -165,10 +158,6 @@ func SetConfig(configFile string) (*Config, error) {
|
||||||
return nil, errors.New("crawl_mode_crawl_interval must be more than 1")
|
return nil, errors.New("crawl_mode_crawl_interval must be more than 1")
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.CrawlWorkers < 1 {
|
|
||||||
return nil, errors.New("crawl_workers must be more than 1")
|
|
||||||
}
|
|
||||||
|
|
||||||
if config.CacheSize < 1 {
|
if config.CacheSize < 1 {
|
||||||
return nil, errors.New("crawl_workers must be more than 1")
|
return nil, errors.New("crawl_workers must be more than 1")
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,14 +39,6 @@ type cliConfig struct {
|
||||||
// TODO: admin api endpoint to get status and progress of the full refresh of elasticsearch
|
// TODO: admin api endpoint to get status and progress of the full refresh of elasticsearch
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
//fullPath := "/srv/chub-archive"
|
|
||||||
//RootDir := "/srv/chub-archive"
|
|
||||||
//
|
|
||||||
//fmt.Println(strings.HasPrefix(fullPath, RootDir))
|
|
||||||
////fmt.Println(fullPath != RootDir)
|
|
||||||
//
|
|
||||||
//return
|
|
||||||
|
|
||||||
cliArgs := parseArgs()
|
cliArgs := parseArgs()
|
||||||
if cliArgs.help {
|
if cliArgs.help {
|
||||||
flag.Usage()
|
flag.Usage()
|
||||||
|
@ -101,9 +93,7 @@ func main() {
|
||||||
|
|
||||||
log.Infof("Elasticsearch enabled: %t", cfg.ElasticsearchEnable)
|
log.Infof("Elasticsearch enabled: %t", cfg.ElasticsearchEnable)
|
||||||
|
|
||||||
// Init global variables
|
DirectoryCrawler.InitializeWorkers()
|
||||||
//DirectoryCrawler.CrawlWorkerPool = DirectoryCrawler.NewWorkerPool(config.MaxWorkers)
|
|
||||||
DirectoryCrawler.WorkerPool = make(chan struct{}, cfg.CrawlWorkers)
|
|
||||||
|
|
||||||
cache.InitRecacheSemaphore(cfg.CacheRecacheCrawlerLimit)
|
cache.InitRecacheSemaphore(cfg.CacheRecacheCrawlerLimit)
|
||||||
|
|
||||||
|
@ -166,7 +156,7 @@ func main() {
|
||||||
|
|
||||||
func parseArgs() cliConfig {
|
func parseArgs() cliConfig {
|
||||||
var cliArgs cliConfig
|
var cliArgs cliConfig
|
||||||
flag.StringVar(&cliArgs.configFile, "config", "", "Path to the config file")
|
flag.StringVar(&cliArgs.configFile, "config", "", "StartPath to the config file")
|
||||||
flag.BoolVar(&cliArgs.initialCrawl, "initial-crawl", false, "Do an initial crawl to fill the cache")
|
flag.BoolVar(&cliArgs.initialCrawl, "initial-crawl", false, "Do an initial crawl to fill the cache")
|
||||||
flag.BoolVar(&cliArgs.initialCrawl, "i", false, "Do an initial crawl to fill the cache")
|
flag.BoolVar(&cliArgs.initialCrawl, "i", false, "Do an initial crawl to fill the cache")
|
||||||
flag.BoolVar(&cliArgs.debug, "d", false, "Enable debug mode")
|
flag.BoolVar(&cliArgs.debug, "d", false, "Enable debug mode")
|
||||||
|
|
Loading…
Reference in New Issue