clean up directory crawler, fix active workers number, minor bugfixes
parent 3f305b104b
commit c0faaab3f8
@@ -165,7 +165,7 @@ func main() {
 	log.Infof("Server started on port %s", cfg.HTTPPort)

 	if cliArgs.initialCrawl || cfg.InitialCrawl {
-		log.Infof(`Preforming initial crawl for "%s"`, config.GetConfig().RootDir)
+		log.Infof(`Performing initial crawl for "%s"`, config.GetConfig().RootDir)
 		start := time.Now()
 		err := cache.InitialCrawl()
 		if err != nil {
@@ -34,28 +34,16 @@ type FinishedCrawl struct {
 }

 type DirectoryCrawler struct {
 	visited sync.Map
-	wg sync.WaitGroup
-	queue *queuedwalk.JobQueue
+	wg      sync.WaitGroup
+	queue   *queuedwalk.JobQueue
 }

 func NewDirectoryCrawler(queue *queuedwalk.JobQueue) *DirectoryCrawler {
 	return &DirectoryCrawler{
-		visited: sync.Map{},
-		queue:   queue,
+		queue: queue,
 	}
 }

 func (dc *DirectoryCrawler) CleanupDeletedFiles(path string) {
 	dc.visited.Range(func(key, value interface{}) bool {
 		keyStr := key.(string)
 		if isSubpath(file.StripRootDir(path), keyStr) && value.(bool) {
 			sharedcache.Cache.Remove(keyStr)
 		}
 		return true
 	})
 }

 func (dc *DirectoryCrawler) addCacheItem(fullPath string, info os.FileInfo) error {
 	strippedPath := file.StripRootDir(fullPath)
 	item, err := cacheitem.NewItem(fullPath, info)
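Background on the constructor cleanup above: the zero value of a sync.Map is empty and ready for use, so the explicit visited: sync.Map{} initializer added nothing. A minimal standalone sketch of why the field works without it (the crawler type here is a hypothetical stand-in for DirectoryCrawler, not the project's code):

package main

import (
	"fmt"
	"sync"
)

// crawler is a stand-in for DirectoryCrawler; its sync.Map field
// is usable without any explicit initialization.
type crawler struct {
	visited sync.Map
}

func main() {
	c := &crawler{} // no visited: sync.Map{} needed
	c.visited.Store("docs/readme.txt", true)
	v, ok := c.visited.Load("docs/readme.txt")
	fmt.Println(v, ok) // true true
}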
@@ -11,11 +11,14 @@ func InitializeDirectoryCrawlerWorkers() *globals.DcWorkers {
 	if globals.DirectoryCrawlers != nil {
 		panic("DirectoryCrawlers has already been defined!")
 	}
-	dcWorkers := workers.InitializeWorkers(directoryCrawlerWorker)
+	dcWorkers := workers.InitializeWorkers(directoryCrawlerWorker) // *workers.CrawlWorkers
 	d := &globals.DcWorkers{}
+
+	// Copy the fields given to us by InitializeWorkers() to the global object.
 	d.Queue = dcWorkers.Queue
-	d.BusyWorkers = dcWorkers.BusyWorkers
+	dcWorkers.BusyWorkers = &d.BusyWorkers
 	globals.DirectoryCrawlers = d
+
 	log.Debugf("CRAWLERS - Started %d directory crawler workers.", config.GetConfig().DirectoryCrawlers)
 	return d
 }
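The BusyWorkers change above is the "fix active workers number" part of this commit: assigning d.BusyWorkers = dcWorkers.BusyWorkers copied the int32 by value, so atomic updates made by the workers never reached the global counter that status logging reads. A minimal sketch of that failure mode (the two struct types are hypothetical stand-ins for globals.DcWorkers and workers.CrawlWorkers):

package main

import (
	"fmt"
	"sync/atomic"
)

type dcWorkers struct{ BusyWorkers int32 }    // global; read by status logs
type crawlWorkers struct{ BusyWorkers int32 } // written by workers

func main() {
	w := &crawlWorkers{}
	d := &dcWorkers{}

	d.BusyWorkers = w.BusyWorkers // copies the value once; the counters are now independent

	atomic.AddInt32(&w.BusyWorkers, 1) // a worker marks itself busy

	fmt.Println(w.BusyWorkers, d.BusyWorkers) // 1 0; the global never updates
}

Pointing the worker struct at the global counter instead (dcWorkers.BusyWorkers = &d.BusyWorkers) makes both sides read and write the same int32.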
@@ -24,7 +27,7 @@ func directoryCrawlerWorker(w *workers.CrawlWorkers) {
 	// Reminder that this worker type does not support shutdown
 	for {
 		job := w.Queue.GetJob()
-		atomic.AddInt32(&w.BusyWorkers, 1)
+		atomic.AddInt32(w.BusyWorkers, 1)

 		err := job.Walker.ReadPathAndQueue(job.StartPath)
 		if err != nil {
@@ -32,6 +35,6 @@ func directoryCrawlerWorker(w *workers.CrawlWorkers) {
 		}
 		job.Walker.Wg.Done()

-		atomic.AddInt32(&w.BusyWorkers, -1)
+		atomic.AddInt32(w.BusyWorkers, -1)
 	}
 }
@@ -11,9 +11,6 @@ import (

 func (dc *DirectoryCrawler) processPath(fullPath string, info os.FileInfo) error {
 	relPath := file.StripRootDir(fullPath)
-
-	dc.visited.Store(relPath, true)
-
 	if info.Mode().IsDir() {
 		dirItem, err := cacheitem.NewItem(fullPath, info)
 		if err != nil {
@@ -20,7 +20,7 @@ func InitializeElasticCrawlerWorkers() *globals.DcWorkers {
 	deleteWorkers := workers.InitializeWorkers(elasticDeleteWorker)
 	d := &globals.DcWorkers{}
 	d.Queue = deleteWorkers.Queue
-	d.BusyWorkers = deleteWorkers.BusyWorkers
+	deleteWorkers.BusyWorkers = &d.BusyWorkers
 	globals.ElasticCrawlers = d
 	log.Debugf("CRAWLERS - Started %d Elasticsearch sync workers.", config.GetConfig().ElasticsearchSyncThreads)
 	return d
@@ -37,7 +37,7 @@ func elasticDeleteWorker(w *workers.CrawlWorkers) {
 			return
 		}

-		atomic.AddInt32(&w.BusyWorkers, 1)
+		atomic.AddInt32(w.BusyWorkers, 1)

 		if job.Extra == nil {
 			// Jobs without any extras are the standard Walk jobs that add items to Elastic.
@@ -64,6 +64,6 @@ func elasticDeleteWorker(w *workers.CrawlWorkers) {
 				panic(task)
 			}
 		}
-		atomic.AddInt32(&w.BusyWorkers, -1)
+		atomic.AddInt32(w.BusyWorkers, -1)
 	}
 }
@@ -85,7 +85,7 @@ func syncElasticsearch(doFullSync bool) {
 			return
 		case <-ticker.C:
 			elapsed := time.Since(start)
-			logStr := "ELASTIC - Sync in progress. Elapsed: %s. Busy Elastic delete workers: %d. Elastic deletes queued: %d"
+			logStr := "ELASTIC - Sync in progress. Elapsed: %s. Busy workers: %d. Jobs queued: %d"
 			log.Debugf(logStr, elapsed, globals.ElasticCrawlers.BusyWorkers, globals.ElasticCrawlers.Queue.GetQueueSize())
 		}
 	}
@@ -128,7 +128,7 @@ func syncElasticsearch(doFullSync bool) {
 }

 func logElasticConnError(err error) {
-	log.Errorf("ELASTIC - Failed to read the index: %s", err.Error())
+	log.Errorf("ELASTIC - Failed to read the index: %s", err)
 }

 // EnableElasticsearchConnection tests the connection to Elastic and enables the backend if it's successful.
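On the logElasticConnError change: fmt's %s verb calls Error() on a non-nil error operand itself, so passing err directly is equivalent to err.Error(), and it also degrades gracefully if err is somehow nil, where an explicit err.Error() call would panic. A small sketch:

package main

import (
	"errors"
	"fmt"
)

func main() {
	err := errors.New("connection refused")
	fmt.Printf("%s\n", err)         // connection refused (fmt invokes Error())
	fmt.Printf("%s\n", err.Error()) // connection refused (explicit call)
	// With a nil error, err.Error() would panic; fmt just prints a diagnostic.
}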
@@ -13,7 +13,7 @@ import (
 func getElasticSize() (int, error) {
 	keysByPath, _, err := getPathsFromIndex()
 	if err != nil {
-		return -1, nil
+		return -1, err
 	}
 	return len(keysByPath), nil
 }
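The getElasticSize hunk fixes a swallowed error: returning (-1, nil) made a failed index read look like a successful call that answered -1. A sketch of the before/after behavior (getPaths is a hypothetical stand-in for getPathsFromIndex):

package main

import (
	"errors"
	"fmt"
)

// getPaths is a hypothetical stand-in for getPathsFromIndex().
func getPaths() (map[string]string, error) {
	return nil, errors.New("connection refused")
}

func sizeBuggy() (int, error) {
	m, err := getPaths()
	if err != nil {
		return -1, nil // BUG: drops err; callers see no failure
	}
	return len(m), nil
}

func sizeFixed() (int, error) {
	m, err := getPaths()
	if err != nil {
		return -1, err // propagate the failure to the caller
	}
	return len(m), nil
}

func main() {
	n, err := sizeBuggy()
	fmt.Println(n, err) // -1 <nil>; indistinguishable from a real answer
	n, err = sizeFixed()
	fmt.Println(n, err) // -1 connection refused
}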
@@ -11,7 +11,7 @@ type CrawlWorkerFunc func(workerData *CrawlWorkers)

 type CrawlWorkers struct {
 	Queue       *queuedwalk.JobQueue
-	BusyWorkers int32
+	BusyWorkers *int32
 	WorkerFunc  CrawlWorkerFunc
 }
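With BusyWorkers now a *int32, each worker loop increments the shared gauge when it takes a job and decrements it when the job finishes, so the sync-status log sees live numbers. A condensed sketch of that pattern (the pool type and its fields are illustrative, not the project's API):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// pool is an illustrative stand-in for workers.CrawlWorkers after this change.
type pool struct {
	busy *int32 // points at a counter owned by a globals struct
}

func (p *pool) work(wg *sync.WaitGroup) {
	defer wg.Done()
	atomic.AddInt32(p.busy, 1)        // job started
	defer atomic.AddInt32(p.busy, -1) // job finished
	// ... crawl work would happen here ...
}

func main() {
	var busyWorkers int32 // the globally visible gauge
	p := &pool{busy: &busyWorkers}

	var wg sync.WaitGroup
	wg.Add(1)
	go p.work(&wg)
	wg.Wait()
	fmt.Println(atomic.LoadInt32(&busyWorkers)) // 0 after all jobs complete
}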