fix some data races

2024-02-03 19:46:07 -07:00 · 2024-02-03 19:46:07 -07:00 · e1e6e2cbc2
parent 665a2e8c18
commit e1e6e2cbc2
11 changed files with 60 additions and 43 deletions
--- a/src/DirectoryCrawler/DirectoryCrawler.go
+++ b/src/DirectoryCrawler/DirectoryCrawler.go
@ -15,8 +15,8 @@ const maxFinishedCrawls = 100

 var activeCrawls = make(map[string]*ActiveCrawl)
 var finishedCrawls = make([]FinishedCrawl, 0, maxFinishedCrawls)
-var activeCrawlsMutex = &sync.Mutex{}
-var finishedCrawlsMutex = &sync.Mutex{}
+var activeCrawlsMutex = &sync.RWMutex{}
+var finishedCrawlsMutex = &sync.RWMutex{}

 type ActiveCrawl struct {
 	Path     string `json:"path"`
@ -35,7 +35,6 @@ type FinishedCrawl struct {
 type DirectoryCrawler struct {
 	visited sync.Map
 	wg      sync.WaitGroup
-	mu      sync.Mutex // lock for the visted map
 }

 func NewDirectoryCrawler() *DirectoryCrawler {
@ -104,26 +103,28 @@ func (dc *DirectoryCrawler) startCrawl(path string, function string) bool {
 	return true
 }

+// endCrawl moves the active crawl for path into finishedCrawls and removes it
+// from activeCrawls. It is a no-op unless an active crawl exists for path and
+// that crawl was started by function.
-func (dc *DirectoryCrawler) endCrawl(path string) {
+func (dc *DirectoryCrawler) endCrawl(path string, function string) {
 	activeCrawlsMutex.Lock()
-	finishedCrawlsMutex.Lock()
 	defer activeCrawlsMutex.Unlock()
+	finishedCrawlsMutex.Lock()
 	defer finishedCrawlsMutex.Unlock()
+	if activeCrawl, ok := activeCrawls[path]; ok && activeCrawl.Function == function {
+		// Evict the oldest entry only when a new one is about to be appended,
+		// so a non-matching call cannot shrink the history.
 		if len(finishedCrawls) >= maxFinishedCrawls {
 			finishedCrawls = finishedCrawls[1:]
 		}
 		finishedCrawls = append(finishedCrawls, FinishedCrawl{
 			Path:     path,
-		Start:    activeCrawls[path].Start,
-		Elapsed:  int64(time.Since(time.Unix(activeCrawls[path].Start, 0)).Seconds()),
-		Function: activeCrawls[path].Function,
+			Start:    activeCrawl.Start,
+			Elapsed:  int64(time.Since(time.Unix(activeCrawl.Start, 0)).Seconds()),
+			Function: activeCrawl.Function,
 		})
 		delete(activeCrawls, path)
 	}
+}

 func (dc *DirectoryCrawler) IsCrawlActive(path string, function *string) bool {
-	activeCrawlsMutex.Lock()
-	defer activeCrawlsMutex.Unlock()
+	activeCrawlsMutex.RLock()
+	defer activeCrawlsMutex.RUnlock()
 	if crawl, active := activeCrawls[path]; active {
 		return crawl.Function == *function
 	}
@ -131,23 +132,29 @@ func (dc *DirectoryCrawler) IsCrawlActive(path string, function *string) bool {
 }

 func GetActiveCrawls() map[string]*ActiveCrawl {
-	activeCrawlsMutex.Lock()
-	defer activeCrawlsMutex.Unlock()
+	activeCrawlsMutex.RLock()
+	defer activeCrawlsMutex.RUnlock()
+	activeCrawlsCopy := make(map[string]*ActiveCrawl)
 	for path := range activeCrawls {
 		a := activeCrawls[path]
 		a.Elapsed = int64(time.Since(time.Unix(a.Start, 0)).Seconds())
+		activeCrawlsCopy[path] = a
 	}
-	return activeCrawls
+	return activeCrawlsCopy
 }

 func GetFinishedCrawls() []FinishedCrawl {
-	finishedCrawlsMutex.Lock()
-	defer finishedCrawlsMutex.Unlock()
-	return finishedCrawls
+	finishedCrawlsMutex.RLock()
+	defer finishedCrawlsMutex.RUnlock()
+	finishedCrawlsCopy := make([]FinishedCrawl, 0, maxFinishedCrawls)
+	for k, v := range finishedCrawls {
+		finishedCrawlsCopy[k] = v
+	}
+	return finishedCrawlsCopy
 }

+// GetTotalActiveCrawls returns the number of entries in activeCrawls, read
+// while holding the read lock of activeCrawlsMutex.
 func GetTotalActiveCrawls() int {
-	finishedCrawlsMutex.Lock()
-	defer finishedCrawlsMutex.Unlock()
+	activeCrawlsMutex.RLock()
+	defer activeCrawlsMutex.RUnlock()
 	return len(activeCrawls)
 }
--- a/src/DirectoryCrawler/crawl.go
+++ b/src/DirectoryCrawler/crawl.go
@ -54,13 +54,13 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, walkFunc func(string, os.File
 	fullName := fn.Name()
 	parts := strings.Split(fullName, ".")
 	funcName := parts[len(parts)-1]
-	cleanFuncName := strings.TrimSuffix(funcName, "-fm")
+	cleanFuncName := strings.TrimSuffix(funcName, "Func-fm")

-	readyToStart := dc.startCrawl(fullPath, strings.TrimSuffix(cleanFuncName, "Func"))
+	readyToStart := dc.startCrawl(fullPath, cleanFuncName)
 	if !readyToStart {
 		return errors.New(fmt.Sprintf(`rejecting crawl, already in progress for "%s"`, fullPath))
 	}
-	defer dc.endCrawl(fullPath)
+	defer dc.endCrawl(fullPath, cleanFuncName)

 	info, err := os.Lstat(fullPath)
 	if os.IsNotExist(err) {
@ -110,7 +110,7 @@ func (dc *DirectoryCrawler) CrawlNoRecursion(fullPath string) (*CacheItem.Item,
 	if !readyToStart {
 		return nil, errors.New(fmt.Sprintf(`rejecting crawl, already in progress for "%s"`, fullPath))
 	}
-	defer dc.endCrawl(fullPath)
+	defer dc.endCrawl(fullPath, "walkNonRecursive")

 	info, err := os.Lstat(fullPath)
 	if os.IsNotExist(err) {
--- a/src/ResponseItem/ResponseItem.go
+++ b/src/ResponseItem/ResponseItem.go
@ -33,10 +33,9 @@ type ResponseItem struct {
 }

 func NewResponseItem(cacheItem *CacheItem.Item) *ResponseItem {
-	var debugChildItem *CacheItem.Item
 	defer func() {
 		if r := recover(); r != nil {
-			log.Fatalf("Recovered from panic: %s - %s - %s - %s", r, cacheItem.Path, debugChildItem, debug.Stack())
+			log.Fatalf("Recovered from panic: %s  - %+v - %s", r, cacheItem, debug.Stack())
 		}
 	}()

@ -60,15 +59,11 @@ func NewResponseItem(cacheItem *CacheItem.Item) *ResponseItem {
 		for _, child := range cacheItem.Children {
 			childItem, found := SharedCache.Cache.Get(child)

-			// TODO: remove
-			debugChildItem = childItem
-
-			// Do a quick crawl since the path could have been modfied since the last crawl.
+			// Do a quick crawl since the path could have been modified since the last crawl.
 			// This also be triggered if we encounter a broken symlink. We don't check for broken symlinks when scanning
 			// because that would be an extra os.Lstat() call in processPath().
 			if !found {
 				log.Debugf("CRAWLER - %s not in cache, crawling", child)
-
 				dc := DirectoryCrawler.NewDirectoryCrawler()
 				item, err := dc.CrawlNoRecursion(filepath.Join(config.GetConfig().RootDir, child))
 				if err != nil {
--- a/src/Workers/Queue.go
+++ b/src/Workers/Queue.go
@ -15,7 +15,7 @@ type WalkJob struct {
 // WalkJobQueue is the Queue that workers pull jobs from.
 type WalkJobQueue struct {
+	// jobs holds the queued jobs; AddJob appends to it.
 	jobs  []WalkJob
-	mutex sync.Mutex
+	// mutex guards jobs: AddJob takes the write lock, GetQueueSize the read lock.
+	mutex sync.RWMutex
+	// cond is signaled by AddJob after a job has been appended.
 	cond  *sync.Cond
 }

@ -28,8 +28,8 @@ func NewJobQueue() *WalkJobQueue {
 // AddJob adds a job to the queue and signals the workers so they know to pick it up.
 func (q *WalkJobQueue) AddJob(job WalkJob) {
 	q.mutex.Lock()
+	// The unlock is deferred, so cond.Signal below runs while the write lock is still held.
+	defer q.mutex.Unlock()
 	q.jobs = append(q.jobs, job)
-	q.mutex.Unlock()
 	q.cond.Signal()
 }

@ -50,5 +50,7 @@ func (q *WalkJobQueue) GetJob() WalkJob {

-// GetQueueSize returns the size of the queue.
+// GetQueueSize returns the number of jobs currently in the queue.
+// The length is read under the read lock so it does not race with AddJob's append.
 func (q *WalkJobQueue) GetQueueSize() int {
+	q.mutex.RLock()
+	defer q.mutex.RUnlock()
 	return len(q.jobs)
 }
--- a/src/Workers/worker.go
+++ b/src/Workers/worker.go
@ -25,16 +25,15 @@ func InitializeWorkers() {
 // worker processes jobs forever.
 func worker() {
 	for {
-		// Get a job from the queue. This is thread-safe because `GetJob()` locks the queue while reading.
 		job := Queue.GetJob()

-		atomic.AddInt32(&BusyWorkers, 1) // increment the number of active Workers
+		// Count this worker as busy while the job is being processed.
+		atomic.AddInt32(&BusyWorkers, 1)
 		err := job.Walker.processPath(job.StartPath)
 		if err != nil {
+			// Log and keep looping: a failed path does not stop the worker.
 			log.Warnf("Workers - %s - %s", job.StartPath, err)
 		}

-		job.Walker.wg.Done()              // decrement the WaitGroup counter
-		atomic.AddInt32(&BusyWorkers, -1) // decrement the number of active Workers
+		// Mark the job done on the walker's WaitGroup, then drop the busy count.
+		job.Walker.wg.Done()
+		atomic.AddInt32(&BusyWorkers, -1)
 	}
 }
--- a/src/api/routes/admin/AdminCrawlsInfo.go
+++ b/src/api/routes/admin/AdminCrawlsInfo.go
@ -9,6 +9,7 @@ import (
 	"crypto/sha256"
 	"crypto/subtle"
 	"net/http"
+	"sync/atomic"
 )

 func AdminCrawlsInfo(w http.ResponseWriter, r *http.Request) {
@ -41,7 +42,7 @@ func AdminCrawlsInfo(w http.ResponseWriter, r *http.Request) {
 					"finished": DirectoryCrawler.GetFinishedCrawls(),
 				},
 				"crawlWorkers": map[string]interface{}{
-					"busy":  Workers.BusyWorkers,
+					"busy":  atomic.LoadInt32(&Workers.BusyWorkers),
 					"alive": config.GetConfig().DirectoryCrawlers,
 				},
 				"queue": map[string]interface{}{
--- a/src/build.sh
+++ b/src/build.sh
@ -0,0 +1,4 @@
+#!/bin/bash
+
+# Build the crazyfs binary into ../dist, creating the directory if needed.
+# NOTE(review): -tags expects build-constraint tag names; passing the output of
+# `date -u` looks unintended (a build timestamp is usually injected via -ldflags -X) — confirm.
+mkdir -p ../dist
+go build -v -trimpath -ldflags "-s -w" -tags "$(date -u)" -o ../dist/crazyfs
--- a/src/cache/crawler.go
+++ b/src/cache/crawler.go
@ -7,6 +7,7 @@ import (
 	"crazyfs/config"
 	"crazyfs/elastic"
 	"sync"
+	"sync/atomic"
 	"time"
 )

@ -55,11 +56,11 @@ func logCacheStatus(msg string, ticker *time.Ticker, logFn func(format string, a
 		if !config.GetConfig().ElasticsearchSyncEnable || InitialCrawlInProgress {
 			logStr := "%s - %d/%d items in the cache. Busy Workers: %d. Jobs queued: %d. Running crawls: %d."
 			logFn(logStr,
-				msg, len(SharedCache.Cache.Keys()), config.GetConfig().CacheSize, Workers.BusyWorkers, Workers.Queue.GetQueueSize(), DirectoryCrawler.GetTotalActiveCrawls())
+				msg, len(SharedCache.Cache.Keys()), config.GetConfig().CacheSize, atomic.LoadInt32(&Workers.BusyWorkers), Workers.Queue.GetQueueSize(), DirectoryCrawler.GetTotalActiveCrawls())
 		} else {
 			logStr := "%s - %d/%d items in the cache. Busy Workers: %d. Jobs queued: %d. Running crawls: %d. Busy Elastic delete workers: %d. Elastic deletes queued: %d"
 			logFn(logStr,
-				msg, len(SharedCache.Cache.Keys()), config.GetConfig().CacheSize, Workers.BusyWorkers, Workers.Queue.GetQueueSize(), DirectoryCrawler.GetTotalActiveCrawls(), elastic.BusyWorkers, elastic.Queue.GetQueueSize())
+				msg, len(SharedCache.Cache.Keys()), config.GetConfig().CacheSize, atomic.LoadInt32(&Workers.BusyWorkers), Workers.Queue.GetQueueSize(), DirectoryCrawler.GetTotalActiveCrawls(), elastic.BusyWorkers, elastic.Queue.GetQueueSize())
 		}
 	}
 }
--- a/src/cache/initial.go
+++ b/src/cache/initial.go
@ -4,11 +4,15 @@ import (
 	"crazyfs/DirectoryCrawler"
 	"crazyfs/config"
 	"crazyfs/logging"
+	"sync"
 	"time"
 )

 var InitialCrawlInProgress bool

+// InitialCrawlLock is used only when the initial crawl is in progress to pause the program execution until it finishes.
+var InitialCrawlLock sync.RWMutex
+
 func init() {
 	InitialCrawlInProgress = false
 }
@ -21,6 +25,7 @@ func InitialCrawl() {
 	ticker := time.NewTicker(3 * time.Second)
 	go logCacheStatus("INITIAL CRAWL", ticker, log.Infof)

+	InitialCrawlLock.Lock()
 	InitialCrawlInProgress = true
 	dc := DirectoryCrawler.NewDirectoryCrawler()
 	//start := time.Now()
@ -29,6 +34,7 @@ func InitialCrawl() {
 		log.Errorf("LIST - background recursive crawl failed: %s", err)
 	}
 	InitialCrawlInProgress = false
+	InitialCrawlLock.Unlock()
 	ticker.Stop()
 	//log.Infof("INITIAL CRAWL - finished the initial crawl in %s", time.Since(start).Round(time.Second))
 }
--- a/src/crazyfs.go
+++ b/src/crazyfs.go
@ -110,10 +110,12 @@ func main() {
 			// This could take a minute, so we do this in the background while we crawl.
 			go func() {
 				elastic.EnableElasticsearchConnection()
+				cache.InitialCrawlLock.RLock()
 				for cache.InitialCrawlInProgress {
 					// Sleep while the initial crawl is running.
 					time.Sleep(1 * time.Second)
 				}
+				cache.InitialCrawlLock.RUnlock()
 				if !cliArgs.disableElasticSync || !cfg.ElasticsearchSyncEnable {
 					go elastic.SyncThread()
 					log.Info("Started the background Elasticsearch sync thread.")
--- a/src/elastic/delete.go
+++ b/src/elastic/delete.go
@ -10,7 +10,7 @@ import (
 )

 func startRemoveStaleItemsFromElasticsearch(pathsByKey map[string]string) {
-	log.Debugln("ELASTIC - Checking for removed items...")
+	log.Debugln("ELASTIC - Checking for deleted items that need to be removed from Elastic...")

 	// TODO: use waitgroups here so we know when all the jobs are done and we can erase globalKeysByPath and globalPathsByKey