fix memory usage related to the worker queue size, reorganize things
This commit is contained in:
parent
b5327e0c67
commit
2579c76f04
|
@ -0,0 +1,54 @@
|
||||||
|
package Workers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
// This is a queue implementation that doesn't rely on channels. This way of doing things should be more memory-efficient.
|
||||||
|
|
||||||
|
// WalkJob is an individual job passed to the Workers.
|
||||||
|
type WalkJob struct {
|
||||||
|
StartPath string
|
||||||
|
Walker *Walker // A pointer to the shared Walker object is passed as well.
|
||||||
|
}
|
||||||
|
|
||||||
|
// WalkJobQueue is the Queue that workers pull jobs from.
|
||||||
|
type WalkJobQueue struct {
|
||||||
|
jobs []WalkJob
|
||||||
|
mutex sync.Mutex
|
||||||
|
cond *sync.Cond
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewJobQueue() *WalkJobQueue {
|
||||||
|
q := &WalkJobQueue{}
|
||||||
|
q.cond = sync.NewCond(&q.mutex)
|
||||||
|
return q
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddJob adds a job to the queue and signals the workers so they know to pick it up.
|
||||||
|
func (q *WalkJobQueue) AddJob(job WalkJob) {
|
||||||
|
q.mutex.Lock()
|
||||||
|
q.jobs = append(q.jobs, job)
|
||||||
|
q.mutex.Unlock()
|
||||||
|
q.cond.Signal()
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetJob is how a worker pulls a job from the queue.
|
||||||
|
func (q *WalkJobQueue) GetJob() WalkJob {
|
||||||
|
q.mutex.Lock()
|
||||||
|
defer q.mutex.Unlock()
|
||||||
|
|
||||||
|
for len(q.jobs) == 0 {
|
||||||
|
q.cond.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
|
job := q.jobs[0]
|
||||||
|
q.jobs = q.jobs[1:]
|
||||||
|
|
||||||
|
return job
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetQueueSize returns the size of the queue.
|
||||||
|
func (q *WalkJobQueue) GetQueueSize() int {
|
||||||
|
return len(q.jobs)
|
||||||
|
}
|
|
@ -0,0 +1,12 @@
|
||||||
|
package Workers
|
||||||
|
|
||||||
|
import "path/filepath"
|
||||||
|
|
||||||
|
// Walk is a wrapper function for the Walker object that mimics the behavior of filepath.Walk, and doesn't follow symlinks.
|
||||||
|
func Walk(root string, followSymlinks bool, walkFn filepath.WalkFunc) error {
|
||||||
|
w := Walker{
|
||||||
|
root: root,
|
||||||
|
followSymlinks: followSymlinks,
|
||||||
|
}
|
||||||
|
return w.Walk("", walkFn)
|
||||||
|
}
|
|
@ -0,0 +1,103 @@
|
||||||
|
package Workers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"path/filepath"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Walker.go is the implementation behind `Walk()`, which is a filesystem walk
|
||||||
|
// using workers that pull jobs from a queue.
|
||||||
|
|
||||||
|
// ErrNotDir indicates that the path, which is being passed to a walker function, does not point to a directory.
|
||||||
|
var ErrNotDir = errors.New("not a directory")
|
||||||
|
|
||||||
|
// Walker is constructed for each Walk() function invocation.
|
||||||
|
type Walker struct {
|
||||||
|
wg sync.WaitGroup // used to sync the progress of all workers.
|
||||||
|
root string
|
||||||
|
followSymlinks bool
|
||||||
|
walkFunc filepath.WalkFunc
|
||||||
|
}
|
||||||
|
|
||||||
|
// addJob increments the job counter and pushes the path to the job queue.
|
||||||
|
func (w *Walker) addJob(job WalkJob) {
|
||||||
|
w.wg.Add(1)
|
||||||
|
Queue.AddJob(job)
|
||||||
|
}
|
||||||
|
|
||||||
|
// processPath processes one path.
|
||||||
|
func (w *Walker) processPath(relPath string) error {
|
||||||
|
fullPath := filepath.Join(w.root, relPath)
|
||||||
|
names, err := readDirNames(fullPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Walker - processPath - readDirNames - %s", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Iterate over the path's children.
|
||||||
|
for _, name := range names {
|
||||||
|
subPath := filepath.Join(relPath, name)
|
||||||
|
info, err := w.lstat(subPath)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("processPath - %s - %s", relPath, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if info == nil {
|
||||||
|
log.Warnf("processPath - %s - %s", relPath, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
subPathFull := filepath.Join(w.root, subPath)
|
||||||
|
err = w.walkFunc(subPathFull, info, err)
|
||||||
|
if errors.Is(err, filepath.SkipDir) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this child is a directory, add it to the queue then move on.
|
||||||
|
if info.Mode().IsDir() {
|
||||||
|
w.addJob(WalkJob{
|
||||||
|
StartPath: subPath,
|
||||||
|
Walker: w,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Walk recursively descends into subdirectories, calling the user-defined walkFn for each file or directory
|
||||||
|
// in the tree, starting with the root directory. It is only called one place: `Walk()` in Walk.go
|
||||||
|
func (w *Walker) Walk(relPath string, walkFn filepath.WalkFunc) error {
|
||||||
|
w.walkFunc = walkFn
|
||||||
|
|
||||||
|
// Parse the beginning path.
|
||||||
|
fullPath := filepath.Join(w.root, relPath)
|
||||||
|
info, err := w.lstat(relPath)
|
||||||
|
err = w.walkFunc(fullPath, info, err)
|
||||||
|
if errors.Is(err, filepath.SkipDir) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if info == nil {
|
||||||
|
return fmt.Errorf("broken symlink: %s", relPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !info.Mode().IsDir() {
|
||||||
|
return ErrNotDir
|
||||||
|
}
|
||||||
|
|
||||||
|
// Let the workers handle everything else.
|
||||||
|
w.addJob(WalkJob{
|
||||||
|
StartPath: relPath,
|
||||||
|
Walker: w,
|
||||||
|
})
|
||||||
|
|
||||||
|
// Wait for the workers to finish reading the file system.
|
||||||
|
w.wg.Wait()
|
||||||
|
return nil
|
||||||
|
}
|
|
@ -0,0 +1,58 @@
|
||||||
|
package Workers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
)
|
||||||
|
|
||||||
|
// file.go holds functions related to the filesystem.
|
||||||
|
|
||||||
|
// the readDirNames function below was taken from the original
|
||||||
|
// implementation (see https://golang.org/src/path/filepath/path.go)
|
||||||
|
// but has sorting removed (sorting doesn't make sense
|
||||||
|
// in concurrent execution, anyway)
|
||||||
|
|
||||||
|
// readDirNames reads the directory named by dirname and returns
|
||||||
|
// a list of directory entries.
|
||||||
|
func readDirNames(dirname string) ([]string, error) {
|
||||||
|
f, err := os.Open(dirname)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
cerr := f.Close()
|
||||||
|
if err == nil {
|
||||||
|
err = cerr
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
names, err := f.Readdirnames(-1)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return names, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// lstat is a wrapper for os.Lstat which accepts a path
|
||||||
|
// relative to Walker.root and also follows symlinks
|
||||||
|
func (w *Walker) lstat(relPath string) (info os.FileInfo, err error) {
|
||||||
|
path := filepath.Join(w.root, relPath)
|
||||||
|
info, err = os.Lstat(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
// check if this is a symlink
|
||||||
|
if w.followSymlinks {
|
||||||
|
if info.Mode()&os.ModeSymlink > 0 {
|
||||||
|
path, err = filepath.EvalSymlinks(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
info, err = os.Lstat(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
|
@ -0,0 +1,12 @@
|
||||||
|
package Workers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crazyfs/logging"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
var log *logrus.Logger
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
log = logging.GetLogger()
|
||||||
|
}
|
|
@ -0,0 +1,40 @@
|
||||||
|
package Workers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crazyfs/config"
|
||||||
|
"sync/atomic"
|
||||||
|
)
|
||||||
|
|
||||||
|
// worker.go holds the worker function for `Walk()`.
|
||||||
|
|
||||||
|
// Queue is the global walk job queue.
|
||||||
|
var Queue *WalkJobQueue
|
||||||
|
|
||||||
|
// BusyWorkers is an atomic counter for the number of active Workers
|
||||||
|
var BusyWorkers int32
|
||||||
|
|
||||||
|
// InitializeWorkers starts the number of Workers defined by the config.
|
||||||
|
func InitializeWorkers() {
|
||||||
|
Queue = NewJobQueue()
|
||||||
|
for n := 1; n <= config.GetConfig().DirectoryCrawlers; n++ {
|
||||||
|
go worker()
|
||||||
|
}
|
||||||
|
log.Debugf("Started %d directory crawler Workers.", config.GetConfig().DirectoryCrawlers)
|
||||||
|
}
|
||||||
|
|
||||||
|
// worker processes jobs forever.
|
||||||
|
func worker() {
|
||||||
|
for {
|
||||||
|
// Get a job from the queue. This is thread-safe because `GetJob()` locks the queue while reading.
|
||||||
|
job := Queue.GetJob()
|
||||||
|
|
||||||
|
atomic.AddInt32(&BusyWorkers, 1) // increment the number of active Workers
|
||||||
|
err := job.Walker.processPath(job.StartPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("Workers - %s - %s", job.StartPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Walker.wg.Done() // decrement the WaitGroup counter
|
||||||
|
atomic.AddInt32(&BusyWorkers, -1) // decrement the number of active Workers
|
||||||
|
}
|
||||||
|
}
|
|
@ -2,6 +2,7 @@ package api
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"crazyfs/CacheItem"
|
"crazyfs/CacheItem"
|
||||||
|
"crazyfs/Workers"
|
||||||
"crazyfs/api/helpers"
|
"crazyfs/api/helpers"
|
||||||
"crazyfs/cache/DirectoryCrawler"
|
"crazyfs/cache/DirectoryCrawler"
|
||||||
"crazyfs/config"
|
"crazyfs/config"
|
||||||
|
@ -32,8 +33,11 @@ func AdminCrawlsInfo(w http.ResponseWriter, r *http.Request, sharedCache *lru.Ca
|
||||||
"finished": DirectoryCrawler.GetFinishedCrawls(),
|
"finished": DirectoryCrawler.GetFinishedCrawls(),
|
||||||
},
|
},
|
||||||
"workers": map[string]interface{}{
|
"workers": map[string]interface{}{
|
||||||
"busy": DirectoryCrawler.BusyWorkers,
|
"busy": Workers.BusyWorkers,
|
||||||
"max": config.GetConfig().DirectoryCrawlers,
|
"alive": config.GetConfig().DirectoryCrawlers,
|
||||||
|
},
|
||||||
|
"queue": map[string]interface{}{
|
||||||
|
"size": Workers.Queue.GetQueueSize(),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
w.Header().Set("Cache-Control", "no-store")
|
w.Header().Set("Cache-Control", "no-store")
|
||||||
|
|
|
@ -1,202 +0,0 @@
|
||||||
package DirectoryCrawler
|
|
||||||
|
|
||||||
import (
|
|
||||||
"crazyfs/config"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"sync"
|
|
||||||
"sync/atomic"
|
|
||||||
)
|
|
||||||
|
|
||||||
// WorkerPool is a buffered channel acting as a semaphore to limit the number of active workers globally
|
|
||||||
var WorkerPool chan struct{}
|
|
||||||
|
|
||||||
// Jobs is a global channel that all Walker instances submit jobs to
|
|
||||||
var Jobs chan WalkJob
|
|
||||||
|
|
||||||
// BusyWorkers is an atomic counter for the number of active workers
|
|
||||||
var BusyWorkers int32
|
|
||||||
|
|
||||||
// ErrNotDir indicates that the path, which is being passed
|
|
||||||
// to a walker function, does not point to a directory
|
|
||||||
var ErrNotDir = errors.New("not a directory")
|
|
||||||
|
|
||||||
// WalkJob is a job that's passed to the workers.
|
|
||||||
type WalkJob struct {
|
|
||||||
StartPath string
|
|
||||||
Walker *Walker
|
|
||||||
}
|
|
||||||
|
|
||||||
// Walker is constructed for each Walk() function invocation
|
|
||||||
type Walker struct {
|
|
||||||
wg sync.WaitGroup
|
|
||||||
root string
|
|
||||||
followSymlinks bool
|
|
||||||
walkFunc filepath.WalkFunc
|
|
||||||
}
|
|
||||||
|
|
||||||
// InitializeWorkers starts the number of workers defined by config.GetConfig().DirectoryCrawlers
|
|
||||||
func InitializeWorkers() {
|
|
||||||
WorkerPool = make(chan struct{}, config.GetConfig().DirectoryCrawlers)
|
|
||||||
Jobs = make(chan WalkJob, config.GetConfig().CacheSize)
|
|
||||||
for n := 1; n <= config.GetConfig().DirectoryCrawlers; n++ {
|
|
||||||
go worker()
|
|
||||||
}
|
|
||||||
log.Debugf("Started %d directory crawler workers.", config.GetConfig().DirectoryCrawlers)
|
|
||||||
}
|
|
||||||
|
|
||||||
// worker processes all the jobs until the jobs channel is explicitly closed
|
|
||||||
func worker() {
|
|
||||||
for job := range Jobs {
|
|
||||||
WorkerPool <- struct{}{} // acquire a worker
|
|
||||||
atomic.AddInt32(&BusyWorkers, 1) // increment the number of active workers
|
|
||||||
err := job.Walker.processPath(job.StartPath)
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("worker - %s - %s", job.StartPath, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
job.Walker.wg.Done() // decrement the WaitGroup counter
|
|
||||||
<-WorkerPool // release the worker when done
|
|
||||||
atomic.AddInt32(&BusyWorkers, -1) // decrement the number of active workers
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// addJob increments the job counter
|
|
||||||
// and pushes the path to the jobs channel
|
|
||||||
func (w *Walker) addJob(job WalkJob) {
|
|
||||||
w.wg.Add(1)
|
|
||||||
Jobs <- job
|
|
||||||
}
|
|
||||||
|
|
||||||
// the readDirNames function below was taken from the original
|
|
||||||
// implementation (see https://golang.org/src/path/filepath/path.go)
|
|
||||||
// but has sorting removed (sorting doesn't make sense
|
|
||||||
// in concurrent execution, anyway)
|
|
||||||
|
|
||||||
// readDirNames reads the directory named by dirname and returns
|
|
||||||
// a list of directory entries.
|
|
||||||
func readDirNames(dirname string) ([]string, error) {
|
|
||||||
f, err := os.Open(dirname)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
cerr := f.Close()
|
|
||||||
if err == nil {
|
|
||||||
err = cerr
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
names, err := f.Readdirnames(-1)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return names, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// lstat is a wrapper for os.Lstat which accepts a path
|
|
||||||
// relative to Walker.root and also follows symlinks
|
|
||||||
func (w *Walker) lstat(relPath string) (info os.FileInfo, err error) {
|
|
||||||
path := filepath.Join(w.root, relPath)
|
|
||||||
info, err = os.Lstat(path)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
// check if this is a symlink
|
|
||||||
if w.followSymlinks {
|
|
||||||
if info.Mode()&os.ModeSymlink > 0 {
|
|
||||||
path, err = filepath.EvalSymlinks(path)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
info, err = os.Lstat(path)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// processPath processes one directory and adds
|
|
||||||
// its subdirectories to the queue for further processing
|
|
||||||
func (w *Walker) processPath(relPath string) error {
|
|
||||||
fullPath := filepath.Join(w.root, relPath)
|
|
||||||
names, err := readDirNames(fullPath)
|
|
||||||
if err != nil {
|
|
||||||
log.Errorf("Walker - processPath - readDirNames - %s", err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, name := range names {
|
|
||||||
subPath := filepath.Join(relPath, name)
|
|
||||||
info, err := w.lstat(subPath)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("processPath - %s - %s", relPath, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if info == nil {
|
|
||||||
log.Warnf("processPath - %s - %s", relPath, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
subPathFull := filepath.Join(w.root, subPath)
|
|
||||||
err = w.walkFunc(subPathFull, info, err)
|
|
||||||
if errors.Is(err, filepath.SkipDir) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if info.Mode().IsDir() {
|
|
||||||
w.addJob(WalkJob{
|
|
||||||
StartPath: subPath,
|
|
||||||
Walker: w,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Walk recursively descends into subdirectories, calling walkFn for each file or directory
|
|
||||||
// in the tree, including the root directory.
|
|
||||||
func (w *Walker) Walk(relPath string, walkFn filepath.WalkFunc) error {
|
|
||||||
w.walkFunc = walkFn
|
|
||||||
|
|
||||||
fullPath := filepath.Join(w.root, relPath)
|
|
||||||
info, err := w.lstat(relPath)
|
|
||||||
err = w.walkFunc(fullPath, info, err)
|
|
||||||
if errors.Is(err, filepath.SkipDir) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if info == nil {
|
|
||||||
return fmt.Errorf("broken symlink: %s", relPath)
|
|
||||||
}
|
|
||||||
|
|
||||||
if !info.Mode().IsDir() {
|
|
||||||
return ErrNotDir
|
|
||||||
}
|
|
||||||
|
|
||||||
w.addJob(WalkJob{
|
|
||||||
StartPath: relPath,
|
|
||||||
Walker: w,
|
|
||||||
}) // add this path as a first job
|
|
||||||
w.wg.Wait() // wait till all paths are processed
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Walk is a wrapper function for the Walker object
|
|
||||||
// that mimics the behavior of filepath.Walk,
|
|
||||||
// and doesn't follow symlinks.
|
|
||||||
func Walk(root string, followSymlinks bool, walkFn filepath.WalkFunc) error {
|
|
||||||
w := Walker{
|
|
||||||
root: root,
|
|
||||||
followSymlinks: followSymlinks,
|
|
||||||
}
|
|
||||||
return w.Walk("", walkFn)
|
|
||||||
}
|
|
|
@ -2,6 +2,7 @@ package DirectoryCrawler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"crazyfs/CacheItem"
|
"crazyfs/CacheItem"
|
||||||
|
"crazyfs/Workers"
|
||||||
"crazyfs/config"
|
"crazyfs/config"
|
||||||
"crazyfs/file"
|
"crazyfs/file"
|
||||||
"errors"
|
"errors"
|
||||||
|
@ -78,7 +79,7 @@ func (dc *DirectoryCrawler) Crawl(fullPath string) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the path is a directory, start a walk
|
// If the path is a directory, start a walk
|
||||||
err := Walk(fullPath, config.FollowSymlinks, dc.walkRecursiveFunc)
|
err := Workers.Walk(fullPath, config.FollowSymlinks, dc.walkRecursiveFunc)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("CRAWLER - crawl for %s failed: %s", fullPath, err)
|
log.Errorf("CRAWLER - crawl for %s failed: %s", fullPath, err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@ package cache
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"crazyfs/CacheItem"
|
"crazyfs/CacheItem"
|
||||||
|
"crazyfs/Workers"
|
||||||
"crazyfs/cache/DirectoryCrawler"
|
"crazyfs/cache/DirectoryCrawler"
|
||||||
"crazyfs/config"
|
"crazyfs/config"
|
||||||
"crazyfs/logging"
|
"crazyfs/logging"
|
||||||
|
@ -59,7 +60,7 @@ func startCrawl(sharedCache *lru.Cache[string, *CacheItem.Item], wg *sync.WaitGr
|
||||||
func logCacheStatus(msg string, ticker *time.Ticker, sharedCache *lru.Cache[string, *CacheItem.Item], logFn func(format string, args ...interface{})) {
|
func logCacheStatus(msg string, ticker *time.Ticker, sharedCache *lru.Cache[string, *CacheItem.Item], logFn func(format string, args ...interface{})) {
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
for range ticker.C {
|
for range ticker.C {
|
||||||
logFn("%s - %d/%d items in the cache. Busy workers: %d, running crawls: %d",
|
logFn("%s - %d/%d items in the cache. Busy Workers: %d. Jobs remaining: %d. Running crawls: %d",
|
||||||
msg, len(sharedCache.Keys()), config.GetConfig().CacheSize, DirectoryCrawler.BusyWorkers, DirectoryCrawler.GetTotalActiveCrawls())
|
msg, len(sharedCache.Keys()), config.GetConfig().CacheSize, Workers.BusyWorkers, Workers.Queue.GetQueueSize(), DirectoryCrawler.GetTotalActiveCrawls())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -106,14 +106,6 @@ func SetConfig(configFile string) (*Config, error) {
|
||||||
rootDir = "/"
|
rootDir = "/"
|
||||||
}
|
}
|
||||||
|
|
||||||
//workersJobQueueSizeValue := viper.GetInt("crawler_worker_job_queue_size")
|
|
||||||
//var workersJobQueueSize int
|
|
||||||
//if workersJobQueueSizeValue == 0 {
|
|
||||||
// workersJobQueueSize = viper.GetInt("crawl_workers") * 100
|
|
||||||
//} else {
|
|
||||||
// workersJobQueueSize = workersJobQueueSizeValue
|
|
||||||
//}
|
|
||||||
|
|
||||||
config := &Config{
|
config := &Config{
|
||||||
RootDir: rootDir,
|
RootDir: rootDir,
|
||||||
HTTPPort: viper.GetString("http_port"),
|
HTTPPort: viper.GetString("http_port"),
|
||||||
|
|
|
@ -2,9 +2,9 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"crazyfs/CacheItem"
|
"crazyfs/CacheItem"
|
||||||
|
"crazyfs/Workers"
|
||||||
"crazyfs/api"
|
"crazyfs/api"
|
||||||
"crazyfs/cache"
|
"crazyfs/cache"
|
||||||
"crazyfs/cache/DirectoryCrawler"
|
|
||||||
"crazyfs/config"
|
"crazyfs/config"
|
||||||
"crazyfs/elastic"
|
"crazyfs/elastic"
|
||||||
"crazyfs/logging"
|
"crazyfs/logging"
|
||||||
|
@ -93,7 +93,7 @@ func main() {
|
||||||
|
|
||||||
log.Infof("Elasticsearch enabled: %t", cfg.ElasticsearchEnable)
|
log.Infof("Elasticsearch enabled: %t", cfg.ElasticsearchEnable)
|
||||||
|
|
||||||
DirectoryCrawler.InitializeWorkers()
|
Workers.InitializeWorkers()
|
||||||
|
|
||||||
cache.InitRecacheSemaphore(cfg.CacheRecacheCrawlerLimit)
|
cache.InitRecacheSemaphore(cfg.CacheRecacheCrawlerLimit)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue