dealing with memory usage

This commit is contained in:
Cyberes 2023-12-11 23:45:09 -07:00
parent 72e6355869
commit 82636792ea
8 changed files with 117 additions and 116 deletions

1
.gitignore vendored
View File

@ -1,6 +1,7 @@
.idea .idea
config.yml config.yml
config.yaml config.yaml
build/
# ---> Go # ---> Go
# If you prefer the allow list template instead of the deny list, see community template: # If you prefer the allow list template instead of the deny list, see community template:

View File

@ -1,102 +1,5 @@
package CacheItem package CacheItem
import (
"crazyfs/config"
"crazyfs/file"
"fmt"
"os"
"path/filepath"
"strings"
"time"
)
// PathOutsideRoot reports whether fullPath lies outside the configured RootDir.
//
// Both paths are cleaned and compared component-wise, so a sibling directory
// that merely shares a textual prefix with RootDir (RootDir "/data" versus
// "/database") and a path that escapes through ".." elements are both
// reported as outside.
func PathOutsideRoot(fullPath string) bool {
	rootDir := config.GetConfig().RootDir
	if rootDir == "" {
		// Every string has the empty prefix, so an unset RootDir has always
		// meant "nothing is outside". Keep that for backward compatibility.
		return false
	}

	rel, err := filepath.Rel(filepath.Clean(rootDir), filepath.Clean(fullPath))
	if err != nil {
		// The path cannot be expressed relative to the root (for example a
		// relative path checked against an absolute root).
		return true
	}

	// A relative path that is ".." or starts with "../" climbs out of the root.
	// A name that merely begins with two dots ("..foo") stays inside.
	return rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator))
}
// RetardCheck is a safety guard that panics when fullPath is not located
// inside the configured RootDir, so that no later operation ever touches
// anything outside of the root directory.
func RetardCheck(fullPath string) {
	if !PathOutsideRoot(fullPath) {
		return
	}

	rootDir := config.GetConfig().RootDir
	msg := fmt.Sprintf("NewItem was not passed an absolute path. The path must start with the RootDir (%s). Failing path: %s", rootDir, fullPath)
	panic(msg)
}
// NewItem builds the cache Item describing the file or directory at fullPath.
//
// fullPath must be located inside the configured RootDir; RetardCheck panics
// otherwise. info is the os.FileInfo the caller already obtained for fullPath.
//
// NewItem returns nil when the path does not exist, either up front or when
// MIME/encoding detection reports that the file is gone.
func NewItem(fullPath string, info os.FileInfo) *Item {
	RetardCheck(fullPath)

	if config.GetConfig().CachePrintNew {
		log.Debugf("CACHE - new: %s", fullPath)
	}

	isSymlink := info.Mode()&os.ModeSymlink != 0

	// The error is deliberately dropped: whenever PathExists does not report
	// true, the path is treated as missing.
	pathExists, _ := file.PathExists(fullPath)
	if !pathExists {
		// Missing symlinks are ignored quietly; anything else gets a warning.
		if !isSymlink {
			log.Warnf("NewItem - StartPath does not exist: %s", fullPath)
		}
		return nil
	}

	var mimeType, encoding, ext string
	if !info.IsDir() {
		// MIME detection looks at the symlink target when symlinks are followed.
		mimePath := fullPath
		if config.FollowSymlinks && isSymlink {
			resolved, err := filepath.EvalSymlinks(fullPath)
			if err != nil {
				// Keep using the link path instead of passing on an empty one.
				log.Warnf("NewItem - failed to resolve symlink %s - %v", fullPath, err)
			} else {
				mimePath = resolved
			}
		}

		// The MIME error is handled before encoding detection runs so that the
		// second call cannot overwrite it.
		var err error
		_, mimeType, ext, err = file.GetMimeType(mimePath, config.GetConfig().CrawlerParseMIME, &info)
		if os.IsNotExist(err) {
			log.Warnf("StartPath does not exist: %s", fullPath)
			return nil
		}
		if err != nil {
			log.Warnf("Error detecting MIME type of file %s - %v", fullPath, err)
		}

		if config.GetConfig().CrawlerParseEncoding {
			encoding, err = file.DetectFileEncoding(fullPath)
			if os.IsNotExist(err) {
				log.Warnf("StartPath does not exist: %s", fullPath)
				return nil
			}
			if err != nil {
				log.Warnf("ITEM - Error detecting file encoding of file %s - %v", fullPath, err)
				encoding = "utf-8" // fall back to utf-8
			}
		}
	}

	// MimeType and Extension stay nil when nothing was detected.
	var mimeTypePtr, extPtr *string
	if mimeType != "" {
		mimeTypePtr = &mimeType
	}
	if ext != "" {
		extPtr = &ext
	}

	return &Item{
		Path:      file.StripRootDir(fullPath),
		Name:      info.Name(),
		Size:      info.Size(),
		Extension: extPtr,
		Modified:  info.ModTime().UTC().Format(time.RFC3339Nano),
		Mode:      uint32(info.Mode().Perm()),
		IsDir:     info.IsDir(),
		IsSymlink: isSymlink,
		Cached:    time.Now().UnixNano() / int64(time.Millisecond), // Set the created time to now in milliseconds
		Children:  make([]string, 0),
		MimeType:  mimeTypePtr,
		Encoding:  &encoding,
	}
}
type Item struct { type Item struct {
Path string `json:"path"` Path string `json:"path"`
Name string `json:"name"` Name string `json:"name"`

98
src/CacheItem/NewItem.go Normal file
View File

@ -0,0 +1,98 @@
package CacheItem
import (
"crazyfs/config"
"crazyfs/file"
"fmt"
"os"
"path/filepath"
"strings"
"time"
)
// NewItem builds the cache Item describing the file or directory at fullPath.
//
// fullPath must be located inside the configured RootDir; RetardCheck panics
// otherwise. info is the os.FileInfo the caller already obtained for fullPath.
//
// NewItem returns nil when the path does not exist, either up front or when
// MIME/encoding detection reports that the file is gone.
func NewItem(fullPath string, info os.FileInfo) *Item {
	RetardCheck(fullPath)

	if config.GetConfig().CachePrintNew {
		log.Debugf("CACHE - new: %s", fullPath)
	}

	isSymlink := info.Mode()&os.ModeSymlink != 0

	// The error is deliberately dropped: whenever PathExists does not report
	// true, the path is treated as missing.
	pathExists, _ := file.PathExists(fullPath)
	if !pathExists {
		// Missing symlinks are ignored quietly; anything else gets a warning.
		if !isSymlink {
			log.Warnf("NewItem - StartPath does not exist: %s", fullPath)
		}
		return nil
	}

	var mimeType, encoding, ext string
	if !info.IsDir() {
		// MIME detection looks at the symlink target when symlinks are followed.
		mimePath := fullPath
		if config.FollowSymlinks && isSymlink {
			resolved, err := filepath.EvalSymlinks(fullPath)
			if err != nil {
				// Keep using the link path instead of passing on an empty one.
				log.Warnf("NewItem - failed to resolve symlink %s - %v", fullPath, err)
			} else {
				mimePath = resolved
			}
		}

		// The MIME error is handled before encoding detection runs so that the
		// second call cannot overwrite it.
		var err error
		_, mimeType, ext, err = file.GetMimeType(mimePath, config.GetConfig().CrawlerParseMIME, &info)
		if os.IsNotExist(err) {
			log.Warnf("StartPath does not exist: %s", fullPath)
			return nil
		}
		if err != nil {
			log.Warnf("Error detecting MIME type of file %s - %v", fullPath, err)
		}

		if config.GetConfig().CrawlerParseEncoding {
			encoding, err = file.DetectFileEncoding(fullPath)
			if os.IsNotExist(err) {
				log.Warnf("StartPath does not exist: %s", fullPath)
				return nil
			}
			if err != nil {
				log.Warnf("ITEM - Error detecting file encoding of file %s - %v", fullPath, err)
				encoding = "utf-8" // fall back to utf-8
			}
		}
	}

	// MimeType and Extension stay nil when nothing was detected.
	var mimeTypePtr, extPtr *string
	if mimeType != "" {
		mimeTypePtr = &mimeType
	}
	if ext != "" {
		extPtr = &ext
	}

	return &Item{
		Path:      file.StripRootDir(fullPath),
		Name:      info.Name(),
		Size:      info.Size(),
		Extension: extPtr,
		Modified:  info.ModTime().UTC().Format(time.RFC3339Nano),
		Mode:      uint32(info.Mode().Perm()),
		IsDir:     info.IsDir(),
		IsSymlink: isSymlink,
		Cached:    time.Now().UnixNano() / int64(time.Millisecond), // Set the created time to now in milliseconds
		Children:  make([]string, 0),
		MimeType:  mimeTypePtr,
		Encoding:  &encoding,
	}
}
// PathOutsideRoot reports whether fullPath lies outside the configured RootDir.
//
// Both paths are cleaned and compared component-wise, so a sibling directory
// that merely shares a textual prefix with RootDir (RootDir "/data" versus
// "/database") and a path that escapes through ".." elements are both
// reported as outside.
func PathOutsideRoot(fullPath string) bool {
	rootDir := config.GetConfig().RootDir
	if rootDir == "" {
		// Every string has the empty prefix, so an unset RootDir has always
		// meant "nothing is outside". Keep that for backward compatibility.
		return false
	}

	rel, err := filepath.Rel(filepath.Clean(rootDir), filepath.Clean(fullPath))
	if err != nil {
		// The path cannot be expressed relative to the root (for example a
		// relative path checked against an absolute root).
		return true
	}

	// A relative path that is ".." or starts with "../" climbs out of the root.
	// A name that merely begins with two dots ("..foo") stays inside.
	return rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator))
}
// RetardCheck is a safety guard that panics when fullPath is not located
// inside the configured RootDir, so that no later operation ever touches
// anything outside of the root directory.
func RetardCheck(fullPath string) {
	if !PathOutsideRoot(fullPath) {
		return
	}

	rootDir := config.GetConfig().RootDir
	msg := fmt.Sprintf("NewItem was not passed an absolute path. The path must start with the RootDir (%s). Failing path: %s", rootDir, fullPath)
	panic(msg)
}

View File

@ -50,14 +50,15 @@ func (w *Walker) processPath(relPath string) error {
log.Warnf("processPath - %s - %s", relPath, err) log.Warnf("processPath - %s - %s", relPath, err)
continue continue
} }
subPathFull := filepath.Join(w.root, subPath)
subPathFull := filepath.Join(fullPath, name)
err = w.walkFunc(subPathFull, info, err) err = w.walkFunc(subPathFull, info, err)
if errors.Is(err, filepath.SkipDir) { if errors.Is(err, filepath.SkipDir) {
return nil return nil
} }
// If this child is a directory, add it to the queue then move on. // If this child is a directory, add it to the queue then move on.
if info.Mode().IsDir() { if err == nil && info.Mode().IsDir() {
w.addJob(WalkJob{ w.addJob(WalkJob{
StartPath: subPath, StartPath: subPath,
Walker: w, Walker: w,

View File

@ -33,22 +33,20 @@ func readDirNames(dirname string) ([]string, error) {
return names, nil return names, nil
} }
// lstat is a wrapper for os.Lstat which accepts a path // lstat is a wrapper for os.Lstat which accepts a path relative to Walker.root and also follows symlinks
// relative to Walker.root and also follows symlinks
func (w *Walker) lstat(relPath string) (info os.FileInfo, err error) { func (w *Walker) lstat(relPath string) (info os.FileInfo, err error) {
path := filepath.Join(w.root, relPath) fullPath := filepath.Join(w.root, relPath)
info, err = os.Lstat(path) info, err = os.Lstat(fullPath)
if err != nil { if err != nil {
return nil, err return nil, err
} }
// check if this is a symlink
if w.followSymlinks { if w.followSymlinks {
if info.Mode()&os.ModeSymlink > 0 { if info.Mode()&os.ModeSymlink > 0 {
path, err = filepath.EvalSymlinks(path) fullPath, err = filepath.EvalSymlinks(fullPath)
if err != nil { if err != nil {
return nil, err return nil, err
} }
info, err = os.Lstat(path) info, err = os.Lstat(fullPath)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -39,6 +39,7 @@ func AdminCrawlsInfo(w http.ResponseWriter, r *http.Request, sharedCache *lru.Ca
"queue": map[string]interface{}{ "queue": map[string]interface{}{
"size": Workers.Queue.GetQueueSize(), "size": Workers.Queue.GetQueueSize(),
}, },
"initialCrawlElapsed": config.InitialCrawlElapsed,
} }
w.Header().Set("Cache-Control", "no-store") w.Header().Set("Cache-Control", "no-store")
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")

View File

@ -1,13 +1,6 @@
package config package config
var FollowSymlinks bool // Various global variables.
//var CachePrintNew bool var FollowSymlinks bool
//var RootDir string var InitialCrawlElapsed int
//var CrawlerParseMIME bool
//var MaxWorkers int
//var HttpAllowDuringInitialCrawl bool
//var RestrictedDownloadPaths []string
//var ElasticsearchEnable bool
//var ElasticsearchEndpoint string
//var ElasticsearchSyncInterval int

View File

@ -80,6 +80,7 @@ func main() {
var err error var err error
cfg, err := config.SetConfig(cliArgs.configFile) cfg, err := config.SetConfig(cliArgs.configFile)
if err != nil { if err != nil {
log.Infof("Using config file: %s", cliArgs.configFile)
log.Fatalf("Failed to load config file: %s", err) log.Fatalf("Failed to load config file: %s", err)
} }
@ -114,6 +115,7 @@ func main() {
cache.InitialCrawl(sharedCache) cache.InitialCrawl(sharedCache)
duration := time.Since(start).Round(time.Second) duration := time.Since(start).Round(time.Second)
keys := sharedCache.Keys() keys := sharedCache.Keys()
config.InitialCrawlElapsed = int(duration)
log.Infof("Initial crawl completed in %s. %d items added to the cache.", duration, len(keys)) log.Infof("Initial crawl completed in %s. %d items added to the cache.", duration, len(keys))
} }
@ -151,6 +153,10 @@ func main() {
} }
} }
go func() {
log.Println(http.ListenAndServe("localhost:6060", nil))
}()
select {} select {}
} }