dealing with memory usage
This commit is contained in:
parent
72e6355869
commit
82636792ea
|
@ -1,6 +1,7 @@
|
|||
.idea
|
||||
config.yml
|
||||
config.yaml
|
||||
build/
|
||||
|
||||
# ---> Go
|
||||
# If you prefer the allow list template instead of the deny list, see community template:
|
||||
|
|
|
@ -1,102 +1,5 @@
|
|||
package CacheItem
|
||||
|
||||
import (
|
||||
"crazyfs/config"
|
||||
"crazyfs/file"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
func PathOutsideRoot(fullPath string) bool {
|
||||
return !strings.HasPrefix(fullPath, config.GetConfig().RootDir)
|
||||
}
|
||||
|
||||
func RetardCheck(fullPath string) {
|
||||
// Make sure we never do anything outside of the root dir.
|
||||
if PathOutsideRoot(fullPath) {
|
||||
panic(fmt.Sprintf("NewItem was not passed an absolute path. The path must start with the RootDir (%s). Failing path: %s", config.GetConfig().RootDir, fullPath))
|
||||
}
|
||||
}
|
||||
|
||||
func NewItem(fullPath string, info os.FileInfo) *Item {
|
||||
RetardCheck(fullPath)
|
||||
if config.GetConfig().CachePrintNew {
|
||||
log.Debugf("CACHE - new: %s", fullPath)
|
||||
}
|
||||
|
||||
pathExists, _ := file.PathExists(fullPath)
|
||||
if !pathExists {
|
||||
if info.Mode()&os.ModeSymlink > 0 {
|
||||
// Ignore symlinks
|
||||
return nil
|
||||
} else {
|
||||
log.Warnf("NewItem - StartPath does not exist: %s", fullPath)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
var mimeType string
|
||||
var encoding string
|
||||
var ext string
|
||||
var err error
|
||||
if !info.IsDir() {
|
||||
var mimePath string
|
||||
if config.FollowSymlinks && info.Mode()&os.ModeSymlink > 0 {
|
||||
mimePath, _ = filepath.EvalSymlinks(fullPath)
|
||||
} else {
|
||||
mimePath = fullPath
|
||||
}
|
||||
|
||||
if config.GetConfig().CrawlerParseMIME {
|
||||
_, mimeType, ext, err = file.GetMimeType(mimePath, true, &info)
|
||||
} else {
|
||||
_, mimeType, ext, err = file.GetMimeType(mimePath, false, &info)
|
||||
}
|
||||
|
||||
if config.GetConfig().CrawlerParseEncoding {
|
||||
encoding, err = file.DetectFileEncoding(fullPath)
|
||||
if err != nil {
|
||||
log.Warnf("ITEM - Error detecting file encoding of file %s - %v", fullPath, err)
|
||||
encoding = "utf-8" // fall back to utf-8
|
||||
}
|
||||
}
|
||||
|
||||
if os.IsNotExist(err) {
|
||||
log.Warnf("StartPath does not exist: %s", fullPath)
|
||||
return nil
|
||||
} else if err != nil {
|
||||
log.Warnf("Error detecting MIME type of file %s - %v", fullPath, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Create pointers for mimeType and ext
|
||||
var mimeTypePtr, extPtr *string
|
||||
if mimeType != "" {
|
||||
mimeTypePtr = &mimeType
|
||||
}
|
||||
if ext != "" {
|
||||
extPtr = &ext
|
||||
}
|
||||
|
||||
return &Item{
|
||||
Path: file.StripRootDir(fullPath),
|
||||
Name: info.Name(),
|
||||
Size: info.Size(),
|
||||
Extension: extPtr,
|
||||
Modified: info.ModTime().UTC().Format(time.RFC3339Nano),
|
||||
Mode: uint32(info.Mode().Perm()),
|
||||
IsDir: info.IsDir(),
|
||||
IsSymlink: info.Mode()&os.ModeSymlink != 0,
|
||||
Cached: time.Now().UnixNano() / int64(time.Millisecond), // Set the created time to now in milliseconds
|
||||
Children: make([]string, 0),
|
||||
MimeType: mimeTypePtr,
|
||||
Encoding: &encoding,
|
||||
}
|
||||
}
|
||||
|
||||
type Item struct {
|
||||
Path string `json:"path"`
|
||||
Name string `json:"name"`
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
package CacheItem
|
||||
|
||||
import (
|
||||
"crazyfs/config"
|
||||
"crazyfs/file"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
func NewItem(fullPath string, info os.FileInfo) *Item {
|
||||
RetardCheck(fullPath)
|
||||
if config.GetConfig().CachePrintNew {
|
||||
log.Debugf("CACHE - new: %s", fullPath)
|
||||
}
|
||||
|
||||
pathExists, _ := file.PathExists(fullPath)
|
||||
if !pathExists {
|
||||
if info.Mode()&os.ModeSymlink > 0 {
|
||||
// Ignore symlinks
|
||||
return nil
|
||||
} else {
|
||||
log.Warnf("NewItem - StartPath does not exist: %s", fullPath)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
var mimeType string
|
||||
var encoding string
|
||||
var ext string
|
||||
var err error
|
||||
if !info.IsDir() {
|
||||
var mimePath string
|
||||
if config.FollowSymlinks && info.Mode()&os.ModeSymlink > 0 {
|
||||
mimePath, _ = filepath.EvalSymlinks(fullPath)
|
||||
} else {
|
||||
mimePath = fullPath
|
||||
}
|
||||
|
||||
if config.GetConfig().CrawlerParseMIME {
|
||||
_, mimeType, ext, err = file.GetMimeType(mimePath, true, &info)
|
||||
} else {
|
||||
_, mimeType, ext, err = file.GetMimeType(mimePath, false, &info)
|
||||
}
|
||||
|
||||
if config.GetConfig().CrawlerParseEncoding {
|
||||
encoding, err = file.DetectFileEncoding(fullPath)
|
||||
if err != nil {
|
||||
log.Warnf("ITEM - Error detecting file encoding of file %s - %v", fullPath, err)
|
||||
encoding = "utf-8" // fall back to utf-8
|
||||
}
|
||||
}
|
||||
|
||||
if os.IsNotExist(err) {
|
||||
log.Warnf("StartPath does not exist: %s", fullPath)
|
||||
return nil
|
||||
} else if err != nil {
|
||||
log.Warnf("Error detecting MIME type of file %s - %v", fullPath, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Create pointers for mimeType and ext
|
||||
var mimeTypePtr, extPtr *string
|
||||
if mimeType != "" {
|
||||
mimeTypePtr = &mimeType
|
||||
}
|
||||
if ext != "" {
|
||||
extPtr = &ext
|
||||
}
|
||||
|
||||
return &Item{
|
||||
Path: file.StripRootDir(fullPath),
|
||||
Name: info.Name(),
|
||||
Size: info.Size(),
|
||||
Extension: extPtr,
|
||||
Modified: info.ModTime().UTC().Format(time.RFC3339Nano),
|
||||
Mode: uint32(info.Mode().Perm()),
|
||||
IsDir: info.IsDir(),
|
||||
IsSymlink: info.Mode()&os.ModeSymlink != 0,
|
||||
Cached: time.Now().UnixNano() / int64(time.Millisecond), // Set the created time to now in milliseconds
|
||||
Children: make([]string, 0),
|
||||
MimeType: mimeTypePtr,
|
||||
Encoding: &encoding,
|
||||
}
|
||||
}
|
||||
|
||||
func PathOutsideRoot(fullPath string) bool {
|
||||
return !strings.HasPrefix(fullPath, config.GetConfig().RootDir)
|
||||
}
|
||||
|
||||
func RetardCheck(fullPath string) {
|
||||
// Make sure we never do anything outside of the root dir.
|
||||
if PathOutsideRoot(fullPath) {
|
||||
panic(fmt.Sprintf("NewItem was not passed an absolute path. The path must start with the RootDir (%s). Failing path: %s", config.GetConfig().RootDir, fullPath))
|
||||
}
|
||||
}
|
|
@ -50,14 +50,15 @@ func (w *Walker) processPath(relPath string) error {
|
|||
log.Warnf("processPath - %s - %s", relPath, err)
|
||||
continue
|
||||
}
|
||||
subPathFull := filepath.Join(w.root, subPath)
|
||||
|
||||
subPathFull := filepath.Join(fullPath, name)
|
||||
err = w.walkFunc(subPathFull, info, err)
|
||||
if errors.Is(err, filepath.SkipDir) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If this child is a directory, add it to the queue then move on.
|
||||
if info.Mode().IsDir() {
|
||||
if err == nil && info.Mode().IsDir() {
|
||||
w.addJob(WalkJob{
|
||||
StartPath: subPath,
|
||||
Walker: w,
|
||||
|
|
|
@ -33,22 +33,20 @@ func readDirNames(dirname string) ([]string, error) {
|
|||
return names, nil
|
||||
}
|
||||
|
||||
// lstat is a wrapper for os.Lstat which accepts a path
|
||||
// relative to Walker.root and also follows symlinks
|
||||
// lstat is a wrapper for os.Lstat which accepts a path relative to Walker.root and also follows symlinks
|
||||
func (w *Walker) lstat(relPath string) (info os.FileInfo, err error) {
|
||||
path := filepath.Join(w.root, relPath)
|
||||
info, err = os.Lstat(path)
|
||||
fullPath := filepath.Join(w.root, relPath)
|
||||
info, err = os.Lstat(fullPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// check if this is a symlink
|
||||
if w.followSymlinks {
|
||||
if info.Mode()&os.ModeSymlink > 0 {
|
||||
path, err = filepath.EvalSymlinks(path)
|
||||
fullPath, err = filepath.EvalSymlinks(fullPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
info, err = os.Lstat(path)
|
||||
info, err = os.Lstat(fullPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -39,6 +39,7 @@ func AdminCrawlsInfo(w http.ResponseWriter, r *http.Request, sharedCache *lru.Ca
|
|||
"queue": map[string]interface{}{
|
||||
"size": Workers.Queue.GetQueueSize(),
|
||||
},
|
||||
"initialCrawlElapsed": config.InitialCrawlElapsed,
|
||||
}
|
||||
w.Header().Set("Cache-Control", "no-store")
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
|
|
|
@ -1,13 +1,6 @@
|
|||
package config
|
||||
|
||||
var FollowSymlinks bool
|
||||
// Various global variables.
|
||||
|
||||
//var CachePrintNew bool
|
||||
//var RootDir string
|
||||
//var CrawlerParseMIME bool
|
||||
//var MaxWorkers int
|
||||
//var HttpAllowDuringInitialCrawl bool
|
||||
//var RestrictedDownloadPaths []string
|
||||
//var ElasticsearchEnable bool
|
||||
//var ElasticsearchEndpoint string
|
||||
//var ElasticsearchSyncInterval int
|
||||
var FollowSymlinks bool
|
||||
var InitialCrawlElapsed int
|
||||
|
|
|
@ -80,6 +80,7 @@ func main() {
|
|||
var err error
|
||||
cfg, err := config.SetConfig(cliArgs.configFile)
|
||||
if err != nil {
|
||||
log.Infof("Using config file: %s", cliArgs.configFile)
|
||||
log.Fatalf("Failed to load config file: %s", err)
|
||||
}
|
||||
|
||||
|
@ -114,6 +115,7 @@ func main() {
|
|||
cache.InitialCrawl(sharedCache)
|
||||
duration := time.Since(start).Round(time.Second)
|
||||
keys := sharedCache.Keys()
|
||||
config.InitialCrawlElapsed = int(duration)
|
||||
log.Infof("Initial crawl completed in %s. %d items added to the cache.", duration, len(keys))
|
||||
}
|
||||
|
||||
|
@ -151,6 +153,10 @@ func main() {
|
|||
}
|
||||
}
|
||||
|
||||
go func() {
|
||||
log.Println(http.ListenAndServe("localhost:6060", nil))
|
||||
}()
|
||||
|
||||
select {}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue