dealing with memory usage
This commit is contained in:
parent
72e6355869
commit
82636792ea
|
@ -1,6 +1,7 @@
|
||||||
.idea
|
.idea
|
||||||
config.yml
|
config.yml
|
||||||
config.yaml
|
config.yaml
|
||||||
|
build/
|
||||||
|
|
||||||
# ---> Go
|
# ---> Go
|
||||||
# If you prefer the allow list template instead of the deny list, see community template:
|
# If you prefer the allow list template instead of the deny list, see community template:
|
||||||
|
|
|
@ -1,102 +1,5 @@
|
||||||
package CacheItem
|
package CacheItem
|
||||||
|
|
||||||
import (
|
|
||||||
"crazyfs/config"
|
|
||||||
"crazyfs/file"
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
)
|
|
||||||
|
|
||||||
func PathOutsideRoot(fullPath string) bool {
|
|
||||||
return !strings.HasPrefix(fullPath, config.GetConfig().RootDir)
|
|
||||||
}
|
|
||||||
|
|
||||||
func RetardCheck(fullPath string) {
|
|
||||||
// Make sure we never do anything outside of the root dir.
|
|
||||||
if PathOutsideRoot(fullPath) {
|
|
||||||
panic(fmt.Sprintf("NewItem was not passed an absolute path. The path must start with the RootDir (%s). Failing path: %s", config.GetConfig().RootDir, fullPath))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewItem(fullPath string, info os.FileInfo) *Item {
|
|
||||||
RetardCheck(fullPath)
|
|
||||||
if config.GetConfig().CachePrintNew {
|
|
||||||
log.Debugf("CACHE - new: %s", fullPath)
|
|
||||||
}
|
|
||||||
|
|
||||||
pathExists, _ := file.PathExists(fullPath)
|
|
||||||
if !pathExists {
|
|
||||||
if info.Mode()&os.ModeSymlink > 0 {
|
|
||||||
// Ignore symlinks
|
|
||||||
return nil
|
|
||||||
} else {
|
|
||||||
log.Warnf("NewItem - StartPath does not exist: %s", fullPath)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var mimeType string
|
|
||||||
var encoding string
|
|
||||||
var ext string
|
|
||||||
var err error
|
|
||||||
if !info.IsDir() {
|
|
||||||
var mimePath string
|
|
||||||
if config.FollowSymlinks && info.Mode()&os.ModeSymlink > 0 {
|
|
||||||
mimePath, _ = filepath.EvalSymlinks(fullPath)
|
|
||||||
} else {
|
|
||||||
mimePath = fullPath
|
|
||||||
}
|
|
||||||
|
|
||||||
if config.GetConfig().CrawlerParseMIME {
|
|
||||||
_, mimeType, ext, err = file.GetMimeType(mimePath, true, &info)
|
|
||||||
} else {
|
|
||||||
_, mimeType, ext, err = file.GetMimeType(mimePath, false, &info)
|
|
||||||
}
|
|
||||||
|
|
||||||
if config.GetConfig().CrawlerParseEncoding {
|
|
||||||
encoding, err = file.DetectFileEncoding(fullPath)
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("ITEM - Error detecting file encoding of file %s - %v", fullPath, err)
|
|
||||||
encoding = "utf-8" // fall back to utf-8
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if os.IsNotExist(err) {
|
|
||||||
log.Warnf("StartPath does not exist: %s", fullPath)
|
|
||||||
return nil
|
|
||||||
} else if err != nil {
|
|
||||||
log.Warnf("Error detecting MIME type of file %s - %v", fullPath, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create pointers for mimeType and ext
|
|
||||||
var mimeTypePtr, extPtr *string
|
|
||||||
if mimeType != "" {
|
|
||||||
mimeTypePtr = &mimeType
|
|
||||||
}
|
|
||||||
if ext != "" {
|
|
||||||
extPtr = &ext
|
|
||||||
}
|
|
||||||
|
|
||||||
return &Item{
|
|
||||||
Path: file.StripRootDir(fullPath),
|
|
||||||
Name: info.Name(),
|
|
||||||
Size: info.Size(),
|
|
||||||
Extension: extPtr,
|
|
||||||
Modified: info.ModTime().UTC().Format(time.RFC3339Nano),
|
|
||||||
Mode: uint32(info.Mode().Perm()),
|
|
||||||
IsDir: info.IsDir(),
|
|
||||||
IsSymlink: info.Mode()&os.ModeSymlink != 0,
|
|
||||||
Cached: time.Now().UnixNano() / int64(time.Millisecond), // Set the created time to now in milliseconds
|
|
||||||
Children: make([]string, 0),
|
|
||||||
MimeType: mimeTypePtr,
|
|
||||||
Encoding: &encoding,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type Item struct {
|
type Item struct {
|
||||||
Path string `json:"path"`
|
Path string `json:"path"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
|
|
|
@ -0,0 +1,98 @@
|
||||||
|
package CacheItem
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crazyfs/config"
|
||||||
|
"crazyfs/file"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func NewItem(fullPath string, info os.FileInfo) *Item {
|
||||||
|
RetardCheck(fullPath)
|
||||||
|
if config.GetConfig().CachePrintNew {
|
||||||
|
log.Debugf("CACHE - new: %s", fullPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
pathExists, _ := file.PathExists(fullPath)
|
||||||
|
if !pathExists {
|
||||||
|
if info.Mode()&os.ModeSymlink > 0 {
|
||||||
|
// Ignore symlinks
|
||||||
|
return nil
|
||||||
|
} else {
|
||||||
|
log.Warnf("NewItem - StartPath does not exist: %s", fullPath)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var mimeType string
|
||||||
|
var encoding string
|
||||||
|
var ext string
|
||||||
|
var err error
|
||||||
|
if !info.IsDir() {
|
||||||
|
var mimePath string
|
||||||
|
if config.FollowSymlinks && info.Mode()&os.ModeSymlink > 0 {
|
||||||
|
mimePath, _ = filepath.EvalSymlinks(fullPath)
|
||||||
|
} else {
|
||||||
|
mimePath = fullPath
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.GetConfig().CrawlerParseMIME {
|
||||||
|
_, mimeType, ext, err = file.GetMimeType(mimePath, true, &info)
|
||||||
|
} else {
|
||||||
|
_, mimeType, ext, err = file.GetMimeType(mimePath, false, &info)
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.GetConfig().CrawlerParseEncoding {
|
||||||
|
encoding, err = file.DetectFileEncoding(fullPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("ITEM - Error detecting file encoding of file %s - %v", fullPath, err)
|
||||||
|
encoding = "utf-8" // fall back to utf-8
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
log.Warnf("StartPath does not exist: %s", fullPath)
|
||||||
|
return nil
|
||||||
|
} else if err != nil {
|
||||||
|
log.Warnf("Error detecting MIME type of file %s - %v", fullPath, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create pointers for mimeType and ext
|
||||||
|
var mimeTypePtr, extPtr *string
|
||||||
|
if mimeType != "" {
|
||||||
|
mimeTypePtr = &mimeType
|
||||||
|
}
|
||||||
|
if ext != "" {
|
||||||
|
extPtr = &ext
|
||||||
|
}
|
||||||
|
|
||||||
|
return &Item{
|
||||||
|
Path: file.StripRootDir(fullPath),
|
||||||
|
Name: info.Name(),
|
||||||
|
Size: info.Size(),
|
||||||
|
Extension: extPtr,
|
||||||
|
Modified: info.ModTime().UTC().Format(time.RFC3339Nano),
|
||||||
|
Mode: uint32(info.Mode().Perm()),
|
||||||
|
IsDir: info.IsDir(),
|
||||||
|
IsSymlink: info.Mode()&os.ModeSymlink != 0,
|
||||||
|
Cached: time.Now().UnixNano() / int64(time.Millisecond), // Set the created time to now in milliseconds
|
||||||
|
Children: make([]string, 0),
|
||||||
|
MimeType: mimeTypePtr,
|
||||||
|
Encoding: &encoding,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func PathOutsideRoot(fullPath string) bool {
|
||||||
|
return !strings.HasPrefix(fullPath, config.GetConfig().RootDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
func RetardCheck(fullPath string) {
|
||||||
|
// Make sure we never do anything outside of the root dir.
|
||||||
|
if PathOutsideRoot(fullPath) {
|
||||||
|
panic(fmt.Sprintf("NewItem was not passed an absolute path. The path must start with the RootDir (%s). Failing path: %s", config.GetConfig().RootDir, fullPath))
|
||||||
|
}
|
||||||
|
}
|
|
@ -50,14 +50,15 @@ func (w *Walker) processPath(relPath string) error {
|
||||||
log.Warnf("processPath - %s - %s", relPath, err)
|
log.Warnf("processPath - %s - %s", relPath, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
subPathFull := filepath.Join(w.root, subPath)
|
|
||||||
|
subPathFull := filepath.Join(fullPath, name)
|
||||||
err = w.walkFunc(subPathFull, info, err)
|
err = w.walkFunc(subPathFull, info, err)
|
||||||
if errors.Is(err, filepath.SkipDir) {
|
if errors.Is(err, filepath.SkipDir) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// If this child is a directory, add it to the queue then move on.
|
// If this child is a directory, add it to the queue then move on.
|
||||||
if info.Mode().IsDir() {
|
if err == nil && info.Mode().IsDir() {
|
||||||
w.addJob(WalkJob{
|
w.addJob(WalkJob{
|
||||||
StartPath: subPath,
|
StartPath: subPath,
|
||||||
Walker: w,
|
Walker: w,
|
||||||
|
|
|
@ -33,22 +33,20 @@ func readDirNames(dirname string) ([]string, error) {
|
||||||
return names, nil
|
return names, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// lstat is a wrapper for os.Lstat which accepts a path
|
// lstat is a wrapper for os.Lstat which accepts a path relative to Walker.root and also follows symlinks
|
||||||
// relative to Walker.root and also follows symlinks
|
|
||||||
func (w *Walker) lstat(relPath string) (info os.FileInfo, err error) {
|
func (w *Walker) lstat(relPath string) (info os.FileInfo, err error) {
|
||||||
path := filepath.Join(w.root, relPath)
|
fullPath := filepath.Join(w.root, relPath)
|
||||||
info, err = os.Lstat(path)
|
info, err = os.Lstat(fullPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
// check if this is a symlink
|
|
||||||
if w.followSymlinks {
|
if w.followSymlinks {
|
||||||
if info.Mode()&os.ModeSymlink > 0 {
|
if info.Mode()&os.ModeSymlink > 0 {
|
||||||
path, err = filepath.EvalSymlinks(path)
|
fullPath, err = filepath.EvalSymlinks(fullPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
info, err = os.Lstat(path)
|
info, err = os.Lstat(fullPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,7 @@ func AdminCrawlsInfo(w http.ResponseWriter, r *http.Request, sharedCache *lru.Ca
|
||||||
"queue": map[string]interface{}{
|
"queue": map[string]interface{}{
|
||||||
"size": Workers.Queue.GetQueueSize(),
|
"size": Workers.Queue.GetQueueSize(),
|
||||||
},
|
},
|
||||||
|
"initialCrawlElapsed": config.InitialCrawlElapsed,
|
||||||
}
|
}
|
||||||
w.Header().Set("Cache-Control", "no-store")
|
w.Header().Set("Cache-Control", "no-store")
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
|
|
@ -1,13 +1,6 @@
|
||||||
package config
|
package config
|
||||||
|
|
||||||
var FollowSymlinks bool
|
// Various global variables.
|
||||||
|
|
||||||
//var CachePrintNew bool
|
var FollowSymlinks bool
|
||||||
//var RootDir string
|
var InitialCrawlElapsed int
|
||||||
//var CrawlerParseMIME bool
|
|
||||||
//var MaxWorkers int
|
|
||||||
//var HttpAllowDuringInitialCrawl bool
|
|
||||||
//var RestrictedDownloadPaths []string
|
|
||||||
//var ElasticsearchEnable bool
|
|
||||||
//var ElasticsearchEndpoint string
|
|
||||||
//var ElasticsearchSyncInterval int
|
|
||||||
|
|
|
@ -80,6 +80,7 @@ func main() {
|
||||||
var err error
|
var err error
|
||||||
cfg, err := config.SetConfig(cliArgs.configFile)
|
cfg, err := config.SetConfig(cliArgs.configFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
log.Infof("Using config file: %s", cliArgs.configFile)
|
||||||
log.Fatalf("Failed to load config file: %s", err)
|
log.Fatalf("Failed to load config file: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -114,6 +115,7 @@ func main() {
|
||||||
cache.InitialCrawl(sharedCache)
|
cache.InitialCrawl(sharedCache)
|
||||||
duration := time.Since(start).Round(time.Second)
|
duration := time.Since(start).Round(time.Second)
|
||||||
keys := sharedCache.Keys()
|
keys := sharedCache.Keys()
|
||||||
|
config.InitialCrawlElapsed = int(duration)
|
||||||
log.Infof("Initial crawl completed in %s. %d items added to the cache.", duration, len(keys))
|
log.Infof("Initial crawl completed in %s. %d items added to the cache.", duration, len(keys))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -151,6 +153,10 @@ func main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
log.Println(http.ListenAndServe("localhost:6060", nil))
|
||||||
|
}()
|
||||||
|
|
||||||
select {}
|
select {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue