redo error handling, bug fixes
parent dc3b164520
commit 9a450571bd
@@ -10,7 +10,8 @@ import (
 	"time"
 )
 
-// HandleFileNotFound if the data is not in the cache, start a new crawler
+// HandleFileNotFound if the data is not in the cache, start a new crawler.
+// If it encounters an error, it will return a bad status code to the HTTP client and log the error.
 func HandleFileNotFound(relPath string, fullPath string, w http.ResponseWriter) *cacheitem.Item {
 	// TODO: implement some sort of backoff or delay for repeated calls to recache the same path.
 
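A note on the TODO above: one way to implement the backoff would be a timestamp map guarded by a mutex. This is a minimal sketch only, not part of the commit; the names `recacheBackoff`, `backoffWindow`, and `shouldRecache` are hypothetical.

package main

import (
    "fmt"
    "sync"
    "time"
)

var (
    mu             sync.Mutex
    recacheBackoff = map[string]time.Time{}
    backoffWindow  = 30 * time.Second // assumed window, tune as needed
)

// shouldRecache reports whether relPath may be recached now, recording the
// attempt so repeated calls inside the window are rejected.
func shouldRecache(relPath string) bool {
    mu.Lock()
    defer mu.Unlock()
    if t, ok := recacheBackoff[relPath]; ok && time.Since(t) < backoffWindow {
        return false
    }
    recacheBackoff[relPath] = time.Now()
    return true
}

func main() {
    fmt.Println(shouldRecache("/docs")) // true: first call goes through
    fmt.Println(shouldRecache("/docs")) // false: still inside the window
}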
@@ -72,7 +72,7 @@ func APISearch(w http.ResponseWriter, r *http.Request) {
 	// Perform the Elasticsearch query
 	resp, err := elastic.SimpleQuery(queryString, excludeElements)
 	if err != nil {
-		log.Errorf(`ROUTES:APISearch - Failed to perform Elasticsearch query "%s" - %s`, queryString, err)
+		log.Errorf(`ROUTES:Search - Failed to perform Elasticsearch query "%s" - %s`, queryString, err)
 		helpers.Return500Msg(w)
 		return
 	}
@@ -81,7 +81,7 @@ func APISearch(w http.ResponseWriter, r *http.Request) {
 	var respData map[string]interface{}
 	err = json.NewDecoder(resp.Body).Decode(&respData)
 	if err != nil {
-		log.Errorf(`ROUTES:APISearch - Failed to parse Elasticsearch response for query "%s" - %s`, queryString, err)
+		log.Errorf(`ROUTES:Search - Failed to parse Elasticsearch response for query "%s" - %s`, queryString, err)
 		helpers.Return500Msg(w)
 		return
 	}
@@ -152,7 +152,7 @@ func APISearch(w http.ResponseWriter, r *http.Request) {
 	}
 
 	searchDuration := time.Since(searchStart) // .Round(time.Second)
-	log.Debugf(`ROUTES:APISearch - %s - Query: "%s" - Results: %d - Elapsed: %d`, logging.GetRealIP(r), queryString, len(results), searchDuration)
+	log.Debugf(`ROUTES:Search - %s - Query: "%s" - Results: %d - Elapsed: %d`, logging.GetRealIP(r), queryString, len(results), searchDuration)
 
 	response := map[string]interface{}{
 		"results": results,
@@ -36,6 +36,22 @@ func APIAdminCrawlsInfo(w http.ResponseWriter, r *http.Request) {
 		elastic.FullSyncRunning.Release(1)
 	}
 
+	var elasticWorkers map[string]interface{}
+	if (refreshSyncRunning || fullSyncRunning) && globals.ElasticCrawlers != nil {
+		// If no sync is running then these vars will not exist.
+		elasticWorkers = map[string]interface{}{
+			"busy":      globals.ElasticCrawlers.BusyWorkers,
+			"alive":     config.GetConfig().ElasticsearchSyncThreads,
+			"queueSize": globals.ElasticCrawlers.Queue.GetQueueSize(),
+		}
+	} else {
+		elasticWorkers = map[string]interface{}{
+			"busy":      0,
+			"alive":     0,
+			"queueSize": 0,
+		}
+	}
+
 	response := map[string]interface{}{
 		"crawls": map[string]interface{}{
 			"active": directorycrawler.GetActiveCrawls(),
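With the guard above, the `elastic.workers` object is always present in the admin response, just zeroed when no sync is running. A hedged sketch of the assumed shape of that section, as a standalone program:

package main

import (
    "encoding/json"
    "fmt"
)

func main() {
    // Assumed shape of the "elastic" section when no sync is running:
    // the workers object still exists, with zeroed counters.
    elasticWorkers := map[string]interface{}{
        "busy": 0, "alive": 0, "queueSize": 0,
    }
    out, _ := json.Marshal(map[string]interface{}{
        "workers":     elasticWorkers,
        "syncRunning": map[string]interface{}{"refresh": false, "full": false},
    })
    fmt.Println(string(out))
}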
@@ -46,15 +62,11 @@ func APIAdminCrawlsInfo(w http.ResponseWriter, r *http.Request) {
 			"alive": config.GetConfig().DirectoryCrawlers,
 		},
 		"queue": map[string]interface{}{
-			"size": globals.DirectoryCrawlers.Queue.GetQueueSize(),
+			"items": globals.DirectoryCrawlers.Queue.GetQueueSize(),
 		},
 		"initialCrawlElapsed": config.InitialCrawlElapsed,
 		"elastic": map[string]interface{}{
-			"deleteWorkers": map[string]interface{}{
-				"busy":      globals.ElasticCrawlers.BusyWorkers,
-				"alive":     config.GetConfig().ElasticsearchSyncThreads,
-				"queueSize": globals.ElasticCrawlers.Queue.GetQueueSize(),
-			},
+			"workers": elasticWorkers,
 			"syncRunning": map[string]interface{}{
 				"refresh": refreshSyncRunning,
 				"full":    fullSyncRunning,
@@ -6,7 +6,6 @@ import (
 	"crazyfs/config"
 	"crazyfs/file"
 	"encoding/json"
-	"fmt"
 	"net/http"
 )
 
@@ -31,13 +30,13 @@ func APIAdminRecache(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	pathArg := requestBody["path"]
+	relPathArg := requestBody["path"]
 
 	// Clean the path to prevent directory traversal
-	fullPath, errJoin := file.SafeJoin(pathArg)
-	traversalAttack, errTraverse := file.DetectTraversal(pathArg)
-	if traversalAttack || errJoin != nil {
-		log.Errorf("ROUTES:ADMIN:Recache - Failed to clean path: %s - error: %s - traversal attack detected: %t - traversal attack detection: %s", pathArg, errJoin, traversalAttack, errTraverse)
+	fullPath := file.SafeJoinRoot(relPathArg)
+	traversalAttack, errTraverse := file.DetectTraversal(relPathArg)
+	if traversalAttack {
+		log.Errorf(`ROUTES:ADMIN:Recache - invalid path: "%s". Error: %s`, relPathArg, errTraverse)
 		helpers.Return400Msg("invalid file path", w)
 		return
 	}
@@ -49,7 +48,7 @@ func APIAdminRecache(w http.ResponseWriter, r *http.Request) {
 	// Check and re-cache the directory
 	err = cache.Recache(fullPath)
 	if err != nil {
-		response["message"] = fmt.Sprintf("recache failed")
+		response["message"] = "recache failed"
 		response["error"] = err.Error()
 		w.WriteHeader(http.StatusConflict)
 		log.Errorf("Admin triggered recache for %s - %s", fullPath, err)
@@ -17,20 +17,20 @@ func APIDownload(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	pathArg := r.URL.Query().Get("path")
-	if pathArg == "" {
+	relPathArg := r.URL.Query().Get("path")
+	if relPathArg == "" {
 		helpers.Return400Msg("missing path", w)
 		return
 	}
 
-	paths := strings.Split(pathArg, ",")
+	paths := strings.Split(relPathArg, ",")
 	var cleanPaths []string
 	if len(paths) > 1 {
 		for _, path := range paths {
-			cleanPath, errJoin := file.SafeJoin(path)
+			cleanPath := file.SafeJoinRoot(path)
 			traversalAttack, errTraverse := file.DetectTraversal(path)
-			if traversalAttack || errJoin != nil {
-				log.Errorf("ROUTES:Download - Failed to clean path: %s - error: %s - traversal attack detected: %t - traversal attack detection: %s", path, errJoin, traversalAttack, errTraverse)
+			if traversalAttack {
+				log.Errorf(`ROUTES:Download - invalid path: "%s". Error: %s`, path, errTraverse)
 				helpers.Return400Msg("invalid file path", w)
 				return
 			}
@@ -50,10 +50,10 @@ func APIDownload(w http.ResponseWriter, r *http.Request) {
 	}
 
 	// Single file or directory
-	fullPath, errJoin := file.SafeJoin(pathArg)
-	traversalAttack, errTraverse := file.DetectTraversal(pathArg)
-	if traversalAttack || errJoin != nil {
-		log.Errorf("ROUTES:Download - Failed to clean path: %s - error: %s - traversal attack detected: %t - traversal attack detection: %s", pathArg, errJoin, traversalAttack, errTraverse)
+	fullPath := file.SafeJoinRoot(relPathArg)
+	traversalAttack, errTraverse := file.DetectTraversal(relPathArg)
+	if traversalAttack {
+		log.Errorf(`ROUTES:Download - invalid path: "%s". Error: %s`, relPathArg, errTraverse)
 		helpers.Return400Msg("invalid file path", w)
 		return
 	}
@@ -97,7 +97,7 @@ func APIDownload(w http.ResponseWriter, r *http.Request) {
 	var mimeType string
 	var err error
 	if item.MimeType == nil { // only if the MIME type of this item has not been set yet
-		_, mimeType, _, err = file.GetMimeType(fullPath, true, nil)
+		_, mimeType, _, err = file.GetMimeType(fullPath, true)
 		if err != nil {
 			log.Errorf("ROUTES:Download - Error detecting MIME type: %v", err)
 		} else if mimeType != "" {
@@ -16,8 +16,8 @@ func APIList(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	pathArg := r.URL.Query().Get("path")
-	if pathArg == "" {
+	relPathArg := r.URL.Query().Get("path")
+	if relPathArg == "" {
 		helpers.Return400Msg("path parameter is required", w)
 		return
 	}
@@ -35,10 +35,10 @@ func APIList(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	fullPath, errJoin := file.SafeJoin(pathArg)
-	traversalAttack, errTraverse := file.DetectTraversal(pathArg)
-	if traversalAttack || errJoin != nil {
-		log.Errorf("ROUTES:List - Failed to clean path: %s - error: %s - traversal attack detected: %t - traversal attack detection: %s", pathArg, errJoin, traversalAttack, errTraverse)
+	fullPath := file.SafeJoinRoot(relPathArg)
+	traversalAttack, errTraverse := file.DetectTraversal(relPathArg)
+	if traversalAttack {
+		log.Errorf(`ROUTES:List - invalid path: "%s". Error: %s`, relPathArg, errTraverse)
 		helpers.Return400Msg("invalid file path", w)
 		return
 	}
@@ -62,21 +62,20 @@ func APIList(w http.ResponseWriter, r *http.Request) {
 		return
 	} else {
 		// Only update the mime in the cache if it hasn't been set already.
-		// TODO: need to make sure that when a re-crawl is triggered, the MimeType is set back to nil
 		if cacheItem.MimeType == nil {
-			fileExists, mimeType, ext, err := file.GetMimeType(fullPath, true, nil)
-			if !fileExists {
-				helpers.ReturnFake404Msg("file not found", w)
-			}
+			fileExists, mimeType, ext, err := file.GetMimeType(fullPath, true)
 			if err != nil {
 				log.Warnf("ROUTES:List - Error detecting MIME type: %v", err)
 				helpers.Return500Msg(w)
 				return
 			}
-			// Update the original cached cacheitem's MIME in the sharedCache
+			if !fileExists {
+				helpers.ReturnFake404Msg("file not found", w)
+			}
+			// Update the original cached item's MIME in the sharedCache.
 			cacheItem.MimeType = &mimeType
 			cacheItem.Extension = &ext
-			sharedcache.Cache.Add(relPath, cacheItem) // take the address of cacheitem
+			sharedcache.Cache.Add(relPath, cacheItem) // take the address of item
 		}
 	}
 }
@@ -6,7 +6,6 @@ import (
 	"crazyfs/cache"
 	"crazyfs/config"
 	"crazyfs/file"
-	"crazyfs/logging"
 	"crazyfs/sharedcache"
 	"fmt"
 	"github.com/disintegration/imaging"
@@ -17,23 +16,23 @@ import (
 	"image/color"
 	"image/png"
 	"net/http"
-	"path/filepath"
 	"strings"
 )
 
 func APIThumbnail(w http.ResponseWriter, r *http.Request) {
 	if cache.InitialCrawlInProgress && !config.GetConfig().HttpAllowDuringInitialCrawl {
-		helpers.HandleRejectDuringInitialCrawl(w)
 		returnDummyPNG(w)
 		return
 	}
 
-	log := logging.GetLogger()
-	relPath := file.StripRootDir(filepath.Join(config.GetConfig().RootDir, r.URL.Query().Get("path")))
-	relPath = strings.TrimSuffix(relPath, "/")
-	fullPath := filepath.Join(config.GetConfig().RootDir, relPath)
+	// Validate the path arg.
+	relPathArg := r.URL.Query().Get("path")
+	if relPathArg == "" {
+		helpers.Return400Msg("path parameter is required", w)
+		return
+	}
 
-	// Validate args before doing any operations
+	// Validate height and width args.
 	width, err := getPositiveIntFromQuery(r, "width")
 	if err != nil {
 		helpers.Return400Msg("height and width must both be positive numbers", w)
@@ -45,6 +44,7 @@ func APIThumbnail(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
+	// Validate the quality arg.
 	pngQuality, err := getPositiveIntFromQuery(r, "quality")
 	if err != nil {
 		helpers.Return400Msg("quality must be a positive number", w)
@@ -54,6 +54,7 @@ func APIThumbnail(w http.ResponseWriter, r *http.Request) {
 		pngQuality = 50
 	}
 
+	// Validate the scale args.
 	autoScale := r.URL.Query().Get("auto") != ""
 	square := r.URL.Query().Get("square") != ""
 	if (width != 0 && height != 0) && (width != height) {
@@ -61,33 +62,42 @@ func APIThumbnail(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	// Try to get the data from the cache
+	// Do path operations last since most of our CPU and memory usage comes from string operations.
+	fullPath := file.SafeJoinRoot(relPathArg) // Form the full path first, which also cleans it.
+	traversalAttack, errTraverse := file.DetectTraversal(relPathArg) // Verify that this isn't an invalid path.
+	if traversalAttack {
+		log.Errorf(`ROUTES:Thumb - invalid path: "%s". Error: %s`, relPathArg, errTraverse)
+		helpers.Return400Msg("invalid path", w)
+		return
+	}
+	relPath := file.StripRootDir(fullPath) // Then re-create the relative path from the clean path.
+
+	// Try to get the item from the cache.
 	item, found := sharedcache.Cache.Get(relPath)
 	if !found {
 		item = helpers.HandleFileNotFound(relPath, fullPath, w)
 	}
 	if item == nil {
-		returnDummyPNG(w)
+		helpers.Return400Msg("file not found", w)
 		return
 	}
 
 	if item.IsDir {
 		helpers.Return400Msg("that's a directory", w)
 		return
 	}
 
 	// Get the MIME type of the file
-	fileExists, mimeType, ext, err := file.GetMimeType(fullPath, true, nil)
+	fileExists, mimeType, ext, err := file.GetMimeType(fullPath, true)
 	if !fileExists {
 		helpers.Return400Msg("file not found", w)
 		return
 	}
 	if err != nil {
-		log.Errorf("ROUTES:Thumb - Error detecting MIME type: %v", err)
-		returnDummyPNG(w)
+		log.Warnf(`ROUTES:Thumb - Error detecting MIME type for "%s". %v`, fullPath, err)
+		helpers.Return500Msg(w)
 		return
 	}
-	// Update the cacheitem's MIME in the sharedCache
+	// Update the item's MIME in the cache.
 	item.MimeType = &mimeType
 	item.Extension = &ext
 	sharedcache.Cache.Add(relPath, item)
@@ -101,8 +111,8 @@ func APIThumbnail(w http.ResponseWriter, r *http.Request) {
 	// Convert the image to a PNG
 	imageBytes, err := file.ConvertToPNG(fullPath, mimeType)
 	if err != nil {
-		log.Warnf("ROUTES:Thumb - Error converting %s to PNG: %v", fullPath, err)
-		returnDummyPNG(w)
+		log.Warnf(`ROUTES:Thumb - Error converting "%s". %v`, fullPath, err)
+		helpers.Return500Msg(w)
 		return
 	}
 
@@ -110,22 +120,23 @@ func APIThumbnail(w http.ResponseWriter, r *http.Request) {
 	var img image.Image
 	img, err = png.Decode(bytes.NewReader(imageBytes))
 	if err != nil {
-		log.Warnf("ROUTES:Thumb - Error decoding %s image data: %v", fullPath, err)
-		returnDummyPNG(w)
+		log.Warnf(`ROUTES:Thumb - Error decoding "%s". %v`, fullPath, err)
+		helpers.Return500Msg(w)
 		return
 	}
 
 	// Resize the image
 	img, err = resizeImage(img, width, height, square, autoScale)
 	if err != nil {
-		helpers.Return400Msg(err.Error(), w)
+		log.Warnf(`ROUTES:Thumb - Error resizing "%s". %v`, fullPath, err)
+		helpers.Return500Msg(w)
 		return
 	}
 
 	buf, err := file.CompressPNGFile(img, pngQuality)
 	if err != nil {
-		log.Warnf("ROUTES:Thumb - Error compressing %s to PNG: %v", fullPath, err)
-		returnDummyPNG(w)
+		log.Warnf(`ROUTES:Thumb - Error compressing "%s". %v`, fullPath, err)
+		helpers.Return500Msg(w)
 		return
 	}
 
@@ -186,7 +197,7 @@ func resizeImage(img image.Image, width, height int, square, autoScale bool) (im
 		size = helpers.Max(img.Bounds().Dx(), img.Bounds().Dy())
 	}
 
-	// First, make the image square by scaling the smallest dimension to the larget size
+	// First, make the image square by scaling the smallest dimension to the largest size.
 	if img.Bounds().Dx() > img.Bounds().Dy() {
 		width = 0
 		height = size
@@ -196,7 +207,7 @@ func resizeImage(img image.Image, width, height int, square, autoScale bool) (im
 		}
 		resized := resize.Resize(uint(width), uint(height), img, resize.Lanczos3)
 
-		// Then crop the image to the target size
+		// Then crop the image to the target size.
 		img = imaging.CropCenter(resized, size, size)
 	} else {
 		if width == 0 && height == 0 {
@@ -211,12 +222,12 @@ func resizeImage(img image.Image, width, height int, square, autoScale bool) (im
 				height = 300
 			}
 		} else {
-			// Don't auto-resize because this endpoint can also be used for simply reducing the quality of an image
+			// Don't auto-resize because this endpoint can also be used for simply reducing the quality of an image.
 			width = img.Bounds().Dx()
 			height = img.Bounds().Dy()
 		}
 	} else if width == 0 {
-		// If only width is provided, calculate the height based on the image's aspect ratio
+		// If only width is provided, calculate the height based on the image's aspect ratio.
 		width = img.Bounds().Dx() * height / img.Bounds().Dy()
 	} else if height == 0 {
 		height = img.Bounds().Dy() * width / img.Bounds().Dx()
@@ -18,25 +18,32 @@ func StartCrawler() error {
 // startRecurringCrawl never exits.
 func startRecurringCrawl() {
 	crawlTicker := time.NewTicker(time.Duration(config.GetConfig().CrawlModeCrawlInterval) * time.Second)
 
 	printTicker := time.NewTicker(60 * time.Second)
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 	go logCacheStatus("CACHE STATUS", printTicker, log.Debugf, ctx)
 
 	time.Sleep(time.Duration(config.GetConfig().CrawlModeCrawlInterval) * time.Second)
 
+	i := 0
 	for range crawlTicker.C {
 		dc := directorycrawler.NewDirectoryCrawler(globals.DirectoryCrawlers.Queue)
 		log.Infoln("CRAWLER - Recurring - Starting a crawl...")
 		start := time.Now()
 		err := dc.Crawl(config.GetConfig().RootDir, nil)
 		if err != nil {
-			log.Warnf("CRAWLER - Recurring - Crawl failed: %s", err)
+			if i == 0 {
+				// Exit if we failed to crawl on the first recurrence.
+				log.Fatalf("CRAWLER - Recurring - Crawl failed: %s", err)
+			} else {
+				log.Errorf("CRAWLER - Recurring - Crawl failed: %s", err)
+			}
 		} else {
 			duration := time.Since(start).Round(time.Second)
 			log.Infof("CRAWLER - Recurring - Crawl completed in %s", duration)
 			log.Debugf("%d/%d items in the cache.", len(sharedcache.Cache.Keys()), config.GetConfig().CacheSize)
 		}
+		i++
 	}
 }
 
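The iteration counter above makes a failure on the first recurrence fatal while later failures only log. A compact standalone rendering of the same pattern, with a fabricated `crawl` stand-in for illustration only:

package main

import (
    "errors"
    "fmt"
    "log"
)

func crawl(i int) error {
    if i == 2 {
        return errors.New("transient failure")
    }
    return nil
}

func main() {
    for i := 0; i < 4; i++ {
        if err := crawl(i); err != nil {
            if i == 0 {
                // First run failing means the setup itself is broken: exit.
                log.Fatalf("crawl failed: %s", err)
            }
            // Later runs may fail transiently: log and keep the loop alive.
            fmt.Printf("crawl %d failed: %s\n", i, err)
            continue
        }
        fmt.Printf("crawl %d ok\n", i)
    }
}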
@@ -47,11 +54,9 @@ func logCacheStatus(msg string, ticker *time.Ticker, logFn func(format string, a
 		case <-ctx.Done():
 			return
 		case <-ticker.C:
-			if !InitialCrawlInProgress {
-				logStr := "%s - %d/%d items in the cache. Busy workers: %d. Jobs queued: %d. Running crawls: %d."
-				logFn(logStr,
-					msg, len(sharedcache.Cache.Keys()), config.GetConfig().CacheSize, atomic.LoadInt32(&globals.DirectoryCrawlers.BusyWorkers), globals.DirectoryCrawlers.Queue.GetQueueSize(), directorycrawler.GetTotalActiveCrawls())
-			}
+			logStr := "%s - %d/%d items in the cache. Busy workers: %d. Jobs queued: %d. Running crawls: %d."
+			logFn(logStr,
+				msg, len(sharedcache.Cache.Keys()), config.GetConfig().CacheSize, atomic.LoadInt32(&globals.DirectoryCrawlers.BusyWorkers), globals.DirectoryCrawlers.Queue.GetQueueSize(), directorycrawler.GetTotalActiveCrawls())
 		}
 	}
 }
@@ -5,7 +5,6 @@ import (
 	"crazyfs/config"
 	"crazyfs/directorycrawler"
 	"crazyfs/globals"
-	"crazyfs/logging"
 	"sync"
 	"time"
 )
@@ -16,14 +15,9 @@ var InitialCrawlInProgress bool
 var InitialCrawlLock sync.RWMutex
 
 func init() {
-	InitialCrawlInProgress = false
 }
 
-func InitialCrawl() {
-	log = logging.GetLogger()
-
-	log.Infof("CRAWLER:Inital - Starting the crawl for %s", config.GetConfig().RootDir)
-
+func InitialCrawl() error {
 	ticker := time.NewTicker(3 * time.Second)
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
@@ -32,13 +26,12 @@ func InitialCrawl() {
 	InitialCrawlLock.Lock()
 	InitialCrawlInProgress = true
 	dc := directorycrawler.NewDirectoryCrawler(globals.DirectoryCrawlers.Queue)
-	//start := time.Now()
 	err := dc.Crawl(config.GetConfig().RootDir, nil)
 	if err != nil {
-		log.Errorf("CRAWLER:Inital - failed: %s", err)
+		return err
 	}
 	InitialCrawlInProgress = false
 	InitialCrawlLock.Unlock()
 	ticker.Stop()
-	//log.Infof("INITIAL CRAWL - finished the initial crawl in %s", time.Since(start).Round(time.Second))
+	return nil
 }
@@ -19,7 +19,7 @@ func InitRecacheSemaphore(limit int) {
 	sem = make(chan struct{}, limit)
 }
 
-func CheckAndRecache(path string) {
+func CheckAndRecacheBG(path string) {
 	item, found := sharedcache.Cache.Get(path)
 	if found && time.Now().UnixNano()/int64(time.Millisecond)-item.Cached > int64(config.GetConfig().CacheTime)*60*1000 {
 		log.Debugf("CACHE:Recache - re-caching: %s", path)
@@ -9,6 +9,7 @@ import (
 	"strings"
 )
 
+// TODO: this is old and probably does not work
 func SearchLRU(queryString string, excludeElements []string, limitResults int) []*cacheitem.Item {
 	results := make([]*cacheitem.Item, 0)
 
@@ -41,6 +42,7 @@ func SearchLRU(queryString string, excludeElements []string, limitResults int) [
 	return results
 }
 
+// TODO: this is old and probably does not work
 func searchKey(key string, queryString string, excludeElements []string, sem chan struct{}, resultsChan chan *cacheitem.Item) {
 	// Acquire a token
 	sem <- struct{}{}
@@ -38,9 +38,9 @@ func NewItem(fullPath string, info os.FileInfo) (*Item, error) {
 	}
 
 	if config.GetConfig().CrawlerParseMIME {
-		_, mimeType, ext, err = file.GetMimeType(mimePath, true, &info)
+		_, mimeType, ext, err = file.GetMimeType(mimePath, true)
 	} else {
-		_, mimeType, ext, err = file.GetMimeType(mimePath, false, &info)
+		_, mimeType, ext, err = file.GetMimeType(mimePath, false)
 	}
 
 	if config.GetConfig().CrawlerParseEncoding {
@@ -165,9 +165,12 @@ func main() {
 	log.Infof("Server started on port %s", cfg.HTTPPort)
 
 	if cliArgs.initialCrawl || cfg.InitialCrawl {
-		log.Infoln("Performing initial crawl...")
+		log.Infof(`Performing initial crawl for "%s"`, config.GetConfig().RootDir)
 		start := time.Now()
-		cache.InitialCrawl()
+		err := cache.InitialCrawl()
+		if err != nil {
+			log.Fatalf("Initial crawl failed: %s", err)
+		}
 		duration := time.Since(start).Round(time.Second)
 		keys := sharedcache.Cache.Keys()
 		config.InitialCrawlElapsed = int(duration.Seconds())
@@ -148,9 +148,9 @@ func GetActiveCrawls() map[string]*ActiveCrawl {
 func GetFinishedCrawls() []FinishedCrawl {
 	finishedCrawlsMutex.RLock()
 	defer finishedCrawlsMutex.RUnlock()
-	finishedCrawlsCopy := make([]FinishedCrawl, 0, maxFinishedCrawls)
-	for k, v := range finishedCrawls {
-		finishedCrawlsCopy[k] = v
+	finishedCrawlsCopy := make([]FinishedCrawl, len(finishedCrawls))
+	for i, v := range finishedCrawls {
+		finishedCrawlsCopy[i] = v
 	}
 	return finishedCrawlsCopy
 }
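The change above fixes an out-of-range write: `make([]FinishedCrawl, 0, maxFinishedCrawls)` allocates a slice of length zero, so indexed assignment panics regardless of capacity. A standalone illustration:

package main

import "fmt"

func main() {
    src := []int{1, 2, 3}

    // Length 0, capacity 3: dst[i] = v would panic with "index out of range".
    // dst := make([]int, 0, 3)

    // Length len(src): indexed assignment is valid.
    dst := make([]int, len(src))
    for i, v := range src {
        dst[i] = v
    }
    fmt.Println(dst) // [1 2 3]
}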
@@ -16,21 +16,19 @@ func InitializeDirectoryCrawlerWorkers() *globals.DcWorkers {
 	d.Queue = dcWorkers.Queue
 	d.BusyWorkers = dcWorkers.BusyWorkers
 	globals.DirectoryCrawlers = d
-	log.Debugf("CRAWLERS - Started %d directory crawler dc_workers.", config.GetConfig().DirectoryCrawlers)
+	log.Debugf("CRAWLERS - Started %d directory crawler workers.", config.GetConfig().DirectoryCrawlers)
 	return d
 }
 
 func directoryCrawlerWorker(w *workers.CrawlWorkers) {
+	// Reminder that this worker type does not support shutdown
 	for {
 		job := w.Queue.GetJob()
-
-		// TODO: reminder that this worker type does not support shutdown
-
 		atomic.AddInt32(&w.BusyWorkers, 1)
 
 		err := job.Walker.ReadPathAndQueue(job.StartPath)
 		if err != nil {
-			log.Warnf("DirCrawlWorker - %s - %s", job.StartPath, err)
+			log.Warnf(`DirCrawlWorker:ReadPathAndQueue - error for "%s" - %s`, job.StartPath, err)
 		}
 		job.Walker.Wg.Done()
 
@@ -20,7 +20,6 @@ func (dc *DirectoryCrawler) walkRecursiveFunc(fullPath string, info os.FileInfo,
 	file.RetardCheck(fullPath)
 	processErr := dc.processPath(fullPath, info)
 	if processErr != nil {
-		log.Errorf(`walkRecursiveFunc failed on "%s": %s`, fullPath, processErr)
 		return processErr
 	}
 	return nil
@@ -30,12 +29,10 @@ func (dc *DirectoryCrawler) walkNonRecursiveFunc(fullPath string, dir os.DirEntr
 	file.RetardCheck(fullPath)
 	info, infoErr := dir.Info()
 	if infoErr != nil {
-		log.Errorf(`CRAWLER:walkNonRecursiveFunc - Get info failed on "%s": %s`, fullPath, infoErr)
 		return infoErr
 	}
 	processErr := dc.processPath(fullPath, info)
 	if processErr != nil {
-		log.Errorf(`walkNonRecursiveFunc failed on "%s": %s`, fullPath, processErr)
 		return processErr
 	}
 	return nil
@@ -69,7 +66,6 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, walkFunc filepath.WalkFunc) e
 		return err
 	}
 	if err != nil {
-		log.Errorf(`CRAWLER:Crawl - os.Lstat() failed on "%s": %s`, fullPath, err)
 		return err
 	}
 	if info.Mode()&os.ModeSymlink != 0 {
@@ -97,7 +93,7 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, walkFunc filepath.WalkFunc) e
 		// If the path is a directory, start a walk
 		err := queuedwalk.Walk(fullPath, config.FollowSymlinks, walkFunc, dc.queue)
 		if err != nil {
-			log.Errorf(`CRAWLER:Crawl - Crawl for "%s" failed: %s`, fullPath, err)
+			return err
 		}
 	} else {
 		// If the path is a file, add it to the cache directly
@@ -138,7 +134,6 @@ func (dc *DirectoryCrawler) CrawlNoRecursion(fullPath string, walkFunc fs.WalkDi
 		return nil, err
 	}
 	if err != nil {
-		log.Errorf(`CRAWLER:CrawlNoRecursion - os.Lstat() failed on "%s": %s`, fullPath, err)
 		return nil, err
 	}
 	if info.Mode()&os.ModeSymlink != 0 {
@@ -172,7 +167,6 @@ func (dc *DirectoryCrawler) CrawlNoRecursion(fullPath string, walkFunc fs.WalkDi
 
 	err := filepath.WalkDir(fullPath, dc.walkNonRecursiveFunc)
 	if err != nil {
-		log.Errorf(`CRAWLER:CrawlNoRecursion - Crawl for "%s" failed: %s`, fullPath, err)
 		return nil, err
 	}
 	item, _ = sharedcache.Cache.Get(relPath)
@@ -53,7 +53,6 @@ func (dc *DirectoryCrawler) processPath(fullPath string, info os.FileInfo) error
 			}
 		}
 	} else {
-		// StartPath is a file
 		err := dc.addCacheItem(fullPath, info)
 		if err != nil {
 			return err
@@ -40,10 +40,10 @@ func elasticDeleteWorker(w *workers.CrawlWorkers) {
 		atomic.AddInt32(&w.BusyWorkers, 1)
 
 		if job.Extra == nil {
-			// Jobs without any extras are the standard Walk jobs
+			// Jobs without any extras are the standard Walk jobs that add items to Elastic.
 			err := job.Walker.ReadPathAndQueue(job.StartPath)
 			if err != nil {
-				log.Warnf("ELCrawlWorker - %s - %s", job.StartPath, err)
+				log.Warnf("ELCrawlWorker:Add - %s - %s", job.StartPath, err)
 			}
 			job.Walker.Wg.Done()
 		} else {
@@ -55,9 +55,9 @@ func elasticDeleteWorker(w *workers.CrawlWorkers) {
 			key := e["key"].(string)
 			err := DeleteFromElasticsearch(key)
 			if err != nil {
-				log.Errorf(`ELASTIC:Delete - Error deleting key "%s" - %s`, key, err)
+				log.Errorf(`ELCrawlWorker:Delete - Error deleting key "%s" - %s`, key, err)
 			} else {
-				log.Debugf(`ELASTIC:Delete - Deleted path: "%s"`, job.StartPath)
+				log.Debugf(`ELCrawlWorker:Delete - Deleted path: "%s"`, job.StartPath)
 			}
 		}
 	} else {
@@ -5,7 +5,6 @@ import (
 	"crazyfs/config"
 	"crazyfs/directorycrawler"
 	"crazyfs/globals"
-	"fmt"
 	"sync"
 	"time"
 )
@@ -53,13 +52,13 @@ func syncElasticsearch(doFullSync bool) {
 	var syncType string
 	if doFullSync {
 		if !FullSyncRunning.TryAcquire(1) {
-			log.Fatalln("ELASTIC - Failed to acquire the FullSyncRunning semaphore. This is a logic error.")
+			panic("ELASTIC - Failed to acquire the FullSyncRunning semaphore. This is a logic error.")
 		}
 		defer FullSyncRunning.Release(1)
 		syncType = "full refresh"
 	} else {
 		if !RefreshSyncRunning.TryAcquire(1) {
-			log.Fatalln("ELASTIC - Failed to acquire the RefreshSyncRunning semaphore. This is a logic error.")
+			panic("ELASTIC - Failed to acquire the RefreshSyncRunning semaphore. This is a logic error.")
 		}
 		defer RefreshSyncRunning.Release(1)
 		syncType = "refresh"
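For reference, `TryAcquire` here appears to be the non-blocking acquire from `golang.org/x/sync/semaphore`; a failed acquire means another sync already holds the slot, which the commit now treats as a programmer error via `panic`. A minimal standalone illustration of that contract:

package main

import (
    "fmt"

    "golang.org/x/sync/semaphore"
)

func main() {
    sem := semaphore.NewWeighted(1)

    if sem.TryAcquire(1) {
        fmt.Println("acquired") // first caller wins the slot
    }
    if !sem.TryAcquire(1) {
        // A second non-blocking acquire fails while the slot is held.
        fmt.Println("already running, refusing to start a second sync")
    }
    sem.Release(1)
}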
@@ -76,7 +75,7 @@ func syncElasticsearch(doFullSync bool) {
 
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
-	ticker := time.NewTicker(time.Duration(config.GetConfig().CrawlModeCrawlInterval) * time.Second)
+	ticker := time.NewTicker(60 * time.Second)
 	defer ticker.Stop()
 
 	go func() {
@@ -85,8 +84,9 @@ func syncElasticsearch(doFullSync bool) {
 			case <-ctx.Done():
 				return
 			case <-ticker.C:
-				logStr := "ELASTIC - Busy Elastic delete workers: %d. Elastic deletes queued: %d"
-				log.Debugf(logStr, globals.ElasticCrawlers.BusyWorkers, globals.ElasticCrawlers.Queue.GetQueueSize())
+				elapsed := time.Since(start)
+				logStr := "ELASTIC - Sync in progress. Elapsed: %d. Busy Elastic delete workers: %d. Elastic deletes queued: %d"
+				log.Debugf(logStr, elapsed, globals.ElasticCrawlers.BusyWorkers, globals.ElasticCrawlers.Queue.GetQueueSize())
 			}
 		}
 	}()
@@ -115,7 +115,6 @@ func syncElasticsearch(doFullSync bool) {
 	// Shut down the elastic sync workers once we've finished.
 	globals.ElasticCrawlers.Queue.Terminate()
 	aliveWorkers.Wait()
-	fmt.Println("cleared ElasticCrawlers")
 	globals.ElasticCrawlers = nil
 
 	duration := time.Since(start)
@@ -123,13 +122,13 @@ func syncElasticsearch(doFullSync bool) {
 }
 
 func logElasticConnError(err error) {
-	log.Errorf("ELASTIC - Failed to read the index: %s", err)
+	log.Errorf("ELASTIC - Failed to read the index: %s", err.Error())
 }
 
 // EnableElasticsearchConnection tests the connection to Elastic and enables the backend if it's successful.
 func EnableElasticsearchConnection() {
 	esSize, err := getElasticSize()
-	if err != nil {
+	if err != nil || esSize == -1 {
 		logElasticConnError(err)
 		Enabled = false
 		return
@@ -8,6 +8,8 @@ import (
 	"crazyfs/file"
 	"crazyfs/sharedcache"
 	"encoding/json"
+	"errors"
+	"fmt"
 	"github.com/elastic/go-elasticsearch/v8/esapi"
 	"os"
 	"sync"
@@ -25,6 +27,7 @@ var globalPathsByKeyMutex sync.RWMutex
 // fullSync is another global variable accessed by the workers and only set by syncElasticsearch()
 var fullSync bool
 
+// Errors from this function are received by the Elastic sync workers.
 func addToElasticsearch(fullPath string, info os.FileInfo, incomingErr error) error {
 	relPath := file.StripRootDir(fullPath)
 	if !shouldExclude(relPath, config.GetConfig().ElasticsearchExcludePatterns) {
@@ -35,7 +38,7 @@ func addToElasticsearch(fullPath string, info os.FileInfo, incomingErr error) er
 			// Delete this item from Elastic in order to avoid any strange inconsistencies.
 			err := DeleteFromElasticsearch(encodeToBase64(relPath))
 			if err != nil {
-				log.Errorf("ELASTIC:Add - Failed to delete \"%s\" - %s", relPath, err)
+				return errors.New(fmt.Sprintf(`Failed to delete "%s" - %s`, relPath, err))
 			}
 		} else {
 			globalPathsByKeyMutex.RLock()
@@ -43,27 +46,25 @@ func addToElasticsearch(fullPath string, info os.FileInfo, incomingErr error) er
 			if _, ok := globalPathsByKey[relPath]; ok {
 				// Item already exists.
 				if fullSync {
-					performAddToElasticsearch(cacheItem)
+					return performAddToElasticsearch(cacheItem)
 				}
 			} else {
-				performAddToElasticsearch(cacheItem)
+				return performAddToElasticsearch(cacheItem)
 			}
 		}
 	}
 	return nil
 }
 
-func performAddToElasticsearch(item *cacheitem.Item) {
+func performAddToElasticsearch(item *cacheitem.Item) error {
 	preparedItem, err := prepareCacheItem(item)
 	if err != nil {
-		log.Printf("ELASTIC:Add - Error preparing new item: %s", err)
-		return
+		return err
 	}
 
 	data, err := json.Marshal(preparedItem)
 	if err != nil {
-		log.Printf("ELASTIC:Add - Error marshaling new item: %s", err)
-		return
+		return err
 	}
 
 	req := esapi.IndexRequest{
@@ -74,20 +75,20 @@ func performAddToElasticsearch(item *cacheitem.Item) {
 	}
 	res, err := req.Do(context.Background(), ElasticClient)
 	if err != nil {
-		log.Errorf("ELASTIC:Add - Error getting response: %s", err)
-		return
+		return err
 	}
 	defer res.Body.Close()
 
 	if res.IsError() {
 		var e map[string]interface{}
 		if err := json.NewDecoder(res.Body).Decode(&e); err != nil {
-			log.Errorf("ELASTIC:Add - Error parsing the response body: %s", err)
+			return errors.New(fmt.Sprintf("Error parsing the response body: %s", err))
 		}
-		log.Errorf(`ELASTIC:Add - Error indexing document "%s" - Status code: %d - %s`, item.Path, res.StatusCode, e)
+		return errors.New(fmt.Sprintf(`Error indexing document "%s" - Status code: %d - Response: %s`, item.Path, res.StatusCode, e))
 	}
 
 	log.Debugf(`ELASTIC:Add - Added: "%s"`, preparedItem.Path)
+	return nil
 }
 
 // prepareCacheItem is used to get an item ready to insert into Elastic.
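A style aside on the returns above: `errors.New(fmt.Sprintf(...))` is equivalent to `fmt.Errorf(...)`, which static analysis (staticcheck S1028) prefers. A standalone one-liner comparison:

package main

import (
    "errors"
    "fmt"
)

func main() {
    status := 502
    // The form used in the commit:
    e1 := errors.New(fmt.Sprintf("Error indexing document - Status code: %d", status))
    // The equivalent, more idiomatic form:
    e2 := fmt.Errorf("Error indexing document - Status code: %d", status)
    fmt.Println(e1.Error() == e2.Error()) // true
}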
@@ -1,48 +1,32 @@
 package file
 
 import (
-	"crazyfs/config"
-	"crazyfs/logging"
-	"errors"
-	"fmt"
 	"github.com/gabriel-vasile/mimetype"
-	"github.com/sirupsen/logrus"
 	"mime"
 	"os"
 	"path/filepath"
 	"strings"
 )
 
-var log *logrus.Logger
-
 func init() {
-	log = logging.GetLogger()
 }
 
-func GetMimeType(path string, analyze bool, passedInfo *os.FileInfo) (bool, string, string, error) {
+func GetMimeType(path string, analyze bool) (bool, string, string, error) {
 	var MIME *mimetype.MIME
 	var mimeType string
 	var ext string
 	var err error
 
-	var info os.FileInfo
-	if config.FollowSymlinks {
-		info, err = os.Lstat(path)
-	} else {
-		if info == nil {
-			info, err = os.Stat(path)
-		} else {
-			info = *passedInfo
-		}
-	}
-
-	//if config.FollowSymlinks {
-	//	info, err = os.Stat(path)
-	//} else {
+	info, err := os.Lstat(path)
 	if err != nil {
 		// File does not exist
 		return false, "", "", err
 	}
 
+	if info.Mode()&os.ModeSymlink != 0 {
+		return false, "", "", RejectSymlinkErr
+	}
+
 	if !info.IsDir() {
 		if info.Mode()&os.ModeSymlink != 0 {
 			return false, "", "", RejectSymlinkErr
@@ -51,7 +35,7 @@ func GetMimeType(path string, analyze bool, passedInfo *os.FileInfo) (bool, stri
 	if analyze {
 		MIME, err = mimetype.DetectFile(path)
 		if err != nil {
-			return false, "", "", errors.New(fmt.Sprintf("Error analyzing MIME type: %v", err))
+			return false, "", "", err
 		}
 		mimeType = MIME.String()
 	} else {
@@ -65,12 +49,3 @@ func GetMimeType(path string, analyze bool, passedInfo *os.FileInfo) (bool, stri
 	}
 	return true, mimeType, ext, nil
 }
-
-func StripRootDir(path string) string {
-	if path == "/" || path == config.GetConfig().RootDir || path == "" {
-		// Avoid erasing our path
-		return "/"
-	} else {
-		return strings.TrimSuffix(strings.TrimPrefix(path, config.GetConfig().RootDir), "/")
-	}
-}
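The rewritten `GetMimeType` above always uses `os.Lstat` and rejects symlinks up front; `Lstat`, unlike `Stat`, does not follow links, so the mode bits expose `ModeSymlink`. A standalone check of that behavior:

package main

import (
    "fmt"
    "os"
    "path/filepath"
)

func main() {
    // Create a target file and a symlink to it in a temp dir (symlink
    // creation may need extra privileges on Windows).
    dir, _ := os.MkdirTemp("", "lstat-demo")
    defer os.RemoveAll(dir)
    target := filepath.Join(dir, "target.txt")
    link := filepath.Join(dir, "link.txt")
    _ = os.WriteFile(target, []byte("x"), 0o644)
    _ = os.Symlink(target, link)

    for _, p := range []string{target, link} {
        info, err := os.Lstat(p)
        if err != nil {
            fmt.Println(err)
            continue
        }
        // Lstat reports the link itself, so the symlink bit is visible.
        fmt.Printf("%s symlink=%v\n", p, info.Mode()&os.ModeSymlink != 0)
    }
}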
@@ -8,41 +8,45 @@ import (
 	"strings"
 )
 
-// SafeJoin Clean the provided path
-func SafeJoin(pathArg string) (string, error) {
-	cleanPath := filepath.Join(config.GetConfig().RootDir, filepath.Clean(pathArg))
+// SafeJoinRoot cleans and joins the provided relative path with the root directory path to form the full path.
+func SafeJoinRoot(relPath string) string {
+	cleanPath := filepath.Join(config.GetConfig().RootDir, filepath.Clean(relPath))
 	cleanPath = strings.TrimRight(cleanPath, "/")
-	return cleanPath, nil
+	return cleanPath
 }
 
-func DetectTraversal(pathArg string) (bool, error) {
+func DetectTraversal(relPath string) (bool, error) {
+	if strings.HasPrefix(relPath, config.GetConfig().RootDir) {
+		panic(fmt.Sprintf(`file.DetectTraversal() was given a path that had the root directory prefixed instead of a relative path. Make sure to call file.SafeJoinRoot() and then file.DetectTraversal(). Offending path: %s`, relPath))
+	}
+
 	// Remove the trailing slash so our checks always handle the same format
-	if pathArg != "/" {
-		pathArg = strings.TrimRight(pathArg, "/")
+	if relPath != "/" {
+		relPath = strings.TrimRight(relPath, "/")
 	}
 
 	// If the path starts with "~", a directory traversal attack is being attempted
-	if strings.HasPrefix(pathArg, "~") {
-		return true, fmt.Errorf("includes home directory: %s", pathArg)
+	if strings.HasPrefix(relPath, "~") {
+		return true, fmt.Errorf("includes home directory: %s", relPath)
 	}
 
 	// The file path should ALWAYS be absolute.
 	// For example: /Documents
-	if !filepath.IsAbs(pathArg) {
-		return true, fmt.Errorf("is not absolute path: %s", pathArg)
+	if !filepath.IsAbs(relPath) {
+		return true, fmt.Errorf("is not absolute path: %s", relPath)
 	}
 
-	cleanArg := filepath.Clean(pathArg)
+	cleanArg := filepath.Clean(relPath)
 	cleanPath := filepath.Join(config.GetConfig().RootDir, cleanArg)
 
 	// If the path is not within the base path, return an error
 	if !strings.HasPrefix(cleanPath, config.GetConfig().RootDir) {
-		return true, fmt.Errorf("the full path is outside the root dir: %s", pathArg)
+		return true, fmt.Errorf("the full path is outside the root dir: %s", relPath)
 	}
 
 	// If the cleaned path is not the same as the original path, a directory traversal attack is being attempted
-	if pathArg != cleanArg {
-		return true, fmt.Errorf("path.Clean modified the path arg from %s to %s", pathArg, cleanArg)
+	if relPath != cleanArg {
+		return true, fmt.Errorf("path.Clean modified the path arg from %s to %s", relPath, cleanArg)
 	}
 
 	return false, nil
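The core of the traversal check above is that a cleaned path which escapes the root no longer carries the root prefix. A standalone illustration using only the standard library (the root path is assumed for the example):

package main

import (
    "fmt"
    "path/filepath"
    "strings"
)

func main() {
    root := "/srv/files" // assumed root dir for the example

    for _, arg := range []string{"/docs/a.txt", "/../etc/passwd"} {
        // Join cleans the combined path, resolving any ".." segments.
        clean := filepath.Join(root, filepath.Clean(arg))
        if !strings.HasPrefix(clean, root) {
            fmt.Printf("%-16s -> traversal attack\n", arg)
        } else {
            fmt.Printf("%-16s -> ok (%s)\n", arg, clean)
        }
    }
}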
@@ -69,3 +73,12 @@ func PathExists(path string) (bool, error) {
 
 	return true, nil // File or symlink exists and is not broken
 }
+
+func StripRootDir(path string) string {
+	if path == "/" || path == config.GetConfig().RootDir || path == "" {
+		// Don't erase the path if it's the root.
+		return "/"
+	} else {
+		return strings.TrimSuffix(strings.TrimPrefix(path, config.GetConfig().RootDir), "/")
+	}
+}
@@ -5,6 +5,8 @@ import (
 	"path/filepath"
 )
 
+// Inspired by https://github.com/iafan/cwalk/tree/master
+
 // Walk is a wrapper function for the Walker object that mimics the behavior of filepath.Walk, and doesn't follow symlinks.
 func Walk(root string, followSymlinks bool, walkFn filepath.WalkFunc, queue *JobQueue) error {
 	file.RetardCheck(root)
|
||||||
walkFunc: walkFn,
|
walkFunc: walkFn,
|
||||||
queue: queue,
|
queue: queue,
|
||||||
}
|
}
|
||||||
return w.walk("", walkFn)
|
return w.walker("")
|
||||||
}
|
}
|
||||||
|
|
|
@@ -3,13 +3,12 @@ package queuedwalk
 import (
 	"crazyfs/file"
 	"errors"
-	"fmt"
+	"os"
 	"path/filepath"
 	"sync"
 )
 
-// Walker.go is the implementation behind `queuedwalk()`, which is a filesystem queuedwalk
-// using workers that pull jobs from a queue.
+// Walker.go is the implementation behind `Walk()`, which is a filesystem walk using workers that pull jobs from a queue.
 
 // ErrNotDir indicates that the path, which is being passed to a walker function, does not point to a directory.
 var ErrNotDir = errors.New("not a directory")
@@ -23,6 +22,49 @@ type Walker struct {
 	queue *JobQueue
 }

+// Walk recursively descends into subdirectories, calling the user-defined walkFn for each file or directory
+// in the tree, starting with the root directory.
+// It is only called one place: `Walk()` in Walk.go. It walks the starting directory and sends jobs to the workers.
+// Any errors in this function will be propagated to whatever called Walk.
+// Similar to filepath.WalkDir().
+func (w *Walker) walker(relPath string) error {
+	fullPath := filepath.Join(w.root, relPath)
+
+	info, err := w.lstat(relPath)
+	if err != nil {
+		return err
+	}
+
+	// Reject symlinks.
+	if info.Mode()&os.ModeSymlink != 0 {
+		return file.RejectSymlinkErr
+	}
+
+	// If the input path was a file, reject it. We can only walk directories.
+	if !info.Mode().IsDir() {
+		return ErrNotDir
+	}
+
+	// Execute the walkFunc.
+	walkFuncErr := w.walkFunc(fullPath, info, err)
+	if errors.Is(walkFuncErr, filepath.SkipDir) || errors.Is(walkFuncErr, filepath.SkipAll) {
+		return nil
+	} // If the walkFunc wants to skip this dir.
+	if walkFuncErr != nil {
+		return walkFuncErr
+	} // If we encountered an actual error.
+
+	// Let the workers handle everything else.
+	w.addJob(Job{
+		StartPath: relPath,
+		Walker:    w,
+	})
+
+	// Wait for the workers to finish the job we just added.
+	w.Wg.Wait()
+	return nil
+}
+
 // addJob increments the job counter and pushes the path to the job queue.
 func (w *Walker) addJob(job Job) {
 	w.Wg.Add(1)
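The new walker treats filepath.SkipDir and filepath.SkipAll as control flow rather than failures, which is the standard library's sentinel convention. A self-contained illustration:

package main

import (
	"errors"
	"fmt"
	"path/filepath"
)

func main() {
	// filepath.SkipDir and filepath.SkipAll are sentinel errors, not failures.
	// A walker must check for them before treating a walkFn result as an error.
	walkFuncErr := filepath.SkipDir
	if errors.Is(walkFuncErr, filepath.SkipDir) || errors.Is(walkFuncErr, filepath.SkipAll) {
		fmt.Println("skip requested; not a real error")
	} else if walkFuncErr != nil {
		fmt.Println("real error:", walkFuncErr)
	}
}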
@@ -34,7 +76,6 @@ func (w *Walker) ReadPathAndQueue(relPath string) error {
 	fullPath := filepath.Join(w.root, relPath)
 	names, err := readDirNames(fullPath)
 	if err != nil {
-		log.Errorf("Walker:ReadPathAndQueue:readDirNames - %s", err)
 		return err
 	}

@@ -43,16 +84,17 @@ func (w *Walker) ReadPathAndQueue(relPath string) error {
 		subPath := filepath.Join(relPath, name)
 		info, err := w.lstat(subPath)

+		// Print the error rather than stopping our scan and propagating the error up.
+		// We do this because failures here are alright and safe to ignore.
 		if err != nil {
 			if !errors.Is(err, file.RejectSymlinkErr) {
-				// Only print a warning if it's not a symlink error.
+				// We don't care about symlink-related errors, and if we print them all we will overwhelm the console.
 				log.Warnf("Walker:ReadPathAndQueue - %s - %s", relPath, err)
 			}
 			continue
 		}

 		if info == nil {
-			log.Warnf("Walker:ReadPathAndQueue - %s - %s", relPath, err)
+			log.Warnf(`Walker:ReadPathAndQueue - lstat() was null for "%s"`, relPath)
 			continue
 		}

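The warn-and-continue branch above hinges on matching a sentinel error with errors.Is, so symlink rejections stay quiet while real failures get logged. A reduced sketch; the sentinel here merely stands in for file.RejectSymlinkErr:

package main

import (
	"errors"
	"fmt"
)

// rejectSymlinkErr stands in for file.RejectSymlinkErr in this sketch.
var rejectSymlinkErr = errors.New("symlinks are not allowed")

func main() {
	entries := map[string]error{
		"a.txt":  nil,
		"b.lnk":  rejectSymlinkErr,
		"c.file": errors.New("permission denied"),
	}
	for name, err := range entries {
		if err != nil {
			if !errors.Is(err, rejectSymlinkErr) {
				fmt.Println("warn:", name, err) // Log only non-symlink errors.
			}
			continue // Either way, keep scanning instead of aborting.
		}
		fmt.Println("ok:", name)
	}
}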
@@ -61,9 +103,12 @@ func (w *Walker) ReadPathAndQueue(relPath string) error {
 		if errors.Is(err, filepath.SkipDir) {
 			return nil
 		}
+		if err != nil {
+			return err
+		}

 		// If this child is a directory, add it to the queue then move on.
-		if err == nil && info.Mode().IsDir() {
+		if info.Mode().IsDir() {
 			w.addJob(Job{
 				StartPath: subPath,
 				Walker: w,
@@ -72,42 +117,3 @@ func (w *Walker) ReadPathAndQueue(relPath string) error {
 	}
 	return nil
 }

-// Walk recursively descends into subdirectories, calling the user-defined walkFn for each file or directory
-// in the tree, starting with the root directory. It is only called one place: `queuedwalk()` in queuedwalk.go
-func (w *Walker) walk(relPath string, walkFn filepath.WalkFunc) error {
-	// TODO: compare with filepath.WalkDir()
-
-	fullPath := filepath.Join(w.root, relPath)
-
-	info, err := w.lstat(relPath)
-	if err != nil {
-		return err
-	}
-
-	err = w.walkFunc(fullPath, info, err)
-	if errors.Is(err, filepath.SkipDir) {
-		return nil
-	}
-	if err != nil {
-		return err
-	}
-
-	if info == nil {
-		return fmt.Errorf("broken symlink: %s", relPath)
-	}
-
-	if !info.Mode().IsDir() {
-		return ErrNotDir
-	}
-
-	// Let the workers handle everything else.
-	w.addJob(Job{
-		StartPath: relPath,
-		Walker: w,
-	})
-
-	// Wait for the workers to finish reading the file system.
-	w.Wg.Wait()
-	return nil
-}
@@ -15,8 +15,9 @@ import (

 // readDirNames reads the directory named by dirname and returns
 // a list of directory entries.
-func readDirNames(dirname string) ([]string, error) {
-	f, err := os.Open(dirname)
+func readDirNames(fullPath string) ([]string, error) {
+	file.RetardCheck(fullPath)
+	f, err := os.Open(fullPath)
 	if err != nil {
 		return nil, err
 	}
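readDirNames presumably wraps os.Open plus Readdirnames; a self-contained approximation using only the standard library (the project-specific safety check is omitted):

package main

import (
	"fmt"
	"os"
)

// readDirNames reads the directory at fullPath and returns its entry names.
func readDirNames(fullPath string) ([]string, error) {
	f, err := os.Open(fullPath)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	// Readdirnames(-1) returns every name in one slice, in directory order.
	return f.Readdirnames(-1)
}

func main() {
	names, err := readDirNames(".")
	if err != nil {
		panic(err)
	}
	fmt.Println(names)
}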
@@ -34,9 +35,10 @@ func readDirNames(dirname string) ([]string, error) {
 	return names, nil
 }

-// lstat is a wrapper for os.Lstat which accepts a path relative to Walker.root and also rejects symlinks
+// lstat is a wrapper for os.Lstat which accepts a path relative to Walker.root and also rejects symlinks.
 func (w *Walker) lstat(relPath string) (info os.FileInfo, err error) {
 	fullPath := filepath.Join(w.root, relPath)
+	file.RetardCheck(fullPath)
 	info, err = os.Lstat(fullPath)
 	if err != nil {
 		return nil, err
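The wrapper's symlink rejection works because os.Lstat describes the link itself instead of following it, unlike os.Stat. A quick standalone demonstration:

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	dir, _ := os.MkdirTemp("", "lstat-demo")
	defer os.RemoveAll(dir)

	target := filepath.Join(dir, "real.txt")
	link := filepath.Join(dir, "link.txt")
	os.WriteFile(target, []byte("hi"), 0o644)
	os.Symlink(target, link)

	// Lstat describes the symlink itself, so ModeSymlink is set;
	// Stat would follow the link and report a regular file instead.
	info, err := os.Lstat(link)
	if err != nil {
		panic(err)
	}
	fmt.Println(info.Mode()&os.ModeSymlink != 0) // true
}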
@@ -18,7 +18,7 @@ func init() {
 }

 // ResponseItem is what is returned by the HTTP API as a JSON object.
-// We don't return a `cacheitem.Item` because having a separate `responseitem`
+// We don't return a `cacheitem.Item` because having a separate `ResponseItem`
 // object allows us to customize the structure without messing with the original item.
 type ResponseItem struct {
 	Path string `json:"path"`
@@ -36,28 +36,6 @@ type ResponseItem struct {
 }

 func NewResponseItem(cacheItem *cacheitem.Item) *ResponseItem {
-	// TODO: this should never happen and can probably be removed.
-	// Problem was linked to the scenario where an item was not found in the cache
-	// so a new crawl was triggered but the `childItem` var was never updated.
-	//defer func() {
-	//	if r := recover(); r != nil {
-	//		copiedItem := &cacheitem.Item{
-	//			Path:      cacheitem.Path,
-	//			Name:      cacheitem.Name,
-	//			Size:      cacheitem.Size,
-	//			Extension: cacheitem.Extension,
-	//			Modified:  cacheitem.Modified,
-	//			Mode:      cacheitem.Mode,
-	//			IsDir:     cacheitem.IsDir,
-	//			IsSymlink: cacheitem.IsSymlink,
-	//			Cached:    cacheitem.Cached,
-	//			Children:  nil,
-	//			MimeType:  cacheitem.MimeType,
-	//		}
-	//		log.Fatalf("Recovered from panic: %s - %+v - %s", r, copiedItem, debug.Stack())
-	//	}
-	//}()
-
 	newResponseItem := &ResponseItem{
 		Path: cacheItem.Path,
 		Name: cacheItem.Name,
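Copying the cached item into a separate API-shaped struct is a standard DTO pattern; this reduced sketch with hypothetical fields shows why the response shape can evolve without touching the cache's internal struct:

package main

import "fmt"

// Reduced stand-ins for cacheitem.Item and ResponseItem (fields hypothetical).
type Item struct {
	Path string
	Name string
	Size int64
}

type ResponseItem struct {
	Path string `json:"path"`
	Name string `json:"name"`
	Size int64  `json:"size"`
}

// NewResponseItem copies the cached item so the API shape can change
// without modifying the cache's struct.
func NewResponseItem(cacheItem *Item) *ResponseItem {
	return &ResponseItem{
		Path: cacheItem.Path,
		Name: cacheItem.Name,
		Size: cacheItem.Size,
	}
}

func main() {
	item := &Item{Path: "/music/a.mp3", Name: "a.mp3", Size: 1024}
	fmt.Printf("%+v\n", NewResponseItem(item))
}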