add a sort arg to /list and /search with an option to sort folders first

fix crawler on /list
fix an empty children array being JSON-encoded as null on /list
fix recache
Cyberes 2023-07-18 10:58:29 -06:00
parent fabe432ac4
commit f40907dd8a
4 changed files with 124 additions and 25 deletions
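
For reference, the new query parameter accepts 'default' (no sorting) and 'folders' (folders first). Requests might look like the following; the 'path' and 'sort' names come from the code below, while the /search query parameter name is a guess:

GET /list?path=some/dir&sort=folders
GET /search?query=report&sort=folders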


@@ -11,6 +11,7 @@ import (
lru "github.com/hashicorp/golang-lru/v2"
"log"
"net/http"
"sort"
"strconv"
"strings"
)
@@ -48,7 +49,18 @@ func SearchFile(w http.ResponseWriter, r *http.Request, cfg *config.Config, shar
limitResults = 0
}
var results []*data.Item
sortArg := r.URL.Query().Get("sort")
var folderSorting string
if sortArg == "default" || sortArg == "" {
folderSorting = "default"
} else if sortArg == "folders" {
folderSorting = "folders"
} else {
helpers.Return400Msg("sort arg must be 'default' (no sorting) or 'folders' (sort folders to the front of the list)", w)
return
}
results := make([]*data.Item, 0)
outer:
for _, key := range sharedCache.Keys() {
cacheItem, found := sharedCache.Get(key)
@@ -95,6 +107,12 @@
}
if folderSorting == "folders" {
sort.Slice(results, func(i, j int) bool {
return results[i].IsDir && !results[j].IsDir
})
}
w.Header().Set("Cache-Control", "no-store")
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]interface{}{
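
A note on the comparator above: sort.Slice is not guaranteed to be stable, so items within the folder group (or the file group) may swap relative order between requests. If the cache's original order should survive within each group, sort.SliceStable is a drop-in replacement; a minimal sketch using the same fields:

// Folders first, preserving the existing relative order within each group.
sort.SliceStable(results, func(i, j int) bool {
	return results[i].IsDir && !results[j].IsDir
})

Since the same sort-arg validation block appears in both this handler and ListDir below, it is also a candidate for a small shared helper.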


@@ -13,6 +13,7 @@ import (
"math"
"net/http"
"path/filepath"
"sort"
"strconv"
"strings"
)
@@ -26,6 +27,17 @@ func ListDir(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedC
log := logging.GetLogger()
pathArg := r.URL.Query().Get("path")
sortArg := r.URL.Query().Get("sort")
var folderSorting string
if sortArg == "default" || sortArg == "" {
folderSorting = "default"
} else if sortArg == "folders" {
folderSorting = "folders"
} else {
helpers.Return400Msg("sort arg must be 'default' (no sorting) or 'folders' (sort folders to the front of the list)", w)
return
}
// Clean the path to prevent directory traversal
// filepath.Clean() below will do most of the work but these are just a few checks
// Also this will break the cache because it will create another entry for the relative path
@@ -41,20 +53,26 @@ func ListDir(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedC
fullPath := filepath.Join(cfg.RootDir, filepath.Clean("/"+pathArg))
relPath := cache.StripRootDir(fullPath, cfg.RootDir)
needToCrawlRecusive := false
// Try to get the data from the cache
cacheItem, found := sharedCache.Get(relPath)
if !found {
needToCrawlRecusive = true
cacheItem = helpers.HandleFileNotFound(relPath, fullPath, sharedCache, cfg, w)
// Start a recursive crawl in the background.
// We've already gotten our cached item (it may be nil if it doesn't exist), so this won't affect our results
go func() {
log.Debugf("Starting background recursive crawl for %s", fullPath)
dc := cache.NewDirectoryCrawler(sharedCache)
err := dc.Crawl(fullPath, cfg.CachePrintNew, cfg.CrawlWorkers, cfg.RootDir, cfg.CrawlerParseMIME)
if err != nil {
log.Errorf("LIST - background recursive crawl failed: %s", err)
}
}()
}
if cacheItem == nil {
// The errors have already been handled in handleFileNotFound() so we're good to just exit
return
return // The errors have already been handled in HandleFileNotFound() so we're good to just exit
}
// Create a deep copy of the item
// Create a deep copy of the cached item so we don't modify the item in the cache
var buf bytes.Buffer
enc := gob.NewEncoder(&buf)
dec := gob.NewDecoder(&buf)
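
The gob round-trip started here is a standard Go deep-copy trick: encoding the cached item into a buffer and decoding it into a fresh value duplicates nested slices and pointers, so the handler can paginate and mutate its copy without touching the shared cache entry. A self-contained sketch of the pattern (Node stands in for data.Item):

package main

import (
	"bytes"
	"encoding/gob"
	"fmt"
)

type Node struct {
	Name     string
	Children []*Node
}

// deepCopy round-trips src through gob to produce an independent copy.
func deepCopy(src *Node) (*Node, error) {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(src); err != nil {
		return nil, err
	}
	var dst Node
	if err := gob.NewDecoder(&buf).Decode(&dst); err != nil {
		return nil, err
	}
	return &dst, nil
}

func main() {
	orig := &Node{Name: "root", Children: []*Node{{Name: "child"}}}
	copied, _ := deepCopy(orig)
	copied.Children[0].Name = "changed"
	fmt.Println(orig.Children[0].Name) // prints "child": the original is untouched
}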
@@ -82,7 +100,7 @@ func ListDir(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedC
})
return
} else {
// Only update it if it hasn't been set already.
// Only update the mime in the cache if it hasn't been set already.
// TODO: need to make sure that when a re-crawl is triggered, the Type is set back to nil
if item.Type == nil {
fileExists, mimeType, ext, err := cache.GetMimeType(fullPath, true)
@@ -99,25 +117,14 @@ func ListDir(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedC
})
return
}
// Update the item's MIME in the sharedCache
item.Type = &mimeType
item.Extension = &ext
sharedCache.Add(relPath, &item) // take the address of item
// Update the original cached item's MIME in the sharedCache
cacheItem.Type = &mimeType
cacheItem.Extension = &ext
sharedCache.Add(relPath, cacheItem) // cacheItem is already a pointer, so no address-of is needed
}
}
}
if needToCrawlRecusive {
go func() {
log.Debugf("Starting background recursive crawl for %s", fullPath)
dc := cache.NewDirectoryCrawler(sharedCache)
err := dc.Crawl(fullPath, cfg.CachePrintNew, cfg.CrawlWorkers, cfg.RootDir, cfg.CrawlerParseMIME)
if err != nil {
log.Errorf("LIST - background recursive crawl failed: %s", err)
}
}()
}
response := map[string]interface{}{}
// Pagination
@@ -138,7 +145,20 @@ func ListDir(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedC
response["total_pages"] = int(totalPages)
}
paginatedChildren := item.Children
if folderSorting == "folders" {
sort.Slice(item.Children, func(i, j int) bool {
return item.Children[i].IsDir && !item.Children[j].IsDir
})
}
// Give the JSON encoder an empty slice so it emits [] instead of null
var paginatedChildren []*data.Item // this var is either the full item list or a paginated list depending on the query args
if item.Children != nil {
paginatedChildren = item.Children
} else {
paginatedChildren = make([]*data.Item, 0)
}
pageParam := r.URL.Query().Get("page")
if pageParam != "" {
page, err := strconv.Atoi(pageParam)
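
The empty-children handling above relies on a quirk of encoding/json: a nil slice marshals to null while a zero-length slice marshals to []. A quick runnable check:

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	var nilSlice []int
	emptySlice := make([]int, 0)
	a, _ := json.Marshal(nilSlice)
	b, _ := json.Marshal(emptySlice)
	fmt.Println(string(a)) // null
	fmt.Println(string(b)) // []
}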
@@ -178,6 +198,10 @@ func ListDir(w http.ResponseWriter, r *http.Request, cfg *config.Config, sharedC
w.Header().Set("Cache-Control", "no-store")
//}
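// Strip each child's own children so the listing is only one level deep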
for i := range paginatedChildren {
paginatedChildren[i].Children = nil
}
response["item"] = map[string]interface{}{
"path": item.Path,
"name": item.Name,

src/cache/crawl.go (vendored)

@@ -164,6 +164,14 @@ func (dc *DirectoryCrawler) Crawl(path string, CachePrintNew bool, numWorkers in
activeScans[path] = true
mapMutex.Unlock()
// Get a list of all keys in the cache that belong to this directory
keys := make([]string, 0)
for _, key := range dc.cache.Keys() {
if strings.HasPrefix(key, path) {
keys = append(keys, key)
}
}
// Remove all entries in the cache that belong to this directory so we can start fresh
for _, key := range dc.cache.Keys() {
if strings.HasPrefix(key, path) {
@@ -195,6 +203,11 @@ func (dc *DirectoryCrawler) Crawl(path string, CachePrintNew bool, numWorkers in
dc.cache.Add(StripRootDir(path, RootDir), NewItem(path, info, CachePrintNew, RootDir, CrawlerParseMIME))
}
// After crawling, remove any keys that are still in the list (these are items that were not found on the filesystem)
for _, key := range keys {
dc.cache.Remove(key)
}
// Mark the scan as inactive
mapMutex.Lock()
activeScans[path] = false
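
The key bookkeeping in this file reads as a mark-and-sweep reconciliation: snapshot every cache key under the directory, recrawl, and finally evict whatever was not re-encountered. That only removes genuinely stale entries if keys re-added during the crawl are pruned from the snapshot somewhere outside these hunks; assuming such pruning, the idea in miniature:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Cache state before the recrawl: three entries under /docs.
	cache := map[string]bool{"/docs/a": true, "/docs/b": true, "/docs/c": true}

	// Mark: snapshot every key under the directory being recrawled.
	var stale []string
	for key := range cache {
		if strings.HasPrefix(key, "/docs") {
			stale = append(stale, key)
		}
	}

	// Recrawl re-encounters a and c; b has been deleted on disk.
	seen := map[string]bool{"/docs/a": true, "/docs/c": true}

	// Sweep: evict snapshot keys the crawl did not see again.
	for _, key := range stale {
		if !seen[key] {
			delete(cache, key)
		}
	}
	fmt.Println(len(cache)) // 2: /docs/a and /docs/c remain
}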
@@ -269,6 +282,26 @@ func (dc *DirectoryCrawler) walkDir(dir string, n *sync.WaitGroup, workers []*wo
// Add the directory to the cache after all of its children have been processed
dc.cache.Add(StripRootDir(dir, RootDir), dirItem)
// If the directory is not the root directory, update the parent directory's Children field
if dir != RootDir {
parentDir := filepath.Dir(dir)
parentItem, found := dc.cache.Get(StripRootDir(parentDir, RootDir))
if found {
// Remove the old version of the directory from the parent's Children field
for i, child := range parentItem.Children {
if child.Path == StripRootDir(dir, RootDir) {
parentItem.Children = append(parentItem.Children[:i], parentItem.Children[i+1:]...)
break
}
}
// Add the new version of the directory to the parent's Children field
parentItem.Children = append(parentItem.Children, dirItem)
// Update the parent directory in the cache
dc.cache.Add(StripRootDir(parentDir, RootDir), parentItem)
}
}
return nil
}
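
One detail of the child update above: append(parentItem.Children[:i], parentItem.Children[i+1:]...) removes the element by shifting the tail left within the same backing array, so a concurrent reader holding the cached parent can observe the slice mid-mutation; the recache change below sidesteps this by building a fresh slice instead. The splice idiom in isolation:

package main

import "fmt"

func main() {
	s := []string{"a", "b", "c", "d"}
	i := 1 // index of the element to remove
	s = append(s[:i], s[i+1:]...)
	fmt.Println(s) // [a c d]
}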

src/cache/recache.go (vendored)

@@ -49,16 +49,40 @@ func Recache(path string, cfg *config.Config, sharedCache *lru.Cache[string, *da
parentDir := filepath.Dir(path)
parentItem, found := sharedCache.Get(parentDir)
if found {
// Remove the old sub-directory from the parent directory's Children field
for i, child := range parentItem.Children {
if child.Path == path {
parentItem.Children = append(parentItem.Children[:i], parentItem.Children[i+1:]...)
break
}
}
// Update the parent directory's Children field to include the new sub-directory
info, err := os.Stat(path)
if err != nil {
log.Errorf("RECACHE ERROR: %s", err.Error())
} else {
newItem := NewItem(path, info, cfg.CachePrintNew, cfg.RootDir, cfg.CrawlerParseMIME)
parentItem.Children = append(parentItem.Children, newItem)
// Create a new slice that contains all items from the Children field except the old directory
newChildren := make([]*data.Item, 0, len(parentItem.Children))
for _, child := range parentItem.Children {
if child.Path != newItem.Path {
newChildren = append(newChildren, child)
}
}
// Append the new directory to the newChildren slice
newChildren = append(newChildren, newItem)
// Assign the newChildren slice to the Children field
parentItem.Children = newChildren
// Update the parent directory in the cache
sharedCache.Add(parentDir, parentItem)
}
} else {
// If the parent directory isn't in the cache, crawl it
err := dc.Crawl(parentDir, cfg.CachePrintNew, cfg.CrawlWorkers, cfg.RootDir, cfg.CrawlerParseMIME)
if err != nil {
log.Errorf("RECACHE ERROR: %s", err.Error())
}
}
}()
}
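
Both this hunk and walkDir above now perform the same remove-then-append update on a parent's Children, so the dance could be folded into one helper. A sketch with a hypothetical name, using the Path and Children fields shown above:

// replaceChild returns a new Children slice with any entry matching
// newChild.Path dropped and newChild appended. Returning a fresh slice
// leaves the old one intact for any reader still holding it.
func replaceChild(children []*data.Item, newChild *data.Item) []*data.Item {
	out := make([]*data.Item, 0, len(children)+1)
	for _, child := range children {
		if child.Path != newChild.Path {
			out = append(out, child)
		}
	}
	return append(out, newChild)
}

Call sites would then read parentItem.Children = replaceChild(parentItem.Children, newItem).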