fix error when a file in the cache is not found on the disk

This commit is contained in:
Cyberes 2023-12-11 16:18:12 -07:00
parent 634f3eb8ea
commit a96708f6cf
8 changed files with 83 additions and 48 deletions

View File

@ -3,6 +3,7 @@ package CacheItem
import (
"crazyfs/config"
"crazyfs/file"
"fmt"
"os"
"path/filepath"
"strings"
@ -12,7 +13,7 @@ import (
func NewItem(fullPath string, info os.FileInfo) *Item {
if !strings.HasPrefix(fullPath, config.GetConfig().RootDir) {
// Sanity check
log.Fatalf("NewItem was not passed an absolute path. The path must start with the RootDir: %s", fullPath)
panic(fmt.Sprintf("NewItem was not passed an absolute path. The path must start with the RootDir: %s", fullPath))
}
if config.GetConfig().CachePrintNew {

View File

@ -8,6 +8,7 @@ import (
"fmt"
lru "github.com/hashicorp/golang-lru/v2"
"net/http"
"os"
"strings"
)
@ -138,8 +139,30 @@ func Download(w http.ResponseWriter, r *http.Request, sharedCache *lru.Cache[str
w.Header().Set("Content-Type", mimeType+"; charset="+encoding)
}
// =============================================================================================================
// Send the file to the client.
http.ServeFile(w, r, fullPath)
// We are using `http.ServeContent()` since this allows us to catch and handle any missing files. `http.ServeFile()` returns
// the default 404 page if the file is missing from the disk.
// Open the file
openFile, err := os.Open(fullPath)
if err != nil {
sharedCache.Remove(relPath) // remove it from the cache
helpers.ReturnFake404Msg("file missing from disk, cache out of date", w)
return
}
defer openFile.Close()
// Get the file info
fileInfo, err := openFile.Stat()
if err != nil {
log.Errorf(`DOWNLOAD - failed to stat file "%s" - %s`, fullPath, err)
helpers.Return500Msg(w)
return
}
// If the file exists, serve it
http.ServeContent(w, r, fileInfo.Name(), fileInfo.ModTime(), openFile)
} else {
// Stream archive of the directory here
w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s.zip"`, item.Name))

View File

@ -5,7 +5,6 @@ import (
"crazyfs/cache"
"crazyfs/cache/DirectoryCrawler"
"crazyfs/config"
"crazyfs/logging"
lru "github.com/hashicorp/golang-lru/v2"
"net/http"
"os"
@ -15,9 +14,7 @@ import (
// HandleFileNotFound if the data is not in the cache, start a new crawler
func HandleFileNotFound(relPath string, fullPath string, sharedCache *lru.Cache[string, *CacheItem.Item], w http.ResponseWriter) *CacheItem.Item {
log := logging.GetLogger()
//log.Fatalf("CRAWLER - %s not in cache, crawling", fullPath)
// TODO: implement some sort of backoff or delay for repeated calls to recache the same path.
log.Debugf("CRAWLER - %s not in cache, crawling", fullPath)
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
@ -35,9 +32,21 @@ func HandleFileNotFound(relPath string, fullPath string, sharedCache *lru.Cache[
// return nil
//}
// We don't want to traverse the entire directory tree since we'll only return the current directory anyways
item, err := dc.CrawlNoRecursion(fullPath)
// Start a recursive crawl in the background immediately, so we don't risk the client disconnecting before we've had
a chance to kick off a recursive crawl.
go func() {
log.Debugf("Starting background recursive crawl for %s", fullPath)
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
start := time.Now()
err := dc.Crawl(fullPath)
if err != nil {
log.Errorf("LIST - background recursive crawl failed: %s", err)
}
log.Debugf("Finished background recursive crawl for %s, elapsed time: %s", fullPath, time.Since(start).Round(time.Second))
}()
// Start a blocking non-recursive crawl.
item, err := dc.CrawlNoRecursion(fullPath)
if os.IsNotExist(err) || item == nil {
ReturnFake404Msg("path not found", w)
return nil
@ -47,35 +56,26 @@ func HandleFileNotFound(relPath string, fullPath string, sharedCache *lru.Cache[
return nil
}
// Start a recursive crawl in the background.
// We've already gotten our cached CacheItem (may be null if it doesn't exist) so this won't affect our results
go func() {
log.Debugf("Starting background recursive crawl for %s", fullPath)
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
start := time.Now()
err := dc.Crawl(fullPath, true)
if err != nil {
log.Errorf("LIST - background recursive crawl failed: %s", err)
}
log.Debugf("Finished background recursive crawl for %s, elapsed time: %s", fullPath, time.Since(start).Round(time.Second))
}()
// Try to get the data from the cache again
// Try to get the data from the cache again.
item, found := sharedCache.Get(relPath)
if !found {
// TODO: let's not re-check the disk if the file is still not in the cache. Instead, let's just assume that it doesn't exist.
ReturnFake404Msg("path not found", w)
// TODO: this is the old code in case this isn't the right approach.
// If the data is still not in the cache, check if the file or directory exists.
// We could check if the file exists before checking the cache but we want to limit disk reads.
if _, err := os.Stat(fullPath); os.IsNotExist(err) {
log.Debugf("File not in cache: %s", fullPath)
// If the file or directory does not exist, return a 404 status code and a message
ReturnFake404Msg("file or directory not found", w)
return nil
} else if err != nil {
// If there was an error checking if the file or directory exists, return a 500 status code and the error
log.Errorf("LIST - %s", err.Error())
Return500Msg(w)
return nil
}
//if _, err := os.Stat(fullPath); os.IsNotExist(err) {
// log.Debugf("File not in cache: %s", fullPath)
// // If the file or directory does not exist, return a 404 status code and a message
// ReturnFake404Msg("file or directory not found", w)
// return nil
//} else if err != nil {
// // If there was an error checking if the file or directory exists, return a 500 status code and the error
// log.Errorf("LIST - %s", err.Error())
// Return500Msg(w)
// return nil
//}
}
// If CacheItem is still nil, error

View File

@ -9,21 +9,29 @@ import (
)
func (dc *DirectoryCrawler) walkRecursiveFunc(path string, info os.FileInfo, err error) error {
dc.processPath(path, info)
processErr := dc.processPath(path, info)
if processErr != nil {
log.Errorf("CRAWLER - walkRecursiveFunc() failed - %s - %s", processErr, path)
return processErr
}
return nil
}
func (dc *DirectoryCrawler) walkNonRecursiveFunc(path string, dir os.DirEntry, err error) error {
info, err := dir.Info()
if err != nil {
log.Errorf("CRAWLER - walkNonRecursiveFunc() - get info failed - %s", err)
return err
info, infoErr := dir.Info()
if infoErr != nil {
log.Errorf("CRAWLER - walkNonRecursiveFunc() - get info failed - %s - %s", infoErr, path)
return infoErr
}
processErr := dc.processPath(path, info)
if processErr != nil {
log.Errorf("CRAWLER - walkNonRecursiveFunc() failed - %s - %s", processErr, path)
return processErr
}
dc.processPath(path, info)
return nil
}
func (dc *DirectoryCrawler) Crawl(fullPath string, shouldBlock bool) error {
func (dc *DirectoryCrawler) Crawl(fullPath string) error {
info, err := os.Lstat(fullPath)
if os.IsNotExist(err) {
// If the path doesn't exist, just silently exit
@ -33,11 +41,13 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, shouldBlock bool) error {
log.Errorf("CRAWLER - Crawl() - os.Lstat() failed - %s", err)
return err
}
//if !config.FollowSymlinks && info.Mode()&os.ModeSymlink > 0 {
// msg := fmt.Sprintf("CRAWL - tried to crawl a symlink (not allowed in config): %s", fullPath)
// log.Warnf(msg)
// return errors.New(msg)
//}
relPath := file.StripRootDir(fullPath)
dc.cache.Remove(relPath)
@ -51,7 +61,7 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, shouldBlock bool) error {
}
}
// Remove all entries in the cache that belong to this directory so we can start fresh
// Remove all entries in the cache that belong to this directory, so we can start fresh.
for _, key := range keys {
dc.cache.Remove(key)
}
@ -62,11 +72,12 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, shouldBlock bool) error {
log.Errorf("CRAWLER - crawl for %s failed: %s", fullPath, err)
}
// TODO: don't think this is needed since we remove all the children of this item
// After crawling, remove any keys that are still in the list (these are items that were not found on the filesystem)
//dc.CleanupDeletedFiles(path)
//dc.CleanupDeletedFiles(fullPath)
} else {
// If the path is a file, add it to the cache directly
dc.AddCacheItem(relPath, info)
dc.AddCacheItem(fullPath, info)
}
return nil
}

View File

@ -43,7 +43,7 @@ func startCrawl(sharedCache *lru.Cache[string, *CacheItem.Item], wg *sync.WaitGr
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
log.Infoln("CRAWLER - Starting a crawl...")
start := time.Now()
err := dc.Crawl(config.GetConfig().RootDir, true)
err := dc.Crawl(config.GetConfig().RootDir)
duration := time.Since(start).Round(time.Second)
if err != nil {
log.Warnf("CRAWLER - Crawl failed: %s", err)

View File

@ -26,7 +26,7 @@ func InitialCrawl(sharedCache *lru.Cache[string, *CacheItem.Item]) {
InitialCrawlInProgress = true
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
//start := time.Now()
err := dc.Crawl(config.GetConfig().RootDir, true)
err := dc.Crawl(config.GetConfig().RootDir)
if err != nil {
log.Errorf("LIST - background recursive crawl failed: %s", err)
}

View File

@ -27,7 +27,7 @@ func CheckAndRecache(path string, sharedCache *lru.Cache[string, *CacheItem.Item
go func() {
defer func() { <-sem }() // release the token when done
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
err := dc.Crawl(path, true)
err := dc.Crawl(path)
if err != nil {
log.Errorf("RECACHE ERROR: %s", err.Error())
}
@ -43,7 +43,7 @@ func Recache(path string, sharedCache *lru.Cache[string, *CacheItem.Item]) {
go func() {
defer func() { <-sem }() // release the token when done
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
err := dc.Crawl(path, true)
err := dc.Crawl(path)
if err != nil {
log.Errorf("RECACHE ERROR: %s", err.Error())
}
@ -84,7 +84,7 @@ func Recache(path string, sharedCache *lru.Cache[string, *CacheItem.Item]) {
} else {
// If the parent directory isn't in the cache, crawl it
log.Infof("RECACHE - crawling parent directory since it isn't in the cache yet: %s", parentDir)
err := dc.Crawl(parentDir, true)
err := dc.Crawl(parentDir)
if err != nil {
log.Errorf("RECACHE ERROR: %s", err.Error())
}