fix error when a file in the cache is not found on the disk

This commit is contained in:
Cyberes 2023-12-11 16:18:12 -07:00
parent 634f3eb8ea
commit a96708f6cf
8 changed files with 83 additions and 48 deletions

View File

@ -3,6 +3,7 @@ package CacheItem
import ( import (
"crazyfs/config" "crazyfs/config"
"crazyfs/file" "crazyfs/file"
"fmt"
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
@ -12,7 +13,7 @@ import (
func NewItem(fullPath string, info os.FileInfo) *Item { func NewItem(fullPath string, info os.FileInfo) *Item {
if !strings.HasPrefix(fullPath, config.GetConfig().RootDir) { if !strings.HasPrefix(fullPath, config.GetConfig().RootDir) {
// Retard check // Retard check
log.Fatalf("NewItem was not passed an absolute path. The path must start with the RootDir: %s", fullPath) panic(fmt.Sprintf("NewItem was not passed an absolute path. The path must start with the RootDir: %s", fullPath))
} }
if config.GetConfig().CachePrintNew { if config.GetConfig().CachePrintNew {

View File

@ -8,6 +8,7 @@ import (
"fmt" "fmt"
lru "github.com/hashicorp/golang-lru/v2" lru "github.com/hashicorp/golang-lru/v2"
"net/http" "net/http"
"os"
"strings" "strings"
) )
@ -138,8 +139,30 @@ func Download(w http.ResponseWriter, r *http.Request, sharedCache *lru.Cache[str
w.Header().Set("Content-Type", mimeType+"; charset="+encoding) w.Header().Set("Content-Type", mimeType+"; charset="+encoding)
} }
// =============================================================================================================
// Send the file to the client. // Send the file to the client.
http.ServeFile(w, r, fullPath) // We are using `http.ServeContent()` since this allows us to catch and handle any missing files. `http.ServeFile()` returns
// the default 404 page if the file is missing from the disk.
// Open the file
openFile, err := os.Open(fullPath)
if err != nil {
sharedCache.Remove(relPath) // remove it from the cache
helpers.ReturnFake404Msg("file missing from disk, cache out of date", w)
return
}
defer openFile.Close()
// Get the file info
fileInfo, err := openFile.Stat()
if err != nil {
log.Errorf(`DOWNLOAD - failed to stat file "%s" - %s`, fullPath, err)
helpers.Return500Msg(w)
return
}
// If the file exists, serve it
http.ServeContent(w, r, fileInfo.Name(), fileInfo.ModTime(), openFile)
} else { } else {
// Stream archive of the directory here // Stream archive of the directory here
w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s.zip"`, item.Name)) w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s.zip"`, item.Name))

View File

@ -5,7 +5,6 @@ import (
"crazyfs/cache" "crazyfs/cache"
"crazyfs/cache/DirectoryCrawler" "crazyfs/cache/DirectoryCrawler"
"crazyfs/config" "crazyfs/config"
"crazyfs/logging"
lru "github.com/hashicorp/golang-lru/v2" lru "github.com/hashicorp/golang-lru/v2"
"net/http" "net/http"
"os" "os"
@ -15,9 +14,7 @@ import (
// HandleFileNotFound if the data is not in the cache, start a new crawler // HandleFileNotFound if the data is not in the cache, start a new crawler
func HandleFileNotFound(relPath string, fullPath string, sharedCache *lru.Cache[string, *CacheItem.Item], w http.ResponseWriter) *CacheItem.Item { func HandleFileNotFound(relPath string, fullPath string, sharedCache *lru.Cache[string, *CacheItem.Item], w http.ResponseWriter) *CacheItem.Item {
log := logging.GetLogger() // TODO: implement some sort of backoff or delay for repeated calls to recache the same path.
//log.Fatalf("CRAWLER - %s not in cache, crawling", fullPath)
log.Debugf("CRAWLER - %s not in cache, crawling", fullPath) log.Debugf("CRAWLER - %s not in cache, crawling", fullPath)
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache) dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
@ -35,9 +32,21 @@ func HandleFileNotFound(relPath string, fullPath string, sharedCache *lru.Cache[
// return nil // return nil
//} //}
// We don't want to traverse the entire directory tree since we'll only return the current directory anyways // Start a recursive crawl in the background immediately, so we don't risk the client disconnecting before we've had
item, err := dc.CrawlNoRecursion(fullPath) // a chance to kick off a recursive crawl.
go func() {
log.Debugf("Starting background recursive crawl for %s", fullPath)
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
start := time.Now()
err := dc.Crawl(fullPath)
if err != nil {
log.Errorf("LIST - background recursive crawl failed: %s", err)
}
log.Debugf("Finished background recursive crawl for %s, elapsed time: %s", fullPath, time.Since(start).Round(time.Second))
}()
// Start a blocking non-recursive crawl.
item, err := dc.CrawlNoRecursion(fullPath)
if os.IsNotExist(err) || item == nil { if os.IsNotExist(err) || item == nil {
ReturnFake404Msg("path not found", w) ReturnFake404Msg("path not found", w)
return nil return nil
@ -47,35 +56,26 @@ func HandleFileNotFound(relPath string, fullPath string, sharedCache *lru.Cache[
return nil return nil
} }
// Start a recursive crawl in the background. // Try to get the data from the cache again.
// We've already gotten our cached CacheItem (may be null if it doesn't exist) so this won't affect our results
go func() {
log.Debugf("Starting background recursive crawl for %s", fullPath)
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
start := time.Now()
err := dc.Crawl(fullPath, true)
if err != nil {
log.Errorf("LIST - background recursive crawl failed: %s", err)
}
log.Debugf("Finished background recursive crawl for %s, elapsed time: %s", fullPath, time.Since(start).Round(time.Second))
}()
// Try to get the data from the cache again
item, found := sharedCache.Get(relPath) item, found := sharedCache.Get(relPath)
if !found { if !found {
// TODO: let's not re-check the disk if the file is still not in the cache. Instead, let's just assume that it doesn't exist.
ReturnFake404Msg("path not found", w)
// TODO: this is the old code in case this isn't the right approach.
// If the data is still not in the cache, check if the file or directory exists. // If the data is still not in the cache, check if the file or directory exists.
// We could check if the file exists before checking the cache but we want to limit disk reads. // We could check if the file exists before checking the cache but we want to limit disk reads.
if _, err := os.Stat(fullPath); os.IsNotExist(err) { //if _, err := os.Stat(fullPath); os.IsNotExist(err) {
log.Debugf("File not in cache: %s", fullPath) // log.Debugf("File not in cache: %s", fullPath)
// If the file or directory does not exist, return a 404 status code and a message // // If the file or directory does not exist, return a 404 status code and a message
ReturnFake404Msg("file or directory not found", w) // ReturnFake404Msg("file or directory not found", w)
return nil // return nil
} else if err != nil { //} else if err != nil {
// If there was an error checking if the file or directory exists, return a 500 status code and the error // // If there was an error checking if the file or directory exists, return a 500 status code and the error
log.Errorf("LIST - %s", err.Error()) // log.Errorf("LIST - %s", err.Error())
Return500Msg(w) // Return500Msg(w)
return nil // return nil
} //}
} }
// If CacheItem is still nil, error // If CacheItem is still nil, error

View File

@ -9,21 +9,29 @@ import (
) )
func (dc *DirectoryCrawler) walkRecursiveFunc(path string, info os.FileInfo, err error) error { func (dc *DirectoryCrawler) walkRecursiveFunc(path string, info os.FileInfo, err error) error {
dc.processPath(path, info) processErr := dc.processPath(path, info)
if processErr != nil {
log.Errorf("CRAWLER - walkRecursiveFunc() failed - %s - %s", processErr, path)
return processErr
}
return nil return nil
} }
func (dc *DirectoryCrawler) walkNonRecursiveFunc(path string, dir os.DirEntry, err error) error { func (dc *DirectoryCrawler) walkNonRecursiveFunc(path string, dir os.DirEntry, err error) error {
info, err := dir.Info() info, infoErr := dir.Info()
if err != nil { if infoErr != nil {
log.Errorf("CRAWLER - walkNonRecursiveFunc() - get info failed - %s", err) log.Errorf("CRAWLER - walkNonRecursiveFunc() - get info failed - %s - %s", infoErr, path)
return err return infoErr
}
processErr := dc.processPath(path, info)
if processErr != nil {
log.Errorf("CRAWLER - walkNonRecursiveFunc() failed - %s - %s", processErr, path)
return processErr
} }
dc.processPath(path, info)
return nil return nil
} }
func (dc *DirectoryCrawler) Crawl(fullPath string, shouldBlock bool) error { func (dc *DirectoryCrawler) Crawl(fullPath string) error {
info, err := os.Lstat(fullPath) info, err := os.Lstat(fullPath)
if os.IsNotExist(err) { if os.IsNotExist(err) {
// If the path doesn't exist, just silently exit // If the path doesn't exist, just silently exit
@ -33,11 +41,13 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, shouldBlock bool) error {
log.Errorf("CRAWLER - Crawl() - os.Lstat() failed - %s", err) log.Errorf("CRAWLER - Crawl() - os.Lstat() failed - %s", err)
return err return err
} }
//if !config.FollowSymlinks && info.Mode()&os.ModeSymlink > 0 { //if !config.FollowSymlinks && info.Mode()&os.ModeSymlink > 0 {
// msg := fmt.Sprintf("CRAWL - tried to crawl a symlink (not allowed in config): %s", fullPath) // msg := fmt.Sprintf("CRAWL - tried to crawl a symlink (not allowed in config): %s", fullPath)
// log.Warnf(msg) // log.Warnf(msg)
// return errors.New(msg) // return errors.New(msg)
//} //}
relPath := file.StripRootDir(fullPath) relPath := file.StripRootDir(fullPath)
dc.cache.Remove(relPath) dc.cache.Remove(relPath)
@ -51,7 +61,7 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, shouldBlock bool) error {
} }
} }
// Remove all entries in the cache that belong to this directory so we can start fresh // Remove all entries in the cache that belong to this directory, so we can start fresh.
for _, key := range keys { for _, key := range keys {
dc.cache.Remove(key) dc.cache.Remove(key)
} }
@ -62,11 +72,12 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, shouldBlock bool) error {
log.Errorf("CRAWLER - crawl for %s failed: %s", fullPath, err) log.Errorf("CRAWLER - crawl for %s failed: %s", fullPath, err)
} }
// TODO: don't think this is needed since we remove all the children of this item
// After crawling, remove any keys that are still in the list (these are items that were not found on the filesystem) // After crawling, remove any keys that are still in the list (these are items that were not found on the filesystem)
//dc.CleanupDeletedFiles(path) //dc.CleanupDeletedFiles(fullPath)
} else { } else {
// If the path is a file, add it to the cache directly // If the path is a file, add it to the cache directly
dc.AddCacheItem(relPath, info) dc.AddCacheItem(fullPath, info)
} }
return nil return nil
} }

View File

@ -43,7 +43,7 @@ func startCrawl(sharedCache *lru.Cache[string, *CacheItem.Item], wg *sync.WaitGr
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache) dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
log.Infoln("CRAWLER - Starting a crawl...") log.Infoln("CRAWLER - Starting a crawl...")
start := time.Now() start := time.Now()
err := dc.Crawl(config.GetConfig().RootDir, true) err := dc.Crawl(config.GetConfig().RootDir)
duration := time.Since(start).Round(time.Second) duration := time.Since(start).Round(time.Second)
if err != nil { if err != nil {
log.Warnf("CRAWLER - Crawl failed: %s", err) log.Warnf("CRAWLER - Crawl failed: %s", err)

View File

@ -26,7 +26,7 @@ func InitialCrawl(sharedCache *lru.Cache[string, *CacheItem.Item]) {
InitialCrawlInProgress = true InitialCrawlInProgress = true
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache) dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
//start := time.Now() //start := time.Now()
err := dc.Crawl(config.GetConfig().RootDir, true) err := dc.Crawl(config.GetConfig().RootDir)
if err != nil { if err != nil {
log.Errorf("LIST - background recursive crawl failed: %s", err) log.Errorf("LIST - background recursive crawl failed: %s", err)
} }

View File

@ -27,7 +27,7 @@ func CheckAndRecache(path string, sharedCache *lru.Cache[string, *CacheItem.Item
go func() { go func() {
defer func() { <-sem }() // release the token when done defer func() { <-sem }() // release the token when done
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache) dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
err := dc.Crawl(path, true) err := dc.Crawl(path)
if err != nil { if err != nil {
log.Errorf("RECACHE ERROR: %s", err.Error()) log.Errorf("RECACHE ERROR: %s", err.Error())
} }
@ -43,7 +43,7 @@ func Recache(path string, sharedCache *lru.Cache[string, *CacheItem.Item]) {
go func() { go func() {
defer func() { <-sem }() // release the token when done defer func() { <-sem }() // release the token when done
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache) dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
err := dc.Crawl(path, true) err := dc.Crawl(path)
if err != nil { if err != nil {
log.Errorf("RECACHE ERROR: %s", err.Error()) log.Errorf("RECACHE ERROR: %s", err.Error())
} }
@ -84,7 +84,7 @@ func Recache(path string, sharedCache *lru.Cache[string, *CacheItem.Item]) {
} else { } else {
// If the parent directory isn't in the cache, crawl it // If the parent directory isn't in the cache, crawl it
log.Infof("RECACHE - crawling parent directory since it isn't in the cache yet: %s", parentDir) log.Infof("RECACHE - crawling parent directory since it isn't in the cache yet: %s", parentDir)
err := dc.Crawl(parentDir, true) err := dc.Crawl(parentDir)
if err != nil { if err != nil {
log.Errorf("RECACHE ERROR: %s", err.Error()) log.Errorf("RECACHE ERROR: %s", err.Error())
} }