fix error when a file in the cache is not found on the disk
This commit is contained in:
parent
634f3eb8ea
commit
a96708f6cf
|
@ -3,6 +3,7 @@ package CacheItem
|
|||
import (
|
||||
"crazyfs/config"
|
||||
"crazyfs/file"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
@ -12,7 +13,7 @@ import (
|
|||
func NewItem(fullPath string, info os.FileInfo) *Item {
|
||||
if !strings.HasPrefix(fullPath, config.GetConfig().RootDir) {
|
||||
// Sanity check
|
||||
log.Fatalf("NewItem was not passed an absolute path. The path must start with the RootDir: %s", fullPath)
|
||||
panic(fmt.Sprintf("NewItem was not passed an absolute path. The path must start with the RootDir: %s", fullPath))
|
||||
}
|
||||
|
||||
if config.GetConfig().CachePrintNew {
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
"fmt"
|
||||
lru "github.com/hashicorp/golang-lru/v2"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
|
@ -138,8 +139,30 @@ func Download(w http.ResponseWriter, r *http.Request, sharedCache *lru.Cache[str
|
|||
w.Header().Set("Content-Type", mimeType+"; charset="+encoding)
|
||||
}
|
||||
|
||||
// =============================================================================================================
|
||||
// Send the file to the client.
|
||||
http.ServeFile(w, r, fullPath)
|
||||
// We are using `http.ServeContent()` since this allows us to catch and handle any missing files. `http.ServeFile()` returns
|
||||
// the default 404 page if the file is missing from the disk.
|
||||
|
||||
// Open the file
|
||||
openFile, err := os.Open(fullPath)
|
||||
if err != nil {
|
||||
sharedCache.Remove(relPath) // remove it from the cache
|
||||
helpers.ReturnFake404Msg("file missing from disk, cache out of date", w)
|
||||
return
|
||||
}
|
||||
defer openFile.Close()
|
||||
|
||||
// Get the file info
|
||||
fileInfo, err := openFile.Stat()
|
||||
if err != nil {
|
||||
log.Errorf(`DOWNLOAD - failed to stat file "%s" - %s`, fullPath, err)
|
||||
helpers.Return500Msg(w)
|
||||
return
|
||||
}
|
||||
|
||||
// If the file exists, serve it
|
||||
http.ServeContent(w, r, fileInfo.Name(), fileInfo.ModTime(), openFile)
|
||||
} else {
|
||||
// Stream archive of the directory here
|
||||
w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s.zip"`, item.Name))
|
||||
|
|
|
@ -5,7 +5,6 @@ import (
|
|||
"crazyfs/cache"
|
||||
"crazyfs/cache/DirectoryCrawler"
|
||||
"crazyfs/config"
|
||||
"crazyfs/logging"
|
||||
lru "github.com/hashicorp/golang-lru/v2"
|
||||
"net/http"
|
||||
"os"
|
||||
|
@ -15,9 +14,7 @@ import (
|
|||
|
||||
// HandleFileNotFound if the data is not in the cache, start a new crawler
|
||||
func HandleFileNotFound(relPath string, fullPath string, sharedCache *lru.Cache[string, *CacheItem.Item], w http.ResponseWriter) *CacheItem.Item {
|
||||
log := logging.GetLogger()
|
||||
|
||||
//log.Fatalf("CRAWLER - %s not in cache, crawling", fullPath)
|
||||
// TODO: implement some sort of backoff or delay for repeated calls to recache the same path.
|
||||
|
||||
log.Debugf("CRAWLER - %s not in cache, crawling", fullPath)
|
||||
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
|
||||
|
@ -35,9 +32,21 @@ func HandleFileNotFound(relPath string, fullPath string, sharedCache *lru.Cache[
|
|||
// return nil
|
||||
//}
|
||||
|
||||
// We don't want to traverse the entire directory tree since we'll only return the current directory anyways
|
||||
item, err := dc.CrawlNoRecursion(fullPath)
|
||||
// Start a recursive crawl in the background immediately, so we don't risk the client disconnecting before we've had
|
||||
// a chance to kick off a recursive crawl.
|
||||
go func() {
|
||||
log.Debugf("Starting background recursive crawl for %s", fullPath)
|
||||
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
|
||||
start := time.Now()
|
||||
err := dc.Crawl(fullPath)
|
||||
if err != nil {
|
||||
log.Errorf("LIST - background recursive crawl failed: %s", err)
|
||||
}
|
||||
log.Debugf("Finished background recursive crawl for %s, elapsed time: %s", fullPath, time.Since(start).Round(time.Second))
|
||||
}()
|
||||
|
||||
// Start a blocking non-recursive crawl.
|
||||
item, err := dc.CrawlNoRecursion(fullPath)
|
||||
if os.IsNotExist(err) || item == nil {
|
||||
ReturnFake404Msg("path not found", w)
|
||||
return nil
|
||||
|
@ -47,35 +56,26 @@ func HandleFileNotFound(relPath string, fullPath string, sharedCache *lru.Cache[
|
|||
return nil
|
||||
}
|
||||
|
||||
// Start a recursive crawl in the background.
|
||||
// We've already gotten our cached CacheItem (may be null if it doesn't exist) so this won't affect our results
|
||||
go func() {
|
||||
log.Debugf("Starting background recursive crawl for %s", fullPath)
|
||||
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
|
||||
start := time.Now()
|
||||
err := dc.Crawl(fullPath, true)
|
||||
if err != nil {
|
||||
log.Errorf("LIST - background recursive crawl failed: %s", err)
|
||||
}
|
||||
log.Debugf("Finished background recursive crawl for %s, elapsed time: %s", fullPath, time.Since(start).Round(time.Second))
|
||||
}()
|
||||
|
||||
// Try to get the data from the cache again
|
||||
// Try to get the data from the cache again.
|
||||
item, found := sharedCache.Get(relPath)
|
||||
if !found {
|
||||
// TODO: let's not re-check the disk if the file is still not in the cache. Instead, let's just assume that it doesn't exist.
|
||||
ReturnFake404Msg("path not found", w)
|
||||
|
||||
// TODO: this is the old code in case this isn't the right approach.
|
||||
// If the data is still not in the cache, check if the file or directory exists.
|
||||
// We could check if the file exists before checking the cache but we want to limit disk reads.
|
||||
if _, err := os.Stat(fullPath); os.IsNotExist(err) {
|
||||
log.Debugf("File not in cache: %s", fullPath)
|
||||
// If the file or directory does not exist, return a 404 status code and a message
|
||||
ReturnFake404Msg("file or directory not found", w)
|
||||
return nil
|
||||
} else if err != nil {
|
||||
// If there was an error checking if the file or directory exists, return a 500 status code and the error
|
||||
log.Errorf("LIST - %s", err.Error())
|
||||
Return500Msg(w)
|
||||
return nil
|
||||
}
|
||||
//if _, err := os.Stat(fullPath); os.IsNotExist(err) {
|
||||
// log.Debugf("File not in cache: %s", fullPath)
|
||||
// // If the file or directory does not exist, return a 404 status code and a message
|
||||
// ReturnFake404Msg("file or directory not found", w)
|
||||
// return nil
|
||||
//} else if err != nil {
|
||||
// // If there was an error checking if the file or directory exists, return a 500 status code and the error
|
||||
// log.Errorf("LIST - %s", err.Error())
|
||||
// Return500Msg(w)
|
||||
// return nil
|
||||
//}
|
||||
}
|
||||
|
||||
// If CacheItem is still nil, error
|
||||
|
|
|
@ -9,21 +9,29 @@ import (
|
|||
)
|
||||
|
||||
func (dc *DirectoryCrawler) walkRecursiveFunc(path string, info os.FileInfo, err error) error {
|
||||
dc.processPath(path, info)
|
||||
processErr := dc.processPath(path, info)
|
||||
if processErr != nil {
|
||||
log.Errorf("CRAWLER - walkRecursiveFunc() failed - %s - %s", processErr, path)
|
||||
return processErr
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (dc *DirectoryCrawler) walkNonRecursiveFunc(path string, dir os.DirEntry, err error) error {
|
||||
info, err := dir.Info()
|
||||
if err != nil {
|
||||
log.Errorf("CRAWLER - walkNonRecursiveFunc() - get info failed - %s", err)
|
||||
return err
|
||||
info, infoErr := dir.Info()
|
||||
if infoErr != nil {
|
||||
log.Errorf("CRAWLER - walkNonRecursiveFunc() - get info failed - %s - %s", infoErr, path)
|
||||
return infoErr
|
||||
}
|
||||
processErr := dc.processPath(path, info)
|
||||
if processErr != nil {
|
||||
log.Errorf("CRAWLER - walkNonRecursiveFunc() failed - %s - %s", processErr, path)
|
||||
return processErr
|
||||
}
|
||||
dc.processPath(path, info)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (dc *DirectoryCrawler) Crawl(fullPath string, shouldBlock bool) error {
|
||||
func (dc *DirectoryCrawler) Crawl(fullPath string) error {
|
||||
info, err := os.Lstat(fullPath)
|
||||
if os.IsNotExist(err) {
|
||||
// If the path doesn't exist, just silently exit
|
||||
|
@ -33,11 +41,13 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, shouldBlock bool) error {
|
|||
log.Errorf("CRAWLER - Crawl() - os.Lstat() failed - %s", err)
|
||||
return err
|
||||
}
|
||||
|
||||
//if !config.FollowSymlinks && info.Mode()&os.ModeSymlink > 0 {
|
||||
// msg := fmt.Sprintf("CRAWL - tried to crawl a symlink (not allowed in config): %s", fullPath)
|
||||
// log.Warnf(msg)
|
||||
// return errors.New(msg)
|
||||
//}
|
||||
|
||||
relPath := file.StripRootDir(fullPath)
|
||||
|
||||
dc.cache.Remove(relPath)
|
||||
|
@ -51,7 +61,7 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, shouldBlock bool) error {
|
|||
}
|
||||
}
|
||||
|
||||
// Remove all entries in the cache that belong to this directory so we can start fresh
|
||||
// Remove all entries in the cache that belong to this directory, so we can start fresh.
|
||||
for _, key := range keys {
|
||||
dc.cache.Remove(key)
|
||||
}
|
||||
|
@ -62,11 +72,12 @@ func (dc *DirectoryCrawler) Crawl(fullPath string, shouldBlock bool) error {
|
|||
log.Errorf("CRAWLER - crawl for %s failed: %s", fullPath, err)
|
||||
}
|
||||
|
||||
// TODO: don't think this is needed since we remove all the children of this item
|
||||
// After crawling, remove any keys that are still in the list (these are items that were not found on the filesystem)
|
||||
//dc.CleanupDeletedFiles(path)
|
||||
//dc.CleanupDeletedFiles(fullPath)
|
||||
} else {
|
||||
// If the path is a file, add it to the cache directly
|
||||
dc.AddCacheItem(relPath, info)
|
||||
dc.AddCacheItem(fullPath, info)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -43,7 +43,7 @@ func startCrawl(sharedCache *lru.Cache[string, *CacheItem.Item], wg *sync.WaitGr
|
|||
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
|
||||
log.Infoln("CRAWLER - Starting a crawl...")
|
||||
start := time.Now()
|
||||
err := dc.Crawl(config.GetConfig().RootDir, true)
|
||||
err := dc.Crawl(config.GetConfig().RootDir)
|
||||
duration := time.Since(start).Round(time.Second)
|
||||
if err != nil {
|
||||
log.Warnf("CRAWLER - Crawl failed: %s", err)
|
||||
|
|
|
@ -26,7 +26,7 @@ func InitialCrawl(sharedCache *lru.Cache[string, *CacheItem.Item]) {
|
|||
InitialCrawlInProgress = true
|
||||
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
|
||||
//start := time.Now()
|
||||
err := dc.Crawl(config.GetConfig().RootDir, true)
|
||||
err := dc.Crawl(config.GetConfig().RootDir)
|
||||
if err != nil {
|
||||
log.Errorf("LIST - background recursive crawl failed: %s", err)
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ func CheckAndRecache(path string, sharedCache *lru.Cache[string, *CacheItem.Item
|
|||
go func() {
|
||||
defer func() { <-sem }() // release the token when done
|
||||
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
|
||||
err := dc.Crawl(path, true)
|
||||
err := dc.Crawl(path)
|
||||
if err != nil {
|
||||
log.Errorf("RECACHE ERROR: %s", err.Error())
|
||||
}
|
||||
|
@ -43,7 +43,7 @@ func Recache(path string, sharedCache *lru.Cache[string, *CacheItem.Item]) {
|
|||
go func() {
|
||||
defer func() { <-sem }() // release the token when done
|
||||
dc := DirectoryCrawler.NewDirectoryCrawler(sharedCache)
|
||||
err := dc.Crawl(path, true)
|
||||
err := dc.Crawl(path)
|
||||
if err != nil {
|
||||
log.Errorf("RECACHE ERROR: %s", err.Error())
|
||||
}
|
||||
|
@ -84,7 +84,7 @@ func Recache(path string, sharedCache *lru.Cache[string, *CacheItem.Item]) {
|
|||
} else {
|
||||
// If the parent directory isn't in the cache, crawl it
|
||||
log.Infof("RECACHE - crawling parent directory since it isn't in the cache yet: %s", parentDir)
|
||||
err := dc.Crawl(parentDir, true)
|
||||
err := dc.Crawl(parentDir)
|
||||
if err != nil {
|
||||
log.Errorf("RECACHE ERROR: %s", err.Error())
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue