fix children directories not being added to parents, keep working on obscure exception, improve build script

This commit is contained in:
Cyberes 2024-03-14 16:57:00 -06:00
parent 8c76455b60
commit c19304c65f
7 changed files with 47 additions and 38 deletions

View File

@ -10,8 +10,8 @@ I needed to serve a very large dataset full of small files publicly over the int
existing solutions were subpar and I found myself having to create confusing Openresty scripts and complex CDN caching existing solutions were subpar and I found myself having to create confusing Openresty scripts and complex CDN caching
to keep things responsive and server load low. I gave up and decided to create my own solution. to keep things responsive and server load low. I gave up and decided to create my own solution.
You will likely need to store your data on an SSD for this. With an SSD, my server was able to crawl over 6 million You absolutely need an SSD for this. With an SSD, my server was able to crawl over 6 million files stored in a very
files stored in a very complicated directory tree in just 5 minutes. complicated directory tree in just 5 minutes.
## Features ## Features

18
build.sh Executable file
View File

@ -0,0 +1,18 @@
#!/bin/bash
# Build the crazyfs binary into the dist/ directory next to this script.
#
# Usage: build.sh [VERSION]
#   VERSION  Version string stamped into the binary as main.Version (prefixed
#            with "v"). Defaults to 0.0.0 when no argument is supplied.
#
# Exits non-zero if the script directory cannot be resolved, the output
# directory cannot be created, the src/ directory is missing, or the Go build
# fails.

# Resolve the directory containing this script so it works from any CWD.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) || exit 1

# ${1-default} substitutes only when $1 is unset, so an explicitly empty
# argument is still passed through unchanged.
VERSION="${1-0.0.0}"

mkdir -p "$SCRIPT_DIR/dist" || exit 1
cd "$SCRIPT_DIR/src" || exit 1

# -X stamps the build date (UTC, minute precision) and version into package main.
if ! go build -v -trimpath -ldflags "-s -w -X main.VersionDate=$(date -u --iso-8601=minutes) -X main.Version=v$VERSION" -o "$SCRIPT_DIR/dist/crazyfs"; then
    # Do not report success or touch the output when compilation failed.
    echo "Build failed" >&2
    exit 1
fi
chmod +x "$SCRIPT_DIR/dist/crazyfs"
echo "Finished building -> $SCRIPT_DIR/dist/crazyfs"

View File

@ -90,8 +90,8 @@ func PathOutsideRoot(fullPath string) bool {
return !strings.HasPrefix(fullPath, config.GetConfig().RootDir) return !strings.HasPrefix(fullPath, config.GetConfig().RootDir)
} }
// RetardCheck makes sure we never do anything outside the root dir.
func RetardCheck(fullPath string) { func RetardCheck(fullPath string) {
// Make sure we never do anything outside the root dir.
if PathOutsideRoot(fullPath) { if PathOutsideRoot(fullPath) {
panic(fmt.Sprintf("NewItem was not passed an absolute path. The path must start with the RootDir (%s). Failing path: %s", config.GetConfig().RootDir, fullPath)) panic(fmt.Sprintf("NewItem was not passed an absolute path. The path must start with the RootDir (%s). Failing path: %s", config.GetConfig().RootDir, fullPath))
} }

View File

@ -40,12 +40,11 @@ func (dc *DirectoryCrawler) processPath(fullPath string, info os.FileInfo) error
parentItem, found := SharedCache.Cache.Get(strippedParentDir) parentItem, found := SharedCache.Cache.Get(strippedParentDir)
if found { if found {
// Remove the old version of the directory from the parent's Children field // Remove the old version of the directory from the parent's Children field
newChildren, foundOldDir := removeOldDir(parentItem.Children, relPath) newChildren, _ := removeOldDir(parentItem.Children, relPath)
// Add the new version of the directory to the parent's Children field only if it wasn't found // Always add the new version of the directory to the parent's Children field
if !foundOldDir {
parentItem.Children = append(newChildren, relPath) parentItem.Children = append(newChildren, relPath)
}
// Update the parent directory in the cache // Update the parent directory in the cache
SharedCache.Cache.Add(strippedParentDir, parentItem) SharedCache.Cache.Add(strippedParentDir, parentItem)
} }

View File

@ -77,9 +77,10 @@ func NewResponseItem(cacheItem *CacheItem.Item) *ResponseItem {
// This can also be triggered if we encounter a broken symlink. We don't check for broken symlinks when scanning // This can also be triggered if we encounter a broken symlink. We don't check for broken symlinks when scanning
// because that would be an extra os.Lstat() call in processPath(). // because that would be an extra os.Lstat() call in processPath().
if !found { if !found {
log.Debugf("CRAWLER - %s not in cache, crawling", child) crawlRelPath := filepath.Join(config.GetConfig().RootDir, child)
log.Debugf(`CRAWLER - "%s" ("%s") not in cache, crawling`, child, crawlRelPath)
dc := DirectoryCrawler.NewDirectoryCrawler() dc := DirectoryCrawler.NewDirectoryCrawler()
item, err := dc.CrawlNoRecursion(filepath.Join(config.GetConfig().RootDir, child)) item, err := dc.CrawlNoRecursion(crawlRelPath)
if err != nil { if err != nil {
log.Errorf("NewResponseItem - CrawlNoRecursion - %s", err) log.Errorf("NewResponseItem - CrawlNoRecursion - %s", err)
continue // skip this child continue // skip this child
@ -90,6 +91,7 @@ func NewResponseItem(cacheItem *CacheItem.Item) *ResponseItem {
} }
} }
if childItem != nil {
copiedChildItem := &CacheItem.Item{ copiedChildItem := &CacheItem.Item{
Path: childItem.Path, Path: childItem.Path,
Name: childItem.Name, Name: childItem.Name,
@ -104,6 +106,9 @@ func NewResponseItem(cacheItem *CacheItem.Item) *ResponseItem {
MimeType: childItem.MimeType, MimeType: childItem.MimeType,
} }
children = append(children, copiedChildItem) children = append(children, copiedChildItem)
} else {
log.Errorf(`NewResponseItem - copiedChildItem for "%s" was null! - %+v`, child, cacheItem)
}
} }
item.Children = children item.Children = children
} }

View File

@ -1,14 +0,0 @@
#!/bin/bash
# Build the crazyfs binary into ../dist (relative to the current directory).
#
# Usage: build.sh [VERSION]
#   VERSION  Version string stamped into the binary as main.Version (prefixed
#            with "v"). Defaults to 0.0.0 when no argument is supplied.
#
# Exits non-zero if the output directory cannot be created or the Go build fails.

# ${1-default} substitutes only when $1 is unset, so an explicitly empty
# argument is still passed through unchanged.
VERSION="${1-0.0.0}"

mkdir -p ../dist || exit 1

# -X stamps the build date (UTC, minute precision) and version into package main.
if ! go build -v -trimpath -ldflags "-s -w -X main.VersionDate=$(date -u --iso-8601=minutes) -X main.Version=v$VERSION" -o ../dist/crazyfs; then
    # Stop here so a failed compile is not followed by chmod on a missing/stale binary.
    echo "Build failed" >&2
    exit 1
fi
chmod +x ../dist/crazyfs

View File

@ -26,7 +26,8 @@ func addToElasticsearch(fullPath string, info os.FileInfo, incomingErr error) er
if !shouldExclude(relPath, config.GetConfig().ElasticsearchExcludePatterns) { if !shouldExclude(relPath, config.GetConfig().ElasticsearchExcludePatterns) {
cacheItem, found := SharedCache.Cache.Get(relPath) cacheItem, found := SharedCache.Cache.Get(relPath)
if !found { if !found {
log.Debugf(`ELASTICSEARCH - path "%s" exists on disk, but not in the LRU cache. Deleting from Elastic.`, relPath) // I don't think this should ever happen
log.Errorf(`ELASTICSEARCH - path "%s" exists on disk, but not in the LRU cache. Deleting from Elastic.`, relPath)
// Delete this item from Elastic in order to avoid any strange inconsistencies. // Delete this item from Elastic in order to avoid any strange inconsistencies.
err := deleteFromElasticsearch(encodeToBase64(relPath)) err := deleteFromElasticsearch(encodeToBase64(relPath))
if err != nil { if err != nil {