fix children directories not being added to parents, keep working on obscure exception, improve build script

This commit is contained in:
Cyberes 2024-03-14 16:57:00 -06:00
parent 8c76455b60
commit c19304c65f
7 changed files with 47 additions and 38 deletions

View File

@ -10,8 +10,8 @@ I needed to serve a very large dataset full of small files publicly over the int
existing solutions were subpar and I found myself having to create confusing OpenResty scripts and complex CDN caching
to keep things responsive and server load low. I gave up and decided to create my own solution.
You will likely need to store your data on an SSD for this. With an SSD, my server was able to crawl over 6 million
files stored in a very complicated directory tree in just 5 minutes.
You absolutely need an SSD for this. With an SSD, my server was able to crawl over 6 million files stored in a very
complicated directory tree in just 5 minutes.
## Features

18
build.sh Executable file
View File

@ -0,0 +1,18 @@
#!/bin/bash
# Builds the crazyfs binary from <script dir>/src into <script dir>/dist,
# embedding the build date and version via linker flags.
#
# Usage: build.sh [VERSION]
#   VERSION  Version number without the leading "v". Falls back to 0.0.0 when
#            omitted or empty, so the binary never ends up with a bare "v".
#
# Exits non-zero if the output directory cannot be created, the source
# directory is missing, or the Go build fails.

# Resolve the directory containing this script so it can be run from anywhere.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

VERSION="${1:-0.0.0}"

mkdir -p "$SCRIPT_DIR/dist" || exit 1
cd "$SCRIPT_DIR/src" || exit 1

# Stop on a failed build; otherwise the script would go on to report success
# (and possibly leave a stale binary from an earlier run in place).
# NOTE: --iso-8601 is a GNU date option.
if ! go build -v -trimpath -ldflags "-s -w -X main.VersionDate=$(date -u --iso-8601=minutes) -X main.Version=v$VERSION" -o "$SCRIPT_DIR/dist/crazyfs"; then
	echo "Build failed" >&2
	exit 1
fi

chmod +x "$SCRIPT_DIR/dist/crazyfs"
echo "Finished building -> $SCRIPT_DIR/dist/crazyfs"

View File

@ -90,8 +90,8 @@ func PathOutsideRoot(fullPath string) bool {
return !strings.HasPrefix(fullPath, config.GetConfig().RootDir)
}
// RetardCheck makes sure we never do anything outside the root dir.
func RetardCheck(fullPath string) {
// Make sure we never do anything outside the root dir.
if PathOutsideRoot(fullPath) {
panic(fmt.Sprintf("NewItem was not passed an absolute path. The path must start with the RootDir (%s). Failing path: %s", config.GetConfig().RootDir, fullPath))
}

View File

@ -40,12 +40,11 @@ func (dc *DirectoryCrawler) processPath(fullPath string, info os.FileInfo) error
parentItem, found := SharedCache.Cache.Get(strippedParentDir)
if found {
// Remove the old version of the directory from the parent's Children field
newChildren, foundOldDir := removeOldDir(parentItem.Children, relPath)
newChildren, _ := removeOldDir(parentItem.Children, relPath)
// Add the new version of the directory to the parent's Children field only if it wasn't found
if !foundOldDir {
// Always add the new version of the directory to the parent's Children field
parentItem.Children = append(newChildren, relPath)
}
// Update the parent directory in the cache
SharedCache.Cache.Add(strippedParentDir, parentItem)
}

View File

@ -77,9 +77,10 @@ func NewResponseItem(cacheItem *CacheItem.Item) *ResponseItem {
// This can also be triggered if we encounter a broken symlink. We don't check for broken symlinks when scanning
// because that would be an extra os.Lstat() call in processPath().
if !found {
log.Debugf("CRAWLER - %s not in cache, crawling", child)
crawlRelPath := filepath.Join(config.GetConfig().RootDir, child)
log.Debugf(`CRAWLER - "%s" ("%s") not in cache, crawling`, child, crawlRelPath)
dc := DirectoryCrawler.NewDirectoryCrawler()
item, err := dc.CrawlNoRecursion(filepath.Join(config.GetConfig().RootDir, child))
item, err := dc.CrawlNoRecursion(crawlRelPath)
if err != nil {
log.Errorf("NewResponseItem - CrawlNoRecursion - %s", err)
continue // skip this child
@ -90,6 +91,7 @@ func NewResponseItem(cacheItem *CacheItem.Item) *ResponseItem {
}
}
if childItem != nil {
copiedChildItem := &CacheItem.Item{
Path: childItem.Path,
Name: childItem.Name,
@ -104,6 +106,9 @@ func NewResponseItem(cacheItem *CacheItem.Item) *ResponseItem {
MimeType: childItem.MimeType,
}
children = append(children, copiedChildItem)
} else {
log.Errorf(`NewResponseItem - copiedChildItem for "%s" was null! - %+v`, child, cacheItem)
}
}
item.Children = children
}

View File

@ -1,14 +0,0 @@
#!/bin/bash
# Builds the crazyfs binary into ../dist, embedding the build date and version
# via linker flags. All paths are relative to the current working directory.
#
# Usage: build.sh [VERSION]
#   VERSION  Version number without the leading "v". Falls back to 0.0.0 when
#            omitted or empty.
#
# Exits non-zero if the output directory cannot be created or the build fails.

VERSION="${1:-0.0.0}"

mkdir -p ../dist || exit 1

# Stop on a failed build so a stale binary from an earlier run is not
# mistaken for a fresh one.
if ! go build -v -trimpath -ldflags "-s -w -X main.VersionDate=$(date -u --iso-8601=minutes) -X main.Version=v$VERSION" -o ../dist/crazyfs; then
	echo "Build failed" >&2
	exit 1
fi

chmod +x ../dist/crazyfs

View File

@ -26,7 +26,8 @@ func addToElasticsearch(fullPath string, info os.FileInfo, incomingErr error) er
if !shouldExclude(relPath, config.GetConfig().ElasticsearchExcludePatterns) {
cacheItem, found := SharedCache.Cache.Get(relPath)
if !found {
log.Debugf(`ELASTICSEARCH - path "%s" exists on disk, but not in the LRU cache. Deleting from Elastic.`, relPath)
// I don't think this should ever happen
log.Errorf(`ELASTICSEARCH - path "%s" exists on disk, but not in the LRU cache. Deleting from Elastic.`, relPath)
// Delete this item from Elastic in order to avoid any strange inconsistencies.
err := deleteFromElasticsearch(encodeToBase64(relPath))
if err != nil {