fix children directories not being added to parents, keep working on obscure exception, improve build script
This commit is contained in:
parent
8c76455b60
commit
c19304c65f
|
@ -10,8 +10,8 @@ I needed to serve a very large dataset full of small files publicly over the int
|
||||||
existing solutions were subpar and I found myself having to create confusing OpenResty scripts and complex CDN caching
|
existing solutions were subpar and I found myself having to create confusing OpenResty scripts and complex CDN caching
|
||||||
to keep things responsive and server load low. I gave up and decided to create my own solution.
|
to keep things responsive and server load low. I gave up and decided to create my own solution.
|
||||||
|
|
||||||
You will likely need to store your data on an SSD for this. With an SSD, my server was able to crawl over 6 million
|
You absolutely need an SSD for this. With an SSD, my server was able to crawl over 6 million files stored in a very
|
||||||
files stored in a very complicated directory tree in just 5 minutes.
|
complicated directory tree in just 5 minutes.
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
|
|
||||||
|
if [ -z ${1+x} ]; then
|
||||||
|
VERSION="0.0.0"
|
||||||
|
else
|
||||||
|
VERSION="$1"
|
||||||
|
fi
|
||||||
|
|
||||||
|
mkdir -p "$SCRIPT_DIR/dist"
|
||||||
|
|
||||||
|
cd "$SCRIPT_DIR/src" || exit 1
|
||||||
|
go build -v -trimpath -ldflags "-s -w -X main.VersionDate=$(date -u --iso-8601=minutes) -X main.Version=v$VERSION" -o "$SCRIPT_DIR/dist/crazyfs"
|
||||||
|
|
||||||
|
chmod +x "$SCRIPT_DIR/dist/crazyfs"
|
||||||
|
|
||||||
|
echo "Finished building -> $SCRIPT_DIR/dist/crazyfs"
|
|
@ -90,8 +90,8 @@ func PathOutsideRoot(fullPath string) bool {
|
||||||
return !strings.HasPrefix(fullPath, config.GetConfig().RootDir)
|
return !strings.HasPrefix(fullPath, config.GetConfig().RootDir)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RetardCheck makes sure we never do anything outside the root dir.
|
||||||
func RetardCheck(fullPath string) {
|
func RetardCheck(fullPath string) {
|
||||||
// Make sure we never do anything outside the root dir.
|
|
||||||
if PathOutsideRoot(fullPath) {
|
if PathOutsideRoot(fullPath) {
|
||||||
panic(fmt.Sprintf("NewItem was not passed an absolute path. The path must start with the RootDir (%s). Failing path: %s", config.GetConfig().RootDir, fullPath))
|
panic(fmt.Sprintf("NewItem was not passed an absolute path. The path must start with the RootDir (%s). Failing path: %s", config.GetConfig().RootDir, fullPath))
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,12 +40,11 @@ func (dc *DirectoryCrawler) processPath(fullPath string, info os.FileInfo) error
|
||||||
parentItem, found := SharedCache.Cache.Get(strippedParentDir)
|
parentItem, found := SharedCache.Cache.Get(strippedParentDir)
|
||||||
if found {
|
if found {
|
||||||
// Remove the old version of the directory from the parent's Children field
|
// Remove the old version of the directory from the parent's Children field
|
||||||
newChildren, foundOldDir := removeOldDir(parentItem.Children, relPath)
|
newChildren, _ := removeOldDir(parentItem.Children, relPath)
|
||||||
|
|
||||||
// Add the new version of the directory to the parent's Children field only if it wasn't found
|
// Always add the new version of the directory to the parent's Children field
|
||||||
if !foundOldDir {
|
|
||||||
parentItem.Children = append(newChildren, relPath)
|
parentItem.Children = append(newChildren, relPath)
|
||||||
}
|
|
||||||
// Update the parent directory in the cache
|
// Update the parent directory in the cache
|
||||||
SharedCache.Cache.Add(strippedParentDir, parentItem)
|
SharedCache.Cache.Add(strippedParentDir, parentItem)
|
||||||
}
|
}
|
||||||
|
|
|
@ -77,9 +77,10 @@ func NewResponseItem(cacheItem *CacheItem.Item) *ResponseItem {
|
||||||
// This can also be triggered if we encounter a broken symlink. We don't check for broken symlinks when scanning
|
// This can also be triggered if we encounter a broken symlink. We don't check for broken symlinks when scanning
|
||||||
// because that would be an extra os.Lstat() call in processPath().
|
// because that would be an extra os.Lstat() call in processPath().
|
||||||
if !found {
|
if !found {
|
||||||
log.Debugf("CRAWLER - %s not in cache, crawling", child)
|
crawlRelPath := filepath.Join(config.GetConfig().RootDir, child)
|
||||||
|
log.Debugf(`CRAWLER - "%s" ("%s") not in cache, crawling`, child, crawlRelPath)
|
||||||
dc := DirectoryCrawler.NewDirectoryCrawler()
|
dc := DirectoryCrawler.NewDirectoryCrawler()
|
||||||
item, err := dc.CrawlNoRecursion(filepath.Join(config.GetConfig().RootDir, child))
|
item, err := dc.CrawlNoRecursion(crawlRelPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("NewResponseItem - CrawlNoRecursion - %s", err)
|
log.Errorf("NewResponseItem - CrawlNoRecursion - %s", err)
|
||||||
continue // skip this child
|
continue // skip this child
|
||||||
|
@ -90,6 +91,7 @@ func NewResponseItem(cacheItem *CacheItem.Item) *ResponseItem {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if childItem != nil {
|
||||||
copiedChildItem := &CacheItem.Item{
|
copiedChildItem := &CacheItem.Item{
|
||||||
Path: childItem.Path,
|
Path: childItem.Path,
|
||||||
Name: childItem.Name,
|
Name: childItem.Name,
|
||||||
|
@ -104,6 +106,9 @@ func NewResponseItem(cacheItem *CacheItem.Item) *ResponseItem {
|
||||||
MimeType: childItem.MimeType,
|
MimeType: childItem.MimeType,
|
||||||
}
|
}
|
||||||
children = append(children, copiedChildItem)
|
children = append(children, copiedChildItem)
|
||||||
|
} else {
|
||||||
|
log.Errorf(`NewResponseItem - copiedChildItem for "%s" was null! - %+v`, child, cacheItem)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
item.Children = children
|
item.Children = children
|
||||||
}
|
}
|
||||||
|
|
14
src/build.sh
14
src/build.sh
|
@ -1,14 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
|
|
||||||
if [ -z ${1+x} ]; then
|
|
||||||
VERSION="0.0.0"
|
|
||||||
else
|
|
||||||
VERSION="$1"
|
|
||||||
fi
|
|
||||||
|
|
||||||
mkdir -p ../dist
|
|
||||||
|
|
||||||
go build -v -trimpath -ldflags "-s -w -X main.VersionDate=$(date -u --iso-8601=minutes) -X main.Version=v$VERSION" -o ../dist/crazyfs
|
|
||||||
|
|
||||||
chmod +x ../dist/crazyfs
|
|
|
@ -26,7 +26,8 @@ func addToElasticsearch(fullPath string, info os.FileInfo, incomingErr error) er
|
||||||
if !shouldExclude(relPath, config.GetConfig().ElasticsearchExcludePatterns) {
|
if !shouldExclude(relPath, config.GetConfig().ElasticsearchExcludePatterns) {
|
||||||
cacheItem, found := SharedCache.Cache.Get(relPath)
|
cacheItem, found := SharedCache.Cache.Get(relPath)
|
||||||
if !found {
|
if !found {
|
||||||
log.Debugf(`ELASTICSEARCH - path "%s" exists on disk, but not in the LRU cache. Deleting from Elastic.`, relPath)
|
// I don't think this should ever happen
|
||||||
|
log.Errorf(`ELASTICSEARCH - path "%s" exists on disk, but not in the LRU cache. Deleting from Elastic.`, relPath)
|
||||||
// Delete this item from Elastic in order to avoid any strange inconsistencies.
|
// Delete this item from Elastic in order to avoid any strange inconsistencies.
|
||||||
err := deleteFromElasticsearch(encodeToBase64(relPath))
|
err := deleteFromElasticsearch(encodeToBase64(relPath))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
Loading…
Reference in New Issue