mirror of https://github.com/yt-dlp/yt-dlp.git
Try to mitigate the problem of loading a fully sorted archive
Sorted archives turn the binary tree into a linked list and make things horribly slow. This is an incomplete mitigation for this issue.
This commit is contained in:
parent
1de7ea76f8
commit
1d74d8d9f6
|
@ -113,12 +113,14 @@ from .version import __version__
|
||||||
if compat_os_name == 'nt':
|
if compat_os_name == 'nt':
|
||||||
import ctypes
|
import ctypes
|
||||||
|
|
||||||
|
# Archive tree
|
||||||
class ArchiveTree(object):
|
class ArchiveTree(object):
|
||||||
def __init__(self, line):
|
def __init__(self, line):
|
||||||
self.left = None
|
self.left = None
|
||||||
self.right = None
|
self.right = None
|
||||||
self.line = line
|
self.line = line
|
||||||
|
|
||||||
|
# Tree insertion
|
||||||
def at_insert(self, line):
|
def at_insert(self, line):
|
||||||
# print("at_insert: ", line)
|
# print("at_insert: ", line)
|
||||||
cur = self
|
cur = self
|
||||||
|
@ -130,6 +132,7 @@ class ArchiveTree(object):
|
||||||
cur.left = ArchiveTree(line)
|
cur.left = ArchiveTree(line)
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
|
# print("LEFT")
|
||||||
cur = cur.left
|
cur = cur.left
|
||||||
continue
|
continue
|
||||||
elif line > cur.line:
|
elif line > cur.line:
|
||||||
|
@ -137,6 +140,7 @@ class ArchiveTree(object):
|
||||||
cur.right = ArchiveTree(line)
|
cur.right = ArchiveTree(line)
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
|
# print("RIGHT")
|
||||||
cur = cur.right
|
cur = cur.right
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
|
@ -410,16 +414,55 @@ class YoutubeDL(object):
|
||||||
|
|
||||||
"""Preload the archive, if any is specified"""
|
"""Preload the archive, if any is specified"""
|
||||||
def preload_download_archive(self):
    """Load every entry of the download archive file into self.archive.

    Reads the file named by the 'download_archive' parameter, strips
    each line and inserts it into the archive tree.

    Returns True when at least one line was loaded, and False when no
    archive is configured, the file does not exist, or it is empty.
    Any IOError other than ENOENT is re-raised.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    lines = []
    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            lines = [line.strip() for line in archive_file]
    except IOError as ioe:
        # A missing archive file just means nothing has been recorded yet
        if ioe.errno != errno.ENOENT:
            raise

    if not lines:
        # No lines were loaded
        return False

    # Populate the binary search tree by splitting the archive list in
    # half and consuming each half from both of its ends.  Feeding an
    # already sorted archive in file order would degrade the tree into a
    # linked list; interleaving the insertions mitigates that worst case.
    #
    # The bounds are inclusive so that the entries where the pointers
    # meet are inserted too -- every line goes into the tree exactly once.
    left_lo, left_hi = 0, len(lines) // 2
    right_lo, right_hi = left_hi + 1, len(lines) - 1
    insert = self.archive.at_insert
    while left_lo <= left_hi or right_lo <= right_hi:
        if left_lo <= left_hi:
            insert(lines[left_hi])
            left_hi -= 1
        if right_lo <= right_hi:
            insert(lines[right_lo])
            right_lo += 1
        if left_lo <= left_hi:
            insert(lines[left_lo])
            left_lo += 1
        if right_lo <= right_hi:
            insert(lines[right_hi])
            right_hi -= 1
    return True
|
||||||
|
|
||||||
def check_deprecated(param, option, suggestion):
|
def check_deprecated(param, option, suggestion):
|
||||||
|
|
Loading…
Reference in New Issue