replace with yt-dlp

This commit is contained in:
Cyberes 2022-03-08 17:49:55 -07:00
parent 9c9fb37a0e
commit 0a12a7e452
1 changed files with 263 additions and 262 deletions

525
automated-youtube-dl.py Normal file → Executable file
View File

@ -1,262 +1,263 @@
from __future__ import unicode_literals
import argparse
import json
import logging
import logging.config
import os
import re
import subprocess
import sys
import time
from subprocess import call
import youtube_dl
# Flipped by my_hook on every 'finished' progress event; progress output is only drawn while it is False.
done = False
# Anchored pattern for an absolute http(s)/ftp(s) URL. Used to classify the positional
# argument as URL-or-file and to validate each line of an input file.
urlRegex = re.compile(
r'^(?:http|ftp)s?://' # scheme: http, https, ftp or ftps
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain...
r'localhost|' # localhost...
r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip
r'(?::\d+)?' # optional port
r'(?:/?|[/?]\S+)$', re.IGNORECASE) # optional path/query; IGNORECASE lets the A-Z classes match lower case too
# Command-line interface: one positional input (a URL, or a path to a text file of URLs)
# plus options controlling archiving, encryption and the size limit.
parser = argparse.ArgumentParser()
parser.add_argument("file")
parser.add_argument('--playlists', '-pl', action='store_true', help='is the input file a list of playlists? enables the sequential renaming of the 7z archives')
parser.add_argument('--output', '-o', help='name of the 7z archive')
parser.add_argument('--encrypt', '-e', action='store_true', help='encrypt the compressed 7z archive')
# Takes a value: compress() puts this string after 7z's -p switch, so a boolean
# store_true flag would end up encrypting every archive with the text "False"/"True".
parser.add_argument('--password', '-p', help='password to encrypt the compressed 7z archive with')
# The escaped quote was missing before, which turned "don't" into "don<TAB> update".
parser.add_argument('--no-update', '-nu', action='store_true', help='don\'t update Pip packages')
parser.add_argument('--cmd', '-c', help='use the bash youtube-dl instead of the embedded python version', action='store_true')
parser.add_argument('--max-size', '-m', type=int, default=2000, help="max size of video in mb")
parser.add_argument('--check-size', '-ch', action='store_true', help="verify the video is smaller than the max size and skip if not")
args = parser.parse_args()
# compress() appends ".7z" itself, so only reject names that already end with the
# extension (a plain substring test also rejected names such as "my7zbackup").
if args.output is not None and args.output.lower().endswith('.7z'):
    print('no .7z extension in file name')
    sys.exit(1)
# Decide whether the positional argument is a single URL or a path to a text file
# holding one URL per line. Sets isURL, and for files also the validated `lines` list.
if re.match(urlRegex, args.file) is None:
    isURL = False
    if not os.path.isfile(args.file):
        print('file does not exist')
        sys.exit(1)
    # Read through a context manager so the file handle is closed instead of leaked.
    with open(args.file, 'r') as urlFile:
        lines = list(urlFile)
    # The index keeps the name `i` because later top-level code still reads it.
    for i, rawLine in enumerate(lines):
        # Strip all surrounding whitespace first so CRLF endings or trailing blanks
        # do not make an otherwise valid URL fail the anchored pattern.
        candidate = rawLine.strip()
        if re.match(urlRegex, candidate) is None:
            # Report 1-based line numbers, matching what a text editor shows.
            print('line {} not a url'.format(i + 1))
            sys.exit(1)
        lines[i] = candidate
else:
    isURL = True
# Create the download directory and a per-run log file inside it, then clear
# youtube-dl's on-disk cache.
# exist_ok tolerates an existing directory while still surfacing real failures
# (e.g. permission errors) that the previous blanket `except OSError: pass` hid.
os.makedirs('downloads', exist_ok=True)
logger = logging.getLogger('youtube_dl')
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# One log file per run, named after the start time in whole seconds.
fh = logging.FileHandler('downloads/youtube_dl-' + str(int(time.time())) + '.log')
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
# A StreamHandler used to be constructed here and thrown away without being attached;
# that statement had no effect and was removed, so logging still goes to the file only.
logger.addHandler(fh)
os.system('youtube-dl --rm-cache-dir')
# since youtube-dl updates so much check for updates
# if not args.no_update:
# print('checking for updates...')
# pipOut = subprocess.run('python3.7 -m pip list --outdated --format json', capture_output=True, encoding="utf-8", shell=True)
# pipJson = json.loads(pipOut.stdout)
# updatable = []
# for x in pipJson:
# updatable.append(x['name'])
# logger.info(str(len(updatable)) + ' updatable pip packages')
# print(str(len(updatable)) + ' updatable pip packages')
# for x in updatable:
# sys.stdout.write('\x1b[2K') # erase last line
# sys.stdout.write('\rupdating ' + x)
# sys.stdout.flush()
# logger.info('updating ' + x)
# process = subprocess.Popen('python3.7 -m pip install --upgrade ' + x, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# process.wait()
# print()
def doLog(msg, level):
    """Send msg to the module-level logger according to level.

    'debug' messages are recorded with logger.info, 'warning' with
    logger.warning and 'error' with logger.error. Any other level is ignored.
    """
    emitters = {
        'debug': logger.info,
        'warning': logger.warning,
        'error': logger.error,
    }
    emit = emitters.get(level)
    if emit is not None:
        emit(msg)
def compress(name):
    """Pack everything under downloads/ into "<name>.7z", then empty the directory.

    Does nothing when no --output name was given. With --encrypt the password is
    read interactively; otherwise a non-empty --password string is used; otherwise
    the archive is created without a password.

    The downloaded files are removed only after 7z reports success, so a missing
    or failing 7z no longer destroys the downloads.
    """
    if args.output is None:
        return
    # Argument list instead of a shell string: the password and archive name are
    # user input and must not be interpreted by a shell.
    cmd = ['7z', 'a', '-t7z', '-m0=lzma2', '-mx=9', '-mfb=64', '-md=1024m', '-ms=on', '-mhe=on']
    if args.encrypt:
        cmd.append('-p' + input('password: '))
    elif isinstance(args.password, str) and args.password:
        # Only a real string counts as a password; a bare boolean flag value does not.
        cmd.append('-p' + args.password)
    # The wildcard is passed literally (as before, where it was quoted) for 7z to expand.
    cmd += ['{}.7z'.format(name), 'downloads/*']
    try:
        result = subprocess.run(cmd)
    except FileNotFoundError:
        print('7z not found, keeping downloaded files')
        return
    if result.returncode != 0:
        print('7z exited with code {}, keeping downloaded files'.format(result.returncode))
        return
    for entry in os.scandir('downloads'):
        os.unlink(entry.path)
def checkSize(url):
    """Return True when url may be downloaded under the --max-size limit.

    When --check-size is not set the answer is always True. Otherwise the video's
    metadata is fetched without downloading and the largest 'filesize' reported by
    any format is compared against --max-size (megabytes, 1 MB = 1,000,000 bytes).
    Formats that report no size are ignored.
    """
    if not args.check_size:
        return True
    ytdl = youtube_dl.YoutubeDL()
    info = ytdl.extract_info(url, download=False)
    maxBytes = args.max_size * 1000000
    # Metadata may come back empty or without a 'formats' list.
    formats = (info or {}).get('formats') or []
    # .get() covers formats lacking the key entirely (previously an uncaught KeyError);
    # `or 0` covers an explicit None (previously handled via TypeError).
    largest = max((fmt.get('filesize') or 0 for fmt in formats), default=0)
    return largest <= maxBytes
def getUrls(playlist):
    """Return the https:// URLs that get-urls.sh prints for playlist.

    Runs `bash get-urls.sh <playlist>` and collects every `https://...` match from
    its standard output. Exits the program with status 1 when the script's output
    shows that jq is missing, or when the script prints nothing at all.
    """
    proc = subprocess.run(['bash', 'get-urls.sh', playlist], capture_output=True, encoding="utf-8")
    jqMissing = 'get-urls.sh: line 1: jq: command not found'
    # Check for the jq message first and in both streams: shells normally report
    # "command not found" on stderr, in which case stdout is empty and the generic
    # "missing get-urls.sh" exit below would hide the real cause.
    if jqMissing in proc.stdout or jqMissing in (proc.stderr or ''):
        logger.error('missing jq. see readme for installation instructions')
        print('missing jq. see readme for installation instructions')
        sys.exit(1)
    if proc.stdout == '':
        # Log the same text that is printed (an empty string was logged before).
        logger.error('missing get-urls.sh')
        print('missing get-urls.sh')
        sys.exit(1)
    return re.findall(r'(https:\/\/.*)', proc.stdout)
class ytdlLogger(object):
    """Logger object passed to YoutubeDL via the 'logger' option; forwards to doLog."""

    def debug(self, msg):
        """Forward msg at 'debug' level unless it contains '[download]'."""
        if '[download]' in msg:
            return
        doLog(msg, 'debug')

    def warning(self, msg):
        """Forward msg at 'warning' level."""
        doLog(msg, 'warning')

    def error(self, msg):
        """Forward msg at 'error' level."""
        doLog(msg, 'error')
def my_hook(d):
    """Progress hook: draw a single, self-overwriting status line on stdout.

    d is the progress dictionary passed by the downloader; d['status'] is required.
    - 'finished' flips the module-level `done` flag.
    - 'downloading' prints counter, file name, speed, size, ETA and percentage,
      but only while `done` is False. If any of those fields is missing the line
      is skipped.
    - any other status prints the status and, when present, the file name.
    """
    global done
    status = d['status']
    if status == 'finished':
        done = not done
    elif status == 'downloading':
        if not done:
            if isURL:
                length = '{}/{}'.format(videoInc, mediaAmount)
            else:
                length = '{}/{}'.format(videoInc, len(lines))
            sys.stdout.write('\x1b[2K')  # erase the current terminal line
            try:
                filename = d['filename']
                # Remove the directory prefix only. str.strip('downloads/') treats its
                # argument as a set of characters and also ate leading/trailing
                # letters of the title itself.
                if filename.startswith('downloads/'):
                    filename = filename[len('downloads/'):]
                sys.stdout.write('\r[{}] {} -> "{}" | {} {} {} {}'.format(
                    status, length, filename,
                    d['_speed_str'], d['_total_bytes_str'], d['_eta_str'], d['_percent_str']))
            except KeyError:
                # Best effort: not every progress event carries all display fields.
                pass
            sys.stdout.flush()
    else:
        sys.stdout.write('\x1b[2K')
        # The template has two placeholders; it used to be given one argument, which
        # raised IndexError (not the KeyError that was being caught).
        sys.stdout.write('\r[{}] {}'.format(status, d.get('filename', '')))
        sys.stdout.flush()
# Format selector: a preference chain of video formats (1080p before 720p, >30fps before
# lower frame rates, av01 before vp9.2 before vp9), each limited to --max-size, combined
# with the best opus (else best) audio, falling back to the best single file.
ytdlFormat = '(bestvideo[{}][vcodec^=av01][height>=1080][fps>30]/bestvideo[{}][vcodec=vp9.2][height>=1080][fps>30]/bestvideo[{}][vcodec=vp9][height>=1080][fps>30]/bestvideo[{}][vcodec^=av01][height>=1080]/bestvideo[{}][vcodec=vp9.2][height>=1080]/bestvideo[{}][vcodec=vp9][height>=1080]/bestvideo[{}][height>=1080]/bestvideo[{}][vcodec^=av01][height>=720][fps>30]/bestvideo[{}][vcodec=vp9.2][height>=720][fps>30]/bestvideo[{}][vcodec=vp9][height>=720][fps>30]/bestvideo[{}][vcodec^=av01][height>=720]/bestvideo[{}][vcodec=vp9.2][height>=720]/bestvideo[{}][vcodec=vp9][height>=720]/bestvideo[{}][height>=720]/bestvideo[{}])+(bestaudio[acodec=opus]/bestaudio)/best'.replace('{}', 'filesize<' + str(args.max_size) + 'M')
# Options for the embedded downloader.
ydl_opts = {
    'merge_output_format': 'mkv',
    'allsubtitles': True,
    'logtostderr': True,
    'format': ytdlFormat,
    'outtmpl': 'downloads/\'%(title)s\' - (\'%(uploader)s\' - \'%(uploader_id)s\') - %(id)s',
    # One dict per post-processor. The three names used to share a single dict
    # literal, where a repeated 'key' keeps only the last value, so just
    # FFmpegEmbedSubtitle was ever registered.
    # NOTE(review): the embed steps presumably only have something to embed when
    # thumbnails/subtitles are also requested for download - confirm whether
    # 'writethumbnail' / 'writesubtitles' should be set as well.
    'postprocessors': [
        {'key': 'FFmpegMetadata'},
        {'key': 'EmbedThumbnail'},
        {'key': 'FFmpegEmbedSubtitle'},
    ],
    'logger': ytdlLogger(),
    'progress_hooks': [my_hook],
}
# Equivalent command line used when --cmd is given; the video URL is appended per download.
ytdlCMD = 'youtube-dl -i --add-metadata --all-subs --embed-subs --embed-thumbnail -f "{}" --merge-output-format mkv -o "downloads/%(title)s - %(id)s.%(ext)s" --write-annotations --write-info-json --write-description --write-all-thumbnails --write-sub --sub-format "best" --geo-bypass'.format(ytdlFormat)
def _downloadVideo(url):
    """Download one video via the youtube-dl CLI (--cmd) or the embedded library.

    Download errors from the library are logged and printed rather than aborting
    the whole run, in both file and URL mode.
    """
    if args.cmd:
        import shlex
        # Pass an argument list instead of a shell string so characters in the URL
        # can never be interpreted by a shell.
        ytOut = subprocess.run(shlex.split(ytdlCMD) + [url], capture_output=True, encoding="utf-8")
        for outLine in ytOut.stdout.splitlines():
            logger.info(outLine)
            print(outLine)
    else:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            try:
                ydl.download([url])
            except youtube_dl.utils.DownloadError as e:
                logger.error(str(e))
                print(e)
        print()


def _reportTooBig(url):
    """Tell the user and the log that url was skipped for exceeding --max-size."""
    print('[info] Video too big')
    logger.info('skipping ' + url + ' because too big')


# Running 1-based counter of processed videos; read by my_hook for the progress display.
videoInc = 1
if not isURL:
    # File mode: every line of the input file is a playlist (or video) URL.
    # playlistIndex replaces a stale loop variable left over from input validation,
    # which made every archive receive the same numeric suffix.
    for playlistIndex, line in enumerate(lines):
        for x in getUrls(line):
            if checkSize(x):
                if args.check_size:
                    print('========================')
                    print('[info] Video size ok')
                _downloadVideo(x)
                if args.playlists:
                    compress('{}-{}'.format(args.output, playlistIndex))
                else:
                    compress(args.output)
            else:
                _reportTooBig(x)
            videoInc = videoInc + 1
else:
    # URL mode: resolve the URL list once and reuse it for the count and the loop.
    urls = getUrls(args.file)
    mediaAmount = str(len(urls))
    for x in urls:
        if checkSize(x):
            if args.check_size:
                print('========================')
                print('[info] Video size ok')
            _downloadVideo(x)
        else:
            _reportTooBig(x)
        videoInc = videoInc + 1
    compress(args.output)
print()
from __future__ import unicode_literals
import argparse
import json
import logging
import logging.config
import os
import re
import subprocess
import sys
import time
from subprocess import call
import yt_dlp #youtube_dl
# Flipped by my_hook on every 'finished' progress event; progress output is only drawn while it is False.
done = False
# Anchored pattern for an absolute http(s)/ftp(s) URL. Used to classify the positional
# argument as URL-or-file and to validate each line of an input file.
urlRegex = re.compile(
r'^(?:http|ftp)s?://' # scheme: http, https, ftp or ftps
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain...
r'localhost|' # localhost...
r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip
r'(?::\d+)?' # optional port
r'(?:/?|[/?]\S+)$', re.IGNORECASE) # optional path/query; IGNORECASE lets the A-Z classes match lower case too
# Command-line interface: one positional input (a URL, or a path to a text file of URLs)
# plus options controlling archiving, encryption and the size limit.
parser = argparse.ArgumentParser()
parser.add_argument("file")
parser.add_argument('--playlists', '-pl', action='store_true', help='is the input file a list of playlists? enables the sequential renaming of the 7z archives')
parser.add_argument('--output', '-o', help='name of the 7z archive')
parser.add_argument('--encrypt', '-e', action='store_true', help='encrypt the compressed 7z archive')
# Takes a value: compress() puts this string after 7z's -p switch, so a boolean
# store_true flag would end up encrypting every archive with the text "False"/"True".
parser.add_argument('--password', '-p', help='password to encrypt the compressed 7z archive with')
# The escaped quote was missing before, which turned "don't" into "don<TAB> update".
parser.add_argument('--no-update', '-nu', action='store_true', help='don\'t update Pip packages')
parser.add_argument('--cmd', '-c', help='use the bash youtube-dl instead of the embedded python version', action='store_true')
parser.add_argument('--max-size', '-m', type=int, default=1000, help="max size of video in mb")
parser.add_argument('--check-size', '-ch', action='store_true', help="verify the video is smaller than the max size and skip if not")
args = parser.parse_args()
# compress() appends ".7z" itself, so only reject names that already end with the
# extension (a plain substring test also rejected names such as "my7zbackup").
if args.output is not None and args.output.lower().endswith('.7z'):
    print('no .7z extension in file name')
    sys.exit(1)
# Decide whether the positional argument is a single URL or a path to a text file
# holding one URL per line. Sets isURL, and for files also the validated `lines` list.
if re.match(urlRegex, args.file) is None:
    isURL = False
    if not os.path.isfile(args.file):
        print('file does not exist')
        sys.exit(1)
    # Read through a context manager so the file handle is closed instead of leaked.
    with open(args.file, 'r') as urlFile:
        lines = list(urlFile)
    # The index keeps the name `i` because later top-level code still reads it.
    for i, rawLine in enumerate(lines):
        # Strip all surrounding whitespace first so CRLF endings or trailing blanks
        # do not make an otherwise valid URL fail the anchored pattern.
        candidate = rawLine.strip()
        if re.match(urlRegex, candidate) is None:
            # Report 1-based line numbers, matching what a text editor shows.
            print('line {} not a url'.format(i + 1))
            sys.exit(1)
        lines[i] = candidate
else:
    isURL = True
# Create the download directory and a per-run log file inside it, then clear
# yt-dlp's on-disk cache.
# exist_ok tolerates an existing directory while still surfacing real failures
# (e.g. permission errors) that the previous blanket `except OSError: pass` hid.
os.makedirs('downloads', exist_ok=True)
logger = logging.getLogger('youtube_dl')
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# One log file per run, named after the start time in whole seconds.
fh = logging.FileHandler('downloads/youtube_dl-' + str(int(time.time())) + '.log')
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
# A StreamHandler used to be constructed here and thrown away without being attached;
# that statement had no effect and was removed, so logging still goes to the file only.
logger.addHandler(fh)
os.system('yt-dlp --rm-cache-dir')
# since youtube-dl updates so much check for updates
# if not args.no_update:
# print('checking for updates...')
# pipOut = subprocess.run('python3.7 -m pip list --outdated --format json', capture_output=True, encoding="utf-8", shell=True)
# pipJson = json.loads(pipOut.stdout)
# updatable = []
# for x in pipJson:
# updatable.append(x['name'])
# logger.info(str(len(updatable)) + ' updatable pip packages')
# print(str(len(updatable)) + ' updatable pip packages')
# for x in updatable:
# sys.stdout.write('\x1b[2K') # erase last line
# sys.stdout.write('\rupdating ' + x)
# sys.stdout.flush()
# logger.info('updating ' + x)
# process = subprocess.Popen('python3.7 -m pip install --upgrade ' + x, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# process.wait()
# print()
def doLog(msg, level):
    """Send msg to the module-level logger according to level.

    'debug' messages are recorded with logger.info, 'warning' with
    logger.warning and 'error' with logger.error. Any other level is ignored.
    """
    emitters = {
        'debug': logger.info,
        'warning': logger.warning,
        'error': logger.error,
    }
    emit = emitters.get(level)
    if emit is not None:
        emit(msg)
def compress(name):
    """Pack everything under downloads/ into "<name>.7z", then empty the directory.

    Does nothing when no --output name was given. With --encrypt the password is
    read interactively; otherwise a non-empty --password string is used; otherwise
    the archive is created without a password.

    The downloaded files are removed only after 7z reports success, so a missing
    or failing 7z no longer destroys the downloads.
    """
    if args.output is None:
        return
    # Argument list instead of a shell string: the password and archive name are
    # user input and must not be interpreted by a shell.
    cmd = ['7z', 'a', '-t7z', '-m0=lzma2', '-mx=9', '-mfb=64', '-md=1024m', '-ms=on', '-mhe=on']
    if args.encrypt:
        cmd.append('-p' + input('password: '))
    elif isinstance(args.password, str) and args.password:
        # Only a real string counts as a password; a bare boolean flag value does not.
        cmd.append('-p' + args.password)
    # The wildcard is passed literally (as before, where it was quoted) for 7z to expand.
    cmd += ['{}.7z'.format(name), 'downloads/*']
    try:
        result = subprocess.run(cmd)
    except FileNotFoundError:
        print('7z not found, keeping downloaded files')
        return
    if result.returncode != 0:
        print('7z exited with code {}, keeping downloaded files'.format(result.returncode))
        return
    for entry in os.scandir('downloads'):
        os.unlink(entry.path)
def checkSize(url):
    """Return True when url may be downloaded under the --max-size limit.

    When --check-size is not set the answer is always True. Otherwise the video's
    metadata is fetched without downloading and the largest 'filesize' reported by
    any format is compared against --max-size (megabytes, 1 MB = 1,000,000 bytes).
    Formats that report no size are ignored.
    """
    if not args.check_size:
        return True
    ytdl = yt_dlp.YoutubeDL()
    info = ytdl.extract_info(url, download=False)
    maxBytes = args.max_size * 1000000
    # Metadata may come back empty or without a 'formats' list.
    formats = (info or {}).get('formats') or []
    # .get() covers formats lacking the key entirely (previously an uncaught KeyError);
    # `or 0` covers an explicit None (previously handled via TypeError).
    largest = max((fmt.get('filesize') or 0 for fmt in formats), default=0)
    return largest <= maxBytes
def getUrls(playlist):
    """Return the https:// URLs that get-urls.sh prints for playlist.

    Runs `bash get-urls.sh <playlist>` and collects every `https://...` match from
    its standard output. Exits the program with status 1 when the script's output
    shows that jq is missing, or when the script prints nothing at all.
    """
    proc = subprocess.run(['bash', 'get-urls.sh', playlist], capture_output=True, encoding="utf-8")
    jqMissing = 'get-urls.sh: line 1: jq: command not found'
    # Check for the jq message first and in both streams: shells normally report
    # "command not found" on stderr, in which case stdout is empty and the generic
    # "missing get-urls.sh" exit below would hide the real cause.
    if jqMissing in proc.stdout or jqMissing in (proc.stderr or ''):
        logger.error('missing jq. see readme for installation instructions')
        print('missing jq. see readme for installation instructions')
        sys.exit(1)
    if proc.stdout == '':
        # Log the same text that is printed (an empty string was logged before).
        logger.error('missing get-urls.sh')
        print('missing get-urls.sh')
        sys.exit(1)
    return re.findall(r'(https:\/\/.*)', proc.stdout)
class ytdlLogger(object):
    """Logger object passed to YoutubeDL via the 'logger' option; forwards to doLog."""

    def debug(self, msg):
        """Forward msg at 'debug' level unless it contains '[download]'."""
        if '[download]' in msg:
            return
        doLog(msg, 'debug')

    def warning(self, msg):
        """Forward msg at 'warning' level."""
        doLog(msg, 'warning')

    def error(self, msg):
        """Forward msg at 'error' level."""
        doLog(msg, 'error')
def my_hook(d):
    """Progress hook: draw a single, self-overwriting status line on stdout.

    d is the progress dictionary passed by the downloader; d['status'] is required.
    - 'finished' flips the module-level `done` flag.
    - 'downloading' prints counter, file name, speed, size, ETA and percentage,
      but only while `done` is False. If any of those fields is missing the line
      is skipped.
    - any other status prints the status and, when present, the file name.
    """
    global done
    status = d['status']
    if status == 'finished':
        done = not done
    elif status == 'downloading':
        if not done:
            if isURL:
                length = '{}/{}'.format(videoInc, mediaAmount)
            else:
                length = '{}/{}'.format(videoInc, len(lines))
            sys.stdout.write('\x1b[2K')  # erase the current terminal line
            try:
                filename = d['filename']
                # Remove the directory prefix only. str.strip('downloads/') treats its
                # argument as a set of characters and also ate leading/trailing
                # letters of the title itself.
                if filename.startswith('downloads/'):
                    filename = filename[len('downloads/'):]
                sys.stdout.write('\r[{}] {} -> "{}" | {} {} {} {}'.format(
                    status, length, filename,
                    d['_speed_str'], d['_total_bytes_str'], d['_eta_str'], d['_percent_str']))
            except KeyError:
                # Best effort: not every progress event carries all display fields.
                pass
            sys.stdout.flush()
    else:
        sys.stdout.write('\x1b[2K')
        # The template has two placeholders; it used to be given one argument, which
        # raised IndexError (not the KeyError that was being caught).
        sys.stdout.write('\r[{}] {}'.format(status, d.get('filename', '')))
        sys.stdout.flush()
# Format selector: a preference chain of video formats (1080p before 720p, >30fps before
# lower frame rates, av01 before vp9.2 before vp9), each limited to --max-size, combined
# with the best opus (else best) audio, falling back to the best single file.
ytdlFormat = '(bestvideo[{}][vcodec^=av01][height>=1080][fps>30]/bestvideo[{}][vcodec=vp9.2][height>=1080][fps>30]/bestvideo[{}][vcodec=vp9][height>=1080][fps>30]/bestvideo[{}][vcodec^=av01][height>=1080]/bestvideo[{}][vcodec=vp9.2][height>=1080]/bestvideo[{}][vcodec=vp9][height>=1080]/bestvideo[{}][height>=1080]/bestvideo[{}][vcodec^=av01][height>=720][fps>30]/bestvideo[{}][vcodec=vp9.2][height>=720][fps>30]/bestvideo[{}][vcodec=vp9][height>=720][fps>30]/bestvideo[{}][vcodec^=av01][height>=720]/bestvideo[{}][vcodec=vp9.2][height>=720]/bestvideo[{}][vcodec=vp9][height>=720]/bestvideo[{}][height>=720]/bestvideo[{}])+(bestaudio[acodec=opus]/bestaudio)/best'.replace('{}', 'filesize<' + str(args.max_size) + 'M')
# Options for the embedded downloader.
ydl_opts = {
    'merge_output_format': 'mkv',
    'allsubtitles': True,
    'logtostderr': True,
    'format': ytdlFormat,
    'outtmpl': 'downloads/%(title)s - (%(uploader)s, %(uploader_id)s) - %(id)s',
    # 'outtmpl': 'downloads/%(title)s',
    # One dict per post-processor. The three names used to share a single dict
    # literal, where a repeated 'key' keeps only the last value, so just
    # FFmpegEmbedSubtitle was ever registered.
    # NOTE(review): the embed steps presumably only have something to embed when
    # thumbnails/subtitles are also requested for download - confirm whether
    # 'writethumbnail' / 'writesubtitles' should be set as well.
    'postprocessors': [
        {'key': 'FFmpegMetadata'},
        {'key': 'EmbedThumbnail'},
        {'key': 'FFmpegEmbedSubtitle'},
    ],
    'logger': ytdlLogger(),
    'progress_hooks': [my_hook],
}
# Equivalent command line used when --cmd is given; the video URL is appended per download.
# Invokes yt-dlp so the CLI path uses the same tool as the embedded library and the
# cache-clearing call, instead of the youtube-dl binary this file no longer depends on.
ytdlCMD = 'yt-dlp -i --add-metadata --all-subs --embed-subs --embed-thumbnail -f "{}" --merge-output-format mkv -o "downloads/%(title)s - %(id)s.%(ext)s" --write-annotations --write-info-json --write-description --write-all-thumbnails --write-sub --sub-format "best" --geo-bypass'.format(ytdlFormat)
def _downloadVideo(url):
    """Download one video via the command-line tool (--cmd) or the embedded library.

    Errors raised by the library download are logged and printed rather than
    aborting the whole run, in both file and URL mode.
    """
    if args.cmd:
        import shlex
        # Pass an argument list instead of a shell string so characters in the URL
        # can never be interpreted by a shell.
        ytOut = subprocess.run(shlex.split(ytdlCMD) + [url], capture_output=True, encoding="utf-8")
        for outLine in ytOut.stdout.splitlines():
            logger.info(outLine)
            print(outLine)
    else:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            try:
                ydl.download([url])
            except Exception as e:
                logger.error(str(e))
                print(e)
        print()


def _reportTooBig(url):
    """Tell the user and the log that url was skipped for exceeding --max-size."""
    print('[info] Video too big')
    logger.info('skipping ' + url + ' because too big')


# Running 1-based counter of processed videos; read by my_hook for the progress display.
videoInc = 1
if not isURL:
    # File mode: every line of the input file is a playlist (or video) URL.
    # playlistIndex replaces a stale loop variable left over from input validation,
    # which made every archive receive the same numeric suffix.
    for playlistIndex, line in enumerate(lines):
        for x in getUrls(line):
            if checkSize(x):
                if args.check_size:
                    print('========================')
                    print('[info] Video size ok')
                _downloadVideo(x)
                if args.playlists:
                    compress('{}-{}'.format(args.output, playlistIndex))
                else:
                    compress(args.output)
            else:
                _reportTooBig(x)
            videoInc = videoInc + 1
else:
    # URL mode: resolve the URL list once and reuse it for the count and the loop.
    urls = getUrls(args.file)
    mediaAmount = str(len(urls))
    for x in urls:
        if checkSize(x):
            if args.check_size:
                print('========================')
                print('[info] Video size ok')
            _downloadVideo(x)
        else:
            _reportTooBig(x)
        videoInc = videoInc + 1
    compress(args.output)
print()