diff --git a/automated-youtube-dl.py b/automated-youtube-dl.py old mode 100644 new mode 100755 index b4be405..8966ce0 --- a/automated-youtube-dl.py +++ b/automated-youtube-dl.py @@ -1,262 +1,263 @@ -from __future__ import unicode_literals - -import argparse -import json -import logging -import logging.config -import os -import re -import subprocess -import sys -import time -from subprocess import call - -import youtube_dl - -done = False -urlRegex = re.compile( - r'^(?:http|ftp)s?://' # http:// or https:// - r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain... - r'localhost|' # localhost... - r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip - r'(?::\d+)?' # optional port - r'(?:/?|[/?]\S+)$', re.IGNORECASE) - -parser = argparse.ArgumentParser() -parser.add_argument("file") -parser.add_argument('--playlists', '-pl', action='store_true', help='is the input file a list of playlists? enables the sequential renaming of the 7z archives') -parser.add_argument('--output', '-o', help='name of the 7z archive') -parser.add_argument('--encrypt', '-e', action='store_true', help='encrypt the compressed 7z archive') -parser.add_argument('--password', '-p', action='store_true', help='password to encrypt the compressed 7z archive with') -parser.add_argument('--no-update', '-nu', action='store_true', help='don\t update Pip packages') -parser.add_argument('--cmd', '-c', help='use the bash youtube-dl instead of the embedded python version', action='store_true') -parser.add_argument('--max-size', '-m', type=int, default=2000, help="max size of video in mb") -parser.add_argument('--check-size', '-ch', action='store_true', help="verify the video is smaller than the max size and skip if not") -args = parser.parse_args() - -if args.output is not None and '7z' in args.output: - print('no .7z extension in file name') - sys.exit(1) - -if re.match(urlRegex, args.file) is None: - isURL = False - if os.path.exists(args.file) is False: - print('file does not exist') - sys.exit(1) - lines = list(open(args.file, 'r')) - for i in range(len(lines)): - if re.match(urlRegex, lines[i]) is not None: - lines[i] = lines[i].strip('\n') - else: - print('line {} not a url'.format(i)) - sys.exit(1) -else: - isURL = True - -try: - os.mkdir('downloads') -except OSError as error: - pass - -logger = logging.getLogger('youtube_dl') -logger.setLevel(logging.DEBUG) -formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') -fh = logging.FileHandler('downloads/youtube_dl-' + str(int(time.time())) + '.log') -fh.setLevel(logging.DEBUG) -fh.setFormatter(formatter) -logging.StreamHandler(stream=sys.stderr) -logger.addHandler(fh) - -os.system('youtube-dl --rm-cache-dir') - -# since youtube-dl updates so much check for updates -# if not args.no_update: -# print('checking for updates...') -# pipOut = subprocess.run('python3.7 -m pip list --outdated --format json', capture_output=True, encoding="utf-8", shell=True) -# pipJson = json.loads(pipOut.stdout) -# updatable = [] -# for x in pipJson: -# updatable.append(x['name']) -# logger.info(str(len(updatable)) + ' updatable pip packages') -# print(str(len(updatable)) + ' updatable pip packages') -# for x in updatable: -# sys.stdout.write('\x1b[2K') # erase last line -# sys.stdout.write('\rupdating ' + x) -# sys.stdout.flush() -# logger.info('updating ' + x) -# process = subprocess.Popen('python3.7 -m pip install --upgrade ' + x, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) -# process.wait() -# print() - - -def doLog(msg, level): - global logger, fh, formatter - if level == 'debug': - logger.info(msg) - elif level == 'warning': - logger.warning(msg) - elif level == 'error': - logger.error(msg) - - -def compress(name): - global args - if args.output is not None: - if args.encrypt: - pwd = input('password: ') - cmd = '7z a -t7z -m0=lzma2 -mx=9 -mfb=64 -md=1024m -ms=on -mhe=on -p"{}" "{}.7z" "downloads/*"'.format(pwd, name) - elif args.password is not None: - cmd = '7z a -t7z -m0=lzma2 -mx=9 -mfb=64 -md=1024m -ms=on -mhe=on -p{} "{}.7z" "downloads/*"'.format(args.password, name) - else: - cmd = '7z a -t7z -m0=lzma2 -mx=9 -mfb=64 -md=1024m -ms=on -mhe=on "{}.7z" "downloads/*"'.format(name) - os.system(cmd) - for file in os.scandir('downloads'): - os.unlink(file.path) - - -def checkSize(url): - if args.check_size: - ytdl = youtube_dl.YoutubeDL() - info = ytdl.extract_info(url, download=False) - max = 0 - maxBytes = args.max_size * 1000000 - for x in info['formats']: - try: - if x['filesize'] > max: - max = x['filesize'] - except TypeError as e: - pass - if max > maxBytes: - return False - else: - return True - else: - return True - - -def getUrls(playlist): - proc = subprocess.run(['bash', 'get-urls.sh', playlist], capture_output=True, encoding="utf-8") - if proc.stdout == '': - logger.error('') - print('missing get-urls.sh') - sys.exit(1) - if proc.stdout.find('get-urls.sh: line 1: jq: command not found') == -1: - ret = re.findall(r'(https:\/\/.*)', proc.stdout) - return ret - else: - logger.error('missing jq. see readme for installation instruction') - print('missing jq. see readme for installation instructions') - sys.exit(1) - - -class ytdlLogger(object): - def debug(self, msg): - if '[download]' not in msg: - doLog(msg, 'debug') - - def warning(self, msg): - doLog(msg, 'warning') - - def error(self, msg): - doLog(msg, 'error') - - -def my_hook(d): - global done, videoInc, mediaAmount - if d['status'] == 'finished': - if not done: - done = True - else: - done = False - elif d['status'] == 'downloading': - if not done: - if isURL: - length = '{}/{}'.format(videoInc, mediaAmount) - else: - length = '{}/{}'.format(videoInc, len(lines)) - sys.stdout.write('\x1b[2K') - try: - sys.stdout.write('\r[{}] {} -> "{}" | {} {} {} {}'.format(d['status'], length, d['filename'].strip('downloads/'), - d['_speed_str'], d['_total_bytes_str'], d['_eta_str'], d['_percent_str'])) - except KeyError as error: - pass - sys.stdout.flush() - else: - sys.stdout.write('\x1b[2K') - try: - sys.stdout.write('\r[{}] {}'.format(d['status'])) - except KeyError as error: - pass - sys.stdout.flush() - - -ytdlFormat = '(bestvideo[{}][vcodec^=av01][height>=1080][fps>30]/bestvideo[{}][vcodec=vp9.2][height>=1080][fps>30]/bestvideo[{}][vcodec=vp9][height>=1080][fps>30]/bestvideo[{}][vcodec^=av01][height>=1080]/bestvideo[{}][vcodec=vp9.2][height>=1080]/bestvideo[{}][vcodec=vp9][height>=1080]/bestvideo[{}][height>=1080]/bestvideo[{}][vcodec^=av01][height>=720][fps>30]/bestvideo[{}][vcodec=vp9.2][height>=720][fps>30]/bestvideo[{}][vcodec=vp9][height>=720][fps>30]/bestvideo[{}][vcodec^=av01][height>=720]/bestvideo[{}][vcodec=vp9.2][height>=720]/bestvideo[{}][vcodec=vp9][height>=720]/bestvideo[{}][height>=720]/bestvideo[{}])+(bestaudio[acodec=opus]/bestaudio)/best'.replace('{}', 'filesize<' + str(args.max_size) + 'M') -ydl_opts = { - 'merge_output_format': 'mkv', - 'allsubtitles': True, - 'logtostderr': True, - 'format': ytdlFormat, - 'outtmpl': 'downloads/\'%(title)s\' - (\'%(uploader)s\' - \'%(uploader_id)s\') - %(id)s', - 'postprocessors': [{ - 'key': 'FFmpegMetadata', - 'key': 'EmbedThumbnail', - 'key': 'FFmpegEmbedSubtitle' - }], - 'logger': ytdlLogger(), - 'progress_hooks': [my_hook], -} -ytdlCMD = 'youtube-dl -i --add-metadata --all-subs --embed-subs --embed-thumbnail -f "{}" --merge-output-format mkv -o "downloads/%(title)s - %(id)s.%(ext)s" --write-annotations --write-info-json --write-description --write-all-thumbnails --write-sub --sub-format "best" --geo-bypass'.format(ytdlFormat) - -videoInc = 1 -if not isURL: - for line in lines: - for x in getUrls(line): - if checkSize(x): - if args.check_size: - print('========================') - print('[info] Video size ok') - if args.cmd: - ytOut = subprocess.run(ytdlCMD + ' "' + x + '"', capture_output=True, encoding="utf-8", shell=True) - ret = re.findall(r'(.*\n)', ytOut.stdout) - for x in ret: - logger.info(x.strip('\n')) - print(x.strip('\n')) - else: - with youtube_dl.YoutubeDL(ydl_opts) as ydl: - ydl.download([x]) - print() - if args.playlists: - compress('{}-{}'.format(args.output, i)) - else: - compress(args.output) - else: - print('[info] Video too big') - logger.info('skipping ' + x + ' because too big') - videoInc = videoInc + 1 -else: - mediaAmount = str(len(getUrls(args.file))) - for x in getUrls(args.file): - if checkSize(x): - if args.check_size: - print('========================') - print('[info] Video size ok') - if args.cmd: - ytOut = subprocess.run(ytdlCMD + ' "' + x + '"', capture_output=True, encoding="utf-8", shell=True) - ret = re.findall(r'(.*\n)', ytOut.stdout) - for x in ret: - logger.info(x.strip('\n')) - print(x.strip('\n')) - else: - with youtube_dl.YoutubeDL(ydl_opts) as ydl: - try: - ydl.download([x]) - except youtube_dl.utils.DownloadError as e: - print(e) - print() - else: - print('[info] Video too big') - logger.info('skipping ' + x + ' because too big') - videoInc = videoInc + 1 - compress(args.output) - -print() +from __future__ import unicode_literals + +import argparse +import json +import logging +import logging.config +import os +import re +import subprocess +import sys +import time +from subprocess import call + +import yt_dlp #youtube_dl + +done = False +urlRegex = re.compile( + r'^(?:http|ftp)s?://' # http:// or https:// + r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain... + r'localhost|' # localhost... + r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip + r'(?::\d+)?' # optional port + r'(?:/?|[/?]\S+)$', re.IGNORECASE) + +parser = argparse.ArgumentParser() +parser.add_argument("file") +parser.add_argument('--playlists', '-pl', action='store_true', help='is the input file a list of playlists? enables the sequential renaming of the 7z archives') +parser.add_argument('--output', '-o', help='name of the 7z archive') +parser.add_argument('--encrypt', '-e', action='store_true', help='encrypt the compressed 7z archive') +parser.add_argument('--password', '-p', action='store_true', help='password to encrypt the compressed 7z archive with') +parser.add_argument('--no-update', '-nu', action='store_true', help='don\t update Pip packages') +parser.add_argument('--cmd', '-c', help='use the bash youtube-dl instead of the embedded python version', action='store_true') +parser.add_argument('--max-size', '-m', type=int, default=1000, help="max size of video in mb") +parser.add_argument('--check-size', '-ch', action='store_true', help="verify the video is smaller than the max size and skip if not") +args = parser.parse_args() + +if args.output is not None and '7z' in args.output: + print('no .7z extension in file name') + sys.exit(1) + +if re.match(urlRegex, args.file) is None: + isURL = False + if os.path.exists(args.file) is False: + print('file does not exist') + sys.exit(1) + lines = list(open(args.file, 'r')) + for i in range(len(lines)): + if re.match(urlRegex, lines[i]) is not None: + lines[i] = lines[i].strip('\n') + else: + print('line {} not a url'.format(i)) + sys.exit(1) +else: + isURL = True + +try: + os.mkdir('downloads') +except OSError as error: + pass + +logger = logging.getLogger('youtube_dl') +logger.setLevel(logging.DEBUG) +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +fh = logging.FileHandler('downloads/youtube_dl-' + str(int(time.time())) + '.log') +fh.setLevel(logging.DEBUG) +fh.setFormatter(formatter) +logging.StreamHandler(stream=sys.stderr) +logger.addHandler(fh) + +os.system('yt-dlp --rm-cache-dir') + +# since youtube-dl updates so much check for updates +# if not args.no_update: +# print('checking for updates...') +# pipOut = subprocess.run('python3.7 -m pip list --outdated --format json', capture_output=True, encoding="utf-8", shell=True) +# pipJson = json.loads(pipOut.stdout) +# updatable = [] +# for x in pipJson: +# updatable.append(x['name']) +# logger.info(str(len(updatable)) + ' updatable pip packages') +# print(str(len(updatable)) + ' updatable pip packages') +# for x in updatable: +# sys.stdout.write('\x1b[2K') # erase last line +# sys.stdout.write('\rupdating ' + x) +# sys.stdout.flush() +# logger.info('updating ' + x) +# process = subprocess.Popen('python3.7 -m pip install --upgrade ' + x, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) +# process.wait() +# print() + + +def doLog(msg, level): + global logger, fh, formatter + if level == 'debug': + logger.info(msg) + elif level == 'warning': + logger.warning(msg) + elif level == 'error': + logger.error(msg) + + +def compress(name): + global args + if args.output is not None: + if args.encrypt: + pwd = input('password: ') + cmd = '7z a -t7z -m0=lzma2 -mx=9 -mfb=64 -md=1024m -ms=on -mhe=on -p"{}" "{}.7z" "downloads/*"'.format(pwd, name) + elif args.password is not None: + cmd = '7z a -t7z -m0=lzma2 -mx=9 -mfb=64 -md=1024m -ms=on -mhe=on -p{} "{}.7z" "downloads/*"'.format(args.password, name) + else: + cmd = '7z a -t7z -m0=lzma2 -mx=9 -mfb=64 -md=1024m -ms=on -mhe=on "{}.7z" "downloads/*"'.format(name) + os.system(cmd) + for file in os.scandir('downloads'): + os.unlink(file.path) + + +def checkSize(url): + if args.check_size: + ytdl = yt_dlp.YoutubeDL() + info = ytdl.extract_info(url, download=False) + max = 0 + maxBytes = args.max_size * 1000000 + for x in info['formats']: + try: + if x['filesize'] > max: + max = x['filesize'] + except TypeError as e: + pass + if max > maxBytes: + return False + else: + return True + else: + return True + + +def getUrls(playlist): + proc = subprocess.run(['bash', 'get-urls.sh', playlist], capture_output=True, encoding="utf-8") + if proc.stdout == '': + logger.error('') + print('missing get-urls.sh') + sys.exit(1) + if proc.stdout.find('get-urls.sh: line 1: jq: command not found') == -1: + ret = re.findall(r'(https:\/\/.*)', proc.stdout) + return ret + else: + logger.error('missing jq. see readme for installation instruction') + print('missing jq. see readme for installation instructions') + sys.exit(1) + + +class ytdlLogger(object): + def debug(self, msg): + if '[download]' not in msg: + doLog(msg, 'debug') + + def warning(self, msg): + doLog(msg, 'warning') + + def error(self, msg): + doLog(msg, 'error') + + +def my_hook(d): + global done, videoInc, mediaAmount + if d['status'] == 'finished': + if not done: + done = True + else: + done = False + elif d['status'] == 'downloading': + if not done: + if isURL: + length = '{}/{}'.format(videoInc, mediaAmount) + else: + length = '{}/{}'.format(videoInc, len(lines)) + sys.stdout.write('\x1b[2K') + try: + sys.stdout.write('\r[{}] {} -> "{}" | {} {} {} {}'.format(d['status'], length, d['filename'].strip('downloads/'), + d['_speed_str'], d['_total_bytes_str'], d['_eta_str'], d['_percent_str'])) + except KeyError as error: + pass + sys.stdout.flush() + else: + sys.stdout.write('\x1b[2K') + try: + sys.stdout.write('\r[{}] {}'.format(d['status'])) + except KeyError as error: + pass + sys.stdout.flush() + + +ytdlFormat = '(bestvideo[{}][vcodec^=av01][height>=1080][fps>30]/bestvideo[{}][vcodec=vp9.2][height>=1080][fps>30]/bestvideo[{}][vcodec=vp9][height>=1080][fps>30]/bestvideo[{}][vcodec^=av01][height>=1080]/bestvideo[{}][vcodec=vp9.2][height>=1080]/bestvideo[{}][vcodec=vp9][height>=1080]/bestvideo[{}][height>=1080]/bestvideo[{}][vcodec^=av01][height>=720][fps>30]/bestvideo[{}][vcodec=vp9.2][height>=720][fps>30]/bestvideo[{}][vcodec=vp9][height>=720][fps>30]/bestvideo[{}][vcodec^=av01][height>=720]/bestvideo[{}][vcodec=vp9.2][height>=720]/bestvideo[{}][vcodec=vp9][height>=720]/bestvideo[{}][height>=720]/bestvideo[{}])+(bestaudio[acodec=opus]/bestaudio)/best'.replace('{}', 'filesize<' + str(args.max_size) + 'M') +ydl_opts = { + 'merge_output_format': 'mkv', + 'allsubtitles': True, + 'logtostderr': True, + 'format': ytdlFormat, + 'outtmpl': 'downloads/%(title)s - (%(uploader)s, %(uploader_id)s) - %(id)s', + # 'outtmpl': 'downloads/%(title)s', + 'postprocessors': [{ + 'key': 'FFmpegMetadata', + 'key': 'EmbedThumbnail', + 'key': 'FFmpegEmbedSubtitle' + }], + 'logger': ytdlLogger(), + 'progress_hooks': [my_hook], +} +ytdlCMD = 'youtube-dl -i --add-metadata --all-subs --embed-subs --embed-thumbnail -f "{}" --merge-output-format mkv -o "downloads/%(title)s - %(id)s.%(ext)s" --write-annotations --write-info-json --write-description --write-all-thumbnails --write-sub --sub-format "best" --geo-bypass'.format(ytdlFormat) + +videoInc = 1 +if not isURL: + for line in lines: + for x in getUrls(line): + if checkSize(x): + if args.check_size: + print('========================') + print('[info] Video size ok') + if args.cmd: + ytOut = subprocess.run(ytdlCMD + ' "' + x + '"', capture_output=True, encoding="utf-8", shell=True) + ret = re.findall(r'(.*\n)', ytOut.stdout) + for x in ret: + logger.info(x.strip('\n')) + print(x.strip('\n')) + else: + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + ydl.download([x]) + print() + if args.playlists: + compress('{}-{}'.format(args.output, i)) + else: + compress(args.output) + else: + print('[info] Video too big') + logger.info('skipping ' + x + ' because too big') + videoInc = videoInc + 1 +else: + mediaAmount = str(len(getUrls(args.file))) + for x in getUrls(args.file): + if checkSize(x): + if args.check_size: + print('========================') + print('[info] Video size ok') + if args.cmd: + ytOut = subprocess.run(ytdlCMD + ' "' + x + '"', capture_output=True, encoding="utf-8", shell=True) + ret = re.findall(r'(.*\n)', ytOut.stdout) + for x in ret: + logger.info(x.strip('\n')) + print(x.strip('\n')) + else: + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + try: + ydl.download([x]) + except Exception as e: + print(e) + print() + else: + print('[info] Video too big') + logger.info('skipping ' + x + ' because too big') + videoInc = videoInc + 1 + compress(args.output) + +print()