"""Download videos or playlists with yt-dlp and optionally pack them into 7z archives.

The positional ``file`` argument is either a URL or the path of a text file
that lists one URL per line.  Every URL is expanded into individual video
URLs by the external ``get-urls.sh`` helper, each video is downloaded into
``downloads/`` and, when ``--output`` is given, the directory content is
moved into a 7z archive.
"""
from __future__ import unicode_literals

import argparse
import getpass
import json
import logging
import logging.config
import os
import re
import subprocess
import sys
import time
from subprocess import call

try:
    import yt_dlp  # youtube_dl
except ImportError:
    # Only the embedded downloader and --check-size need the module; the
    # absence is reported in main() once the requested mode is known.
    yt_dlp = None

DOWNLOAD_DIR = 'downloads'

urlRegex = re.compile(
    r'^(?:http|ftp)s?://'  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$', re.IGNORECASE)

logger = logging.getLogger('youtube_dl')

# Run state shared with the yt-dlp progress hook; populated by main().
args = None          # parsed command line
lines = []           # URLs read from the input file (file mode only)
isURL = False        # True when the positional argument itself is a URL
done = False         # toggled by every 'finished' progress event
videoInc = 1         # 1-based counter of the video being processed
mediaAmount = '0'    # number of videos behind the URL (URL mode only)
fh = None            # file handler of the active log file
cachedPassword = None  # archive password once it has been prompted for


def buildParser():
    """Return the argument parser describing the command line interface."""
    parser = argparse.ArgumentParser()
    parser.add_argument("file")
    parser.add_argument('--playlists', '-pl', action='store_true',
                        help='is the input file a list of playlists? enables the sequential renaming of the 7z archives')
    parser.add_argument('--output', '-o', help='name of the 7z archive')
    parser.add_argument('--encrypt', '-e', action='store_true',
                        help='encrypt the compressed 7z archive')
    # Takes a value: as a store_true flag it was never None, so every archive
    # was silently encrypted with the literal password "False".
    parser.add_argument('--password', '-p',
                        help='password to encrypt the compressed 7z archive with')
    # NOTE: the pip auto-update step is disabled; the flag is still accepted
    # so existing invocations keep working.
    parser.add_argument('--no-update', '-nu', action='store_true',
                        help="don't update Pip packages")
    parser.add_argument('--cmd', '-c', action='store_true',
                        help='use the bash youtube-dl instead of the embedded python version')
    parser.add_argument('--max-size', '-m', type=int, default=1000,
                        help="max size of video in mb")
    parser.add_argument('--check-size', '-ch', action='store_true',
                        help="verify the video is smaller than the max size and skip if not")
    return parser


def parseUrlLines(rawLines):
    """Return the URLs contained in *rawLines*, one per non-blank line.

    Surrounding whitespace (including ``\\r\\n`` line endings) is removed and
    blank lines are ignored.

    Raises:
        ValueError: if a non-blank line is not a URL; the message carries the
            0-based line index.
    """
    urls = []
    for index, raw in enumerate(rawLines):
        candidate = raw.strip()
        if not candidate:
            continue
        if urlRegex.match(candidate) is None:
            raise ValueError('line {} not a url'.format(index))
        urls.append(candidate)
    return urls


def die(msg):
    """Log *msg* as an error, print it and terminate with exit status 1."""
    logger.error(msg)
    print(msg)
    sys.exit(1)


def setupLogging():
    """Attach a timestamped log file inside the download directory to the logger."""
    global fh
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh = logging.FileHandler(
        DOWNLOAD_DIR + '/youtube_dl-' + str(int(time.time())) + '.log',
        encoding='utf-8')  # video titles are frequently non-ASCII
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    logger.addHandler(fh)


def doLog(msg, level):
    """Write *msg* to the log file.

    *level* is one of ``'debug'``, ``'warning'`` or ``'error'``; any other
    value is ignored.  ``'debug'`` messages are recorded at INFO level.
    """
    if level == 'debug':
        logger.info(msg)
    elif level == 'warning':
        logger.warning(msg)
    elif level == 'error':
        logger.error(msg)


def archivePassword():
    """Return the password for the archive, or None when it is not encrypted.

    An explicit ``--password`` wins; with ``--encrypt`` alone the user is
    prompted once (without echo) and the answer is reused for later archives.
    """
    global cachedPassword
    if args.password is not None:
        return args.password
    if not args.encrypt:
        return None
    if cachedPassword is None:
        cachedPassword = getpass.getpass('password: ')
    return cachedPassword


def compress(name):
    """Pack the download directory into ``<name>.7z`` and remove the packed files.

    Does nothing when no ``--output`` was given.  The downloads are only
    deleted after 7z reported success, so a missing or failing 7z never
    destroys data.
    """
    if args.output is None:
        return
    cmd = ['7z', 'a', '-t7z', '-m0=lzma2', '-mx=9', '-mfb=64', '-md=1024m',
           '-ms=on', '-mhe=on']
    password = archivePassword()
    if password is not None:
        cmd.append('-p' + password)
    # 7z expands the wildcard itself; passing an argument list (no shell)
    # keeps quotes or spaces in the password and name from breaking the call.
    cmd += ['{}.7z'.format(name), DOWNLOAD_DIR + '/*']
    try:
        status = subprocess.run(cmd).returncode
    except OSError as error:
        msg = 'could not run 7z ({}); keeping downloaded files'.format(error)
        logger.error(msg)
        print(msg)
        return
    if status != 0:
        msg = '7z exited with status {}; keeping downloaded files'.format(status)
        logger.error(msg)
        print(msg)
        return
    activeLog = fh.baseFilename if fh is not None else None
    for entry in os.scandir(DOWNLOAD_DIR):
        if not entry.is_file():
            continue
        # Keep the log that is still being written so later messages are not lost.
        if activeLog is not None and os.path.abspath(entry.path) == activeLog:
            continue
        os.unlink(entry.path)


def largestFormatSize(info):
    """Return the biggest known ``filesize`` among the formats of *info*.

    Formats without a size (missing key or ``None``) count as 0, and so does
    an info dict without any formats.
    """
    formats = (info or {}).get('formats') or []
    return max((entry.get('filesize') or 0 for entry in formats), default=0)


def checkSize(url):
    """Return True when *url* may be downloaded under the ``--max-size`` limit.

    Without ``--check-size`` every video is accepted.  Otherwise the metadata
    is fetched and the video is rejected when its largest format exceeds the
    limit.
    """
    if not args.check_size:
        return True
    with yt_dlp.YoutubeDL() as ytdl:
        info = ytdl.extract_info(url, download=False)
    return largestFormatSize(info) <= args.max_size * 1000000


def getUrls(playlist):
    """Expand *playlist* into a list of video URLs using ``get-urls.sh``.

    Terminates the program when bash, the helper script or jq is unavailable.
    """
    try:
        proc = subprocess.run(['bash', 'get-urls.sh', playlist],
                              capture_output=True, encoding="utf-8")
    except OSError as error:
        die('could not run get-urls.sh: {}'.format(error))
    output = proc.stdout or ''
    errors = proc.stderr or ''
    # The shell reports a missing jq on stderr, so check both streams, and do
    # it before the empty-output test which would otherwise mask it.
    if 'jq: command not found' in output + errors:
        die('missing jq. see readme for installation instructions')
    if output == '':
        logger.error('get-urls.sh produced no output: ' + errors.strip())
        print('missing get-urls.sh')
        sys.exit(1)
    return re.findall(r'(https:\/\/.*)', output)


class ytdlLogger(object):
    """Logger object handed to yt-dlp; forwards its messages to the log file."""

    def debug(self, msg):
        # Per-chunk download progress would flood the log.
        if '[download]' not in msg:
            doLog(msg, 'debug')

    def warning(self, msg):
        doLog(msg, 'warning')

    def error(self, msg):
        doLog(msg, 'error')


def displayName(filename):
    """Return *filename* without the leading download directory component."""
    for prefix in (DOWNLOAD_DIR + '/', DOWNLOAD_DIR + os.sep):
        if filename.startswith(prefix):
            return filename[len(prefix):]
    return filename


def my_hook(d):
    """yt-dlp progress hook that renders a single-line status on stdout.

    Each ``'finished'`` event toggles ``done``; while ``done`` is set the
    ``'downloading'`` events are not rendered.
    """
    global done
    status = d['status']
    if status == 'finished':
        done = not done
    elif status == 'downloading':
        if done:
            return
        total = mediaAmount if isURL else len(lines)
        length = '{}/{}'.format(videoInc, total)
        sys.stdout.write('\x1b[2K')  # erase the current line
        sys.stdout.write('\r[{}] {} -> "{}" | {} {} {} {}'.format(
            status, length, displayName(d.get('filename', '')),
            d.get('_speed_str', 'N/A'), d.get('_total_bytes_str', 'N/A'),
            d.get('_eta_str', 'N/A'), d.get('_percent_str', 'N/A')))
        sys.stdout.flush()
    else:
        sys.stdout.write('\x1b[2K')
        sys.stdout.write('\r[{}] {}'.format(status, displayName(d.get('filename', ''))))
        sys.stdout.flush()


def buildFormat(maxSize):
    """Return the yt-dlp format selector limited to *maxSize* megabytes.

    Preference order: 1080p before 720p, more than 30 fps before any frame
    rate, and AV1 before VP9.2 before VP9 before any codec; the audio is Opus
    when available.
    """
    sizeFilter = '[filesize<{}M]'.format(maxSize)
    codecs = ('[vcodec^=av01]', '[vcodec=vp9.2]', '[vcodec=vp9]')
    choices = []
    for height in (1080, 720):
        heightFilter = '[height>={}]'.format(height)
        for fpsFilter in ('[fps>30]', ''):
            for codec in codecs:
                choices.append('bestvideo' + sizeFilter + codec + heightFilter + fpsFilter)
        choices.append('bestvideo' + sizeFilter + heightFilter)
    choices.append('bestvideo' + sizeFilter)
    return '({})+(bestaudio[acodec=opus]/bestaudio)/best'.format('/'.join(choices))


def buildOptions(maxSize):
    """Return the option dict for the embedded ``yt_dlp.YoutubeDL`` downloader."""
    return {
        'merge_output_format': 'mkv',
        'allsubtitles': True,
        'logtostderr': True,
        'format': buildFormat(maxSize),
        'outtmpl': DOWNLOAD_DIR + '/%(title)s - (%(uploader)s, %(uploader_id)s) - %(id)s',
        # One dict per post-processor: repeating 'key' inside a single dict
        # literal silently kept only the last entry.
        # NOTE(review): the two embed post-processors presumably only act when
        # subtitles/thumbnails are written as well - confirm and enable
        # 'writesubtitles' / 'writethumbnail' if embedding is wanted.
        'postprocessors': [
            {'key': 'FFmpegEmbedSubtitle'},
            {'key': 'FFmpegMetadata'},
            {'key': 'EmbedThumbnail'},
        ],
        'logger': ytdlLogger(),
        'progress_hooks': [my_hook],
    }


def buildCommand(maxSize):
    """Return the ``youtube-dl`` command line (argument list) used by ``--cmd``."""
    return [
        'youtube-dl', '-i', '--add-metadata', '--all-subs', '--embed-subs',
        '--embed-thumbnail', '-f', buildFormat(maxSize),
        '--merge-output-format', 'mkv',
        '-o', DOWNLOAD_DIR + '/%(title)s - %(id)s.%(ext)s',
        '--write-annotations', '--write-info-json', '--write-description',
        '--write-all-thumbnails', '--write-sub', '--sub-format', 'best',
        '--geo-bypass',
    ]


def runCommandLine(url, command):
    """Download *url* with the external youtube-dl; return True on success."""
    try:
        # Argument list instead of a shell string: the URL is external input.
        ytOut = subprocess.run(command + [url], capture_output=True, encoding="utf-8")
    except OSError as error:
        msg = 'could not run youtube-dl: {}'.format(error)
        logger.error(msg)
        print(msg)
        return False
    outLines = (ytOut.stdout or '').split('\n')
    if outLines and outLines[-1] == '':
        outLines.pop()
    for outLine in outLines:
        logger.info(outLine)
        print(outLine)
    return ytOut.returncode == 0


def runEmbedded(url, options):
    """Download *url* with the embedded yt-dlp; return True on success."""
    succeeded = True
    with yt_dlp.YoutubeDL(options) as ydl:
        try:
            ydl.download([url])
        except Exception as error:  # one broken video must not abort the batch
            logger.error('download of ' + url + ' failed: ' + str(error))
            print(error)
            succeeded = False
    print()
    return succeeded


def downloadVideo(url, options, command):
    """Size-check and download a single video; return True when it was downloaded."""
    if not checkSize(url):
        print('[info] Video too big')
        logger.info('skipping ' + url + ' because too big')
        return False
    if args.check_size:
        print('========================')
        print('[info] Video size ok')
    if args.cmd:
        return runCommandLine(url, command)
    return runEmbedded(url, options)


def main(argv=None):
    """Parse the command line and run the download/compress workflow."""
    global args, isURL, lines, videoInc, mediaAmount

    args = buildParser().parse_args(argv)

    # The extension is appended automatically, so the name must not carry it.
    if args.output is not None and args.output.lower().endswith('.7z'):
        print('no .7z extension in file name')
        sys.exit(1)

    if yt_dlp is None and (args.check_size or not args.cmd):
        print('the yt_dlp python package is not installed')
        sys.exit(1)

    isURL = urlRegex.match(args.file) is not None
    if not isURL:
        if not os.path.exists(args.file):
            print('file does not exist')
            sys.exit(1)
        with open(args.file, 'r') as urlFile:
            try:
                lines = parseUrlLines(urlFile)
            except ValueError as error:
                print(error)
                sys.exit(1)

    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    setupLogging()

    try:
        call(['yt-dlp', '--rm-cache-dir'])
    except OSError as error:
        logger.warning('could not clear the yt-dlp cache: {}'.format(error))

    options = buildOptions(args.max_size)
    command = buildCommand(args.max_size)
    videoInc = 1

    if not isURL:
        for index, line in enumerate(lines):
            # With --playlists every input line gets its own archive, numbered
            # by the 0-based position of the line in the input file.
            if args.playlists:
                archiveName = '{}-{}'.format(args.output, index)
            else:
                archiveName = args.output
            for url in getUrls(line):
                if downloadVideo(url, options, command):
                    compress(archiveName)
                videoInc = videoInc + 1
    else:
        urls = getUrls(args.file)  # expand once; the helper is an external process
        mediaAmount = str(len(urls))
        for url in urls:
            downloadVideo(url, options, command)
            videoInc = videoInc + 1
        compress(args.output)
        print()


if __name__ == '__main__':
    main()