# 2022-03-08 17:49:55 -07:00
from __future__ import unicode_literals
import argparse
import json
import logging
import logging . config
import os
import re
import subprocess
import sys
import time
from subprocess import call
import yt_dlp #youtube_dl
# Flipped by my_hook on every 'finished' progress event; the progress line
# is only drawn while this is False.
done = False

# Pattern for an absolute URL, assembled from its parts.
urlRegex = re.compile(
    ''.join((
        r'^(?:http|ftp)s?://',  # scheme: http, https, ftp or ftps
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|',  # domain name...
        r'localhost|',  # ...or localhost...
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})',  # ...or dotted IPv4 address
        r'(?::\d+)?',  # optional port
        r'(?:/?|[/?]\S+)$',  # optional path / query
    )),
    flags=re.IGNORECASE,
)
# Command-line interface of the script.
parser = argparse.ArgumentParser()
parser.add_argument("file", help='a URL, or the path of a text file with one URL per line')
parser.add_argument('--playlists', '-pl', action='store_true',
                    help='is the input file a list of playlists? enables the sequential renaming of the 7z archives')
parser.add_argument('--output', '-o', help='name of the 7z archive')
parser.add_argument('--encrypt', '-e', action='store_true', help='encrypt the compressed 7z archive')
# Takes a value: the option used to be a store_true flag, which made it
# impossible to actually pass a password on the command line.
parser.add_argument('--password', '-p', help='password to encrypt the compressed 7z archive with')
parser.add_argument('--no-update', '-nu', action='store_true', help="don't update Pip packages")
parser.add_argument('--cmd', '-c', action='store_true',
                    help='use the bash youtube-dl instead of the embedded python version')
parser.add_argument('--max-size', '-m', type=int, default=1000, help="max size of video in mb")
parser.add_argument('--check-size', '-ch', action='store_true',
                    help="verify the video is smaller than the max size and skip if not")
args = parser.parse_args()
# The archive name must be given without its extension; only a real ".7z"
# suffix is rejected (a name that merely contains "7z" is fine).
if args.output is not None and args.output.lower().endswith('.7z'):
    print('no .7z extension in file name')
    sys.exit(1)

# The positional argument is either a single URL or a file listing URLs.
isURL = re.match(urlRegex, args.file) is not None
if not isURL:
    if not os.path.isfile(args.file):
        print('file does not exist')
        sys.exit(1)
    # Read the whole list up front and close the file right away.
    with open(args.file, 'r') as url_file:
        lines = list(url_file)
    # The index keeps the name `i` and stays bound at module level after
    # the loop, exactly as before.
    for i, line in enumerate(lines):
        if re.match(urlRegex, line) is None:
            print('line {} not a url'.format(i))
            sys.exit(1)
        lines[i] = line.strip('\n')
# Create the download directory if needed. An already existing directory is
# fine; any other failure (e.g. permissions) is reported instead of being
# silently swallowed.
os.makedirs('downloads', exist_ok=True)

# Everything is logged to a timestamped file inside the download directory.
logger = logging.getLogger('youtube_dl')
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh = logging.FileHandler('downloads/youtube_dl-' + str(int(time.time())) + '.log')
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
# A stderr StreamHandler used to be constructed here and thrown away without
# ever being attached to the logger; that no-op statement was dropped.
logger.addHandler(fh)

# Clear yt-dlp's cache directory before downloading.
os.system('yt-dlp --rm-cache-dir')
# Since youtube-dl is updated so often, check for package updates before downloading.
# if not args.no_update:
# print('checking for updates...')
# pipOut = subprocess.run('python3.7 -m pip list --outdated --format json', capture_output=True, encoding="utf-8", shell=True)
# pipJson = json.loads(pipOut.stdout)
# updatable = []
# for x in pipJson:
# updatable.append(x['name'])
# logger.info(str(len(updatable)) + ' updatable pip packages')
# print(str(len(updatable)) + ' updatable pip packages')
# for x in updatable:
# sys.stdout.write('\x1b[2K') # erase last line
# sys.stdout.write('\rupdating ' + x)
# sys.stdout.flush()
# logger.info('updating ' + x)
# process = subprocess.Popen('python3.7 -m pip install --upgrade ' + x, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# process.wait()
# print()
def doLog(msg, level):
    """Forward ``msg`` to the module-level ``logger``.

    ``level`` selects the logger method: ``'debug'`` messages are written
    with ``logger.info``, ``'warning'`` with ``logger.warning`` and
    ``'error'`` with ``logger.error``. Any other ``level`` is ignored.
    """
    routes = (
        ('debug', 'info'),
        ('warning', 'warning'),
        ('error', 'error'),
    )
    for level_name, method_name in routes:
        if level == level_name:
            getattr(logger, method_name)(msg)
            break
def compress(name):
    """Add everything in ``downloads/`` to ``<name>.7z`` and empty the directory.

    Args:
        name: archive name without the ``.7z`` extension.

    Nothing happens when no ``--output`` was given. With ``--encrypt`` the
    password is asked for interactively; otherwise a ``--password`` string,
    if one was supplied, is used. The downloaded files are only removed
    after 7z reported success, so a failed run never loses data.
    """
    if args.output is None or name is None:
        # No archive requested: leave the downloads where they are.
        return

    # Argument list instead of a shell string, so neither the password nor
    # the archive name can be mangled or interpreted by a shell.
    cmd = ['7z', 'a', '-t7z', '-m0=lzma2', '-mx=9', '-mfb=64', '-md=1024m', '-ms=on', '-mhe=on']
    if args.encrypt:
        cmd.append('-p' + input('password: '))
    elif isinstance(args.password, str) and args.password:
        # A boolean here (flag-style option) carries no password to use.
        cmd.append('-p' + args.password)
    cmd.append('{}.7z'.format(name))
    cmd.append('downloads/*')  # the wildcard is expanded by 7z itself

    try:
        result = subprocess.run(cmd)
    except FileNotFoundError:
        print('7z executable not found, keeping downloaded files')
        return
    if result.returncode != 0:
        print('7z exited with code {}, keeping downloaded files'.format(result.returncode))
        return

    with os.scandir('downloads') as entries:
        for entry in entries:
            # os.unlink cannot remove directories; only delete plain files.
            if entry.is_file():
                os.unlink(entry.path)
def checkSize(url):
    """Tell whether ``url`` may be downloaded under the ``--max-size`` limit.

    Args:
        url: address of the video to inspect.

    Returns:
        True when ``--check-size`` is off, or when the largest ``filesize``
        reported among the video's formats does not exceed ``--max-size``
        (in units of 1,000,000 bytes). False otherwise. Formats without a
        known size count as 0 bytes.
    """
    if not args.check_size:
        return True

    # Metadata only; nothing is downloaded here.
    with yt_dlp.YoutubeDL() as ytdl:
        info = ytdl.extract_info(url, download=False)

    # `formats` may be absent and `filesize` may be missing or None.
    formats = (info or {}).get('formats') or []
    largest = max((fmt.get('filesize') or 0 for fmt in formats), default=0)
    return largest <= args.max_size * 1000000
def getUrls(playlist):
    """Return the https URLs that ``get-urls.sh`` prints for ``playlist``.

    Args:
        playlist: value passed to the helper script as its only argument.

    Returns:
        A list with every ``https://...`` match found in the script output.

    Exits the program with status 1 when ``jq`` is not installed or when the
    script produced no output at all.
    """
    proc = subprocess.run(['bash', 'get-urls.sh', playlist], capture_output=True, encoding="utf-8")
    stdout = proc.stdout or ''
    stderr = proc.stderr or ''

    # bash reports a missing command on stderr, so look at both streams and
    # do it before the empty-output check (a missing jq yields empty stdout).
    if 'jq: command not found' in stdout or 'jq: command not found' in stderr:
        logger.error('missing jq. see readme for installation instructions')
        print('missing jq. see readme for installation instructions')
        sys.exit(1)

    if stdout == '':
        logger.error('get-urls.sh produced no output for %s: %s', playlist, stderr.strip() or 'no error output')
        print('missing get-urls.sh')
        sys.exit(1)

    return re.findall(r'(https:\/\/.*)', stdout)
class ytdlLogger(object):
    """Logger object handed to yt-dlp; relays every message to ``doLog``."""

    def debug(self, msg):
        """Relay ``msg`` at 'debug' level, dropping ``[download]`` lines."""
        if '[download]' in msg:
            return
        doLog(msg, 'debug')

    def warning(self, msg):
        """Relay ``msg`` at 'warning' level."""
        doLog(msg, 'warning')

    def error(self, msg):
        """Relay ``msg`` at 'error' level."""
        doLog(msg, 'error')
def my_hook(d):
    """Progress hook: draw a one-line status display on stdout.

    Args:
        d: progress dictionary supplied by the downloader. ``d['status']``
            is required; every other key used for the display is optional
            and rendered as an empty string when absent.

    A ``'finished'`` event only flips the module-level ``done`` flag.
    ``'downloading'`` events redraw the progress line while ``done`` is
    False and are ignored otherwise. Any other status is shown as
    ``[status] filename``.
    """
    global done
    status = d['status']
    if status == 'finished':
        done = not done
        return
    if status == 'downloading' and done:
        return

    # Drop the directory prefix only; str.strip('downloads/') would also eat
    # matching characters from both ends of the file name itself.
    name = d.get('filename', '')
    prefix = 'downloads/'
    if name.startswith(prefix):
        name = name[len(prefix):]

    if status == 'downloading':
        total = mediaAmount if isURL else len(lines)
        text = '\r[{}] {} -> "{}" | {} {} {} {}'.format(
            status,
            '{}/{}'.format(videoInc, total),
            name,
            d.get('_speed_str', ''),
            d.get('_total_bytes_str', ''),
            d.get('_eta_str', ''),
            d.get('_percent_str', ''),
        )
    else:
        # The old format string had two placeholders but a single argument,
        # which raised IndexError for every status reaching this branch.
        text = '\r[{}] {}'.format(status, name)

    sys.stdout.write('\x1b[2K')  # ANSI escape: erase the current line
    sys.stdout.write(text)
    sys.stdout.flush()
# Format selector: every video alternative is capped by the --max-size
# filter; the '{}' placeholders are filled in by the replace() call below.
ytdlFormat = (
    '(bestvideo[{}][vcodec^=av01][height>=1080][fps>30]/'
    'bestvideo[{}][vcodec=vp9.2][height>=1080][fps>30]/'
    'bestvideo[{}][vcodec=vp9][height>=1080][fps>30]/'
    'bestvideo[{}][vcodec^=av01][height>=1080]/'
    'bestvideo[{}][vcodec=vp9.2][height>=1080]/'
    'bestvideo[{}][vcodec=vp9][height>=1080]/'
    'bestvideo[{}][height>=1080]/'
    'bestvideo[{}][vcodec^=av01][height>=720][fps>30]/'
    'bestvideo[{}][vcodec=vp9.2][height>=720][fps>30]/'
    'bestvideo[{}][vcodec=vp9][height>=720][fps>30]/'
    'bestvideo[{}][vcodec^=av01][height>=720]/'
    'bestvideo[{}][vcodec=vp9.2][height>=720]/'
    'bestvideo[{}][vcodec=vp9][height>=720]/'
    'bestvideo[{}][height>=720]/'
    'bestvideo[{}])+(bestaudio[acodec=opus]/bestaudio)/best'
).replace('{}', 'filesize<' + str(args.max_size) + 'M')

# Options for the embedded yt_dlp downloader.
ydl_opts = {
    'merge_output_format': 'mkv',
    'allsubtitles': True,
    'logtostderr': True,
    'format': ytdlFormat,
    'outtmpl': 'downloads/%(title)s - (%(uploader)s, %(uploader_id)s) - %(id)s',
    # 'outtmpl': 'downloads/%(title)s',
    # One dict per postprocessor. A single dict literal that repeats the
    # 'key' entry keeps only the last value, so the first two were dropped.
    # NOTE(review): embedding thumbnails/subtitles presumably also needs the
    # 'writethumbnail' / 'writesubtitles' options - confirm with yt-dlp docs.
    'postprocessors': [
        {'key': 'FFmpegMetadata'},
        {'key': 'EmbedThumbnail'},
        {'key': 'FFmpegEmbedSubtitle'},
    ],
    'logger': ytdlLogger(),
    'progress_hooks': [my_hook],
}

# Command-line prefix used with --cmd; the URL is appended by the caller,
# hence the trailing space.
ytdlCMD = 'youtube-dl -i --add-metadata --all-subs --embed-subs --embed-thumbnail -f "{}" --merge-output-format mkv -o "downloads/%(title)s - %(id)s.%(ext)s" --write-annotations --write-info-json --write-description --write-all-thumbnails --write-sub --sub-format "best" --geo-bypass '.format(ytdlFormat)
import shlex  # script-local import: used to quote URLs for the shell


def _download(url):
    """Download ``url`` with the youtube-dl CLI (--cmd) or the embedded yt_dlp."""
    if args.cmd:
        # ytdlCMD is a shell command line; quote the URL so characters such
        # as '&' or '"' in it cannot break or extend the command.
        ytOut = subprocess.run(ytdlCMD + ' ' + shlex.quote(url),
                               capture_output=True, encoding="utf-8", shell=True)
        for out_line in (ytOut.stdout or '').splitlines():
            logger.info(out_line)
            print(out_line)
        return
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        try:
            ydl.download([url])
        except Exception as e:
            # One failing video must not abort the rest of the batch.
            logger.error('download of ' + url + ' failed: ' + str(e))
            print(e)


def _process(url):
    """Size-check and download ``url``; return True when it was downloaded."""
    if not checkSize(url):
        print('[info] Video too big')
        logger.info('skipping ' + url + ' because too big')
        return False
    if args.check_size:
        print('========================')
        print('[info] Video size ok')
    _download(url)
    print()
    return True


# 1-based counter of the video being handled; read by my_hook.
videoInc = 1
if not isURL:
    # File mode: every line of the input file is expanded into URLs.
    for playlist_index, line in enumerate(lines):
        for url in getUrls(line):
            # Archive right after each download (compress() also empties the
            # download directory). Skipped when no --output name was given.
            if _process(url) and args.output is not None:
                if args.playlists:
                    # Number the archive after the playlist being processed,
                    # not after a stale index left over from earlier code.
                    compress('{}-{}'.format(args.output, playlist_index))
                else:
                    compress(args.output)
            videoInc = videoInc + 1
else:
    # URL mode: expand the single argument once and reuse the result.
    urls = getUrls(args.file)
    mediaAmount = str(len(urls))  # total shown by my_hook
    for url in urls:
        _process(url)
        videoInc = videoInc + 1
    if args.output is not None:
        compress(args.output)
print()