This commit is contained in:
Cyberes 2020-01-09 20:56:28 -07:00
parent 365776dd61
commit 8f98d77eff
3 changed files with 321 additions and 321 deletions

View File

@ -1,43 +1,43 @@
# automated-youtube-dl # automated-youtube-dl
_Automated YouTube Archival_ _Automated YouTube Archival_
Download, compress, and send your YouTube videos. Download, compress, and send your YouTube videos.
### Installation ### Installation
1. Install Python 3.7 1. Install Python 3.7
1. This won't uninstall or overwrite any other python versions. 1. This won't uninstall or overwrite any other python versions.
```console-bash ```console-bash
sudo apt update sudo apt update
sudo apt install software-properties-common sudo apt install software-properties-common
sudo add-apt-repository ppa:deadsnakes/ppa sudo add-apt-repository ppa:deadsnakes/ppa
sudo apt install python3.7 sudo apt install python3.7
sudo python3.7 -m pip install pip sudo python3.7 -m pip install pip
``` ```
2. Install youtube-dl 2. Install youtube-dl
1. `sudo python3.7 -m pip install youtube-dl` 1. `sudo python3.7 -m pip install youtube-dl`
3. Install jq 3. Install jq
1. `sudo apt-get install jq` 1. `sudo apt-get install jq`
### Usage ### Usage
`python3.7 youtubedl-big-archive.py <url of youtube video, channel, or playlist> --output <name of 7z archive>` `python3.7 youtubedl-big-archive.py <url of youtube video, channel, or playlist> --output <name of 7z archive>`
**Arguments:** **Arguments:**
- `file` The URL of the YouTube video, channel, playlist, or path to a txt file with URLs. Positional - `file` The URL of the YouTube video, channel, playlist, or path to a txt file with URLs. Positional
- `--playlists`, `-pl` Is the input file a list of playlists? Enables the sequential renaming of the 7z archives - `--playlists`, `-pl` Is the input file a list of playlists? Enables the sequential renaming of the 7z archives
- `--output`, `-o` The name of the 7z archive to create. If unset no 7z archive is created - `--output`, `-o` The name of the 7z archive to create. If unset no 7z archive is created
- `--encrypt`, `-e` Encrypt the compressed 7z archive. If set will ask for password - `--encrypt`, `-e` Encrypt the compressed 7z archive. If set will ask for password
- `--password`, `-p` Provide the password to encrypt the compressed 7z archive with. Don't combine `--encrypt` and `--password` - `--password`, `-p` Provide the password to encrypt the compressed 7z archive with. Don't combine `--encrypt` and `--password`
- `--no-update`, `-nu` Don't update any Pip packages. You'd want to update because youtube-dl updates so much - `--no-update`, `-nu` Don't update any Pip packages. You'd want to update because youtube-dl updates so much
- `--max-size`, `-m` Max size of video in mb. Default is 2000 mb (2 gb) - `--max-size`, `-m` Max size of video in mb. Default is 2000 mb (2 gb)
- `--check-size`, `-ch` Verify the video is smaller than the max size and skip if not - `--check-size`, `-ch` Verify the video is smaller than the max size and skip if not
- `--cmd`, `-c` Use the bash (commandline) youtube-dl instead of the embedded python version - `--cmd`, `-c` Use the bash (commandline) youtube-dl instead of the embedded python version

View File

@ -1,258 +1,258 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import argparse import argparse
import json import json
import logging import logging
import logging.config import logging.config
import os import os
import re import re
import subprocess import subprocess
import sys import sys
import time import time
from subprocess import call from subprocess import call
import youtube_dl import youtube_dl
# Toggled by my_hook every time youtube-dl reports a finished file.
done = False

# Matches an absolute URL: scheme, then a domain name, "localhost" or a dotted
# IPv4 address, an optional port and an optional path/query. Case-insensitive.
urlRegex = re.compile(
    r'^(?:http|ftp)s?://'  # http://, https://, ftp:// or ftps://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$', re.IGNORECASE)
# Command-line interface. `file` is either a single URL or the path to a text
# file that holds one URL per line.
parser = argparse.ArgumentParser()
parser.add_argument("file")
parser.add_argument('--playlists', '-pl', action='store_true', help='is the input file a list of playlists? enables the sequential renaming of the 7z archives')
parser.add_argument('--output', '-o', help='name of the 7z archive')
parser.add_argument('--encrypt', '-e', action='store_true', help='encrypt the compressed 7z archive')
# Takes a value: compress() passes it to 7z as the archive password, so a
# bare on/off flag could never carry one.
parser.add_argument('--password', '-p', help='password to encrypt the compressed 7z archive with')
parser.add_argument('--no-update', '-nu', action='store_true', help="don't update Pip packages")
parser.add_argument('--cmd', '-c', help='use the bash youtube-dl instead of the embedded python version', action='store_true')
parser.add_argument('--max-size', '-m', type=int, default=2000, help="max size of video in mb")
parser.add_argument('--check-size', '-ch', action='store_true', help="verify the video is smaller than the max size and skip if not")
args = parser.parse_args()

# compress() appends ".7z" itself, so only an explicit extension is rejected
# (a name that merely contains "7z" is fine).
if args.output is not None and args.output.lower().endswith('.7z'):
    print('no .7z extension in file name')
    sys.exit(1)

if re.match(urlRegex, args.file) is None:
    # Not a URL: treat the argument as a file listing one URL per line.
    isURL = False
    if not os.path.isfile(args.file):
        print('file does not exist')
        sys.exit(1)
    with open(args.file, 'r') as url_file:
        lines = list(url_file)
    for i, raw_line in enumerate(lines):
        if re.match(urlRegex, raw_line) is None:
            # Report the position the way an editor shows it (first line is 1).
            print('line {} not a url'.format(i + 1))
            sys.exit(1)
        lines[i] = raw_line.strip('\n')
else:
    isURL = True

# Working directory for downloads and the log file; an existing directory is
# fine, any other failure (e.g. permissions) is allowed to surface.
os.makedirs('downloads', exist_ok=True)
# Everything youtube-dl and this script report is written to a timestamped log
# file inside the downloads directory.
# NOTE(review): compress() deletes every file in downloads/, which includes
# this log once an archive has been written - confirm that is intended.
logger = logging.getLogger('youtube_dl')
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh = logging.FileHandler('downloads/youtube_dl-' + str(int(time.time())) + '.log')
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
# A stderr StreamHandler used to be constructed here but was never attached
# to the logger, so it had no effect; the dead statement is removed and the
# console output is unchanged.
logger.addHandler(fh)
# since youtube-dl updates so much check for updates
if not args.no_update:
    print('checking for updates...')
    # Argument lists instead of shell strings: nothing is re-parsed by a shell.
    pipOut = subprocess.run(['python3.7', '-m', 'pip', 'list', '--outdated', '--format', 'json'],
                            capture_output=True, encoding="utf-8")
    try:
        pipJson = json.loads(pipOut.stdout)
    except ValueError:
        # pip failed or printed something that is not JSON; skip the update
        # step instead of aborting the whole run.
        logger.warning('could not read the list of outdated pip packages')
        pipJson = []
    updatable = [x['name'] for x in pipJson]
    logger.info(str(len(updatable)) + ' updatable pip packages')
    print(str(len(updatable)) + ' updatable pip packages')
    for x in updatable:
        sys.stdout.write('\x1b[2K')  # erase last line
        sys.stdout.write('\rupdating ' + x)
        sys.stdout.flush()
        logger.info('updating ' + x)
        # pip's output is not used. Sending it to DEVNULL (rather than to pipes
        # that nobody reads) means a chatty install cannot block on a full pipe.
        subprocess.run(['python3.7', '-m', 'pip', 'install', '--upgrade', x],
                       stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    print()
def doLog(msg, level):
    """Write *msg* to the module-level ``logger``.

    ``level`` is one of ``'debug'``, ``'warning'`` or ``'error'``.
    ``'debug'`` messages are recorded at INFO level. Any other, unrecognised
    level is also recorded at INFO so that no message is silently dropped.
    """
    if level == 'warning':
        logger.warning(msg)
    elif level == 'error':
        logger.error(msg)
    else:
        logger.info(msg)
def compress(name):
    """Pack everything in ``downloads/`` into ``<name>.7z`` and empty the folder.

    Does nothing when no ``--output`` was given. With ``--encrypt`` the
    password is asked for interactively; otherwise the value of ``--password``
    is used when present. The downloaded files are removed only after 7z
    reports success, so a failed archive run does not lose them.
    """
    if args.output is None:
        return

    password = None
    if args.encrypt or args.password is True:
        # `args.password is True` covers a parser that declares --password as
        # a bare flag: prompt instead of handing the literal "True" to 7z.
        password = input('password: ')
    elif args.password:
        password = args.password

    # Argument list (no shell): the password and archive name reach 7z
    # verbatim, whatever characters they contain. 'downloads/*' is passed as a
    # literal wildcard, as it was when quoted on the shell command line.
    cmd = ['7z', 'a', '-t7z', '-m0=lzma2', '-mx=9', '-mfb=64', '-md=1024m', '-ms=on', '-mhe=on']
    if password is not None:
        cmd.append('-p' + password)
    cmd.extend(['{}.7z'.format(name), 'downloads/*'])

    result = subprocess.run(cmd)
    if result.returncode != 0:
        logger.error('7z exited with code {}; keeping downloaded files'.format(result.returncode))
        print('7z exited with code {}; keeping downloaded files'.format(result.returncode))
        return

    with os.scandir('downloads') as entries:
        for entry in entries:
            # os.unlink cannot remove directories; leave them alone.
            if entry.is_file():
                os.unlink(entry.path)
def checkSize(url):
    """Return True when the video at *url* may be downloaded.

    With ``--check-size`` unset this is always True. Otherwise the metadata is
    fetched without downloading and the largest ``filesize`` reported for any
    format is compared with ``--max-size`` (megabytes, 1 MB = 1,000,000 bytes).
    Formats without a known size are ignored, so a video for which no size is
    reported at all is accepted.
    """
    if not args.check_size:
        return True

    info = youtube_dl.YoutubeDL().extract_info(url, download=False) or {}
    limit_bytes = args.max_size * 1000000
    # 'filesize' may be missing or None for a format; skip those entries.
    known_sizes = [fmt.get('filesize') for fmt in info.get('formats') or []]
    largest = max((size for size in known_sizes if size), default=0)
    return largest <= limit_bytes
def getUrls(playlist):
    """Return the ``https://`` URLs that ``get-urls.sh`` prints for *playlist*.

    Exits the program with status 1 when the helper script is missing, when
    ``jq`` is not installed, or when the script produces no output.
    """
    if not os.path.isfile('get-urls.sh'):
        logger.error('missing get-urls.sh')
        print('missing get-urls.sh')
        sys.exit(1)

    proc = subprocess.run(['bash', 'get-urls.sh', playlist], capture_output=True, encoding="utf-8")

    # bash reports an unknown command on stderr; stdout is checked as well in
    # case the script redirects its errors.
    if 'jq: command not found' in proc.stderr or 'jq: command not found' in proc.stdout:
        logger.error('missing jq. see readme for installation instructions')
        print('missing jq. see readme for installation instructions')
        sys.exit(1)

    if proc.stdout == '':
        detail = proc.stderr.strip()
        logger.error('get-urls.sh returned nothing for ' + playlist + ': ' + detail)
        print('get-urls.sh returned nothing for ' + playlist)
        sys.exit(1)

    return re.findall(r'(https:\/\/.*)', proc.stdout)
class ytdlLogger(object):
    """Logger object handed to youtube-dl; forwards every message to doLog."""

    def debug(self, msg):
        # '[download]' lines are left out of the log.
        if '[download]' in msg:
            return
        doLog(msg, 'debug')

    def warning(self, msg):
        doLog(msg, 'warning')

    def error(self, msg):
        doLog(msg, 'error')
def _display_name(path):
    """Return *path* without a leading ``downloads/`` directory prefix."""
    prefix = 'downloads/'
    # str.strip() would remove any of these *characters* from both ends and
    # eat into the title, so the prefix is cut off explicitly.
    return path[len(prefix):] if path.startswith(prefix) else path


def my_hook(d):
    """Progress hook for youtube-dl: redraw a one-line status on stdout.

    *d* is the status dictionary youtube-dl passes to progress hooks.
    - ``finished`` toggles the module-level ``done`` flag.
    - ``downloading`` redraws the progress line, but only while ``done`` is
      False.
    - Any other status prints the status and the file name.
    Progress fields that youtube-dl did not supply are shown as empty strings.
    """
    global done
    status = d['status']
    if status == 'finished':
        done = not done
    elif status == 'downloading':
        if done:
            return
        total = mediaAmount if isURL else len(lines)
        length = '{}/{}'.format(videoInc, total)
        sys.stdout.write('\x1b[2K')  # erase the current line
        sys.stdout.write('\r[{}] {} -> "{}" | {} {} {} {}'.format(
            status, length, _display_name(d.get('filename', '')),
            d.get('_speed_str', ''), d.get('_total_bytes_str', ''),
            d.get('_eta_str', ''), d.get('_percent_str', '')))
        sys.stdout.flush()
    else:
        # NOTE(review): indentation was lost in the source this was restored
        # from; this branch is assumed to handle the remaining statuses
        # (e.g. 'error') - confirm against the original file.
        sys.stdout.write('\x1b[2K')
        sys.stdout.write('\r[{}] {}'.format(status, _display_name(d.get('filename', ''))))
        sys.stdout.flush()
# Format selector: prefer >=1080p, then >=720p; within each tier prefer
# AV1, then VP9.2, then VP9, with >30 fps variants first. Every video
# alternative carries the same size filter, substituted for '{}' below from
# --max-size. Audio prefers Opus; 'best' is the final fallback.
ytdlFormat = (
    '(bestvideo[{}][vcodec^=av01][height>=1080][fps>30]/'
    'bestvideo[{}][vcodec=vp9.2][height>=1080][fps>30]/'
    'bestvideo[{}][vcodec=vp9][height>=1080][fps>30]/'
    'bestvideo[{}][vcodec^=av01][height>=1080]/'
    'bestvideo[{}][vcodec=vp9.2][height>=1080]/'
    'bestvideo[{}][vcodec=vp9][height>=1080]/'
    'bestvideo[{}][height>=1080]/'
    'bestvideo[{}][vcodec^=av01][height>=720][fps>30]/'
    'bestvideo[{}][vcodec=vp9.2][height>=720][fps>30]/'
    'bestvideo[{}][vcodec=vp9][height>=720][fps>30]/'
    'bestvideo[{}][vcodec^=av01][height>=720]/'
    'bestvideo[{}][vcodec=vp9.2][height>=720]/'
    'bestvideo[{}][vcodec=vp9][height>=720]/'
    'bestvideo[{}][height>=720]/'
    'bestvideo[{}])'
    '+(bestaudio[acodec=opus]/bestaudio)/best'
).replace('{}', 'filesize<' + str(args.max_size) + 'M')

# Options for the embedded (Python API) downloader.
ydl_opts = {
    'merge_output_format': 'mkv',
    'allsubtitles': True,
    'logtostderr': True,
    'format': ytdlFormat,
    'outtmpl': 'downloads/%(title)s - %(id)s.%(ext)s',
    # One dict per post-processor. They used to share a single dict, where
    # the repeated 'key' entries overwrote each other and only the last one
    # (FFmpegEmbedSubtitle) took effect.
    # NOTE(review): 'EmbedThumbnail' was listed as well. It is left out
    # because it presumably needs the thumbnail to be downloaded first and a
    # container that supports embedding - confirm before enabling it.
    'postprocessors': [
        {'key': 'FFmpegMetadata'},
        {'key': 'FFmpegEmbedSubtitle'},
    ],
    'logger': ytdlLogger(),
    'progress_hooks': [my_hook],
}

# Shell command used with --cmd; the video URL is appended by the caller.
ytdlCMD = 'youtube-dl -i --add-metadata --all-subs --embed-subs --embed-thumbnail -f "{}" --merge-output-format mkv -o "downloads/%(title)s - %(id)s.%(ext)s" --write-annotations --write-info-json --write-description --write-all-thumbnails --write-sub --sub-format "best" --geo-bypass'.format(
    ytdlFormat)
def _fetch_video(url):
    """Size-check and download one video; return True if it was downloaded."""
    if not checkSize(url):
        print('[info] Video too big')
        logger.info('skipping ' + url + ' because too big')
        return False

    if args.check_size:
        print('========================')
        print('[info] Video size ok')

    if args.cmd:
        import shlex  # local import: only the --cmd path builds a shell command
        ytOut = subprocess.run(ytdlCMD + ' ' + shlex.quote(url), capture_output=True, encoding="utf-8", shell=True)
        for out_line in re.findall(r'(.*\n)', ytOut.stdout):
            logger.info(out_line.strip('\n'))
            print(out_line.strip('\n'))
    else:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        print()  # end the in-place progress line drawn by my_hook
    return True


# Running count of videos processed so far; read by my_hook for the progress line.
# NOTE(review): the nesting below was restored from a source without
# indentation - confirm it against the original file.
videoInc = 1
if not isURL:
    # Input file: each line is expanded to its video URLs by getUrls.
    for playlist_number, line in enumerate(lines, start=1):
        for url in getUrls(line):
            if _fetch_video(url):
                if args.playlists:
                    # Number the archive by the playlist's position in the
                    # input file (first line is 1) so each playlist gets its
                    # own archive.
                    compress('{}-{}'.format(args.output, playlist_number))
                else:
                    compress(args.output)
            videoInc = videoInc + 1
else:
    # Single URL: resolve the list once and reuse it for the total shown
    # in the progress line.
    urls = getUrls(args.file)
    mediaAmount = str(len(urls))
    for url in urls:
        _fetch_video(url)
        videoInc = videoInc + 1
    compress(args.output)
print()

View File

@ -1,21 +1,21 @@
import subprocess import subprocess
import sys import sys
import json import json
# run this with sudo because sometimes packages require sudo to update/install
print('checking for updates...')
# Argument lists instead of shell strings: nothing is re-parsed by a shell.
pipOut = subprocess.run(['python3.7', '-m', 'pip', 'list', '--outdated', '--format', 'json'],
                        capture_output=True, encoding="utf-8")
try:
    pipJson = json.loads(pipOut.stdout)
except ValueError:
    # pip failed or printed something that is not JSON.
    print('could not read the list of outdated pip packages')
    sys.exit(1)
updatable = [x['name'] for x in pipJson]
for i, x in enumerate(updatable, start=1):
    sys.stdout.write('\x1b[2K')  # erase last line
    sys.stdout.write('\rupdating {} {}/{}'.format(x, i, str(len(updatable))))
    sys.stdout.flush()
    # pip's output is not used. Sending it to DEVNULL (rather than to pipes
    # that nobody reads) means a chatty install cannot block on a full pipe.
    subprocess.run(['python3.7', '-m', 'pip', 'install', '--upgrade', x],
                   stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
print()