This commit is contained in:
Cyberes 2020-01-09 20:56:28 -07:00
parent 365776dd61
commit 8f98d77eff
3 changed files with 321 additions and 321 deletions

View File

@ -1,43 +1,43 @@
# automated-youtube-dl
_Automated YouTube Archival_
Download, compress, and send your YouTube videos.
### Installation
1. Install Python 3.7
1. This won't uninstall or overwrite any other python versions.
```console-bash
sudo apt update
sudo apt install software-properties-common
sudo add-apt-repository ppa:deadsnakes/ppa
sudo apt install python3.7
sudo python3.7 -m pip install pip
```
2. Install youtube-dl
1. `sudo python3.7 -m pip install youtube-dl`
3. Install jq
1. `sudo apt-get install jq`
### Usage
`python3.7 youtubedl-big-archive.py <url of youtube video, channel, or playlist> --output <name of 7z archive>`
**Arguments:**
- `file` The URL of the YouTube video, channel, playlist, or path to a txt file with URLs. Positional
- `--playlists`, `-pl` Is the input file a list of playlists? Enables the sequential renaming of the 7z archives
- `--output`, `-o` The name of the 7z archive to create. If unset no 7z archive is created
- `--encrypt`, `-e` Encrypt the compressed 7z archive. If set will ask for password
- `--password`, `-p` Provide the password to encrypt the compressed 7z archive with. Don't combine `--encrypt` and `--password`
- `--no-update`, `-nu` Don't update any Pip packages. You'd want to update because youtube-dl updates so much
- `--max-size`, `-m` Max size of video in mb. Default is 2000 mb (2 gb)
- `--check-size`, `-ch` Verify the video is smaller than the max size and skip if not
- `--cmd`, `-c` Use the bash (commandline) youtube-dl instead of the embedded python version
# automated-youtube-dl
_Automated YouTube Archival_
Download, compress, and send your YouTube videos.
### Installation
1. Install Python 3.7
1. This won't uninstall or overwrite any other python versions.
```console-bash
sudo apt update
sudo apt install software-properties-common
sudo add-apt-repository ppa:deadsnakes/ppa
sudo apt install python3.7
sudo python3.7 -m pip install pip
```
2. Install youtube-dl
1. `sudo python3.7 -m pip install youtube-dl`
3. Install jq
1. `sudo apt-get install jq`
### Usage
`python3.7 youtubedl-big-archive.py <url of youtube video, channel, or playlist> --output <name of 7z archive>`
**Arguments:**
- `file` The URL of the YouTube video, channel, playlist, or path to a txt file with URLs. Positional
- `--playlists`, `-pl` Is the input file a list of playlists? Enables the sequential renaming of the 7z archives
- `--output`, `-o` The name of the 7z archive to create. If unset no 7z archive is created
- `--encrypt`, `-e` Encrypt the compressed 7z archive. If set will ask for password
- `--password`, `-p` Provide the password to encrypt the compressed 7z archive with. Don't combine `--encrypt` and `--password`
- `--no-update`, `-nu` Don't update any Pip packages. You'd want to update because youtube-dl updates so much
- `--max-size`, `-m` Max size of video in mb. Default is 2000 mb (2 gb)
- `--check-size`, `-ch` Verify the video is smaller than the max size and skip if not
- `--cmd`, `-c` Use the bash (commandline) youtube-dl instead of the embedded python version

View File

@ -1,258 +1,258 @@
from __future__ import unicode_literals
import argparse
import json
import logging
import logging.config
import os
import re
import subprocess
import sys
import time
from subprocess import call
import youtube_dl
# Toggled by my_hook every time youtube-dl reports a 'finished' event.
done = False

# Accepts http(s)/ftp(s) URLs whose host is a domain name, "localhost" or a
# dotted IPv4 address, optionally followed by a port and a path/query.
_URL_PATTERN = ''.join((
    r'^(?:http|ftp)s?://',  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|',  # domain...
    r'localhost|',  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})',  # ...or ip
    r'(?::\d+)?',  # optional port
    r'(?:/?|[/?]\S+)$',
))
urlRegex = re.compile(_URL_PATTERN, flags=re.IGNORECASE)
# Command-line interface.
parser = argparse.ArgumentParser()
parser.add_argument("file")
parser.add_argument('--playlists', '-pl', action='store_true', help='is the input file a list of playlists? enables the sequential renaming of the 7z archives')
parser.add_argument('--output', '-o', help='name of the 7z archive')
parser.add_argument('--encrypt', '-e', action='store_true', help='encrypt the compressed 7z archive')
# Takes a value: as a store_true flag it could only be True/False, never the
# password text, and was never None for the "is not None" check in compress().
parser.add_argument('--password', '-p', help='password to encrypt the compressed 7z archive with')
parser.add_argument('--no-update', '-nu', action='store_true', help="don't update Pip packages")
parser.add_argument('--cmd', '-c', help='use the bash youtube-dl instead of the embedded python version', action='store_true')
parser.add_argument('--max-size', '-m', type=int, default=2000, help="max size of video in mb")
parser.add_argument('--check-size', '-ch', action='store_true', help="verify the video is smaller than the max size and skip if not")
args = parser.parse_args()

# ".7z" is appended when the archive is created, so reject names that already
# end with it (a name that merely contains "7z" is fine).
if args.output is not None and args.output.lower().endswith('.7z'):
    print('no .7z extension in file name')
    sys.exit(1)

# The positional argument is either a single URL or a text file of URLs.
if re.match(urlRegex, args.file) is None:
    isURL = False
    if os.path.exists(args.file) is False:
        print('file does not exist')
        sys.exit(1)
    lines = []
    with open(args.file, 'r') as url_file:
        # `i` stays bound at module level because later code reads it.
        for i, raw in enumerate(url_file):
            candidate = raw.strip()
            if not candidate:
                continue  # tolerate blank lines, e.g. a trailing newline
            if re.match(urlRegex, candidate) is None:
                print('line {} not a url'.format(i))
                sys.exit(1)
            lines.append(candidate)
else:
    isURL = True
# Downloads and the per-run log file both live in ./downloads.
try:
    os.mkdir('downloads')
except FileExistsError:
    pass  # already there from a previous run; any other failure should surface

# File logger shared with youtube-dl; one timestamped log file per run.
logger = logging.getLogger('youtube_dl')
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh = logging.FileHandler('downloads/youtube_dl-' + str(int(time.time())) + '.log')
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
# The original also constructed a StreamHandler here but never attached it,
# so it had no effect; it is dropped rather than silently adding stderr output.
logger.addHandler(fh)
# since youtube-dl updates so much check for updates
# NOTE(review): indentation was lost in this copy; the trailing print() is
# assumed to belong to the update branch (it ends the '\r' status line).
if not args.no_update:
    print('checking for updates...')
    try:
        pipOut = subprocess.run(
            ['python3.7', '-m', 'pip', 'list', '--outdated', '--format', 'json'],
            capture_output=True, encoding="utf-8")
        pipJson = json.loads(pipOut.stdout)
    except (OSError, ValueError) as error:
        # pip could not be started or printed something that is not JSON;
        # skip updating instead of aborting the whole run.
        logger.warning('could not list outdated pip packages: {}'.format(error))
        pipJson = []
    updatable = [package['name'] for package in pipJson]
    logger.info(str(len(updatable)) + ' updatable pip packages')
    print(str(len(updatable)) + ' updatable pip packages')
    for x in updatable:
        sys.stdout.write('\x1b[2K')  # erase last line
        sys.stdout.write('\rupdating ' + x)
        sys.stdout.flush()
        logger.info('updating ' + x)
        # Output is discarded; piping it and then wait()-ing could block once
        # the pipe buffer fills up.
        subprocess.run(['python3.7', '-m', 'pip', 'install', '--upgrade', x],
                       stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    print()
def doLog(msg, level):
    """Write msg to the module logger at the severity named by level.

    'debug' is recorded through logger.info, 'warning' through logger.warning
    and 'error' through logger.error. Any other level name is ignored.
    """
    emitters = {
        'debug': logger.info,
        'warning': logger.warning,
        'error': logger.error,
    }
    emit = emitters.get(level)
    if emit is not None:
        emit(msg)
def compress(name):
    """Pack everything in downloads/ into "<name>.7z", then empty downloads/.

    Does nothing when no --output archive name was given. With --encrypt (or
    a bare boolean --password flag) the password is prompted for; a string
    --password is used as given. Downloaded files are removed only after 7z
    reports success, so a failed run never destroys the downloads.
    """
    if args.output is None:
        return
    cmd = ['7z', 'a', '-t7z', '-m0=lzma2', '-mx=9', '-mfb=64', '-md=1024m', '-ms=on', '-mhe=on']
    if args.encrypt or args.password is True:
        cmd.append('-p' + input('password: '))
    elif isinstance(args.password, str):
        cmd.append('-p' + args.password)
    # 7z expands the wildcard itself; no shell is involved, so the password
    # and archive name cannot be interpreted as shell syntax.
    cmd += ['{}.7z'.format(name), 'downloads/*']
    try:
        result = subprocess.run(cmd)
    except FileNotFoundError:
        print('7z is not installed; keeping files in downloads')
        return
    if result.returncode != 0:
        print('7z exited with code {}; keeping files in downloads'.format(result.returncode))
        return
    for entry in os.scandir('downloads'):
        os.unlink(entry.path)
def checkSize(url):
    """Return True when url may be downloaded under the --max-size limit.

    Always True when --check-size is off. Otherwise the video's metadata is
    fetched without downloading, and the result is False if the largest
    reported format size exceeds max_size * 1,000,000 bytes. Formats that
    report no size are ignored.
    """
    if not args.check_size:
        return True
    info = youtube_dl.YoutubeDL().extract_info(url, download=False)
    max_bytes = args.max_size * 1000000
    largest = 0
    for fmt in (info or {}).get('formats') or []:
        # 'filesize' may be absent or None depending on the format entry.
        size = fmt.get('filesize')
        if size is not None and size > largest:
            largest = size
    return largest <= max_bytes
def getUrls(playlist):
    """Run get-urls.sh on playlist and return the https:// URLs it prints.

    Exits the program with status 1 when the script produced no output, or
    when its output contains bash's "jq: command not found" message.
    """
    result = subprocess.run(['bash', 'get-urls.sh', playlist], capture_output=True, encoding="utf-8")
    output = result.stdout
    if output == '':
        logger.error('missing get-urls.sh')
        print('missing get-urls.sh')
        sys.exit(1)
    if 'get-urls.sh: line 1: jq: command not found' in output:
        logger.error('missing jq. see readme for installation instruction')
        print('missing jq. see readme for installation instructions')
        sys.exit(1)
    return re.findall(r'(https:\/\/.*)', output)
class ytdlLogger(object):
    """Logger object passed to youtube-dl; forwards its messages to doLog."""

    def debug(self, msg):
        """Forward msg at 'debug' level unless it contains '[download]'."""
        if '[download]' in msg:
            return
        doLog(msg, 'debug')

    def warning(self, msg):
        """Forward msg at 'warning' level."""
        doLog(msg, 'warning')

    def error(self, msg):
        """Forward msg at 'error' level."""
        doLog(msg, 'error')
def my_hook(d):
    """youtube-dl progress hook: render a one-line status on stdout.

    d is the status dict youtube-dl passes to progress hooks. Every
    'finished' event flips the module-level `done` flag; 'downloading' events
    are printed only while `done` is False. Any other status is printed as
    "[status]".

    NOTE(review): indentation was lost in this copy of the file; the final
    branch is assumed to handle statuses other than finished/downloading.
    """
    global done
    status = d['status']
    if status == 'finished':
        done = not done
    elif status == 'downloading':
        if done:
            return
        total = mediaAmount if isURL else len(lines)
        length = '{}/{}'.format(videoInc, total)
        sys.stdout.write('\x1b[2K')
        try:
            filename = d['filename']
            # Remove the directory prefix only; str.strip('downloads/') would
            # eat any of those characters from both ends of the name.
            prefix = 'downloads/'
            if filename.startswith(prefix):
                filename = filename[len(prefix):]
            sys.stdout.write('\r[{}] {} -> "{}" | {} {} {} {}'.format(
                status, length, filename,
                d['_speed_str'], d['_total_bytes_str'], d['_eta_str'], d['_percent_str']))
        except KeyError:
            # Some progress fields are not always present; skip this update.
            pass
        sys.stdout.flush()
    else:
        sys.stdout.write('\x1b[2K')
        # The original format string had two placeholders for one argument
        # and raised IndexError here.
        sys.stdout.write('\r[{}]'.format(status))
        sys.stdout.flush()
# Format selector: every '{}' placeholder becomes a "filesize<NM" filter built
# from --max-size.
ytdlFormat = '(bestvideo[{}][vcodec^=av01][height>=1080][fps>30]/bestvideo[{}][vcodec=vp9.2][height>=1080][fps>30]/bestvideo[{}][vcodec=vp9][height>=1080][fps>30]/bestvideo[{}][vcodec^=av01][height>=1080]/bestvideo[{}][vcodec=vp9.2][height>=1080]/bestvideo[{}][vcodec=vp9][height>=1080]/bestvideo[{}][height>=1080]/bestvideo[{}][vcodec^=av01][height>=720][fps>30]/bestvideo[{}][vcodec=vp9.2][height>=720][fps>30]/bestvideo[{}][vcodec=vp9][height>=720][fps>30]/bestvideo[{}][vcodec^=av01][height>=720]/bestvideo[{}][vcodec=vp9.2][height>=720]/bestvideo[{}][vcodec=vp9][height>=720]/bestvideo[{}][height>=720]/bestvideo[{}])+(bestaudio[acodec=opus]/bestaudio)/best'.replace('{}', 'filesize<' + str(args.max_size) + 'M')

# Options for the embedded youtube_dl.YoutubeDL downloader.
ydl_opts = {
    'merge_output_format': 'mkv',
    'allsubtitles': True,
    'logtostderr': True,
    'format': ytdlFormat,
    'outtmpl': 'downloads/%(title)s - %(id)s.%(ext)s',
    # One dict per postprocessor. The original put all three under a repeated
    # 'key' in a single dict, so only the last entry survived.
    'postprocessors': [
        {'key': 'FFmpegMetadata'},
        {'key': 'EmbedThumbnail'},
        {'key': 'FFmpegEmbedSubtitle'},
    ],
    'logger': ytdlLogger(),
    'progress_hooks': [my_hook],
}

# Equivalent command line used with --cmd; the URL is appended by the caller.
ytdlCMD = 'youtube-dl -i --add-metadata --all-subs --embed-subs --embed-thumbnail -f "{}" --merge-output-format mkv -o "downloads/%(title)s - %(id)s.%(ext)s" --write-annotations --write-info-json --write-description --write-all-thumbnails --write-sub --sub-format "best" --geo-bypass'.format(
    ytdlFormat)
# NOTE(review): indentation was lost in this copy of the file. The nesting
# below is reconstructed from statement order: file mode archives after every
# downloaded video, URL mode archives once after the loop.
def _fetch(url):
    """Download url unless checkSize rejects it; return True if attempted."""
    if not checkSize(url):
        print('[info] Video too big')
        logger.info('skipping ' + url + ' because too big')
        return False
    if args.check_size:
        print('========================')
        print('[info] Video size ok')
    if args.cmd:
        ytOut = subprocess.run(ytdlCMD + ' "' + url + '"', capture_output=True, encoding="utf-8", shell=True)
        # A separate name for each output line keeps `url` intact.
        for outLine in re.findall(r'(.*\n)', ytOut.stdout):
            logger.info(outLine.strip('\n'))
            print(outLine.strip('\n'))
    else:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        print()
    return True


# 1-based counter of the video being processed; read by my_hook.
videoInc = 1
if not isURL:
    for playlistIndex, line in enumerate(lines):
        for x in getUrls(line):
            if _fetch(x):
                if args.playlists:
                    # Number archives by the input line being processed, not
                    # by the stale index left over from input validation.
                    compress('{}-{}'.format(args.output, playlistIndex))
                else:
                    compress(args.output)
            videoInc = videoInc + 1
else:
    # Resolve the URL list once; it is used for the total and the loop.
    urls = getUrls(args.file)
    mediaAmount = str(len(urls))
    for x in urls:
        _fetch(x)
        videoInc = videoInc + 1
    compress(args.output)
print()
from __future__ import unicode_literals
import argparse
import json
import logging
import logging.config
import os
import re
import subprocess
import sys
import time
from subprocess import call
import youtube_dl
# Toggled by my_hook every time youtube-dl reports a 'finished' event.
done = False

# Accepts http(s)/ftp(s) URLs whose host is a domain name, "localhost" or a
# dotted IPv4 address, optionally followed by a port and a path/query.
_URL_PATTERN = ''.join((
    r'^(?:http|ftp)s?://',  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|',  # domain...
    r'localhost|',  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})',  # ...or ip
    r'(?::\d+)?',  # optional port
    r'(?:/?|[/?]\S+)$',
))
urlRegex = re.compile(_URL_PATTERN, flags=re.IGNORECASE)
# Command-line interface.
parser = argparse.ArgumentParser()
parser.add_argument("file")
parser.add_argument('--playlists', '-pl', action='store_true', help='is the input file a list of playlists? enables the sequential renaming of the 7z archives')
parser.add_argument('--output', '-o', help='name of the 7z archive')
parser.add_argument('--encrypt', '-e', action='store_true', help='encrypt the compressed 7z archive')
# Takes a value: as a store_true flag it could only be True/False, never the
# password text, and was never None for the "is not None" check in compress().
parser.add_argument('--password', '-p', help='password to encrypt the compressed 7z archive with')
parser.add_argument('--no-update', '-nu', action='store_true', help="don't update Pip packages")
parser.add_argument('--cmd', '-c', help='use the bash youtube-dl instead of the embedded python version', action='store_true')
parser.add_argument('--max-size', '-m', type=int, default=2000, help="max size of video in mb")
parser.add_argument('--check-size', '-ch', action='store_true', help="verify the video is smaller than the max size and skip if not")
args = parser.parse_args()

# ".7z" is appended when the archive is created, so reject names that already
# end with it (a name that merely contains "7z" is fine).
if args.output is not None and args.output.lower().endswith('.7z'):
    print('no .7z extension in file name')
    sys.exit(1)

# The positional argument is either a single URL or a text file of URLs.
if re.match(urlRegex, args.file) is None:
    isURL = False
    if os.path.exists(args.file) is False:
        print('file does not exist')
        sys.exit(1)
    lines = []
    with open(args.file, 'r') as url_file:
        # `i` stays bound at module level because later code reads it.
        for i, raw in enumerate(url_file):
            candidate = raw.strip()
            if not candidate:
                continue  # tolerate blank lines, e.g. a trailing newline
            if re.match(urlRegex, candidate) is None:
                print('line {} not a url'.format(i))
                sys.exit(1)
            lines.append(candidate)
else:
    isURL = True
# Downloads and the per-run log file both live in ./downloads.
try:
    os.mkdir('downloads')
except FileExistsError:
    pass  # already there from a previous run; any other failure should surface

# File logger shared with youtube-dl; one timestamped log file per run.
logger = logging.getLogger('youtube_dl')
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh = logging.FileHandler('downloads/youtube_dl-' + str(int(time.time())) + '.log')
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
# The original also constructed a StreamHandler here but never attached it,
# so it had no effect; it is dropped rather than silently adding stderr output.
logger.addHandler(fh)
# since youtube-dl updates so much check for updates
# NOTE(review): indentation was lost in this copy; the trailing print() is
# assumed to belong to the update branch (it ends the '\r' status line).
if not args.no_update:
    print('checking for updates...')
    try:
        pipOut = subprocess.run(
            ['python3.7', '-m', 'pip', 'list', '--outdated', '--format', 'json'],
            capture_output=True, encoding="utf-8")
        pipJson = json.loads(pipOut.stdout)
    except (OSError, ValueError) as error:
        # pip could not be started or printed something that is not JSON;
        # skip updating instead of aborting the whole run.
        logger.warning('could not list outdated pip packages: {}'.format(error))
        pipJson = []
    updatable = [package['name'] for package in pipJson]
    logger.info(str(len(updatable)) + ' updatable pip packages')
    print(str(len(updatable)) + ' updatable pip packages')
    for x in updatable:
        sys.stdout.write('\x1b[2K')  # erase last line
        sys.stdout.write('\rupdating ' + x)
        sys.stdout.flush()
        logger.info('updating ' + x)
        # Output is discarded; piping it and then wait()-ing could block once
        # the pipe buffer fills up.
        subprocess.run(['python3.7', '-m', 'pip', 'install', '--upgrade', x],
                       stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    print()
def doLog(msg, level):
    """Write msg to the module logger at the severity named by level.

    'debug' is recorded through logger.info, 'warning' through logger.warning
    and 'error' through logger.error. Any other level name is ignored.
    """
    emitters = {
        'debug': logger.info,
        'warning': logger.warning,
        'error': logger.error,
    }
    emit = emitters.get(level)
    if emit is not None:
        emit(msg)
def compress(name):
    """Pack everything in downloads/ into "<name>.7z", then empty downloads/.

    Does nothing when no --output archive name was given. With --encrypt (or
    a bare boolean --password flag) the password is prompted for; a string
    --password is used as given. Downloaded files are removed only after 7z
    reports success, so a failed run never destroys the downloads.
    """
    if args.output is None:
        return
    cmd = ['7z', 'a', '-t7z', '-m0=lzma2', '-mx=9', '-mfb=64', '-md=1024m', '-ms=on', '-mhe=on']
    if args.encrypt or args.password is True:
        cmd.append('-p' + input('password: '))
    elif isinstance(args.password, str):
        cmd.append('-p' + args.password)
    # 7z expands the wildcard itself; no shell is involved, so the password
    # and archive name cannot be interpreted as shell syntax.
    cmd += ['{}.7z'.format(name), 'downloads/*']
    try:
        result = subprocess.run(cmd)
    except FileNotFoundError:
        print('7z is not installed; keeping files in downloads')
        return
    if result.returncode != 0:
        print('7z exited with code {}; keeping files in downloads'.format(result.returncode))
        return
    for entry in os.scandir('downloads'):
        os.unlink(entry.path)
def checkSize(url):
    """Return True when url may be downloaded under the --max-size limit.

    Always True when --check-size is off. Otherwise the video's metadata is
    fetched without downloading, and the result is False if the largest
    reported format size exceeds max_size * 1,000,000 bytes. Formats that
    report no size are ignored.
    """
    if not args.check_size:
        return True
    info = youtube_dl.YoutubeDL().extract_info(url, download=False)
    max_bytes = args.max_size * 1000000
    largest = 0
    for fmt in (info or {}).get('formats') or []:
        # 'filesize' may be absent or None depending on the format entry.
        size = fmt.get('filesize')
        if size is not None and size > largest:
            largest = size
    return largest <= max_bytes
def getUrls(playlist):
    """Run get-urls.sh on playlist and return the https:// URLs it prints.

    Exits the program with status 1 when the script produced no output, or
    when its output contains bash's "jq: command not found" message.
    """
    result = subprocess.run(['bash', 'get-urls.sh', playlist], capture_output=True, encoding="utf-8")
    output = result.stdout
    if output == '':
        logger.error('missing get-urls.sh')
        print('missing get-urls.sh')
        sys.exit(1)
    if 'get-urls.sh: line 1: jq: command not found' in output:
        logger.error('missing jq. see readme for installation instruction')
        print('missing jq. see readme for installation instructions')
        sys.exit(1)
    return re.findall(r'(https:\/\/.*)', output)
class ytdlLogger(object):
    """Logger object passed to youtube-dl; forwards its messages to doLog."""

    def debug(self, msg):
        """Forward msg at 'debug' level unless it contains '[download]'."""
        if '[download]' in msg:
            return
        doLog(msg, 'debug')

    def warning(self, msg):
        """Forward msg at 'warning' level."""
        doLog(msg, 'warning')

    def error(self, msg):
        """Forward msg at 'error' level."""
        doLog(msg, 'error')
def my_hook(d):
    """youtube-dl progress hook: render a one-line status on stdout.

    d is the status dict youtube-dl passes to progress hooks. Every
    'finished' event flips the module-level `done` flag; 'downloading' events
    are printed only while `done` is False. Any other status is printed as
    "[status]".

    NOTE(review): indentation was lost in this copy of the file; the final
    branch is assumed to handle statuses other than finished/downloading.
    """
    global done
    status = d['status']
    if status == 'finished':
        done = not done
    elif status == 'downloading':
        if done:
            return
        total = mediaAmount if isURL else len(lines)
        length = '{}/{}'.format(videoInc, total)
        sys.stdout.write('\x1b[2K')
        try:
            filename = d['filename']
            # Remove the directory prefix only; str.strip('downloads/') would
            # eat any of those characters from both ends of the name.
            prefix = 'downloads/'
            if filename.startswith(prefix):
                filename = filename[len(prefix):]
            sys.stdout.write('\r[{}] {} -> "{}" | {} {} {} {}'.format(
                status, length, filename,
                d['_speed_str'], d['_total_bytes_str'], d['_eta_str'], d['_percent_str']))
        except KeyError:
            # Some progress fields are not always present; skip this update.
            pass
        sys.stdout.flush()
    else:
        sys.stdout.write('\x1b[2K')
        # The original format string had two placeholders for one argument
        # and raised IndexError here.
        sys.stdout.write('\r[{}]'.format(status))
        sys.stdout.flush()
# Format selector: every '{}' placeholder becomes a "filesize<NM" filter built
# from --max-size.
ytdlFormat = '(bestvideo[{}][vcodec^=av01][height>=1080][fps>30]/bestvideo[{}][vcodec=vp9.2][height>=1080][fps>30]/bestvideo[{}][vcodec=vp9][height>=1080][fps>30]/bestvideo[{}][vcodec^=av01][height>=1080]/bestvideo[{}][vcodec=vp9.2][height>=1080]/bestvideo[{}][vcodec=vp9][height>=1080]/bestvideo[{}][height>=1080]/bestvideo[{}][vcodec^=av01][height>=720][fps>30]/bestvideo[{}][vcodec=vp9.2][height>=720][fps>30]/bestvideo[{}][vcodec=vp9][height>=720][fps>30]/bestvideo[{}][vcodec^=av01][height>=720]/bestvideo[{}][vcodec=vp9.2][height>=720]/bestvideo[{}][vcodec=vp9][height>=720]/bestvideo[{}][height>=720]/bestvideo[{}])+(bestaudio[acodec=opus]/bestaudio)/best'.replace('{}', 'filesize<' + str(args.max_size) + 'M')

# Options for the embedded youtube_dl.YoutubeDL downloader.
ydl_opts = {
    'merge_output_format': 'mkv',
    'allsubtitles': True,
    'logtostderr': True,
    'format': ytdlFormat,
    'outtmpl': 'downloads/%(title)s - %(id)s.%(ext)s',
    # One dict per postprocessor. The original put all three under a repeated
    # 'key' in a single dict, so only the last entry survived.
    'postprocessors': [
        {'key': 'FFmpegMetadata'},
        {'key': 'EmbedThumbnail'},
        {'key': 'FFmpegEmbedSubtitle'},
    ],
    'logger': ytdlLogger(),
    'progress_hooks': [my_hook],
}

# Equivalent command line used with --cmd; the URL is appended by the caller.
ytdlCMD = 'youtube-dl -i --add-metadata --all-subs --embed-subs --embed-thumbnail -f "{}" --merge-output-format mkv -o "downloads/%(title)s - %(id)s.%(ext)s" --write-annotations --write-info-json --write-description --write-all-thumbnails --write-sub --sub-format "best" --geo-bypass'.format(
    ytdlFormat)
# NOTE(review): indentation was lost in this copy of the file. The nesting
# below is reconstructed from statement order: file mode archives after every
# downloaded video, URL mode archives once after the loop.
def _fetch(url):
    """Download url unless checkSize rejects it; return True if attempted."""
    if not checkSize(url):
        print('[info] Video too big')
        logger.info('skipping ' + url + ' because too big')
        return False
    if args.check_size:
        print('========================')
        print('[info] Video size ok')
    if args.cmd:
        ytOut = subprocess.run(ytdlCMD + ' "' + url + '"', capture_output=True, encoding="utf-8", shell=True)
        # A separate name for each output line keeps `url` intact.
        for outLine in re.findall(r'(.*\n)', ytOut.stdout):
            logger.info(outLine.strip('\n'))
            print(outLine.strip('\n'))
    else:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        print()
    return True


# 1-based counter of the video being processed; read by my_hook.
videoInc = 1
if not isURL:
    for playlistIndex, line in enumerate(lines):
        for x in getUrls(line):
            if _fetch(x):
                if args.playlists:
                    # Number archives by the input line being processed, not
                    # by the stale index left over from input validation.
                    compress('{}-{}'.format(args.output, playlistIndex))
                else:
                    compress(args.output)
            videoInc = videoInc + 1
else:
    # Resolve the URL list once; it is used for the total and the loop.
    urls = getUrls(args.file)
    mediaAmount = str(len(urls))
    for x in urls:
        _fetch(x)
        videoInc = videoInc + 1
    compress(args.output)
print()

View File

@ -1,21 +1,21 @@
import subprocess
import sys
import json
# run this with sudo because sometimes packages require sudo to update/install
print('checking for updates...')
try:
    pipOut = subprocess.run(
        ['python3.7', '-m', 'pip', 'list', '--outdated', '--format', 'json'],
        capture_output=True, encoding="utf-8")
    pipJson = json.loads(pipOut.stdout)
except (OSError, ValueError) as error:
    # pip could not be started or did not print JSON; nothing to update.
    print('could not list outdated pip packages: {}'.format(error))
    pipJson = []
updatable = [package['name'] for package in pipJson]
# enumerate() supplies the 1-based progress counter that used to be reset
# inside the collecting loop and bumped by hand.
for i, x in enumerate(updatable, start=1):
    sys.stdout.write('\x1b[2K')  # erase last line
    sys.stdout.write('\rupdating {} {}/{}'.format(x, i, str(len(updatable))))
    sys.stdout.flush()
    # Output is discarded; piping it and then wait()-ing could block once the
    # pipe buffer fills up.
    subprocess.run(['python3.7', '-m', 'pip', 'install', '--upgrade', x],
                   stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
import subprocess
import sys
import json
# run this with sudo because sometimes packages require sudo to update/install
print('checking for updates...')
try:
    pipOut = subprocess.run(
        ['python3.7', '-m', 'pip', 'list', '--outdated', '--format', 'json'],
        capture_output=True, encoding="utf-8")
    pipJson = json.loads(pipOut.stdout)
except (OSError, ValueError) as error:
    # pip could not be started or did not print JSON; nothing to update.
    print('could not list outdated pip packages: {}'.format(error))
    pipJson = []
updatable = [package['name'] for package in pipJson]
# enumerate() supplies the 1-based progress counter that used to be reset
# inside the collecting loop and bumped by hand.
for i, x in enumerate(updatable, start=1):
    sys.stdout.write('\x1b[2K')  # erase last line
    sys.stdout.write('\rupdating {} {}/{}'.format(x, i, str(len(updatable))))
    sys.stdout.flush()
    # Output is discarded; piping it and then wait()-ing could block once the
    # pipe buffer fills up.
    subprocess.run(['python3.7', '-m', 'pip', 'install', '--upgrade', x],
                   stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
# NOTE(review): indentation was lost; this print() is assumed to run once
# after the loop to end the '\r' status line.
print()