Fix and refactor `prepare_outtmpl`

The following tests would have failed previously:
%(id)d %(id)r
%(ext)s-%(ext|def)d
%(width|)d
%(id)r %(height)r
%(formats.0)r
%s
This commit is contained in:
pukkandan 2021-06-03 23:30:38 +05:30
parent 9d83ad93d0
commit 752cda3880
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698
6 changed files with 192 additions and 160 deletions

View File

@ -17,7 +17,7 @@ from yt_dlp.compat import compat_str, compat_urllib_error
from yt_dlp.extractor import YoutubeIE from yt_dlp.extractor import YoutubeIE
from yt_dlp.extractor.common import InfoExtractor from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.postprocessor.common import PostProcessor from yt_dlp.postprocessor.common import PostProcessor
from yt_dlp.utils import ExtractorError, match_filter_func from yt_dlp.utils import ExtractorError, float_or_none, match_filter_func
TEST_URL = 'http://localhost/sample.mp4' TEST_URL = 'http://localhost/sample.mp4'
@ -648,8 +648,7 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(test_dict['extractor'], 'Foo') self.assertEqual(test_dict['extractor'], 'Foo')
self.assertEqual(test_dict['playlist'], 'funny videos') self.assertEqual(test_dict['playlist'], 'funny videos')
def test_prepare_filename(self): outtmpl_info = {
info = {
'id': '1234', 'id': '1234',
'ext': 'mp4', 'ext': 'mp4',
'width': None, 'width': None,
@ -657,47 +656,87 @@ class TestYoutubeDL(unittest.TestCase):
'title1': '$PATH', 'title1': '$PATH',
'title2': '%PATH%', 'title2': '%PATH%',
'timestamp': 1618488000, 'timestamp': 1618488000,
'formats': [{'id': 'id1'}, {'id': 'id2'}] 'duration': 100000,
'playlist_index': 1,
'_last_playlist_index': 100,
'n_entries': 10,
'formats': [{'id': 'id1'}, {'id': 'id2'}, {'id': 'id3'}]
} }
def fname(templ, na_placeholder='NA'): def test_prepare_outtmpl(self):
params = {'outtmpl': templ} def out(tmpl, **params):
if na_placeholder != 'NA': params['outtmpl'] = tmpl
params['outtmpl_na_placeholder'] = na_placeholder
ydl = YoutubeDL(params) ydl = YoutubeDL(params)
return ydl.prepare_filename(info) ydl._num_downloads = 1
self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4') outtmpl, tmpl_dict = ydl.prepare_outtmpl(tmpl, self.outtmpl_info)
self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4') return outtmpl % tmpl_dict
NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(id)s.%(ext)s'
# Replace missing fields with 'NA' by default self.assertEqual(out('%(id)s.%(ext)s'), '1234.mp4')
self.assertEqual(fname(NA_TEST_OUTTMPL), 'NA-NA-1234.mp4') self.assertEqual(out('%(duration_string)s'), '27:46:40')
# Or by provided placeholder self.assertTrue(float_or_none(out('%(epoch)d')))
self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4') self.assertEqual(out('%(resolution)s'), '1080p')
self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4') self.assertEqual(out('%(playlist_index)s'), '001')
self.assertEqual(fname('%(height)s.%(ext)s'), '1080.mp4') self.assertEqual(out('%(autonumber)s'), '00001')
self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4') self.assertEqual(out('%(autonumber+2)03d', autonumber_start=3), '005')
self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4') self.assertEqual(out('%(autonumber)s', autonumber_size=3), '001')
self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4')
self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4') self.assertEqual(out('%%'), '%')
self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4') self.assertEqual(out('%%%%'), '%%')
self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4') self.assertEqual(out('%(invalid@tmpl|def)s', outtmpl_na_placeholder='none'), 'none')
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') self.assertEqual(out('%()s'), 'NA')
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') self.assertEqual(out('%s'), '%s')
self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4')
NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s'
self.assertEqual(out(NA_TEST_OUTTMPL), 'NA-NA-def-1234.mp4')
self.assertEqual(out(NA_TEST_OUTTMPL, outtmpl_na_placeholder='none'), 'none-none-def-1234.mp4')
self.assertEqual(out(NA_TEST_OUTTMPL, outtmpl_na_placeholder=''), '--def-1234.mp4')
FMT_TEST_OUTTMPL = '%%(height)%s.%%(ext)s'
self.assertEqual(out(FMT_TEST_OUTTMPL % 's'), '1080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % 'd'), '1080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % '6d'), ' 1080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % '-6d'), '1080 .mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % '06d'), '001080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % ' 06d'), ' 01080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % ' 06d'), ' 01080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % '0 6d'), ' 01080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % '0 6d'), ' 01080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % ' 0 6d'), ' 01080.mp4')
self.assertEqual(out('%(id)d'), '1234')
self.assertEqual(out('%(id)d %(id)r'), "1234 '1234'")
self.assertEqual(out('%(ext)s-%(ext|def)d'), 'mp4-def')
self.assertEqual(out('%(width|0)04d'), '0000')
self.assertEqual(out('%(width|)d', outtmpl_na_placeholder='none'), '')
FORMATS = self.outtmpl_info['formats']
self.assertEqual(out('%(timestamp+-1000>%H-%M-%S)s'), '11-43-20')
self.assertEqual(out('%(id+1-height+3)05d'), '00158')
self.assertEqual(out('%(width+100)05d'), 'NA')
self.assertEqual(out('%(formats.0)s'), str(FORMATS[0]))
self.assertEqual(out('%(formats.-1.id)s'), str(FORMATS[-1]['id']))
self.assertEqual(out('%(formats.3)s'), 'NA')
self.assertEqual(out('%(formats.:2:-1)r'), repr(FORMATS[:2:-1]))
self.assertEqual(out('%(formats.0.id.-1+id)f'), '1235.000000')
def test_prepare_filename(self):
def fname(templ):
params = {'outtmpl': templ}
ydl = YoutubeDL(params)
return ydl.prepare_filename(self.outtmpl_info)
self.assertEqual(fname('%%'), '%') self.assertEqual(fname('%%'), '%')
self.assertEqual(fname('%%%%'), '%%') self.assertEqual(fname('%%%%'), '%%')
self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4') self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4') self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s') self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4') self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH') self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH')
self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%') self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%')
self.assertEqual(fname('%(timestamp+-1000>%H-%M-%S)s'), '11-43-20')
self.assertEqual(fname('%(id+1)05d'), '01235') self.assertEqual(fname('%(id)r %(height)r'), "'1234' 1080")
self.assertEqual(fname('%(width+100)05d'), 'NA') self.assertEqual(fname('%(formats.0)r'), "{'id' - 'id1'}")
self.assertEqual(fname('%(formats.0)s').replace("u", ""), "{'id' - 'id1'}")
self.assertEqual(fname('%(formats.-1.id)s'), 'id2')
self.assertEqual(fname('%(formats.2)s'), 'NA')
def test_format_note(self): def test_format_note(self):
ydl = YoutubeDL() ydl = YoutubeDL()

View File

@ -8,7 +8,10 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_shlex_quote
from yt_dlp.postprocessor import ( from yt_dlp.postprocessor import (
ExecAfterDownloadPP,
FFmpegThumbnailsConvertorPP, FFmpegThumbnailsConvertorPP,
MetadataFromFieldPP, MetadataFromFieldPP,
MetadataFromTitlePP, MetadataFromTitlePP,
@ -55,3 +58,14 @@ class TestConvertThumbnail(unittest.TestCase):
for _, out in tests: for _, out in tests:
os.remove(file.format(out)) os.remove(file.format(out))
class TestExecAfterDownload(unittest.TestCase):
def test_parse_cmd(self):
pp = ExecAfterDownloadPP(YoutubeDL(), '')
info = {'filepath': 'file name'}
quoted_filepath = compat_shlex_quote(info['filepath'])
self.assertEqual(pp.parse_cmd('echo', info), 'echo %s' % quoted_filepath)
self.assertEqual(pp.parse_cmd('echo.{}', info), 'echo.%s' % quoted_filepath)
self.assertEqual(pp.parse_cmd('echo "%(filepath)s"', info), 'echo "%s"' % info['filepath'])

View File

@ -64,7 +64,7 @@ from .utils import (
float_or_none, float_or_none,
format_bytes, format_bytes,
format_field, format_field,
FORMAT_RE, STR_FORMAT_RE,
formatSeconds, formatSeconds,
GeoRestrictedError, GeoRestrictedError,
int_or_none, int_or_none,
@ -815,52 +815,26 @@ class YoutubeDL(object):
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
""" Make the template and info_dict suitable for substitution (outtmpl % info_dict)""" """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
template_dict = dict(info_dict) info_dict = dict(info_dict)
na = self.params.get('outtmpl_na_placeholder', 'NA') na = self.params.get('outtmpl_na_placeholder', 'NA')
# duration_string info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
formatSeconds(info_dict['duration'], '-' if sanitize else ':') formatSeconds(info_dict['duration'], '-' if sanitize else ':')
if info_dict.get('duration', None) is not None if info_dict.get('duration', None) is not None
else None) else None)
info_dict['epoch'] = int(time.time())
# epoch info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
template_dict['epoch'] = int(time.time()) if info_dict.get('resolution') is None:
info_dict['resolution'] = self.format_resolution(info_dict, default=None)
# autonumber
autonumber_size = self.params.get('autonumber_size')
if autonumber_size is None:
autonumber_size = 5
template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
# resolution if not defined
if template_dict.get('resolution') is None:
if template_dict.get('width') and template_dict.get('height'):
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
elif template_dict.get('height'):
template_dict['resolution'] = '%sp' % template_dict['height']
elif template_dict.get('width'):
template_dict['resolution'] = '%dx?' % template_dict['width']
# For fields playlist_index and autonumber convert all occurrences # For fields playlist_index and autonumber convert all occurrences
# of %(field)s to %(field)0Nd for backward compatibility # of %(field)s to %(field)0Nd for backward compatibility
field_size_compat_map = { field_size_compat_map = {
'playlist_index': len(str(template_dict.get('_last_playlist_index') or '')), 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
'autonumber': autonumber_size, 'autonumber': self.params.get('autonumber_size') or 5,
} }
FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
if mobj:
outtmpl = re.sub(
FIELD_SIZE_COMPAT_RE,
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
outtmpl)
numeric_fields = list(self._NUMERIC_FIELDS) EXTERNAL_FORMAT_RE = STR_FORMAT_RE.format('[^)]*')
if sanitize is None:
sanitize = lambda k, v: v
EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P<key>[^)]*)')
# Field is of the form key1.key2... # Field is of the form key1.key2...
# where keys (except first) can be string, int or slice # where keys (except first) can be string, int or slice
FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*' FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
@ -876,71 +850,76 @@ class YoutubeDL(object):
'+': float.__add__, '+': float.__add__,
'-': float.__sub__, '-': float.__sub__,
} }
for outer_mobj in re.finditer(EXTERNAL_FORMAT_RE, outtmpl): tmpl_dict = {}
final_key = outer_mobj.group('key')
str_type = outer_mobj.group('type') def get_value(mdict):
value = None
mobj = re.match(INTERNAL_FORMAT_RE, final_key)
if mobj is not None:
mobj = mobj.groupdict()
# Object traversal # Object traversal
fields = mobj['fields'].split('.') fields = mdict['fields'].split('.')
value = traverse_dict(template_dict, fields) value = traverse_dict(info_dict, fields)
# Negative # Negative
if mobj['negate']: if mdict['negate']:
value = float_or_none(value) value = float_or_none(value)
if value is not None: if value is not None:
value *= -1 value *= -1
# Do maths # Do maths
if mobj['maths']: if mdict['maths']:
value = float_or_none(value) value = float_or_none(value)
operator = None operator = None
for item in MATH_OPERATORS_RE.split(mobj['maths'])[1:]: for item in MATH_OPERATORS_RE.split(mdict['maths'])[1:]:
if item == '': if item == '' or value is None:
value = None return None
if value is None:
break
if operator: if operator:
item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1) item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
offset = float_or_none(item) offset = float_or_none(item)
if offset is None: if offset is None:
offset = float_or_none(traverse_dict(template_dict, item.split('.'))) offset = float_or_none(traverse_dict(info_dict, item.split('.')))
try: try:
value = operator(value, multiplier * offset) value = operator(value, multiplier * offset)
except (TypeError, ZeroDivisionError): except (TypeError, ZeroDivisionError):
value = None return None
operator = None operator = None
else: else:
operator = MATH_FUNCTIONS[item] operator = MATH_FUNCTIONS[item]
# Datetime formatting # Datetime formatting
if mobj['strf_format']: if mdict['strf_format']:
value = strftime_or_none(value, mobj['strf_format']) value = strftime_or_none(value, mdict['strf_format'])
# Set default
if value is None and mobj['default'] is not None: return value
value = mobj['default']
# Sanitize def create_key(outer_mobj):
if str_type in 'crs' and value is not None: # string if not outer_mobj.group('has_key'):
value = sanitize('%{}'.format(str_type) % fields[-1], value) return '%{}'.format(outer_mobj.group(0))
else: # numeric
numeric_fields.append(final_key) key = outer_mobj.group('key')
fmt = outer_mobj.group('format')
mobj = re.match(INTERNAL_FORMAT_RE, key)
if mobj is None:
value, default = None, na
else:
mobj = mobj.groupdict()
default = mobj['default'] if mobj['default'] is not None else na
value = get_value(mobj)
if fmt == 's' and value is not None and key in field_size_compat_map.keys():
fmt = '0{:d}d'.format(field_size_compat_map[key])
value = default if value is None else value
key += '\0%s' % fmt
if fmt[-1] not in 'crs': # numeric
value = float_or_none(value) value = float_or_none(value)
if value is not None: if value is None:
template_dict[final_key] = value value, fmt = default, 's'
if sanitize:
if fmt[-1] == 'r':
# If value is an object, sanitize might convert it to a string
# So we convert it to repr first
value, fmt = repr(value), '%ss' % fmt[:-1]
value = sanitize(key, value)
tmpl_dict[key] = value
return '%({key}){fmt}'.format(key=key, fmt=fmt)
# Missing numeric fields used together with integer presentation types return re.sub(EXTERNAL_FORMAT_RE, create_key, outtmpl), tmpl_dict
# in format specification will break the argument substitution since
# string NA placeholder is returned for missing fields. We will patch
# output template for missing fields to meet string presentation type.
for numeric_field in numeric_fields:
if template_dict.get(numeric_field) is None:
outtmpl = re.sub(
FORMAT_RE.format(re.escape(numeric_field)),
r'%({0})s'.format(numeric_field), outtmpl)
template_dict = collections.defaultdict(lambda: na, (
(k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
for k, v in template_dict.items() if v is not None))
return outtmpl, template_dict
def _prepare_filename(self, info_dict, tmpl_type='default'): def _prepare_filename(self, info_dict, tmpl_type='default'):
try: try:
@ -966,7 +945,7 @@ class YoutubeDL(object):
force_ext = OUTTMPL_TYPES.get(tmpl_type) force_ext = OUTTMPL_TYPES.get(tmpl_type)
if force_ext is not None: if force_ext is not None:
filename = replace_extension(filename, force_ext, template_dict.get('ext')) filename = replace_extension(filename, force_ext, info_dict.get('ext'))
# https://github.com/blackjack4494/youtube-dlc/issues/85 # https://github.com/blackjack4494/youtube-dlc/issues/85
trim_file_name = self.params.get('trim_file_name', False) trim_file_name = self.params.get('trim_file_name', False)

View File

@ -1,13 +1,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import subprocess import subprocess
from .common import PostProcessor from .common import PostProcessor
from ..compat import compat_shlex_quote from ..compat import compat_shlex_quote
from ..utils import ( from ..utils import (
encodeArgument, encodeArgument,
FORMAT_RE,
PostProcessingError, PostProcessingError,
) )
@ -23,15 +21,15 @@ class ExecAfterDownloadPP(PostProcessor):
return 'Exec' return 'Exec'
def parse_cmd(self, cmd, info): def parse_cmd(self, cmd, info):
# If no %(key)s is found, replace {} for backard compatibility tmpl, tmpl_dict = self._downloader.prepare_outtmpl(cmd, info)
if not re.search(FORMAT_RE.format(r'[^)]*'), cmd): if tmpl_dict: # if there are no replacements, tmpl_dict = {}
return tmpl % tmpl_dict
# If no replacements are found, replace {} for backard compatibility
if '{}' not in cmd: if '{}' not in cmd:
cmd += ' {}' cmd += ' {}'
return cmd.replace('{}', compat_shlex_quote(info['filepath'])) return cmd.replace('{}', compat_shlex_quote(info['filepath']))
tmpl, info_copy = self._downloader.prepare_outtmpl(cmd, info)
return tmpl % info_copy
def run(self, info): def run(self, info):
cmd = self.parse_cmd(self.exec_cmd, info) cmd = self.parse_cmd(self.exec_cmd, info)
self.to_screen('Executing command: %s' % cmd) self.to_screen('Executing command: %s' % cmd)

View File

@ -54,9 +54,9 @@ class MetadataFromFieldPP(PostProcessor):
def run(self, info): def run(self, info):
for dictn in self._data: for dictn in self._data:
tmpl, info_copy = self._downloader.prepare_outtmpl(dictn['tmpl'], info) tmpl, tmpl_dict = self._downloader.prepare_outtmpl(dictn['tmpl'], info)
data_to_parse = tmpl % info_copy data_to_parse = tmpl % tmpl_dict
self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], tmpl)) self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], dictn['tmpl']))
match = re.search(dictn['regex'], data_to_parse) match = re.search(dictn['regex'], data_to_parse)
if match is None: if match is None:
self.report_warning('Could not interpret video %s as "%s"' % (dictn['in'], dictn['out'])) self.report_warning('Could not interpret video %s as "%s"' % (dictn['in'], dictn['out']))

View File

@ -4393,15 +4393,17 @@ OUTTMPL_TYPES = {
# As of [1] format syntax is: # As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
FORMAT_RE = r'''(?x) STR_FORMAT_RE = r'''(?x)
(?<!%) (?<!%)
% %
\({0}\) # mapping key (?P<has_key>\((?P<key>{0})\))? # mapping key
(?P<format>
(?:[#0\-+ ]+)? # conversion flags (optional) (?:[#0\-+ ]+)? # conversion flags (optional)
(?:\d+)? # minimum field width (optional) (?:\d+)? # minimum field width (optional)
(?:\.\d+)? # precision (optional) (?:\.\d+)? # precision (optional)
[hlL]? # length modifier (optional) [hlL]? # length modifier (optional)
(?P<type>[diouxXeEfFgGcrs%]) # conversion type [diouxXeEfFgGcrs] # conversion type
)
''' '''