[tagesschau] make some generic titles more specific
For example, this turns the generic title `Ganze Sendung` into `tagesschau 20 Uhr - 04.12.14 20:00`, as the old extractor did. This is especially important for playlists of such videos, whose entries would otherwise all share the same title.
This commit is contained in:
parent
2a0d9305f4
commit
ac5b267afe
|
@ -12,6 +12,7 @@ from ..utils import (
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
|
@ -87,7 +88,7 @@ class TagesschauIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'video-45741',
|
'id': 'video-45741',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Ganze Sendung',
|
'title': 'tagesschau 20 Uhr - 04.12.14 20:00',
|
||||||
'description': '04.12.2014 20:00',
|
'description': '04.12.2014 20:00',
|
||||||
'thumbnail': r're:^https?:.*\.jpg$',
|
'thumbnail': r're:^https?:.*\.jpg$',
|
||||||
'uploader': 'tagesschau',
|
'uploader': 'tagesschau',
|
||||||
|
@ -153,6 +154,18 @@ class TagesschauIE(InfoExtractor):
|
||||||
'title': 'Bericht aus Berlin: Sommerinterview mit Angela Merkel',
|
'title': 'Bericht aus Berlin: Sommerinterview mit Angela Merkel',
|
||||||
'description': '19.07.2015 19:05 Uhr',
|
'description': '19.07.2015 19:05 Uhr',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# handling of generic title
|
||||||
|
'url': 'https://www.tagesschau.de/multimedia/video/video-835681.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'video-835681',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tagesschau in 100 Sekunden - 13.03.21 17:35',
|
||||||
|
'upload_date': '20210313',
|
||||||
|
'uploader': 'Tagesschau24',
|
||||||
|
'description': '13.03.2021 17:35',
|
||||||
|
'timestamp': 1615656900,
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
|
'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -192,6 +205,25 @@ class TagesschauIE(InfoExtractor):
|
||||||
if mobj:
|
if mobj:
|
||||||
return mobj.group('id')
|
return mobj.group('id')
|
||||||
|
|
||||||
|
def _handle_generic_titles(self, title, pixelConf):
    """Replace a generic broadcast title with a more specific one.

    Titles other than 'Ganze Sendung', '100 Sekunden' and
    'tagesschau in 100 Sekunden' (compared case-insensitively after
    stripping) are returned unchanged.

    For a generic title, `pixelConf` is searched for the first item whose
    'tracker' is 'AGFdebug' and whose clipData has a non-empty string
    'program'. The new title is that program name, followed by
    ' - <date> <time>' when the last two '_'-separated fields of the
    clipData 'title' are available, e.g. 'Ganze Sendung' becomes
    'tagesschau 20 Uhr - 04.12.14 20:00'.

    Returns `title` unchanged when no such item is found.
    """
    generic_titles = (
        'ganze sendung',
        '100 sekunden',
        'tagesschau in 100 sekunden',
    )
    if strip_or_none(title, '').lower() not in generic_titles:
        return title

    for entry in pixelConf:
        if entry.get('tracker') != 'AGFdebug':
            continue
        program = try_get(entry, lambda x: x['clipData']['program'], compat_str)
        if not program:
            # keep looking: a later tracker entry may carry the program name
            continue
        clip_title = try_get(entry, lambda x: x['clipData']['title'], compat_str) or ''
        # presumably the clip title ends in '_<date>_<time>' -- TODO confirm
        date_and_time = clip_title.split('_')[-2:]
        if len(date_and_time) != 2:
            return program
        return '%s - %s' % (program, ' '.join(date_and_time))

    return title
|
||||||
|
|
||||||
def _extract_from_player(self, player_div, video_id_fallback, title_fallback):
|
def _extract_from_player(self, player_div, video_id_fallback, title_fallback):
|
||||||
player_data = unescapeHTML(self._search_regex(
|
player_data = unescapeHTML(self._search_regex(
|
||||||
r'data-config=(?P<quote>["\'])(?P<data>[^"\']*)(?P=quote)',
|
r'data-config=(?P<quote>["\'])(?P<data>[^"\']*)(?P=quote)',
|
||||||
|
@ -219,7 +251,8 @@ class TagesschauIE(InfoExtractor):
|
||||||
|
|
||||||
video_id = self._video_id_from_url(webpage_url)
|
video_id = self._video_id_from_url(webpage_url)
|
||||||
duration = None
|
duration = None
|
||||||
for item in (try_get(meta, lambda x: x['pc']['_pixelConfig'], list) or []):
|
pixelConf = try_get(meta, lambda x: x['pc']['_pixelConfig'], list) or []
|
||||||
|
for item in pixelConf:
|
||||||
video_id = (video_id or try_get(item,
|
video_id = (video_id or try_get(item,
|
||||||
[lambda x: x['playerID'],
|
[lambda x: x['playerID'],
|
||||||
lambda x: x['clipData']['playerId']], compat_str))
|
lambda x: x['clipData']['playerId']], compat_str))
|
||||||
|
@ -265,6 +298,7 @@ class TagesschauIE(InfoExtractor):
|
||||||
title = (try_get(mc, [lambda x: x['_info']['clipTitle'],
|
title = (try_get(mc, [lambda x: x['_info']['clipTitle'],
|
||||||
lambda x: x['_download']['title']], compat_str)
|
lambda x: x['_download']['title']], compat_str)
|
||||||
or title_fallback)
|
or title_fallback)
|
||||||
|
title = self._handle_generic_titles(title, pixelConf)
|
||||||
|
|
||||||
sub_url = url_or_none(mc.get('_subtitleUrl'))
|
sub_url = url_or_none(mc.get('_subtitleUrl'))
|
||||||
subs = {'de': [{'ext': 'ttml', 'url': sub_url}]} if sub_url else None
|
subs = {'de': [{'ext': 'ttml', 'url': sub_url}]} if sub_url else None
|
||||||
|
|
Loading…
Reference in New Issue