[tagesschau] set description=None for empty descriptions

This commit is contained in:
Roland Hieber 2015-07-19 05:09:29 +02:00
parent 3c6ae8b59e
commit 726adc43ec
1 changed file with 6 additions and 9 deletions

View File

@@ -36,7 +36,7 @@ class TagesschauIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '3771', 'id': '3771',
'ext': 'mp4', 'ext': 'mp4',
'description': '', 'description': None,
'title': 'Sendung: tagesschau (mit Gebärdensprache) \t14.07.2015 20:00 Uhr', 'title': 'Sendung: tagesschau (mit Gebärdensprache) \t14.07.2015 20:00 Uhr',
'thumbnail': 're:^http:.*\.jpg$', 'thumbnail': 're:^http:.*\.jpg$',
} }
@@ -66,7 +66,7 @@ class TagesschauIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '959', 'id': '959',
'ext': 'mp4', 'ext': 'mp4',
'description': '', 'description': None,
'title': 'Sendung: tagesschau vor 20 Jahren \t14.07.2015 22:45 Uhr', 'title': 'Sendung: tagesschau vor 20 Jahren \t14.07.2015 22:45 Uhr',
'thumbnail': 're:^http:.*\.jpg$', 'thumbnail': 're:^http:.*\.jpg$',
} }
@@ -76,7 +76,7 @@ class TagesschauIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '3299', 'id': '3299',
'ext': 'mp4', 'ext': 'mp4',
'description': '', 'description': None,
'title': 'Nach dem Referendum: Schaltgespräch nach Athen', 'title': 'Nach dem Referendum: Schaltgespräch nach Athen',
'thumbnail': 're:^http:.*\.jpg$', 'thumbnail': 're:^http:.*\.jpg$',
} }
@@ -154,12 +154,9 @@ class TagesschauIE(InfoExtractor):
webpage, 'thumbnail', fatal=False) webpage, 'thumbnail', fatal=False)
# there are some videos without description # there are some videos without description
description = "" description = ""
try: description = self._html_search_regex(
description = self._html_search_regex( r'(?s)<p class="teasertext">(.*?)</p>',
r'(?s)<p class="teasertext">(.*?)</p>', webpage, 'description', fatal=False, default=None)
webpage, 'description', fatal=False)
except ExtractorError:
pass
title = self._html_search_regex( title = self._html_search_regex(
r'<span class="headline".*?>(.*?)</span>', webpage, 'title') r'<span class="headline".*?>(.*?)</span>', webpage, 'title')