[mtv] Extract subtitles (Closes #4811)
This commit is contained in:
parent
20b4492c71
commit
e525d9a3df
|
@ -2,10 +2,11 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
compat_str,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
@ -22,7 +23,7 @@ def _media_xml_tag(tag):
|
||||||
return '{http://search.yahoo.com/mrss/}%s' % tag
|
return '{http://search.yahoo.com/mrss/}%s' % tag
|
||||||
|
|
||||||
|
|
||||||
class MTVServicesInfoExtractor(InfoExtractor):
|
class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
|
||||||
_MOBILE_TEMPLATE = None
|
_MOBILE_TEMPLATE = None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -89,6 +90,28 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
|
def _extract_subtitles(self, mdoc, mtvn_id):
|
||||||
|
subtitles = {}
|
||||||
|
FORMATS = {
|
||||||
|
'scc': 'cea-608',
|
||||||
|
'eia-608': 'cea-608',
|
||||||
|
'xml': 'ttml',
|
||||||
|
}
|
||||||
|
subtitles_format = FORMATS.get(
|
||||||
|
self._downloader.params.get('subtitlesformat'), 'ttml')
|
||||||
|
for transcript in mdoc.findall('.//transcript'):
|
||||||
|
if transcript.get('kind') != 'captions':
|
||||||
|
continue
|
||||||
|
lang = transcript.get('srclang')
|
||||||
|
for typographic in transcript.findall('./typographic'):
|
||||||
|
captions_format = typographic.get('format')
|
||||||
|
if captions_format == subtitles_format:
|
||||||
|
subtitles[lang] = compat_str(typographic.get('src'))
|
||||||
|
break
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
self._list_available_subtitles(mtvn_id, subtitles)
|
||||||
|
return self.extract_subtitles(mtvn_id, subtitles)
|
||||||
|
|
||||||
def _get_video_info(self, itemdoc):
|
def _get_video_info(self, itemdoc):
|
||||||
uri = itemdoc.find('guid').text
|
uri = itemdoc.find('guid').text
|
||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
|
@ -135,6 +158,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
return {
|
return {
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
|
'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
|
||||||
|
'subtitles': self._extract_subtitles(mediagen_doc, mtvn_id),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
|
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
|
||||||
'description': description,
|
'description': description,
|
||||||
|
@ -167,7 +191,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
mgid = self._search_regex(
|
mgid = self._search_regex(
|
||||||
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
|
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
|
||||||
webpage, 'mgid')
|
webpage, 'mgid')
|
||||||
return self._get_videos_info(mgid)
|
|
||||||
|
videos_info = self._get_videos_info(mgid)
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
return
|
||||||
|
return videos_info
|
||||||
|
|
||||||
|
|
||||||
class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||||
|
|
Loading…
Reference in New Issue