[youtube] improve subtitle extraction
This commit is contained in:
parent
efef4ddf51
commit
65eee5a745
|
@ -1664,7 +1664,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
player_response,
|
player_response,
|
||||||
lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
|
lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
|
||||||
if pctr:
|
if pctr:
|
||||||
def process_language(container, base_url, caption, query):
|
def process_language(container, base_url, lang_code, query):
|
||||||
lang_subs = []
|
lang_subs = []
|
||||||
for fmt in self._SUBTITLE_FORMATS:
|
for fmt in self._SUBTITLE_FORMATS:
|
||||||
query.update({
|
query.update({
|
||||||
|
@ -1674,35 +1674,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'ext': fmt,
|
'ext': fmt,
|
||||||
'url': update_url_query(base_url, query),
|
'url': update_url_query(base_url, query),
|
||||||
})
|
})
|
||||||
subtitles[caption['languageCode']] = lang_subs
|
container[lang_code] = lang_subs
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for caption_track in pctr['captionTracks']:
|
for caption_track in (pctr.get('captionTracks') or []):
|
||||||
base_url = caption_track['baseUrl']
|
base_url = caption_track.get('baseUrl')
|
||||||
|
if not base_url:
|
||||||
|
continue
|
||||||
if caption_track.get('kind') != 'asr':
|
if caption_track.get('kind') != 'asr':
|
||||||
lang_subs = []
|
lang_code = caption_track.get('languageCode')
|
||||||
for fmt in self._SUBTITLE_FORMATS:
|
if not lang_code:
|
||||||
lang_subs.append({
|
continue
|
||||||
'ext': fmt,
|
process_language(
|
||||||
'url': update_url_query(base_url, {
|
subtitles, base_url, lang_code, {})
|
||||||
'fmt': fmt,
|
|
||||||
}),
|
|
||||||
})
|
|
||||||
subtitles[caption_track['languageCode']] = lang_subs
|
|
||||||
continue
|
continue
|
||||||
automatic_captions = {}
|
automatic_captions = {}
|
||||||
for translation_language in pctr['translationLanguages']:
|
for translation_language in (pctr.get('translationLanguages') or []):
|
||||||
translation_language_code = translation_language['languageCode']
|
translation_language_code = translation_language.get('languageCode')
|
||||||
lang_subs = []
|
if not translation_language_code:
|
||||||
for fmt in self._SUBTITLE_FORMATS:
|
continue
|
||||||
lang_subs.append({
|
process_language(
|
||||||
'ext': fmt,
|
automatic_captions, base_url, translation_language_code,
|
||||||
'url': update_url_query(base_url, {
|
{'tlang': translation_language_code})
|
||||||
'fmt': fmt,
|
|
||||||
'tlang': translation_language_code,
|
|
||||||
}),
|
|
||||||
})
|
|
||||||
automatic_captions[translation_language_code] = lang_subs
|
|
||||||
info['automatic_captions'] = automatic_captions
|
info['automatic_captions'] = automatic_captions
|
||||||
info['subtitles'] = subtitles
|
info['subtitles'] = subtitles
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue