mirror of https://github.com/yt-dlp/yt-dlp.git
[ie/mediasite] Extract transcripts
This commit is contained in:
parent
f101e5d34c
commit
ed4d9a40c1
|
@ -5,6 +5,7 @@ import urllib.parse
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
determine_ext,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
@ -268,7 +269,29 @@ class MediasiteIE(InfoExtractor):
|
||||||
formats.extend(stream_formats)
|
formats.extend(stream_formats)
|
||||||
|
|
||||||
# XXX: Presentation['Presenters']
|
# XXX: Presentation['Presenters']
|
||||||
# XXX: Presentation['Transcript']
|
transcripts = presentation.get('Transcripts', {})
|
||||||
|
captions, subtitles = {}, {}
|
||||||
|
for transcript in transcripts:
|
||||||
|
lang_code = traverse_obj(
|
||||||
|
transcript, (('DetailedLanguageCode', 'LanguageCode'), {str}), get_all=False)
|
||||||
|
lang_name = transcript.get('Language')
|
||||||
|
t = {
|
||||||
|
'url': transcript.get('CaptionsUrl'),
|
||||||
|
'name': lang_name,
|
||||||
|
}
|
||||||
|
if 'Auto-Generated' in lang_name:
|
||||||
|
captions.setdefault(lang_code, []).append(t)
|
||||||
|
else:
|
||||||
|
subtitles.setdefault(lang_code, []).append(t)
|
||||||
|
if transcript_url := presentation.get('TranscriptUrl'):
|
||||||
|
if determine_ext(transcript_url) != 'txt':
|
||||||
|
if len(transcripts) == 1 and captions:
|
||||||
|
captions.setdefault(lang_code, []).append({
|
||||||
|
'url': transcript_url,
|
||||||
|
'name': lang_name,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
subtitles.setdefault('und', []).append({'url': transcript_url})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': resource_id,
|
'id': resource_id,
|
||||||
|
@ -277,6 +300,8 @@ class MediasiteIE(InfoExtractor):
|
||||||
'duration': float_or_none(presentation.get('Duration'), 1000),
|
'duration': float_or_none(presentation.get('Duration'), 1000),
|
||||||
'timestamp': float_or_none(presentation.get('UnixTime'), 1000),
|
'timestamp': float_or_none(presentation.get('UnixTime'), 1000),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'automatic_captions': captions,
|
||||||
|
'subtitles': subtitles,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue