[ie/gem.cbc.ca:live] Fix extractor (#10565)

Authored by: scribblemaniac, bashonly
This commit is contained in:
scribblemaniac 2024-08-05 19:02:21 -06:00 committed by GitHub
parent 406f4c2e47
commit fc5eecfa31
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 61 additions and 26 deletions

View File

@ -806,11 +806,11 @@ class CBCGemLiveIE(InfoExtractor):
'title': 'Ottawa', 'title': 'Ottawa',
'description': 'The live TV channel and local programming from Ottawa', 'description': 'The live TV channel and local programming from Ottawa',
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg', 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
'is_live': True, 'live_status': 'is_live',
'id': 'AyqZwxRqh8EH', 'id': 'AyqZwxRqh8EH',
'ext': 'mp4', 'ext': 'mp4',
'timestamp': 1492106160, 'release_timestamp': 1492106160,
'upload_date': '20170413', 'release_date': '20170413',
'uploader': 'CBCC-NEW', 'uploader': 'CBCC-NEW',
}, },
'skip': 'Live might have ended', 'skip': 'Live might have ended',
@ -839,32 +839,65 @@ class CBCGemLiveIE(InfoExtractor):
'description': 'March 24, 2023 | President Bidens Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.', 'description': 'March 24, 2023 | President Bidens Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.',
'live_status': 'is_live', 'live_status': 'is_live',
'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*', 'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
'timestamp': 1679706000, 'release_timestamp': 1679706000,
'upload_date': '20230325', 'release_date': '20230325',
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
'skip': 'Live might have ended', 'skip': 'Live might have ended',
}, },
{ # event replay (medianetlive)
'url': 'https://gem.cbc.ca/live-event/42314',
'md5': '297a9600f554f2258aed01514226a697',
'info_dict': {
'id': '42314',
'ext': 'mp4',
'live_status': 'was_live',
'title': 'Women\'s Soccer - Canada vs New Zealand',
'description': 'md5:36200e5f1a70982277b5a6ecea86155d',
'thumbnail': r're:https://.+default\.jpg',
'release_timestamp': 1721917200,
'release_date': '20240725',
},
'params': {'skip_download': True},
'skip': 'Replay might no longer be available',
},
{ # event replay (medianetlive)
'url': 'https://gem.cbc.ca/live-event/43273',
'only_matching': True,
},
] ]
_GEO_COUNTRIES = ['CA']
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data'] video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data']
# Two types of metadata JSON # Three types of video_info JSON: info in root, freeTv stream/item, event replay
if not video_info.get('formattedIdMedia'): if not video_info.get('formattedIdMedia'):
video_info = traverse_obj( if traverse_obj(video_info, ('event', 'key')) == video_id:
video_info, (('freeTv', ('streams', ...)), 'items', lambda _, v: v['key'] == video_id, {dict}), video_info = video_info['event']
get_all=False, default={}) else:
video_info = traverse_obj(video_info, (
('freeTv', ('streams', ...)), 'items',
lambda _, v: v['key'].partition('-')[0] == video_id, any)) or {}
video_stream_id = video_info.get('formattedIdMedia') video_stream_id = video_info.get('formattedIdMedia')
if not video_stream_id: if not video_stream_id:
raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True) raise ExtractorError(
'Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
live_status = 'was_live' if video_info.get('isVodEnabled') else 'is_live'
release_timestamp = traverse_obj(video_info, ('airDate', {parse_iso8601}))
if live_status == 'is_live' and release_timestamp and release_timestamp > time.time():
formats = []
live_status = 'is_upcoming'
self.raise_no_formats('This livestream has not yet started', expected=True)
else:
stream_data = self._download_json( stream_data = self._download_json(
'https://services.radio-canada.ca/media/validation/v2/', video_id, query={ 'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
'appCode': 'mpx', 'appCode': 'medianetlive',
'connectionType': 'hd', 'connectionType': 'hd',
'deviceType': 'ipad', 'deviceType': 'ipad',
'idMedia': video_stream_id, 'idMedia': video_stream_id,
@ -873,15 +906,17 @@ class CBCGemLiveIE(InfoExtractor):
'tech': 'hls', 'tech': 'hls',
'manifestType': 'desktop', 'manifestType': 'desktop',
}) })
formats = self._extract_m3u8_formats(
stream_data['url'], video_id, 'mp4', live=live_status == 'is_live')
return { return {
'id': video_id, 'id': video_id,
'formats': self._extract_m3u8_formats(stream_data['url'], video_id, 'mp4', live=True), 'formats': formats,
'is_live': True, 'live_status': live_status,
'release_timestamp': release_timestamp,
**traverse_obj(video_info, { **traverse_obj(video_info, {
'title': 'title', 'title': ('title', {str}),
'description': 'description', 'description': ('description', {str}),
'thumbnail': ('images', 'card', 'url'), 'thumbnail': ('images', 'card', 'url'),
'timestamp': ('airDate', {parse_iso8601}),
}), }),
} }