# coding: utf-8
# Origin: commit 71407b3ecaf9346fe316a24d1753d365ed343ee7 (2021-09-07),
#   "[Olympics] Add replay extractor (#905)", closes #897, authored by Ashish0804.
# The same commit registers this extractor in yt_dlp/extractor/extractors.py with
#   from .olympics import OlympicsReplayIE
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    unified_strdate,
)


class OlympicsReplayIE(InfoExtractor):
    """Extractor for Tokyo 2020 replay pages on olympics.com."""

    # The named group ``id`` is what ``_match_id`` reads back in ``_real_extract``.
    _VALID_URL = r'https?://(?:www\.)?olympics\.com/tokyo-2020/(?:[a-z]{2}/)?replay/(?P<id>[^/#&?]+)'
    _TESTS = [{
        'url': 'https://olympics.com/tokyo-2020/en/replay/300622eb-abc0-43ea-b03b-c5f2d429ec7b/jumping-team-qualifier',
        'info_dict': {
            'id': '300622eb-abc0-43ea-b03b-c5f2d429ec7b',
            'ext': 'mp4',
            'title': 'Jumping Team Qualifier',
            'release_date': '20210806',
            'upload_date': '20210713',
        },
        'params': {
            'format': 'bv',
        },
    }, {
        'url': 'https://olympics.com/tokyo-2020/en/replay/bd242924-4b22-49a5-a846-f1d4c809250d/mens-bronze-medal-match-hun-esp',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a replay URL into an info dict with HLS formats and subtitles.

        Three requests are made: one for an app token, one for the schedule
        session (metadata plus the included stream description), and one for a
        playback session that yields the m3u8 URL.

        Raises:
            ExtractorError: if the schedule session includes no ``Stream`` entry.
        """
        video_id = self._match_id(url)

        # The parameters are hardcoded in the webpage, so it is not necessary to
        # download the webpage just for them. If downloading the webpage becomes
        # necessary for other reasons, extract these parameters from it instead.
        token_url = 'https://appovptok.ovpobs.tv/api/identity/app/token?api_key=OTk5NDcxOjpvY3N3LWFwaXVzZXI%3D&api_secret=ODY4ODM2MjE3ODMwYmVjNTAxMWZlMDJiMTYxZmY0MjFiMjMwMjllMjJmNDA1YWRiYzA5ODcxYTZjZTljZDkxOTo6NTM2NWIzNjRlMTM1ZmI2YWNjNmYzMGMzOGM3NzZhZTY%3D'
        token = self._download_webpage(token_url, video_id)
        headers = {'x-obs-app-token': token}

        data_json = self._download_json(
            f'https://appocswtok.ovpobs.tv/api/schedule-sessions/{video_id}?include=stream',
            video_id, headers=headers)
        meta_data = data_json['data']['attributes']

        streams = [
            item['attributes'] for item in data_json['included']
            if item.get('type') == 'Stream']
        if not streams:
            # Fail with the extractor's own error type instead of an
            # UnboundLocalError further down.
            raise ExtractorError('No stream found for this replay', video_id=video_id)
        # When several Stream entries are present, the last one is used.
        stream_data = streams[-1]

        m3u8_url = self._download_json(
            'https://meteringtok.ovpobs.tv/api/playback-sessions', video_id, headers=headers, query={
                'alias': stream_data['alias'],
                'stream': stream_data['stream'],
                'type': 'vod'
            })['data']['attributes']['url']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': meta_data['title'],
            # Prefer the session start time; fall back to the broadcast publication time.
            'release_date': unified_strdate(meta_data.get('start') or meta_data.get('broadcastPublished')),
            'upload_date': unified_strdate(meta_data.get('publishedAt')),
            'formats': formats,
            'subtitles': subtitles,
        }