# yt-dlp/yt_dlp/extractor/vidflex.py

import base64
import json

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    join_nonempty,
    mimetype2ext,
    url_or_none,
)
from ..utils.traversal import traverse_obj


class VidflexIE(InfoExtractor):
    """Extractor for video pages served from the hosts listed in ``_DOMAINS_RE``."""

    # Host patterns (regex fragments) accepted by this extractor: any
    # subdomain of vidflex.tv plus the custom domains enumerated below.
    _DOMAINS_RE = [
        r'[^.]+\.vidflex\.tv',
        r'(?:www\.)?acactv\.ca',
        r'(?:www\.)?albertalacrossetv\.com',
        r'(?:www\.)?cjfltv\.com',
        r'(?:www\.)?figureitoutbaseball\.com',
        r'(?:www\.)?ocaalive\.com',
        r'(?:www\.)?pegasussports\.tv',
        r'(?:www\.)?praxisseries\.ca',
        r'(?:www\.)?silenticetv\.com',
        r'(?:www\.)?tuffhedemantv\.com',
        r'(?:www\.)?watchfuntv\.com',
        r'live\.ofsaa\.on\.ca',
        r'tv\.procoro\.ca',
        r'tv\.realcastmedia\.net',
        r'tv\.fringetheatre\.ca',
        r'video\.haisla\.ca',
        r'video\.hockeycanada\.ca',
        r'video\.huuayaht\.org',
        r'video\.turningpointensemble\.ca',
        r'videos\.livingworks\.net',
        r'videos\.telusworldofscienceedmonton\.ca',
        r'watch\.binghamtonbulldogs\.com',
        r'watch\.rekindle\.tv',
        r'watch\.wpca\.com',
    ]
    # Matches <host>/<xx>[-<yy>]/c/<slug>.<numeric id>, where <xx>/<yy> are
    # two lowercase letters each (e.g. "en", "en-us", "fr"); the trailing
    # number is captured as the video id.
    _VALID_URL = (
        rf'https?://(?:{"|".join(_DOMAINS_RE)})'
        r'/[a-z]{2}(?:-[a-z]{2})?/c/[\w-]+\.(?P<id>\d+)')
    _TESTS = [{
        'url': 'https://video.hockeycanada.ca/en/c/nwt-micd-up-with-jamie-lee-rattray.107486',
        'only_matching': True,
    }, {
        # m3u8 + https
        'url': 'https://video.hockeycanada.ca/en-us/c/nwt-micd-up-with-jamie-lee-rattray.107486',
        'info_dict': {
            'id': '107486',
            'title': 'NWT: Mic’d up with Jamie Lee Rattray',
            'ext': 'mp4',
            'duration': 115,
            'timestamp': 1634310409,
            'upload_date': '20211015',
            'tags': ['English', '2021', "National Women's Team"],
            'description': 'md5:efb1cf6165b48cc3f5555c4262dd5b23',
            'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://video.hockeycanada.ca/en/c/mwc-remembering-the-wild-ride-in-riga.112307',
        'info_dict': {
            'id': '112307',
            'title': 'MWC: Remembering the wild ride in Riga',
            'ext': 'mp4',
            'duration': 322,
            'timestamp': 1716235607,
            'upload_date': '20240520',
            'tags': ['English', '2024', "National Men's Team", 'IIHF World Championship', 'Fan'],
            'description': r're:.+Canada’s National Men’s Team.+',
            'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
        },
        'params': {'skip_download': True},
    }, {
        # the same video in French
        'url': 'https://video.hockeycanada.ca/fr/c/cmm-retour-sur-un-parcours-endiable-a-riga.112304',
        'info_dict': {
            'id': '112304',
            'title': 'CMM : Retour sur un parcours endiablé à Riga',
            'ext': 'mp4',
            'duration': 322,
            'timestamp': 1716235545,
            'upload_date': '20240520',
            'tags': ['French', '2024', "National Men's Team", 'IIHF World Championship', 'Fan'],
            'description': 'md5:cf825222882a3dab1cd62cffcf3b4d1f',
            'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://myfbcgreenville.vidflex.tv/en/c/may-12th-2024.658',
        'only_matching': True,
    }, {
        'url': 'https://www.figureitoutbaseball.com/en/c/fiob-podcast-14-dan-bertolini-ncaa-d1-head-coach-recorded-11-29-2018.1367',
        'only_matching': True,
    }, {
        'url': 'https://videos.telusworldofscienceedmonton.ca/en/c/the-aurora-project-timelapse-4.577',
        'only_matching': True,
    }, {
        'url': 'https://www.tuffhedemantv.com/en/c/2022-tuff-hedeman-tour-hobbs-nm-january-22.227',
        'only_matching': True,
    }, {
        'url': 'https://www.albertalacrossetv.com/en/c/up-floor-ground-balls-one-more.3449',
        'only_matching': True,
    }, {
        'url': 'https://www.silenticetv.com/en/c/jp-unlocked-day-in-the-life-of-langley-ha-15u.5197',
        'only_matching': True,
    }, {
        'url': 'https://jphl.vidflex.tv/en/c/jp-unlocked-day-in-the-life-of-langley-ha-15u.5197',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        The page is downloaded, the ``content_api`` URL embedded in it is
        fetched as JSON, and the formats are derived from that response's
        ``config`` value. Remaining metadata comes from the page's JSON-LD.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The page carries a quoted `content_api: "<url>"` entry; the
        # backreference lets either quote style delimit the URL.
        content_api_url = self._html_search_regex(
            r'content_api:\s*(["\'])(?P<url>https?://(?:(?!\1).)+)\1', webpage, 'content api url', group='url')
        api_response = self._download_json(content_api_url, video_id)

        # 'config' is run through base64 decoding, bytes -> str decoding and
        # JSON parsing; the result is kept only if it is a dict.
        config_path = ('config', {base64.b64decode}, {bytes.decode}, {json.loads}, {dict})
        media_config = traverse_obj(api_response, config_path)

        formats = list(self._yield_formats(media_config, video_id))

        # Drop the CDATA comment markers from the page before the JSON-LD search.
        json_ld_source = webpage.replace('/*<![CDATA[*/', '').replace('/*]]>*/', '')

        return {
            'id': video_id,
            'formats': formats,
            **self._search_json_ld(json_ld_source, video_id),
        }

    def _yield_formats(self, media_config, video_id):
        """Yield format dicts for the ``media.source`` entries of *media_config*.

        Only entries whose ``src`` passes ``url_or_none`` are considered.
        HLS sources are expanded through ``_extract_m3u8_formats`` (non-fatal),
        MP4 sources get a bitrate parsed from a ``_<digits>k.mp4`` file name
        when present, and any other type is passed through with the extension
        derived from its MIME type.
        """
        sources = traverse_obj(media_config, ('media', 'source', lambda _, v: url_or_none(v['src'])))
        for source in sources:
            src_url = source['src']
            ext = mimetype2ext(source.get('type'))

            if ext == 'm3u8':
                yield from self._extract_m3u8_formats(src_url, video_id, fatal=False, m3u8_id='hls')
                continue

            if ext != 'mp4':
                # Unknown or missing type: emit the URL as-is.
                yield {
                    'url': src_url,
                    'ext': ext,
                }
                continue

            # Progressive MP4: bitrate in the file name doubles as the format id suffix.
            kbps = self._search_regex(r'_(\d+)k\.mp4', src_url, 'bitrate', default=None)
            yield {
                'format_id': join_nonempty('http', kbps),
                'url': src_url,
                'ext': 'mp4',
                'tbr': int_or_none(kbps),
            }