From 637ccf3523e6e47ad412d5ff71e2eeb5f93ec833 Mon Sep 17 00:00:00 2001 From: pj47x Date: Mon, 2 Sep 2024 20:58:17 +1000 Subject: [PATCH] [ie/manyvids] Fix ManyVids extractor after website update --- yt_dlp/extractor/manyvids.py | 199 +++++++++++++++-------------------- 1 file changed, 87 insertions(+), 112 deletions(-) diff --git a/yt_dlp/extractor/manyvids.py b/yt_dlp/extractor/manyvids.py index 8caa8f87f..1f6e70d07 100644 --- a/yt_dlp/extractor/manyvids.py +++ b/yt_dlp/extractor/manyvids.py @@ -1,113 +1,100 @@ -import re + from .common import InfoExtractor -from ..utils import ( - determine_ext, - extract_attributes, - int_or_none, - str_to_int, - url_or_none, - urlencode_postdata, -) +from .. import traverse_obj +from ..utils import determine_ext, int_or_none, parse_count, parse_duration, parse_iso8601, url_or_none class ManyVidsIE(InfoExtractor): - _WORKING = False + _WORKING = True _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P\d+)' - _TESTS = [{ - # preview video - 'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/', - 'md5': '03f11bb21c52dd12a05be21a5c7dcc97', - 'info_dict': { - 'id': '133957', - 'ext': 'mp4', - 'title': 'everthing about me (Preview)', - 'uploader': 'ellyxxix', - 'view_count': int, - 'like_count': int, + _TESTS = [ + { + # Dead preview video + 'skip': True, + 'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/', + 'md5': '03f11bb21c52dd12a05be21a5c7dcc97', + 'info_dict': { + 'id': '133957', + 'ext': 'mp4', + 'title': 'everthing about me (Preview)', + 'uploader': 'ellyxxix', + 'view_count': int, + 'like_count': int, + }, }, - }, { - # full video - 'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/', - 'md5': 'bb47bab0e0802c2a60c24ef079dfe60f', - 'info_dict': { - 'id': '935718', - 'ext': 'mp4', - 'title': 'MY FACE REVEAL', - 'description': 'md5:ec5901d41808b3746fed90face161612', - 'uploader': 'Sarah Calanthe', - 'view_count': int, - 'like_count': int, + { + # preview video + 'url': 'https://www.manyvids.com/Video/530341/mv-tips-tricks', + 'md5': '738dc723f7735ee9602f7ea352a6d058', + 'info_dict': { + 'id': '530341', + 'ext': 'mp4', + 'title': 'MV Tips & Tricks (Preview)', + 'description': 'md5:c3bae98c0f9453237c28b0f8795d9f83', + 'thumbnail': 'https://cdn5.manyvids.com/php_uploads/video_images/DestinyDiaz/thumbs/thumb_Hs26ATOO7fcZaI9sx3XT_screenshot_001.jpg', + 'uploader': 'DestinyDiaz', + 'view_count': int, + 'like_count': int, + 'release_timestamp': 1508419904, + 'tags': ['AdultSchool', 'BBW', 'SFW', 'TeacherFetish'], + 'release_date': '20171019', + 'duration': 3167.0, + }, }, - }] + { + # full video + 'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/', + 'md5': 'bb47bab0e0802c2a60c24ef079dfe60f', + 'info_dict': { + 'id': '935718', + 'ext': 'mp4', + 'title': 'MY FACE REVEAL', + 'description': 'md5:ec5901d41808b3746fed90face161612', + 'thumbnail': 'https://ods.manyvids.com/1001061960/3aa5397f2a723ec4597e344df66ab845/screenshots/thumbs/custom_1_180_5be09c1dcce03.jpg', + 'uploader': 'Sarah Calanthe', + 'view_count': int, + 'like_count': int, + 'release_date': '20181110', + 'tags': ['EyeContact', 'Interviews', 'MaskFetish', 'MouthFetish', 'Redhead'], + 'release_timestamp': 1541851200, + 'duration': 224.0, + }, + }, + ] def _real_extract(self, url): video_id = self._match_id(url) - real_url = f'https://www.manyvids.com/video/{video_id}/gtm.js' - try: - webpage = self._download_webpage(real_url, video_id) - except Exception: - # probably useless fallback - webpage = self._download_webpage(url, video_id) + info = traverse_obj( + self._download_json(f'https://www.manyvids.com/bff/store/video/{video_id}', video_id), + ('data', {dict})) or {} - info = self._search_regex( - r'''(]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''', - webpage, 'meta details', default='') - info = extract_attributes(info) - - player = self._search_regex( - r'''(]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''', - webpage, 'player details', default='') - player = extract_attributes(player) + video_urls = traverse_obj( + self._download_json(f'https://www.manyvids.com/bff/store/video/{video_id}/private', video_id), + ('data', {dict})) or {} video_urls_and_ids = ( - (info.get('data-meta-video'), 'video'), - (player.get('data-video-transcoded'), 'transcoded'), - (player.get('data-video-filepath'), 'filepath'), - (self._og_search_video_url(webpage, secure=False, default=None), 'og_video'), + (traverse_obj(video_urls, ('teaser', 'filepath')), 'preview'), + (video_urls.get('transcodedFilepath'), 'transcoded'), + (video_urls.get('filepath'), 'filepath'), ) - def txt_or_none(s, default=None): - return (s.strip() or default) if isinstance(s, str) else default + title = info.get('title') + uploader = traverse_obj(info, ('model', 'displayName')) + description = info.get('description') + likes = parse_count(info.get('likes')) + views = parse_count(info.get('views')) + thumbnail = url_or_none(info.get('screenshot')) or url_or_none(info.get('thumbnail')) + release_timestamp = parse_iso8601(info.get('launchDate')) + duration = parse_duration(info.get('videoDuration')) + tags = [t.get('label') for t in info.get('tagList')] - uploader = txt_or_none(info.get('data-meta-author')) - - def mung_title(s): - if uploader: - s = re.sub(rf'^\s*{re.escape(uploader)}\s+[|-]', '', s) - return txt_or_none(s) - - title = ( - mung_title(info.get('data-meta-title')) - or self._html_search_regex( - (r']+class=["\']item-title[^>]+>([^<]+)', - r']+class=["\']h2 m-0["\'][^>]*>([^<]+)'), - webpage, 'title', default=None) - or self._html_search_meta( - 'twitter:title', webpage, 'title', fatal=True)) - - title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title - - if any(p in webpage for p in ('preview_videos', '_preview.mp4')): + # If the video formats JSON only contains a teaser object, then it is a preview + if video_urls.get('teaser') and not video_urls.get('filepath'): title += ' (Preview)' - - mv_token = self._search_regex( - r'data-mvtoken=(["\'])(?P(?:(?!\1).)+)\1', webpage, - 'mv token', default=None, group='value') - - if mv_token: - # Sets some cookies - self._download_webpage( - 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php', - video_id, note='Setting format cookies', fatal=False, - data=urlencode_postdata({ - 'mvtoken': mv_token, - 'vid': video_id, - }), headers={ - 'Referer': url, - 'X-Requested-With': 'XMLHttpRequest', - }) + self.report_warning( + f'Only extracting preview. Video may be paid or subscription only. {self._login_hint()}') formats = [] for v_url, fmt in video_urls_and_ids: @@ -130,33 +117,21 @@ class ManyVidsIE(InfoExtractor): if f.get('height') is None: f['height'] = int_or_none( self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None)) - if '/preview/' in f['url']: - f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview'))) + if 'preview' in f['format_id']: f['preference'] = -10 if 'transcoded' in f['format_id']: f['preference'] = f.get('preference', -1) - 1 - def get_likes(): - likes = self._search_regex( - rf'''(]*\bdata-id\s*=\s*(['"]){video_id}\2[^>]*>)''', - webpage, 'likes', default='') - likes = extract_attributes(likes) - return int_or_none(likes.get('data-likes')) - - def get_views(): - return str_to_int(self._html_search_regex( - r'''(?s)]*\bclass\s*=["']views-wrapper\b[^>]+>.+?]+>\s*(\d[\d,.]*)\s*''', - webpage, 'view count', default=None)) - return { 'id': video_id, 'title': title, 'formats': formats, - 'description': txt_or_none(info.get('data-meta-description')), - 'uploader': txt_or_none(info.get('data-meta-author')), - 'thumbnail': ( - url_or_none(info.get('data-meta-image')) - or url_or_none(player.get('data-video-screenshot'))), - 'view_count': get_views(), - 'like_count': get_likes(), + 'description': description, + 'uploader': uploader, + 'thumbnail': thumbnail, + 'view_count': views, + 'like_count': likes, + 'release_timestamp': release_timestamp, + 'duration': duration, + 'tags': tags, }