From 2da9a86399369929592561d11b6f7faf224fa847 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 24 Dec 2020 13:05:50 +0100 Subject: [PATCH] [streetvoice] fix extraction(closes #27455)(closes #27492) --- youtube_dl/extractor/streetvoice.py | 95 ++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/streetvoice.py b/youtube_dl/extractor/streetvoice.py index 91612c7f2..f21681ae7 100644 --- a/youtube_dl/extractor/streetvoice.py +++ b/youtube_dl/extractor/streetvoice.py @@ -2,25 +2,40 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_str -from ..utils import unified_strdate +from ..utils import ( + int_or_none, + parse_iso8601, + str_or_none, + strip_or_none, + try_get, + urljoin, +) class StreetVoiceIE(InfoExtractor): _VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P[0-9]+)' _TESTS = [{ - 'url': 'http://streetvoice.com/skippylu/songs/94440/', - 'md5': '15974627fc01a29e492c98593c2fd472', + 'url': 'https://streetvoice.com/skippylu/songs/123688/', + 'md5': '0eb535970629a5195685355f3ed60bfd', 'info_dict': { - 'id': '94440', + 'id': '123688', 'ext': 'mp3', - 'title': '輸', - 'description': 'Crispy脆樂團 - 輸', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 260, - 'upload_date': '20091018', + 'title': '流浪', + 'description': 'md5:8eb0bfcc9dcd8aa82bd6efca66e3fea6', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 270, + 'upload_date': '20100923', 'uploader': 'Crispy脆樂團', 'uploader_id': '627810', + 'uploader_url': 're:^https?://streetvoice.com/skippylu/', + 'timestamp': 1285261661, + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, + 'track': '流浪', + 'track_id': '123688', + 'album': '2010', } }, { 'url': 'http://tw.streetvoice.com/skippylu/songs/94440/', @@ -29,21 +44,57 @@ class StreetVoiceIE(InfoExtractor): def _real_extract(self, url): song_id = self._match_id(url) - - song = self._download_json( - 'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'') - + base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id + song = self._download_json(base_url, song_id, query={ + 'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username', + }) title = song['name'] - author = song['user']['nickname'] + + formats = [] + for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]: + f_url = (self._download_json( + base_url + suffix + '/', song_id, + 'Downloading %s format URL' % format_id, + data=b'', fatal=False) or {}).get('file') + if not f_url: + continue + f = { + 'ext': 'mp3', + 'format_id': format_id, + 'url': f_url, + 'vcodec': 'none', + } + if format_id == 'hls': + f['protocol'] = 'm3u8_native' + abr = self._search_regex(r'\.mp3\.(\d+)k', f_url, 'bitrate', default=None) + if abr: + abr = int(abr) + f.update({ + 'abr': abr, + 'tbr': abr, + }) + formats.append(f) + + user = song.get('user') or {} + username = user.get('username') + get_count = lambda x: int_or_none(song.get(x + '_count')) return { 'id': song_id, - 'url': song['file'], + 'formats': formats, 'title': title, - 'description': '%s - %s' % (author, title), - 'thumbnail': self._proto_relative_url(song.get('image'), 'http:'), - 'duration': song.get('length'), - 'upload_date': unified_strdate(song.get('created_at')), - 'uploader': author, - 'uploader_id': compat_str(song['user']['id']), + 'description': strip_or_none(song.get('synopsis')), + 'thumbnail': song.get('image'), + 'duration': int_or_none(song.get('length')), + 'timestamp': parse_iso8601(song.get('created_at')), + 'uploader': try_get(user, lambda x: x['profile']['nickname']), + 'uploader_id': str_or_none(user.get('id')), + 'uploader_url': urljoin(url, '/%s/' % username) if username else None, + 'view_count': get_count('plays'), + 'like_count': get_count('likes'), + 'comment_count': get_count('comments'), + 'repost_count': get_count('share'), + 'track': title, + 'track_id': song_id, + 'album': try_get(song, lambda x: x['album']['name']), }