[soundcloud:user] Rework extractor (Closes #6399)

This commit is contained in:
Sergey M․ 2015-07-31 00:54:26 +06:00
parent 1f04873517
commit 80fb6d4aa4
1 changed file with 101 additions and 30 deletions

View File

@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor):
_VALID_URL = r'''(?x)^(?:https?://)? _VALID_URL = r'''(?x)^(?:https?://)?
(?:(?:(?:www\.|m\.)?soundcloud\.com/ (?:(?:(?:www\.|m\.)?soundcloud\.com/
(?P<uploader>[\w\d-]+)/ (?P<uploader>[\w\d-]+)/
(?!sets/|(?:likes|tracks)/?(?:$|[?#])) (?!(?:tracks|sets|reposts|likes|spotlight)/?(?:$|[?#]))
(?P<title>[\w\d-]+)/? (?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$) (?P<token>[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
@ -293,60 +293,131 @@ class SoundcloudSetIE(SoundcloudIE):
class SoundcloudUserIE(SoundcloudIE): class SoundcloudUserIE(SoundcloudIE):
_VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$' _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|sets|reposts|likes|spotlight)/?)?(\?.*)?$'
IE_NAME = 'soundcloud:user' IE_NAME = 'soundcloud:user'
_TESTS = [{ _TESTS = [{
'url': 'https://soundcloud.com/the-concept-band', 'url': 'https://soundcloud.com/the-akashic-chronicler',
'info_dict': { 'info_dict': {
'id': '9615865', 'id': '114582580',
'title': 'The Royal Concept', 'title': 'The Akashic Chronicler (All)',
}, },
'playlist_mincount': 12 'playlist_mincount': 112,
}, {
'url': 'https://soundcloud.com/the-concept-band/likes',
'info_dict': {
'id': '9615865',
'title': 'The Royal Concept',
},
'playlist_mincount': 1,
}, { }, {
'url': 'https://soundcloud.com/the-akashic-chronicler/tracks', 'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
'only_matching': True, 'info_dict': {
'id': '114582580',
'title': 'The Akashic Chronicler (Tracks)',
},
'playlist_mincount': 50,
}, {
'url': 'https://soundcloud.com/the-akashic-chronicler/sets',
'info_dict': {
'id': '114582580',
'title': 'The Akashic Chronicler (Playlists)',
},
'playlist_mincount': 3,
}, {
'url': 'https://soundcloud.com/the-akashic-chronicler/reposts',
'info_dict': {
'id': '114582580',
'title': 'The Akashic Chronicler (Reposts)',
},
'playlist_mincount': 9,
}, {
'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
'info_dict': {
'id': '114582580',
'title': 'The Akashic Chronicler (Likes)',
},
'playlist_mincount': 333,
}, {
'url': 'https://soundcloud.com/grynpyret/spotlight',
'info_dict': {
'id': '7098329',
'title': 'Grynpyret (Spotlight)',
},
'playlist_mincount': 1,
}] }]
_API_BASE = 'https://api.soundcloud.com'
_API_V2_BASE = 'https://api-v2.soundcloud.com'
_BASE_URL_MAP = {
'all': '%s/profile/soundcloud:users:%%s' % _API_V2_BASE,
'tracks': '%s/users/%%s/tracks' % _API_BASE,
'sets': '%s/users/%%s/playlists' % _API_V2_BASE,
'reposts': '%s/profile/soundcloud:users:%%s/reposts' % _API_V2_BASE,
'likes': '%s/users/%%s/likes' % _API_V2_BASE,
'spotlight': '%s/users/%%s/spotlight' % _API_V2_BASE,
}
_TITLE_MAP = {
'all': 'All',
'tracks': 'Tracks',
'sets': 'Playlists',
'reposts': 'Reposts',
'likes': 'Likes',
'spotlight': 'Spotlight',
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
uploader = mobj.group('user') uploader = mobj.group('user')
resource = mobj.group('rsrc')
if resource is None:
resource = 'tracks'
elif resource == 'likes':
resource = 'favorites'
url = 'http://soundcloud.com/%s/' % uploader url = 'http://soundcloud.com/%s/' % uploader
resolv_url = self._resolv_url(url) resolv_url = self._resolv_url(url)
user = self._download_json( user = self._download_json(
resolv_url, uploader, 'Downloading user info') resolv_url, uploader, 'Downloading user info')
base_url = 'http://api.soundcloud.com/users/%s/%s.json?' % (uploader, resource)
resource = mobj.group('rsrc') or 'all'
base_url = self._BASE_URL_MAP[resource] % user['id']
next_href = None
entries = [] entries = []
for i in itertools.count(): for i in itertools.count():
if not next_href:
data = compat_urllib_parse.urlencode({ data = compat_urllib_parse.urlencode({
'offset': i * 50, 'offset': i * 50,
'limit': 50, 'limit': 50,
'client_id': self._CLIENT_ID, 'client_id': self._CLIENT_ID,
'linked_partitioning': '1',
'representation': 'speedy',
}) })
new_entries = self._download_json( next_href = base_url + '?' + data
base_url + data, uploader, 'Downloading track page %s' % (i + 1))
if len(new_entries) == 0: response = self._download_json(
next_href, uploader, 'Downloading track page %s' % (i + 1))
collection = response['collection']
if not collection:
self.to_screen('%s: End page received' % uploader) self.to_screen('%s: End page received' % uploader)
break break
entries.extend(self.url_result(e['permalink_url'], 'Soundcloud') for e in new_entries)
def resolve_permalink_url(candidates):
for cand in candidates:
if isinstance(cand, dict):
permalink_url = cand.get('permalink_url')
if permalink_url and permalink_url.startswith('http'):
return permalink_url
for e in collection:
permalink_url = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
if permalink_url:
entries.append(self.url_result(permalink_url))
if 'next_href' in response:
next_href = response['next_href']
if not next_href:
break
else:
next_href = None
return { return {
'_type': 'playlist', '_type': 'playlist',
'id': compat_str(user['id']), 'id': compat_str(user['id']),
'title': user['username'], 'title': '%s (%s)' % (user['username'], self._TITLE_MAP[resource]),
'entries': entries, 'entries': entries,
} }