2024-05-22 16:20:29 -06:00
import functools
2013-09-14 13:41:49 -06:00
import itertools
2020-08-06 07:41:41 -06:00
import json
2024-04-01 10:58:48 -06:00
import re
2013-06-23 12:57:44 -06:00
2024-04-01 10:58:48 -06:00
from . common import InfoExtractor , SearchInfoExtractor
from . . networking import HEADRequest
2023-07-09 01:53:02 -06:00
from . . networking . exceptions import HTTPError
2014-12-13 04:24:42 -07:00
from . . utils import (
2024-04-01 10:58:48 -06:00
KNOWN_EXTENSIONS ,
2013-06-23 12:57:44 -06:00
ExtractorError ,
2019-04-22 11:39:16 -06:00
float_or_none ,
2014-06-07 07:51:01 -06:00
int_or_none ,
2024-05-22 16:20:29 -06:00
join_nonempty ,
2019-04-22 11:39:16 -06:00
mimetype2ext ,
2021-09-04 22:56:46 -06:00
parse_qs ,
2019-04-22 11:39:16 -06:00
str_or_none ,
2024-04-01 10:58:48 -06:00
try_call ,
2019-02-10 09:44:08 -07:00
unified_timestamp ,
2017-09-03 03:18:24 -06:00
update_url_query ,
2019-02-02 09:40:06 -07:00
url_or_none ,
2020-03-22 02:24:07 -06:00
urlhandle_detect_ext ,
2017-09-03 03:18:24 -06:00
)
2024-04-01 10:21:46 -06:00
from . . utils . traversal import traverse_obj
2013-06-23 12:57:44 -06:00
2019-10-27 10:52:46 -06:00
class SoundcloudEmbedIE(InfoExtractor):
    """Unwrap SoundCloud player-widget URLs into the API URL they embed.

    The widget URL carries the real target in its ``url`` query parameter
    (and optionally a ``secret_token``); extraction is delegated to whichever
    extractor matches that target.
    """

    _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)'
    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1']
    _TEST = {
        # from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/
        'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Return a ``url_result`` pointing at the URL wrapped by the widget.

        When the widget URL has a ``secret_token`` parameter, it is copied
        onto the wrapped URL so that private links keep working.
        """
        query = parse_qs(url)
        api_url = query['url'][0]
        secret_token = query.get('secret_token')
        if secret_token:
            api_url = update_url_query(api_url, {'secret_token': secret_token[0]})
        return self.url_result(api_url)
2019-10-27 10:52:46 -06:00
2021-12-02 14:46:08 -07:00
class SoundcloudBaseIE(InfoExtractor):
    """Shared plumbing for the SoundCloud extractors.

    Provides client-id discovery and caching, OAuth-token based login,
    an API wrapper that injects the client id, and conversion of a track
    JSON object into an info dict.
    """

    _NETRC_MACHINE = 'soundcloud'

    _API_V2_BASE = 'https://api-v2.soundcloud.com/'
    _BASE_URL = 'https://soundcloud.com/'
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
    _API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
    _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
    _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
    # Class-level dict that _verify_oauth_token() mutates in place, so an
    # accepted Authorization header is visible to every instance and subclass.
    _HEADERS = {}

    # Matches the size suffix of an artwork/avatar URL (e.g. "-large.jpg")
    _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
    # Size name -> value used for both width and height of the thumbnail.
    # 0 is falsy, so no dimensions are reported for 'original'.
    _ARTWORK_MAP = {
        'mini': 16,
        'tiny': 20,
        'small': 32,
        'badge': 47,
        't67x67': 67,
        'large': 100,
        't300x300': 300,
        'crop': 400,
        't500x500': 500,
        'original': 0,
    }

    # "<protocol>_<ext>" identifiers selected by the 'default' formats pattern
    _DEFAULT_FORMATS = ['http_aac', 'hls_aac', 'http_opus', 'hls_opus', 'http_mp3', 'hls_mp3']

    @functools.cached_property
    def _is_requested(self):
        """Predicate deciding whether a "<protocol>_<ext>" identifier is wanted.

        Built once from the ``formats`` extractor argument (looked up under
        the SoundcloudIE key). ``*`` in a pattern acts as a wildcard and the
        special pattern ``default`` expands to ``_DEFAULT_FORMATS``. The
        returned callable is the compiled regex's ``fullmatch``.
        """
        requested = self._configuration_arg('formats', ['default'], ie_key=SoundcloudIE)
        alternatives = {
            re.escape(pattern).replace(r'\*', r'.*') if pattern != 'default'
            else '|'.join(map(re.escape, self._DEFAULT_FORMATS))
            for pattern in requested
        }
        return re.compile(r'|'.join(alternatives)).fullmatch

    def _store_client_id(self, client_id):
        """Persist ``client_id`` (or ``None`` to invalidate it) in the cache."""
        self.cache.store('soundcloud', 'client_id', client_id)

    def _update_client_id(self):
        """Scrape a fresh client id from the scripts linked on soundcloud.com.

        Scripts are tried last-to-first; the first one containing a
        32-character ``client_id`` wins and the value is both set on the
        instance and written to the cache.

        Raises ExtractorError if no script yields a client id.
        """
        webpage = self._download_webpage('https://soundcloud.com/', None)
        for src in reversed(re.findall(r'<script[^>]+src="([^"]+)"', webpage)):
            script = self._download_webpage(src, None, fatal=False)
            if script:
                client_id = self._search_regex(
                    r'client_id\s*:\s*"([0-9a-zA-Z]{32})"',
                    script, 'client id', default=None)
                if client_id:
                    self._CLIENT_ID = client_id
                    self._store_client_id(client_id)
                    return
        raise ExtractorError('Unable to extract client id')

    def _call_api(self, *args, **kwargs):
        """Call ``_download_json`` with the current client id added to the query.

        Arguments are passed through to ``_download_json``; the caller's
        ``query`` dict is copied, never mutated. On HTTP 401/403 the cached
        client id is invalidated, a new one is scraped and the request is
        tried a second time.

        Returns the parsed JSON; ``False`` when ``fatal=False`` was passed and
        the request failed for another reason (a warning is reported); or
        ``None`` when both attempts were rejected with 401/403.
        """
        non_fatal = kwargs.get('fatal') is False
        if non_fatal:
            # Let _download_json raise so that the error can be inspected here
            del kwargs['fatal']
        query = kwargs.get('query', {}).copy()
        for _ in range(2):
            query['client_id'] = self._CLIENT_ID
            kwargs['query'] = query
            try:
                return self._download_json(*args, **kwargs)
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
                    self._store_client_id(None)
                    self._update_client_id()
                    continue
                elif non_fatal:
                    self.report_warning(str(e))
                    return False
                raise
        # NOTE(review): reached only when both attempts got 401/403. The error
        # is dropped and None is returned even for fatal calls; the format loop
        # in _extract_info_dict() tolerates that, other callers may not.
        return None

    def _initialize_pre_login(self):
        """Load the cached client id, falling back to a hard-coded one."""
        self._CLIENT_ID = self.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'

    def _verify_oauth_token(self, token):
        """Check ``token`` against the session endpoint and start using it.

        The token is sent as JSON request data. On success the Authorization
        header is stored in the shared ``_HEADERS``; otherwise a warning is
        reported and extraction continues unauthenticated.
        """
        if self._request_webpage(
                self._API_VERIFY_AUTH_TOKEN % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID),
                None, note='Verifying login token...', fatal=False,
                data=json.dumps({'session': {'access_token': token}}).encode()):
            self._HEADERS['Authorization'] = f'OAuth {token}'
            self.report_login()
        else:
            self.report_warning('Provided authorization token is invalid. Continuing as guest')

    def _real_initialize(self):
        """Pick up an ``oauth_token`` cookie unless a token is already in use."""
        if self._HEADERS:
            return
        if token := try_call(lambda: self._get_cookies(self._BASE_URL)['oauth_token'].value):
            self._verify_oauth_token(token)

    def _perform_login(self, username, password):
        """Log in with an OAuth token supplied as the password.

        Only the pseudo-username ``oauth`` is accepted; anything else raises
        an (expected) ExtractorError explaining the supported login methods.
        Nothing is done if an Authorization header is already set.
        """
        if username != 'oauth':
            raise ExtractorError(
                'Login using username and password is not currently supported. '
                'Use "--username oauth --password <oauth_token>" to login using an oauth token, '
                f'or else {self._login_hint(method="cookies")}', expected=True)
        if self._HEADERS:
            return
        self._verify_oauth_token(password)

        # The raw string below is a no-op expression: the disabled
        # username/password login flow, kept for reference only.
        r'''
        def genDevId():
            def genNumBlock():
                return ''.join([str(random.randrange(10)) for i in range(6)])
            return '-'.join([genNumBlock() for i in range(4)])

        payload = {
            'client_id': self._CLIENT_ID,
            'recaptcha_pubkey': 'null',
            'recaptcha_response': 'null',
            'credentials': {
                'identifier': username,
                'password': password
            },
            'signature': self.sign(username, password, self._CLIENT_ID),
            'device_id': genDevId(),
            'user_agent': self._USER_AGENT
        }

        response = self._call_api(
            self._API_AUTH_URL_PW % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID),
            None, note='Verifying login token...', fatal=False,
            data=json.dumps(payload).encode())

        if token := traverse_obj(response, ('session', 'access_token', {str})):
            self._HEADERS['Authorization'] = f'OAuth {token}'
            self.report_login()
            return

        raise ExtractorError('Unable to get access token, login may have failed', expected=True)
        '''

    # signature generation
    def sign(self, user, pw, clid):
        """Compute the request signature used by the disabled password login.

        ``pw`` is accepted for interface compatibility but does not take part
        in the computation. Returns ``"<rev>:<params>:<hash as hex>:<c>"``.
        """
        a = 33
        i = 1
        s = 440123
        w = 117
        u = 1800000
        l = 1042
        b = 37
        k = 37
        c = 5
        n = '0763ed7314c69015fd4a0dc16bbf4b90'  # _KEY
        y = '8'  # _REV
        r = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'  # _USER_AGENT
        e = user  # _USERNAME
        t = clid  # _CLIENT_ID

        d = '-'.join(str(value) for value in (a, i, s, w, u, l, b, k))
        h = n + y + d + r + e + t + d + n

        # 24-bit rotate-right-by-one of the accumulator, then add the character
        m = 8011470
        for char in h:
            m = (m >> 1) + ((1 & m) << 23)
            m += ord(char)
            m &= 16777215

        # c is not even needed
        return f'{y}:{d}:{m:x}:{c}'

    def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False):
        """Convert a track JSON object into an info dict.

        info:          track object; ``id`` and ``title`` are required
        full_title:    not used by this method
        secret_token:  private-share token added to every API query
        extract_flat:  when true, no format requests are made and
                       ``formats`` is ``None`` in the result

        Raises ExtractorError from the per-format API calls (see the XXX note
        below regarding HTTP 429) and reports geo restriction when nothing
        could be extracted and the track policy is ``BLOCK``.
        """
        track_id = str(info['id'])
        title = info['title']

        format_urls = set()
        formats = []
        query = {'client_id': self._CLIENT_ID}
        if secret_token:
            query['secret_token'] = secret_token

        if not extract_flat and info.get('downloadable') and info.get('has_downloads_left'):
            try:
                # Do not use _call_api(); HTTP Error codes have different meanings for this request
                download_data = self._download_json(
                    f'{self._API_V2_BASE}tracks/{track_id}/download', track_id,
                    'Downloading original download format info JSON', query=query, headers=self._HEADERS)
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                    self.report_warning(
                        'Original download format is only available '
                        f'for registered users. {self._login_hint()}')
                elif isinstance(e.cause, HTTPError) and e.cause.status == 403:
                    self.write_debug('Original download format is not available for this client')
                else:
                    self.report_warning(e.msg)
                download_data = None

            if redirect_url := traverse_obj(download_data, ('redirectUri', {url_or_none})):
                urlh = self._request_webpage(
                    HEADRequest(redirect_url), track_id, 'Checking original download format availability',
                    'Original download format is not available', fatal=False)
                if urlh:
                    format_url = urlh.url
                    format_urls.add(format_url)
                    formats.append({
                        'format_id': 'download',
                        'ext': urlhandle_detect_ext(urlh) or 'mp3',
                        'filesize': int_or_none(urlh.headers.get('Content-Length')),
                        'url': format_url,
                        'quality': 10,
                        'format_note': 'Original',
                    })

        def invalid_url(url):
            # Missing, or already collected under another transcoding
            return not url or url in format_urls

        def add_format(f, protocol, is_preview=False):
            # Fill in abr/ext from the stream URL when not already known.
            # The URL is read from the format itself rather than from the
            # enclosing loop's variable, so the helper has no hidden input.
            mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', f['url'])
            if mobj:
                for k, v in mobj.groupdict().items():
                    if not f.get(k):
                        f[k] = v
            format_id_list = []
            if protocol:
                format_id_list.append(protocol)
            ext = f.get('ext')
            if ext == 'aac':
                f.update({
                    'abr': 256,
                    'quality': 5,
                    'format_note': 'Premium',
                })
            for k in ('ext', 'abr'):
                v = str_or_none(f.get(k))
                if v:
                    format_id_list.append(v)
            preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
            if preview:
                format_id_list.append('preview')
            abr = f.get('abr')
            if abr:
                f['abr'] = int(abr)
            if protocol in ('hls', 'hls-aes'):
                protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
            else:
                protocol = 'http'
            f.update({
                'format_id': '_'.join(format_id_list),
                'protocol': protocol,
                'preference': -10 if preview else None,
            })
            formats.append(f)

        # New API
        for t in traverse_obj(info, ('media', 'transcodings', lambda _, v: url_or_none(v['url']))):
            if extract_flat:
                break
            format_url = t['url']

            protocol = traverse_obj(t, ('format', 'protocol', {str}))
            if protocol == 'progressive':
                protocol = 'http'
            if protocol != 'hls' and '/hls' in format_url:
                protocol = 'hls'
            if protocol == 'encrypted-hls' or '/encrypted-hls' in format_url:
                protocol = 'hls-aes'

            ext = None
            if preset := traverse_obj(t, ('preset', {str_or_none})):
                ext = preset.split('_')[0]
            if ext not in KNOWN_EXTENSIONS:
                ext = mimetype2ext(traverse_obj(t, ('format', 'mime_type', {str})))

            identifier = join_nonempty(protocol, ext, delim='_')
            if not self._is_requested(identifier):
                self.write_debug(f'"{identifier}" is not a requested format, skipping')
                continue

            # XXX: if not extract_flat, 429 error must be caught where _extract_info_dict is called
            stream_url = traverse_obj(self._call_api(
                format_url, track_id, f'Downloading {identifier} format info JSON',
                query=query, headers=self._HEADERS), ('url', {url_or_none}))

            if invalid_url(stream_url):
                continue
            format_urls.add(stream_url)
            add_format({
                'url': stream_url,
                'ext': ext,
            }, protocol, t.get('snipped') or '/preview/' in format_url)

        for f in formats:
            f['vcodec'] = 'none'

        if not formats and info.get('policy') == 'BLOCK':
            self.raise_geo_restricted(metadata_available=True)

        user = info.get('user') or {}

        thumbnails = []
        artwork_url = info.get('artwork_url')
        # Fall back to the uploader's avatar when the track has no artwork
        thumbnail = artwork_url or user.get('avatar_url')
        if isinstance(thumbnail, str):
            if re.search(self._IMAGE_REPL_RE, thumbnail):
                for image_id, size in self._ARTWORK_MAP.items():
                    i = {
                        'id': image_id,
                        'url': re.sub(self._IMAGE_REPL_RE, f'-{image_id}.jpg', thumbnail),
                    }
                    if image_id == 'tiny' and not artwork_url:
                        # The avatar variant of 'tiny' is reported as 18px
                        size = 18
                    elif image_id == 'original':
                        i['preference'] = 10
                    if size:
                        i.update({
                            'width': size,
                            'height': size,
                        })
                    thumbnails.append(i)
            else:
                thumbnails = [{'url': thumbnail}]

        def extract_count(key):
            return int_or_none(info.get(f'{key}_count'))

        return {
            'id': track_id,
            'uploader': user.get('username'),
            'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
            'uploader_url': user.get('permalink_url'),
            'timestamp': unified_timestamp(info.get('created_at')),
            'title': title,
            'description': info.get('description'),
            'thumbnails': thumbnails,
            'duration': float_or_none(info.get('duration'), 1000),
            'webpage_url': info.get('permalink_url'),
            'license': info.get('license'),
            'view_count': extract_count('playback'),
            'like_count': extract_count('favoritings') or extract_count('likes'),
            'comment_count': extract_count('comment'),
            'repost_count': extract_count('reposts'),
            'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
            'formats': formats if not extract_flat else None,
        }

    @classmethod
    def _resolv_url(cls, url):
        """Return the API v2 ``resolve`` URL for ``url`` (appended as-is)."""
        return cls._API_V2_BASE + 'resolve?url=' + url
class SoundcloudIE(SoundcloudBaseIE):
    """Information extractor for individual soundcloud.com tracks.

    Accepts track permalinks (optionally followed by a private-share token)
    and ``api``/``api-v2`` ``tracks/<id>`` URLs (optionally carrying a
    ``secret_token``). Permalinks are resolved through the API ``resolve``
    endpoint; the resulting track object is turned into an info dict by
    ``_extract_info_dict``.
    """

    _VALID_URL = r'''(?x)^(?:https?://)?
                    (?:(?:(?:www\.|m\.)?soundcloud\.com/
                            (?!stations/track)
                            (?P<uploader>[\w\d-]+)/
                            (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
                            (?P<title>[\w\d-]+)
                            (?:/(?P<token>(?!(?:albums|sets|recommended))[^?]+?))?
                            (?:[?].*)?$)
                       |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+)
                          (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
                    )
                    '''
    IE_NAME = 'soundcloud'
    _TESTS = [
        {
            'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
            'md5': 'de9bac153e7427a7333b4b0c1b6a18d2',
            'info_dict': {
                'id': '62986583',
                'ext': 'opus',
                'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
                'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
                'uploader': 'E.T. ExTerrestrial Music',
                'uploader_id': '1571244',
                'timestamp': 1349920598,
                'upload_date': '20121011',
                'duration': 143.216,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
                'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
                'uploader_url': 'https://soundcloud.com/ethmusic',
                'genres': [],
            },
        },
        # geo-restricted
        {
            'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
            'info_dict': {
                'id': '47127627',
                'ext': 'opus',
                'title': 'Goldrushed',
                'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
                'uploader': 'The Royal Concept',
                'uploader_id': '9615865',
                'timestamp': 1337635207,
                'upload_date': '20120521',
                'duration': 227.155,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
                'uploader_url': 'https://soundcloud.com/the-concept-band',
                'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
                'genres': ['Alternative'],
            },
        },
        # private link
        {
            'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
            'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
            'info_dict': {
                'id': '123998367',
                'ext': 'mp3',
                # NOTE(review): whitespace inside the title/description literals
                # was reconstructed; verify against the live track metadata
                'title': 'Youtube - Dl Test Video \'\' Ä↭',
                'description': 'test chars: "\'/\\ä↭',
                'uploader': 'jaimeMF',
                'uploader_id': '69767071',
                'timestamp': 1386604920,
                'upload_date': '20131209',
                'duration': 9.927,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
                'uploader_url': 'https://soundcloud.com/jaimemf',
                'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
                'genres': ['youtubedl'],
            },
        },
        # private link (alt format)
        {
            'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp',
            'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
            'info_dict': {
                'id': '123998367',
                'ext': 'mp3',
                # NOTE(review): whitespace inside the title/description literals
                # was reconstructed; verify against the live track metadata
                'title': 'Youtube - Dl Test Video \'\' Ä↭',
                'description': 'test chars: "\'/\\ä↭',
                'uploader': 'jaimeMF',
                'uploader_id': '69767071',
                'timestamp': 1386604920,
                'upload_date': '20131209',
                'duration': 9.927,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
                'uploader_url': 'https://soundcloud.com/jaimemf',
                'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
                'genres': ['youtubedl'],
            },
        },
        # downloadable song
        {
            'url': 'https://soundcloud.com/the80m/the-following',
            'md5': '9ffcddb08c87d74fb5808a3c183a1d04',
            'info_dict': {
                'id': '343609555',
                'ext': 'wav',
                'title': 'The Following',
                'description': '',
                'uploader': '80M',
                'uploader_id': '312384765',
                'uploader_url': 'https://soundcloud.com/the80m',
                'upload_date': '20170922',
                'timestamp': 1506120436,
                'duration': 397.228,
                'thumbnail': 'https://i1.sndcdn.com/artworks-000243916348-ktoo7d-original.jpg',
                'license': 'all-rights-reserved',
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
                'view_count': int,
                'genres': ['Dance & EDM'],
            },
        },
        # private link, downloadable format
        {
            'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
            'md5': '64a60b16e617d41d0bef032b7f55441e',
            'info_dict': {
                'id': '340344461',
                'ext': 'wav',
                'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
                'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
                'uploader': 'Ori Uplift Music',
                'uploader_id': '12563093',
                'timestamp': 1504206263,
                'upload_date': '20170831',
                'duration': 7449.096,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
                'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
                'uploader_url': 'https://soundcloud.com/oriuplift',
                'genres': ['Trance'],
            },
        },
        # no album art, use avatar pic for thumbnail
        {
            'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real',
            'md5': '59c7872bc44e5d99b7211891664760c2',
            'info_dict': {
                'id': '309699954',
                'ext': 'mp3',
                'title': 'Sideways (Prod. Mad Real)',
                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                'uploader': 'garyvee',
                'uploader_id': '2366352',
                'timestamp': 1488152409,
                'upload_date': '20170226',
                'duration': 207.012,
                'thumbnail': r're:https?://.*\.jpg',
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
                'uploader_url': 'https://soundcloud.com/garyvee',
                'genres': [],
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
            'md5': '8227c3473a4264df6b02ad7e5b7527ac',
            'info_dict': {
                'id': '583011102',
                'ext': 'opus',
                'title': 'Mezzo Valzer',
                'description': 'md5:f4d5f39d52e0ccc2b4f665326428901a',
                'uploader': 'Giovanni Sarani',
                'uploader_id': '3352531',
                'timestamp': 1551394171,
                'upload_date': '20190228',
                'duration': 180.157,
                'thumbnail': r're:https?://.*\.jpg',
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
                'genres': ['Piano'],
                'uploader_url': 'https://soundcloud.com/giovannisarani',
            },
        },
        {
            # AAC HQ format available (account with active subscription needed)
            'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
            'only_matching': True,
        },
        {
            # Go+ (account with active subscription needed)
            'url': 'https://soundcloud.com/taylorswiftofficial/look-what-you-made-me-do',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Fetch the track object for ``url`` and build its info dict.

        API URLs are queried directly by track id; permalinks go through the
        ``resolve`` endpoint. Format extraction is retried via RetryManager
        when it fails with HTTP 429 (rate limit); any other ExtractorError
        propagates.
        """
        mobj = self._match_valid_url(url)

        track_id = mobj.group('track_id')

        query = {}
        if track_id:
            info_json_url = self._API_V2_BASE + 'tracks/' + track_id
            full_title = track_id
            token = mobj.group('secret_token')
            if token:
                query['secret_token'] = token
        else:
            full_title = resolve_title = '{}/{}'.format(*mobj.group('uploader', 'title'))
            token = mobj.group('token')
            if token:
                # The share token is part of the URL to resolve, but not of the display title
                resolve_title += f'/{token}'
            info_json_url = self._resolv_url(self._BASE_URL + resolve_title)

        info = self._call_api(
            info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS)

        for retry in self.RetryManager():
            try:
                return self._extract_info_dict(info, full_title, token)
            except ExtractorError as e:
                if not isinstance(e.cause, HTTPError) or e.cause.status != 429:
                    raise
                self.report_warning(
                    'You have reached the API rate limit, which is ~600 requests per '
                    '10 minutes. Use the --extractor-retries and --retry-sleep options '
                    'to configure an appropriate retry count and wait time', only_once=True)
                retry.error = e.cause
2013-06-23 12:57:44 -06:00
2014-05-04 19:12:41 -06:00
2021-12-02 14:46:08 -07:00
class SoundcloudPlaylistBaseIE(SoundcloudBaseIE):
    """Base for extractors that turn a playlist JSON object into a playlist result."""

    def _extract_set(self, playlist, token=None):
        """Build a playlist result from a playlist object.

        playlist:  playlist JSON; ``id`` is required, ``tracks``, ``title``
                   and ``description`` are optional
        token:     secret token of a private playlist, if any

        When some tracks lack a ``permalink_url`` and a token is available,
        the track list is re-requested from the ``tracks`` endpoint using the
        playlist id and token. Tracks that still have no permalink are
        addressed through their API URL; tracks with neither permalink nor id
        are skipped.
        """
        playlist_id = str(playlist['id'])
        tracks = playlist.get('tracks') or []
        if not all(t.get('permalink_url') for t in tracks) and token:
            tracks = self._call_api(
                self._API_V2_BASE + 'tracks', playlist_id,
                'Downloading tracks', query={
                    'ids': ','.join(str(t['id']) for t in tracks),
                    'playlistId': playlist_id,
                    'playlistSecretToken': token,
                }, headers=self._HEADERS)

        entries = []
        for track in tracks:
            track_id = str_or_none(track.get('id'))
            url = track.get('permalink_url')
            if not url:
                if not track_id:
                    continue
                url = self._API_V2_BASE + 'tracks/' + track_id
                if token:
                    url += '?secret_token=' + token
            entries.append(self.url_result(
                url, SoundcloudIE.ie_key(), track_id))

        return self.playlist_result(
            entries, playlist_id,
            playlist.get('title'),
            playlist.get('description'))
2016-09-24 06:18:01 -06:00
2016-09-24 06:29:49 -06:00
class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
    """Extractor for soundcloud.com ``<uploader>/sets/<slug>`` playlists."""

    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[:\w\d-]+)(?:/(?P<token>[^?/]+))?'
    IE_NAME = 'soundcloud:set'
    _TESTS = [{
        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
        'info_dict': {
            'id': '2284613',
            'title': 'The Royal Concept EP',
            'description': 'md5:71d07087c7a449e8941a70a29e34671e',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
        'only_matching': True,
    }, {
        'url': 'https://soundcloud.com/discover/sets/weekly::flacmatic',
        'only_matching': True,
    }, {
        'url': 'https://soundcloud.com/discover/sets/charts-top:all-music:de',
        'only_matching': True,
    }, {
        'url': 'https://soundcloud.com/discover/sets/charts-top:hiphoprap:kr',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the set URL through the API and return its playlist result.

        Raises ExtractorError when the resolved object contains ``errors``.
        """
        mobj = self._match_valid_url(url)

        full_title = '{}/sets/{}'.format(*mobj.group('uploader', 'slug_title'))
        token = mobj.group('token')
        if token:
            full_title += '/' + token

        info = self._call_api(self._resolv_url(
            self._BASE_URL + full_title), full_title, headers=self._HEADERS)

        if 'errors' in info:
            msgs = (str(err['error_message']) for err in info['errors'])
            raise ExtractorError('unable to download video webpage: {}'.format(','.join(msgs)))

        return self._extract_set(info, token)
2013-09-14 13:41:49 -06:00
2021-12-02 14:46:08 -07:00
class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE):
    """Base for extractors whose playlist entries come from a paged API collection."""

    def _extract_playlist(self, base_url, playlist_id, playlist_title):
        """Return a playlist dict whose entries are produced lazily by ``_entries``.

        ``base_url`` is the first API page to request; ``playlist_id`` and
        ``playlist_title`` are copied into the result as ``id`` and ``title``.
        """
        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': self._entries(base_url, playlist_id),
        }

    def _entries(self, url, playlist_id):
        """Yield one result per item of every page, following ``next_href`` links.

        For each collection item the item itself, then its ``track`` and then
        its ``playlist`` value are tried; the first dict with a valid
        ``permalink_url`` becomes a ``url_result``. Items where none qualifies
        yield ``None``.

        Raises:
            ExtractorError: re-raised from the page request unless its cause is
                an HTTP 502, which is handed to the retry manager instead.
        """
        # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
        # https://developers.soundcloud.com/blog/offset-pagination-deprecated
        query = {
            'limit': 200,
            'linked_partitioning': '1',
            'offset': 0,
        }

        def resolve_entry(*candidates):
            # Does not depend on the current page, so it is defined once
            # rather than re-created on every loop iteration.
            for cand in candidates:
                if not isinstance(cand, dict):
                    continue
                permalink_url = url_or_none(cand.get('permalink_url'))
                if permalink_url:
                    return self.url_result(
                        permalink_url,
                        SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
                        str_or_none(cand.get('id')), cand.get('title'))

        for i in itertools.count():
            for retry in self.RetryManager():
                try:
                    response = self._call_api(
                        url, playlist_id, query=query, headers=self._HEADERS,
                        note=f'Downloading track page {i + 1}')
                    break
                except ExtractorError as e:
                    # Downloading page may result in intermittent 502 HTTP error
                    # See https://github.com/yt-dlp/yt-dlp/issues/872
                    if not isinstance(e.cause, HTTPError) or e.cause.status != 502:
                        raise
                    retry.error = e
                    continue

            for e in response['collection'] or []:
                yield resolve_entry(e, e.get('track'), e.get('playlist'))

            url = response.get('next_href')
            if not url:
                break
            # NOTE(review): presumably next_href already carries its own paging
            # cursor, so the initial offset is not sent again - confirm.
            query.pop('offset', None)
2017-07-29 05:41:42 -06:00
class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
    """Extract a user's page, or one of its resource tabs, as a paged playlist."""

    # Verbose-mode pattern: optional www./m. host prefix, the user slug, an
    # optional resource tab, then an optional trailing slash, query or fragment.
    _VALID_URL = r'''(?x)
                        https?://
                            (?:(?:www|m)\.)?soundcloud\.com/
                            (?P<user>[^/]+)
                            (?:/
                                (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
                            )?
                            /?(?:[?#].*)?$
                    '''
    IE_NAME = 'soundcloud:user'
    _TESTS = [{
        'url': 'https://soundcloud.com/soft-cell-official',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (All)',
        },
        'playlist_mincount': 28,
    }, {
        'url': 'https://soundcloud.com/soft-cell-official/tracks',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (Tracks)',
        },
        'playlist_mincount': 27,
    }, {
        'url': 'https://soundcloud.com/soft-cell-official/albums',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (Albums)',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://soundcloud.com/jcv246/sets',
        'info_dict': {
            'id': '12982173',
            'title': 'Jordi / cv (Sets)',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://soundcloud.com/jcv246/reposts',
        'info_dict': {
            'id': '12982173',
            'title': 'Jordi / cv (Reposts)',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'https://soundcloud.com/clalberg/likes',
        'info_dict': {
            'id': '11817582',
            'title': 'clalberg (Likes)',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://soundcloud.com/grynpyret/spotlight',
        'info_dict': {
            'id': '7098329',
            'title': 'Grynpyret (Spotlight)',
        },
        'playlist_mincount': 1,
    }]

    # Resource name -> API path template; %s is filled with the user id.
    # 'all' is the key used when the URL names no resource tab.
    _BASE_URL_MAP = {
        'all': 'stream/users/%s',
        'tracks': 'users/%s/tracks',
        'albums': 'users/%s/albums',
        'sets': 'users/%s/playlists',
        'reposts': 'stream/users/%s/reposts',
        'likes': 'users/%s/likes',
        'spotlight': 'users/%s/spotlight',
    }

    def _real_extract(self, url):
        """Resolve the user, then return the playlist for the requested resource.

        The playlist title has the form ``<username> (<Resource>)``.
        """
        mobj = self._match_valid_url(url)
        uploader = mobj.group('user')

        user = self._call_api(
            self._resolv_url(self._BASE_URL + uploader),
            uploader, 'Downloading user info', headers=self._HEADERS)

        resource = mobj.group('rsrc') or 'all'

        return self._extract_playlist(
            self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id'],
            str_or_none(user.get('id')),
            '{} ({})'.format(user['username'], resource.capitalize()))
2016-01-07 12:54:31 -07:00
2013-09-14 13:41:49 -06:00
2022-12-29 11:46:43 -07:00
class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE):
    """Extract a user's stream from a numeric ``api.soundcloud.com/users/<id>`` URL."""

    _VALID_URL = r'https?://api\.soundcloud\.com/users/(?P<id>\d+)'
    IE_NAME = 'soundcloud:user:permalink'
    _TESTS = [{
        'url': 'https://api.soundcloud.com/users/30909869',
        'info_dict': {
            'id': '30909869',
            'title': 'neilcic',
        },
        'playlist_mincount': 23,
    }]

    def _real_extract(self, url):
        """Resolve the user behind *url* and return the ``stream/users/<id>`` playlist.

        The playlist id is the resolved user's id as a string and the title is
        the user's ``username`` (``None`` if the response lacks one).
        """
        user_id = self._match_id(url)
        user = self._call_api(
            self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS)

        return self._extract_playlist(
            f'{self._API_V2_BASE}stream/users/{user["id"]}', str(user['id']), user.get('username'))
2017-07-29 05:41:42 -06:00
class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
    """Extract the tracks of a ``/stations/track/<user>/<track>`` page as a playlist."""

    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
    IE_NAME = 'soundcloud:trackstation'
    _TESTS = [{
        'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
        'info_dict': {
            'id': '286017854',
            'title': 'Track station: your text',
        },
        'playlist_mincount': 47,
    }]

    def _real_extract(self, url):
        """Resolve the station and return its ``stations/<id>/tracks`` playlist.

        The resolved object's ``id`` is used verbatim in the API path; the
        digits after ``soundcloud:track-stations:`` in that same ``id`` become
        the playlist id.
        """
        track_name = self._match_id(url)

        track = self._call_api(self._resolv_url(url), track_name, headers=self._HEADERS)
        track_id = self._search_regex(
            r'soundcloud:track-stations:(\d+)', track['id'], 'track id')

        return self._extract_playlist(
            self._API_V2_BASE + 'stations/{}/tracks'.format(track['id']),
            track_id, 'Track station: {}'.format(track['title']))
2014-05-04 19:12:41 -06:00
2021-12-19 18:14:19 -07:00
class SoundcloudRelatedIE(SoundcloudPagedPlaylistBaseIE):
    """Extract a track's ``albums``, ``sets`` or ``recommended`` listing as a playlist."""

    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<slug>[\w\d-]+/[\w\d-]+)/(?P<relation>albums|sets|recommended)'
    IE_NAME = 'soundcloud:related'
    _TESTS = [{
        'url': 'https://soundcloud.com/wajang/sexapil-pingers-5/recommended',
        'info_dict': {
            'id': '1084577272',
            'title': 'Sexapil - Pingers 5 (Recommended)',
        },
        'playlist_mincount': 50,
    }, {
        'url': 'https://soundcloud.com/wajang/sexapil-pingers-5/albums',
        'info_dict': {
            'id': '1084577272',
            'title': 'Sexapil - Pingers 5 (Albums)',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://soundcloud.com/wajang/sexapil-pingers-5/sets',
        'info_dict': {
            'id': '1084577272',
            'title': 'Sexapil - Pingers 5 (Sets)',
        },
        'playlist_mincount': 4,
    }]

    # Relation name from the URL -> API path template; %s is the track id.
    _BASE_URL_MAP = {
        'albums': 'tracks/%s/albums',
        'sets': 'tracks/%s/playlists_without_albums',
        'recommended': 'tracks/%s/related',
    }

    def _real_extract(self, url):
        """Resolve the track, then return the playlist for the requested relation.

        The playlist title is ``<track title> (<Relation>)``, falling back to
        the URL slug when the track has no title.

        Raises:
            ExtractorError: (``expected=True``) if the resolved object has a
                non-empty ``errors`` list; every ``error_message`` is reported.
        """
        slug, relation = self._match_valid_url(url).group('slug', 'relation')

        track = self._call_api(
            self._resolv_url(self._BASE_URL + slug),
            slug, 'Downloading track info', headers=self._HEADERS)

        if track.get('errors'):
            # One f-string instead of an f-string combined with %-formatting.
            errors = ', '.join(str(err['error_message']) for err in track['errors'])
            raise ExtractorError(f'{self.IE_NAME} said: {errors}', expected=True)

        return self._extract_playlist(
            self._API_V2_BASE + self._BASE_URL_MAP[relation] % track['id'], str(track['id']),
            '{} ({})'.format(track.get('title') or slug, relation.capitalize()))
2021-12-19 18:14:19 -07:00
2016-09-24 06:29:49 -06:00
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
    """Extract a playlist addressed by numeric id on the ``api``/``api-v2`` hosts."""

    _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
    IE_NAME = 'soundcloud:playlist'
    _TESTS = [{
        'url': 'https://api.soundcloud.com/playlists/4110309',
        'info_dict': {
            'id': '4110309',
            'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
            'description': 're:.*?TILT Brass - Bowery Poetry Club',
        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        """Fetch ``playlists/<id>`` from the v2 API and pass it to ``_extract_set``.

        A ``secret_token`` found in the URL is forwarded both as a query
        parameter of the API request and as the second argument of
        ``_extract_set``.
        """
        mobj = self._match_valid_url(url)
        playlist_id = mobj.group('id')

        query = {}
        token = mobj.group('token')
        if token:
            query['secret_token'] = token

        data = self._call_api(
            self._API_V2_BASE + 'playlists/' + playlist_id,
            playlist_id, 'Downloading playlist', query=query, headers=self._HEADERS)

        return self._extract_set(data, token)
2015-10-17 10:23:46 -06:00
2021-12-02 14:46:08 -07:00
class SoundcloudSearchIE(SoundcloudBaseIE, SearchInfoExtractor):
    """Search extractor registered under the ``scsearch`` key."""

    IE_NAME = 'soundcloud:search'
    IE_DESC = 'Soundcloud search'
    _SEARCH_KEY = 'scsearch'
    _TESTS = [{
        'url': 'scsearch15:post-avant jazzcore',
        'info_dict': {
            'id': 'post-avant jazzcore',
            'title': 'post-avant jazzcore',
        },
        'playlist_count': 15,
    }]

    # Upper bound applied to the per-request 'limit', and the value used when
    # the caller supplies no limit at all.
    _MAX_RESULTS_PER_PAGE = 200
    _DEFAULT_RESULTS_PER_PAGE = 50

    def _get_collection(self, endpoint, collection_id, **query):
        """Yield a ``url_result`` for every non-empty item of a paged collection.

        ``query`` holds extra URL parameters. Its ``limit`` is clamped to
        ``_MAX_RESULTS_PER_PAGE`` (defaulting to ``_DEFAULT_RESULTS_PER_PAGE``),
        and ``linked_partitioning`` and ``offset`` are always overwritten.
        Paging follows each response's ``next_href`` until it is missing or
        empty.
        """
        limit = min(
            query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
            self._MAX_RESULTS_PER_PAGE)
        query.update({
            'limit': limit,
            'linked_partitioning': 1,
            'offset': 0,
        })
        next_url = update_url_query(self._API_V2_BASE + endpoint, query)

        for i in itertools.count(1):
            response = self._call_api(
                next_url, collection_id, f'Downloading page {i}',
                'Unable to download API page', headers=self._HEADERS)

            for item in response.get('collection') or []:
                if item:
                    yield self.url_result(
                        item['uri'], SoundcloudIE.ie_key(),
                        **self._extract_info_dict(item, extract_flat=True))

            next_url = response.get('next_href')
            if not next_url:
                break

    def _get_n_results(self, query, n):
        """Return a playlist of the first *n* ``search/tracks`` results for *query*.

        An *n* of ``float('inf')`` means no cap: the result generator is
        passed through unsliced. *query* is used as both playlist id and title.
        """
        return self.playlist_result(itertools.islice(
            self._get_collection('search/tracks', query, limit=n, q=query),
            0, None if n == float('inf') else n), query, query)