2014-01-27 16:56:09 -07:00
import itertools
2013-10-16 08:57:40 -06:00
from . common import InfoExtractor
2014-12-13 04:24:42 -07:00
from . . utils import (
2017-09-10 06:09:27 -06:00
bool_or_none ,
2024-05-26 13:27:21 -06:00
determine_ext ,
2017-07-04 03:26:02 -06:00
int_or_none ,
2021-08-22 13:02:00 -06:00
parse_qs ,
2024-08-26 15:17:25 -06:00
traverse_obj ,
2017-09-10 06:09:27 -06:00
try_get ,
unified_timestamp ,
2018-07-21 06:08:28 -06:00
url_or_none ,
2013-10-16 08:57:40 -06:00
)
2017-09-10 05:39:13 -06:00
class RutubeBaseIE(InfoExtractor):
    """Shared API-download and parsing helpers for the Rutube extractors."""

    def _download_api_info(self, video_id, query=None):
        """Download the metadata JSON of ``video_id`` from the video API.

        ``query`` may carry extra query parameters to send with the request.
        ``format=json`` is always set, overriding any ``format`` entry passed in.
        """
        # Build a new dict instead of writing into the caller's one: the same
        # query object is handed to several API helpers by the extractors
        query = {**(query or {}), 'format': 'json'}
        return self._download_json(
            f'http://rutube.ru/api/video/{video_id}/',
            video_id, 'Downloading video JSON',
            'Unable to download video JSON', query=query)

    def _extract_info(self, video, video_id=None, require_title=True):
        """Convert a video JSON object into an info dict (without formats).

        ``video`` is the decoded JSON mapping. ``video_id`` is used as the id
        when the JSON carries no ``id`` of its own. With ``require_title`` set,
        a missing ``title`` key raises ``KeyError``; otherwise the title may
        be ``None``.
        """
        title = video['title'] if require_title else video.get('title')

        # Map the is_adult flag to an age limit; leave it as None when absent
        age_limit = video.get('is_adult')
        if age_limit is not None:
            age_limit = 18 if age_limit is True else 0

        uploader_id = try_get(video, lambda x: x['author']['id'])
        category = try_get(video, lambda x: x['category']['name'])
        description = video.get('description')
        duration = int_or_none(video.get('duration'))

        return {
            # Without a fallback video_id the JSON must provide the id itself
            'id': (video.get('id') or video_id) if video_id else video['id'],
            'title': title,
            'description': description,
            'thumbnail': video.get('thumbnail_url'),
            'duration': duration,
            'uploader': try_get(video, lambda x: x['author']['name']),
            'uploader_id': str(uploader_id) if uploader_id else None,
            'timestamp': unified_timestamp(video.get('created_ts')),
            'categories': [category] if category else None,
            'age_limit': age_limit,
            'view_count': int_or_none(video.get('hits')),
            'comment_count': int_or_none(video.get('comments_count')),
            'is_live': bool_or_none(video.get('is_livestream')),
            'chapters': self._extract_chapters_from_description(description, duration),
        }

    def _download_and_extract_info(self, video_id, query=None):
        """Download the metadata JSON of ``video_id`` and parse it into an info dict."""
        return self._extract_info(
            self._download_api_info(video_id, query=query), video_id)

    def _download_api_options(self, video_id, query=None):
        """Download the play options JSON of ``video_id``.

        Sent with the geo verification headers. ``query`` is handled the same
        way as in ``_download_api_info``: copied, with ``format=json`` forced.
        """
        # Copy rather than mutate the caller's dict (see _download_api_info)
        query = {**(query or {}), 'format': 'json'}
        return self._download_json(
            f'http://rutube.ru/api/play/options/{video_id}/',
            video_id, 'Downloading options JSON',
            'Unable to download options JSON',
            headers=self.geo_verification_headers(), query=query)

    def _extract_formats(self, options, video_id):
        """Build the list of formats from a play options JSON object.

        Every entry of ``options['video_balancer']`` is expanded according to
        the extension of its URL (m3u8, f4m, or a direct URL); HLS URLs found
        under ``live_streams`` are appended afterwards.
        """
        formats = []
        for format_id, format_url in options['video_balancer'].items():
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_id, fatal=False))
            else:
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                })
        for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})):
            formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4', fatal=False))
        return formats

    def _download_and_extract_formats(self, video_id, query=None):
        """Download the play options of ``video_id`` and turn them into formats."""
        return self._extract_formats(
            self._download_api_options(video_id, query=query), video_id)
2017-09-10 05:39:13 -06:00
class RutubeIE(RutubeBaseIE):
    """Extractor for single Rutube videos addressed by a 32-character id."""

    IE_NAME = 'rutube'
    IE_DESC = 'Rutube videos'
    _VALID_URL = r'https?://rutube\.ru/(?:(?:live/)?video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1']

    _TESTS = [{
        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
        'md5': 'e33ac625efca66aba86cbec9851f2692',
        'info_dict': {
            'id': '3eac3b4561676c17df9132a9a1e62e3e',
            'ext': 'mp4',
            'title': 'Раненный кенгуру забежал в аптеку',
            'description': 'http://www.ntdtv.ru',
            'duration': 81,
            'uploader': 'NTDRussian',
            'uploader_id': '29790',
            'timestamp': 1381943602,
            'upload_date': '20131016',
            'age_limit': 0,
            'view_count': int,
            'thumbnail': 'http://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg',
            'categories': ['Новости и СМИ'],
            'chapters': [],
        },
        'expected_warnings': ['Unable to download f4m'],
    }, {
        'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
        'only_matching': True,
    }, {
        'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
        'only_matching': True,
    }, {
        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
        'only_matching': True,
    }, {
        'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
        'only_matching': True,
    }, {
        'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg',
        'md5': 'd106225f15d625538fe22971158e896f',
        'info_dict': {
            'id': '884fb55f07a97ab673c7d654553e0f48',
            'ext': 'mp4',
            'title': 'Яцуноками, Nioh2',
            'description': 'Nioh2: финал сражения с боссом Яцуноками',
            'duration': 15,
            'uploader': 'mexus',
            'uploader_id': '24222106',
            'timestamp': 1670646232,
            'upload_date': '20221210',
            'age_limit': 0,
            'view_count': int,
            'thumbnail': 'http://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg',
            'categories': ['Видеоигры'],
            'chapters': [],
        },
        'expected_warnings': ['Unable to download f4m'],
    }, {
        'url': 'https://rutube.ru/video/c65b465ad0c98c89f3b25cb03dcc87c6/',
        'info_dict': {
            'id': 'c65b465ad0c98c89f3b25cb03dcc87c6',
            'ext': 'mp4',
            'chapters': 'count:4',
            'categories': ['Бизнес и предпринимательство'],
            'description': 'md5:252feac1305257d8c1bab215cedde75d',
            'thumbnail': 'http://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png',
            'duration': 782,
            'age_limit': 0,
            'uploader_id': '23491359',
            'timestamp': 1677153329,
            'view_count': int,
            'upload_date': '20230223',
            'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании',
            'uploader': 'Стас Быков',
        },
        'expected_warnings': ['Unable to download f4m'],
    }, {
        'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/',
        'info_dict': {
            'id': 'c58f502c7bb34a8fcdd976b221fca292',
            'ext': 'mp4',
            'categories': ['Телепередачи'],
            'description': '',
            'thumbnail': 'http://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg',
            'live_status': 'is_live',
            'age_limit': 0,
            'uploader_id': '23460655',
            'timestamp': 1652972968,
            'view_count': int,
            'upload_date': '20220519',
            'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'uploader': 'Первый канал',
        },
    }, {
        'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/',
        'only_matching': True,
    }, {
        'url': 'https://rutube.ru/live/video/private/c58f502c7bb34a8fcdd976b221fca292/',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle ``url``.

        URLs accepted by ``RutubePlaylistIE`` are rejected here, so that
        extractor takes precedence for them.
        """
        return False if RutubePlaylistIE.suitable(url) else super().suitable(url)

    def _real_extract(self, url):
        """Extract metadata and formats for the video referenced by ``url``."""
        video_id = self._match_id(url)
        # The URL's query parameters are forwarded to both API requests
        query = parse_qs(url)
        info = self._download_and_extract_info(video_id, query)
        info['formats'] = self._download_and_extract_formats(video_id, query)
        return info
2014-01-27 16:56:09 -07:00
2019-02-10 10:49:51 -07:00
class RutubeEmbedIE(RutubeBaseIE):
    """Extractor for Rutube embed URLs that carry a numeric embed id."""

    IE_NAME = 'rutube:embed'
    IE_DESC = 'Rutube embedded videos'
    _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'

    _TESTS = [{
        'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
        'info_dict': {
            'id': 'a10e53b86e8f349080f718582ce4c661',
            'ext': 'mp4',
            'timestamp': 1387830582,
            'upload_date': '20131223',
            'uploader_id': '297833',
            'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
            'uploader': 'subziro89 ILya',
            'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://rutube.ru/play/embed/8083783',
        'only_matching': True,
    }, {
        # private video
        'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the embed id to the actual video and extract it.

        The play options are requested with the embed id; their
        ``effective_video`` value is then used as the video id for the
        metadata request.
        """
        embed_id = self._match_id(url)
        # Query may contain private videos token and should be passed to API
        # requests (see #19163)
        query = parse_qs(url)
        options = self._download_api_options(embed_id, query)
        video_id = options['effective_video']
        formats = self._extract_formats(options, video_id)
        info = self._download_and_extract_info(video_id, query)
        info.update({
            'extractor_key': 'Rutube',
            'formats': formats,
        })
        return info
2015-01-03 19:15:27 -07:00
2017-09-10 05:39:13 -06:00
class RutubePlaylistBaseIE(RutubeBaseIE):
    """Base class for paginated Rutube listings.

    Subclasses provide ``_PAGE_TEMPLATE`` (and may override
    ``_next_page_url``) to point at the listing API they page through.
    """

    def _next_page_url(self, page_num, playlist_id, *args, **kwargs):
        """Return the API URL of page ``page_num`` of ``playlist_id``."""
        return self._PAGE_TEMPLATE % (playlist_id, page_num)

    def _entries(self, playlist_id, *args, **kwargs):
        """Yield url-type entries for every video of the listing.

        Extra positional and keyword arguments are passed through to
        ``_next_page_url``.
        """
        next_page_url = None
        for pagenum in itertools.count(1):
            # Prefer the "next" link reported by the previous page; build the
            # URL from the template only when there is none (the first page)
            page = self._download_json(
                next_page_url or self._next_page_url(
                    pagenum, playlist_id, *args, **kwargs),
                playlist_id, f'Downloading page {pagenum}')

            results = page.get('results')
            if not results or not isinstance(results, list):
                break

            for result in results:
                video_url = url_or_none(result.get('video_url'))
                if not video_url:
                    continue
                # Listing items may lack a title, so it is not required here
                entry = self._extract_info(result, require_title=False)
                entry.update({
                    '_type': 'url',
                    'url': video_url,
                    'ie_key': RutubeIE.ie_key(),
                })
                yield entry

            next_page_url = page.get('next')
            if not next_page_url or not page.get('has_next'):
                break

    def _extract_playlist(self, playlist_id, *args, **kwargs):
        """Return a playlist result for ``playlist_id``.

        The optional ``playlist_name`` keyword argument is used as the
        playlist title; all arguments are also forwarded to ``_entries``.
        """
        return self.playlist_result(
            self._entries(playlist_id, *args, **kwargs),
            playlist_id, kwargs.get('playlist_name'))

    def _real_extract(self, url):
        """Extract the listing whose id is matched from ``url``."""
        return self._extract_playlist(self._match_id(url))
2021-12-12 08:56:36 -07:00
class RutubeTagsIE(RutubePlaylistBaseIE):
    """Extractor for Rutube tag pages, paged through the tags video API."""

    IE_NAME = 'rutube:tags'
    IE_DESC = 'Rutube tags'
    _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://rutube.ru/tags/video/1800/',
        'info_dict': {
            'id': '1800',
        },
        'playlist_mincount': 68,
    }]

    # Filled with (tag id, page number)
    _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
2017-09-10 05:39:13 -06:00
class RutubeMovieIE(RutubePlaylistBaseIE):
    """Extractor for Rutube ``metainfo/tv`` pages, returned as playlists."""

    IE_NAME = 'rutube:movie'
    IE_DESC = 'Rutube movies'
    _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'

    # Filled with the movie id
    _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
    # Filled with (movie id, page number)
    _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'

    def _real_extract(self, url):
        """Download the movie JSON and return its videos as a playlist.

        The ``name`` field of the movie JSON, when present, becomes the
        playlist title.
        """
        movie_id = self._match_id(url)
        movie = self._download_json(
            self._MOVIE_TEMPLATE % movie_id, movie_id,
            'Downloading movie JSON')
        return self._extract_playlist(
            movie_id, playlist_name=movie.get('name'))
2014-01-27 18:47:17 -07:00
2017-09-10 05:39:13 -06:00
class RutubePersonIE(RutubePlaylistBaseIE):
    """Extractor for Rutube ``video/person`` pages, paged through the person video API."""

    IE_NAME = 'rutube:person'
    IE_DESC = 'Rutube person videos'
    _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://rutube.ru/video/person/313878/',
        'info_dict': {
            'id': '313878',
        },
        'playlist_mincount': 37,
    }]

    # Filled with (person id, page number)
    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
2017-07-04 03:26:02 -06:00
2017-09-10 05:39:13 -06:00
class RutubePlaylistIE(RutubePlaylistBaseIE):
    """Extractor for video URLs that reference a playlist via ``pl_id`` and ``pl_type``."""

    IE_NAME = 'rutube:playlist'
    IE_DESC = 'Rutube playlists'
    _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag',
        'info_dict': {
            'id': '3097',
        },
        'playlist_count': 27,
    }, {
        'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
        'only_matching': True,
    }]

    # Filled with (playlist kind, playlist id, page number)
    _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json'

    @classmethod
    def suitable(cls, url):
        """Return True only if ``url`` matches and names a usable playlist.

        Besides matching ``_VALID_URL``, the query string must contain a
        non-empty ``pl_type`` and a ``pl_id`` that parses to a non-zero
        integer.
        """
        # NOTE(review): this repeats the module-level import; presumably it is
        # kept so the method still works when evaluated outside this module -
        # confirm before removing
        from ..utils import int_or_none, parse_qs

        if not super().suitable(url):
            return False
        params = parse_qs(url)
        # Coerce to a real bool; the truthiness of the result is unchanged
        return bool(
            params.get('pl_type', [None])[0]
            and int_or_none(params.get('pl_id', [None])[0]))

    def _next_page_url(self, page_num, playlist_id, item_kind):
        """Return the API URL of page ``page_num`` for a playlist of kind ``item_kind``."""
        return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num)

    def _real_extract(self, url):
        """Extract the playlist named by the ``pl_id``/``pl_type`` query parameters."""
        qs = parse_qs(url)
        playlist_kind = qs['pl_type'][0]
        playlist_id = qs['pl_id'][0]
        return self._extract_playlist(playlist_id, item_kind=playlist_kind)
2021-12-12 08:56:36 -07:00
class RutubeChannelIE(RutubePlaylistBaseIE):
    """Extractor for Rutube channel video pages, paged through the person video API."""

    IE_NAME = 'rutube:channel'
    IE_DESC = 'Rutube channel'
    _VALID_URL = r'https?://rutube\.ru/channel/(?P<id>\d+)/videos'
    _TESTS = [{
        'url': 'https://rutube.ru/channel/639184/videos/',
        'info_dict': {
            'id': '639184',
        },
        'playlist_mincount': 133,
    }]

    # Filled with (channel id, page number)
    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'