2014-01-21 06:16:44 -07:00
from __future__ import unicode_literals
2016-11-06 07:42:41 -07:00
import re
2013-06-23 14:27:16 -06:00
from . common import InfoExtractor
2016-05-05 09:02:54 -06:00
from . . utils import (
2020-05-19 14:39:41 -06:00
determine_ext ,
2016-05-05 09:02:54 -06:00
ExtractorError ,
int_or_none ,
2019-10-05 09:04:49 -06:00
merge_dicts ,
2016-05-05 09:02:54 -06:00
str_to_int ,
unified_strdate ,
2018-07-21 06:08:28 -06:00
url_or_none ,
2016-05-05 09:02:54 -06:00
)
2013-06-23 14:27:16 -06:00
class RedTubeIE(InfoExtractor):
    """Extractor for redtube.com video pages and embed.redtube.com player URLs."""

    _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.redtube.com/38864951',
        'md5': '4fba70cbca3aefd25767ab4b523c9878',
        'info_dict': {
            'id': '38864951',
            'ext': 'mp4',
            'title': 'Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu',
            'description': 'Watch video Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu on Redtube, home of free Blowjob porn videos and Blonde sex movies online. Video length: (10:46) - Uploaded by leolulu - Verified User - Starring Pornstar: Leolulu',
            'upload_date': '20210111',
            'timestamp': 1610343109,
            'duration': 646,
            'view_count': int,
            'age_limit': 18,
            'thumbnail': r're:https://\wi-ph\.rdtcdn\.com/videos/.+/.+\.jpg',
        },
    }, {
        'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
        'only_matching': True,
    }, {
        'url': 'http://it.redtube.com/66418',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the embed.redtube.com player URLs referenced by <iframe> tags in webpage.

        The pattern has a single capturing group, so re.findall yields a
        list of the matched URL strings (possibly protocol-relative).
        """
        return re.findall(
            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
            webpage)

    def _raise_if_unavailable(self, webpage, video_id):
        """Raise an expected ExtractorError if the page carries a removed/private marker."""
        # Each entry: (substrings looked up in the page, message suffix).
        unavailable_markers = (
            (('video-deleted-info', '>This video has been removed'), 'has been removed'),
            (('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'),
        )
        for markers, reason in unavailable_markers:
            if any(marker in webpage for marker in markers):
                raise ExtractorError(
                    'Video %s %s' % (video_id, reason), expected=True)

    def _collect_formats(self, webpage, video_id):
        """Build the sorted list of format dicts for the video.

        Three places are consulted, in this order:
          1. an inline ``sources: {...}`` JSON object mapping format id to URL;
          2. an inline ``mediaDefinition: [...]`` JSON array, whose entries
             either describe a format directly or point at a further JSON
             list that is downloaded and processed the same way;
          3. if neither produced anything, a ``<source ... type="video/mp4">``
             tag (this last lookup is fatal when it does not match).
        """
        formats = []

        sources = self._parse_json(
            self._search_regex(
                r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'),
            video_id, fatal=False)
        if isinstance(sources, dict):
            for source_id, source_url in sources.items():
                # Validate like the mediaDefinition path does, so that a
                # non-string / non-URL value cannot end up as a format URL.
                source_url = url_or_none(source_url)
                if not source_url:
                    continue
                formats.append({
                    'url': source_url,
                    'format_id': source_id,
                    'height': int_or_none(source_id),
                })

        medias = self._parse_json(
            self._search_regex(
                r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage,
                'media definitions', default='{}'),
            video_id, fatal=False)
        # The '{}' default parses to a dict, which the isinstance check
        # below turns into "no media definitions".
        for entry in medias if isinstance(medias, list) else []:
            if not isinstance(entry, dict):
                # Skip malformed entries instead of failing on .get()
                continue
            entry_url = url_or_none(entry.get('videoUrl'))
            if not entry_url:
                continue
            entry_format = entry.get('format')
            if entry_format == 'hls' or (entry_format == 'mp4' and not entry.get('quality')):
                # Such entries are treated as a pointer to a JSON document
                # that should contain the actual list of variants.
                variants = self._download_json(entry_url, video_id, fatal=False)
            else:
                variants = [entry]
            for variant in variants if isinstance(variants, list) else []:
                if not isinstance(variant, dict):
                    continue
                variant_url = url_or_none(variant.get('videoUrl'))
                if not variant_url:
                    continue
                variant_format = variant.get('format')
                if variant_format == 'hls' or determine_ext(variant_url) == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        variant_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id=variant_format or 'hls',
                        fatal=False))
                    continue
                quality = variant.get('quality')
                formats.append({
                    'url': variant_url,
                    'ext': 'mp4',
                    'format_id': quality,
                    'height': int_or_none(quality),
                })

        if not formats:
            video_url = self._html_search_regex(
                r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
            formats.append({'url': video_url, 'ext': 'mp4'})

        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        """Download the video page for url and return its info dict.

        Raises ExtractorError (expected) when the page indicates that the
        video has been removed or is private.
        """
        video_id = self._match_id(url)
        # Always fetch the www page built from the numeric id, whatever form
        # (embed, language subdomain) the input URL had; use HTTPS like the
        # canonical test URL.
        webpage = self._download_webpage(
            'https://www.redtube.com/%s' % video_id, video_id)

        self._raise_if_unavailable(webpage, video_id)

        # JSON-LD metadata is the base; values found there take part in the
        # final merge_dicts() call together with the scraped fields below.
        info = self._search_json_ld(webpage, video_id, default={})

        if not info.get('title'):
            info['title'] = self._html_search_regex(
                (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
                 r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
                webpage, 'title', group='title',
                default=None) or self._og_search_title(webpage)

        formats = self._collect_formats(webpage, video_id)

        thumbnail = self._og_search_thumbnail(webpage)
        upload_date = unified_strdate(self._search_regex(
            r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<',
            webpage, 'upload date', default=None))
        duration = int_or_none(self._og_search_property(
            'video:duration', webpage, default=None) or self._search_regex(
                r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
        view_count = str_to_int(self._search_regex(
            (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
             r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)',
             r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'),
            webpage, 'view count', default=None))

        # No self-labeling, but they describe themselves as
        # "Home of Videos Porno"
        age_limit = 18

        return merge_dicts(info, {
            'id': video_id,
            'ext': 'mp4',
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'age_limit': age_limit,
            'formats': formats,
        })