2014-07-22 09:34:34 -06:00
from __future__ import unicode_literals
2016-06-19 16:40:00 -06:00
from . theplatform import ThePlatformFeedIE
2015-11-21 09:18:17 -07:00
from . . utils import (
2018-03-19 11:27:39 -06:00
ExtractorError ,
2016-04-01 00:33:37 -06:00
int_or_none ,
find_xpath_attr ,
2016-09-22 12:27:57 -06:00
xpath_element ,
xpath_text ,
update_url_query ,
2015-11-21 09:18:17 -07:00
)
2013-12-15 19:53:43 -07:00
2016-06-19 16:40:00 -06:00
class CBSBaseIE(ThePlatformFeedIE):
    """Base class for CBS extractors; customises SMIL caption discovery."""

    def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
        """Collect caption tracks advertised through SMIL ``<param>`` elements.

        For each known caption parameter name, the first ``param`` element
        whose ``name`` attribute matches is looked up and its ``value``
        attribute is taken as the caption URL.

        :param smil: parsed SMIL document (or element) to search.
        :param namespace: optional XML namespace applied to the ``param``
            XPath via ``self._xpath_ns``.
        :param subtitles_lang: key under which every found track is stored.
        :return: dict mapping ``subtitles_lang`` to a list of
            ``{'ext': ..., 'url': ...}`` dicts; empty when nothing is found.
        """
        subtitles = {}
        # The namespaced XPath does not depend on the parameter name, so it
        # is built once instead of on every iteration.
        param_xpath = self._xpath_ns('.//param', namespace)
        caption_params = (
            ('sMPTE-TTCCURL', 'tt'),
            ('ClosedCaptionURL', 'ttml'),
            ('webVTTCaptionURL', 'vtt'),
        )
        for param_name, ext in caption_params:
            cc_e = find_xpath_attr(smil, param_xpath, 'name', param_name)
            if cc_e is None:
                continue
            cc_url = cc_e.get('value')
            if not cc_url:
                # Parameter present but without a usable URL.
                continue
            subtitles.setdefault(subtitles_lang, []).append({
                'ext': ext,
                'url': cc_url,
            })
        return subtitles
2016-04-01 03:12:29 -06:00
class CBSIE(CBSBaseIE):
    """Extractor for cbs.com show videos and colbertlateshow.com clips.

    Also accepts the internal ``cbs:<content id>`` pseudo-URL form.
    """

    _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
        'info_dict': {
            'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
            'ext': 'mp4',
            'title': 'Connect Chat feat. Garth Brooks',
            'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
            'duration': 1495,
            'timestamp': 1385585425,
            'upload_date': '20131127',
            'uploader': 'CBSI-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        '_skip': 'Blocked outside the US',
    }, {
        'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
        'only_matching': True,
    }, {
        'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
        'only_matching': True,
    }]

    def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
        """Build the info dict for a single CBS content id.

        The CBS player service is queried for the ``<item>`` entries of the
        video; each distinct asset type found there is then requested from
        thePlatform as SMIL and its formats/subtitles are accumulated.

        :param content_id: CBS content id (also used as the video id).
        :param site: value sent as the ``partner`` query parameter.
        :param mpx_acc: numeric account id interpolated into the thePlatform
            media path.
        :return: info dict with thePlatform metadata, overlaid with the
            fields read from the player service response.
        :raises ExtractorError: if the service response has no ``<item>``,
            or if every SMIL request failed and no format was collected
            (the last failure is re-raised).
        """
        items_data = self._download_xml(
            'http://can.cbs.com/thunder/player/videoPlayerService.php',
            content_id, query={'partner': site, 'contentId': content_id})
        video_data = xpath_element(items_data, './/item')
        if video_data is None:
            # Without an <item> the xpath_text() lookups below would fail
            # with an AttributeError on None; report a proper extraction
            # error instead.
            raise ExtractorError(
                'Unable to find video data for %s' % content_id)
        # The title element name appears in two casings; try both.
        title = (
            xpath_text(video_data, 'videoTitle', 'title')
            or xpath_text(video_data, 'videotitle', 'title'))
        tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
        tp_release_url = 'http://link.theplatform.com/s/' + tp_path

        asset_types = []
        subtitles = {}
        formats = []
        last_e = None
        for item in items_data.findall('.//item'):
            asset_type = xpath_text(item, 'assetType')
            # Skip items without an asset type, asset types already
            # processed, and the HLS_FPS / DASH_CENC variants.
            # NOTE(review): presumably those two are DRM-protected
            # streams - confirm.
            if (not asset_type
                    or asset_type in asset_types
                    or 'HLS_FPS' in asset_type
                    or 'DASH_CENC' in asset_type):
                continue
            asset_types.append(asset_type)
            query = {
                'mbr': 'true',
                'assetTypes': asset_type,
            }
            # Request the container formats matching the asset type family;
            # other asset types are requested without a 'formats' filter.
            if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'):
                query['formats'] = 'MPEG4,M3U'
            elif asset_type in ('RTMP', 'WIFI', '3G'):
                query['formats'] = 'MPEG4,FLV'
            try:
                tp_formats, tp_subtitles = self._extract_theplatform_smil(
                    update_url_query(tp_release_url, query), content_id,
                    'Downloading %s SMIL data' % asset_type)
            except ExtractorError as e:
                # A single failing asset type is not fatal; remember the
                # error in case nothing else yields formats.
                last_e = e
                continue
            formats.extend(tp_formats)
            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
        if last_e and not formats:
            raise last_e
        self._sort_formats(formats)

        info = self._extract_theplatform_metadata(tp_path, content_id)
        info.update({
            'id': content_id,
            'title': title,
            'series': xpath_text(video_data, 'seriesTitle'),
            'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
            'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
            # videoLength is passed through int_or_none with 1000 as second
            # argument. NOTE(review): presumably milliseconds scaled to
            # seconds - confirm against int_or_none's signature.
            'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
            'thumbnail': xpath_text(video_data, 'previewImageURL'),
            'formats': formats,
            'subtitles': subtitles,
        })
        return info

    def _real_extract(self, url):
        """Extract the content id from ``url`` and return its info dict."""
        content_id = self._match_id(url)
        return self._extract_video_info(content_id)