2014-01-04 20:30:00 -07:00
from __future__ import unicode_literals
2013-06-26 09:55:54 -06:00
import re
from . common import InfoExtractor
from . . utils import (
2014-04-02 21:56:28 -06:00
int_or_none ,
2014-01-04 20:30:00 -07:00
unescapeHTML ,
2014-02-02 10:24:20 -07:00
find_xpath_attr ,
2015-04-20 13:18:38 -06:00
smuggle_url ,
2015-04-24 09:46:51 -06:00
determine_ext ,
2013-06-26 09:55:54 -06:00
)
2015-04-20 13:18:38 -06:00
from . senateisvp import SenateISVPIE
2013-06-26 09:55:54 -06:00
2014-01-04 20:30:00 -07:00
2013-06-26 09:55:54 -06:00
class CSpanIE(InfoExtractor):
    """Information extractor for video pages on c-span.org.

    The web page is searched for a ``data-progid`` / ``data-clipid``
    attribute.  When one is found, its numeric id is used to query the
    player JSON service and the flash XML service.  Otherwise the page is
    checked for an embedded Senate ISVP iframe, which is handed over to the
    ``SenateISVP`` extractor.
    """

    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
    IE_DESC = 'C-SPAN'
    _TESTS = [{
        'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
        'md5': '067803f994e049b455a58b16e5aab442',
        'info_dict': {
            'id': '315139',
            'ext': 'mp4',
            'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
            'description': 'Attorney General Eric Holder speaks to reporters following the Supreme Court decision in [Shelby County v. Holder], in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced.',
        },
        'skip': 'Regularly fails on travis, for unknown reasons',
    }, {
        'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
        'md5': '4eafd1e91a75d2b1e6a3cbd0995816a2',
        'info_dict': {
            'id': 'c4486943',
            'ext': 'mp4',
            'title': 'CSPAN - International Health Care Models',
            'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
        }
    }, {
        'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
        'md5': '446562a736c6bf97118e389433ed88d4',
        'info_dict': {
            'id': '342759',
            'ext': 'mp4',
            'title': 'General Motors Ignition Switch Recall',
            'duration': 14848,
            'description': 'md5:118081aedd24bf1d3b68b3803344e7f3'
        },
    }, {
        # Video from senate.gov
        'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
        'info_dict': {
            'id': 'judiciary031715',
            'ext': 'flv',
            'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
        }
    }]

    @staticmethod
    def _xml_string(doc, name):
        """Return the text of the ``<string name="...">`` node, or None.

        The node is compared against None explicitly because an
        ElementTree element without children evaluates as false.
        """
        node = find_xpath_attr(doc, './/string', 'name', name)
        return node.text if node is not None else None

    def _real_extract(self, url):
        """Extract a single video or a multi-part playlist from ``url``.

        Returns an info dict when the player reports exactly one file, a
        playlist dict when it reports any other number of files, or a
        ``url_result`` pointing at the SenateISVP extractor when the page
        embeds a Senate ISVP iframe.

        Raises ExtractorError when the page carries neither a program/clip
        id nor a Senate ISVP iframe.
        """
        # Function-scope import: keeps this fix independent of the
        # module-level import list.
        from ..utils import ExtractorError

        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        matches = re.search(r'data-(prog|clip)id=\'([0-9]+)\'', webpage)
        if matches:
            # From here on video_id is the numeric id taken from the page,
            # not the id matched from the URL.
            video_type, video_id = matches.groups()
            if video_type == 'prog':
                video_type = 'program'
        else:
            senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
            if senate_isvp_url:
                title = self._og_search_title(webpage)
                surl = smuggle_url(senate_isvp_url, {'force_title': title})
                return self.url_result(surl, 'SenateISVP', video_id, title)
            # Previously execution fell through and crashed with an
            # UnboundLocalError on video_type; fail with a clear message.
            raise ExtractorError('unable to find video id and type')

        data = self._download_json(
            'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=%s&id=%s' % (video_type, video_id),
            video_id)

        doc = self._download_xml(
            'http://www.c-span.org/common/services/flashXml.php?%sid=%s' % (video_type, video_id),
            video_id)

        description = self._html_search_meta('description', webpage)

        title = self._xml_string(doc, 'title')
        if title is None:
            # The XML carried no title; fall back to the page's OpenGraph one.
            title = self._og_search_title(webpage)
        thumbnail = self._xml_string(doc, 'poster')

        files = data['video']['files']

        # Captions are optional: tolerate a missing key as well as a
        # null / non-mapping 'capfile' value.
        try:
            capfile = data['video']['capfile']['#text']
        except (KeyError, TypeError):
            capfile = None

        entries = [{
            'id': '%s_%d' % (video_id, partnum + 1),
            'title': (
                title if len(files) == 1 else
                '%s part %d' % (title, partnum + 1)),
            'url': unescapeHTML(f['path']['#text']),
            'description': description,
            'thumbnail': thumbnail,
            'duration': int_or_none(f.get('length', {}).get('#text')),
            'subtitles': {
                'en': [{
                    'url': capfile,
                    'ext': determine_ext(capfile, 'dfxp')
                }],
            } if capfile else None,
        } for partnum, f in enumerate(files)]

        # Clip ids are reported with a leading 'c'.
        result_id = 'c' + video_id if video_type == 'clip' else video_id

        if len(entries) == 1:
            # A single file is returned as a plain video under the overall
            # id rather than the '<id>_1' part id.
            entry = dict(entries[0])
            entry['id'] = result_id
            return entry

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': title,
            'id': result_id,
        }