2014-01-04 20:30:00 -07:00
from __future__ import unicode_literals
import json
2013-06-26 09:55:54 -06:00
import re
from . common import InfoExtractor
from . . utils import (
2014-01-04 20:30:00 -07:00
unescapeHTML ,
2013-06-26 09:55:54 -06:00
)
2014-01-04 20:30:00 -07:00
2013-06-26 09:55:54 -06:00
class CSpanIE(InfoExtractor):
    """Extractor for program pages hosted on c-spanvideo.org.

    A program URL is resolved in two steps: the HTML page yields the numeric
    program id, title, description and thumbnail, and a follow-up request to
    the site's ajax player endpoint yields the media file location.
    """

    # Everything after ``/program/`` is captured as the ``name`` group; it is
    # only used as a display id while the real video id is still unknown.
    _VALID_URL = r'http://(?:www\.)?c-spanvideo\.org/program/(?P<name>.*)'
    IE_DESC = 'C-SPAN'
    _TEST = {
        'url': 'http://www.c-spanvideo.org/program/HolderonV',
        'file': '315139.mp4',
        'md5': '8e44ce11f0f725527daccc453f553eb0',
        'info_dict': {
            'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
            'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
        },
        'skip': 'Regularly fails on travis, for unknown reasons',
    }

    def _real_extract(self, url):
        """Return the info dictionary for the program page at *url*.

        The result carries the keys ``id``, ``title``, ``url``,
        ``description`` and ``thumbnail``.  Failures of the lookups are
        handled by the inherited helper methods this method calls.
        """
        mobj = re.match(self._VALID_URL, url)
        prog_name = mobj.group('name')
        webpage = self._download_webpage(url, prog_name)

        # The page references the id as either ``progid=`` or ``programid=``.
        video_id = self._search_regex(
            r'prog(?:ram)?id=(.*?)&', webpage, 'video id')
        title = self._html_search_regex(
            r'<!-- title -->\n\s*<h1[^>]*>(.*?)</h1>', webpage, 'title')
        description = self._og_search_description(webpage)

        info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
        data_json = self._download_webpage(
            info_url, video_id, 'Downloading video info')
        data = json.loads(data_json)

        # Only the first listed file is used; its path is HTML-escaped in the
        # JSON payload.  Kept in its own local so the ``url`` argument is not
        # silently rebound.
        video_url = unescapeHTML(data['video']['files'][0]['path']['#text'])

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': self._og_search_thumbnail(webpage),
        }