2013-11-27 21:47:20 -07:00
# coding: utf-8
2013-11-24 19:12:26 -07:00
import operator
2013-06-23 12:24:07 -06:00
import re
from . common import InfoExtractor
from . . utils import (
2013-11-24 19:12:26 -07:00
parse_xml_doc ,
unified_strdate ,
2013-06-23 12:24:07 -06:00
)
2013-09-28 07:43:34 -06:00
2013-06-23 12:24:07 -06:00
class ZDFIE(InfoExtractor):
    """Information extractor for videos of the ZDF Mediathek (www.zdf.de).

    Metadata and the list of available formats are read from the
    ``beitragsDetails`` XML service, addressed by the numeric video id
    taken from the page URL.
    """

    _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
    _TEST = {
        u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
        u"file": u"2037704.webm",
        u"info_dict": {
            u"upload_date": u"20131127",
            u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
            u"uploader": u"spezial",
            u"title": u"ZDFspezial - Ende des Machtpokers"
        },
        u"skip": u"Videos on ZDF.de are depublicised in short order",
    }

    def _real_extract(self, url):
        """Extract metadata and formats for the video referenced by *url*.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``description``, ``uploader``, ``duration`` (in seconds, or None)
        and ``upload_date``.  ``formats`` holds only formats that are
        flagged as available, sorted ascending by their ``_pref`` tuple.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        info_xml = self._download_webpage(
            xml_url, video_id, note=u'Downloading video info')
        doc = parse_xml_doc(info_xml)

        def optional_text(path):
            # Text of the first node matching path, or None if there is
            # no such node.
            node = doc.find(path)
            return None if node is None else node.text

        # The title is looked up without a guard: a document without it
        # raises AttributeError here.
        title = doc.find('.//information/title').text
        description = optional_text('.//information/detail')
        uploader = optional_text('.//details/originChannelTitle')

        # Length is given as HH:MM:SS with an optional fractional part;
        # the fraction is matched but not used.
        duration_str = optional_text('.//details/length')
        duration_m = None
        if duration_str:
            duration_m = re.match(r'''(?x)^
                (?P<hours>[0-9]{2})
                :(?P<minutes>[0-9]{2})
                :(?P<seconds>[0-9]{2})
                (?:\.(?P<ms>[0-9]+)?)?
                ''', duration_str)
        if duration_m:
            duration = (
                int(duration_m.group('hours')) * 60 * 60 +
                int(duration_m.group('minutes')) * 60 +
                int(duration_m.group('seconds')))
        else:
            duration = None

        airtime = optional_text('.//details/airtime')
        upload_date = None if airtime is None else unified_strdate(airtime)

        # Entries nearer the front of these lists get a larger preference
        # value and therefore sort later in the ascending result.
        # NOTE(review): presumably the consumer treats the last format as
        # the best one - confirm against the caller.
        PROTO_ORDER = ['http', 'rtmp', 'rtsp']
        QUALITY_ORDER = ['veryhigh', '300', 'high', 'med', 'low']
        # Unrecognised values must rank below every known one.
        UNKNOWN_PREF = -999

        def rank(order, value):
            # Negated position of value in order, or UNKNOWN_PREF.
            try:
                return -order.index(value)
            except ValueError:
                return UNKNOWN_PREF

        def xml_to_format(fnode):
            """Turn one <formitaet> node into a format dict."""
            video_url = fnode.find('url').text
            is_available = u'http://www.metafilegenerator' not in video_url

            # basetype is expected to consist of six underscore-separated
            # fields; a value of another shape makes format_m None and
            # the group() calls below fail.
            format_id = fnode.attrib['basetype']
            format_m = re.match(r'''(?x)
                (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
                (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
                ''', format_id)

            ext = format_m.group('container')
            is_supported = ext != 'f4f'

            proto_pref = rank(PROTO_ORDER, format_m.group('proto'))
            quality = fnode.find('./quality').text
            quality_pref = rank(QUALITY_ORDER, quality)

            # Bitrates are divided by 1000 (integer division).
            abr = int(fnode.find('./audioBitrate').text) // 1000
            vbr = int(fnode.find('./videoBitrate').text) // 1000

            pref = (is_available, is_supported,
                    proto_pref, quality_pref, vbr, abr)
            format_note = None if is_supported else u'(unsupported)'

            return {
                'format_id': format_id + u'-' + quality,
                'url': video_url,
                'ext': ext,
                'acodec': format_m.group('acodec'),
                'vcodec': format_m.group('vcodec'),
                'abr': abr,
                'vbr': vbr,
                'width': int(fnode.find('./width').text),
                'height': int(fnode.find('./height').text),
                'filesize': int(fnode.find('./filesize').text),
                'format_note': format_note,
                '_pref': pref,
                '_available': is_available,
            }

        format_nodes = doc.findall('.//formitaeten/formitaet')
        formats = [
            fmt for fmt in (xml_to_format(fnode) for fnode in format_nodes)
            if fmt['_available']]
        formats.sort(key=operator.itemgetter('_pref'))

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
            'duration': duration,
            'upload_date': upload_date,
        }