2014-09-27 11:28:01 -06:00
# coding: utf-8
from __future__ import unicode_literals
from . common import InfoExtractor
from . . utils import (
get_meta_content ,
parse_iso8601 ,
)
class HeiseIE(InfoExtractor):
    """Extractor for video article pages under heise.de/video/artikel/.

    The numeric id that precedes ``.html`` in the page URL is used as the
    video id.  Format information is read from a JSON document whose URL
    is embedded in the page as ``json_url: "..."``.
    """

    # Verbose-mode pattern: layout whitespace between tokens is ignored by
    # the regex engine, so the pattern can be split over several lines.
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?heise\.de/video/artikel/
        .+?(?P<id>[0-9]+)\.html(?:$|[?#])
    '''
    _TEST = {
        'url': (
            'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'
        ),
        'md5': 'ffed432483e922e88545ad9f2f15d30e',
        'info_dict': {
            'id': '2404147',
            'ext': 'mp4',
            'title': (
                "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
            ),
            'format_id': 'mp4_720',
            'timestamp': 1411812600,
            'upload_date': '20140927',
            'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video article at *url*.

        Downloads the article page, locates the player's JSON
        configuration URL in it, downloads that JSON and turns its
        ``formats`` mapping into a list of format dicts.

        Returns a dict with the keys ``id``, ``thumbnail``, ``timestamp``,
        ``description``, ``title`` and ``formats``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        json_url = self._search_regex(
            r'json_url:\s*"([^"]+)"', webpage, 'json URL')
        config = self._download_json(json_url, video_id)

        info = {
            'id': video_id,
            'thumbnail': config.get('poster'),
            'timestamp': parse_iso8601(get_meta_content('date', webpage)),
            'description': self._og_search_description(webpage),
        }

        # Title preference: the page's "fulltitle" meta value, then the
        # title from the JSON config, then the Open Graph title.
        title = get_meta_content('fulltitle', webpage)
        if title:
            info['title'] = title
        elif config.get('title'):
            info['title'] = config['title']
        else:
            info['title'] = self._og_search_title(webpage)

        # As consumed below, config['formats'] maps a type name to a
        # mapping of height (as a string) to an object carrying 'url'.
        formats = []
        for t, rs in config['formats'].items():
            # Skip empty entries and anything that is not mapping-like.
            if not rs or not hasattr(rs, 'items'):
                self._downloader.report_warning(
                    'formats: {0}: no resolutions'.format(t))
                continue

            for height_str, obj in rs.items():
                format_id = '{0}_{1}'.format(t, height_str)

                # A variant without a URL cannot be downloaded; warn and
                # move on instead of aborting the whole extraction.
                if not obj or not obj.get('url'):
                    self._downloader.report_warning(
                        'formats: {0}: no url'.format(format_id))
                    continue

                formats.append({
                    'url': obj['url'],
                    'format_id': format_id,
                    'height': self._int(height_str, 'height'),
                })

        self._sort_formats(formats)
        info['formats'] = formats

        return info