[nhk] Fix extraction (Closes #10633)

This commit is contained in:
Sergey M․ 2016-09-13 23:20:25 +07:00
parent 7a7309219c
commit 45396dd2ed
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed file with 16 additions and 15 deletions

View File

@@ -1,14 +1,15 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError
class NhkVodIE(InfoExtractor): class NhkVodIE(InfoExtractor):
_VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>.+?)\.html' _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>[^/]+/[^/?#&]+)'
_TEST = { _TEST = {
# Videos available only for a limited period of time. Visit # Videos available only for a limited period of time. Visit
# http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples. # http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples.
'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815.html', 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815',
'info_dict': { 'info_dict': {
'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5', 'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5',
'ext': 'flv', 'ext': 'flv',
@@ -19,25 +20,25 @@ class NhkVodIE(InfoExtractor):
}, },
'skip': 'Videos available only for a limited period of time', 'skip': 'Videos available only for a limited period of time',
} }
_API_URL = 'http://api.nhk.or.jp/nhkworld/vodesdlist/v1/all/all/all.json?apikey=EJfK8jdS57GqlupFgAfAAwr573q01y6k'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) data = self._download_json(self._API_URL, video_id)
embed_code = self._search_regex( try:
r'nw_vod_ooplayer\([^,]+,\s*(["\'])(?P<id>(?:(?!\1).)+)\1', episode = next(
webpage, 'ooyala embed code', group='id') e for e in data['data']['episodes']
if e.get('url') and video_id in e['url'])
except StopIteration:
raise ExtractorError('Unable to find episode')
title = self._search_regex( embed_code = episode['vod_id']
r'<div[^>]+class=["\']episode-detail["\']>\s*<h\d+>([^<]+)',
webpage, 'title', default=None) title = episode.get('sub_title_clean') or episode['sub_title']
description = self._html_search_regex( description = episode.get('description_clean') or episode.get('description')
r'(?s)<p[^>]+class=["\']description["\'][^>]*>(.+?)</p>', series = episode.get('title_clean') or episode.get('title')
webpage, 'description', default=None)
series = self._search_regex(
r'<h2[^>]+class=["\']detail-top-player-title[^>]+><a[^>]+>([^<]+)',
webpage, 'series', default=None)
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',