[twitch] Rename extractor and support channel videos

Sergey M․ 2014-10-17 22:58:18 +07:00 committed by Sergey M
parent 4698f0d858
commit 46fd0dd5a5
2 changed files with 28 additions and 76 deletions
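In effect, a twitch.tv channel URL now resolves to a playlist of that channel's archived videos instead of only single broadcasts being supported. A minimal usage sketch, assuming youtube-dl's Python API of the time (the channel name is a placeholder, not a real test case):

    # Usage sketch; 'somechannel' is illustrative only.
    import youtube_dl

    ydl = youtube_dl.YoutubeDL({'quiet': True})
    result = ydl.extract_info('http://www.twitch.tv/somechannel', download=False)

    # With this commit, the channel branch returns a playlist dict whose
    # entries are the channel's archived broadcasts.
    if result.get('_type') == 'playlist':
        for entry in result['entries']:
            print(entry['id'], entry.get('title'))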

View File

@@ -173,7 +173,6 @@ from .jadorecettepub import JadoreCettePubIE
 from .jeuxvideo import JeuxVideoIE
 from .jove import JoveIE
 from .jukebox import JukeboxIE
-from .justintv import JustinTVIE
 from .jpopsukitv import JpopsukiIE
 from .kankan import KankanIE
 from .keezmovies import KeezMoviesIE
@@ -395,6 +394,7 @@ from .tutv import TutvIE
 from .tvigle import TvigleIE
 from .tvp import TvpIE
 from .tvplay import TVPlayIE
+from .twitch import TwitchIE
 from .ubu import UbuIE
 from .udemy import (
     UdemyIE,
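The registry swap above is what makes the renamed extractor discoverable by URL matching. A quick check of the new _VALID_URL behaviour, assuming the module layout of this commit; the channel names below are placeholders, only the /b/ URL comes from the test case in the diff:

    # suitable() is the stock InfoExtractor classmethod that matches a URL
    # against _VALID_URL.
    from youtube_dl.extractor.twitch import TwitchIE

    assert TwitchIE.suitable('http://www.twitch.tv/somechannel')                  # channel -> playlist
    assert TwitchIE.suitable('http://www.twitch.tv/thegamedevhub/b/296128360')    # archived broadcast
    assert not TwitchIE.suitable('http://www.justin.tv/somechannel/b/296128360')  # justin.tv no longer matched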

View File

@@ -1,26 +1,20 @@
 from __future__ import unicode_literals
 import itertools
-import json
-import os
 import re
 from .common import InfoExtractor
 from ..utils import (
-    compat_str,
     ExtractorError,
-    formatSeconds,
     parse_iso8601,
 )
-class JustinTVIE(InfoExtractor):
-    """Information extractor for justin.tv and twitch.tv"""
+class TwitchIE(InfoExtractor):
     # TODO: One broadcast may be split into multiple videos. The key
     # 'broadcast_id' is the same for all parts, and 'broadcast_part'
     # starts at 1 and increases. Can we treat all parts as one video?
-    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
+    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
         (?:
             (?P<channelid>[^/]+)|
             (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
@@ -28,9 +22,8 @@ class JustinTVIE(InfoExtractor):
         )
         /?(?:\#.*)?$
         """
-    _JUSTIN_PAGE_LIMIT = 100
-    IE_NAME = 'justin.tv'
-    IE_DESC = 'justin.tv and twitch.tv'
+    _PAGE_LIMIT = 100
+    _API_BASE = 'https://api.twitch.tv'
     _TEST = {
         'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
         'md5': 'ecaa8a790c22a40770901460af191c9a',
@@ -44,39 +37,6 @@ class JustinTVIE(InfoExtractor):
         }
     }
-    _API_BASE = 'https://api.twitch.tv'
-    # Return count of items, list of *valid* items
-    def _parse_page(self, url, video_id, counter):
-        info_json = self._download_webpage(
-            url, video_id,
-            'Downloading video info JSON on page %d' % counter,
-            'Unable to download video info JSON %d' % counter)
-        response = json.loads(info_json)
-        if type(response) != list:
-            error_text = response.get('error', 'unknown error')
-            raise ExtractorError('Justin.tv API: %s' % error_text)
-        info = []
-        for clip in response:
-            video_url = clip['video_file_url']
-            if video_url:
-                video_extension = os.path.splitext(video_url)[1][1:]
-                video_date = re.sub('-', '', clip['start_time'][:10])
-                video_uploader_id = clip.get('user_id', clip.get('channel_id'))
-                video_id = clip['id']
-                video_title = clip.get('title', video_id)
-                info.append({
-                    'id': compat_str(video_id),
-                    'url': video_url,
-                    'title': video_title,
-                    'uploader': clip.get('channel_name', video_uploader_id),
-                    'uploader_id': video_uploader_id,
-                    'upload_date': video_date,
-                    'ext': video_extension,
-                })
-        return (len(response), info)
     def _handle_error(self, response):
         if not isinstance(response, dict):
             return
@@ -87,25 +47,21 @@ class JustinTVIE(InfoExtractor):
                 expected=True)
     def _download_json(self, url, video_id, note='Downloading JSON metadata'):
-        response = super(JustinTVIE, self)._download_json(url, video_id, note)
+        response = super(TwitchIE, self)._download_json(url, video_id, note)
         self._handle_error(response)
         return response
     def _extract_media(self, item, item_id):
         ITEMS = {
             'a': 'video',
             'c': 'chapter',
         }
         info = self._extract_info(self._download_json(
             '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
             'Downloading %s info JSON' % ITEMS[item]))
         response = self._download_json(
             '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
             'Downloading %s playlist JSON' % ITEMS[item])
         entries = []
         chunks = response['chunks']
         qualities = list(chunks.keys())
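The retained _download_json override funnels every API response through _handle_error, so API-side failures surface as ExtractorError(..., expected=True) instead of KeyErrors further down. A standalone sketch of that pattern; the 'error' field name and the class name are assumptions, only the isinstance() guard and expected=True appear in this diff:

    # Sketch of the error-checking wrapper pattern; not the extractor's exact code.
    from youtube_dl.extractor.common import InfoExtractor
    from youtube_dl.utils import ExtractorError

    class ApiErrorCheckingIE(InfoExtractor):
        def _download_json(self, url, video_id, note='Downloading JSON metadata'):
            response = super(ApiErrorCheckingIE, self)._download_json(url, video_id, note)
            if isinstance(response, dict) and response.get('error'):
                # expected=True marks this as a site/API error, not an extractor bug
                raise ExtractorError('API returned error: %s' % response['error'], expected=True)
            return response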
@@ -144,14 +100,7 @@ class JustinTVIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        api_base = 'http://api.twitch.tv'
-        paged = False
-        if mobj.group('channelid'):
-            paged = True
-            video_id = mobj.group('channelid')
-            api = api_base + '/channel/archives/%s.json' % video_id
-        elif mobj.group('chapterid'):
+        if mobj.group('chapterid'):
             return self._extract_media('c', mobj.group('chapterid'))
         """
@@ -203,22 +152,25 @@ class JustinTVIE(InfoExtractor):
             }
             return info
         """
-        else:
+        elif mobj.group('videoid'):
             return self._extract_media('a', mobj.group('videoid'))
-        entries = []
-        offset = 0
-        limit = self._JUSTIN_PAGE_LIMIT
-        for counter in itertools.count(1):
-            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
-            page_count, page_info = self._parse_page(
-                page_url, video_id, counter)
-            entries.extend(page_info)
-            if not paged or page_count != limit:
-                break
-            offset += limit
-        return {
-            '_type': 'playlist',
-            'id': video_id,
-            'entries': entries,
-        }
+        elif mobj.group('channelid'):
+            channel_id = mobj.group('channelid')
+            info = self._download_json(
+                '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
+                channel_id, 'Downloading channel info JSON')
+            channel_name = info.get('display_name') or info.get('name')
+            entries = []
+            offset = 0
+            limit = self._PAGE_LIMIT
+            for counter in itertools.count(1):
+                response = self._download_json(
+                    '%s/kraken/channels/%s/videos/?offset=%d&limit=%d'
+                    % (self._API_BASE, channel_id, offset, limit),
+                    channel_id, 'Downloading channel videos JSON page %d' % counter)
+                videos = response['videos']
+                if not videos:
+                    break
+                entries.extend([self.url_result(video['url'], 'Twitch') for video in videos])
+                offset += limit
+            return self.playlist_result(entries, channel_id, channel_name)
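The new channel branch pages through the Kraken videos endpoint in _PAGE_LIMIT-sized steps until an empty page comes back, wrapping each video URL in a url_result entry. A standalone sketch of the same offset/limit pattern; the endpoint and the 'videos'/'url' fields mirror the diff, everything else is illustrative:

    # Pagination sketch mirroring the loop above; standard library only.
    import json
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib2 import urlopen  # Python 2

    API_BASE = 'https://api.twitch.tv'
    PAGE_LIMIT = 100

    def iter_channel_video_urls(channel_id, limit=PAGE_LIMIT):
        offset = 0
        while True:
            url = ('%s/kraken/channels/%s/videos/?offset=%d&limit=%d'
                   % (API_BASE, channel_id, offset, limit))
            page = json.loads(urlopen(url).read().decode('utf-8'))
            videos = page['videos']
            if not videos:  # an empty page signals the end of the archive
                break
            for video in videos:
                yield video['url']
            offset += limit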