[xstream] Move xstream to separate extractor

2015-05-08 22:50:01 +06:00 · 2015-05-08 22:50:01 +06:00 · 2c0c9dc46c
parent 0ceab84749
commit 2c0c9dc46c
2 changed files with 116 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -653,6 +653,7 @@ from .xboxclips import XboxClipsIE
 from .xhamster import XHamsterIE
 from .xminus import XMinusIE
 from .xnxx import XNXXIE
 from .xstream import XstreamIE
 from .xvideos import XVideosIE
 from .xtube import XTubeUserIE, XTubeIE
 from .xuite import XuiteIE
--- a/youtube_dl/extractor/xstream.py
+++ b/youtube_dl/extractor/xstream.py
@@ -0,0 +1,115 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    parse_iso8601,
    xpath_with_ns,
    xpath_text,
    find_xpath_attr,
 )
 class XstreamIE(InfoExtractor):
    """Extractor for videos published through the xstream frontend feed.

    Matches either an ``xstream:<partner_id>:<id>`` reference or a
    ``frontend.xstream.(dk|net)/<partner_id>/feed/video/?...id=<id>`` URL.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        xstream:|
                        https?://frontend\.xstream\.(?:dk|net)/
                    )
                    (?P<partner_id>[^/]+)
                    (?:
                        :|
                        /feed/video/\?.*?\bid=
                    )
                    (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588',
        'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
        'info_dict': {
            'id': '86588',
            'ext': 'mov',
            'title': 'Otto Wollertsen',
            'description': 'Vestlendingen Otto Fredrik Wollertsen',
            'timestamp': 1430473209,
            'upload_date': '20150501',
        },
    }, {
        'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the Atom feed for the video and build its info dict.

        The partner id and video id are taken from ``url`` via
        ``_VALID_URL``. Returns a dict with the keys ``id``, ``title``,
        ``description``, ``timestamp``, ``formats`` and ``thumbnails``.
        """
        url_match = re.match(self._VALID_URL, url)
        partner_id = url_match.group('partner_id')
        video_id = url_match.group('id')

        # The feed is always requested from the .dk host, whichever of the
        # accepted URL forms was matched.
        data = self._download_xml(
            'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
            % (partner_id, video_id),
            video_id)

        NS_MAP = {
            'atom': 'http://www.w3.org/2005/Atom',
            'xt': 'http://xstream.dk/',
            'media': 'http://search.yahoo.com/mrss/',
        }

        entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))

        title = xpath_text(
            entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
        description = xpath_text(
            entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
        timestamp = parse_iso8601(xpath_text(
            entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))

        formats = []
        media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
        for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
            media_url = media_content.get('url')
            if not media_url:
                continue
            tbr = int_or_none(media_content.get('bitrate'))
            # RTMP URLs are split into connection URL, app and play path;
            # anything else is used as a plain URL.
            rtmp_match = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
            if rtmp_match:
                formats.append({
                    'url': rtmp_match.group('url'),
                    'play_path': 'mp4:%s' % rtmp_match.group('playpath'),
                    'app': rtmp_match.group('app'),
                    'ext': 'flv',
                    'tbr': tbr,
                    # '%d' % None raises TypeError, so fall back to a bare
                    # 'rtmp' id when no bitrate could be parsed.
                    'format_id': 'rtmp-%d' % tbr if tbr is not None else 'rtmp',
                })
            else:
                formats.append({
                    'url': media_url,
                    'tbr': tbr,
                })
        self._sort_formats(formats)

        # The "original" link is appended after the sort call, so it is not
        # reordered by it and stays last in the list.
        link = find_xpath_attr(
            entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
        original_url = link.get('href') if link is not None else None
        # Skip a link without an href, mirroring the empty-URL check applied
        # to the media:content entries above.
        if original_url:
            formats.append({
                'url': original_url,
                'format_id': link.get('rel'),
            })

        thumbnails = [{
            'url': splash.get('url'),
            'width': int_or_none(splash.get('width')),
            'height': int_or_none(splash.get('height')),
        } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnails': thumbnails,
        }