youtube-dl/youtube_dl/extractor/southparkstudios.py

import re

from .mtv import MTVIE, _media_xml_tag


class SouthParkStudiosIE(MTVIE):
    """Information extractor for southparkstudios.com clips and full episodes.

    The page is only scraped for its ``mgid`` identifier; everything else is
    delegated to the ``_get_videos_info`` method inherited from MTVIE.
    """

    IE_NAME = u'southparkstudios.com'
    # Scheme and "www." are optional; the "url" group is the scheme-less
    # address used to rebuild a canonical URL, "id" is the path after the
    # section name up to the first "?", "#" or end of string.
    _VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'

    _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'

    # Override MTVIE properties we don't want
    _TESTS = [{
        u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
        u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
        u'info_dict': {
            u'title': u'Bat Daded',
            u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
        },
    }]

    def _get_thumbnail_url(self, uri, itemdoc):
        """Return the thumbnail URL found in *itemdoc*, or None if absent.

        *uri* is not used here; it is kept so the signature stays the same
        (presumably matching the MTVIE method this overrides -- confirm).
        *itemdoc* must offer ``find()`` returning a node with an ``attrib``
        mapping (looks like an ElementTree element -- confirm against caller).
        """
        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
        thumb_node = itemdoc.find(search_path)
        if thumb_node is None:
            return None
        # The thumbnail is optional metadata: a node lacking the "url"
        # attribute yields None instead of aborting with KeyError.
        return thumb_node.attrib.get('url')

    def _real_extract(self, url):
        """Extract the video information for *url*.

        Downloads the page, locates the ``mgid`` passed to the
        ``swfobject.embedSWF(...)`` call and returns whatever
        ``self._get_videos_info`` produces for it.
        """
        mobj = re.match(self._VALID_URL, url)
        # Rebuild a full address so inputs given without scheme or "www."
        # can still be downloaded.
        url = u'http://www.' + mobj.group(u'url')
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        # The dot is escaped so only a literal "swfobject.embedSWF(" matches.
        mgid = self._search_regex(r'swfobject\.embedSWF\(".*?(mgid:.*?)"',
                                  webpage, u'mgid')
        return self._get_videos_info(mgid)

class SouthparkDeIE(SouthParkStudiosIE):
    """Information extractor for southpark.de clips and "alle-episoden" pages.

    Only the site-specific constants differ from SouthParkStudiosIE; all
    methods are inherited unchanged.
    """

    IE_NAME = u'southpark.de'
    _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
    # Same shape as the parent's pattern, with the southpark.de host and
    # section names.
    _VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'

    _TESTS = [
        {
            u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
            u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4',
            u'info_dict': {
                u'title': u'The Government Won\'t Respect My Privacy',
                u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
            },
        },
    ]
Add an extractor for southparkstudios.com (closes #1434) It uses the MTV system 2013-09-15 15:30:58 -06:00			`import re`

			`from .mtv import MTVIE, _media_xml_tag`


			`class SouthParkStudiosIE(MTVIE):`
			`IE_NAME = u'southparkstudios.com'`
[SouthParkStudiosIE] Also detect urls without http:// or www 2013-11-17 09:42:24 -07:00			`_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'`
Add an extractor for southparkstudios.com (closes #1434) It uses the MTV system 2013-09-15 15:30:58 -06:00
			`_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'`

[SouthParkStudiosIE] Move from _TEST to _TESTS 2013-11-17 09:43:58 -07:00			`# Overwrite MTVIE properties we don't want`
			`_TESTS = [{`
Add an extractor for southparkstudios.com (closes #1434) It uses the MTV system 2013-09-15 15:30:58 -06:00			`u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',`
			`u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',`
			`u'info_dict': {`
			`u'title': u'Bat Daded',`
Revert "[southparkstudios] Fix mgid extraction" This reverts commit 0fd49457f5257dbe317c69314ee57a6c485d41a3. It seems that the redesign was temporary. 2013-09-24 13:38:37 -06:00			`u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',`
Add an extractor for southparkstudios.com (closes #1434) It uses the MTV system 2013-09-15 15:30:58 -06:00			`},`
[SouthParkStudiosIE] Move from _TEST to _TESTS 2013-11-17 09:43:58 -07:00			`}]`
Add an extractor for southparkstudios.com (closes #1434) It uses the MTV system 2013-09-15 15:30:58 -06:00
			`def _get_thumbnail_url(self, uri, itemdoc):`
			`search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))`
[southparkstudios] add support for http://www.southparkstudios.com/full-episodes/* urls (closes #1469) 2013-09-21 02:58:43 -06:00			`thumb_node = itemdoc.find(search_path)`
			`if thumb_node is None:`
			`return None`
			`else:`
			`return thumb_node.attrib['url']`
Add an extractor for southparkstudios.com (closes #1434) It uses the MTV system 2013-09-15 15:30:58 -06:00
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
[SouthParkStudiosIE] Also detect urls without http:// or www 2013-11-17 09:42:24 -07:00			`url = u'http://www.' + mobj.group(u'url')`
Add an extractor for southparkstudios.com (closes #1434) It uses the MTV system 2013-09-15 15:30:58 -06:00			`video_id = mobj.group('id')`
			`webpage = self._download_webpage(url, video_id)`
Revert "[southparkstudios] Fix mgid extraction" This reverts commit 0fd49457f5257dbe317c69314ee57a6c485d41a3. It seems that the redesign was temporary. 2013-09-24 13:38:37 -06:00			`mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',`
Add an extractor for southparkstudios.com (closes #1434) It uses the MTV system 2013-09-15 15:30:58 -06:00			`webpage, u'mgid')`
			`return self._get_videos_info(mgid)`
Add support for southpark.de 2013-11-17 09:54:47 -07:00
			`class SouthparkDeIE(SouthParkStudiosIE):`
			`IE_NAME = u'southpark.de'`
			`_VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'`
			`_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'`

			`_TESTS = [{`
			`u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',`
			`u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4',`
			`u'info_dict': {`
			`u'title': u'The Government Won\'t Respect My Privacy',`
			`u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',`
			`},`
			`}]`