[hentaistigma] Add new extractor

2014-05-12 03:58:07 -07:00 · 2014-05-12 03:58:07 -07:00 · 33c7ff861e
parent e399853d0c
commit 33c7ff861e
2 changed files with 44 additions and 0 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -109,6 +109,7 @@ from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .hark import HarkIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .huffpost import HuffPostIE
--- a/youtube_dl/extractor/hentaistigma.py
+++ b/youtube_dl/extractor/hentaistigma.py
@ -0,0 +1,43 @@
 import re
 from .common import InfoExtractor
 class HentaiStigmaIE(InfoExtractor):
    _VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<videoid>[^/]+)'
    _TEST = {
        u'url': u'http://hentai.animestigma.com/inyouchuu-etsu-bonus/',
        u'file': u'inyouchuu-etsu-bonus.mp4',
        u'md5': u'4e3d07422a68a4cc363d8f57c8bf0d23',
        u'info_dict': {
            u"title": u"Inyouchuu Etsu Bonus",
            u"age_limit": 18,
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
        # Get webpage content
        webpage = self._download_webpage(url, video_id)
        # Get the video title
        video_title = self._html_search_regex(r'<h2 class="posttitle"><a[^>]*>([^<]+)</a>',
            webpage, u'title').strip()
        # Get the wrapper url
        wrap_url = self._html_search_regex(r'<iframe src="([^"]+mp4)"', webpage, u'wrapper url')
        # Get wrapper content
        wrap_webpage = self._download_webpage(wrap_url, video_id)
        video_url = self._html_search_regex(r'clip:\s*{\s*url: "([^"]*)"', wrap_webpage, u'video url')
        info = {'id': video_id,
                'url': video_url,
                'title': video_title,
                'format': 'mp4',
                'age_limit': 18}
        return [info]