From f166bccc8f4366531783d0e0c4c1eb3a585cdfb0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 13 Sep 2011 21:51:44 +0200 Subject: [PATCH] Allow downloading current thedailyshow episode with youtube-dl :tds --- youtube-dl | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index 1b2ccae2f..9d379dcd1 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3037,9 +3037,9 @@ class MyVideoIE(InfoExtractor): self._downloader.trouble(u'\nERROR: Unable to download video') class ComedyCentralIE(InfoExtractor): - """Information extractor for blip.tv""" + """Information extractor for The Daily Show and Colbert Report """ - _VALID_URL = r'^(?:https?://)?(www\.)?(thedailyshow|colbertnation)\.com/full-episodes/(.*)$' + _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$' @staticmethod def suitable(url): @@ -3064,15 +3064,39 @@ class ComedyCentralIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return - epTitle = mobj.group(3) + + if mobj.group('shortname'): + if mobj.group('shortname') in ('tds', 'thedailyshow'): + url = 'http://www.thedailyshow.com/full-episodes/' + else: + url = 'http://www.colbertnation.com/full-episodes/' + mobj = re.match(self._VALID_URL, url) + assert mobj is not None + + dlNewest = not mobj.group('episode') + if dlNewest: + epTitle = mobj.group('showname') + else: + epTitle = mobj.group('episode') req = urllib2.Request(url) self.report_extraction(epTitle) try: - html = urllib2.urlopen(req).read() + htmlHandle = urllib2.urlopen(req) + html = htmlHandle.read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) return + if dlNewest: + url = htmlHandle.geturl() + mobj = re.match(self._VALID_URL, url) + if mobj is None: + 
self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url) + return + if mobj.group('episode') == '': + self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url) + return + epTitle = mobj.group('episode') mMovieParams = re.findall('', html) if len(mMovieParams) == 0: