mirror of https://github.com/yt-dlp/yt-dlp.git
Merge remote-tracking branch 'jtwaleson/master'
This commit is contained in:
commit
27f8b0994e
|
@ -142,7 +142,7 @@ def win_service_set_status(handle, status_code):
|
|||
|
||||
def win_service_main(service_name, real_main, argc, argv_raw):
|
||||
try:
|
||||
#args = [argv_raw[i].value for i in range(argc)]
|
||||
# args = [argv_raw[i].value for i in range(argc)]
|
||||
stop_event = threading.Event()
|
||||
handler = HandlerEx(functools.partial(stop_event, win_service_handler))
|
||||
h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
|
||||
|
|
|
@ -30,7 +30,6 @@ def build_completion(opt_parser):
|
|||
for group in opt_parser.option_groups:
|
||||
for option in group.option_list:
|
||||
long_option = option.get_opt_string().strip('-')
|
||||
help_msg = shell_quote([option.help])
|
||||
complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option]
|
||||
if option._short_opts:
|
||||
complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
|
||||
|
|
1
setup.py
1
setup.py
|
@ -4,7 +4,6 @@
|
|||
from __future__ import print_function
|
||||
|
||||
import os.path
|
||||
import pkg_resources
|
||||
import warnings
|
||||
import sys
|
||||
|
||||
|
|
|
@ -116,14 +116,14 @@ def expect_info_dict(self, expected_dict, got_dict):
|
|||
elif isinstance(expected, type):
|
||||
got = got_dict.get(info_field)
|
||||
self.assertTrue(isinstance(got, expected),
|
||||
'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
|
||||
'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
|
||||
else:
|
||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||
got = 'md5:' + md5(got_dict.get(info_field))
|
||||
else:
|
||||
got = got_dict.get(info_field)
|
||||
self.assertEqual(expected, got,
|
||||
'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||
'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||
|
||||
# Check for the presence of mandatory fields
|
||||
if got_dict.get('_type') != 'playlist':
|
||||
|
@ -135,8 +135,8 @@ def expect_info_dict(self, expected_dict, got_dict):
|
|||
|
||||
# Are checkable fields missing from the test case definition?
|
||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||
for key, value in got_dict.items()
|
||||
if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
||||
for key, value in got_dict.items()
|
||||
if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
||||
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
|
||||
if missing_keys:
|
||||
def _repr(v):
|
||||
|
|
|
@ -314,7 +314,7 @@ class YoutubeDL(object):
|
|||
self._output_process.stdin.write((message + '\n').encode('utf-8'))
|
||||
self._output_process.stdin.flush()
|
||||
res = ''.join(self._output_channel.readline().decode('utf-8')
|
||||
for _ in range(line_count))
|
||||
for _ in range(line_count))
|
||||
return res[:-len('\n')]
|
||||
|
||||
def to_screen(self, message, skip_eol=False):
|
||||
|
@ -701,13 +701,15 @@ class YoutubeDL(object):
|
|||
'It needs to be updated.' % ie_result.get('extractor'))
|
||||
|
||||
def _fixup(r):
|
||||
self.add_extra_info(r,
|
||||
self.add_extra_info(
|
||||
r,
|
||||
{
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
})
|
||||
}
|
||||
)
|
||||
return r
|
||||
ie_result['entries'] = [
|
||||
self.process_ie_result(_fixup(r), download, extra_info)
|
||||
|
@ -857,14 +859,14 @@ class YoutubeDL(object):
|
|||
# Two formats have been requested like '137+139'
|
||||
format_1, format_2 = rf.split('+')
|
||||
formats_info = (self.select_format(format_1, formats),
|
||||
self.select_format(format_2, formats))
|
||||
self.select_format(format_2, formats))
|
||||
if all(formats_info):
|
||||
# The first format must contain the video and the
|
||||
# second the audio
|
||||
if formats_info[0].get('vcodec') == 'none':
|
||||
self.report_error('The first format must '
|
||||
'contain the video, try using '
|
||||
'"-f %s+%s"' % (format_2, format_1))
|
||||
'contain the video, try using '
|
||||
'"-f %s+%s"' % (format_2, format_1))
|
||||
return
|
||||
selected_format = {
|
||||
'requested_formats': formats_info,
|
||||
|
@ -1042,10 +1044,10 @@ class YoutubeDL(object):
|
|||
with open(thumb_filename, 'wb') as thumbf:
|
||||
shutil.copyfileobj(uf, thumbf)
|
||||
self.to_screen('[%s] %s: Writing thumbnail to: %s' %
|
||||
(info_dict['extractor'], info_dict['id'], thumb_filename))
|
||||
(info_dict['extractor'], info_dict['id'], thumb_filename))
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self.report_warning('Unable to download thumbnail "%s": %s' %
|
||||
(info_dict['thumbnail'], compat_str(err)))
|
||||
(info_dict['thumbnail'], compat_str(err)))
|
||||
|
||||
if not self.params.get('skip_download', False):
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
|
||||
|
@ -1066,8 +1068,8 @@ class YoutubeDL(object):
|
|||
if not merger._executable:
|
||||
postprocessors = []
|
||||
self.report_warning('You have requested multiple '
|
||||
'formats but ffmpeg or avconv are not installed.'
|
||||
' The formats won\'t be merged')
|
||||
'formats but ffmpeg or avconv are not installed.'
|
||||
' The formats won\'t be merged')
|
||||
else:
|
||||
postprocessors = [merger]
|
||||
for f in info_dict['requested_formats']:
|
||||
|
|
|
@ -116,7 +116,7 @@ except ImportError: # Python 2
|
|||
# Python 2's version is apparently totally broken
|
||||
|
||||
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
|
||||
encoding='utf-8', errors='replace'):
|
||||
encoding='utf-8', errors='replace'):
|
||||
qs, _coerce_result = qs, unicode
|
||||
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
||||
r = []
|
||||
|
@ -145,10 +145,10 @@ except ImportError: # Python 2
|
|||
return r
|
||||
|
||||
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
|
||||
encoding='utf-8', errors='replace'):
|
||||
encoding='utf-8', errors='replace'):
|
||||
parsed_result = {}
|
||||
pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
|
||||
encoding=encoding, errors=errors)
|
||||
encoding=encoding, errors=errors)
|
||||
for name, value in pairs:
|
||||
if name in parsed_result:
|
||||
parsed_result[name].append(value)
|
||||
|
|
|
@ -225,13 +225,15 @@ class F4mFD(FileDownloader):
|
|||
self.to_screen('[download] Downloading f4m manifest')
|
||||
manifest = self.ydl.urlopen(man_url).read()
|
||||
self.report_destination(filename)
|
||||
http_dl = HttpQuietDownloader(self.ydl,
|
||||
http_dl = HttpQuietDownloader(
|
||||
self.ydl,
|
||||
{
|
||||
'continuedl': True,
|
||||
'quiet': True,
|
||||
'noprogress': True,
|
||||
'test': self.params.get('test', False),
|
||||
})
|
||||
}
|
||||
)
|
||||
|
||||
doc = etree.fromstring(manifest)
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
|
||||
|
@ -277,7 +279,7 @@ class F4mFD(FileDownloader):
|
|||
def frag_progress_hook(status):
|
||||
frag_total_bytes = status.get('total_bytes', 0)
|
||||
estimated_size = (state['downloaded_bytes'] +
|
||||
(total_frags - state['frag_counter']) * frag_total_bytes)
|
||||
(total_frags - state['frag_counter']) * frag_total_bytes)
|
||||
if status['status'] == 'finished':
|
||||
state['downloaded_bytes'] += frag_total_bytes
|
||||
state['frag_counter'] += 1
|
||||
|
@ -287,13 +289,13 @@ class F4mFD(FileDownloader):
|
|||
frag_downloaded_bytes = status['downloaded_bytes']
|
||||
byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
|
||||
frag_progress = self.calc_percent(frag_downloaded_bytes,
|
||||
frag_total_bytes)
|
||||
frag_total_bytes)
|
||||
progress = self.calc_percent(state['frag_counter'], total_frags)
|
||||
progress += frag_progress / float(total_frags)
|
||||
|
||||
eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
|
||||
self.report_progress(progress, format_bytes(estimated_size),
|
||||
status.get('speed'), eta)
|
||||
status.get('speed'), eta)
|
||||
http_dl.add_progress_hook(frag_progress_hook)
|
||||
|
||||
frags_filenames = []
|
||||
|
|
|
@ -88,7 +88,7 @@ class AppleTrailersIE(InfoExtractor):
|
|||
for li in doc.findall('./div/ul/li'):
|
||||
on_click = li.find('.//a').attrib['onClick']
|
||||
trailer_info_json = self._search_regex(self._JSON_RE,
|
||||
on_click, 'trailer info')
|
||||
on_click, 'trailer info')
|
||||
trailer_info = json.loads(trailer_info_json)
|
||||
title = trailer_info['title']
|
||||
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
|
||||
|
|
|
@ -38,7 +38,7 @@ class BambuserIE(InfoExtractor):
|
|||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
|
||||
'&api_key=%s&vid=%s' % (self._API_KEY, video_id))
|
||||
'&api_key=%s&vid=%s' % (self._API_KEY, video_id))
|
||||
info_json = self._download_webpage(info_url, video_id)
|
||||
info = json.loads(info_json)['result']
|
||||
|
||||
|
@ -74,8 +74,8 @@ class BambuserChannelIE(InfoExtractor):
|
|||
last_id = ''
|
||||
for i in itertools.count(1):
|
||||
req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
|
||||
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
|
||||
'&method=broadcast&format=json&vid_older_than={last}'
|
||||
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
|
||||
'&method=broadcast&format=json&vid_older_than={last}'
|
||||
).format(user=user, count=self._STEP, last=last_id)
|
||||
req = compat_urllib_request.Request(req_url)
|
||||
# Without setting this header, we wouldn't get any result
|
||||
|
|
|
@ -165,10 +165,10 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
|||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
if re.search(r'id="emp-error" class="notinuk">', webpage):
|
||||
raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
expected=True)
|
||||
expected=True)
|
||||
|
||||
playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
|
||||
'Downloading playlist XML')
|
||||
'Downloading playlist XML')
|
||||
|
||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||
if no_items is not None:
|
||||
|
|
|
@ -25,8 +25,7 @@ class CNNIE(InfoExtractor):
|
|||
'duration': 135,
|
||||
'upload_date': '20130609',
|
||||
},
|
||||
},
|
||||
{
|
||||
}, {
|
||||
"url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
|
||||
"md5": "b5cc60c60a3477d185af8f19a2a26f4e",
|
||||
"info_dict": {
|
||||
|
|
|
@ -10,47 +10,46 @@ from ..utils import int_or_none
|
|||
class CollegeHumorIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
|
||||
'info_dict': {
|
||||
'id': '6902724',
|
||||
'ext': 'mp4',
|
||||
'title': 'Comic-Con Cosplay Catastrophe',
|
||||
'description': "Fans get creative this year at San Diego. Too creative. And yes, that's really Joss Whedon.",
|
||||
'age_limit': 13,
|
||||
'duration': 187,
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
|
||||
'info_dict': {
|
||||
'id': '6902724',
|
||||
'ext': 'mp4',
|
||||
'title': 'Comic-Con Cosplay Catastrophe',
|
||||
'description': "Fans get creative this year at San Diego. Too creative. And yes, that's really Joss Whedon.",
|
||||
'age_limit': 13,
|
||||
'duration': 187,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
|
||||
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
|
||||
'info_dict': {
|
||||
'id': '3505939',
|
||||
'ext': 'mp4',
|
||||
'title': 'Font Conference',
|
||||
'description': "This video wasn't long enough, so we made it double-spaced.",
|
||||
'age_limit': 10,
|
||||
'duration': 179,
|
||||
},
|
||||
}, {
|
||||
# embedded youtube video
|
||||
'url': 'http://www.collegehumor.com/embed/6950306',
|
||||
'info_dict': {
|
||||
'id': 'Z-bao9fg6Yc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
|
||||
'uploader': 'Mark Dice',
|
||||
'uploader_id': 'MarkDice',
|
||||
'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
|
||||
'upload_date': '20140127',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
|
||||
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
|
||||
'info_dict': {
|
||||
'id': '3505939',
|
||||
'ext': 'mp4',
|
||||
'title': 'Font Conference',
|
||||
'description': "This video wasn't long enough, so we made it double-spaced.",
|
||||
'age_limit': 10,
|
||||
'duration': 179,
|
||||
},
|
||||
},
|
||||
# embedded youtube video
|
||||
{
|
||||
'url': 'http://www.collegehumor.com/embed/6950306',
|
||||
'info_dict': {
|
||||
'id': 'Z-bao9fg6Yc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
|
||||
'uploader': 'Mark Dice',
|
||||
'uploader_id': 'MarkDice',
|
||||
'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
|
||||
'upload_date': '20140127',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -478,7 +478,7 @@ class InfoExtractor(object):
|
|||
raise RegexNotFoundError('Unable to extract %s' % _name)
|
||||
else:
|
||||
self._downloader.report_warning('unable to extract %s; '
|
||||
'please report this issue on http://yt-dl.org/bug' % _name)
|
||||
'please report this issue on http://yt-dl.org/bug' % _name)
|
||||
return None
|
||||
|
||||
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||
|
@ -612,7 +612,7 @@ class InfoExtractor(object):
|
|||
|
||||
def _twitter_search_player(self, html):
|
||||
return self._html_search_meta('twitter:player', html,
|
||||
'twitter card player')
|
||||
'twitter card player')
|
||||
|
||||
def _sort_formats(self, formats):
|
||||
if not formats:
|
||||
|
|
|
@ -114,7 +114,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||
embed_page = self._download_webpage(embed_url, video_id,
|
||||
'Downloading embed page')
|
||||
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
|
||||
'video info', flags=re.MULTILINE)
|
||||
'video info', flags=re.MULTILINE)
|
||||
info = json.loads(info)
|
||||
if info.get('error') is not None:
|
||||
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
||||
|
@ -208,7 +208,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
|||
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
||||
break
|
||||
return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
|
||||
for video_id in orderedSet(video_ids)]
|
||||
for video_id in orderedSet(video_ids)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
|
|
@ -9,7 +9,7 @@ from .common import InfoExtractor
|
|||
class DefenseGouvFrIE(InfoExtractor):
|
||||
IE_NAME = 'defense.gouv.fr'
|
||||
_VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
|
||||
r'ligthboxvideo/base-de-medias/webtv/(.*)')
|
||||
r'ligthboxvideo/base-de-medias/webtv/(.*)')
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
|
||||
|
@ -28,9 +28,9 @@ class DefenseGouvFrIE(InfoExtractor):
|
|||
webpage, 'ID')
|
||||
|
||||
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
|
||||
+ video_id)
|
||||
+ video_id)
|
||||
info = self._download_webpage(json_url, title,
|
||||
'Downloading JSON config')
|
||||
'Downloading JSON config')
|
||||
video_url = json.loads(info)['renditions'][0]['url']
|
||||
|
||||
return {'id': video_id,
|
||||
|
|
|
@ -16,9 +16,9 @@ class DiscoveryIE(InfoExtractor):
|
|||
'ext': 'mp4',
|
||||
'title': 'MythBusters: Mission Impossible Outtakes',
|
||||
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
|
||||
' each other -- to the point of confusing Jamie\'s dog -- and '
|
||||
'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
|
||||
' back.'),
|
||||
' each other -- to the point of confusing Jamie\'s dog -- and '
|
||||
'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
|
||||
' back.'),
|
||||
'duration': 156,
|
||||
},
|
||||
}
|
||||
|
@ -29,7 +29,7 @@ class DiscoveryIE(InfoExtractor):
|
|||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_list_json = self._search_regex(r'var videoListJSON = ({.*?});',
|
||||
webpage, 'video list', flags=re.DOTALL)
|
||||
webpage, 'video list', flags=re.DOTALL)
|
||||
video_list = json.loads(video_list_json)
|
||||
info = video_list['clips'][0]
|
||||
formats = []
|
||||
|
|
|
@ -11,18 +11,18 @@ from ..utils import url_basename
|
|||
|
||||
class DropboxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
|
||||
'info_dict': {
|
||||
'id': 'nelirfsxnmcfbfh',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video \'ä"BaW_jenozKc'
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
|
||||
'only_matching': True,
|
||||
},
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
|
||||
'info_dict': {
|
||||
'id': 'nelirfsxnmcfbfh',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video \'ä"BaW_jenozKc'
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -28,7 +28,7 @@ class EHowIE(InfoExtractor):
|
|||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
|
||||
webpage, 'video URL')
|
||||
webpage, 'video URL')
|
||||
final_url = compat_urllib_parse.unquote(video_url)
|
||||
uploader = self._html_search_meta('uploader', webpage)
|
||||
title = self._og_search_title(webpage).replace(' | eHow', '')
|
||||
|
|
|
@ -60,8 +60,8 @@ class FacebookIE(InfoExtractor):
|
|||
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
|
||||
login_page_req.add_header('Cookie', 'locale=en_US')
|
||||
login_page = self._download_webpage(login_page_req, None,
|
||||
note='Downloading login page',
|
||||
errnote='Unable to download login page')
|
||||
note='Downloading login page',
|
||||
errnote='Unable to download login page')
|
||||
lsd = self._search_regex(
|
||||
r'<input type="hidden" name="lsd" value="([^"]*)"',
|
||||
login_page, 'lsd')
|
||||
|
@ -82,7 +82,7 @@ class FacebookIE(InfoExtractor):
|
|||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
try:
|
||||
login_results = self._download_webpage(request, None,
|
||||
note='Logging in', errnote='unable to fetch login page')
|
||||
note='Logging in', errnote='unable to fetch login page')
|
||||
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
|
||||
self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
|
||||
return
|
||||
|
@ -96,7 +96,7 @@ class FacebookIE(InfoExtractor):
|
|||
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
check_response = self._download_webpage(check_req, None,
|
||||
note='Confirming login')
|
||||
note='Confirming login')
|
||||
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
|
||||
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
|
|
|
@ -44,9 +44,9 @@ class FirstTVIE(InfoExtractor):
|
|||
duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
|
||||
|
||||
like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
|
||||
webpage, 'like count', fatal=False)
|
||||
webpage, 'like count', fatal=False)
|
||||
dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
|
||||
webpage, 'dislike count', fatal=False)
|
||||
webpage, 'dislike count', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
|
|
@ -50,7 +50,7 @@ class FiveMinIE(InfoExtractor):
|
|||
video_id = mobj.group('id')
|
||||
embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
|
||||
embed_page = self._download_webpage(embed_url, video_id,
|
||||
'Downloading embed page')
|
||||
'Downloading embed page')
|
||||
sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
|
||||
query = compat_urllib_parse.urlencode({
|
||||
'func': 'GetResults',
|
||||
|
|
|
@ -32,9 +32,9 @@ class FKTVIE(InfoExtractor):
|
|||
server = random.randint(2, 4)
|
||||
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
|
||||
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
|
||||
episode)
|
||||
episode)
|
||||
playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
|
||||
'playlist', flags=re.DOTALL)
|
||||
'playlist', flags=re.DOTALL)
|
||||
files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
|
||||
# TODO: return a single multipart video
|
||||
videos = []
|
||||
|
|
|
@ -37,7 +37,7 @@ class FlickrIE(InfoExtractor):
|
|||
first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
|
||||
|
||||
node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
|
||||
first_xml, 'node_id')
|
||||
first_xml, 'node_id')
|
||||
|
||||
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
|
||||
second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
|
||||
|
|
|
@ -55,7 +55,7 @@ class FourTubeIE(InfoExtractor):
|
|||
description = self._html_search_meta('description', webpage, 'description')
|
||||
if description:
|
||||
upload_date = self._search_regex(r'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description, 'upload date',
|
||||
fatal=False)
|
||||
fatal=False)
|
||||
if upload_date:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
view_count = self._search_regex(r'Views: ([\d,\.]+)', description, 'view count', fatal=False)
|
||||
|
|
|
@ -234,7 +234,7 @@ class GenerationQuoiIE(InfoExtractor):
|
|||
info_json = self._download_webpage(info_url, name)
|
||||
info = json.loads(info_json)
|
||||
return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
|
||||
ie='Dailymotion')
|
||||
ie='Dailymotion')
|
||||
|
||||
|
||||
class CultureboxIE(FranceTVBaseInfoExtractor):
|
||||
|
|
|
@ -784,7 +784,7 @@ class GenericIE(InfoExtractor):
|
|||
|
||||
# Look for Ooyala videos
|
||||
mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||
re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||
if mobj is not None:
|
||||
return OoyalaIE._build_url_result(mobj.group('ec'))
|
||||
|
||||
|
|
|
@ -27,10 +27,10 @@ class HowcastIE(InfoExtractor):
|
|||
self.report_extraction(video_id)
|
||||
|
||||
video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
|
||||
webpage, 'video URL')
|
||||
webpage, 'video URL')
|
||||
|
||||
video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
|
||||
webpage, 'description', fatal=False)
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
|
|
@ -99,7 +99,7 @@ class IGNIE(InfoExtractor):
|
|||
video_id = self._find_video_id(webpage)
|
||||
result = self._get_video_info(video_id)
|
||||
description = self._html_search_regex(self._DESCRIPTION_RE,
|
||||
webpage, 'video description', flags=re.DOTALL)
|
||||
webpage, 'video description', flags=re.DOTALL)
|
||||
result['description'] = description
|
||||
return result
|
||||
|
||||
|
|
|
@ -27,9 +27,9 @@ class InstagramIE(InfoExtractor):
|
|||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
|
||||
webpage, 'uploader id', fatal=False)
|
||||
webpage, 'uploader id', fatal=False)
|
||||
desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description',
|
||||
fatal=False)
|
||||
fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
|
|
@ -45,22 +45,26 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||
url = self._build_url(query)
|
||||
|
||||
flashconfiguration = self._download_xml(url, video_id,
|
||||
'Downloading flash configuration')
|
||||
'Downloading flash configuration')
|
||||
file_url = flashconfiguration.find('file').text
|
||||
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
||||
# Replace some of the parameters in the query to get the best quality
|
||||
# and http links (no m3u8 manifests)
|
||||
file_url = re.sub(r'(?<=\?)(.+)$',
|
||||
lambda m: self._clean_query(m.group()),
|
||||
file_url)
|
||||
lambda m: self._clean_query(m.group()),
|
||||
file_url)
|
||||
info = self._download_xml(file_url, video_id,
|
||||
'Downloading video info')
|
||||
'Downloading video info')
|
||||
item = info.find('channel/item')
|
||||
|
||||
def _bp(p):
|
||||
return xpath_with_ns(p,
|
||||
{'media': 'http://search.yahoo.com/mrss/',
|
||||
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'})
|
||||
return xpath_with_ns(
|
||||
p,
|
||||
{
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
|
||||
}
|
||||
)
|
||||
formats = []
|
||||
for content in item.findall(_bp('media:group/media:content')):
|
||||
attr = content.attrib
|
||||
|
|
|
@ -36,7 +36,7 @@ class JukeboxIE(InfoExtractor):
|
|||
|
||||
try:
|
||||
video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
|
||||
iframe_html, 'video url')
|
||||
iframe_html, 'video url')
|
||||
video_url = unescapeHTML(video_url).replace('\/', '/')
|
||||
except RegexNotFoundError:
|
||||
youtube_url = self._search_regex(
|
||||
|
@ -47,9 +47,9 @@ class JukeboxIE(InfoExtractor):
|
|||
return self.url_result(youtube_url, ie='Youtube')
|
||||
|
||||
title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
|
||||
html, 'title')
|
||||
html, 'title')
|
||||
artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
|
||||
html, 'artist')
|
||||
html, 'artist')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
|
|
@ -13,8 +13,10 @@ class KickStarterIE(InfoExtractor):
|
|||
'id': '1404461844',
|
||||
'ext': 'mp4',
|
||||
'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
|
||||
'description': 'A unique motocross documentary that examines the '
|
||||
'life and mind of one of sports most elite athletes: Josh Grant.',
|
||||
'description': (
|
||||
'A unique motocross documentary that examines the '
|
||||
'life and mind of one of sports most elite athletes: Josh Grant.'
|
||||
),
|
||||
},
|
||||
}, {
|
||||
'note': 'Embedded video (not using the native kickstarter video service)',
|
||||
|
|
|
@ -45,7 +45,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
|||
video_id = mobj.group(1)
|
||||
|
||||
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
|
||||
'Downloading video JSON')
|
||||
'Downloading video JSON')
|
||||
video_json = json.loads(page)
|
||||
|
||||
if 'Status' in video_json:
|
||||
|
|
|
@ -27,7 +27,7 @@ class M6IE(InfoExtractor):
|
|||
video_id = mobj.group('id')
|
||||
|
||||
rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
|
||||
'Downloading video RSS')
|
||||
'Downloading video RSS')
|
||||
|
||||
title = rss.find('./channel/item/title').text
|
||||
description = rss.find('./channel/item/description').text
|
||||
|
|
|
@ -219,8 +219,8 @@ class MetacafeIE(InfoExtractor):
|
|||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
video_uploader = self._html_search_regex(
|
||||
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
||||
webpage, 'uploader nickname', fatal=False)
|
||||
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
||||
webpage, 'uploader nickname', fatal=False)
|
||||
duration = int_or_none(
|
||||
self._html_search_meta('video:duration', webpage))
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ class MetacriticIE(InfoExtractor):
|
|||
webpage = self._download_webpage(url, video_id)
|
||||
# The xml is not well formatted, there are raw '&'
|
||||
info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
|
||||
video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)
|
||||
video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)
|
||||
|
||||
clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
|
||||
formats = []
|
||||
|
@ -44,7 +44,7 @@ class MetacriticIE(InfoExtractor):
|
|||
self._sort_formats(formats)
|
||||
|
||||
description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
|
||||
webpage, 'description', flags=re.DOTALL)
|
||||
webpage, 'description', flags=re.DOTALL)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
|
|
@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||
# Otherwise we get a webpage that would execute some javascript
|
||||
req.add_header('Youtubedl-user-agent', 'curl/7')
|
||||
webpage = self._download_webpage(req, mtvn_id,
|
||||
'Downloading mobile page')
|
||||
'Downloading mobile page')
|
||||
metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
|
||||
req = HEADRequest(metrics_url)
|
||||
response = self._request_webpage(req, mtvn_id, 'Resolving url')
|
||||
|
@ -66,10 +66,10 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
|
||||
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
|
||||
self.to_screen('The normal version is not available from your '
|
||||
'country, trying with the mobile version')
|
||||
'country, trying with the mobile version')
|
||||
return self._extract_mobile_video_formats(mtvn_id)
|
||||
raise ExtractorError('This video is not available from your country.',
|
||||
expected=True)
|
||||
expected=True)
|
||||
|
||||
formats = []
|
||||
for rendition in mdoc.findall('.//rendition'):
|
||||
|
@ -98,7 +98,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||
mediagen_url += '&acceptMethods=fms'
|
||||
|
||||
mediagen_doc = self._download_xml(mediagen_url, video_id,
|
||||
'Downloading video urls')
|
||||
'Downloading video urls')
|
||||
|
||||
description_node = itemdoc.find('description')
|
||||
if description_node is not None:
|
||||
|
@ -126,7 +126,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||
# This a short id that's used in the webpage urls
|
||||
mtvn_id = None
|
||||
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||
'scheme', 'urn:mtvn:id')
|
||||
'scheme', 'urn:mtvn:id')
|
||||
if mtvn_id_node is not None:
|
||||
mtvn_id = mtvn_id_node.text
|
||||
|
||||
|
@ -188,7 +188,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
|||
video_id = self._id_from_uri(uri)
|
||||
site_id = uri.replace(video_id, '')
|
||||
config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/'
|
||||
'context4/context5/config.xml'.format(site_id))
|
||||
'context4/context5/config.xml'.format(site_id))
|
||||
config_doc = self._download_xml(config_url, video_id)
|
||||
feed_node = config_doc.find('.//feed')
|
||||
feed_url = feed_node.text.strip().split('?')[0]
|
||||
|
|
|
@ -53,7 +53,7 @@ class MySpaceIE(InfoExtractor):
|
|||
# songs don't store any useful info in the 'context' variable
|
||||
def search_data(name):
|
||||
return self._search_regex(r'data-%s="(.*?)"' % name, webpage,
|
||||
name)
|
||||
name)
|
||||
streamUrl = search_data('stream-url')
|
||||
info = {
|
||||
'id': video_id,
|
||||
|
@ -63,7 +63,7 @@ class MySpaceIE(InfoExtractor):
|
|||
}
|
||||
else:
|
||||
context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
|
||||
u'context'))
|
||||
u'context'))
|
||||
video = context['video']
|
||||
streamUrl = video['streamUrl']
|
||||
info = {
|
||||
|
|
|
@ -72,7 +72,7 @@ class MyVideoIE(InfoExtractor):
|
|||
video_url = mobj.group(1) + '.flv'
|
||||
|
||||
video_title = self._html_search_regex('<title>([^<]+)</title>',
|
||||
webpage, 'title')
|
||||
webpage, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -162,7 +162,7 @@ class MyVideoIE(InfoExtractor):
|
|||
video_swfobj = compat_urllib_parse.unquote(video_swfobj)
|
||||
|
||||
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
|
||||
webpage, 'title')
|
||||
webpage, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
|
|
@ -30,7 +30,7 @@ class NaverIE(InfoExtractor):
|
|||
video_id = mobj.group(1)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
|
||||
webpage)
|
||||
webpage)
|
||||
if m_id is None:
|
||||
m_error = re.search(
|
||||
r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
|
||||
|
|
|
@ -38,12 +38,12 @@ class NFBIE(InfoExtractor):
|
|||
page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')
|
||||
|
||||
uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
|
||||
page, 'director id', fatal=False)
|
||||
page, 'director id', fatal=False)
|
||||
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
|
||||
page, 'director name', fatal=False)
|
||||
page, 'director name', fatal=False)
|
||||
|
||||
request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
|
||||
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
|
||||
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
|
||||
|
||||
|
|
|
@ -125,7 +125,7 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
|
|||
self._downloader.report_warning(u'Got an empty reponse, trying '
|
||||
'adding the "newvideos" parameter')
|
||||
response = self._download_webpage(request_url + '&newvideos=true',
|
||||
playlist_title)
|
||||
playlist_title)
|
||||
response = self._fix_json(response)
|
||||
videos = json.loads(response)
|
||||
|
||||
|
|
|
@ -111,7 +111,7 @@ class NiconicoIE(InfoExtractor):
|
|||
|
||||
if 'deleted=' in flv_info_webpage:
|
||||
raise ExtractorError('The video has been deleted.',
|
||||
expected=True)
|
||||
expected=True)
|
||||
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
|
||||
|
||||
# Start extracting information
|
||||
|
@ -170,13 +170,13 @@ class NiconicoPlaylistIE(InfoExtractor):
|
|||
webpage = self._download_webpage(url, list_id)
|
||||
|
||||
entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
|
||||
webpage, 'entries')
|
||||
webpage, 'entries')
|
||||
entries = json.loads(entries_json)
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'ie_key': NiconicoIE.ie_key(),
|
||||
'url': ('http://www.nicovideo.jp/watch/%s' %
|
||||
entry['item_data']['video_id']),
|
||||
entry['item_data']['video_id']),
|
||||
} for entry in entries]
|
||||
|
||||
return {
|
||||
|
|
|
@ -27,8 +27,7 @@ class NineGagIE(InfoExtractor):
|
|||
"thumbnail": "re:^https?://",
|
||||
},
|
||||
'add_ie': ['Youtube']
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
|
||||
'info_dict': {
|
||||
'id': 'KklwM',
|
||||
|
|
|
@ -31,9 +31,9 @@ class NormalbootsIE(InfoExtractor):
|
|||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
|
||||
webpage, 'uploader')
|
||||
webpage, 'uploader')
|
||||
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
|
||||
webpage, 'date')
|
||||
webpage, 'date')
|
||||
video_upload_date = unified_strdate(raw_upload_date)
|
||||
|
||||
player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
|
||||
|
|
|
@ -43,7 +43,7 @@ class OoyalaIE(InfoExtractor):
|
|||
@classmethod
|
||||
def _build_url_result(cls, embed_code):
|
||||
return cls.url_result(cls._url_for_embed_code(embed_code),
|
||||
ie=cls.ie_key())
|
||||
ie=cls.ie_key())
|
||||
|
||||
def _extract_result(self, info, more_info):
|
||||
return {
|
||||
|
|
|
@ -31,7 +31,7 @@ class PhotobucketIE(InfoExtractor):
|
|||
# Extract URL, uploader, and title from webpage
|
||||
self.report_extraction(video_id)
|
||||
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
|
||||
webpage, 'info json')
|
||||
webpage, 'info json')
|
||||
info = json.loads(info_json)
|
||||
url = compat_urllib_parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
|
||||
return {
|
||||
|
|
|
@ -33,7 +33,7 @@ class RBMARadioIE(InfoExtractor):
|
|||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
|
||||
webpage, 'json data', flags=re.MULTILINE)
|
||||
webpage, 'json data', flags=re.MULTILINE)
|
||||
|
||||
try:
|
||||
data = json.loads(json_data)
|
||||
|
|
|
@ -27,8 +27,7 @@ class SBSIE(InfoExtractor):
|
|||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
'add_ies': ['generic'],
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
|
|
@ -96,7 +96,7 @@ class ScreencastIE(InfoExtractor):
|
|||
if title is None:
|
||||
title = self._html_search_regex(
|
||||
[r'<b>Title:</b> ([^<]*)</div>',
|
||||
r'class="tabSeperator">></span><span class="tabText">(.*?)<'],
|
||||
r'class="tabSeperator">></span><span class="tabText">(.*?)<'],
|
||||
webpage, 'title')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
|
|
|
@ -46,7 +46,7 @@ class SinaIE(InfoExtractor):
|
|||
def _extract_video(self, video_id):
|
||||
data = compat_urllib_parse.urlencode({'vid': video_id})
|
||||
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
|
||||
video_id, 'Downloading video url')
|
||||
video_id, 'Downloading video url')
|
||||
image_page = self._download_webpage(
|
||||
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
|
||||
video_id, 'Downloading thumbnail info')
|
||||
|
|
|
@ -26,7 +26,7 @@ class SlutloadIE(InfoExtractor):
|
|||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
|
||||
webpage, 'title').strip()
|
||||
webpage, 'title').strip()
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
|
||||
|
|
|
@ -282,7 +282,7 @@ class SmotriBroadcastIE(InfoExtractor):
|
|||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
raise ExtractorError('Erotic broadcasts allowed only for registered users, '
|
||||
'use --username and --password options to provide account credentials.', expected=True)
|
||||
'use --username and --password options to provide account credentials.', expected=True)
|
||||
|
||||
login_form = {
|
||||
'login-hint53': '1',
|
||||
|
|
|
@ -159,7 +159,7 @@ class SoundcloudIE(InfoExtractor):
|
|||
|
||||
# We have to retrieve the url
|
||||
streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
|
||||
'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
|
||||
'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
|
||||
format_dict = self._download_json(
|
||||
streams_url,
|
||||
track_id, 'Downloading track url')
|
||||
|
|
|
@ -82,7 +82,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
|
|||
|
||||
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
|
||||
rootpage = self._download_webpage(rootURL, info['id'],
|
||||
errnote='Unable to download course info page')
|
||||
errnote='Unable to download course info page')
|
||||
|
||||
links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
|
||||
info['entries'] = [self.url_result(
|
||||
|
|
|
@ -8,24 +8,23 @@ from .common import InfoExtractor
|
|||
class TeamcocoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
|
||||
'file': '80187.mp4',
|
||||
'md5': '3f7746aa0dc86de18df7539903d399ea',
|
||||
'info_dict': {
|
||||
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
|
||||
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
|
||||
{
|
||||
'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
|
||||
'file': '80187.mp4',
|
||||
'md5': '3f7746aa0dc86de18df7539903d399ea',
|
||||
'info_dict': {
|
||||
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
|
||||
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
|
||||
}
|
||||
}, {
|
||||
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
|
||||
'file': '19705.mp4',
|
||||
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
|
||||
'info_dict': {
|
||||
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
|
||||
"title": "Louis C.K. Interview Pt. 1 11/3/11"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
|
||||
'file': '19705.mp4',
|
||||
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
|
||||
'info_dict': {
|
||||
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
|
||||
"title": "Louis C.K. Interview Pt. 1 11/3/11"
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -33,9 +33,9 @@ class TEDIE(SubtitlesInfoExtractor):
|
|||
'ext': 'mp4',
|
||||
'title': 'The illusion of consciousness',
|
||||
'description': ('Philosopher Dan Dennett makes a compelling '
|
||||
'argument that not only don\'t we understand our own '
|
||||
'consciousness, but that half the time our brains are '
|
||||
'actively fooling us.'),
|
||||
'argument that not only don\'t we understand our own '
|
||||
'consciousness, but that half the time our brains are '
|
||||
'actively fooling us.'),
|
||||
'uploader': 'Dan Dennett',
|
||||
'width': 854,
|
||||
'duration': 1308,
|
||||
|
@ -93,7 +93,7 @@ class TEDIE(SubtitlesInfoExtractor):
|
|||
|
||||
def _extract_info(self, webpage):
|
||||
info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>',
|
||||
webpage, 'info json')
|
||||
webpage, 'info json')
|
||||
return json.loads(info_json)
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -113,7 +113,7 @@ class TEDIE(SubtitlesInfoExtractor):
|
|||
'''Returns the videos of the playlist'''
|
||||
|
||||
webpage = self._download_webpage(url, name,
|
||||
'Downloading playlist webpage')
|
||||
'Downloading playlist webpage')
|
||||
info = self._extract_info(webpage)
|
||||
playlist_info = info['playlist']
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ class TF1IE(InfoExtractor):
|
|||
embed_url = self._html_search_regex(
|
||||
r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
|
||||
embed_page = self._download_webpage(embed_url, video_id,
|
||||
'Downloading embed player page')
|
||||
'Downloading embed player page')
|
||||
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
|
||||
wat_info = self._download_json(
|
||||
'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
|
||||
|
|
|
@ -47,7 +47,7 @@ class ThePlatformIE(InfoExtractor):
|
|||
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
|
||||
else:
|
||||
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
|
||||
'format=smil&mbr=true'.format(video_id))
|
||||
'format=smil&mbr=true'.format(video_id))
|
||||
|
||||
meta = self._download_xml(smil_url, video_id)
|
||||
try:
|
||||
|
|
|
@ -28,7 +28,7 @@ class TinyPicIE(InfoExtractor):
|
|||
webpage = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
|
||||
'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
|
||||
'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ class TrailerAddictIE(InfoExtractor):
|
|||
webpage = self._download_webpage(url, name)
|
||||
|
||||
title = self._search_regex(r'<title>(.+?)</title>',
|
||||
webpage, 'video title').replace(' - Trailer Addict', '')
|
||||
webpage, 'video title').replace(' - Trailer Addict', '')
|
||||
view_count_str = self._search_regex(
|
||||
r'<span class="views_n">([0-9,.]+)</span>',
|
||||
webpage, 'view count', fatal=False)
|
||||
|
@ -46,9 +46,9 @@ class TrailerAddictIE(InfoExtractor):
|
|||
info_webpage = self._download_webpage(info_url, video_id, "Downloading the info webpage")
|
||||
|
||||
final_url = self._search_regex(r'&fileurl=(.+)',
|
||||
info_webpage, 'Download url').replace('%3F', '?')
|
||||
info_webpage, 'Download url').replace('%3F', '?')
|
||||
thumbnail_url = self._search_regex(r'&image=(.+?)&',
|
||||
info_webpage, 'thumbnail url')
|
||||
info_webpage, 'thumbnail url')
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
|
||||
|
|
|
@ -43,7 +43,7 @@ class TumblrIE(InfoExtractor):
|
|||
webpage, 'iframe url')
|
||||
iframe = self._download_webpage(iframe_url, video_id)
|
||||
video_url = self._search_regex(r'<source src="([^"]+)"',
|
||||
iframe, 'video url')
|
||||
iframe, 'video url')
|
||||
|
||||
# The only place where you can get a title, it's not complete,
|
||||
# but searching in other places doesn't work for all videos
|
||||
|
|
|
@ -154,7 +154,7 @@ class UdemyCourseIE(UdemyIE):
|
|||
self.to_screen('%s: Already enrolled in' % course_id)
|
||||
|
||||
response = self._download_json('https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
|
||||
course_id, 'Downloading course curriculum')
|
||||
course_id, 'Downloading course curriculum')
|
||||
|
||||
entries = [
|
||||
self.url_result('https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy')
|
||||
|
|
|
@ -45,13 +45,13 @@ class UstreamIE(InfoExtractor):
|
|||
self.report_extraction(video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
|
||||
webpage, 'title')
|
||||
webpage, 'title')
|
||||
|
||||
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
|
||||
webpage, 'uploader', fatal=False, flags=re.DOTALL)
|
||||
webpage, 'uploader', fatal=False, flags=re.DOTALL)
|
||||
|
||||
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
|
|
@ -30,13 +30,13 @@ class Vbox7IE(InfoExtractor):
|
|||
|
||||
redirect_page, urlh = self._download_webpage_handle(url, video_id)
|
||||
new_location = self._search_regex(r'window\.location = \'(.*)\';',
|
||||
redirect_page, 'redirect location')
|
||||
redirect_page, 'redirect location')
|
||||
redirect_url = urlh.geturl() + new_location
|
||||
webpage = self._download_webpage(redirect_url, video_id,
|
||||
'Downloading redirect page')
|
||||
'Downloading redirect page')
|
||||
|
||||
title = self._html_search_regex(r'<title>(.*)</title>',
|
||||
webpage, 'title').split('/')[0].strip()
|
||||
webpage, 'title').split('/')[0].strip()
|
||||
|
||||
info_url = "http://vbox7.com/play/magare.do"
|
||||
data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
|
||||
|
|
|
@ -48,11 +48,11 @@ class VeeHDIE(InfoExtractor):
|
|||
video_url = compat_urlparse.unquote(config['clip']['url'])
|
||||
title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
|
||||
uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
|
||||
webpage, 'uploader')
|
||||
webpage, 'uploader')
|
||||
thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
|
||||
webpage, 'thumbnail')
|
||||
webpage, 'thumbnail')
|
||||
description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
|
||||
webpage, 'description', flags=re.DOTALL)
|
||||
webpage, 'description', flags=re.DOTALL)
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
|
|
|
@ -112,7 +112,7 @@ class VestiIE(InfoExtractor):
|
|||
if mobj:
|
||||
video_id = mobj.group('id')
|
||||
page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
|
||||
'Downloading video page')
|
||||
'Downloading video page')
|
||||
|
||||
rutv_url = RUTVIE._extract_url(page)
|
||||
if rutv_url:
|
||||
|
|
|
@ -28,11 +28,11 @@ class VideofyMeIE(InfoExtractor):
|
|||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
|
||||
video_id)
|
||||
video_id)
|
||||
video = config.find('video')
|
||||
sources = video.find('sources')
|
||||
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
|
||||
for key in ['on', 'av', 'off']] if node is not None)
|
||||
for key in ['on', 'av', 'off']] if node is not None)
|
||||
video_url = url_node.find('url').text
|
||||
|
||||
return {'id': video_id,
|
||||
|
|
|
@ -260,7 +260,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||
else:
|
||||
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
|
||||
config = self._search_regex(config_re, webpage, 'info section',
|
||||
flags=re.DOTALL)
|
||||
flags=re.DOTALL)
|
||||
config = json.loads(config)
|
||||
except Exception as e:
|
||||
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
|
||||
|
|
|
@ -121,7 +121,7 @@ class VKIE(InfoExtractor):
|
|||
}
|
||||
|
||||
request = compat_urllib_request.Request('https://login.vk.com/?act=login',
|
||||
compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
|
||||
|
||||
if re.search(r'onLoginFailed', login_page):
|
||||
|
@ -175,7 +175,7 @@ class VKIE(InfoExtractor):
|
|||
upload_date = None
|
||||
mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
|
||||
if mobj is not None:
|
||||
x = mobj.group(1) + ' ' + mobj.group(2)
|
||||
mobj.group(1) + ' ' + mobj.group(2)
|
||||
upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
|
||||
|
||||
formats = [{
|
||||
|
|
|
@ -41,7 +41,7 @@ class WeiboIE(InfoExtractor):
|
|||
videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
|
||||
player_url = videos_urls[-1]
|
||||
m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
|
||||
player_url)
|
||||
player_url)
|
||||
if m_sina is not None:
|
||||
self.to_screen('Sina video detected')
|
||||
sina_id = m_sina.group(1)
|
||||
|
|
|
@ -67,17 +67,17 @@ class XHamsterIE(InfoExtractor):
|
|||
description = mobj.group(1) if mobj else None
|
||||
|
||||
upload_date = self._html_search_regex(r'hint=\'(\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}\'',
|
||||
webpage, 'upload date', fatal=False)
|
||||
webpage, 'upload date', fatal=False)
|
||||
if upload_date:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
|
||||
webpage, 'uploader id', default='anonymous')
|
||||
webpage, 'uploader id', default='anonymous')
|
||||
|
||||
thumbnail = self._html_search_regex(r'<video\s+.*?poster="([^"]+)".*?>', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
duration = parse_duration(self._html_search_regex(r'<span>Runtime:</span> (\d+:\d+)</div>',
|
||||
webpage, 'duration', fatal=False))
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
view_count = self._html_search_regex(r'<span>Views:</span> ([^<]+)</div>', webpage, 'view count', fatal=False)
|
||||
if view_count:
|
||||
|
|
|
@ -30,14 +30,14 @@ class XNXXIE(InfoExtractor):
|
|||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(r'flv_url=(.*?)&',
|
||||
webpage, 'video URL')
|
||||
webpage, 'video URL')
|
||||
video_url = compat_urllib_parse.unquote(video_url)
|
||||
|
||||
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
|
||||
webpage, 'title')
|
||||
webpage, 'title')
|
||||
|
||||
video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
|
|
@ -229,7 +229,7 @@ class YahooSearchIE(SearchInfoExtractor):
|
|||
for pagenum in itertools.count(0):
|
||||
result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
|
||||
info = self._download_json(result_url, query,
|
||||
note='Downloading results page ' + str(pagenum + 1))
|
||||
note='Downloading results page ' + str(pagenum + 1))
|
||||
m = info['m']
|
||||
results = info['results']
|
||||
|
||||
|
|
|
@ -74,7 +74,7 @@ class YoukuIE(InfoExtractor):
|
|||
# -8 means blocked outside China.
|
||||
error = config['data'][0].get('error') # Chinese and English, separated by newline.
|
||||
raise ExtractorError(error or 'Server reported error %i' % error_code,
|
||||
expected=True)
|
||||
expected=True)
|
||||
|
||||
video_title = config['data'][0]['title']
|
||||
seed = config['data'][0]['seed']
|
||||
|
|
|
@ -64,7 +64,7 @@ class YouPornIE(InfoExtractor):
|
|||
# Get all of the links from the page
|
||||
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
|
||||
download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
|
||||
webpage, 'download list').strip()
|
||||
webpage, 'download list').strip()
|
||||
LINK_RE = r'<a href="([^"]+)">'
|
||||
links = re.findall(LINK_RE, download_list_html)
|
||||
|
||||
|
|
|
@ -950,7 +950,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||
|
||||
parts_sizes = self._signature_cache_id(encrypted_sig)
|
||||
self.to_screen('{%s} signature length %s, %s' %
|
||||
(format_id, parts_sizes, player_desc))
|
||||
(format_id, parts_sizes, player_desc))
|
||||
|
||||
signature = self._decrypt_signature(
|
||||
encrypted_sig, video_id, player_url, age_gate)
|
||||
|
@ -1214,7 +1214,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||
class YoutubeTopListIE(YoutubePlaylistIE):
|
||||
IE_NAME = 'youtube:toplist'
|
||||
IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
|
||||
' (Example: "yttoplist:music:Top Tracks")')
|
||||
' (Example: "yttoplist:music:Top Tracks")')
|
||||
_VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
|
||||
_TESTS = [{
|
||||
'url': 'yttoplist:music:Trending',
|
||||
|
|
Loading…
Reference in New Issue