mirror of https://github.com/yt-dlp/yt-dlp.git
parent
a471f21da6
commit
1bdae7d312
|
@ -72,15 +72,6 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||||
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||||
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||||
|
|
||||||
def test_youtube_extract(self):
|
|
||||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
|
||||||
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
|
||||||
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
|
||||||
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
|
||||||
assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
|
|
||||||
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
|
|
||||||
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
|
||||||
|
|
||||||
def test_facebook_matching(self):
|
def test_facebook_matching(self):
|
||||||
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||||
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
|
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
|
||||||
|
|
|
@ -39,6 +39,16 @@ class TestExecution(unittest.TestCase):
|
||||||
_, stderr = p.communicate()
|
_, stderr = p.communicate()
|
||||||
self.assertFalse(stderr)
|
self.assertFalse(stderr)
|
||||||
|
|
||||||
|
def test_lazy_extractors(self):
|
||||||
|
try:
|
||||||
|
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'yt_dlp/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
|
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
|
finally:
|
||||||
|
try:
|
||||||
|
os.remove('yt_dlp/extractor/lazy_extractors.py')
|
||||||
|
except (IOError, OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
|
from yt_dlp.extractor import YoutubeIE
|
||||||
|
|
||||||
|
|
||||||
|
class TestYoutubeMisc(unittest.TestCase):
|
||||||
|
def test_youtube_extract(self):
|
||||||
|
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||||
|
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
|
||||||
|
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
|
@ -11,6 +11,7 @@ from ..compat import (
|
||||||
compat_etree_Element,
|
compat_etree_Element,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
|
compat_str,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
|
@ -25,8 +26,10 @@ from ..utils import (
|
||||||
js_to_json,
|
js_to_json,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
|
@ -761,8 +764,17 @@ class BBCIE(BBCCoUkIE):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# custom redirection to www.bbc.com
|
# custom redirection to www.bbc.com
|
||||||
|
# also, video with window.__INITIAL_DATA__
|
||||||
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
||||||
'only_matching': True,
|
'info_dict': {
|
||||||
|
'id': 'p02xzws1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Pluto may have 'nitrogen glaciers'",
|
||||||
|
'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
|
||||||
|
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||||
|
'timestamp': 1437785037,
|
||||||
|
'upload_date': '20150725',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# single video article embedded with data-media-vpid
|
# single video article embedded with data-media-vpid
|
||||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||||
|
@ -1164,12 +1176,29 @@ class BBCIE(BBCCoUkIE):
|
||||||
continue
|
continue
|
||||||
formats, subtitles = self._download_media_selector(item_id)
|
formats, subtitles = self._download_media_selector(item_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
item_desc = None
|
||||||
|
blocks = try_get(media, lambda x: x['summary']['blocks'], list)
|
||||||
|
if blocks:
|
||||||
|
summary = []
|
||||||
|
for block in blocks:
|
||||||
|
text = try_get(block, lambda x: x['model']['text'], compat_str)
|
||||||
|
if text:
|
||||||
|
summary.append(text)
|
||||||
|
if summary:
|
||||||
|
item_desc = '\n\n'.join(summary)
|
||||||
|
item_time = None
|
||||||
|
for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
|
||||||
|
if try_get(meta, lambda x: x['label']) == 'Published':
|
||||||
|
item_time = unified_timestamp(meta.get('timestamp'))
|
||||||
|
break
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': item_id,
|
'id': item_id,
|
||||||
'title': item_title,
|
'title': item_title,
|
||||||
'thumbnail': item.get('holdingImageUrl'),
|
'thumbnail': item.get('holdingImageUrl'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'timestamp': item_time,
|
||||||
|
'description': strip_or_none(item_desc),
|
||||||
})
|
})
|
||||||
for resp in (initial_data.get('data') or {}).values():
|
for resp in (initial_data.get('data') or {}).values():
|
||||||
name = resp.get('name')
|
name = resp.get('name')
|
||||||
|
|
|
@ -4,12 +4,14 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .adobepass import AdobePassIE
|
from .adobepass import AdobePassIE
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
remove_start,
|
remove_start,
|
||||||
remove_end,
|
remove_end,
|
||||||
|
try_get,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
@ -118,6 +120,18 @@ class GoIE(AdobePassIE):
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'VDKA22600213',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Pilot',
|
||||||
|
'description': 'md5:74306df917cfc199d76d061d66bebdb4',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -154,18 +168,30 @@ class GoIE(AdobePassIE):
|
||||||
brand = site_info.get('brand')
|
brand = site_info.get('brand')
|
||||||
if not video_id or not site_info:
|
if not video_id or not site_info:
|
||||||
webpage = self._download_webpage(url, display_id or video_id)
|
webpage = self._download_webpage(url, display_id or video_id)
|
||||||
video_id = self._search_regex(
|
data = self._parse_json(
|
||||||
(
|
self._search_regex(
|
||||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage,
|
||||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
'data', default='{}'),
|
||||||
r'data-video-id=["\']*(VDKA\w+)',
|
display_id or video_id, fatal=False)
|
||||||
# https://github.com/ytdl-org/youtube-dl/pull/25216/files
|
# https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot
|
||||||
# The following is based on the pull request on the line above. Changed the ABC.com URL to a show available now.
|
layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict)
|
||||||
# https://abc.com/shows/the-rookie/episode-guide/season-02/19-the-q-word
|
video_id = None
|
||||||
r'\bvideoIdCode["\']\s*:\s*["\'](vdka\w+)',
|
if layout:
|
||||||
# Deprecated fallback pattern
|
video_id = try_get(
|
||||||
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
layout,
|
||||||
), webpage, 'video id', default=video_id)
|
(lambda x: x['videoid'], lambda x: x['video']['id']),
|
||||||
|
compat_str)
|
||||||
|
if not video_id:
|
||||||
|
video_id = self._search_regex(
|
||||||
|
(
|
||||||
|
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||||
|
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||||
|
r'data-video-id=["\']*(VDKA\w+)',
|
||||||
|
# page.analytics.videoIdCode
|
||||||
|
r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)',
|
||||||
|
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
||||||
|
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
||||||
|
), webpage, 'video id', default=video_id)
|
||||||
if not site_info:
|
if not site_info:
|
||||||
brand = self._search_regex(
|
brand = self._search_regex(
|
||||||
(r'data-brand=\s*["\']\s*(\d+)',
|
(r'data-brand=\s*["\']\s*(\d+)',
|
||||||
|
|
|
@ -77,11 +77,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
|
|
||||||
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
|
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
|
||||||
|
|
||||||
def _ids_to_results(self, ids):
|
|
||||||
return [
|
|
||||||
self.url_result(vid_id, 'Youtube', video_id=vid_id)
|
|
||||||
for vid_id in ids]
|
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
"""
|
"""
|
||||||
Attempt to log in to YouTube.
|
Attempt to log in to YouTube.
|
||||||
|
@ -1313,6 +1308,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
|
# Hack for lazy extractors until more generic solution is implemented
|
||||||
|
# (see #28780)
|
||||||
|
from .youtube import parse_qs
|
||||||
qs = parse_qs(url)
|
qs = parse_qs(url)
|
||||||
if qs.get('list', [None])[0]:
|
if qs.get('list', [None])[0]:
|
||||||
return False
|
return False
|
||||||
|
@ -3595,6 +3593,9 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
if YoutubeTabIE.suitable(url):
|
if YoutubeTabIE.suitable(url):
|
||||||
return False
|
return False
|
||||||
|
# Hack for lazy extractors until more generic solution is implemented
|
||||||
|
# (see #28780)
|
||||||
|
from .youtube import parse_qs
|
||||||
qs = parse_qs(url)
|
qs = parse_qs(url)
|
||||||
if qs.get('v', [None])[0]:
|
if qs.get('v', [None])[0]:
|
||||||
return False
|
return False
|
||||||
|
|
Loading…
Reference in New Issue