mirror of https://github.com/yt-dlp/yt-dlp.git
[extractor/html5] Separate into own extractor (#4307)
Closes #4291. Authored by: coletdjnz, pukkandan
This commit is contained in:
parent
5fff2e576f
commit
f14a2d8382
|
@@ -85,7 +85,7 @@ class TestHTTPS(unittest.TestCase):
|
||||||
|
|
||||||
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
|
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
|
||||||
r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
|
r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
|
||||||
self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
|
self.assertEqual(r['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
|
||||||
|
|
||||||
|
|
||||||
class TestClientCert(unittest.TestCase):
|
class TestClientCert(unittest.TestCase):
|
||||||
|
@@ -113,7 +113,7 @@ class TestClientCert(unittest.TestCase):
|
||||||
**params,
|
**params,
|
||||||
})
|
})
|
||||||
r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
|
r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
|
||||||
self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
|
self.assertEqual(r['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
|
||||||
|
|
||||||
def test_certificate_combined_nopass(self):
|
def test_certificate_combined_nopass(self):
|
||||||
self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithkey.crt'))
|
self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithkey.crt'))
|
||||||
|
|
|
@@ -662,6 +662,7 @@ from .hse import (
|
||||||
HSEShowIE,
|
HSEShowIE,
|
||||||
HSEProductIE,
|
HSEProductIE,
|
||||||
)
|
)
|
||||||
|
from .genericembeds import HTML5MediaEmbedIE
|
||||||
from .huajiao import HuajiaoIE
|
from .huajiao import HuajiaoIE
|
||||||
from .huya import HuyaLiveIE
|
from .huya import HuyaLiveIE
|
||||||
from .huffpost import HuffPostIE
|
from .huffpost import HuffPostIE
|
||||||
|
|
|
@@ -3776,25 +3776,6 @@ class GenericIE(InfoExtractor):
|
||||||
elif embeds:
|
elif embeds:
|
||||||
return self.playlist_result(embeds, **info_dict)
|
return self.playlist_result(embeds, **info_dict)
|
||||||
|
|
||||||
# Look for HTML5 media
|
|
||||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
|
||||||
if entries:
|
|
||||||
self.report_detected('HTML5 media')
|
|
||||||
if len(entries) == 1:
|
|
||||||
entries[0].update({
|
|
||||||
'id': video_id,
|
|
||||||
'title': video_title,
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
for num, entry in enumerate(entries, start=1):
|
|
||||||
entry.update({
|
|
||||||
'id': f'{video_id}-{num}',
|
|
||||||
'title': '%s (%d)' % (video_title, num),
|
|
||||||
})
|
|
||||||
for entry in entries:
|
|
||||||
self._sort_formats(entry['formats'])
|
|
||||||
return self.playlist_result(entries, video_id, video_title)
|
|
||||||
|
|
||||||
jwplayer_data = self._find_jwplayer_data(
|
jwplayer_data = self._find_jwplayer_data(
|
||||||
webpage, video_id, transform_source=js_to_json)
|
webpage, video_id, transform_source=js_to_json)
|
||||||
if jwplayer_data:
|
if jwplayer_data:
|
||||||
|
|
|
@@ -0,0 +1,27 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class HTML5MediaEmbedIE(InfoExtractor):
    """Extractor that yields the HTML5 media entries parsed from a webpage."""

    # NOTE(review): a False _VALID_URL presumably keeps this extractor from
    # being selected by URL matching -- confirm against InfoExtractor.
    _VALID_URL = False
    IE_NAME = 'html5'
    _WEBPAGE_TESTS = [{
        'url': 'https://html.com/media/',
        'info_dict': {
            'title': 'HTML5 Media',
            'description': 'md5:933b2d02ceffe7a7a0f3c8326d91cc2a',
        },
        'playlist_count': 2,
    }]

    def _extract_from_webpage(self, url, webpage):
        """Yield every HTML5 media entry parsed from ``webpage``.

        Each entry gets an id of the form ``<generic id>-<n>`` and a title of
        the form ``<generic title> (<n>)``, where ``n`` counts from 1, and has
        its formats sorted before it is yielded. Nothing is yielded when the
        parser returns a falsy result.
        """
        video_id = self._generic_id(url)
        title = self._generic_title(url)

        found = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
        if not found:
            return

        for index, media in enumerate(found, start=1):
            media['id'] = f'{video_id}-{index}'
            media['title'] = f'{title} ({index})'
            self._sort_formats(media['formats'])
            yield media
|
Loading…
Reference in New Issue