mirror of https://github.com/yt-dlp/yt-dlp.git
[VideocampusSachsen] Improve extractor (#3604)
Authored by: FestplattenSchnitzel
This commit is contained in:
parent
ff4d7860d5
commit
10fa2471fc
|
@ -1899,10 +1899,7 @@ from .vice import (
|
|||
from .vidbit import VidbitIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videa import VideaIE
|
||||
from .videocampus_sachsen import (
|
||||
VideocampusSachsenIE,
|
||||
VideocampusSachsenEmbedIE,
|
||||
)
|
||||
from .videocampus_sachsen import VideocampusSachsenIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .videomore import (
|
||||
|
|
|
@ -1,11 +1,70 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class VideocampusSachsenIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://videocampus\.sachsen\.de/(?:
|
||||
IE_NAME = 'Vimp'
|
||||
_INSTANCES = (
|
||||
'campus.demo.vimp.com',
|
||||
'corporate.demo.vimp.com',
|
||||
'dancehalldatabase.com',
|
||||
'educhannel.hs-gesundheit.de',
|
||||
'emedia.ls.haw-hamburg.de',
|
||||
'globale-evolution.net',
|
||||
'k210039.vimp.mivitec.net',
|
||||
'media.cmslegal.com',
|
||||
'media.hs-furtwangen.de',
|
||||
'media.hwr-berlin.de',
|
||||
'mediathek.dkfz.de',
|
||||
'mediathek.htw-berlin.de',
|
||||
'mediathek.polizei-bw.de',
|
||||
'medien.hs-merseburg.de',
|
||||
'mportal.europa-uni.de',
|
||||
'pacific.demo.vimp.com',
|
||||
'slctv.com',
|
||||
'tube.isbonline.cn',
|
||||
'univideo.uni-kassel.de',
|
||||
'ursula2.genetics.emory.edu',
|
||||
'ursulablicklevideoarchiv.com',
|
||||
'v.agrarumweltpaedagogik.at',
|
||||
'video.eplay-tv.de',
|
||||
'video.fh-dortmund.de',
|
||||
'video.hs-offenburg.de',
|
||||
'video.hs-pforzheim.de',
|
||||
'video.hspv.nrw.de',
|
||||
'video.irtshdf.fr',
|
||||
'video.pareygo.de',
|
||||
'video.tu-freiberg.de',
|
||||
'videocampus.sachsen.de',
|
||||
'videoportal.uni-freiburg.de',
|
||||
'videoportal.vm.uni-freiburg.de',
|
||||
'videos.duoc.cl',
|
||||
'videos.uni-paderborn.de',
|
||||
'vimp-bemus.udk-berlin.de',
|
||||
'vimp.aekwl.de',
|
||||
'vimp.hs-mittweida.de',
|
||||
'vimp.oth-regensburg.de',
|
||||
'vimp.ph-heidelberg.de',
|
||||
'vimp.sma-events.com',
|
||||
'vimp.weka-fachmedien.de',
|
||||
'webtv.univ-montp3.fr',
|
||||
'www.b-tu.de/media',
|
||||
'www.bigcitytv.de',
|
||||
'www.cad-videos.de',
|
||||
'www.fh-bielefeld.de/medienportal',
|
||||
'www.orvovideo.com',
|
||||
'www.rwe.tv',
|
||||
'www.wenglor-media.com',
|
||||
'www2.univ-sba.dz',
|
||||
)
|
||||
_VALID_URL = r'''(?x)https?://(?P<host>%s)/(?:
|
||||
m/(?P<tmp_id>[0-9a-f]+)|
|
||||
(?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})
|
||||
)'''
|
||||
(?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})|
|
||||
media/embed.*(?:\?|&)key=(?P<embed_id>[0-9a-f]{32}&?)
|
||||
)''' % ('|'.join(map(re.escape, _INSTANCES)))
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
|
@ -13,6 +72,7 @@ class VideocampusSachsenIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'id': 'e6b9349905c1628631f175712250f2a1',
|
||||
'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
|
||||
'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
},
|
||||
|
@ -21,6 +81,7 @@ class VideocampusSachsenIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'id': 'fc99c527e4205b121cb7c74433469262',
|
||||
'title': 'Was ist selbstgesteuertes Lernen?',
|
||||
'description': 'md5:196aa3b0509a526db62f84679522a2f5',
|
||||
'display_id': 'Was-ist-selbstgesteuertes-Lernen',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
|
@ -30,43 +91,32 @@ class VideocampusSachsenIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'id': '09d4ed029002eb1bdda610f1103dd54c',
|
||||
'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht',
|
||||
'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58',
|
||||
'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, tmp_id, display_id = self._match_valid_url(url).group('id', 'tmp_id', 'display_id')
|
||||
webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or ''
|
||||
|
||||
if not tmp_id:
|
||||
video_id = self._html_search_regex(
|
||||
r'src="https?://videocampus\.sachsen\.de/media/embed\?key=([0-9a-f]+)&',
|
||||
webpage, 'video_id')
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'<h1>(?P<content>[^<]+)</h1>', *self._meta_regex('title')),
|
||||
webpage, 'title', group='content', fatal=False)
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
|
||||
video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles
|
||||
{
|
||||
'url': 'https://www2.univ-sba.dz/video/Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122/0183356e41af7bfb83d7667b20d9b6a3',
|
||||
'info_dict': {
|
||||
'url': 'https://www2.univ-sba.dz/getMedium/0183356e41af7bfb83d7667b20d9b6a3.mp4',
|
||||
'id': '0183356e41af7bfb83d7667b20d9b6a3',
|
||||
'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22',
|
||||
'description': 'md5:508958bd93e0ca002ac731d94182a54f',
|
||||
'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122',
|
||||
'ext': 'mp4',
|
||||
}
|
||||
|
||||
|
||||
class VideocampusSachsenEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://videocampus.sachsen.de/media/embed\?key=(?P<id>[0-9a-f]+)'
|
||||
|
||||
_TESTS = [
|
||||
},
|
||||
{
|
||||
'url': 'https://vimp.weka-fachmedien.de/video/Preisverleihung-Produkte-des-Jahres-2022/c8816f1cc942c12b6cce57c835cffd7c',
|
||||
'info_dict': {
|
||||
'id': 'c8816f1cc942c12b6cce57c835cffd7c',
|
||||
'title': 'Preisverleihung »Produkte des Jahres 2022«',
|
||||
'description': 'md5:60c347568ca89aa25b772c4ea564ebd3',
|
||||
'display_id': 'Preisverleihung-Produkte-des-Jahres-2022',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262',
|
||||
'info_dict': {
|
||||
|
@ -78,18 +128,41 @@ class VideocampusSachsenEmbedIE(InfoExtractor):
|
|||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
host, video_id, tmp_id, display_id, embed_id = self._match_valid_url(url).group(
|
||||
'host', 'id', 'tmp_id', 'display_id', 'embed_id')
|
||||
webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or ''
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if not video_id:
|
||||
video_id = embed_id or self._html_search_regex(
|
||||
rf'src="https?://{host}/media/embed.*(?:\?|&)key=([0-9a-f]+)&?',
|
||||
webpage, 'video_id')
|
||||
|
||||
if not (display_id or tmp_id):
|
||||
# Title, description from embedded page's meta wouldn't be correct
|
||||
title = self._html_search_regex(r'<img[^>]* title="([^"<]+)"', webpage, 'title', fatal=False)
|
||||
description = None
|
||||
else:
|
||||
title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False)
|
||||
description = self._html_search_meta(
|
||||
('og:description', 'twitter:description', 'description'), webpage, default=None)
|
||||
|
||||
formats, subtitles = [], {}
|
||||
try:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
|
||||
video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
f'https://{host}/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
|
||||
video_id, 'mp4', m3u8_id='hls', fatal=True)
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (404, 500):
|
||||
raise
|
||||
|
||||
formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'subtitles': subtitles
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue