[businessinsider] Fix jwplatform id extraction (closes #22929) (#22954)

This commit is contained in:
PB 2020-01-18 16:47:50 +01:00 committed by Sergey M
parent a4b2769451
commit fd032450f0
1 changed file with 17 additions and 11 deletions

View File

@@ -9,21 +9,26 @@ class BusinessInsiderIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6', 'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e', 'md5': 'ffed3e1e12a6f950aa2f7d83851b497a',
'info_dict': { 'info_dict': {
'id': 'hZRllCfw', 'id': 'cjGDb0X9',
'ext': 'mp4', 'ext': 'mp4',
'title': "Here's how much radiation you're exposed to in everyday life", 'title': "Bananas give you more radiation exposure than living next to a nuclear power plant",
'description': 'md5:9a0d6e2c279948aadaa5e84d6d9b99bd', 'description': 'md5:0175a3baf200dd8fa658f94cade841b3',
'upload_date': '20170709', 'upload_date': '20160611',
'timestamp': 1499606400, 'timestamp': 1465675620,
},
'params': {
'skip_download': True,
}, },
}, { }, {
'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/', 'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
'only_matching': True, 'md5': '43f438dbc6da0b89f5ac42f68529d84a',
'info_dict': {
'id': '5zJwd4FK',
'ext': 'mp4',
'title': 'Deze dingen zorgen ervoor dat je minder snel een date scoort',
'description': 'md5:2af8975825d38a4fed24717bbe51db49',
'upload_date': '20170705',
'timestamp': 1499270528,
},
}, { }, {
'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T', 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
'only_matching': True, 'only_matching': True,
@@ -35,7 +40,8 @@ class BusinessInsiderIE(InfoExtractor):
jwplatform_id = self._search_regex( jwplatform_id = self._search_regex(
(r'data-media-id=["\']([a-zA-Z0-9]{8})', (r'data-media-id=["\']([a-zA-Z0-9]{8})',
r'id=["\']jwplayer_([a-zA-Z0-9]{8})', r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})'), r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})',
r'(?:jwplatform\.com/players/|jwplayer_)([a-zA-Z0-9]{8})'),
webpage, 'jwplatform id') webpage, 'jwplatform id')
return self.url_result( return self.url_result(
'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(), 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),