From c40f327a1667a1dd04bd5c360e8b85dae93c8b4c Mon Sep 17 00:00:00 2001 From: Bricio <216170+Bricio@users.noreply.github.com> Date: Wed, 20 Jul 2022 01:37:13 -0300 Subject: [PATCH] [extractor/globo:article] Remove false positives (#4396) Authored by: Bricio --- yt_dlp/extractor/globo.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/globo.py b/yt_dlp/extractor/globo.py index 8915ebf48..fb2a3fab2 100644 --- a/yt_dlp/extractor/globo.py +++ b/yt_dlp/extractor/globo.py @@ -178,12 +178,12 @@ class GloboArticleIE(InfoExtractor): _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P[^/.]+)(?:\.html)?' _VIDEOID_REGEXES = [ - r'\bdata-video-id=["\'](\d{7,})', - r'\bdata-player-videosids=["\'](\d{7,})', + r'\bdata-video-id=["\'](\d{7,})["\']', + r'\bdata-player-videosids=["\'](\d{7,})["\']', r'\bvideosIDs\s*:\s*["\']?(\d{7,})', - r'\bdata-id=["\'](\d{7,})', - r']+\bid=["\'](\d{7,})', - r']+\bvideoid=["\'](\d{8,})', + r'\bdata-id=["\'](\d{7,})["\']', + r']+\bid=["\'](\d{7,})["\']', + r']+\bvideoid=["\'](\d{8,})["\']', ] _TESTS = [{ @@ -219,6 +219,14 @@ class GloboArticleIE(InfoExtractor): 'description': 'md5:2d089d036c4c9675117d3a56f8c61739', }, 'playlist_count': 1, + }, { + 'url': 'https://redeglobo.globo.com/rpc/meuparana/noticia/a-producao-de-chocolates-no-parana.ghtml', + 'info_dict': { + 'id': 'a-producao-de-chocolates-no-parana', + 'title': 'A produção de chocolates no Paraná', + 'description': 'md5:f2e3daf00ffd1dc0e9a8a6c7cfb0a89e', + }, + 'playlist_count': 2, }] @classmethod @@ -234,6 +242,6 @@ class GloboArticleIE(InfoExtractor): entries = [ self.url_result('globo:%s' % video_id, GloboIE.ie_key()) for video_id in orderedSet(video_ids)] - title = self._og_search_title(webpage) + title = self._og_search_title(webpage).strip() description = self._html_search_meta('description', webpage) return self.playlist_result(entries, display_id, title, description)