diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py
index d3ec4a66c..b60fa0233 100644
--- a/yt_dlp/extractor/arte.py
+++ b/yt_dlp/extractor/arte.py
@@ -303,9 +303,7 @@ class ArteTVCategoryIE(ArteTVBaseIE):
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
items.append(video)
- title = (self._og_search_title(webpage, default=None)
- or self._html_search_regex(r'
]*>([^<]+) ', default=None))
- title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url)
+ title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None
return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title,
description=self._og_search_description(webpage, default=None))
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py
index 9a0a4414e..89fce8d5a 100644
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -898,12 +898,8 @@ class BBCIE(BBCCoUkIE):
json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
timestamp = json_ld_info.get('timestamp')
- playlist_title = json_ld_info.get('title')
- if not playlist_title:
- playlist_title = (self._og_search_title(webpage, default=None)
- or self._html_extract_title(webpage, 'playlist title', default=None))
- if playlist_title:
- playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
+ playlist_title = json_ld_info.get('title') or re.sub(
+ r'(.+)\s*-\s*BBC.*?$', r'\1', self._generic_title('', webpage, default='')).strip() or None
playlist_description = json_ld_info.get(
'description') or self._og_search_description(webpage, default=None)
diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py
index a2b04fcce..ca5757374 100644
--- a/yt_dlp/extractor/breitbart.py
+++ b/yt_dlp/extractor/breitbart.py
@@ -27,8 +27,7 @@ class BreitBartIE(InfoExtractor):
self._sort_formats(formats)
return {
'id': video_id,
- 'title': (self._og_search_title(webpage, default=None)
- or self._html_extract_title(webpage, 'video title')),
+ 'title': self._generic_title('', webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'age_limit': self._rta_search(webpage),
diff --git a/yt_dlp/extractor/callin.py b/yt_dlp/extractor/callin.py
index fc5da7028..6c8129f06 100644
--- a/yt_dlp/extractor/callin.py
+++ b/yt_dlp/extractor/callin.py
@@ -51,9 +51,7 @@ class CallinIE(InfoExtractor):
episode = next_data['props']['pageProps']['episode']
id = episode['id']
- title = (episode.get('title')
- or self._og_search_title(webpage, fatal=False)
- or self._html_extract_title(webpage))
+ title = episode.get('title') or self._generic_title('', webpage)
url = episode['m3u8']
formats = self._extract_m3u8_formats(url, display_id, ext='ts')
self._sort_formats(formats)
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index fb787a722..84a2b95af 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -3820,9 +3820,11 @@ class InfoExtractor:
def _generic_id(url):
return urllib.parse.unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
- @staticmethod
- def _generic_title(url):
- return urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
+    def _generic_title(self, url='', webpage='', *, default=None):  # best-effort title: the first truthy candidate below wins
+        return (self._og_search_title(webpage, default=None)  # 1st choice: result of _og_search_title() on the page
+                or self._html_extract_title(webpage, default=None)  # 2nd choice: result of _html_extract_title() on the page
+                or urllib.parse.unquote(os.path.splitext(url_basename(url))[0])  # 3rd choice: url_basename(url) minus its extension, percent-decoded
+                or default)  # otherwise the caller-supplied default (None unless given)
@staticmethod
def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py
index 84393627a..1184633f5 100644
--- a/yt_dlp/extractor/cspan.py
+++ b/yt_dlp/extractor/cspan.py
@@ -275,8 +275,7 @@ class CSpanCongressIE(InfoExtractor):
self._search_regex(r'jwsetup\s*=\s*({(?:.|\n)[^;]+});', webpage, 'player config'),
video_id, transform_source=js_to_json)
- title = (self._og_search_title(webpage, default=None)
- or self._html_extract_title(webpage, 'video title'))
+ title = self._generic_title('', webpage)
description = (self._og_search_description(webpage, default=None)
or self._html_search_meta('description', webpage, 'description', default=None))
diff --git a/yt_dlp/extractor/fivetv.py b/yt_dlp/extractor/fivetv.py
index 448c332b3..1f48cfd36 100644
--- a/yt_dlp/extractor/fivetv.py
+++ b/yt_dlp/extractor/fivetv.py
@@ -71,7 +71,7 @@ class FiveTVIE(InfoExtractor):
r']+?href="([^"]+)"[^>]+?class="videoplayer"'],
webpage, 'video url')
- title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)
+ title = self._generic_title('', webpage)
duration = int_or_none(self._og_search_property(
'video:duration', webpage, 'duration', default=None))
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 5abde33a9..b0b26b61a 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2740,8 +2740,7 @@ class GenericIE(InfoExtractor):
# Site Name | Video Title
# Video Title - Tagline | Site Name
# and so on and so forth; it's just not practical
- 'title': (self._og_search_title(webpage, default=None)
- or self._html_extract_title(webpage, 'video title', default='video')),
+ 'title': self._generic_title('', webpage, default='video'),
'description': self._og_search_description(webpage, default=None),
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'age_limit': self._rta_search(webpage),
diff --git a/yt_dlp/extractor/genericembeds.py b/yt_dlp/extractor/genericembeds.py
index 1bffe275a..45e1618ba 100644
--- a/yt_dlp/extractor/genericembeds.py
+++ b/yt_dlp/extractor/genericembeds.py
@@ -20,7 +20,7 @@ class HTML5MediaEmbedIE(InfoExtractor):
]
def _extract_from_webpage(self, url, webpage):
- video_id, title = self._generic_id(url), self._generic_title(url)
+ video_id, title = self._generic_id(url), self._generic_title(url, webpage)
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') or []
for num, entry in enumerate(entries, start=1):
entry.update({
diff --git a/yt_dlp/extractor/glide.py b/yt_dlp/extractor/glide.py
index 2bffb26dc..d114f3494 100644
--- a/yt_dlp/extractor/glide.py
+++ b/yt_dlp/extractor/glide.py
@@ -20,7 +20,7 @@ class GlideIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
- title = self._html_extract_title(webpage, default=None) or self._og_search_title(webpage)
+ title = self._generic_title('', webpage)
video_url = self._proto_relative_url(self._search_regex(
r']+src=(["\'])(?P.+?)\1',
webpage, 'video URL', default=None,
diff --git a/yt_dlp/extractor/meipai.py b/yt_dlp/extractor/meipai.py
index 95b6dfe52..1a6f3cd74 100644
--- a/yt_dlp/extractor/meipai.py
+++ b/yt_dlp/extractor/meipai.py
@@ -48,9 +48,7 @@ class MeipaiIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- title = self._og_search_title(
- webpage, default=None) or self._html_search_regex(
- r']*>([^<]+) ', webpage, 'title')
+ title = self._generic_title('', webpage)
formats = []
diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py
index 60d76d1b1..517660ef1 100644
--- a/yt_dlp/extractor/nhk.py
+++ b/yt_dlp/extractor/nhk.py
@@ -321,8 +321,7 @@ class NhkForSchoolProgramListIE(InfoExtractor):
webpage = self._download_webpage(f'https://www.nhk.or.jp/school/{program_id}/', program_id)
- title = (self._og_search_title(webpage)
- or self._html_extract_title(webpage)
+ title = (self._generic_title('', webpage)
or self._html_search_regex(r'([^<]+?)とは?\s* ', webpage, 'title', fatal=False))
title = re.sub(r'\s*\|\s*NHK\s+for\s+School\s*$', '', title) if title else None
description = self._html_search_regex(
diff --git a/yt_dlp/extractor/onenewsnz.py b/yt_dlp/extractor/onenewsnz.py
index 59d4490d0..a46211e77 100644
--- a/yt_dlp/extractor/onenewsnz.py
+++ b/yt_dlp/extractor/onenewsnz.py
@@ -106,7 +106,6 @@ class OneNewsNZIE(InfoExtractor):
playlist_title = (
traverse_obj(fusion_metadata, ('headlines', 'basic'))
- or self._og_search_title(webpage)
- or self._html_extract_title(webpage)
+ or self._generic_title('', webpage)
)
return self.playlist_result(entries, display_id, playlist_title)
diff --git a/yt_dlp/extractor/steam.py b/yt_dlp/extractor/steam.py
index e15c22f2a..eea20ff85 100644
--- a/yt_dlp/extractor/steam.py
+++ b/yt_dlp/extractor/steam.py
@@ -166,7 +166,7 @@ class SteamCommunityBroadcastIE(InfoExtractor):
self._sort_formats(formats)
return {
'id': video_id,
- 'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
+ 'title': self._generic_title('', webpage),
'formats': formats,
'live_status': 'is_live',
'view_count': json_data.get('num_view'),
diff --git a/yt_dlp/extractor/tennistv.py b/yt_dlp/extractor/tennistv.py
index 5baa21d52..47cb0965e 100644
--- a/yt_dlp/extractor/tennistv.py
+++ b/yt_dlp/extractor/tennistv.py
@@ -142,7 +142,7 @@ class TennisTVIE(InfoExtractor):
return {
'id': video_id,
- 'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
+ 'title': self._generic_title('', webpage),
'description': self._html_search_regex(
(r'', *self._og_regexes('description')),
webpage, 'description', fatal=False),
diff --git a/yt_dlp/extractor/tv24ua.py b/yt_dlp/extractor/tv24ua.py
index 2f2571df7..8d2475296 100644
--- a/yt_dlp/extractor/tv24ua.py
+++ b/yt_dlp/extractor/tv24ua.py
@@ -74,6 +74,6 @@ class TV24UAVideoIE(InfoExtractor):
'formats': formats,
'subtitles': subtitles,
'thumbnail': thumbnail or self._og_search_thumbnail(webpage),
- 'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
+ 'title': self._generic_title('', webpage),
'description': self._og_search_description(webpage, default=None),
}