mirror of https://github.com/yt-dlp/yt-dlp.git
Merge branch 'purdeaandrei-save_tags_simpler_only_saves_tags_to_info_json'
This commit is contained in:
commit
23e7f53bd3
|
@ -181,6 +181,7 @@ class InfoExtractor(object):
|
||||||
by YoutubeDL if it's missing)
|
by YoutubeDL if it's missing)
|
||||||
categories: A list of categories that the video falls in, for example
|
categories: A list of categories that the video falls in, for example
|
||||||
["Sports", "Berlin"]
|
["Sports", "Berlin"]
|
||||||
|
tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"]
|
||||||
is_live: True, False, or None (=unknown). Whether this video is a
|
is_live: True, False, or None (=unknown). Whether this video is a
|
||||||
live stream that goes on instead of a fixed-length video.
|
live stream that goes on instead of a fixed-length video.
|
||||||
start_time: Time in seconds where the reproduction should start, as
|
start_time: Time in seconds where the reproduction should start, as
|
||||||
|
@ -630,6 +631,12 @@ class InfoExtractor(object):
|
||||||
template % (content_re, property_re),
|
template % (content_re, property_re),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _meta_regex(prop):
|
||||||
|
return r'''(?isx)<meta
|
||||||
|
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
|
||||||
|
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
|
||||||
|
|
||||||
def _og_search_property(self, prop, html, name=None, **kargs):
|
def _og_search_property(self, prop, html, name=None, **kargs):
|
||||||
if name is None:
|
if name is None:
|
||||||
name = 'OpenGraph %s' % prop
|
name = 'OpenGraph %s' % prop
|
||||||
|
@ -660,9 +667,7 @@ class InfoExtractor(object):
|
||||||
if display_name is None:
|
if display_name is None:
|
||||||
display_name = name
|
display_name = name
|
||||||
return self._html_search_regex(
|
return self._html_search_regex(
|
||||||
r'''(?isx)<meta
|
self._meta_regex(name),
|
||||||
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
|
|
||||||
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
|
|
||||||
html, display_name, fatal=fatal, group='content', **kwargs)
|
html, display_name, fatal=fatal, group='content', **kwargs)
|
||||||
|
|
||||||
def _dc_search_uploader(self, html):
|
def _dc_search_uploader(self, html):
|
||||||
|
|
|
@ -329,6 +329,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'upload_date': '20121002',
|
'upload_date': '20121002',
|
||||||
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
|
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
|
||||||
'categories': ['Science & Technology'],
|
'categories': ['Science & Technology'],
|
||||||
|
'tags': ['youtube-dl'],
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
'start_time': 1,
|
'start_time': 1,
|
||||||
|
@ -343,7 +344,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20120506',
|
'upload_date': '20120506',
|
||||||
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
|
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
|
||||||
'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
|
'description': 'md5:782e8651347686cba06e58f71ab51773',
|
||||||
|
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
|
||||||
|
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
|
||||||
|
'iconic ep', 'iconic', 'love', 'it'],
|
||||||
'uploader': 'Icona Pop',
|
'uploader': 'Icona Pop',
|
||||||
'uploader_id': 'IconaPop',
|
'uploader_id': 'IconaPop',
|
||||||
}
|
}
|
||||||
|
@ -1072,6 +1076,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
else:
|
else:
|
||||||
video_categories = None
|
video_categories = None
|
||||||
|
|
||||||
|
video_tags = [
|
||||||
|
unescapeHTML(m.group('content'))
|
||||||
|
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
|
||||||
|
|
||||||
# description
|
# description
|
||||||
video_description = get_element_by_id("eow-description", video_webpage)
|
video_description = get_element_by_id("eow-description", video_webpage)
|
||||||
if video_description:
|
if video_description:
|
||||||
|
@ -1260,6 +1268,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'thumbnail': video_thumbnail,
|
'thumbnail': video_thumbnail,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
'categories': video_categories,
|
'categories': video_categories,
|
||||||
|
'tags': video_tags,
|
||||||
'subtitles': video_subtitles,
|
'subtitles': video_subtitles,
|
||||||
'automatic_captions': automatic_captions,
|
'automatic_captions': automatic_captions,
|
||||||
'duration': video_duration,
|
'duration': video_duration,
|
||||||
|
|
Loading…
Reference in New Issue