support youtube live chat replay

This commit is contained in:
siikamiika 2020-08-05 01:02:23 +03:00
parent 98b69821e4
commit a78e3a5795
3 changed files with 98 additions and 0 deletions

View File

@ -8,6 +8,7 @@ from .rtmp import RtmpFD
from .dash import DashSegmentsFD from .dash import DashSegmentsFD
from .rtsp import RtspFD from .rtsp import RtspFD
from .ism import IsmFD from .ism import IsmFD
from .youtube_live_chat import YoutubeLiveChatReplayFD
from .external import ( from .external import (
get_external_downloader, get_external_downloader,
FFmpegFD, FFmpegFD,
@ -26,6 +27,7 @@ PROTOCOL_MAP = {
'f4m': F4mFD, 'f4m': F4mFD,
'http_dash_segments': DashSegmentsFD, 'http_dash_segments': DashSegmentsFD,
'ism': IsmFD, 'ism': IsmFD,
'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
} }

View File

@ -0,0 +1,88 @@
from __future__ import division, unicode_literals
import re
import json
from .fragment import FragmentFD
class YoutubeLiveChatReplayFD(FragmentFD):
    """Download a YouTube live chat replay fragment by fragment.

    Each chat action returned by YouTube is written to the output file as
    one UTF-8 encoded JSON document per line.
    """

    FD_NAME = 'youtube_live_chat_replay'

    def real_download(self, filename, info_dict):
        """Fetch the whole chat replay for ``info_dict['video_id']``.

        The watch page is fetched first to obtain the initial continuation
        token; the replay is then paged through until a page yields no
        replay item (and hence no offset) or no further continuation.

        Returns False as soon as any fragment download fails, True otherwise.
        When the ``test`` param is set only the first replay page is fetched.
        """
        video_id = info_dict['video_id']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)

        test = self.params.get('test', False)

        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        def dl_fragment(url):
            headers = info_dict.get('http_headers', {})
            return self._download_fragment(ctx, url, info_dict, headers)

        def parse_yt_initial_data(data):
            # Raw bytes literal: the pattern is unchanged, but '\[' and '\s'
            # are no longer (deprecated) invalid string escape sequences.
            raw_json = re.search(
                br'window\["ytInitialData"\]\s*=\s*(.*);', data).group(1)
            return json.loads(raw_json)

        self._prepare_and_start_frag_download(ctx)

        success, raw_fragment = dl_fragment(
            'https://www.youtube.com/watch?v={}'.format(video_id))
        if not success:
            return False
        data = parse_yt_initial_data(raw_fragment)
        continuation_id = data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        first = True
        offset = None
        while continuation_id is not None:
            if first:
                # The first replay page is an HTML document that embeds the
                # data as ytInitialData, like the watch page.
                url = 'https://www.youtube.com/live_chat_replay?continuation={}'.format(continuation_id)
                success, raw_fragment = dl_fragment(url)
                if not success:
                    return False
                data = parse_yt_initial_data(raw_fragment)
            else:
                # Step back 5 seconds from the last seen message, but never
                # request a negative player offset.
                # NOTE(review): the 5000 ms back-off is presumably there to
                # avoid missing messages between pages -- confirm.
                url = ('https://www.youtube.com/live_chat_replay/get_live_chat_replay'
                       + '?continuation={}'.format(continuation_id)
                       + '&playerOffsetMs={}'.format(max(offset - 5000, 0))
                       + '&hidden=false'
                       + '&pbj=1')
                success, raw_fragment = dl_fragment(url)
                if not success:
                    return False
                data = json.loads(raw_fragment)['response']

            first = False
            continuation_id = None

            live_chat_continuation = data['continuationContents']['liveChatContinuation']
            offset = None
            processed_fragment = bytearray()
            if 'actions' in live_chat_continuation:
                for action in live_chat_continuation['actions']:
                    if 'replayChatItemAction' in action:
                        replay_chat_item_action = action['replayChatItemAction']
                        offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                    processed_fragment.extend(
                        json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
                # A page without a replay continuation ends the download
                # instead of raising KeyError/IndexError and discarding an
                # otherwise successful download.
                continuations = live_chat_continuation.get('continuations') or [{}]
                continuation_id = (
                    continuations[0]
                    .get('liveChatReplayContinuationData', {})
                    .get('continuation'))

            self._append_fragment(ctx, processed_fragment)

            # No replay item on this page means there is no offset to
            # continue from, so stop here.
            if test or offset is None:
                break

        self._finish_frag_download(ctx)
        return True

View File

@ -1462,6 +1462,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': ext, 'ext': ext,
}) })
sub_lang_list[lang] = sub_formats sub_lang_list[lang] = sub_formats
# TODO check that live chat replay actually exists
sub_lang_list['live_chat'] = [
{
'video_id': video_id,
'ext': 'json',
'protocol': 'youtube_live_chat_replay',
},
]
if not sub_lang_list: if not sub_lang_list:
self._downloader.report_warning('video doesn\'t have subtitles') self._downloader.report_warning('video doesn\'t have subtitles')
return {} return {}