[downloader/hls] Add support for AES-128 encrypted segments in hlsnative downloader

2016-01-10 20:09:53 +01:00 · 2016-01-10 20:09:53 +01:00 · e154c65128
parent 589568789f
commit e154c65128
3 changed files with 68 additions and 36 deletions
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@ -2,14 +2,24 @@ from __future__ import unicode_literals

 import os.path
 import re
+import binascii
+try:
+    from Crypto.Cipher import AES
+    can_decrypt_frag = True
+except ImportError:
+    can_decrypt_frag = False

 from .fragment import FragmentFD
 from .external import FFmpegFD

-from ..compat import compat_urlparse
+from ..compat import (
+    compat_urlparse,
+    compat_struct_pack,
+)
 from ..utils import (
    encodeFilename,
    sanitize_open,
+    parse_m3u8_attributes,
 )


@ -21,7 +31,7 @@ class HlsFD(FragmentFD):
    @staticmethod
    def can_download(manifest):
        UNSUPPORTED_FEATURES = (
-            r'#EXT-X-KEY:METHOD=(?!NONE)',  # encrypted streams [1]
+            r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
            r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]

            # Live streams heuristic does not always work (e.g. geo restricted to Germany
@ -39,7 +49,9 @@ class HlsFD(FragmentFD):
            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
            # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
        )
-        return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
+        check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
+        check_results.append(not (re.search(r'#EXT-X-KEY:METHOD=AES-128', manifest) and not can_decrypt_frag))
+        return all(check_results)

    def real_download(self, filename, info_dict):
        man_url = info_dict['url']
@ -57,36 +69,58 @@ class HlsFD(FragmentFD):
                fd.add_progress_hook(ph)
            return fd.real_download(filename, info_dict)

-        fragment_urls = []
+        total_frags = 0
        for line in s.splitlines():
            line = line.strip()
            if line and not line.startswith('#'):
-                segment_url = (
-                    line
-                    if re.match(r'^https?://', line)
-                    else compat_urlparse.urljoin(man_url, line))
-                fragment_urls.append(segment_url)
-                # We only download the first fragment during the test
-                if self.params.get('test', False):
-                    break
+                total_frags += 1

        ctx = {
            'filename': filename,
-            'total_frags': len(fragment_urls),
+            'total_frags': total_frags,
        }

        self._prepare_and_start_frag_download(ctx)

+        i = 0
+        media_sequence = 0
+        decrypt_info = {'METHOD': 'NONE'}
        frags_filenames = []
-        for i, frag_url in enumerate(fragment_urls):
-            frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
-            success = ctx['dl'].download(frag_filename, {'url': frag_url})
-            if not success:
-                return False
-            down, frag_sanitized = sanitize_open(frag_filename, 'rb')
-            ctx['dest_stream'].write(down.read())
-            down.close()
-            frags_filenames.append(frag_sanitized)
+        for line in s.splitlines():
+            line = line.strip()
+            if line:
+                if not line.startswith('#'):
+                    frag_url = (
+                        line
+                        if re.match(r'^https?://', line)
+                        else compat_urlparse.urljoin(man_url, line))
+                    frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
+                    success = ctx['dl'].download(frag_filename, {'url': frag_url})
+                    if not success:
+                        return False
+                    down, frag_sanitized = sanitize_open(frag_filename, 'rb')
+                    frag_content = down.read()
+                    down.close()
+                    if decrypt_info['METHOD'] == 'AES-128':
+                        iv = decrypt_info.get('IV') or compat_struct_pack(">8xq", media_sequence)
+                        frag_content = AES.new(decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
+                    ctx['dest_stream'].write(frag_content)
+                    frags_filenames.append(frag_sanitized)
+                    # We only download the first fragment during the test
+                    if self.params.get('test', False):
+                        break
+                    i += 1
+                    media_sequence += 1
+                elif line.startswith('#EXT-X-KEY'):
+                    decrypt_info = parse_m3u8_attributes(line[11:])
+                    if decrypt_info['METHOD'] == 'AES-128':
+                        if 'IV' in decrypt_info:
+                            decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:])
+                        if not re.match(r'^https?://', decrypt_info['URI']):
+                            decrypt_info['URI'] = compat_urlparse.urljoin(man_url, decrypt_info['URI'])
+                        decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
+                elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
+                    media_sequence = int(line[22:])

        self._finish_frag_download(ctx)

--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -53,6 +53,7 @@ from ..utils import (
    mimetype2ext,
    update_Request,
    update_url_query,
+    parse_m3u8_attributes,
 )


@ -1150,23 +1151,11 @@ class InfoExtractor(object):
            }]
        last_info = None
        last_media = None
-        kv_rex = re.compile(
-            r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
        for line in m3u8_doc.splitlines():
            if line.startswith('#EXT-X-STREAM-INF:'):
-                last_info = {}
-                for m in kv_rex.finditer(line):
-                    v = m.group('val')
-                    if v.startswith('"'):
-                        v = v[1:-1]
-                    last_info[m.group('key')] = v
+                last_info = parse_m3u8_attributes(line)
            elif line.startswith('#EXT-X-MEDIA:'):
-                last_media = {}
-                for m in kv_rex.finditer(line):
-                    v = m.group('val')
-                    if v.startswith('"'):
-                        v = v[1:-1]
-                    last_media[m.group('key')] = v
+                last_media = parse_m3u8_attributes(line)
            elif line.startswith('#') or not line.strip():
                continue
            else:
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -2852,3 +2852,12 @@ def decode_packed_codes(code):
    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfucasted_code)
+
+
+def parse_m3u8_attributes(attrib):
+    info = {}
+    for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
+        if val.startswith('"'):
+            val = val[1:-1]
+        info[key] = val
+    return info