Include more information in oEmbed previews. (#10819)
* Improved titles (fall back to the author name if there's no title) and include the site name. * Handle photo/video payloads. * Include the original URL in the Open Graph response. * Fix the expiration time (by properly converting from seconds to milliseconds).
This commit is contained in:
parent
9391de3f37
commit
6fc8be9a1b
|
@ -0,0 +1 @@
|
||||||
|
Improve oEmbed previews by processing the author name, photo, and video information.
|
|
@ -13,7 +13,7 @@
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from typing import TYPE_CHECKING, Optional
|
from typing import TYPE_CHECKING, List, Optional
|
||||||
|
|
||||||
import attr
|
import attr
|
||||||
|
|
||||||
|
@ -22,6 +22,8 @@ from synapse.types import JsonDict
|
||||||
from synapse.util import json_decoder
|
from synapse.util import json_decoder
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
from synapse.server import HomeServer
|
from synapse.server import HomeServer
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -31,7 +33,7 @@ logger = logging.getLogger(__name__)
|
||||||
class OEmbedResult:
|
class OEmbedResult:
|
||||||
# The Open Graph result (converted from the oEmbed result).
|
# The Open Graph result (converted from the oEmbed result).
|
||||||
open_graph_result: JsonDict
|
open_graph_result: JsonDict
|
||||||
# Number of seconds to cache the content, according to the oEmbed response.
|
# Number of milliseconds to cache the content, according to the oEmbed response.
|
||||||
#
|
#
|
||||||
# This will be None if no cache-age is provided in the oEmbed response (or
|
# This will be None if no cache-age is provided in the oEmbed response (or
|
||||||
# if the oEmbed response cannot be turned into an Open Graph response).
|
# if the oEmbed response cannot be turned into an Open Graph response).
|
||||||
|
@ -119,10 +121,22 @@ class OEmbedProvider:
|
||||||
# Ensure the cache age is None or an int.
|
# Ensure the cache age is None or an int.
|
||||||
cache_age = oembed.get("cache_age")
|
cache_age = oembed.get("cache_age")
|
||||||
if cache_age:
|
if cache_age:
|
||||||
cache_age = int(cache_age)
|
cache_age = int(cache_age) * 1000
|
||||||
|
|
||||||
# The results.
|
# The results.
|
||||||
open_graph_response = {"og:title": oembed.get("title")}
|
open_graph_response = {
|
||||||
|
"og:url": url,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Use either title or author's name as the title.
|
||||||
|
title = oembed.get("title") or oembed.get("author_name")
|
||||||
|
if title:
|
||||||
|
open_graph_response["og:title"] = title
|
||||||
|
|
||||||
|
# Use the provider name as the site.
|
||||||
|
provider_name = oembed.get("provider_name")
|
||||||
|
if provider_name:
|
||||||
|
open_graph_response["og:site_name"] = provider_name
|
||||||
|
|
||||||
# If a thumbnail exists, use it. Note that dimensions will be calculated later.
|
# If a thumbnail exists, use it. Note that dimensions will be calculated later.
|
||||||
if "thumbnail_url" in oembed:
|
if "thumbnail_url" in oembed:
|
||||||
|
@ -137,6 +151,15 @@ class OEmbedProvider:
|
||||||
# If this is a photo, use the full image, not the thumbnail.
|
# If this is a photo, use the full image, not the thumbnail.
|
||||||
open_graph_response["og:image"] = oembed["url"]
|
open_graph_response["og:image"] = oembed["url"]
|
||||||
|
|
||||||
|
elif oembed_type == "video":
|
||||||
|
open_graph_response["og:type"] = "video.other"
|
||||||
|
calc_description_and_urls(open_graph_response, oembed["html"])
|
||||||
|
open_graph_response["og:video:width"] = oembed["width"]
|
||||||
|
open_graph_response["og:video:height"] = oembed["height"]
|
||||||
|
|
||||||
|
elif oembed_type == "link":
|
||||||
|
open_graph_response["og:type"] = "website"
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise RuntimeError(f"Unknown oEmbed type: {oembed_type}")
|
raise RuntimeError(f"Unknown oEmbed type: {oembed_type}")
|
||||||
|
|
||||||
|
@ -149,6 +172,14 @@ class OEmbedProvider:
|
||||||
return OEmbedResult(open_graph_response, cache_age)
|
return OEmbedResult(open_graph_response, cache_age)
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]:
|
||||||
|
results = []
|
||||||
|
for tag in tree.xpath("//*/" + tag_name):
|
||||||
|
if "src" in tag.attrib:
|
||||||
|
results.append(tag.attrib["src"])
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None:
|
def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None:
|
||||||
"""
|
"""
|
||||||
Calculate description for an HTML document.
|
Calculate description for an HTML document.
|
||||||
|
@ -179,6 +210,16 @@ def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) ->
|
||||||
if tree is None:
|
if tree is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Attempt to find interesting URLs (images, videos, embeds).
|
||||||
|
if "og:image" not in open_graph_response:
|
||||||
|
image_urls = _fetch_urls(tree, "img")
|
||||||
|
if image_urls:
|
||||||
|
open_graph_response["og:image"] = image_urls[0]
|
||||||
|
|
||||||
|
video_urls = _fetch_urls(tree, "video") + _fetch_urls(tree, "embed")
|
||||||
|
if video_urls:
|
||||||
|
open_graph_response["og:video"] = video_urls[0]
|
||||||
|
|
||||||
from synapse.rest.media.v1.preview_url_resource import _calc_description
|
from synapse.rest.media.v1.preview_url_resource import _calc_description
|
||||||
|
|
||||||
description = _calc_description(tree)
|
description = _calc_description(tree)
|
||||||
|
|
|
@ -305,7 +305,7 @@ class PreviewUrlResource(DirectServeJsonResource):
|
||||||
with open(media_info.filename, "rb") as file:
|
with open(media_info.filename, "rb") as file:
|
||||||
body = file.read()
|
body = file.read()
|
||||||
|
|
||||||
oembed_response = self._oembed.parse_oembed_response(media_info.uri, body)
|
oembed_response = self._oembed.parse_oembed_response(url, body)
|
||||||
og = oembed_response.open_graph_result
|
og = oembed_response.open_graph_result
|
||||||
|
|
||||||
# Use the cache age from the oEmbed result, instead of the HTTP response.
|
# Use the cache age from the oEmbed result, instead of the HTTP response.
|
||||||
|
|
|
@ -620,11 +620,12 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
self.assertIn(b"/matrixdotorg", server.data)
|
self.assertIn(b"/matrixdotorg", server.data)
|
||||||
|
|
||||||
self.assertEqual(channel.code, 200)
|
self.assertEqual(channel.code, 200)
|
||||||
self.assertIsNone(channel.json_body["og:title"])
|
body = channel.json_body
|
||||||
self.assertTrue(channel.json_body["og:image"].startswith("mxc://"))
|
self.assertEqual(body["og:url"], "http://twitter.com/matrixdotorg/status/12345")
|
||||||
self.assertEqual(channel.json_body["og:image:height"], 1)
|
self.assertTrue(body["og:image"].startswith("mxc://"))
|
||||||
self.assertEqual(channel.json_body["og:image:width"], 1)
|
self.assertEqual(body["og:image:height"], 1)
|
||||||
self.assertEqual(channel.json_body["og:image:type"], "image/png")
|
self.assertEqual(body["og:image:width"], 1)
|
||||||
|
self.assertEqual(body["og:image:type"], "image/png")
|
||||||
|
|
||||||
def test_oembed_rich(self):
|
def test_oembed_rich(self):
|
||||||
"""Test an oEmbed endpoint which returns HTML content via the 'rich' type."""
|
"""Test an oEmbed endpoint which returns HTML content via the 'rich' type."""
|
||||||
|
@ -633,6 +634,8 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
result = {
|
result = {
|
||||||
"version": "1.0",
|
"version": "1.0",
|
||||||
"type": "rich",
|
"type": "rich",
|
||||||
|
# Note that this provides the author, not the title.
|
||||||
|
"author_name": "Alice",
|
||||||
"html": "<div>Content Preview</div>",
|
"html": "<div>Content Preview</div>",
|
||||||
}
|
}
|
||||||
end_content = json.dumps(result).encode("utf-8")
|
end_content = json.dumps(result).encode("utf-8")
|
||||||
|
@ -660,9 +663,14 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
|
|
||||||
self.pump()
|
self.pump()
|
||||||
self.assertEqual(channel.code, 200)
|
self.assertEqual(channel.code, 200)
|
||||||
|
body = channel.json_body
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
channel.json_body,
|
body,
|
||||||
{"og:title": None, "og:description": "Content Preview"},
|
{
|
||||||
|
"og:url": "http://twitter.com/matrixdotorg/status/12345",
|
||||||
|
"og:title": "Alice",
|
||||||
|
"og:description": "Content Preview",
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_oembed_format(self):
|
def test_oembed_format(self):
|
||||||
|
@ -705,7 +713,11 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
self.assertIn(b"format=json", server.data)
|
self.assertIn(b"format=json", server.data)
|
||||||
|
|
||||||
self.assertEqual(channel.code, 200)
|
self.assertEqual(channel.code, 200)
|
||||||
|
body = channel.json_body
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
channel.json_body,
|
body,
|
||||||
{"og:title": None, "og:description": "Content Preview"},
|
{
|
||||||
|
"og:url": "http://www.hulu.com/watch/12345",
|
||||||
|
"og:description": "Content Preview",
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue