Add stubs package for lxml. (#15697)
The stubs have some issues, so this commit includes some generous casts and ignores, but it is better than not having stubs. Note that, confusingly, Element is a function which creates _Element instances (and similarly for Comment).
This commit is contained in:
parent
874378c052
commit
6f18812bb0
|
@ -0,0 +1 @@
|
||||||
|
Improve type hints.
|
3
mypy.ini
3
mypy.ini
|
@ -60,9 +60,6 @@ ignore_missing_imports = True
|
||||||
[mypy-ijson.*]
|
[mypy-ijson.*]
|
||||||
ignore_missing_imports = True
|
ignore_missing_imports = True
|
||||||
|
|
||||||
[mypy-lxml]
|
|
||||||
ignore_missing_imports = True
|
|
||||||
|
|
||||||
# https://github.com/msgpack/msgpack-python/issues/448
|
# https://github.com/msgpack/msgpack-python/issues/448
|
||||||
[mypy-msgpack]
|
[mypy-msgpack]
|
||||||
ignore_missing_imports = True
|
ignore_missing_imports = True
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# This file is automatically @generated by Poetry and should not be changed by hand.
|
# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "alabaster"
|
name = "alabaster"
|
||||||
|
@ -1215,6 +1215,21 @@ html5 = ["html5lib"]
|
||||||
htmlsoup = ["BeautifulSoup4"]
|
htmlsoup = ["BeautifulSoup4"]
|
||||||
source = ["Cython (>=0.29.7)"]
|
source = ["Cython (>=0.29.7)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lxml-stubs"
|
||||||
|
version = "0.4.0"
|
||||||
|
description = "Type annotations for the lxml package"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
files = [
|
||||||
|
{file = "lxml-stubs-0.4.0.tar.gz", hash = "sha256:184877b42127256abc2b932ba8bd0ab5ea80bd0b0fee618d16daa40e0b71abee"},
|
||||||
|
{file = "lxml_stubs-0.4.0-py3-none-any.whl", hash = "sha256:3b381e9e82397c64ea3cc4d6f79d1255d015f7b114806d4826218805c10ec003"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
test = ["coverage[toml] (==5.2)", "pytest (>=6.0.0)", "pytest-mypy-plugins (==1.9.3)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "markdown-it-py"
|
name = "markdown-it-py"
|
||||||
version = "2.2.0"
|
version = "2.2.0"
|
||||||
|
@ -3409,22 +3424,22 @@ docs = ["Sphinx", "repoze.sphinx.autointerface"]
|
||||||
test = ["zope.i18nmessageid", "zope.testing", "zope.testrunner"]
|
test = ["zope.i18nmessageid", "zope.testing", "zope.testrunner"]
|
||||||
|
|
||||||
[extras]
|
[extras]
|
||||||
all = ["matrix-synapse-ldap3", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", "pysaml2", "authlib", "lxml", "sentry-sdk", "jaeger-client", "opentracing", "txredisapi", "hiredis", "Pympler", "pyicu"]
|
all = ["Pympler", "authlib", "hiredis", "jaeger-client", "lxml", "matrix-synapse-ldap3", "opentracing", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", "pyicu", "pysaml2", "sentry-sdk", "txredisapi"]
|
||||||
cache-memory = ["Pympler"]
|
cache-memory = ["Pympler"]
|
||||||
jwt = ["authlib"]
|
jwt = ["authlib"]
|
||||||
matrix-synapse-ldap3 = ["matrix-synapse-ldap3"]
|
matrix-synapse-ldap3 = ["matrix-synapse-ldap3"]
|
||||||
oidc = ["authlib"]
|
oidc = ["authlib"]
|
||||||
opentracing = ["jaeger-client", "opentracing"]
|
opentracing = ["jaeger-client", "opentracing"]
|
||||||
postgres = ["psycopg2", "psycopg2cffi", "psycopg2cffi-compat"]
|
postgres = ["psycopg2", "psycopg2cffi", "psycopg2cffi-compat"]
|
||||||
redis = ["txredisapi", "hiredis"]
|
redis = ["hiredis", "txredisapi"]
|
||||||
saml2 = ["pysaml2"]
|
saml2 = ["pysaml2"]
|
||||||
sentry = ["sentry-sdk"]
|
sentry = ["sentry-sdk"]
|
||||||
systemd = ["systemd-python"]
|
systemd = ["systemd-python"]
|
||||||
test = ["parameterized", "idna"]
|
test = ["idna", "parameterized"]
|
||||||
url-preview = ["lxml"]
|
url-preview = ["lxml"]
|
||||||
user-search = ["pyicu"]
|
user-search = ["pyicu"]
|
||||||
|
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.7.1"
|
python-versions = "^3.7.1"
|
||||||
content-hash = "ef3a16dd66177f7141239e1a2d3e07cc14c08f1e4e0c5127184d022bc062da52"
|
content-hash = "7ad11e62a675e09444cf33ca2de3216fc4efc5874a2575e54d95d577a52439d3"
|
||||||
|
|
|
@ -314,6 +314,7 @@ black = ">=22.3.0"
|
||||||
ruff = "0.0.265"
|
ruff = "0.0.265"
|
||||||
|
|
||||||
# Typechecking
|
# Typechecking
|
||||||
|
lxml-stubs = ">=0.4.0"
|
||||||
mypy = "*"
|
mypy = "*"
|
||||||
mypy-zope = "*"
|
mypy-zope = "*"
|
||||||
types-bleach = ">=4.1.0"
|
types-bleach = ">=4.1.0"
|
||||||
|
|
|
@ -14,7 +14,7 @@
|
||||||
import html
|
import html
|
||||||
import logging
|
import logging
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from typing import TYPE_CHECKING, List, Optional
|
from typing import TYPE_CHECKING, List, Optional, cast
|
||||||
|
|
||||||
import attr
|
import attr
|
||||||
|
|
||||||
|
@ -98,7 +98,7 @@ class OEmbedProvider:
|
||||||
# No match.
|
# No match.
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def autodiscover_from_html(self, tree: "etree.Element") -> Optional[str]:
|
def autodiscover_from_html(self, tree: "etree._Element") -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
Search an HTML document for oEmbed autodiscovery information.
|
Search an HTML document for oEmbed autodiscovery information.
|
||||||
|
|
||||||
|
@ -109,18 +109,22 @@ class OEmbedProvider:
|
||||||
The URL to use for oEmbed information, or None if no URL was found.
|
The URL to use for oEmbed information, or None if no URL was found.
|
||||||
"""
|
"""
|
||||||
# Search for link elements with the proper rel and type attributes.
|
# Search for link elements with the proper rel and type attributes.
|
||||||
for tag in tree.xpath(
|
# Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
|
||||||
"//link[@rel='alternate'][@type='application/json+oembed']"
|
for tag in cast(
|
||||||
|
List["etree._Element"],
|
||||||
|
tree.xpath("//link[@rel='alternate'][@type='application/json+oembed']"),
|
||||||
):
|
):
|
||||||
if "href" in tag.attrib:
|
if "href" in tag.attrib:
|
||||||
return tag.attrib["href"]
|
return cast(str, tag.attrib["href"])
|
||||||
|
|
||||||
# Some providers (e.g. Flickr) use alternative instead of alternate.
|
# Some providers (e.g. Flickr) use alternative instead of alternate.
|
||||||
for tag in tree.xpath(
|
# Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
|
||||||
"//link[@rel='alternative'][@type='application/json+oembed']"
|
for tag in cast(
|
||||||
|
List["etree._Element"],
|
||||||
|
tree.xpath("//link[@rel='alternative'][@type='application/json+oembed']"),
|
||||||
):
|
):
|
||||||
if "href" in tag.attrib:
|
if "href" in tag.attrib:
|
||||||
return tag.attrib["href"]
|
return cast(str, tag.attrib["href"])
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -212,11 +216,12 @@ class OEmbedProvider:
|
||||||
return OEmbedResult(open_graph_response, author_name, cache_age)
|
return OEmbedResult(open_graph_response, author_name, cache_age)
|
||||||
|
|
||||||
|
|
||||||
def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]:
|
def _fetch_urls(tree: "etree._Element", tag_name: str) -> List[str]:
|
||||||
results = []
|
results = []
|
||||||
for tag in tree.xpath("//*/" + tag_name):
|
# Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
|
||||||
|
for tag in cast(List["etree._Element"], tree.xpath("//*/" + tag_name)):
|
||||||
if "src" in tag.attrib:
|
if "src" in tag.attrib:
|
||||||
results.append(tag.attrib["src"])
|
results.append(cast(str, tag.attrib["src"]))
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
@ -244,11 +249,12 @@ def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) ->
|
||||||
parser = etree.HTMLParser(recover=True, encoding="utf-8")
|
parser = etree.HTMLParser(recover=True, encoding="utf-8")
|
||||||
|
|
||||||
# Attempt to parse the body. If this fails, log and return no metadata.
|
# Attempt to parse the body. If this fails, log and return no metadata.
|
||||||
tree = etree.fromstring(html_body, parser)
|
# TODO Develop of lxml-stubs has this correct.
|
||||||
|
tree = etree.fromstring(html_body, parser) # type: ignore[arg-type]
|
||||||
|
|
||||||
# The data was successfully parsed, but no tree was found.
|
# The data was successfully parsed, but no tree was found.
|
||||||
if tree is None:
|
if tree is None:
|
||||||
return
|
return # type: ignore[unreachable]
|
||||||
|
|
||||||
# Attempt to find interesting URLs (images, videos, embeds).
|
# Attempt to find interesting URLs (images, videos, embeds).
|
||||||
if "og:image" not in open_graph_response:
|
if "og:image" not in open_graph_response:
|
||||||
|
|
|
@ -24,6 +24,7 @@ from typing import (
|
||||||
Optional,
|
Optional,
|
||||||
Set,
|
Set,
|
||||||
Union,
|
Union,
|
||||||
|
cast,
|
||||||
)
|
)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
@ -115,7 +116,7 @@ def _get_html_media_encodings(
|
||||||
|
|
||||||
def decode_body(
|
def decode_body(
|
||||||
body: bytes, uri: str, content_type: Optional[str] = None
|
body: bytes, uri: str, content_type: Optional[str] = None
|
||||||
) -> Optional["etree.Element"]:
|
) -> Optional["etree._Element"]:
|
||||||
"""
|
"""
|
||||||
This uses lxml to parse the HTML document.
|
This uses lxml to parse the HTML document.
|
||||||
|
|
||||||
|
@ -152,11 +153,12 @@ def decode_body(
|
||||||
|
|
||||||
# Attempt to parse the body. Returns None if the body was successfully
|
# Attempt to parse the body. Returns None if the body was successfully
|
||||||
# parsed, but no tree was found.
|
# parsed, but no tree was found.
|
||||||
return etree.fromstring(body, parser)
|
# TODO Develop of lxml-stubs has this correct.
|
||||||
|
return etree.fromstring(body, parser) # type: ignore[arg-type]
|
||||||
|
|
||||||
|
|
||||||
def _get_meta_tags(
|
def _get_meta_tags(
|
||||||
tree: "etree.Element",
|
tree: "etree._Element",
|
||||||
property: str,
|
property: str,
|
||||||
prefix: str,
|
prefix: str,
|
||||||
property_mapper: Optional[Callable[[str], Optional[str]]] = None,
|
property_mapper: Optional[Callable[[str], Optional[str]]] = None,
|
||||||
|
@ -175,9 +177,15 @@ def _get_meta_tags(
|
||||||
Returns:
|
Returns:
|
||||||
A map of tag name to value.
|
A map of tag name to value.
|
||||||
"""
|
"""
|
||||||
|
# This actually returns Dict[str, str], but the caller sets this as a variable
|
||||||
|
# which is Dict[str, Optional[str]].
|
||||||
results: Dict[str, Optional[str]] = {}
|
results: Dict[str, Optional[str]] = {}
|
||||||
for tag in tree.xpath(
|
# Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
|
||||||
f"//*/meta[starts-with(@{property}, '{prefix}:')][@content][not(@content='')]"
|
for tag in cast(
|
||||||
|
List["etree._Element"],
|
||||||
|
tree.xpath(
|
||||||
|
f"//*/meta[starts-with(@{property}, '{prefix}:')][@content][not(@content='')]"
|
||||||
|
),
|
||||||
):
|
):
|
||||||
# if we've got more than 50 tags, someone is taking the piss
|
# if we've got more than 50 tags, someone is taking the piss
|
||||||
if len(results) >= 50:
|
if len(results) >= 50:
|
||||||
|
@ -187,14 +195,15 @@ def _get_meta_tags(
|
||||||
)
|
)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
key = tag.attrib[property]
|
key = cast(str, tag.attrib[property])
|
||||||
if property_mapper:
|
if property_mapper:
|
||||||
key = property_mapper(key)
|
new_key = property_mapper(key)
|
||||||
# None is a special value used to ignore a value.
|
# None is a special value used to ignore a value.
|
||||||
if key is None:
|
if new_key is None:
|
||||||
continue
|
continue
|
||||||
|
key = new_key
|
||||||
|
|
||||||
results[key] = tag.attrib["content"]
|
results[key] = cast(str, tag.attrib["content"])
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
@ -219,7 +228,7 @@ def _map_twitter_to_open_graph(key: str) -> Optional[str]:
|
||||||
return "og" + key[7:]
|
return "og" + key[7:]
|
||||||
|
|
||||||
|
|
||||||
def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]:
|
def parse_html_to_open_graph(tree: "etree._Element") -> Dict[str, Optional[str]]:
|
||||||
"""
|
"""
|
||||||
Parse the HTML document into an Open Graph response.
|
Parse the HTML document into an Open Graph response.
|
||||||
|
|
||||||
|
@ -276,24 +285,36 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]:
|
||||||
|
|
||||||
if "og:title" not in og:
|
if "og:title" not in og:
|
||||||
# Attempt to find a title from the title tag, or the biggest header on the page.
|
# Attempt to find a title from the title tag, or the biggest header on the page.
|
||||||
title = tree.xpath("((//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1])/text()")
|
# Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
|
||||||
|
title = cast(
|
||||||
|
List["etree._ElementUnicodeResult"],
|
||||||
|
tree.xpath("((//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1])/text()"),
|
||||||
|
)
|
||||||
if title:
|
if title:
|
||||||
og["og:title"] = title[0].strip()
|
og["og:title"] = title[0].strip()
|
||||||
else:
|
else:
|
||||||
og["og:title"] = None
|
og["og:title"] = None
|
||||||
|
|
||||||
if "og:image" not in og:
|
if "og:image" not in og:
|
||||||
meta_image = tree.xpath(
|
# Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
|
||||||
"//*/meta[translate(@itemprop, 'IMAGE', 'image')='image'][not(@content='')]/@content[1]"
|
meta_image = cast(
|
||||||
|
List["etree._ElementUnicodeResult"],
|
||||||
|
tree.xpath(
|
||||||
|
"//*/meta[translate(@itemprop, 'IMAGE', 'image')='image'][not(@content='')]/@content[1]"
|
||||||
|
),
|
||||||
)
|
)
|
||||||
# If a meta image is found, use it.
|
# If a meta image is found, use it.
|
||||||
if meta_image:
|
if meta_image:
|
||||||
og["og:image"] = meta_image[0]
|
og["og:image"] = meta_image[0]
|
||||||
else:
|
else:
|
||||||
# Try to find images which are larger than 10px by 10px.
|
# Try to find images which are larger than 10px by 10px.
|
||||||
|
# Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
|
||||||
#
|
#
|
||||||
# TODO: consider inlined CSS styles as well as width & height attribs
|
# TODO: consider inlined CSS styles as well as width & height attribs
|
||||||
images = tree.xpath("//img[@src][number(@width)>10][number(@height)>10]")
|
images = cast(
|
||||||
|
List["etree._Element"],
|
||||||
|
tree.xpath("//img[@src][number(@width)>10][number(@height)>10]"),
|
||||||
|
)
|
||||||
images = sorted(
|
images = sorted(
|
||||||
images,
|
images,
|
||||||
key=lambda i: (
|
key=lambda i: (
|
||||||
|
@ -302,20 +323,29 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]:
|
||||||
)
|
)
|
||||||
# If no images were found, try to find *any* images.
|
# If no images were found, try to find *any* images.
|
||||||
if not images:
|
if not images:
|
||||||
images = tree.xpath("//img[@src][1]")
|
# Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
|
||||||
|
images = cast(List["etree._Element"], tree.xpath("//img[@src][1]"))
|
||||||
if images:
|
if images:
|
||||||
og["og:image"] = images[0].attrib["src"]
|
og["og:image"] = cast(str, images[0].attrib["src"])
|
||||||
|
|
||||||
# Finally, fallback to the favicon if nothing else.
|
# Finally, fallback to the favicon if nothing else.
|
||||||
else:
|
else:
|
||||||
favicons = tree.xpath("//link[@href][contains(@rel, 'icon')]/@href[1]")
|
# Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
|
||||||
|
favicons = cast(
|
||||||
|
List["etree._ElementUnicodeResult"],
|
||||||
|
tree.xpath("//link[@href][contains(@rel, 'icon')]/@href[1]"),
|
||||||
|
)
|
||||||
if favicons:
|
if favicons:
|
||||||
og["og:image"] = favicons[0]
|
og["og:image"] = favicons[0]
|
||||||
|
|
||||||
if "og:description" not in og:
|
if "og:description" not in og:
|
||||||
# Check the first meta description tag for content.
|
# Check the first meta description tag for content.
|
||||||
meta_description = tree.xpath(
|
# Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
|
||||||
"//*/meta[translate(@name, 'DESCRIPTION', 'description')='description'][not(@content='')]/@content[1]"
|
meta_description = cast(
|
||||||
|
List["etree._ElementUnicodeResult"],
|
||||||
|
tree.xpath(
|
||||||
|
"//*/meta[translate(@name, 'DESCRIPTION', 'description')='description'][not(@content='')]/@content[1]"
|
||||||
|
),
|
||||||
)
|
)
|
||||||
# If a meta description is found with content, use it.
|
# If a meta description is found with content, use it.
|
||||||
if meta_description:
|
if meta_description:
|
||||||
|
@ -332,7 +362,7 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]:
|
||||||
return og
|
return og
|
||||||
|
|
||||||
|
|
||||||
def parse_html_description(tree: "etree.Element") -> Optional[str]:
|
def parse_html_description(tree: "etree._Element") -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
Calculate a text description based on an HTML document.
|
Calculate a text description based on an HTML document.
|
||||||
|
|
||||||
|
@ -368,6 +398,9 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
|
||||||
"canvas",
|
"canvas",
|
||||||
"img",
|
"img",
|
||||||
"picture",
|
"picture",
|
||||||
|
# etree.Comment is a function which creates an etree._Comment element.
|
||||||
|
# The "tag" attribute of an etree._Comment instance is confusingly the
|
||||||
|
# etree.Comment function instead of a string.
|
||||||
etree.Comment,
|
etree.Comment,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -381,8 +414,8 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
|
||||||
|
|
||||||
|
|
||||||
def _iterate_over_text(
|
def _iterate_over_text(
|
||||||
tree: Optional["etree.Element"],
|
tree: Optional["etree._Element"],
|
||||||
tags_to_ignore: Set[Union[str, "etree.Comment"]],
|
tags_to_ignore: Set[object],
|
||||||
stack_limit: int = 1024,
|
stack_limit: int = 1024,
|
||||||
) -> Generator[str, None, None]:
|
) -> Generator[str, None, None]:
|
||||||
"""Iterate over the tree returning text nodes in a depth first fashion,
|
"""Iterate over the tree returning text nodes in a depth first fashion,
|
||||||
|
@ -402,7 +435,7 @@ def _iterate_over_text(
|
||||||
|
|
||||||
# This is a stack whose items are elements to iterate over *or* strings
|
# This is a stack whose items are elements to iterate over *or* strings
|
||||||
# to be returned.
|
# to be returned.
|
||||||
elements: List[Union[str, "etree.Element"]] = [tree]
|
elements: List[Union[str, "etree._Element"]] = [tree]
|
||||||
while elements:
|
while elements:
|
||||||
el = elements.pop()
|
el = elements.pop()
|
||||||
|
|
||||||
|
|
|
@ -24,7 +24,7 @@ from tests import unittest
|
||||||
try:
|
try:
|
||||||
import lxml
|
import lxml
|
||||||
except ImportError:
|
except ImportError:
|
||||||
lxml = None
|
lxml = None # type: ignore[assignment]
|
||||||
|
|
||||||
|
|
||||||
class SummarizeTestCase(unittest.TestCase):
|
class SummarizeTestCase(unittest.TestCase):
|
||||||
|
@ -160,6 +160,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tree = decode_body(html, "http://example.com/test.html")
|
tree = decode_body(html, "http://example.com/test.html")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
|
|
||||||
self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
|
self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
|
||||||
|
@ -176,6 +177,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tree = decode_body(html, "http://example.com/test.html")
|
tree = decode_body(html, "http://example.com/test.html")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
|
|
||||||
self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
|
self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
|
||||||
|
@ -195,6 +197,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tree = decode_body(html, "http://example.com/test.html")
|
tree = decode_body(html, "http://example.com/test.html")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
|
@ -217,6 +220,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tree = decode_body(html, "http://example.com/test.html")
|
tree = decode_body(html, "http://example.com/test.html")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
|
|
||||||
self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
|
self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
|
||||||
|
@ -231,6 +235,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tree = decode_body(html, "http://example.com/test.html")
|
tree = decode_body(html, "http://example.com/test.html")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
|
|
||||||
self.assertEqual(og, {"og:title": None, "og:description": "Some text."})
|
self.assertEqual(og, {"og:title": None, "og:description": "Some text."})
|
||||||
|
@ -246,6 +251,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tree = decode_body(html, "http://example.com/test.html")
|
tree = decode_body(html, "http://example.com/test.html")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
|
|
||||||
self.assertEqual(og, {"og:title": "Title", "og:description": "Title"})
|
self.assertEqual(og, {"og:title": "Title", "og:description": "Title"})
|
||||||
|
@ -261,6 +267,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tree = decode_body(html, "http://example.com/test.html")
|
tree = decode_body(html, "http://example.com/test.html")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
|
|
||||||
self.assertEqual(og, {"og:title": "Title", "og:description": "Some text."})
|
self.assertEqual(og, {"og:title": "Title", "og:description": "Some text."})
|
||||||
|
@ -281,6 +288,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tree = decode_body(html, "http://example.com/test.html")
|
tree = decode_body(html, "http://example.com/test.html")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
|
|
||||||
self.assertEqual(og, {"og:title": "Title", "og:description": "Finally!"})
|
self.assertEqual(og, {"og:title": "Title", "og:description": "Finally!"})
|
||||||
|
@ -296,6 +304,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tree = decode_body(html, "http://example.com/test.html")
|
tree = decode_body(html, "http://example.com/test.html")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
|
|
||||||
self.assertEqual(og, {"og:title": None, "og:description": "Some text."})
|
self.assertEqual(og, {"og:title": None, "og:description": "Some text."})
|
||||||
|
@ -324,6 +333,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
<head><title>Foo</title></head><body>Some text.</body></html>
|
<head><title>Foo</title></head><body>Some text.</body></html>
|
||||||
""".strip()
|
""".strip()
|
||||||
tree = decode_body(html, "http://example.com/test.html")
|
tree = decode_body(html, "http://example.com/test.html")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
|
self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
|
||||||
|
|
||||||
|
@ -338,6 +348,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
</html>
|
</html>
|
||||||
"""
|
"""
|
||||||
tree = decode_body(html, "http://example.com/test.html", "invalid-encoding")
|
tree = decode_body(html, "http://example.com/test.html", "invalid-encoding")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
|
self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
|
||||||
|
|
||||||
|
@ -353,6 +364,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
</html>
|
</html>
|
||||||
"""
|
"""
|
||||||
tree = decode_body(html, "http://example.com/test.html")
|
tree = decode_body(html, "http://example.com/test.html")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
self.assertEqual(og, {"og:title": "ÿÿ Foo", "og:description": "Some text."})
|
self.assertEqual(og, {"og:title": "ÿÿ Foo", "og:description": "Some text."})
|
||||||
|
|
||||||
|
@ -367,6 +379,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
</html>
|
</html>
|
||||||
"""
|
"""
|
||||||
tree = decode_body(html, "http://example.com/test.html")
|
tree = decode_body(html, "http://example.com/test.html")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
self.assertEqual(og, {"og:title": "ó", "og:description": "Some text."})
|
self.assertEqual(og, {"og:title": "ó", "og:description": "Some text."})
|
||||||
|
|
||||||
|
@ -380,6 +393,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
</html>
|
</html>
|
||||||
"""
|
"""
|
||||||
tree = decode_body(html, "http://example.com/test.html")
|
tree = decode_body(html, "http://example.com/test.html")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
og,
|
og,
|
||||||
|
@ -401,6 +415,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
</html>
|
</html>
|
||||||
"""
|
"""
|
||||||
tree = decode_body(html, "http://example.com/test.html")
|
tree = decode_body(html, "http://example.com/test.html")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
og,
|
og,
|
||||||
|
@ -419,6 +434,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
|
||||||
with a cheeky SVG</svg></u> and <strong>some</strong> tail text</b></a>
|
with a cheeky SVG</svg></u> and <strong>some</strong> tail text</b></a>
|
||||||
"""
|
"""
|
||||||
tree = decode_body(html, "http://example.com/test.html")
|
tree = decode_body(html, "http://example.com/test.html")
|
||||||
|
assert tree is not None
|
||||||
og = parse_html_to_open_graph(tree)
|
og = parse_html_to_open_graph(tree)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
og,
|
og,
|
||||||
|
|
|
@ -28,7 +28,7 @@ from tests.unittest import HomeserverTestCase
|
||||||
try:
|
try:
|
||||||
import lxml
|
import lxml
|
||||||
except ImportError:
|
except ImportError:
|
||||||
lxml = None
|
lxml = None # type: ignore[assignment]
|
||||||
|
|
||||||
|
|
||||||
class OEmbedTests(HomeserverTestCase):
|
class OEmbedTests(HomeserverTestCase):
|
||||||
|
|
|
@ -24,7 +24,7 @@ from tests.unittest import override_config
|
||||||
try:
|
try:
|
||||||
import lxml
|
import lxml
|
||||||
except ImportError:
|
except ImportError:
|
||||||
lxml = None
|
lxml = None # type: ignore[assignment]
|
||||||
|
|
||||||
|
|
||||||
class URLPreviewTests(unittest.HomeserverTestCase):
|
class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
|
|
|
@ -40,7 +40,7 @@ from tests.test_utils import SMALL_PNG
|
||||||
try:
|
try:
|
||||||
import lxml
|
import lxml
|
||||||
except ImportError:
|
except ImportError:
|
||||||
lxml = None
|
lxml = None # type: ignore[assignment]
|
||||||
|
|
||||||
|
|
||||||
class URLPreviewTests(unittest.HomeserverTestCase):
|
class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
|
|
Loading…
Reference in New Issue