Fix crash in URL preview when an HTML tag has no text
Signed-off-by: Marcin Bachry <hegel666@gmail.com>
This commit is contained in:
parent
c3208e45c9
commit
24c16fc349
|
@ -381,7 +381,10 @@ def _calc_og(tree, media_uri):
|
||||||
if 'og:title' not in og:
|
if 'og:title' not in og:
|
||||||
# do some basic spidering of the HTML
|
# do some basic spidering of the HTML
|
||||||
title = tree.xpath("(//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1]")
|
title = tree.xpath("(//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1]")
|
||||||
og['og:title'] = title[0].text.strip() if title else None
|
if title and title[0].text is not None:
|
||||||
|
og['og:title'] = title[0].text.strip()
|
||||||
|
else:
|
||||||
|
og['og:title'] = None
|
||||||
|
|
||||||
if 'og:image' not in og:
|
if 'og:image' not in og:
|
||||||
# TODO: extract a favicon failing all else
|
# TODO: extract a favicon failing all else
|
||||||
|
|
|
@ -215,3 +215,53 @@ class PreviewUrlTestCase(unittest.TestCase):
|
||||||
u"og:title": u"Foo",
|
u"og:title": u"Foo",
|
||||||
u"og:description": u"Some text."
|
u"og:description": u"Some text."
|
||||||
})
|
})
|
||||||
|
|
||||||
|
def test_missing_title(self):
|
||||||
|
html = u"""
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
Some text.
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
og = decode_and_calc_og(html, "http://example.com/test.html")
|
||||||
|
|
||||||
|
self.assertEquals(og, {
|
||||||
|
u"og:title": None,
|
||||||
|
u"og:description": u"Some text."
|
||||||
|
})
|
||||||
|
|
||||||
|
def test_h1_as_title(self):
|
||||||
|
html = u"""
|
||||||
|
<html>
|
||||||
|
<meta property="og:description" content="Some text."/>
|
||||||
|
<body>
|
||||||
|
<h1>Title</h1>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
og = decode_and_calc_og(html, "http://example.com/test.html")
|
||||||
|
|
||||||
|
self.assertEquals(og, {
|
||||||
|
u"og:title": u"Title",
|
||||||
|
u"og:description": u"Some text."
|
||||||
|
})
|
||||||
|
|
||||||
|
def test_missing_title_and_broken_h1(self):
|
||||||
|
html = u"""
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<h1><a href="foo"/></h1>
|
||||||
|
Some text.
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
og = decode_and_calc_og(html, "http://example.com/test.html")
|
||||||
|
|
||||||
|
self.assertEquals(og, {
|
||||||
|
u"og:title": None,
|
||||||
|
u"og:description": u"Some text."
|
||||||
|
})
|
||||||
|
|
Loading…
Reference in New Issue