Don't infer paragraphs from newlines

This commit is contained in:
Erik Johnston 2016-08-02 18:50:03 +01:00
parent 6b58ade2f0
commit 58c9653c6b
1 changed file with 1 addition and 2 deletions

View File

@@ -344,9 +344,8 @@ class PreviewUrlResource(Resource):
# Split all the text nodes into paragraphs (by splitting on new
# lines)
text_nodes = (
line.strip()
re.sub(r'\s+', '\n', el.text).strip()
for el in cloned_tree.iter() if el.text
for line in el.text.splitlines()
)
# Try to get a summary of between 200 and 500 words, respecting