about | summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/text.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@ubuntu.com> 2020-03-16 23:20:22 -0400
committer: Unit 193 <unit193@ubuntu.com> 2020-03-16 23:20:22 -0400
commit: f1baa4aa12d705e290f74c9fb4c6cd5eb2976fa2 (patch)
tree: 70267e5f04db1da396e75fd4148d9c542683bbab /gallery_dl/text.py
parent: 2bd320e568d015940227b7355396701331e2cd1e (diff)
parent: e8cc000750de972384f2f34d02d42222b4018ae9 (diff)
Update upstream source from tag 'upstream/1.13.2'
Update to upstream version '1.13.2' with Debian dir a36309ac1ae7b23d042eaafd21c4267c2f840ab4
Diffstat (limited to 'gallery_dl/text.py')
-rw-r--r-- gallery_dl/text.py 6
1 files changed, 4 insertions, 2 deletions
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index 72dad5b..a3f4e0a 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -15,6 +15,8 @@ import datetime
import urllib.parse
+HTML_RE = re.compile("<[^>]+>")
+
INVALID_XML_CHARS = (
"\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07",
"\x08", "\x0b", "\x0c", "\x0e", "\x0f", "\x10", "\x11", "\x12",
@@ -39,7 +41,7 @@ def clean_xml(xmldata, repl=""):
def remove_html(txt, repl=" ", sep=" "):
"""Remove html-tags from a string"""
try:
- txt = re.sub("<[^>]+>", repl, txt)
+ txt = HTML_RE.sub(repl, txt)
except TypeError:
return ""
if sep:
@@ -51,7 +53,7 @@ def split_html(txt, sep=None):
"""Split input string by html-tags"""
try:
return [
- x.strip() for x in re.split("<[^>]+>", txt)
+ x.strip() for x in HTML_RE.split(txt)
if x and not x.isspace()
]
except TypeError: