summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/hatenablog.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/hatenablog.py')
-rw-r--r--gallery_dl/extractor/hatenablog.py21
1 files changed, 10 insertions, 11 deletions
diff --git a/gallery_dl/extractor/hatenablog.py b/gallery_dl/extractor/hatenablog.py
index 792f666..8e350d6 100644
--- a/gallery_dl/extractor/hatenablog.py
+++ b/gallery_dl/extractor/hatenablog.py
@@ -6,9 +6,8 @@
"""Extractors for https://hatenablog.com"""
-import re
from .common import Extractor, Message
-from .. import text
+from .. import text, util
BASE_PATTERN = (
@@ -28,10 +27,10 @@ class HatenablogExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.domain = match.group(1) or match.group(2)
+ self.domain = match[1] or match[2]
def _init(self):
- self._find_img = re.compile(r'<img +([^>]+)').finditer
+ self._find_img = util.re(r'<img +([^>]+)').finditer
def _handle_article(self, article: str):
extr = text.extract_from(article)
@@ -43,8 +42,8 @@ class HatenablogExtractor(Extractor):
'<div class="entry-content hatenablog-entry">', '</div>')
images = []
- for i in self._find_img(content):
- attributes = i.group(1)
+ for match in self._find_img(content):
+ attributes = match[1]
if 'class="hatena-fotolife"' not in attributes:
continue
image = text.unescape(text.extr(attributes, 'src="', '"'))
@@ -68,13 +67,13 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
def __init__(self, match):
HatenablogExtractor.__init__(self, match)
- self.path = match.group(3)
+ self.path = match[3]
self.query = {key: value for key, value in text.parse_query(
- match.group(4)).items() if self._acceptable_query(key)}
+ match[4]).items() if self._acceptable_query(key)}
def _init(self):
HatenablogExtractor._init(self)
- self._find_pager_url = re.compile(
+ self._find_pager_url = util.re(
r' class="pager-next">\s*<a href="([^"]+)').search
def items(self):
@@ -92,7 +91,7 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
yield from self._handle_full_articles(extr)
match = self._find_pager_url(page)
- url = text.unescape(match.group(1)) if match else None
+ url = text.unescape(match[1]) if match else None
query = None
def _handle_partial_articles(self, extr):
@@ -129,7 +128,7 @@ class HatenablogEntryExtractor(HatenablogExtractor):
def __init__(self, match):
HatenablogExtractor.__init__(self, match)
- self.path = match.group(3)
+ self.path = match[3]
def items(self):
url = "https://" + self.domain + "/entry/" + self.path