summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/hatenablog.py
diff options
context:
space:
mode:
author: [Libravatar] Unit 193 <unit193@unit193.net> 2025-07-31 01:22:01 -0400
committer: [Libravatar] Unit 193 <unit193@unit193.net> 2025-07-31 01:22:01 -0400
commit: a6e995c093de8aae2e91a0787281bb34c0b871eb (patch)
tree: 2d79821b05300d34d8871eb6c9662b359a2de85d /gallery_dl/extractor/hatenablog.py
parent: 7672a750cb74bf31e21d76aad2776367fd476155 (diff)
New upstream version 1.30.2. [upstream/1.30.2]
Diffstat (limited to 'gallery_dl/extractor/hatenablog.py')
-rw-r--r-- gallery_dl/extractor/hatenablog.py 21
1 files changed, 10 insertions, 11 deletions
diff --git a/gallery_dl/extractor/hatenablog.py b/gallery_dl/extractor/hatenablog.py
index 792f666..8e350d6 100644
--- a/gallery_dl/extractor/hatenablog.py
+++ b/gallery_dl/extractor/hatenablog.py
@@ -6,9 +6,8 @@
"""Extractors for https://hatenablog.com"""
-import re
from .common import Extractor, Message
-from .. import text
+from .. import text, util
BASE_PATTERN = (
@@ -28,10 +27,10 @@ class HatenablogExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.domain = match.group(1) or match.group(2)
+ self.domain = match[1] or match[2]
def _init(self):
- self._find_img = re.compile(r'<img +([^>]+)').finditer
+ self._find_img = util.re(r'<img +([^>]+)').finditer
def _handle_article(self, article: str):
extr = text.extract_from(article)
@@ -43,8 +42,8 @@ class HatenablogExtractor(Extractor):
'<div class="entry-content hatenablog-entry">', '</div>')
images = []
- for i in self._find_img(content):
- attributes = i.group(1)
+ for match in self._find_img(content):
+ attributes = match[1]
if 'class="hatena-fotolife"' not in attributes:
continue
image = text.unescape(text.extr(attributes, 'src="', '"'))
@@ -68,13 +67,13 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
def __init__(self, match):
HatenablogExtractor.__init__(self, match)
- self.path = match.group(3)
+ self.path = match[3]
self.query = {key: value for key, value in text.parse_query(
- match.group(4)).items() if self._acceptable_query(key)}
+ match[4]).items() if self._acceptable_query(key)}
def _init(self):
HatenablogExtractor._init(self)
- self._find_pager_url = re.compile(
+ self._find_pager_url = util.re(
r' class="pager-next">\s*<a href="([^"]+)').search
def items(self):
@@ -92,7 +91,7 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
yield from self._handle_full_articles(extr)
match = self._find_pager_url(page)
- url = text.unescape(match.group(1)) if match else None
+ url = text.unescape(match[1]) if match else None
query = None
def _handle_partial_articles(self, extr):
@@ -129,7 +128,7 @@ class HatenablogEntryExtractor(HatenablogExtractor):
def __init__(self, match):
HatenablogExtractor.__init__(self, match)
- self.path = match.group(3)
+ self.path = match[3]
def items(self):
url = "https://" + self.domain + "/entry/" + self.path