diff options
Diffstat (limited to 'gallery_dl/extractor/hatenablog.py')
| -rw-r--r-- | gallery_dl/extractor/hatenablog.py | 21 |
1 files changed, 10 insertions, 11 deletions
diff --git a/gallery_dl/extractor/hatenablog.py b/gallery_dl/extractor/hatenablog.py index 792f666..8e350d6 100644 --- a/gallery_dl/extractor/hatenablog.py +++ b/gallery_dl/extractor/hatenablog.py @@ -6,9 +6,8 @@ """Extractors for https://hatenablog.com""" -import re from .common import Extractor, Message -from .. import text +from .. import text, util BASE_PATTERN = ( @@ -28,10 +27,10 @@ class HatenablogExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) - self.domain = match.group(1) or match.group(2) + self.domain = match[1] or match[2] def _init(self): - self._find_img = re.compile(r'<img +([^>]+)').finditer + self._find_img = util.re(r'<img +([^>]+)').finditer def _handle_article(self, article: str): extr = text.extract_from(article) @@ -43,8 +42,8 @@ class HatenablogExtractor(Extractor): '<div class="entry-content hatenablog-entry">', '</div>') images = [] - for i in self._find_img(content): - attributes = i.group(1) + for match in self._find_img(content): + attributes = match[1] if 'class="hatena-fotolife"' not in attributes: continue image = text.unescape(text.extr(attributes, 'src="', '"')) @@ -68,13 +67,13 @@ class HatenablogEntriesExtractor(HatenablogExtractor): def __init__(self, match): HatenablogExtractor.__init__(self, match) - self.path = match.group(3) + self.path = match[3] self.query = {key: value for key, value in text.parse_query( - match.group(4)).items() if self._acceptable_query(key)} + match[4]).items() if self._acceptable_query(key)} def _init(self): HatenablogExtractor._init(self) - self._find_pager_url = re.compile( + self._find_pager_url = util.re( r' class="pager-next">\s*<a href="([^"]+)').search def items(self): @@ -92,7 +91,7 @@ class HatenablogEntriesExtractor(HatenablogExtractor): yield from self._handle_full_articles(extr) match = self._find_pager_url(page) - url = text.unescape(match.group(1)) if match else None + url = text.unescape(match[1]) if match else None query = None def _handle_partial_articles(self, extr): @@ -129,7 +128,7 @@ class HatenablogEntryExtractor(HatenablogExtractor): def __init__(self, match): HatenablogExtractor.__init__(self, match) - self.path = match.group(3) + self.path = match[3] def items(self): url = "https://" + self.domain + "/entry/" + self.path |
