diff options
| author | 2021-10-05 23:30:05 -0400 | |
|---|---|---|
| committer | 2021-10-05 23:30:05 -0400 | |
| commit | 34ba2951b8c523713425c98addb9256ea05c946f (patch) | |
| tree | 6ec7e96d0c6e6f6e94b6b97ecd8c0a414ceef93d /gallery_dl/extractor/aryion.py | |
| parent | 3f5483df9075ae526f4c54f4cbe80edeabf6d4cc (diff) | |
New upstream version 1.19.0. (ref: upstream/1.19.0)
Diffstat (limited to 'gallery_dl/extractor/aryion.py')
| -rw-r--r-- | gallery_dl/extractor/aryion.py | 57 |
1 file changed, 50 insertions, 7 deletions
```diff
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 0d0ad70..06ec571 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -29,7 +29,6 @@ class AryionExtractor(Extractor):
         Extractor.__init__(self, match)
         self.user = match.group(1)
         self.recursive = True
-        self._needle = "class='gallery-item' id='"
 
     def login(self):
         if self._check_cookies(self.cookienames):
@@ -56,25 +55,50 @@ class AryionExtractor(Extractor):
 
     def items(self):
         self.login()
+        data = self.metadata()
 
         for post_id in self.posts():
             post = self._parse_post(post_id)
             if post:
+                if data:
+                    post.update(data)
                 yield Message.Directory, post
                 yield Message.Url, post["url"], post
             elif post is False and self.recursive:
                 base = self.root + "/g4/view/"
                 data = {"_extractor": AryionPostExtractor}
-                for post_id in self._pagination(base + post_id):
+                for post_id in self._pagination_params(base + post_id):
                     yield Message.Queue, base + post_id, data
 
     def posts(self):
         """Yield relevant post IDs"""
 
-    def _pagination(self, url):
+    def metadata(self):
+        """Return general metadata"""
+
+    def _pagination_params(self, url, params=None):
+        if params is None:
+            params = {"p": 1}
+        else:
+            params["p"] = text.parse_int(params.get("p"), 1)
+
+        while True:
+            page = self.request(url, params=params).text
+
+            cnt = 0
+            for post_id in text.extract_iter(
+                    page, "class='gallery-item' id='", "'"):
+                cnt += 1
+                yield post_id
+
+            if cnt < 40:
+                return
+            params["p"] += 1
+
+    def _pagination_next(self, url):
         while True:
             page = self.request(url).text
-            yield from text.extract_iter(page, self._needle, "'")
+            yield from text.extract_iter(page, "thumb' href='/g4/view/", "'")
 
             pos = page.find("Next >>")
             if pos < 0:
@@ -180,11 +204,30 @@ class AryionGalleryExtractor(AryionExtractor):
     def posts(self):
         if self.recursive:
             url = "{}/g4/gallery/{}".format(self.root, self.user)
-            return self._pagination(url)
+            return self._pagination_params(url)
         else:
-            self._needle = "thumb' href='/g4/view/"
             url = "{}/g4/latest.php?name={}".format(self.root, self.user)
-            return util.advance(self._pagination(url), self.offset)
+            return util.advance(self._pagination_next(url), self.offset)
+
+
+class AryionTagExtractor(AryionExtractor):
+    """Extractor for tag searches on eka's portal"""
+    subcategory = "tag"
+    directory_fmt = ("{category}", "tags", "{search_tags}")
+    archive_fmt = "t_{search_tags}_{id}"
+    pattern = BASE_PATTERN + r"/tags\.php\?([^#]+)"
+    test = ("https://aryion.com/g4/tags.php?tag=star+wars&p=19", {
+        "count": ">= 5",
+    })
+
+    def metadata(self):
+        self.params = text.parse_query(self.user)
+        self.user = None
+        return {"search_tags": self.params.get("tag")}
+
+    def posts(self):
+        url = self.root + "/g4/tags.php"
+        return self._pagination_params(url, self.params)
 
 
 class AryionPostExtractor(AryionExtractor):
```
