From 8de58070ee3e55f29966a787fd618632dbf4309b Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Sat, 8 Jan 2022 20:39:28 -0500 Subject: New upstream version 1.20.1. --- gallery_dl/extractor/common.py | 3 +- gallery_dl/extractor/gelbooru.py | 43 +++++++++++++++++++++++- gallery_dl/extractor/gelbooru_v02.py | 18 +++++++--- gallery_dl/extractor/hitomi.py | 35 ++++++++++++++------ gallery_dl/extractor/mangadex.py | 17 +++++----- gallery_dl/extractor/newgrounds.py | 64 +++++++++++++++++++++++++++++++++++- gallery_dl/extractor/patreon.py | 1 + gallery_dl/extractor/wordpress.py | 41 ----------------------- 8 files changed, 156 insertions(+), 66 deletions(-) delete mode 100644 gallery_dl/extractor/wordpress.py (limited to 'gallery_dl/extractor') diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index c440aee..afe4a16 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -336,7 +336,8 @@ class Extractor(): now = time.time() for cookie in self._cookiejar: - if cookie.name in names and cookie.domain == domain: + if cookie.name in names and ( + not domain or cookie.domain == domain): if cookie.expires and cookie.expires < now: self.log.warning("Cookie '%s' has expired", cookie.name) else: diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index df45d0d..a6bda52 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -10,7 +10,7 @@ from .common import Extractor, Message from . import gelbooru_v02 -from .. import text, exception +from .. import text, util, exception import binascii @@ -20,6 +20,42 @@ class GelbooruBase(): basecategory = "booru" root = "https://gelbooru.com" + def _api_request(self, params): + url = self.root + "/index.php?page=dapi&s=post&q=index&json=1" + data = self.request(url, params=params).json() + if "post" not in data: + return () + posts = data["post"] + if not isinstance(posts, list): + return (posts,) + return posts + + def _pagination(self, params): + params["pid"] = self.page_start + params["limit"] = self.per_page + + post = None + while True: + try: + posts = self._api_request(params) + except ValueError: + if "tags" not in params or post is None: + raise + taglist = [tag for tag in params["tags"].split() + if not tag.startswith("id:<")] + taglist.append("id:<" + str(post.attrib["id"])) + params["tags"] = " ".join(taglist) + params["pid"] = 0 + continue + + post = None + for post in posts: + yield post + + if len(posts) < self.per_page: + return + params["pid"] += 1 + @staticmethod def _file_url(post): url = post["file_url"] @@ -82,6 +118,11 @@ class GelbooruPoolExtractor(GelbooruBase, "pool_name": text.unescape(name), } + def posts(self): + params = {} + for params["id"] in util.advance(self.post_ids, self.page_start): + yield from self._api_request(params) + class GelbooruPostExtractor(GelbooruBase, gelbooru_v02.GelbooruV02PostExtractor): diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py index a42a202..8da0bde 100644 --- a/gallery_dl/extractor/gelbooru_v02.py +++ b/gallery_dl/extractor/gelbooru_v02.py @@ -19,8 +19,15 @@ import re class GelbooruV02Extractor(booru.BooruExtractor): basecategory = "gelbooru_v02" + def __init__(self, match): + booru.BooruExtractor.__init__(self, match) + try: + self.api_root = INSTANCES[self.category]["api_root"] + except KeyError: + self.api_root = self.root + def _api_request(self, params): - url = self.root + "/index.php?page=dapi&s=post&q=index" + url = self.api_root + "/index.php?page=dapi&s=post&q=index" return ElementTree.fromstring(self.request(url, params=params).text) def _pagination(self, params): @@ -97,12 +104,15 @@ class GelbooruV02Extractor(booru.BooruExtractor): post["notes"] = notes -BASE_PATTERN = GelbooruV02Extractor.update({ +INSTANCES = { "realbooru": {"root": "https://realbooru.com"}, - "rule34" : {"root": "https://rule34.xxx"}, + "rule34" : {"root": "https://rule34.xxx", + "api_root": " https://api.rule34.xxx"}, "safebooru": {"root": "https://safebooru.org"}, "tbib" : {"root": "https://tbib.org"}, -}) +} + +BASE_PATTERN = GelbooruV02Extractor.update(INSTANCES) class GelbooruV02TagExtractor(GelbooruV02Extractor): diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 88cf98c..ce6c7ce 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -26,7 +26,7 @@ class HitomiGalleryExtractor(GalleryExtractor): r"/(?:[^/?#]+-)?(\d+)") test = ( ("https://hitomi.la/galleries/867789.html", { - "pattern": r"https://[a-c]b.hitomi.la/images/1639745412/\d+" + "pattern": r"https://[a-c]b.hitomi.la/images/1641140516/\d+" r"/[0-9a-f]{64}\.jpg", "keyword": "4873ef9a523621fc857b114e0b2820ba4066e9ae", "options": (("metadata", True),), @@ -39,12 +39,12 @@ class HitomiGalleryExtractor(GalleryExtractor): }), # Game CG with scenes (#321) ("https://hitomi.la/galleries/733697.html", { - "url": "479d16fe92117a6a2ce81b4e702e6347922c81e3", + "url": "d4854175da2b5fa4ae62749266c7be0bf237dc99", "count": 210, }), # fallback for galleries only available through /reader/ URLs ("https://hitomi.la/galleries/1045954.html", { - "url": "ebc1415c5d7f634166ef7e2635b77735de1ea7a2", + "url": "eea99c3745719a7a392150335e6ae3f73faa0b85", "count": 1413, }), # gallery with "broken" redirect @@ -138,7 +138,7 @@ class HitomiGalleryExtractor(GalleryExtractor): def images(self, _): # see https://ltn.hitomi.la/gg.js - gg_m, gg_b = _parse_gg(self) + gg_m, gg_b, gg_default = _parse_gg(self) result = [] for image in self.info["files"]: @@ -148,7 +148,7 @@ class HitomiGalleryExtractor(GalleryExtractor): # see https://ltn.hitomi.la/common.js inum = int(ihash[-1] + ihash[-3:-1], 16) url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format( - chr(97 + gg_m.get(inum, 0)), + chr(97 + gg_m.get(inum, gg_default)), gg_b, inum, ihash, idata["extension"], ) result.append((url, idata)) @@ -195,10 +195,25 @@ class HitomiTagExtractor(Extractor): def _parse_gg(extr): page = extr.request("https://ltn.hitomi.la/gg.js").text - m = { - int(match.group(1)): int(match.group(2)) - for match in re.finditer(r"case (\d+): o = (\d+); break;", page) - } + m = {} + + keys = [] + for match in re.finditer( + r"case\s+(\d+):(?:\s*o\s*=\s*(\d+))?", page): + key, value = match.groups() + keys.append(int(key)) + + if value: + value = int(value) + for key in keys: + m[key] = value + keys.clear() + + for match in re.finditer( + r"if\s+\(g\s*===?\s*(\d+)\)[\s{]*o\s*=\s*(\d+)", page): + m[int(match.group(1))] = int(match.group(2)) + + d = re.search(r"(?:var\s|default:)\s*o\s*=\s*(\d+)", page) b = re.search(r"b:\s*[\"'](.+)[\"']", page) - return m, b.group(1).strip("/") + return m, b.group(1).strip("/"), int(d.group(1)) if d else 1 diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index 393f4e2..ea5d4a8 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -40,7 +40,7 @@ class MangadexExtractor(Extractor): uuid = chapter["id"] data = self._transform(chapter) data["_extractor"] = MangadexChapterExtractor - self._cache[uuid] = (chapter, data) + self._cache[uuid] = data yield Message.Queue, self.root + "/chapter/" + uuid, data def _transform(self, chapter): @@ -72,7 +72,7 @@ class MangadexExtractor(Extractor): "date" : text.parse_datetime(cattributes["publishAt"]), "lang" : lang, "language": util.code_to_language(lang), - "count" : len(cattributes["data"]), + "count" : cattributes["pages"], } data["artist"] = [artist["attributes"]["name"] @@ -107,20 +107,21 @@ class MangadexChapterExtractor(MangadexExtractor): def items(self): try: - chapter, data = self._cache.pop(self.uuid) + data = self._cache.pop(self.uuid) except KeyError: chapter = self.api.chapter(self.uuid) data = self._transform(chapter) - yield Message.Directory, data - cattributes = chapter["attributes"] + yield Message.Directory, data data["_http_headers"] = self._headers - base = "{}/data/{}/".format( - self.api.athome_server(self.uuid)["baseUrl"], cattributes["hash"]) + + server = self.api.athome_server(self.uuid) + chapter = server["chapter"] + base = "{}/data/{}/".format(server["baseUrl"], chapter["hash"]) enum = util.enumerate_reversed if self.config( "page-reverse") else enumerate - for data["page"], page in enum(cattributes["data"], 1): + for data["page"], page in enum(chapter["data"], 1): text.nameext_from_url(page, data) yield Message.Url, base + page, data diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 4351b3e..8bcbc20 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -38,6 +38,7 @@ class NewgroundsExtractor(Extractor): def items(self): self.login() + metadata = self.metadata() for post_url in self.posts(): try: @@ -48,6 +49,8 @@ class NewgroundsExtractor(Extractor): url = None if url: + if metadata: + post.update(metadata) yield Message.Directory, post yield Message.Url, url, text.nameext_from_url(url, post) @@ -62,9 +65,12 @@ class NewgroundsExtractor(Extractor): "Unable to get download URL for '%s'", post_url) def posts(self): - """Return urls of all relevant image pages""" + """Return URLs of all relevant post pages""" return self._pagination(self._path) + def metadata(self): + """Return general metadata""" + def login(self): username, password = self._get_auth_info() if username: @@ -493,3 +499,59 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor): text.ensure_http_scheme(user.rpartition('"')[2]) for user in text.extract_iter(page, 'class="item-user', '">