From 8de58070ee3e55f29966a787fd618632dbf4309b Mon Sep 17 00:00:00 2001
From: Unit 193 <unit193@unit193.net>
Date: Sat, 8 Jan 2022 20:39:28 -0500
Subject: New upstream version 1.20.1.

---
 gallery_dl/extractor/common.py       |  3 +-
 gallery_dl/extractor/gelbooru.py     | 43 +++++++++++++++++++++++-
 gallery_dl/extractor/gelbooru_v02.py | 18 +++++++---
 gallery_dl/extractor/hitomi.py       | 35 ++++++++++++++------
 gallery_dl/extractor/mangadex.py     | 17 +++++-----
 gallery_dl/extractor/newgrounds.py   | 64 +++++++++++++++++++++++++++++++++++-
 gallery_dl/extractor/patreon.py      |  1 +
 gallery_dl/extractor/wordpress.py    | 41 -----------------------
 8 files changed, 156 insertions(+), 66 deletions(-)
 delete mode 100644 gallery_dl/extractor/wordpress.py

(limited to 'gallery_dl/extractor')

diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index c440aee..afe4a16 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -336,7 +336,8 @@ class Extractor():
         now = time.time()
 
         for cookie in self._cookiejar:
-            if cookie.name in names and cookie.domain == domain:
+            if cookie.name in names and (
+                    not domain or cookie.domain == domain):
                 if cookie.expires and cookie.expires < now:
                     self.log.warning("Cookie '%s' has expired", cookie.name)
                 else:
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index df45d0d..a6bda52 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -10,7 +10,7 @@
 
 from .common import Extractor, Message
 from . import gelbooru_v02
-from .. import text, exception
+from .. import text, util, exception
 import binascii
 
 
@@ -20,6 +20,42 @@ class GelbooruBase():
     basecategory = "booru"
     root = "https://gelbooru.com"
 
+    def _api_request(self, params):
+        url = self.root + "/index.php?page=dapi&s=post&q=index&json=1"
+        data = self.request(url, params=params).json()
+        if "post" not in data:
+            return ()
+        posts = data["post"]
+        if not isinstance(posts, list):
+            return (posts,)
+        return posts
+
+    def _pagination(self, params):
+        params["pid"] = self.page_start
+        params["limit"] = self.per_page
+
+        post = None  # last post yielded; used to resume via "id:<N"
+        while True:
+            try:
+                posts = self._api_request(params)
+            except ValueError:  # raised when the response is not JSON
+                if "tags" not in params or post is None:
+                    raise
+                taglist = [tag for tag in params["tags"].split()
+                           if not tag.startswith("id:<")]
+                taglist.append("id:<" + str(post["id"]))  # posts are dicts
+                params["tags"] = " ".join(taglist)
+                params["pid"] = 0
+                continue
+
+            post = None
+            for post in posts:
+                yield post
+
+            if len(posts) < self.per_page:
+                return
+            params["pid"] += 1
+
     @staticmethod
     def _file_url(post):
         url = post["file_url"]
@@ -82,6 +118,11 @@ class GelbooruPoolExtractor(GelbooruBase,
             "pool_name": text.unescape(name),
         }
 
+    def posts(self):
+        params = {}
+        for params["id"] in util.advance(self.post_ids, self.page_start):
+            yield from self._api_request(params)
+
 
 class GelbooruPostExtractor(GelbooruBase,
                             gelbooru_v02.GelbooruV02PostExtractor):
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index a42a202..8da0bde 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -19,8 +19,15 @@ import re
 class GelbooruV02Extractor(booru.BooruExtractor):
     basecategory = "gelbooru_v02"
 
+    def __init__(self, match):
+        booru.BooruExtractor.__init__(self, match)
+        try:
+            self.api_root = INSTANCES[self.category]["api_root"]
+        except KeyError:
+            self.api_root = self.root
+
     def _api_request(self, params):
-        url = self.root + "/index.php?page=dapi&s=post&q=index"
+        url = self.api_root + "/index.php?page=dapi&s=post&q=index"
         return ElementTree.fromstring(self.request(url, params=params).text)
 
     def _pagination(self, params):
@@ -97,12 +104,15 @@ class GelbooruV02Extractor(booru.BooruExtractor):
         post["notes"] = notes
 
 
-BASE_PATTERN = GelbooruV02Extractor.update({
+INSTANCES = {
     "realbooru": {"root": "https://realbooru.com"},
-    "rule34"   : {"root": "https://rule34.xxx"},
+    "rule34"   : {"root": "https://rule34.xxx",
+                  "api_root": "https://api.rule34.xxx"},
     "safebooru": {"root": "https://safebooru.org"},
     "tbib"     : {"root": "https://tbib.org"},
-})
+}
+
+BASE_PATTERN = GelbooruV02Extractor.update(INSTANCES)
 
 
 class GelbooruV02TagExtractor(GelbooruV02Extractor):
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 88cf98c..ce6c7ce 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -26,7 +26,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
                r"/(?:[^/?#]+-)?(\d+)")
     test = (
         ("https://hitomi.la/galleries/867789.html", {
-            "pattern": r"https://[a-c]b.hitomi.la/images/1639745412/\d+"
+            "pattern": r"https://[a-c]b.hitomi.la/images/1641140516/\d+"
                        r"/[0-9a-f]{64}\.jpg",
             "keyword": "4873ef9a523621fc857b114e0b2820ba4066e9ae",
             "options": (("metadata", True),),
@@ -39,12 +39,12 @@ class HitomiGalleryExtractor(GalleryExtractor):
         }),
         # Game CG with scenes (#321)
         ("https://hitomi.la/galleries/733697.html", {
-            "url": "479d16fe92117a6a2ce81b4e702e6347922c81e3",
+            "url": "d4854175da2b5fa4ae62749266c7be0bf237dc99",
             "count": 210,
         }),
         # fallback for galleries only available through /reader/ URLs
         ("https://hitomi.la/galleries/1045954.html", {
-            "url": "ebc1415c5d7f634166ef7e2635b77735de1ea7a2",
+            "url": "eea99c3745719a7a392150335e6ae3f73faa0b85",
             "count": 1413,
         }),
         # gallery with "broken" redirect
@@ -138,7 +138,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
 
     def images(self, _):
         # see https://ltn.hitomi.la/gg.js
-        gg_m, gg_b = _parse_gg(self)
+        gg_m, gg_b, gg_default = _parse_gg(self)
 
         result = []
         for image in self.info["files"]:
@@ -148,7 +148,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
             # see https://ltn.hitomi.la/common.js
             inum = int(ihash[-1] + ihash[-3:-1], 16)
             url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format(
-                chr(97 + gg_m.get(inum, 0)),
+                chr(97 + gg_m.get(inum, gg_default)),
                 gg_b, inum, ihash, idata["extension"],
             )
             result.append((url, idata))
@@ -195,10 +195,25 @@ class HitomiTagExtractor(Extractor):
 def _parse_gg(extr):
     page = extr.request("https://ltn.hitomi.la/gg.js").text
 
-    m = {
-        int(match.group(1)): int(match.group(2))
-        for match in re.finditer(r"case (\d+): o = (\d+); break;", page)
-    }
+    m = {}
+
+    keys = []
+    for match in re.finditer(
+            r"case\s+(\d+):(?:\s*o\s*=\s*(\d+))?", page):
+        key, value = match.groups()
+        keys.append(int(key))
+
+        if value:
+            value = int(value)
+            for key in keys:
+                m[key] = value
+            keys.clear()
+
+    for match in re.finditer(
+            r"if\s+\(g\s*===?\s*(\d+)\)[\s{]*o\s*=\s*(\d+)", page):
+        m[int(match.group(1))] = int(match.group(2))
+
+    d = re.search(r"(?:var\s|default:)\s*o\s*=\s*(\d+)", page)
     b = re.search(r"b:\s*[\"'](.+)[\"']", page)
 
-    return m, b.group(1).strip("/")
+    return m, b.group(1).strip("/"), int(d.group(1)) if d else 1
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 393f4e2..ea5d4a8 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -40,7 +40,7 @@ class MangadexExtractor(Extractor):
             uuid = chapter["id"]
             data = self._transform(chapter)
             data["_extractor"] = MangadexChapterExtractor
-            self._cache[uuid] = (chapter, data)
+            self._cache[uuid] = data
             yield Message.Queue, self.root + "/chapter/" + uuid, data
 
     def _transform(self, chapter):
@@ -72,7 +72,7 @@ class MangadexExtractor(Extractor):
             "date"    : text.parse_datetime(cattributes["publishAt"]),
             "lang"    : lang,
             "language": util.code_to_language(lang),
-            "count"   : len(cattributes["data"]),
+            "count"   : cattributes["pages"],
         }
 
         data["artist"] = [artist["attributes"]["name"]
@@ -107,20 +107,21 @@ class MangadexChapterExtractor(MangadexExtractor):
 
     def items(self):
         try:
-            chapter, data = self._cache.pop(self.uuid)
+            data = self._cache.pop(self.uuid)
         except KeyError:
             chapter = self.api.chapter(self.uuid)
             data = self._transform(chapter)
-        yield Message.Directory, data
 
-        cattributes = chapter["attributes"]
+        yield Message.Directory, data
         data["_http_headers"] = self._headers
-        base = "{}/data/{}/".format(
-            self.api.athome_server(self.uuid)["baseUrl"], cattributes["hash"])
+
+        server = self.api.athome_server(self.uuid)
+        chapter = server["chapter"]
+        base = "{}/data/{}/".format(server["baseUrl"], chapter["hash"])
 
         enum = util.enumerate_reversed if self.config(
             "page-reverse") else enumerate
-        for data["page"], page in enum(cattributes["data"], 1):
+        for data["page"], page in enum(chapter["data"], 1):
             text.nameext_from_url(page, data)
             yield Message.Url, base + page, data
 
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 4351b3e..8bcbc20 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -38,6 +38,7 @@ class NewgroundsExtractor(Extractor):
 
     def items(self):
         self.login()
+        metadata = self.metadata()
 
         for post_url in self.posts():
             try:
@@ -48,6 +49,8 @@ class NewgroundsExtractor(Extractor):
                 url = None
 
             if url:
+                if metadata:
+                    post.update(metadata)
                 yield Message.Directory, post
                 yield Message.Url, url, text.nameext_from_url(url, post)
 
@@ -62,9 +65,12 @@ class NewgroundsExtractor(Extractor):
                     "Unable to get download URL for '%s'", post_url)
 
     def posts(self):
-        """Return urls of all relevant image pages"""
+        """Return URLs of all relevant post pages"""
         return self._pagination(self._path)
 
+    def metadata(self):
+        """Return general metadata"""
+
     def login(self):
         username, password = self._get_auth_info()
         if username:
@@ -493,3 +499,59 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
             text.ensure_http_scheme(user.rpartition('"')[2])
             for user in text.extract_iter(page, 'class="item-user', '"><img')
         ]
+
+
+class NewgroundsSearchExtractor(NewgroundsExtractor):
+    """Extractor for newgrounds.com search results"""
+    subcategory = "search"
+    directory_fmt = ("{category}", "search", "{search_tags}")
+    pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
+               r"/search/conduct/([^/?#]+)/?\?([^#]+)")
+    test = (
+        ("https://www.newgrounds.com/search/conduct/art?terms=tree", {
+            "pattern": NewgroundsImageExtractor.pattern,
+            "keyword": {"search_tags": "tree"},
+            "range": "1-10",
+            "count": 10,
+        }),
+        ("https://www.newgrounds.com/search/conduct/movies?terms=tree", {
+            "pattern": r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+",
+            "range": "1-10",
+            "count": 10,
+        }),
+        ("https://www.newgrounds.com/search/conduct/audio?advanced=1"
+         "&terms=tree+green+nature&match=tdtu&genre=5&suitabilities=e%2Cm"),
+    )
+
+    def __init__(self, match):
+        NewgroundsExtractor.__init__(self, match)
+        self._path, query = match.groups()
+        self.query = text.parse_query(query)
+
+    def posts(self):
+        return self._pagination("/search/conduct/" + self._path, self.query)
+
+    def metadata(self):
+        return {"search_tags": self.query.get("terms", "")}
+
+    def _pagination(self, path, params):
+        url = self.root + path
+        headers = {
+            "Accept": "application/json, text/javascript, */*; q=0.01",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": self.root,
+        }
+        params["inner"] = "1"
+        params["page"] = 1
+
+        while True:
+            data = self.request(url, params=params, headers=headers).json()
+
+            post_url = None
+            for post_url in text.extract_iter(data["content"], 'href="', '"'):
+                if not post_url.startswith("/search/"):
+                    yield post_url
+
+            if post_url is None:
+                return
+            params["page"] += 1
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index f8c80ef..a7e0ff1 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -20,6 +20,7 @@ class PatreonExtractor(Extractor):
     """Base class for patreon extractors"""
     category = "patreon"
     root = "https://www.patreon.com"
+    cookiedomain = ".patreon.com"
     directory_fmt = ("{category}", "{creator[full_name]}")
     filename_fmt = "{id}_{title}_{num:>02}.{extension}"
     archive_fmt = "{id}_{num}"
diff --git a/gallery_dl/extractor/wordpress.py b/gallery_dl/extractor/wordpress.py
deleted file mode 100644
index dd7d28a..0000000
--- a/gallery_dl/extractor/wordpress.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2021 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for WordPress blogs"""
-
-from .common import BaseExtractor, Message
-from .. import text
-
-
-class WordpressExtractor(BaseExtractor):
-    """Base class for wordpress extractors"""
-    basecategory = "wordpress"
-
-    def items(self):
-        for post in self.posts():
-            yield Message.Difrectory, post
-
-
-
-BASE_PATTERN = WordpressExtractor.update({})
-
-
-class WordpressBlogExtractor(WordpressExtractor):
-    """Extractor for WordPress blogs"""
-    subcategory = "blog"
-    directory_fmt = ("{category}", "{blog}")
-    pattern = BASE_PATTERN + r"/?$"
-
-    def posts(self):
-        url = self.root + "/wp-json/wp/v2/posts"
-        params = {"page": 1, "per_page": "100"}
-
-        while True:
-            data = self.request(url, params=params).json()
-            exit()
-        yield 1
-- 
cgit v1.2.3