New upstream version 1.26.6. (upstream/1.26.6)

author: Unit 193 <unit193@unit193.net> 2024-01-08 03:22:24 -0500
committer: Unit 193 <unit193@unit193.net> 2024-01-08 03:22:24 -0500
commit: e949aaf6f6ac93896947d5b736e48e7911926efb (patch)
tree: b73090d78cd83dee0f85b385a25dcf623ac12f2d /gallery_dl/extractor
parent: 4d7a4f1ecef2c96269f3590335d2834ebcdd50bf (diff)
23 files changed, 482 insertions, 147 deletions
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 695b8b2..9e33f2c 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -24,6 +24,7 @@ modules = [
     "architizer",
     "artstation",
     "aryion",
+    "batoto",
     "bbc",
     "behance",
     "blogger",
@@ -107,7 +108,6 @@ modules = [
     "nitter",
     "nozomi",
     "nsfwalbum",
-    "nudecollect",
     "paheal",
     "patreon",
     "philomena",
@@ -122,6 +122,7 @@ modules = [
     "pixnet",
     "plurk",
     "poipiku",
+    "poringa",
     "pornhub",
     "pornpics",
     "postmill",
@@ -177,6 +178,7 @@ modules = [
     "xhamster",
     "xvideos",
     "zerochan",
+    "zzup",
     "booru",
     "moebooru",
     "foolfuuka",
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
new file mode 100644
index 0000000..cd6302e
--- /dev/null
+++ b/gallery_dl/extractor/batoto.py
@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://bato.to/"""
+
+from .common import Extractor, ChapterExtractor, MangaExtractor
+from .. import text, exception
+import re
+
+BASE_PATTERN = (r"(?:https?://)?"
+                r"(?:(?:ba|d|w)to\.to|\.to|(?:batotoo|mangatoto)\.com)")
+
+
+class BatotoBase():
+    """Base class for batoto extractors"""
+    category = "batoto"
+    root = "https://bato.to"
+
+    def request(self, url, **kwargs):
+        kwargs.update(encoding="utf-8")  # always pass encoding="utf-8" along
+        return Extractor.request(self, url, **kwargs)
+
+
+class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
+    """Extractor for bato.to manga chapters"""
+    pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)"
+    example = "https://bato.to/title/12345-MANGA/54321"
+
+    def __init__(self, match):
+        self.root = text.root_from_url(match.group(0))
+        self.chapter_id = match.group(1)
+        url = "{}/title/0/{}".format(self.root, self.chapter_id)
+        ChapterExtractor.__init__(self, match, url)
+
+    def metadata(self, page):
+        """Return a dict of manga and chapter metadata parsed from 'page'"""
+        extr = text.extract_from(page)
+        # maxsplit=2 keeps " - " inside manga names from breaking the unpack
+        manga, info, _ = extr("<title>", "<").rsplit(" - ", 2)
+        manga_id = extr("/title/", "/")
+
+        match = re.match(
+            r"(?:Volume\s+(\d+) )?"
+            r"\w+\s+(\d+)(.*)", info)
+        if match:
+            volume, chapter, minor = match.groups()
+            title = text.remove_html(extr(
+                "selected>", "</option")).partition(" : ")[2]
+        else:
+            volume, chapter, minor, title = 0, 0, "", info
+
+        return {
+            "manga"        : text.unescape(manga),
+            "manga_id"     : text.parse_int(manga_id),
+            "title"        : text.unescape(title),
+            "volume"       : text.parse_int(volume),
+            "chapter"      : text.parse_int(chapter),
+            "chapter_minor": minor,
+            "chapter_id"   : text.parse_int(self.chapter_id),
+            "date"         : text.parse_timestamp(extr(' time="', '"')[:-3]),
+        }
+
+    def images(self, page):
+        """Return a list of (image URL, None) tuples found in 'page'"""
+        container = text.unescape(text.extr(page, 'pageOpts', ':[0,0]}"'))
+        return [
+            (url, None)
+            for url in text.extract_iter(container, r"\"", r"\"")
+        ]
+
+
+class BatotoMangaExtractor(BatotoBase, MangaExtractor):
+    """Extractor for bato.to manga"""
+    reverse = False
+    chapterclass = BatotoChapterExtractor
+    pattern = BASE_PATTERN + r"/(?:title|series)/(\d+)[^/?#]*/?$"
+    example = "https://bato.to/title/12345-MANGA/"
+
+    def __init__(self, match):
+        root = self.root = text.root_from_url(match.group(0))
+        manga_id = self.manga_id = match.group(1)
+        MangaExtractor.__init__(
+            self, match, "{}/title/{}".format(root, manga_id))
+
+    def chapters(self, page):
+        """Return a list of (chapter URL, metadata dict) tuples for 'page'"""
+        extr = text.extract_from(page)
+
+        alert = extr(' class="alert alert-warning">', "</div><")
+        if alert:
+            raise exception.StopExtraction("'%s'", text.remove_html(alert))
+
+        manga = extr("<title>", "<").rpartition(" - ")[0]
+        base = {
+            "manga_id": text.parse_int(self.manga_id),
+            "manga"   : text.unescape(manga),
+        }
+
+        extr('<div data-hk="0-0-0-0"', "")  # return value is not needed
+        results = []
+        while True:
+            href = extr('<a href="/title/', '"')
+            if not href:
+                return results
+
+            # text after the last "-ch_" is split at its first "."
+            number, dot, minor = href.rpartition("-ch_")[2].partition(".")
+            date = text.parse_datetime(
+                extr('time="', '"'), "%Y-%m-%dT%H:%M:%S.%fZ")
+
+            chapter = dict(
+                base, chapter=text.parse_int(number),
+                chapter_minor=dot + minor, date=date)
+            url = "{}/title/{}".format(self.root, href)
+            results.append((url, chapter))
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index 21166bd..2bf200b 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -35,7 +35,7 @@ class CheveretoExtractor(BaseExtractor):
 
 BASE_PATTERN = CheveretoExtractor.update({
     "jpgfish": {
-        "root": "https://jpg2.su",
+        "root": "https://jpg4.su",
         "pattern": r"jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)",
     },
     "pixl": {
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 9b010c5..0dd05ef 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -526,12 +526,15 @@ class Extractor():
         if include == "all":
             include = extractors
         elif isinstance(include, str):
-            include = include.split(",")
+            include = include.replace(" ", "").split(",")
 
         result = [(Message.Version, 1)]
         for category in include:
-            if category in extractors:
+            try:
                 extr, url = extractors[category]
+            except KeyError:
+                self.log.warning("Invalid include '%s'", category)
+            else:
                 result.append((Message.Queue, url, {"_extractor": extr}))
         return iter(result)
 
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 2ba47e1..4b5f1d7 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -38,7 +38,7 @@ class DeviantartExtractor(Extractor):
 
     def __init__(self, match):
         Extractor.__init__(self, match)
-        self.user = match.group(1) or match.group(2)
+        self.user = (match.group(1) or match.group(2)).lower()
         self.offset = 0
 
     def _init(self):
@@ -104,7 +104,6 @@ class DeviantartExtractor(Extractor):
                     raise exception.StopExtraction()
                 else:
                     self.subcategory = "group-" + self.subcategory
-                    self.user = self.user.lower()
                     self.group = True
 
         for deviation in self.deviations():
@@ -513,11 +512,13 @@ class DeviantartUserExtractor(DeviantartExtractor):
     def items(self):
         base = "{}/{}/".format(self.root, self.user)
         return self._dispatch_extractors((
-            (DeviantartGalleryExtractor , base + "gallery"),
-            (DeviantartScrapsExtractor  , base + "gallery/scraps"),
-            (DeviantartJournalExtractor , base + "posts"),
-            (DeviantartStatusExtractor  , base + "posts/statuses"),
-            (DeviantartFavoriteExtractor, base + "favourites"),
+            (DeviantartAvatarExtractor    , base + "avatar"),
+            (DeviantartBackgroundExtractor, base + "banner"),
+            (DeviantartGalleryExtractor   , base + "gallery"),
+            (DeviantartScrapsExtractor    , base + "gallery/scraps"),
+            (DeviantartJournalExtractor   , base + "posts"),
+            (DeviantartStatusExtractor    , base + "posts/statuses"),
+            (DeviantartFavoriteExtractor  , base + "favourites"),
         ), ("gallery",))
 
 
@@ -538,6 +539,47 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
         return self._folder_urls(folders, "gallery", DeviantartFolderExtractor)
 
 
+class DeviantartAvatarExtractor(DeviantartExtractor):
+    """Extractor for an artist's avatar"""
+    subcategory = "avatar"
+    archive_fmt = "a_{_username}_{index}"
+    pattern = BASE_PATTERN + r"/avatar"
+    example = "https://www.deviantart.com/USER/avatar/"
+
+    def deviations(self):
+        profile = self.api.user_profile(self.user.lower())
+        if not profile:
+            return ()
+        user = profile["user"]
+        icon = user["usericon"]  # text after its last "?" becomes 'index'
+        src = icon.replace("/avatars/", "/avatars-big/", 1)
+        return ({
+            "author"         : user,
+            "category"       : "avatar",
+            "index"          : text.parse_int(icon.rpartition("?")[2]),
+            "is_deleted"     : False,
+            "is_downloadable": False,
+            "published_time" : 0,
+            "title"          : "avatar",
+            "content"        : {"src": src},
+        },)
+
+
+class DeviantartBackgroundExtractor(DeviantartExtractor):
+    """Extractor for an artist's banner"""
+    subcategory = "background"
+    archive_fmt = "b_{index}"
+    pattern = BASE_PATTERN + r"/ba(?:nner|ckground)"
+    example = "https://www.deviantart.com/USER/banner/"
+
+    def deviations(self):
+        try:
+            profile = self.api.user_profile(self.user.lower())
+            return (profile["cover_deviation"]["cover_deviation"],)
+        except Exception:  # best-effort: any failure yields no results
+            return ()
+
+
 class DeviantartFolderExtractor(DeviantartExtractor):
     """Extractor for deviations inside an artist's gallery folder"""
     subcategory = "folder"
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index 4572bea..61a3928 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -8,6 +8,7 @@
 
 from .common import Extractor, Message
 from .. import text
+from ..cache import memcache
 import re
 
 BASE_PATTERN = (
@@ -27,8 +28,20 @@ class FanboxExtractor(Extractor):
     _warning = True
 
     def _init(self):
+        self.headers = {"Origin": self.root}
         self.embeds = self.config("embeds", True)
 
+        includes = self.config("metadata")
+        if includes:
+            if isinstance(includes, str):
+                includes = includes.split(",")
+            elif not isinstance(includes, (list, tuple)):
+                includes = ("user", "plan")
+            self._meta_user = ("user" in includes)
+            self._meta_plan = ("plan" in includes)
+        else:
+            self._meta_user = self._meta_plan = False
+
         if self._warning:
             if not self.cookies_check(("FANBOXSESSID",)):
                 self.log.warning("no 'FANBOXSESSID' cookie set")
@@ -43,11 +56,9 @@ class FanboxExtractor(Extractor):
         """Return all relevant post objects"""
 
     def _pagination(self, url):
-        headers = {"Origin": self.root}
-
         while url:
             url = text.ensure_http_scheme(url)
-            body = self.request(url, headers=headers).json()["body"]
+            body = self.request(url, headers=self.headers).json()["body"]
             for item in body["items"]:
                 try:
                     yield self._get_post_data(item["id"])
@@ -58,9 +69,8 @@ class FanboxExtractor(Extractor):
 
     def _get_post_data(self, post_id):
         """Fetch and process post data"""
-        headers = {"Origin": self.root}
         url = "https://api.fanbox.cc/post.info?postId="+post_id
-        post = self.request(url, headers=headers).json()["body"]
+        post = self.request(url, headers=self.headers).json()["body"]
 
         content_body = post.pop("body", None)
         if content_body:
@@ -98,8 +108,47 @@ class FanboxExtractor(Extractor):
         post["text"] = content_body.get("text") if content_body else None
         post["isCoverImage"] = False
 
+        if self._meta_user:
+            post["user"] = self._get_user_data(post["creatorId"])
+        if self._meta_plan:
+            plans = self._get_plan_data(post["creatorId"])
+            post["plan"] = plans[post["feeRequired"]]
+
         return content_body, post
 
+    @memcache(keyarg=1)
+    def _get_user_data(self, creator_id):
+        """Fetch 'creator.get' for a creator and flatten its "user" object"""
+        response = self.request(
+            "https://api.fanbox.cc/creator.get",
+            params={"creatorId": creator_id}, headers=self.headers)
+        creator = response.json()["body"]
+        # merge the keys of the nested "user" dict into the top level
+        creator.update(creator.pop("user"))
+        return creator
+
+    @memcache(keyarg=1)
+    def _get_plan_data(self, creator_id):
+        """Fetch a creator's plans and return them as a dict keyed by fee"""
+        response = self.request(
+            "https://api.fanbox.cc/plan.listCreator",
+            params={"creatorId": creator_id}, headers=self.headers)
+        free_plan = {  # placeholder entry stored under fee 0
+            "id"             : "",
+            "title"          : "",
+            "fee"            : 0,
+            "description"    : "",
+            "coverImageUrl"  : "",
+            "creatorId"      : creator_id,
+            "hasAdultContent": None,
+            "paymentMethod"  : None,
+        }
+        plans = {0: free_plan}
+        for plan in response.json()["body"]:
+            plan.pop("user")
+            plans[plan["fee"]] = plan
+        return plans
+
     def _get_urls_from_post(self, content_body, post):
         num = 0
         cover_image = post.get("coverImageUrl")
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index b62ff78..eba1539 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -32,10 +32,13 @@ class GelbooruBase():
         url = self.root + "/index.php?page=dapi&q=index&json=1"
         data = self.request(url, params=params).json()
 
-        if key not in data:
-            return ()
+        try:
+            posts = data[key]
+        except KeyError:
+            self.log.error("Incomplete API response (missing '%s')", key)
+            self.log.debug("%s", data)
+            return []
 
-        posts = data[key]
         if not isinstance(posts, list):
             return (posts,)
         return posts
@@ -165,15 +168,16 @@ class GelbooruFavoriteExtractor(GelbooruBase,
             "id"   : self.favorite_id,
             "limit": "1",
         }
-        count = self._api_request(params, "@attributes")[0]["count"]
 
+        count = self._api_request(params, "@attributes")[0]["count"]
         if count <= self.offset:
             return
-        pnum, last = divmod(count + 1, self.per_page)
 
-        if self.offset >= last:
+        pnum, last = divmod(count-1, self.per_page)
+        if self.offset > last:
+            # page number change
             self.offset -= last
-            diff, self.offset = divmod(self.offset, self.per_page)
+            diff, self.offset = divmod(self.offset-1, self.per_page)
             pnum -= diff + 1
         skip = self.offset
 
@@ -183,8 +187,8 @@ class GelbooruFavoriteExtractor(GelbooruBase,
 
         while True:
             favs = self._api_request(params, "favorite")
-
             favs.reverse()
+
             if skip:
                 favs = favs[skip:]
                 skip = 0
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 0864b9f..0c8af3d 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -168,7 +168,7 @@ INSTANCES = {
     },
     "rule34": {
         "root": "https://rule34.xxx",
-        "pattern": r"rule34\.xxx",
+        "pattern": r"(?:www\.)?rule34\.xxx",
         "api_root": "https://api.rule34.xxx",
     },
     "safebooru": {
diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py
index b9e2c3d..f70a948 100644
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -34,8 +34,11 @@ class IdolcomplexExtractor(SankakuExtractor):
         self.start_post = 0
 
     def _init(self):
+        self.find_pids = re.compile(
+            r" href=[\"#]/\w\w/posts/([0-9a-f]+)"
+        ).findall
         self.find_tags = re.compile(
-            r'tag-type-([^"]+)">\s*<div [^>]+>\s*<a href="/\?tags=([^"]+)'
+            r'tag-type-([^"]+)">\s*<a [^>]*?href="/[^?]*\?tags=([^"]+)'
         ).findall
 
     def items(self):
@@ -149,8 +152,8 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
     subcategory = "tag"
     directory_fmt = ("{category}", "{search_tags}")
     archive_fmt = "t_{search_tags}_{id}"
-    pattern = r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)"
-    example = "https://idol.sankakucomplex.com/?tags=TAGS"
+    pattern = BASE_PATTERN + r"/(?:posts/?)?\?([^#]*)"
+    example = "https://idol.sankakucomplex.com/en/posts?tags=TAGS"
     per_page = 20
 
     def __init__(self, match):
@@ -196,7 +199,8 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
             page = self.request(self.root, params=params, retries=10).text
             pos = ((page.find('id="more-popular-posts-link"') + 1) or
                    (page.find('<span class="thumb') + 1))
-            yield from text.extract_iter(page, ' href="/posts/', '"', pos)
+
+            yield from self.find_pids(page, pos)
 
             next_url = text.extract(page, 'next-page-url="', '"', pos)[0]
             if not next_url:
@@ -218,7 +222,7 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
     subcategory = "pool"
     directory_fmt = ("{category}", "pool", "{pool}")
     archive_fmt = "p_{pool}_{id}"
-    pattern = r"(?:https?://)?idol\.sankakucomplex\.com/pools?/show/(\d+)"
+    pattern = BASE_PATTERN + r"/pools?/show/(\d+)"
     example = "https://idol.sankakucomplex.com/pools/show/12345"
     per_page = 24
 
@@ -242,8 +246,7 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
         while True:
             page = self.request(url, params=params, retries=10).text
             pos = page.find('id="pool-show"') + 1
-            post_ids = list(text.extract_iter(
-                page, ' href="/posts/', '"', pos))
+            post_ids = self.find_pids(page, pos)
 
             yield from post_ids
             if len(post_ids) < self.per_page:
diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py
index 9aa0332..9199d12 100644
--- a/gallery_dl/extractor/imagechest.py
+++ b/gallery_dl/extractor/imagechest.py
@@ -44,7 +44,7 @@ class ImagechestGalleryExtractor(GalleryExtractor):
         }
 
     def images(self, page):
-        if " More Files</button>" in page:
+        if ' load-all">' in page:
             url = "{}/p/{}/loadAll".format(self.root, self.gallery_id)
             headers = {
                 "X-Requested-With": "XMLHttpRequest",
diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py
index a3e0130..7a19be5 100644
--- a/gallery_dl/extractor/komikcast.py
+++ b/gallery_dl/extractor/komikcast.py
@@ -6,19 +6,19 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extractors for https://komikcast.site/"""
+"""Extractors for https://komikcast.lol/"""
 
 from .common import ChapterExtractor, MangaExtractor
 from .. import text
 import re
 
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?komikcast\.(?:site|me|com)"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?komikcast\.(?:lol|site|me|com)"
 
 
 class KomikcastBase():
     """Base class for komikcast extractors"""
     category = "komikcast"
-    root = "https://komikcast.site"
+    root = "https://komikcast.lol"
 
     @staticmethod
     def parse_chapter_string(chapter_string, data=None):
@@ -46,9 +46,9 @@ class KomikcastBase():
 
 
 class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
-    """Extractor for manga-chapters from komikcast.site"""
+    """Extractor for manga-chapters from komikcast.lol"""
     pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/)"
-    example = "https://komikcast.site/chapter/TITLE/"
+    example = "https://komikcast.lol/chapter/TITLE/"
 
     def metadata(self, page):
         info = text.extr(page, "<title>", " - Komikcast<")
@@ -65,10 +65,10 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
 
 
 class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
-    """Extractor for manga from komikcast.site"""
+    """Extractor for manga from komikcast.lol"""
     chapterclass = KomikcastChapterExtractor
     pattern = BASE_PATTERN + r"(/(?:komik/)?[^/?#]+)/?$"
-    example = "https://komikcast.site/komik/TITLE"
+    example = "https://komikcast.lol/komik/TITLE"
 
     def chapters(self, page):
         results = []
@@ -76,8 +76,10 @@ class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
 
         for item in text.extract_iter(
                 page, '<a class="chapter-link-item" href="', '</a'):
-            url, _, chapter_string = item.rpartition('">Chapter ')
-            self.parse_chapter_string(chapter_string, data)
+            url, _, chapter = item.rpartition('">Chapter')
+            chapter, sep, minor = chapter.strip().partition(".")
+            data["chapter"] = text.parse_int(chapter)
+            data["chapter_minor"] = sep + minor
             results.append((url, data.copy()))
         return results
 
diff --git a/gallery_dl/extractor/lynxchan.py b/gallery_dl/extractor/lynxchan.py
index 0edd5c1..85b3fef 100644
--- a/gallery_dl/extractor/lynxchan.py
+++ b/gallery_dl/extractor/lynxchan.py
@@ -18,8 +18,8 @@ class LynxchanExtractor(BaseExtractor):
 
 BASE_PATTERN = LynxchanExtractor.update({
     "bbw-chan": {
-        "root": "https://bbw-chan.nl",
-        "pattern": r"bbw-chan\.nl",
+        "root": "https://bbw-chan.link",
+        "pattern": r"bbw-chan\.(?:link|nl)",
     },
     "kohlchan": {
         "root": "https://kohlchan.net",
@@ -40,7 +40,7 @@ class LynxchanThreadExtractor(LynxchanExtractor):
     filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
     archive_fmt = "{boardUri}_{postId}_{num}"
     pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
-    example = "https://bbw-chan.nl/a/res/12345.html"
+    example = "https://endchan.org/a/res/12345.html"
 
     def __init__(self, match):
         LynxchanExtractor.__init__(self, match)
@@ -71,7 +71,7 @@ class LynxchanBoardExtractor(LynxchanExtractor):
     """Extractor for LynxChan boards"""
     subcategory = "board"
     pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
-    example = "https://bbw-chan.nl/a/"
+    example = "https://endchan.org/a/"
 
     def __init__(self, match):
         LynxchanExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py
index 46019ad..232b98d 100644
--- a/gallery_dl/extractor/manganelo.py
+++ b/gallery_dl/extractor/manganelo.py
@@ -10,7 +10,11 @@ from .common import ChapterExtractor, MangaExtractor
 from .. import text
 import re
 
-BASE_PATTERN = r"(?:https?://)?((?:chap|read|www\.|m\.)?mangan(?:at|el)o\.com)"
+BASE_PATTERN = (
+    r"(?:https?://)?"
+    r"((?:chap|read|www\.|m\.)?mangan(?:at|el)o"
+    r"\.(?:to|com))"
+)
 
 
 class ManganeloBase():
@@ -67,10 +71,11 @@ class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
 
     def images(self, page):
         page = text.extr(
-            page, 'class="container-chapter-reader', '\n<div')
+            page, 'class="container-chapter-reader', 'class="container')
         return [
             (url, None)
             for url in text.extract_iter(page, '<img src="', '"')
+            if not url.endswith("/gohome.png")
         ] or [
             (url, None)
             for url in text.extract_iter(
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 57c3118..b991705 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -55,9 +55,12 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
             else:
                 data["user_id"] = data["artist_id"]
                 data["user_name"] = data["artist_name"]
-            yield Message.Directory, data
 
-            for num, url in enumerate(self._extract_images(image_id, page)):
+            urls = list(self._extract_images(image_id, page))
+            data["count"] = len(urls)
+
+            yield Message.Directory, data
+            for num, url in enumerate(urls):
                 image = text.nameext_from_url(url, {
                     "num": num,
                     "url": "https:" + url,
diff --git a/gallery_dl/extractor/nudecollect.py b/gallery_dl/extractor/nudecollect.py
deleted file mode 100644
index bda5d77..0000000
--- a/gallery_dl/extractor/nudecollect.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://nudecollect.com/"""
-
-from .common import GalleryExtractor
-from .. import text
-
-
-class NudecollectExtractor(GalleryExtractor):
-    """Base class for Nudecollect extractors"""
-    category = "nudecollect"
-    directory_fmt = ("{category}", "{title}")
-    filename_fmt = "{slug}_{num:>03}.{extension}"
-    archive_fmt = "{slug}_{num}"
-    root = "https://www.nudecollect.com"
-
-    def request(self, url, **kwargs):
-        kwargs["allow_redirects"] = False
-        return GalleryExtractor.request(self, url, **kwargs)
-
-    @staticmethod
-    def get_title(page):
-        return text.unescape(text.extr(page, "<title>", "</title>"))[31:]
-
-    @staticmethod
-    def get_image(page):
-        return text.extr(page, '<img src="', '"')
-
-
-class NudecollectImageExtractor(NudecollectExtractor):
-    """Extractor for individual images from nudecollect.com"""
-    subcategory = "image"
-    pattern = (r"(?:https?://)?(?:www\.)?nudecollect\.com"
-               r"(/content/([^/?#]+)/image-(\d+)-pics-(\d+)"
-               r"-mirror-(\d+)\.html)")
-    example = ("https://www.nudecollect.com/content/12345_TITLE"
-               "/image-1-pics-108-mirror-1.html")
-
-    def __init__(self, match):
-        NudecollectExtractor.__init__(self, match)
-        _, self.slug, self.num, self.count, self.mirror = match.groups()
-
-    def metadata(self, page):
-        return {
-            "slug"  : self.slug,
-            "title" : self.get_title(page),
-            "count" : text.parse_int(self.count),
-            "mirror": text.parse_int(self.mirror),
-        }
-
-    def images(self, page):
-        return ((self.get_image(page), {"num": text.parse_int(self.num)}),)
-
-
-class NudecollectAlbumExtractor(NudecollectExtractor):
-    """Extractor for image albums on nudecollect.com"""
-    subcategory = "album"
-    pattern = (r"(?:https?://)?(?:www\.)?nudecollect\.com"
-               r"/content/([^/?#]+)/(?:index-mirror-(\d+)-(\d+)"
-               r"|page-\d+-pics-(\d+)-mirror-(\d+))\.html")
-    example = ("https://www.nudecollect.com/content/12345_TITLE"
-               "/index-mirror-01-123.html")
-
-    def __init__(self, match):
-        self.slug = match.group(1)
-        self.mirror = match.group(2) or match.group(5)
-        self.count = text.parse_int(match.group(3) or match.group(4))
-        url = "{}/content/{}/image-1-pics-{}-mirror-{}.html".format(
-            self.root, self.slug, self.count, self.mirror)
-        NudecollectExtractor.__init__(self, match, url)
-
-    def metadata(self, page):
-        return {
-            "slug"  : self.slug,
-            "title" : self.get_title(page),
-            "mirror": text.parse_int(self.mirror),
-        }
-
-    def images(self, page):
-        url = self.get_image(page)
-        p1, _, p2 = url.partition("/image0")
-        ufmt = p1 + "/image{:>05}" + p2[4:]
-        return [(ufmt.format(num), None) for num in range(1, self.count + 1)]
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 0389ead..89c0d2f 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -32,7 +32,7 @@ class PahealExtractor(Extractor):
             post["tags"] = text.unquote(post["tags"])
             post.update(data)
             yield Message.Directory, post
-            yield Message.Url, url, text.nameext_from_url(url, post)
+            yield Message.Url, url, post
 
     def get_metadata(self):
         """Return general metadata"""
@@ -59,11 +59,13 @@ class PahealExtractor(Extractor):
                 extr(">Source&nbsp;Link<", "</td>"), "href='", "'")),
         }
 
-        dimensions, size, ext = extr("Info</th><td>", ">").split(" // ")
-        post["width"], _, height = dimensions.partition("x")
+        dimensions, size, ext = extr("Info</th><td>", "<").split(" // ")
         post["size"] = text.parse_bytes(size[:-1])
+        post["width"], _, height = dimensions.partition("x")
         post["height"], _, duration = height.partition(", ")
         post["duration"] = text.parse_float(duration[:-1])
+        post["filename"] = "{} - {}".format(post_id, post["tags"])
+        post["extension"] = ext
 
         return post
 
@@ -112,6 +114,7 @@ class PahealTagExtractor(PahealExtractor):
 
         tags, data, date = data.split("\n")
         dimensions, size, ext = data.split(" // ")
+        tags = text.unescape(tags)
         width, _, height = dimensions.partition("x")
         height, _, duration = height.partition(", ")
 
@@ -119,9 +122,11 @@ class PahealTagExtractor(PahealExtractor):
             "id": pid, "md5": md5, "file_url": url,
             "width": width, "height": height,
             "duration": text.parse_float(duration[:-1]),
-            "tags": text.unescape(tags),
+            "tags": tags,
             "size": text.parse_bytes(size[:-1]),
             "date": text.parse_datetime(date, "%B %d, %Y; %H:%M"),
+            "filename" : "{} - {}".format(pid, tags),
+            "extension": ext,
         }
 
     def _extract_data_ex(self, post):
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 4b26393..c46a587 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -47,6 +47,7 @@ class PinterestExtractor(Extractor):
 
             carousel_data = pin.get("carousel_data")
             if carousel_data:
+                pin["count"] = len(carousel_data["carousel_slots"])
                 for num, slot in enumerate(carousel_data["carousel_slots"], 1):
                     slot["media_id"] = slot.pop("id")
                     pin.update(slot)
@@ -65,7 +66,7 @@ class PinterestExtractor(Extractor):
 
                 if videos or media.get("duration") is None:
                     pin.update(media)
-                    pin["num"] = 0
+                    pin["num"] = pin["count"] = 1
                     pin["media_id"] = ""
 
                     url = media["url"]
diff --git a/gallery_dl/extractor/poringa.py b/gallery_dl/extractor/poringa.py
new file mode 100644
index 0000000..0149d06
--- /dev/null
+++ b/gallery_dl/extractor/poringa.py
@@ -0,0 +1,159 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for http://www.poringa.net/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+from ..cache import cache
+import itertools
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?poringa\.net"
+
+
+class PoringaExtractor(Extractor):
+    """Base class for poringa extractors"""
+    category = "poringa"
+    directory_fmt = ("{category}", "{user}", "{post_id}")
+    filename_fmt = "{post_id}_{title}_{num:>03}_{filename}.{extension}"
+    archive_fmt = "{post_id}_{num}"
+    root = "http://www.poringa.net"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.item = match.group(1)
+        # cached cookies get loaded once, on the first request()
+        self.__cookies = True
+
+    def items(self):
+        """Yield directory and URL messages for each post from posts()"""
+        for post_id in self.posts():
+            url = "{}/posts/imagenes/{}".format(self.root, post_id)
+
+            try:
+                response = self.request(url)
+            except exception.HttpError as exc:
+                self.log.warning(
+                    "Unable to fetch posts for '%s' (%s)", post_id, exc)
+                continue
+
+            # landing on the login page is taken to mean the post is private
+            if "/registro-login?" in response.url:
+                self.log.warning("Private post '%s'", post_id)
+                continue
+
+            page = response.text
+            title, pos = text.extract(
+                page, 'property="og:title" content="', '"')
+
+            try:
+                pos = page.index('<div class="main-info', pos)
+                user, pos = text.extract(
+                    page, 'href="http://www.poringa.net/', '"', pos)
+            except ValueError:
+                user = None
+
+            # fall back to a generic name when no user could be extracted
+            if not user:
+                user = "poringa"
+
+            data = {
+                "post_id"      : post_id,
+                "title"        : text.unescape(title),
+                "user"         : text.unquote(user),
+                "_http_headers": {"Referer": url},
+            }
+
+            main_post = text.extr(
+                page, 'property="dc:content" role="main">', '</div>')
+            urls = list(text.extract_iter(
+                main_post, '<img class="imagen" border="0" src="', '"'))
+            data["count"] = len(urls)
+
+            yield Message.Directory, data
+            for data["num"], url in enumerate(urls, 1):
+                yield Message.Url, url, text.nameext_from_url(url, data)
+
+    def posts(self):
+        """Return an iterable of post IDs; overridden by subclasses"""
+        return ()
+
+    def request(self, url, **kwargs):
+        """Send a request, retrying while a 'Please wait' page is returned
+
+        Raises StopExtraction when that page is still served after the
+        last attempt, instead of implicitly returning None.
+        """
+        if self.__cookies:
+            self.__cookies = False
+            self.cookies_update(_cookie_cache())
+
+        for _ in range(5):
+            response = Extractor.request(self, url, **kwargs)
+            if response.cookies:
+                _cookie_cache.update("", response.cookies)
+            # only the first 600 bytes are searched for the page title
+            if response.content.find(
+                    b"<title>Please wait a few moments</title>", 0, 600) < 0:
+                return response
+            self.sleep(5.0, "check")
+
+        raise exception.StopExtraction(
+            "Still getting a 'Please wait' page for '%s'", url)
+
+    def _pagination(self, url, params):
+        """Yield the post IDs found on consecutive result pages"""
+        for params["p"] in itertools.count(1):
+            page = self.request(url, params=params).text
+
+            posts_ids = PoringaPostExtractor.pattern.findall(page)
+            # remove duplicates while keeping first-seen order
+            posts_ids = list(dict.fromkeys(posts_ids))
+            yield from posts_ids
+
+            # NOTE(review): presumably a full page has 19 posts - confirm
+            if len(posts_ids) < 19:
+                return
+
+
+class PoringaPostExtractor(PoringaExtractor):
+    """Extractor for posts on poringa.net"""
+    subcategory = "post"
+    pattern = BASE_PATTERN + r"/posts/imagenes/(\d+)"
+    example = "http://www.poringa.net/posts/imagenes/12345/TITLE.html"
+
+    def posts(self):
+        return (self.item,)
+
+
+class PoringaUserExtractor(PoringaExtractor):
+    """Extractor for posts found by searching for a user name"""
+    subcategory = "user"
+    pattern = BASE_PATTERN + r"/(\w+)$"
+    example = "http://www.poringa.net/USER"
+
+    def posts(self):
+        url = self.root + "/buscar/"
+        params = {"q": self.item}
+        return self._pagination(url, params)
+
+
+class PoringaSearchExtractor(PoringaExtractor):
+    """Extractor for poringa.net search results"""
+    subcategory = "search"
+    pattern = BASE_PATTERN + r"/buscar/\?&?q=([^&#]+)"
+    example = "http://www.poringa.net/buscar/?q=QUERY"
+
+    def posts(self):
+        url = self.root + "/buscar/"
+        params = {"q": self.item}
+        return self._pagination(url, params)
+
+
+# cookie store; request() reads it once and updates it from responses
+@cache()
+def _cookie_cache():
+    return ()
diff --git a/gallery_dl/extractor/rule34us.py b/gallery_dl/extractor/rule34us.py
index 6439a22..cf70ccc 100644
--- a/gallery_dl/extractor/rule34us.py
+++ b/gallery_dl/extractor/rule34us.py
@@ -38,7 +38,11 @@ class Rule34usExtractor(BooruExtractor):
             "height"  : extr(' x ', 'h'),
             "file_url": extr(' src="', '"'),
         }
-        post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
+
+        url = post["file_url"]
+        if "//video-cdn1." in url:
+            post["_fallback"] = (url.replace("//video-cdn1.", "//video."),)
+        post["md5"] = url.rpartition("/")[2].partition(".")[0]
 
         tags = collections.defaultdict(list)
         for tag_type, tag_name in self._find_tags(page):
diff --git a/gallery_dl/extractor/szurubooru.py b/gallery_dl/extractor/szurubooru.py
index 5415bf3..08cccab 100644
--- a/gallery_dl/extractor/szurubooru.py
+++ b/gallery_dl/extractor/szurubooru.py
@@ -87,6 +87,10 @@ BASE_PATTERN = SzurubooruExtractor.update({
         "root": "https://booru.bcbnsfw.space",
         "pattern": r"booru\.bcbnsfw\.space",
     },
+    "snootbooru": {
+        "root": "https://snootbooru.com",
+        "pattern": r"snootbooru\.com",
+    },
 })
 
 
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index fdcefdd..aa9ab9f 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -552,9 +552,11 @@ class TwitterTimelineExtractor(TwitterExtractor):
                 return self.api.user_media
         if strategy == "tweets":
             return self.api.user_tweets
+        if strategy == "media":
+            return self.api.user_media
         if strategy == "with_replies":
             return self.api.user_tweets_and_replies
-        return self.api.user_media
+        raise exception.StopExtraction("Invalid strategy '%s'", strategy)
 
 
 class TwitterTweetsExtractor(TwitterExtractor):
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 7413b5a..3bd0648 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -225,9 +225,6 @@ class WeiboUserExtractor(WeiboExtractor):
     pattern = USER_PATTERN + r"(?:$|#)"
     example = "https://weibo.com/USER"
 
-    def initialize(self):
-        pass
-
     def items(self):
         base = "{}/u/{}?tabtype=".format(self.root, self._user_id())
         return self._dispatch_extractors((
diff --git a/gallery_dl/extractor/zzup.py b/gallery_dl/extractor/zzup.py
new file mode 100644
index 0000000..45b0cd8
--- /dev/null
+++ b/gallery_dl/extractor/zzup.py
@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://zzup.com/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class ZzupGalleryExtractor(GalleryExtractor):
+    """Extractor for galleries on zzup.com"""
+    category = "zzup"
+    directory_fmt = ("{category}", "{title}")
+    filename_fmt = "{slug}_{num:>03}.{extension}"
+    archive_fmt = "{slug}_{num}"
+    root = "https://zzup.com"
+    pattern = (r"(?:https?://)?(?:www\.)?zzup\.com(/content"
+               r"/[\w=]+/([^/?#]+)/[\w=]+)/(?:index|page-\d+)\.html")
+    example = "https://zzup.com/content/xyz=/12345_TITLE/123=/index.html"
+
+    def __init__(self, match):
+        # group 1 already begins with "/", so no separator is added here
+        url = "{}{}/index.html".format(self.root, match.group(1))
+        GalleryExtractor.__init__(self, match, url)
+        self.slug = match.group(2)
+
+    def metadata(self, page):
+        """Return the gallery's slug and title"""
+        return {
+            "slug" : self.slug,
+            # NOTE(review): [:-11] presumably cuts a site suffix - confirm
+            "title": text.unescape(text.extr(
+                page, "<title>", "</title>"))[:-11],
+        }
+
+    def images(self, page):
+        """Return a list of (url, None) tuples, one per gallery image"""
+        path = text.extr(page, 'class="picbox"><a target="_blank" href="', '"')
+        # the image count is parsed out of the extracted path
+        count = text.parse_int(text.extr(path, "-pics-", "-mirror"))
+        page = self.request(self.root + path).text
+        url = self.root + text.extr(page, '\n<a href="', '"')
+        # build a URL template with a zero-padded 5-character image number
+        # NOTE(review): p2[4:] presumably drops 4 leftover digits - confirm
+        p1, _, p2 = url.partition("/image0")
+        ufmt = p1 + "/image{:>05}" + p2[4:]
+        return [(ufmt.format(num), None) for num in range(1, count + 1)]
author	Unit 193 <unit193@unit193.net>	2024-01-08 03:22:24 -0500
committer	Unit 193 <unit193@unit193.net>	2024-01-08 03:22:24 -0500
commit	e949aaf6f6ac93896947d5b736e48e7911926efb (patch)
tree	b73090d78cd83dee0f85b385a25dcf623ac12f2d /gallery_dl/extractor
parent	4d7a4f1ecef2c96269f3590335d2834ebcdd50bf (diff)