New upstream version 1.22.0.upstream/1.22.0

author: Unit 193 <unit193@unit193.net> 2022-05-26 23:57:04 -0400
committer: Unit 193 <unit193@unit193.net> 2022-05-26 23:57:04 -0400
commit: ad61a6d8122973534ab63df48f6090954bc73db6 (patch)
tree: aedce94427ac95fa180005f88fc94b5c8ef5a62a /gallery_dl/extractor/gelbooru_v01.py
parent: c6b88a96bd191711fc540d7babab3d2e09c68da8 (diff)
1 files changed, 70 insertions, 21 deletions
diff --git a/gallery_dl/extractor/gelbooru_v01.py b/gallery_dl/extractor/gelbooru_v01.py
index 541f454..9c19664 100644
--- a/gallery_dl/extractor/gelbooru_v01.py
+++ b/gallery_dl/extractor/gelbooru_v01.py
@@ -1,12 +1,12 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extractors for Gelbooru v0.1 sites"""
+"""Extractors for Gelbooru Beta 0.1.11 sites"""
 
 from . import booru
 from .. import text
@@ -42,14 +42,43 @@ class GelbooruV01Extractor(booru.BooruExtractor):
 
         return post
 
+    def _pagination(self, url, begin, end):
+        pid = self.page_start
+
+        while True:
+            page = self.request(url + str(pid)).text
+
+            cnt = 0
+            for post_id in text.extract_iter(page, begin, end):
+                yield self._parse_post(post_id)
+                cnt += 1
+
+            if cnt < self.per_page:
+                return
+            pid += self.per_page
+
 
 BASE_PATTERN = GelbooruV01Extractor.update({
-    "thecollection"     : {"root": "https://the-collection.booru.org"},
-    "illusioncardsbooru": {"root": "https://illusioncards.booru.org"},
-    "allgirlbooru"      : {"root": "https://allgirl.booru.org"},
-    "drawfriends"       : {"root": "https://drawfriends.booru.org"},
-    "vidyart"           : {"root": "https://vidyart.booru.org"},
-    "theloudbooru"      : {"root": "https://tlb.booru.org"},
+    "thecollection": {
+        "root": "https://the-collection.booru.org",
+        "pattern": r"the-collection\.booru\.org",
+    },
+    "illusioncardsbooru": {
+        "root": "https://illusioncards.booru.org",
+        "pattern": r"illusioncards\.booru\.org",
+    },
+    "allgirlbooru": {
+        "root": "https://allgirl.booru.org",
+        "pattern": r"allgirl\.booru\.org",
+    },
+    "drawfriends": {
+        "root": "https://drawfriends.booru.org",
+        "pattern": r"drawfriends\.booru\.org",
+    },
+    "vidyart": {
+        "root": "https://vidyart.booru.org",
+        "pattern": r"vidyart\.booru\.org",
+    },
 })
 
 
@@ -75,7 +104,6 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor):
         }),
         ("https://drawfriends.booru.org/index.php?page=post&s=list&tags=all"),
         ("https://vidyart.booru.org/index.php?page=post&s=list&tags=all"),
-        ("https://tlb.booru.org/index.php?page=post&s=list&tags=all"),
     )
 
     def __init__(self, match):
@@ -88,20 +116,42 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor):
     def posts(self):
         url = "{}/index.php?page=post&s=list&tags={}&pid=".format(
             self.root, self.tags)
-        pid = self.page_start
+        return self._pagination(url, 'class="thumb"><a id="p', '"')
 
-        while True:
-            page = self.request(url + str(pid)).text
 
-            cnt = 0
-            for post_id in text.extract_iter(
-                    page, 'class="thumb"><a id="p', '"'):
-                yield self._parse_post(post_id)
-                cnt += 1
+class GelbooruV01FavoriteExtractor(GelbooruV01Extractor):
+    subcategory = "favorite"
+    directory_fmt = ("{category}", "favorites", "{favorite_id}")
+    archive_fmt = "f_{favorite_id}_{id}"
+    per_page = 50
+    pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
+    test = (
+        (("https://the-collection.booru.org"
+          "/index.php?page=favorites&s=view&id=1166"), {
+            "count": 2,
+        }),
+        (("https://illusioncards.booru.org"
+          "/index.php?page=favorites&s=view&id=84887"), {
+            "count": 2,
+        }),
+        ("https://allgirl.booru.org/index.php?page=favorites&s=view&id=380", {
+            "count": 4,
+        }),
+        ("https://drawfriends.booru.org/index.php?page=favorites&s=view&id=1"),
+        ("https://vidyart.booru.org/index.php?page=favorites&s=view&id=1"),
+    )
 
-            if cnt < self.per_page:
-                return
-            pid += self.per_page
+    def __init__(self, match):
+        GelbooruV01Extractor.__init__(self, match)
+        self.favorite_id = match.group(match.lastindex)
+
+    def metadata(self):
+        return {"favorite_id": text.parse_int(self.favorite_id)}
+
+    def posts(self):
+        url = "{}/index.php?page=favorites&s=view&id={}&pid=".format(
+            self.root, self.favorite_id)
+        return self._pagination(url, "posts[", "]")
 
 
 class GelbooruV01PostExtractor(GelbooruV01Extractor):
@@ -141,7 +191,6 @@ class GelbooruV01PostExtractor(GelbooruV01Extractor):
         }),
         ("https://drawfriends.booru.org/index.php?page=post&s=view&id=107474"),
         ("https://vidyart.booru.org/index.php?page=post&s=view&id=383111"),
-        ("https://tlb.booru.org/index.php?page=post&s=view&id=127223"),
     )
 
     def __init__(self, match):
author	Unit 193 <unit193@unit193.net>	2022-05-26 23:57:04 -0400
committer	Unit 193 <unit193@unit193.net>	2022-05-26 23:57:04 -0400
commit	ad61a6d8122973534ab63df48f6090954bc73db6 (patch)
tree	aedce94427ac95fa180005f88fc94b5c8ef5a62a /gallery_dl/extractor/gelbooru_v01.py
parent	c6b88a96bd191711fc540d7babab3d2e09c68da8 (diff)