summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net> 2022-01-08 20:39:28 -0500
committer: Unit 193 <unit193@unit193.net> 2022-01-08 20:39:28 -0500
commit: 8de58070ee3e55f29966a787fd618632dbf4309b (patch)
tree: c140f8a0f97445413a1681424cc93308592070c4 /gallery_dl
parent: 7bc30b43b70556630b4a93c03fefc0d888e3d19f (diff)
New upstream version 1.20.1. [upstream/1.20.1]
Diffstat (limited to 'gallery_dl')
-rw-r--r-- gallery_dl/extractor/common.py | 3
-rw-r--r-- gallery_dl/extractor/gelbooru.py | 43
-rw-r--r-- gallery_dl/extractor/gelbooru_v02.py | 18
-rw-r--r-- gallery_dl/extractor/hitomi.py | 35
-rw-r--r-- gallery_dl/extractor/mangadex.py | 17
-rw-r--r-- gallery_dl/extractor/newgrounds.py | 64
-rw-r--r-- gallery_dl/extractor/patreon.py | 1
-rw-r--r-- gallery_dl/extractor/wordpress.py | 41
-rw-r--r-- gallery_dl/job.py | 5
-rw-r--r-- gallery_dl/option.py | 20
-rw-r--r-- gallery_dl/path.py | 3
-rw-r--r-- gallery_dl/version.py | 2
12 files changed, 172 insertions, 80 deletions
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index c440aee..afe4a16 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -336,7 +336,8 @@ class Extractor():
now = time.time()
for cookie in self._cookiejar:
- if cookie.name in names and cookie.domain == domain:
+ if cookie.name in names and (
+ not domain or cookie.domain == domain):
if cookie.expires and cookie.expires < now:
self.log.warning("Cookie '%s' has expired", cookie.name)
else:
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index df45d0d..a6bda52 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -10,7 +10,7 @@
from .common import Extractor, Message
from . import gelbooru_v02
-from .. import text, exception
+from .. import text, util, exception
import binascii
@@ -20,6 +20,42 @@ class GelbooruBase():
basecategory = "booru"
root = "https://gelbooru.com"
+ def _api_request(self, params):
+ url = self.root + "/index.php?page=dapi&s=post&q=index&json=1"
+ data = self.request(url, params=params).json()
+ if "post" not in data:
+ return ()
+ posts = data["post"]
+ if not isinstance(posts, list):
+ return (posts,)
+ return posts
+
+ def _pagination(self, params):
+ params["pid"] = self.page_start
+ params["limit"] = self.per_page
+
+ post = None
+ while True:
+ try:
+ posts = self._api_request(params)
+ except ValueError:
+ if "tags" not in params or post is None:
+ raise
+ taglist = [tag for tag in params["tags"].split()
+ if not tag.startswith("id:<")]
+ taglist.append("id:<" + str(post.attrib["id"]))
+ params["tags"] = " ".join(taglist)
+ params["pid"] = 0
+ continue
+
+ post = None
+ for post in posts:
+ yield post
+
+ if len(posts) < self.per_page:
+ return
+ params["pid"] += 1
+
@staticmethod
def _file_url(post):
url = post["file_url"]
@@ -82,6 +118,11 @@ class GelbooruPoolExtractor(GelbooruBase,
"pool_name": text.unescape(name),
}
+ def posts(self):
+ params = {}
+ for params["id"] in util.advance(self.post_ids, self.page_start):
+ yield from self._api_request(params)
+
class GelbooruPostExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02PostExtractor):
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index a42a202..8da0bde 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -19,8 +19,15 @@ import re
class GelbooruV02Extractor(booru.BooruExtractor):
basecategory = "gelbooru_v02"
+ def __init__(self, match):
+ booru.BooruExtractor.__init__(self, match)
+ try:
+ self.api_root = INSTANCES[self.category]["api_root"]
+ except KeyError:
+ self.api_root = self.root
+
def _api_request(self, params):
- url = self.root + "/index.php?page=dapi&s=post&q=index"
+ url = self.api_root + "/index.php?page=dapi&s=post&q=index"
return ElementTree.fromstring(self.request(url, params=params).text)
def _pagination(self, params):
@@ -97,12 +104,15 @@ class GelbooruV02Extractor(booru.BooruExtractor):
post["notes"] = notes
-BASE_PATTERN = GelbooruV02Extractor.update({
+INSTANCES = {
"realbooru": {"root": "https://realbooru.com"},
- "rule34" : {"root": "https://rule34.xxx"},
+ "rule34" : {"root": "https://rule34.xxx",
+ "api_root": " https://api.rule34.xxx"},
"safebooru": {"root": "https://safebooru.org"},
"tbib" : {"root": "https://tbib.org"},
-})
+}
+
+BASE_PATTERN = GelbooruV02Extractor.update(INSTANCES)
class GelbooruV02TagExtractor(GelbooruV02Extractor):
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 88cf98c..ce6c7ce 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -26,7 +26,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
r"/(?:[^/?#]+-)?(\d+)")
test = (
("https://hitomi.la/galleries/867789.html", {
- "pattern": r"https://[a-c]b.hitomi.la/images/1639745412/\d+"
+ "pattern": r"https://[a-c]b.hitomi.la/images/1641140516/\d+"
r"/[0-9a-f]{64}\.jpg",
"keyword": "4873ef9a523621fc857b114e0b2820ba4066e9ae",
"options": (("metadata", True),),
@@ -39,12 +39,12 @@ class HitomiGalleryExtractor(GalleryExtractor):
}),
# Game CG with scenes (#321)
("https://hitomi.la/galleries/733697.html", {
- "url": "479d16fe92117a6a2ce81b4e702e6347922c81e3",
+ "url": "d4854175da2b5fa4ae62749266c7be0bf237dc99",
"count": 210,
}),
# fallback for galleries only available through /reader/ URLs
("https://hitomi.la/galleries/1045954.html", {
- "url": "ebc1415c5d7f634166ef7e2635b77735de1ea7a2",
+ "url": "eea99c3745719a7a392150335e6ae3f73faa0b85",
"count": 1413,
}),
# gallery with "broken" redirect
@@ -138,7 +138,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
def images(self, _):
# see https://ltn.hitomi.la/gg.js
- gg_m, gg_b = _parse_gg(self)
+ gg_m, gg_b, gg_default = _parse_gg(self)
result = []
for image in self.info["files"]:
@@ -148,7 +148,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
# see https://ltn.hitomi.la/common.js
inum = int(ihash[-1] + ihash[-3:-1], 16)
url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format(
- chr(97 + gg_m.get(inum, 0)),
+ chr(97 + gg_m.get(inum, gg_default)),
gg_b, inum, ihash, idata["extension"],
)
result.append((url, idata))
@@ -195,10 +195,25 @@ class HitomiTagExtractor(Extractor):
def _parse_gg(extr):
page = extr.request("https://ltn.hitomi.la/gg.js").text
- m = {
- int(match.group(1)): int(match.group(2))
- for match in re.finditer(r"case (\d+): o = (\d+); break;", page)
- }
+ m = {}
+
+ keys = []
+ for match in re.finditer(
+ r"case\s+(\d+):(?:\s*o\s*=\s*(\d+))?", page):
+ key, value = match.groups()
+ keys.append(int(key))
+
+ if value:
+ value = int(value)
+ for key in keys:
+ m[key] = value
+ keys.clear()
+
+ for match in re.finditer(
+ r"if\s+\(g\s*===?\s*(\d+)\)[\s{]*o\s*=\s*(\d+)", page):
+ m[int(match.group(1))] = int(match.group(2))
+
+ d = re.search(r"(?:var\s|default:)\s*o\s*=\s*(\d+)", page)
b = re.search(r"b:\s*[\"'](.+)[\"']", page)
- return m, b.group(1).strip("/")
+ return m, b.group(1).strip("/"), int(d.group(1)) if d else 1
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 393f4e2..ea5d4a8 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -40,7 +40,7 @@ class MangadexExtractor(Extractor):
uuid = chapter["id"]
data = self._transform(chapter)
data["_extractor"] = MangadexChapterExtractor
- self._cache[uuid] = (chapter, data)
+ self._cache[uuid] = data
yield Message.Queue, self.root + "/chapter/" + uuid, data
def _transform(self, chapter):
@@ -72,7 +72,7 @@ class MangadexExtractor(Extractor):
"date" : text.parse_datetime(cattributes["publishAt"]),
"lang" : lang,
"language": util.code_to_language(lang),
- "count" : len(cattributes["data"]),
+ "count" : cattributes["pages"],
}
data["artist"] = [artist["attributes"]["name"]
@@ -107,20 +107,21 @@ class MangadexChapterExtractor(MangadexExtractor):
def items(self):
try:
- chapter, data = self._cache.pop(self.uuid)
+ data = self._cache.pop(self.uuid)
except KeyError:
chapter = self.api.chapter(self.uuid)
data = self._transform(chapter)
- yield Message.Directory, data
- cattributes = chapter["attributes"]
+ yield Message.Directory, data
data["_http_headers"] = self._headers
- base = "{}/data/{}/".format(
- self.api.athome_server(self.uuid)["baseUrl"], cattributes["hash"])
+
+ server = self.api.athome_server(self.uuid)
+ chapter = server["chapter"]
+ base = "{}/data/{}/".format(server["baseUrl"], chapter["hash"])
enum = util.enumerate_reversed if self.config(
"page-reverse") else enumerate
- for data["page"], page in enum(cattributes["data"], 1):
+ for data["page"], page in enum(chapter["data"], 1):
text.nameext_from_url(page, data)
yield Message.Url, base + page, data
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 4351b3e..8bcbc20 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -38,6 +38,7 @@ class NewgroundsExtractor(Extractor):
def items(self):
self.login()
+ metadata = self.metadata()
for post_url in self.posts():
try:
@@ -48,6 +49,8 @@ class NewgroundsExtractor(Extractor):
url = None
if url:
+ if metadata:
+ post.update(metadata)
yield Message.Directory, post
yield Message.Url, url, text.nameext_from_url(url, post)
@@ -62,9 +65,12 @@ class NewgroundsExtractor(Extractor):
"Unable to get download URL for '%s'", post_url)
def posts(self):
- """Return urls of all relevant image pages"""
+ """Return URLs of all relevant post pages"""
return self._pagination(self._path)
+ def metadata(self):
+ """Return general metadata"""
+
def login(self):
username, password = self._get_auth_info()
if username:
@@ -493,3 +499,59 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
text.ensure_http_scheme(user.rpartition('"')[2])
for user in text.extract_iter(page, 'class="item-user', '"><img')
]
+
+
+class NewgroundsSearchExtractor(NewgroundsExtractor):
+ """Extractor for newgrounds.com search results"""
+ subcategory = "search"
+ directory_fmt = ("{category}", "search", "{search_tags}")
+ pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
+ r"/search/conduct/([^/?#]+)/?\?([^#]+)")
+ test = (
+ ("https://www.newgrounds.com/search/conduct/art?terms=tree", {
+ "pattern": NewgroundsImageExtractor.pattern,
+ "keyword": {"search_tags": "tree"},
+ "range": "1-10",
+ "count": 10,
+ }),
+ ("https://www.newgrounds.com/search/conduct/movies?terms=tree", {
+ "pattern": r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+",
+ "range": "1-10",
+ "count": 10,
+ }),
+ ("https://www.newgrounds.com/search/conduct/audio?advanced=1"
+ "&terms=tree+green+nature&match=tdtu&genre=5&suitabilities=e%2Cm"),
+ )
+
+ def __init__(self, match):
+ NewgroundsExtractor.__init__(self, match)
+ self._path, query = match.groups()
+ self.query = text.parse_query(query)
+
+ def posts(self):
+ return self._pagination("/search/conduct/" + self._path, self.query)
+
+ def metadata(self):
+ return {"search_tags": self.query.get("terms", "")}
+
+ def _pagination(self, path, params):
+ url = self.root + path
+ headers = {
+ "Accept": "application/json, text/javascript, */*; q=0.01",
+ "X-Requested-With": "XMLHttpRequest",
+ "Referer": self.root,
+ }
+ params["inner"] = "1"
+ params["page"] = 1
+
+ while True:
+ data = self.request(url, params=params, headers=headers).json()
+
+ post_url = None
+ for post_url in text.extract_iter(data["content"], 'href="', '"'):
+ if not post_url.startswith("/search/"):
+ yield post_url
+
+ if post_url is None:
+ return
+ params["page"] += 1
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index f8c80ef..a7e0ff1 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -20,6 +20,7 @@ class PatreonExtractor(Extractor):
"""Base class for patreon extractors"""
category = "patreon"
root = "https://www.patreon.com"
+ cookiedomain = ".patreon.com"
directory_fmt = ("{category}", "{creator[full_name]}")
filename_fmt = "{id}_{title}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
diff --git a/gallery_dl/extractor/wordpress.py b/gallery_dl/extractor/wordpress.py
deleted file mode 100644
index dd7d28a..0000000
--- a/gallery_dl/extractor/wordpress.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2021 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for WordPress blogs"""
-
-from .common import BaseExtractor, Message
-from .. import text
-
-
-class WordpressExtractor(BaseExtractor):
- """Base class for wordpress extractors"""
- basecategory = "wordpress"
-
- def items(self):
- for post in self.posts():
- yield Message.Difrectory, post
-
-
-
-BASE_PATTERN = WordpressExtractor.update({})
-
-
-class WordpressBlogExtractor(WordpressExtractor):
- """Extractor for WordPress blogs"""
- subcategory = "blog"
- directory_fmt = ("{category}", "{blog}")
- pattern = BASE_PATTERN + r"/?$"
-
- def posts(self):
- url = self.root + "/wp-json/wp/v2/posts"
- params = {"page": 1, "per_page": "100"}
-
- while True:
- data = self.request(url, params=params).json()
- exit()
- yield 1
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 97a8d3f..3e72e9c 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -501,14 +501,15 @@ class DownloadJob(Job):
clist = self.extractor.config("whitelist")
if clist is not None:
negate = False
+ special = None
else:
clist = self.extractor.config("blacklist")
negate = True
+ special = util.SPECIAL_EXTRACTORS
if clist is None:
clist = (self.extractor.category,)
- return util.build_extractor_filter(
- clist, negate, util.SPECIAL_EXTRACTORS)
+ return util.build_extractor_filter(clist, negate, special)
class SimulationJob(DownloadJob):
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 1967bf7..cdfe9a1 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -92,28 +92,28 @@ def build_parser():
help="Print program version and exit",
)
general.add_argument(
- "--dest",
- dest="base-directory", metavar="DEST", action=ConfigAction,
- help=argparse.SUPPRESS,
- )
- general.add_argument(
"-i", "--input-file",
dest="inputfiles", metavar="FILE", action="append",
help=("Download URLs found in FILE ('-' for stdin). "
"More than one --input-file can be specified"),
)
general.add_argument(
+ "-d", "--destination",
+ dest="base-directory", metavar="PATH", action=ConfigAction,
+ help="Target location for file downloads",
+ )
+ general.add_argument(
+ "-D", "--directory",
+ dest="directory", metavar="PATH",
+ help="Exact location for file downloads",
+ )
+ general.add_argument(
"-f", "--filename",
dest="filename", metavar="FORMAT",
help=("Filename format string for downloaded files "
"('/O' for \"original\" filenames)"),
)
general.add_argument(
- "-d", "--directory",
- dest="directory", metavar="PATH",
- help="Target location for file downloads",
- )
- general.add_argument(
"--cookies",
dest="cookies", metavar="FILE", action=ConfigAction,
help="File to load additional cookies from",
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 9e9e983..15db67f 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -178,10 +178,11 @@ class PathFormat():
if WINDOWS:
# Enable longer-than-260-character paths
+ directory = os.path.abspath(directory)
if directory.startswith("\\\\"):
directory = "\\\\?\\UNC\\" + directory[2:]
else:
- directory = "\\\\?\\" + os.path.abspath(directory)
+ directory = "\\\\?\\" + directory
# abspath() in Python 3.7+ removes trailing path separators (#402)
if directory[-1] != sep:
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index b5114e8..8fa7c22 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.20.0"
+__version__ = "1.20.1"