about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net>, 2022-02-10 22:50:56 -0500
committer: Unit 193 <unit193@unit193.net>, 2022-02-10 22:50:56 -0500
commit 99bc014c924c755f10a4a930b1a83efabd84fde1 (patch)
tree 596bbab18416b66adf87011a0a6e439468f68778
parent 8a812de1450d5d53fc1cd9a59f6c3f08452fc5b1 (diff)
New upstream version 1.20.4. [upstream/1.20.4]
-rw-r--r-- CHANGELOG.md | 22
-rw-r--r-- PKG-INFO | 6
-rw-r--r-- README.rst | 4
-rw-r--r-- data/man/gallery-dl.1 | 2
-rw-r--r-- data/man/gallery-dl.conf.5 | 30
-rw-r--r-- docs/gallery-dl-example.conf | 74
-rw-r--r-- docs/gallery-dl.conf | 1
-rw-r--r-- gallery_dl.egg-info/PKG-INFO | 6
-rw-r--r-- gallery_dl.egg-info/SOURCES.txt | 1
-rw-r--r-- gallery_dl/downloader/http.py | 14
-rw-r--r-- gallery_dl/extractor/__init__.py | 1
-rw-r--r-- gallery_dl/extractor/behance.py | 4
-rw-r--r-- gallery_dl/extractor/common.py | 183
-rw-r--r-- gallery_dl/extractor/dynastyscans.py | 6
-rw-r--r-- gallery_dl/extractor/e621.py | 30
-rw-r--r-- gallery_dl/extractor/exhentai.py | 6
-rw-r--r-- gallery_dl/extractor/foolfuuka.py | 8
-rw-r--r-- gallery_dl/extractor/gelbooru.py | 33
-rw-r--r-- gallery_dl/extractor/gelbooru_v02.py | 6
-rw-r--r-- gallery_dl/extractor/hitomi.py | 30
-rw-r--r-- gallery_dl/extractor/kemonoparty.py | 7
-rw-r--r-- gallery_dl/extractor/kohlchan.py | 78
-rw-r--r-- gallery_dl/extractor/lolisafe.py | 22
-rw-r--r-- gallery_dl/extractor/luscious.py | 6
-rw-r--r-- gallery_dl/extractor/mangahere.py | 11
-rw-r--r-- gallery_dl/extractor/patreon.py | 1
-rw-r--r-- gallery_dl/extractor/pillowfort.py | 6
-rw-r--r-- gallery_dl/extractor/sexcom.py | 23
-rw-r--r-- gallery_dl/extractor/tapas.py | 6
-rw-r--r-- gallery_dl/extractor/twitter.py | 89
-rw-r--r-- gallery_dl/extractor/unsplash.py | 4
-rw-r--r-- gallery_dl/extractor/vk.py | 6
-rw-r--r-- gallery_dl/extractor/weibo.py | 36
-rw-r--r-- gallery_dl/extractor/wikiart.py | 6
-rw-r--r-- gallery_dl/job.py | 5
-rw-r--r-- gallery_dl/version.py | 2
36 files changed, 568 insertions, 207 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0cefbb2..091bb90 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,9 +1,29 @@
# Changelog
+## 1.20.4 - 2022-02-06
+### Additions
+- [e621] add `favorite` extractor ([#2250](https://github.com/mikf/gallery-dl/issues/2250))
+- [hitomi] add `format` option ([#2260](https://github.com/mikf/gallery-dl/issues/2260))
+- [kohlchan] add Kohlchan extractors ([#2251](https://github.com/mikf/gallery-dl/issues/2251))
+- [sexcom] add `pins` extractor ([#2265](https://github.com/mikf/gallery-dl/issues/2265))
+- [twitter] add `warnings` option ([#2258](https://github.com/mikf/gallery-dl/issues/2258))
+- add ability to disable TLS 1.2 ([#2243](https://github.com/mikf/gallery-dl/issues/2243))
+- add examples for custom gelbooru instances ([#2262](https://github.com/mikf/gallery-dl/issues/2262))
+### Fixes
+- [bunkr] fix mp4 downloads ([#2239](https://github.com/mikf/gallery-dl/issues/2239))
+- [gelbooru] improve and fix pagination ([#2230](https://github.com/mikf/gallery-dl/issues/2230), [#2232](https://github.com/mikf/gallery-dl/issues/2232))
+- [hitomi] "fix" 403 errors ([#2260](https://github.com/mikf/gallery-dl/issues/2260))
+- [kemonoparty] fix downloading smaller text files ([#2267](https://github.com/mikf/gallery-dl/issues/2267))
+- [patreon] disable TLS 1.2 by default ([#2249](https://github.com/mikf/gallery-dl/issues/2249))
+- [twitter] restore errors for protected timelines etc ([#2237](https://github.com/mikf/gallery-dl/issues/2237))
+- [twitter] restore `logout` functionality ([#1719](https://github.com/mikf/gallery-dl/issues/1719))
+- [twitter] provide fallback URLs for card images
+- [weibo] update pagination code ([#2244](https://github.com/mikf/gallery-dl/issues/2244))
+
## 1.20.3 - 2022-01-26
### Fixes
- [kemonoparty] fix DMs extraction ([#2008](https://github.com/mikf/gallery-dl/issues/2008))
-- [twitter] fix crash ob Tweets with deleted quotes ([#2225](https://github.com/mikf/gallery-dl/issues/2225))
+- [twitter] fix crash on Tweets with deleted quotes ([#2225](https://github.com/mikf/gallery-dl/issues/2225))
- [twitter] fix crash on suspended Tweets without `legacy` entry ([#2216](https://github.com/mikf/gallery-dl/issues/2216))
- [twitter] fix crash on unified cards without `type`
- [twitter] prevent crash on invalid/deleted Retweets ([#2225](https://github.com/mikf/gallery-dl/issues/2225))
diff --git a/PKG-INFO b/PKG-INFO
index 2910471..5fe7846 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.20.3
+Version: 1.20.4
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -98,8 +98,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.3/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.3/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.4/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.4/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/README.rst b/README.rst
index 0602fac..9ba4e2c 100644
--- a/README.rst
+++ b/README.rst
@@ -64,8 +64,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.3/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.3/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.4/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.4/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 49b1af8..9c1ed7f 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2022-01-26" "1.20.3" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2022-02-06" "1.20.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 564368c..e44f008 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2022-01-26" "1.20.3" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2022-02-06" "1.20.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1479,6 +1479,22 @@ Possible values are
You can use \f[I]"all"\f[] instead of listing all values separately.
+.SS extractor.hitomi.format
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"webp"\f[]
+
+.IP "Description:" 4
+Selects which image format to download.
+
+Available formats are \f[I]"webp"\f[] and \f[I]"avif"\f[].
+
+\f[I]"original"\f[] will try to download the original \f[I]jpg\f[] or \f[I]png\f[] versions,
+but is most likely going to fail with \f[I]403 Forbidden\f[] errors.
+
+
.SS extractor.hitomi.metadata
.IP "Type:" 6
\f[I]bool\f[]
@@ -2477,6 +2493,18 @@ Control video download behavior.
* \f[I]false\f[]: Skip video Tweets
+.SS extractor.twitter.warnings
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Emit \f[I]logging messages\f[]
+for non-fatal errors reported by Twitter's API.
+
+
.SS extractor.unsplash.format
.IP "Type:" 6
\f[I]string\f[]
diff --git a/docs/gallery-dl-example.conf b/docs/gallery-dl-example.conf
index 72e7465..29b2507 100644
--- a/docs/gallery-dl-example.conf
+++ b/docs/gallery-dl-example.conf
@@ -6,6 +6,12 @@
"#": "set global archive file for all extractors",
"archive": "~/gallery-dl/archive.sqlite3",
+ "#": "add two custom keywords into the metadata dictionary",
+ "#": "these can be used to further refine your output directories or filenames",
+ "keywords": {"bkey": "", "ckey": ""},
+ "#": "make sure that custom keywords are empty, i.e. they don't appear unless specified by the user",
+ "keywords-default": "",
+
"#": "replace invalid path characters with unicode alternatives",
"path-restrict": {
"\\": "⧹",
@@ -212,6 +218,74 @@
"#": "add two other foolfuuka 4chan archives",
"fireden-onion": {"root": "http://ydt6jy2ng3s3xg2e.onion"},
"scalearchive" : {"root": "https://archive.scaled.team" }
+ },
+
+ "gelbooru_v01":
+ {
+ "#": "add a custom gelbooru_v01 instance",
+ "#": "this is just an example, this specific instance is already included!",
+ "allgirlbooru": {"root": "https://allgirl.booru.org"},
+
+ "#": "the following options are used for all gelbooru_v01 instances",
+ "tag":
+ {
+ "directory": {
+ "locals().get('bkey')": ["Booru", "AllGirlBooru", "Tags", "{bkey}", "{ckey}", "{search_tags}"],
+ "" : ["Booru", "AllGirlBooru", "Tags", "_Unsorted", "{search_tags}"]
+ }
+ },
+ "post":
+ {
+ "directory": ["Booru", "AllGirlBooru", "Posts"]
+ },
+ "archive": "~/gallery-dl/custom-archive-file-for-gelbooru_v01_instances.db",
+ "filename": "{tags}_{id}_{md5}.{extension}",
+ "sleep-request": [0, 1.2]
+ },
+
+ "gelbooru_v02":
+ {
+ "#": "add a custom gelbooru_v02 instance",
+ "#": "this is just an example, this specific instance is already included!",
+ "tbib":
+ {
+ "root": "https://tbib.org",
+ "#": "some sites have different domains for API access",
+ "#": "use the 'api_root' option in addition to the 'root' setting here"
+ }
+ },
+
+ "tbib": {
+ "#": "the following options are only used for TBIB",
+ "#": "gelbooru_v02 has four subcategories at the moment, use custom directory settings for all of these",
+ "tag":
+ {
+ "directory": {
+ "locals().get('bkey')": ["Other Boorus", "TBIB", "Tags", "{bkey}", "{ckey}", "{search_tags}"],
+ "" : ["Other Boorus", "TBIB", "Tags", "_Unsorted", "{search_tags}"]
+ }
+ },
+ "pool":
+ {
+ "directory": {
+ "locals().get('bkey')": ["Other Boorus", "TBIB", "Pools", "{bkey}", "{ckey}", "{pool}"],
+ "" : ["Other Boorus", "TBIB", "Pools", "_Unsorted", "{pool}"]
+ }
+ },
+ "favorite":
+ {
+ "directory": {
+ "locals().get('bkey')": ["Other Boorus", "TBIB", "Favorites", "{bkey}", "{ckey}", "{favorite_id}"],
+ "" : ["Other Boorus", "TBIB", "Favorites", "_Unsorted", "{favorite_id}"]
+ }
+ },
+ "post":
+ {
+ "directory": ["Other Boorus", "TBIB", "Posts"]
+ },
+ "archive": "~/gallery-dl/custom-archive-file-for-TBIB.db",
+ "filename": "{id}_{md5}.{extension}",
+ "sleep-request": [0, 1.2]
}
},
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 30be840..ab5f6f9 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -114,6 +114,7 @@
},
"hitomi":
{
+ "format": "webp",
"metadata": false
},
"idolcomplex":
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 6a91b03..c086512 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.20.3
+Version: 1.20.4
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -98,8 +98,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.3/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.3/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.4/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.4/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 8ddae52..69a8b70 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -103,6 +103,7 @@ gallery_dl/extractor/kabeuchi.py
gallery_dl/extractor/keenspot.py
gallery_dl/extractor/kemonoparty.py
gallery_dl/extractor/khinsider.py
+gallery_dl/extractor/kohlchan.py
gallery_dl/extractor/komikcast.py
gallery_dl/extractor/lineblog.py
gallery_dl/extractor/livedoor.py
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index d2efd3f..91ce731 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2021 Mike Fährmann
+# Copyright 2014-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -148,9 +148,15 @@ class HttpDownloader(DownloaderBase):
# check for invalid responses
validate = kwdict.get("_http_validate")
- if validate and not validate(response):
- self.log.warning("Invalid response")
- return False
+ if validate:
+ result = validate(response)
+ if isinstance(result, str):
+ url = result
+ tries -= 1
+ continue
+ if not result:
+ self.log.warning("Invalid response")
+ return False
# set missing filename extension from MIME type
if not pathfmt.extension:
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 65c994d..e7d71d6 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -68,6 +68,7 @@ modules = [
"keenspot",
"kemonoparty",
"khinsider",
+ "kohlchan",
"komikcast",
"lineblog",
"livedoor",
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index 71d3320..994a701 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2021 Mike Fährmann
+# Copyright 2018-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -84,7 +84,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
}),
# 'video' modules (#1282)
("https://www.behance.net/gallery/101185577/COLCCI", {
- "pattern": r"ytdl:https://adobeprod-a\.akamaihd\.net/",
+ "pattern": r"ytdl:https://cdn-prod-ccv\.adobe\.com/",
"count": 3,
}),
)
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 52e5199..1d81dfc 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2021 Mike Fährmann
+# Copyright 2014-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -38,6 +38,7 @@ class Extractor():
request_interval = 0.0
request_interval_min = 0.0
request_timestamp = 0.0
+ tls12 = True
def __init__(self, match):
self.log = logging.getLogger(self.category)
@@ -219,14 +220,7 @@ class Extractor():
self.session = session = requests.Session()
headers = session.headers
headers.clear()
-
- source_address = self.config("source-address")
- if source_address:
- if isinstance(source_address, str):
- source_address = (source_address, 0)
- else:
- source_address = (source_address[0], source_address[1])
- session.mount("http://", SourceAdapter(source_address))
+ ssl_options = ssl_ciphers = 0
browser = self.config("browser") or self.browser
if browser and isinstance(browser, str):
@@ -243,12 +237,21 @@ class Extractor():
platform = "Macintosh; Intel Mac OS X 11.5"
if browser == "chrome":
- _emulate_browser_chrome(session, platform, source_address)
+ if platform.startswith("Macintosh"):
+ platform = platform.replace(".", "_") + "_2"
else:
- _emulate_browser_firefox(session, platform, source_address)
+ browser = "firefox"
+
+ for key, value in HTTP_HEADERS[browser]:
+ if value and "{}" in value:
+ headers[key] = value.format(platform)
+ else:
+ headers[key] = value
+
+ ssl_options |= (ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 |
+ ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1)
+ ssl_ciphers = SSL_CIPHERS[browser]
else:
- if source_address:
- session.mount("https://", SourceAdapter(source_address))
headers["User-Agent"] = self.config("user-agent", (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
"rv:91.0) Gecko/20100101 Firefox/91.0"))
@@ -260,11 +263,31 @@ class Extractor():
if custom_headers:
headers.update(custom_headers)
- ciphers = self.config("ciphers")
- if ciphers:
- if isinstance(ciphers, list):
- ciphers = ":".join(ciphers)
- session.mount("https://", HTTPSAdapter(ciphers))
+ custom_ciphers = self.config("ciphers")
+ if custom_ciphers:
+ if isinstance(custom_ciphers, list):
+ ssl_ciphers = ":".join(custom_ciphers)
+ else:
+ ssl_ciphers = custom_ciphers
+
+ source_address = self.config("source-address")
+ if source_address:
+ if isinstance(source_address, str):
+ source_address = (source_address, 0)
+ else:
+ source_address = (source_address[0], source_address[1])
+
+ tls12 = self.config("tls12")
+ if tls12 is None:
+ tls12 = self.tls12
+ if not tls12:
+ ssl_options |= ssl.OP_NO_TLSv1_2
+ self.log.debug("TLS 1.2 disabled.")
+
+ adapter = _build_requests_adapter(
+ ssl_options, ssl_ciphers, source_address)
+ session.mount("https://", adapter)
+ session.mount("http://", adapter)
def _init_proxies(self):
"""Update the session's proxy map"""
@@ -615,29 +638,10 @@ class BaseExtractor(Extractor):
)
-class SourceAdapter(HTTPAdapter):
+class RequestsAdapter(HTTPAdapter):
- def __init__(self, source_address):
- self.source_address = source_address
- HTTPAdapter.__init__(self)
-
- def init_poolmanager(self, *args, **kwargs):
- kwargs["source_address"] = self.source_address
- return HTTPAdapter.init_poolmanager(self, *args, **kwargs)
-
- def proxy_manager_for(self, *args, **kwargs):
- kwargs["source_address"] = self.source_address
- return HTTPAdapter.proxy_manager_for(self, *args, **kwargs)
-
-
-class HTTPSAdapter(HTTPAdapter):
-
- def __init__(self, ciphers, source_address=None):
- context = self.ssl_context = ssl.create_default_context()
- context.options |= (ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 |
- ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1)
- context.set_ecdh_curve("prime256v1")
- context.set_ciphers(ciphers)
+ def __init__(self, ssl_context=None, source_address=None):
+ self.ssl_context = ssl_context
self.source_address = source_address
HTTPAdapter.__init__(self)
@@ -652,19 +656,59 @@ class HTTPSAdapter(HTTPAdapter):
return HTTPAdapter.proxy_manager_for(self, *args, **kwargs)
-def _emulate_browser_firefox(session, platform, source_address):
- headers = session.headers
- headers["User-Agent"] = ("Mozilla/5.0 (" + platform + "; rv:91.0) "
- "Gecko/20100101 Firefox/91.0")
- headers["Accept"] = ("text/html,application/xhtml+xml,"
- "application/xml;q=0.9,image/webp,*/*;q=0.8")
- headers["Accept-Language"] = "en-US,en;q=0.5"
- headers["Accept-Encoding"] = "gzip, deflate"
- headers["Referer"] = None
- headers["Upgrade-Insecure-Requests"] = "1"
- headers["Cookie"] = None
+def _build_requests_adapter(ssl_options, ssl_ciphers, source_address):
+ key = (ssl_options, ssl_ciphers, source_address)
+ try:
+ return _adapter_cache[key]
+ except KeyError:
+ pass
- session.mount("https://", HTTPSAdapter(
+ if ssl_options or ssl_ciphers:
+ ssl_context = ssl.create_default_context()
+ if ssl_options:
+ ssl_context.options |= ssl_options
+ if ssl_ciphers:
+ ssl_context.set_ecdh_curve("prime256v1")
+ ssl_context.set_ciphers(ssl_ciphers)
+ else:
+ ssl_context = None
+
+ adapter = _adapter_cache[key] = RequestsAdapter(
+ ssl_context, source_address)
+ return adapter
+
+
+_adapter_cache = {}
+
+
+HTTP_HEADERS = {
+ "firefox": (
+ ("User-Agent", "Mozilla/5.0 ({}; rv:91.0) "
+ "Gecko/20100101 Firefox/91.0"),
+ ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,"
+ "image/avif,*/*;q=0.8"),
+ ("Accept-Language", "en-US,en;q=0.5"),
+ ("Accept-Encoding", "gzip, deflate"),
+ ("Referer", None),
+ ("Connection", "keep-alive"),
+ ("Upgrade-Insecure-Requests", "1"),
+ ("Cookie", None),
+ ),
+ "chrome": (
+ ("Upgrade-Insecure-Requests", "1"),
+ ("User-Agent", "Mozilla/5.0 ({}) AppleWebKit/537.36 (KHTML, "
+ "like Gecko) Chrome/92.0.4515.131 Safari/537.36"),
+ ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,"
+ "image/webp,image/apng,*/*;q=0.8"),
+ ("Referer", None),
+ ("Accept-Encoding", "gzip, deflate"),
+ ("Accept-Language", "en-US,en;q=0.9"),
+ ("Cookie", None),
+ ),
+}
+
+SSL_CIPHERS = {
+ "firefox": (
"TLS_AES_128_GCM_SHA256:"
"TLS_CHACHA20_POLY1305_SHA256:"
"TLS_AES_256_GCM_SHA384:"
@@ -678,32 +722,13 @@ def _emulate_browser_firefox(session, platform, source_address):
"ECDHE-ECDSA-AES128-SHA:"
"ECDHE-RSA-AES128-SHA:"
"ECDHE-RSA-AES256-SHA:"
- "DHE-RSA-AES128-SHA:"
- "DHE-RSA-AES256-SHA:"
+ "AES128-GCM-SHA256:"
+ "AES256-GCM-SHA384:"
"AES128-SHA:"
"AES256-SHA:"
- "DES-CBC3-SHA",
- source_address
- ))
-
-
-def _emulate_browser_chrome(session, platform, source_address):
- if platform.startswith("Macintosh"):
- platform = platform.replace(".", "_") + "_2"
-
- headers = session.headers
- headers["Upgrade-Insecure-Requests"] = "1"
- headers["User-Agent"] = (
- "Mozilla/5.0 (" + platform + ") AppleWebKit/537.36 "
- "(KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36")
- headers["Accept"] = ("text/html,application/xhtml+xml,application/xml;"
- "q=0.9,image/webp,image/apng,*/*;q=0.8")
- headers["Referer"] = None
- headers["Accept-Encoding"] = "gzip, deflate"
- headers["Accept-Language"] = "en-US,en;q=0.9"
- headers["Cookie"] = None
-
- session.mount("https://", HTTPSAdapter(
+ "DES-CBC3-SHA"
+ ),
+ "chrome": (
"TLS_AES_128_GCM_SHA256:"
"TLS_AES_256_GCM_SHA384:"
"TLS_CHACHA20_POLY1305_SHA256:"
@@ -719,9 +744,9 @@ def _emulate_browser_chrome(session, platform, source_address):
"AES256-GCM-SHA384:"
"AES128-SHA:"
"AES256-SHA:"
- "DES-CBC3-SHA",
- source_address
- ))
+ "DES-CBC3-SHA"
+ ),
+}
# Undo automatic pyOpenSSL injection by requests
diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py
index ab1044f..e5c5c01 100644
--- a/gallery_dl/extractor/dynastyscans.py
+++ b/gallery_dl/extractor/dynastyscans.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -115,8 +115,8 @@ class DynastyscansSearchExtractor(DynastyscansBase, Extractor):
pattern = BASE_PATTERN + r"/images/?(?:\?([^#]+))?$"
test = (
("https://dynasty-scans.com/images?with[]=4930&with[]=5211", {
- "url": "6b570eedd8a741c2cd34fb98b22a49d772f84191",
- "keyword": "fa7ff94f82cdf942f7734741d758f160a6b0905a",
+ "url": "22cf0fb64e12b29e79b0a3d26666086a48f9916a",
+ "keyword": "11cbc555a15528d25567977b8808e10369c4c3ee",
}),
("https://dynasty-scans.com/images", {
"range": "1",
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
index 4ad19cd..213178c 100644
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2020 Mike Fährmann
+# Copyright 2014-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,6 +10,7 @@
from .common import Extractor, Message
from . import danbooru
+from .. import text
BASE_PATTERN = r"(?:https?://)?e(621|926)\.net"
@@ -119,3 +120,30 @@ class E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor):
"count": ">= 70",
})
)
+
+
+class E621FavoriteExtractor(E621Extractor):
+ """Extractor for e621 favorites"""
+ subcategory = "favorite"
+ directory_fmt = ("{category}", "Favorites", "{user_id}")
+ archive_fmt = "f_{user_id}_{id}"
+ pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
+ test = (
+ ("https://e621.net/favorites"),
+ ("https://e621.net/favorites?page=2&user_id=53275", {
+ "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
+ "count": "> 260",
+ })
+ )
+
+ def __init__(self, match):
+ super().__init__(match)
+ self.query = text.parse_query(match.group(2))
+
+ def metadata(self):
+ return {"user_id": self.query.get("user_id", "")}
+
+ def posts(self):
+ if self.page_start is None:
+ self.page_start = 1
+ return self._pagination("/favorites.json", self.query, True)
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index cf9706b..c23c36f 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2021 Mike Fährmann
+# Copyright 2014-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -122,7 +122,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"date": "dt:2018-03-18 20:15:00",
"eh_category": "Non-H",
"expunged": False,
- "favorites": "19",
+ "favorites": "20",
"filecount": "4",
"filesize": 1488978,
"gid": 1200119,
@@ -137,7 +137,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"parody:komi-san wa komyushou desu.",
"character:shouko komi",
"group:seventh lowlife",
- "sample",
+ "other:sample",
],
"thumb": "https://exhentai.org/t/ce/0a/ce0a5bcb583229a9b07c0f8"
"3bcb1630ab1350640-624622-736-1036-jpg_250.jpg",
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 6ddd689..04e5926 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -122,7 +122,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
"url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
}),
("https://desuarchive.org/a/thread/159542679/", {
- "url": "2bddbe03b01b4630337f6916f6df36d1d443b7b8",
+ "url": "e7d624aded15a069194e38dc731ec23217a422fb",
}),
("https://boards.fireden.net/sci/thread/11264294/", {
"url": "61cab625c95584a12a30049d054931d64f8d20aa",
@@ -131,10 +131,10 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
"url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
}),
("https://rbt.asia/g/thread/61487650/", {
- "url": "b4692707cddb4ad1c9ba1cde77c4703025cb86e5",
+ "url": "fadd274b25150a1bdf03a40c58db320fa3b617c4",
}),
("https://archive.rebeccablacktech.com/g/thread/61487650/", {
- "url": "b4692707cddb4ad1c9ba1cde77c4703025cb86e5",
+ "url": "fadd274b25150a1bdf03a40c58db320fa3b617c4",
}),
("https://thebarchive.com/b/thread/739772332/", {
"url": "e8b18001307d130d67db31740ce57c8561b5d80c",
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index fd26192..e8bee37 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -33,28 +33,20 @@ class GelbooruBase():
def _pagination(self, params):
params["pid"] = self.page_start
params["limit"] = self.per_page
+ limit = self.per_page // 2
- post = None
while True:
- try:
- posts = self._api_request(params)
- except ValueError:
- if "tags" not in params or post is None:
- raise
- taglist = [tag for tag in params["tags"].split()
- if not tag.startswith("id:<")]
- taglist.append("id:<" + str(post.attrib["id"]))
- params["tags"] = " ".join(taglist)
- params["pid"] = 0
- continue
-
- post = None
+ posts = self._api_request(params)
+
for post in posts:
yield post
- if len(posts) < self.per_page:
+ if len(posts) < limit:
return
- params["pid"] += 1
+
+ if "pid" in params:
+ del params["pid"]
+ params["tags"] = "{} id:<{}".format(self.tags, post["id"])
@staticmethod
def _file_url(post):
@@ -81,9 +73,12 @@ class GelbooruTagExtractor(GelbooruBase,
("https://gelbooru.com/index.php?page=post&s=list&tags=bonocho", {
"count": 5,
}),
- ("https://gelbooru.com/index.php?page=post&s=list&tags=bonocho", {
- "options": (("api", False),),
- "count": 5,
+ ("https://gelbooru.com/index.php?page=post&s=list&tags=meiya_neon", {
+ "range": "196-204",
+ "url": "845a61aa1f90fb4ced841e8b7e62098be2e967bf",
+ "pattern": r"https://img\d\.gelbooru\.com"
+ r"/images/../../[0-9a-f]{32}\.jpg",
+ "count": 9,
}),
)
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 8da0bde..7e16a51 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -122,9 +122,9 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
test = (
("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
- "content": "97e4bbf86c3860be18de384d02d544251afe1d45",
+ "content": "622e80be3f496672c44aab5c47fbc6941c61bc79",
"pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
- "count": 1,
+ "count": 2,
}),
("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
"url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index e132bf9..34eaaab 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -26,34 +26,38 @@ class HitomiGalleryExtractor(GalleryExtractor):
r"/(?:[^/?#]+-)?(\d+)")
test = (
("https://hitomi.la/galleries/867789.html", {
- "pattern": r"https://[a-c]b.hitomi.la/images/1641140516/\d+"
- r"/[0-9a-f]{64}\.jpg",
- "keyword": "4873ef9a523621fc857b114e0b2820ba4066e9ae",
+ "pattern": r"https://[a-c]a\.hitomi\.la/webp/\d+/\d+"
+ r"/[0-9a-f]{64}\.webp",
+ "keyword": "4b584d09d535694d7d757c47daf5c15d116420d2",
"options": (("metadata", True),),
"count": 16,
}),
# download test
("https://hitomi.la/galleries/1401410.html", {
"range": "1",
- "content": "b3ca8c6c8cc5826cf8b4ceb7252943abad7b8b4c",
+ "content": "d75d5a3d1302a48469016b20e53c26b714d17745",
}),
# Game CG with scenes (#321)
("https://hitomi.la/galleries/733697.html", {
- "url": "d4854175da2b5fa4ae62749266c7be0bf237dc99",
"count": 210,
}),
# fallback for galleries only available through /reader/ URLs
("https://hitomi.la/galleries/1045954.html", {
- "url": "eea99c3745719a7a392150335e6ae3f73faa0b85",
"count": 1413,
}),
# gallery with "broken" redirect
("https://hitomi.la/cg/scathacha-sama-okuchi-ecchi-1291900.html", {
"count": 10,
+ "options": (("format", "original"),),
+ "pattern": r"https://[a-c]b\.hitomi\.la/images/\d+/\d+"
+ r"/[0-9a-f]{64}\.jpg",
}),
# no tags
("https://hitomi.la/cg/1615823.html", {
"count": 22,
+ "options": (("format", "avif"),),
+ "pattern": r"https://[a-c]a\.hitomi\.la/avif/\d+/\d+"
+ r"/[0-9a-f]{64}\.avif",
}),
("https://hitomi.la/manga/amazon-no-hiyaku-867789.html"),
("https://hitomi.la/manga/867789.html"),
@@ -140,16 +144,24 @@ class HitomiGalleryExtractor(GalleryExtractor):
# see https://ltn.hitomi.la/gg.js
gg_m, gg_b, gg_default = _parse_gg(self)
+ fmt = self.config("format") or "webp"
+ if fmt == "original":
+ subdomain, fmt, ext = "b", "images", None
+ else:
+ subdomain, ext = "a", fmt
+
result = []
for image in self.info["files"]:
ihash = image["hash"]
idata = text.nameext_from_url(image["name"])
+ if ext:
+ idata["extension"] = ext
# see https://ltn.hitomi.la/common.js
inum = int(ihash[-1] + ihash[-3:-1], 16)
- url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format(
+ url = "https://{}{}.hitomi.la/{}/{}/{}/{}.{}".format(
chr(97 + gg_m.get(inum, gg_default)),
- gg_b, inum, ihash, idata["extension"],
+ subdomain, fmt, gg_b, inum, ihash, idata["extension"],
)
result.append((url, idata))
return result
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index beb992c..e8fcd1a 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -45,11 +45,8 @@ class KemonopartyExtractor(Extractor):
comments = self.config("comments")
username = dms = None
- # prevent files from coomer.party to be sent with gzip compression
- if "coomer" in self.root:
- headers = {"Accept-Encoding": "identity"}
- else:
- headers = None
+ # prevent files to be sent with gzip compression
+ headers = {"Accept-Encoding": "identity"}
if self.config("metadata"):
username = text.unescape(text.extract(
diff --git a/gallery_dl/extractor/kohlchan.py b/gallery_dl/extractor/kohlchan.py
new file mode 100644
index 0000000..c96dedc
--- /dev/null
+++ b/gallery_dl/extractor/kohlchan.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://kohlchan.net/"""
+
+from .common import Extractor, Message
+from .. import text
+import itertools
+
+
+class KohlchanThreadExtractor(Extractor):
+ """Extractor for Kohlchan threads"""
+ category = "kohlchan"
+ subcategory = "thread"
+ directory_fmt = ("{category}", "{boardUri}",
+ "{threadId} {subject|message[:50]}")
+ filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
+ archive_fmt = "{boardUri}_{postId}_{num}"
+ pattern = r"(?:https?://)?kohlchan\.net/([^/?#]+)/res/(\d+)"
+ test = ("https://kohlchan.net/a/res/4594.html", {
+ "pattern": r"https://kohlchan\.net/\.media/[0-9a-f]{64}(\.\w+)?$",
+ "count": ">= 80",
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.board, self.thread = match.groups()
+
+ def items(self):
+ url = "https://kohlchan.net/{}/res/{}.json".format(
+ self.board, self.thread)
+ thread = self.request(url).json()
+ thread["postId"] = thread["threadId"]
+ posts = thread.pop("posts")
+
+ yield Message.Directory, thread
+
+ for post in itertools.chain((thread,), posts):
+ files = post.pop("files", ())
+ if files:
+ thread.update(post)
+ for num, file in enumerate(files):
+ file.update(thread)
+ file["num"] = num
+ url = "https://kohlchan.net" + file["path"]
+ text.nameext_from_url(file["originalName"], file)
+ yield Message.Url, url, file
+
+
+class KohlchanBoardExtractor(Extractor):
+ """Extractor for Kohlchan boards"""
+ category = "kohlchan"
+ subcategory = "board"
+ pattern = (r"(?:https?://)?kohlchan\.net"
+ r"/([^/?#]+)/(?:(?:catalog|\d+)\.html)?$")
+ test = (
+ ("https://kohlchan.net/a/", {
+ "pattern": KohlchanThreadExtractor.pattern,
+ "count": ">= 100",
+ }),
+ ("https://kohlchan.net/a/2.html"),
+ ("https://kohlchan.net/a/catalog.html"),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.board = match.group(1)
+
+ def items(self):
+ url = "https://kohlchan.net/{}/catalog.json".format(self.board)
+ for thread in self.request(url).json():
+ url = "https://kohlchan.net/{}/res/{}.html".format(
+ self.board, thread["threadId"])
+ thread["_extractor"] = KohlchanThreadExtractor
+ yield Message.Queue, url, thread
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index cdaf22b..c63fa51 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -42,6 +42,11 @@ class LolisafelbumExtractor(LolisafeExtractor):
"num": int,
},
}),
+ # mp4 (#2239)
+ ("https://bunkr.is/a/ptRHaCn2", {
+ "pattern": r"https://cdn\.bunkr\.is/_-RnHoW69L\.mp4",
+ "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
+ }),
("https://bunkr.to/a/Lktg9Keq"),
("https://zz.ht/a/lop7W6EZ", {
"pattern": r"https://z\.zz\.fo/(4anuY|ih560)\.png",
@@ -66,6 +71,11 @@ class LolisafelbumExtractor(LolisafeExtractor):
url = file["file"]
text.nameext_from_url(url, data)
data["name"], sep, data["id"] = data["filename"].rpartition("-")
+
+ if data["extension"] == "mp4":
+ data["_http_validate"] = self._check_rewrite
+ else:
+ data["_http_validate"] = None
yield Message.Url, url, data
def fetch_album(self, album_id):
@@ -77,3 +87,13 @@ class LolisafelbumExtractor(LolisafeExtractor):
"album_name": text.unescape(data["title"]),
"count" : data["count"],
}
+
+ @staticmethod
+ def _check_rewrite(response):
+ if response.history and response.headers.get(
+ "Content-Type").startswith("text/html"):
+ # consume content to reuse connection
+ response.content
+ # rewrite to download URL
+ return response.url.replace("/v/", "/d/", 1)
+ return True
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index 6761b55..b5db3dd 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2021 Mike Fährmann
+# Copyright 2016-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -72,7 +72,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
"permissions" : list,
"rating" : float,
"slug" : "okinami-no-koigokoro",
- "status" : str,
+ "status" : None,
"tags" : list,
"title" : "Okinami no Koigokoro",
"url" : "/albums/okinami-no-koigokoro_277031/",
@@ -92,7 +92,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
"like_status" : "none",
"position" : int,
"resolution" : r"re:\d+x\d+",
- "status" : str,
+ "status" : None,
"tags" : list,
"thumbnail" : str,
"title" : str,
diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py
index 653c61a..f655f94 100644
--- a/gallery_dl/extractor/mangahere.py
+++ b/gallery_dl/extractor/mangahere.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract manga-chapters and entire manga from https://www.mangahere.cc/"""
+"""Extractors for https://www.mangahere.cc/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
@@ -91,11 +91,12 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor):
"""Extractor for manga from mangahere.cc"""
chapterclass = MangahereChapterExtractor
pattern = (r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]"
- r"(/manga/[^/]+)/?(?:#.*)?$")
+ r"(/manga/[^/?#]+/?)(?:#.*)?$")
test = (
("https://www.mangahere.cc/manga/aria/", {
- "url": "23ad9256f7392de5973b79a36f6875e9fdcb7563",
- "keyword": "79e326641e7d5d2fed43a1eb9949471b8162a9e0",
+ "url": "dc7f8954efbe87d9fd670c54e5edb5230c01f767",
+ "keyword": "864524eed2dc6a73e366f6ba400b80d894f99b5a",
+ "count": 69,
}),
("https://www.mangahere.cc/manga/hiyokoi/#50", {
"url": "654850570aa03825cd57e2ae2904af489602c523",
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index a7e0ff1..051f1ef 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -25,6 +25,7 @@ class PatreonExtractor(Extractor):
filename_fmt = "{id}_{title}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
browser = "firefox"
+ tls12 = False
_warning = True
def items(self):
diff --git a/gallery_dl/extractor/pillowfort.py b/gallery_dl/extractor/pillowfort.py
index 3c3fcd4..bdd9f21 100644
--- a/gallery_dl/extractor/pillowfort.py
+++ b/gallery_dl/extractor/pillowfort.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -145,7 +145,7 @@ class PillowfortPostExtractor(PillowfortExtractor):
"id": int,
"last_activity": str,
"last_activity_elapsed": str,
- "last_edited_at": None,
+ "last_edited_at": str,
"likes_count": int,
"media_type": "picture",
"nsfw": False,
@@ -169,7 +169,7 @@ class PillowfortPostExtractor(PillowfortExtractor):
"tags": list,
"time_elapsed": str,
"timestamp": str,
- "title": "What is Pillowfort.io? ",
+ "title": "What is Pillowfort.social?",
"updated_at": str,
"url": r"re:https://img3.pillowfort.social/posts/.*\.png",
"user_id": 5,
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index 9f4bfc3..edf35da 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2020 Mike Fährmann
+# Copyright 2019-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -167,6 +167,27 @@ class SexcomRelatedPinExtractor(SexcomPinExtractor):
return self._pagination(url)
+class SexcomPinsExtractor(SexcomExtractor):
+ """Extractor for a user's pins on www.sex.com"""
+ subcategory = "pins"
+ directory_fmt = ("{category}", "{user}")
+ pattern = r"(?:https?://)?(?:www\.)?sex\.com/user/([^/?#]+)/pins/"
+ test = ("https://www.sex.com/user/sirjuan79/pins/", {
+ "count": ">= 15",
+ })
+
+ def __init__(self, match):
+ SexcomExtractor.__init__(self, match)
+ self.user = match.group(1)
+
+ def metadata(self):
+ return {"user": text.unquote(self.user)}
+
+ def pins(self):
+ url = "{}/user/{}/pins/".format(self.root, self.user)
+ return self._pagination(url)
+
+
class SexcomBoardExtractor(SexcomExtractor):
"""Extractor for pins from a board on www.sex.com"""
subcategory = "board"
diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py
index 9b06f92..fcdf18f 100644
--- a/gallery_dl/extractor/tapas.py
+++ b/gallery_dl/extractor/tapas.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -150,7 +150,7 @@ class TapasEpisodeExtractor(TapasExtractor):
subcategory = "episode"
pattern = BASE_PATTERN + r"/episode/(\d+)"
test = ("https://tapas.io/episode/2068651", {
- "url": "f122b05648a9f53c2ddb2f6854a7a80ab946e9e8",
+ "url": "0b53644c864a0a097f65accea6bb620be9671078",
"pattern": "^text:",
"keyword": {
"book": True,
@@ -173,7 +173,7 @@ class TapasEpisodeExtractor(TapasExtractor):
"publish_date": "2021-02-23T16:02:07Z",
"read": bool,
"related_ep_id": None,
- "relative_publish_date": "Feb 23",
+ "relative_publish_date": "Feb 23, 2021",
"scene": 2,
"scheduled": False,
"title": "You are a Tomb Raider (2)",
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index f924292..f459fba 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -168,6 +168,11 @@ class TwitterExtractor(Extractor):
if key in bvals:
value = bvals[key].get("image_value")
if value and "url" in value:
+ base, sep, size = value["url"].rpartition("&name=")
+ if sep:
+ base += sep
+ value["url"] = base + self._size_image
+ value["_fallback"] = self._image_fallback(base)
files.append(value)
return
elif name == "unified_card":
@@ -759,7 +764,10 @@ class TwitterAPI():
"__fs_interactive_text": False,
"__fs_dont_mention_me_view_api_enabled": False,
}
+
+ self._log_warnings = extractor.config("warnings")
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
+ self._user = None
cookies = extractor.session.cookies
cookiedomain = extractor.cookiedomain
@@ -898,6 +906,15 @@ class TwitterAPI():
}
return self._pagination_users(endpoint, variables)
+ def user_by_rest_id(self, rest_id):
+ endpoint = "/graphql/I5nvpI91ljifos1Y3Lltyg/UserByRestId"
+ params = {"variables": self._json_dumps({
+ "userId": rest_id,
+ "withSafetyModeUserFields": True,
+ "withSuperFollowsUserFields": True,
+ })}
+ return self._call(endpoint, params)["data"]["user"]["result"]
+
def user_by_screen_name(self, screen_name):
endpoint = "/graphql/7mjxD3-C6BxitPMVQ6w0-Q/UserByScreenName"
params = {"variables": self._json_dumps({
@@ -909,11 +926,12 @@ class TwitterAPI():
def _user_id_by_screen_name(self, screen_name):
if screen_name.startswith("id:"):
+ self._user = util.SENTINEL
return screen_name[3:]
user = ()
try:
- user = self.user_by_screen_name(screen_name)
+ user = self._user = self.user_by_screen_name(screen_name)
return user["rest_id"]
except KeyError:
if "unavailable_message" in user:
@@ -929,7 +947,7 @@ class TwitterAPI():
endpoint = "/1.1/guest/activate.json"
return str(self._call(endpoint, None, root, "POST")["guest_token"])
- def _call(self, endpoint, params, root=None, method="GET", warning=True):
+ def _call(self, endpoint, params, root=None, method="GET"):
if root is None:
root = self.root
@@ -954,7 +972,7 @@ class TwitterAPI():
if response.status_code < 400:
# success
- if errors and warning:
+ if errors and self._log_warnings:
self.extractor.log.warning(errors)
return data
@@ -965,22 +983,6 @@ class TwitterAPI():
self.extractor.wait(until=until, seconds=seconds)
continue
- if response.status_code == 401 and \
- "have been blocked from viewing" in errors:
- # account blocked
- extr = self.extractor
- if self.headers["x-twitter-auth-type"] and \
- extr.config("logout"):
- guest_token = self._guest_token()
- extr.session.cookies.set(
- "gt", guest_token, domain=extr.cookiedomain)
- extr._cookiefile = None
- del extr.session.cookies["auth_token"]
- self.headers["x-guest-token"] = guest_token
- self.headers["x-twitter-auth-type"] = None
- extr.log.info("Retrying API request as guest")
- continue
-
# error
raise exception.StopExtraction(
"%s %s (%s)", response.status_code, response.reason, errors)
@@ -1070,9 +1072,10 @@ class TwitterAPI():
params["cursor"] = cursor
def _pagination_tweets(self, endpoint, variables, path=None):
+ extr = self.extractor
variables.update(self.variables)
- original_retweets = (self.extractor.retweets == "original")
- pinned_tweet = self.extractor.pinned
+ original_retweets = (extr.retweets == "original")
+ pinned_tweet = extr.pinned
while True:
params = {"variables": self._json_dumps(variables)}
@@ -1083,13 +1086,47 @@ class TwitterAPI():
instructions = (data["user"]["result"]["timeline"]
["timeline"]["instructions"])
else:
+ instructions = data
for key in path:
- data = data[key]
- instructions = data["instructions"]
+ instructions = instructions[key]
+ instructions = instructions["instructions"]
entries = instructions[0]["entries"]
except (KeyError, IndexError):
- return
+ extr.log.debug(data)
+
+ if self._user:
+ user = self._user
+ if user is util.SENTINEL:
+ try:
+ user = self.user_by_rest_id(variables["userId"])
+ except KeyError:
+ raise exception.NotFoundError("user")
+ user = user.get("legacy")
+ if not user:
+ pass
+ elif user.get("blocked_by"):
+ if self.headers["x-twitter-auth-type"] and \
+ extr.config("logout"):
+ guest_token = self._guest_token()
+ extr.session.cookies.set(
+ "gt", guest_token, domain=extr.cookiedomain)
+ extr._cookiefile = None
+ del extr.session.cookies["auth_token"]
+ self.headers["x-guest-token"] = guest_token
+ self.headers["x-twitter-auth-type"] = None
+ extr.log.info("Retrying API request as guest")
+ continue
+ raise exception.AuthorizationError(
+ "{} blocked your account".format(
+ user["screen_name"]))
+ elif user.get("protected"):
+ raise exception.AuthorizationError(
+ "{}'s Tweets are protected".format(
+ user["screen_name"]))
+
+ raise exception.StopExtraction(
+ "Unable to retrieve Tweets from this timeline")
tweets = []
tweet = cursor = None
@@ -1121,7 +1158,7 @@ class TwitterAPI():
["itemContent"]["tweet_results"]["result"])
legacy = tweet["legacy"]
except KeyError:
- self.extractor.log.debug(
+ extr.log.debug(
"Skipping %s (deleted)",
(entry.get("entryId") or "").rpartition("-")[2])
continue
@@ -1160,7 +1197,7 @@ class TwitterAPI():
quoted["legacy"]["quoted_by_id_str"] = tweet["rest_id"]
yield quoted
except KeyError:
- self.extractor.log.debug(
+ extr.log.debug(
"Skipping quote of %s (deleted)",
tweet.get("rest_id"))
continue
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index 1677929..2405dc3 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -121,7 +121,7 @@ class UnsplashImageExtractor(UnsplashExtractor):
"total_collections": int,
"total_likes": int,
"total_photos": int,
- "twitter_username": "dave_hoefler",
+ "twitter_username": None,
"updated_at": str,
"username": "davehoefler",
},
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 9724c4b..ed565bc 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -75,8 +75,8 @@ class VkPhotosExtractor(VkExtractor):
r"|(?!album-?\d+_)([^/?#]+))")
test = (
("https://vk.com/id398982326", {
- "pattern": r"https://sun\d+-\d+\.userapi\.com/c\d+/v\d+"
- r"/[0-9a-f]+/[\w-]+\.jpg",
+ "pattern": r"https://sun\d+-\d+\.userapi\.com/sun\d+-\d+"
+ r"/c\d+/v\d+/[0-9a-f]+/[\w-]+\.jpg",
"count": ">= 35",
"keywords": {
"id": r"re:\d+",
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 0b6a153..81ca87f 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -20,6 +20,7 @@ class WeiboExtractor(Extractor):
filename_fmt = "{status[id]}_{num:>02}.{extension}"
archive_fmt = "{status[id]}_{num}"
root = "https://m.weibo.cn"
+ request_interval = (1.0, 2.0)
def __init__(self, match):
Extractor.__init__(self, match)
@@ -111,22 +112,39 @@ class WeiboUserExtractor(WeiboExtractor):
def __init__(self, match):
WeiboExtractor.__init__(self, match)
- self.user_id = match.group(1)
+ self.user_id = match.group(1)[-10:]
def statuses(self):
url = self.root + "/api/container/getIndex"
- params = {"page": 1, "containerid": "107603" + self.user_id[-10:]}
+ headers = {
+ "Accept": "application/json, text/plain, */*",
+ "X-Requested-With": "XMLHttpRequest",
+ "MWeibo-Pwa": "1",
+ "X-XSRF-TOKEN": None,
+ "Referer": "{}/u/{}".format(self.root, self.user_id),
+ }
+ params = {
+ "type": "uid",
+ "value": self.user_id,
+ "containerid": "107603" + self.user_id,
+ }
while True:
- data = self.request(url, params=params).json()
- cards = data["data"]["cards"]
+ response = self.request(url, params=params, headers=headers)
+ headers["X-XSRF-TOKEN"] = response.cookies.get("XSRF-TOKEN")
+ data = response.json()["data"]
- if not cards:
- return
- for card in cards:
+ for card in data["cards"]:
if "mblog" in card:
yield card["mblog"]
- params["page"] += 1
+
+ info = data.get("cardlistInfo")
+ if not info:
+ continue
+
+ params["since_id"] = sid = info.get("since_id")
+ if not sid:
+ return
class WeiboStatusExtractor(WeiboExtractor):
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py
index f68cb85..05f27f1 100644
--- a/gallery_dl/extractor/wikiart.py
+++ b/gallery_dl/extractor/wikiart.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -70,8 +70,8 @@ class WikiartArtistExtractor(WikiartExtractor):
directory_fmt = ("{category}", "{artist[artistName]}")
pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)/?$"
test = ("https://www.wikiart.org/en/thomas-cole", {
- "url": "deabec0ed7efa97e2a729ff9d08b539143106bac",
- "keyword": "751a5457b71c8704982d3bb6485a214cd3d07bf9",
+ "url": "8514d743382720e6fdab7c9a73faf9e1ec940cfb",
+ "keyword": "58037afba35bfd7b4101c2316975a75d4ee92a68",
})
def __init__(self, match):
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 3e72e9c..3eebf0b 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -53,9 +53,6 @@ class Job():
extr.category = pextr.category
extr.subcategory = pextr.subcategory
- # reuse connection adapters
- extr.session.adapters = pextr.session.adapters
-
# user-supplied metadata
kwdict = extr.config("keywords")
if kwdict:
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 1a399fa..cedbfa0 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.20.3"
+__version__ = "1.20.4"