diff options
| author | 2022-02-10 22:51:06 -0500 | |
|---|---|---|
| committer | 2022-02-10 22:51:06 -0500 | |
| commit | d40f1b6c88f06d71d685b5c216f2c63d314dcb5c (patch) | |
| tree | 808b72655d12b10943bbd1ea67c2a1962fb33aac | |
| parent | 72af53198131d7814b13c722466aa7abd5835bb1 (diff) | |
| parent | 99bc014c924c755f10a4a930b1a83efabd84fde1 (diff) | |
Update upstream source from tag 'upstream/1.20.4'
Update to upstream version '1.20.4'
with Debian dir c430b4f23b67c5441d118c1cb5ff3e6989eb5730
36 files changed, 568 insertions, 207 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cefbb2..091bb90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,29 @@ # Changelog +## 1.20.4 - 2022-02-06 +### Additions +- [e621] add `favorite` extractor ([#2250](https://github.com/mikf/gallery-dl/issues/2250)) +- [hitomi] add `format` option ([#2260](https://github.com/mikf/gallery-dl/issues/2260)) +- [kohlchan] add Kohlchan extractors ([#2251](https://github.com/mikf/gallery-dl/issues/2251)) +- [sexcom] add `pins` extractor ([#2265](https://github.com/mikf/gallery-dl/issues/2265)) +- [twitter] add `warnings` option ([#2258](https://github.com/mikf/gallery-dl/issues/2258)) +- add ability to disable TLS 1.2 ([#2243](https://github.com/mikf/gallery-dl/issues/2243)) +- add examples for custom gelbooru instances ([#2262](https://github.com/mikf/gallery-dl/issues/2262)) +### Fixes +- [bunkr] fix mp4 downloads ([#2239](https://github.com/mikf/gallery-dl/issues/2239)) +- [gelbooru] improve and fix pagination ([#2230](https://github.com/mikf/gallery-dl/issues/2230), [#2232](https://github.com/mikf/gallery-dl/issues/2232)) +- [hitomi] "fix" 403 errors ([#2260](https://github.com/mikf/gallery-dl/issues/2260)) +- [kemonoparty] fix downloading smaller text files ([#2267](https://github.com/mikf/gallery-dl/issues/2267)) +- [patreon] disable TLS 1.2 by default ([#2249](https://github.com/mikf/gallery-dl/issues/2249)) +- [twitter] restore errors for protected timelines etc ([#2237](https://github.com/mikf/gallery-dl/issues/2237)) +- [twitter] restore `logout` functionality ([#1719](https://github.com/mikf/gallery-dl/issues/1719)) +- [twitter] provide fallback URLs for card images +- [weibo] update pagination code ([#2244](https://github.com/mikf/gallery-dl/issues/2244)) + ## 1.20.3 - 2022-01-26 ### Fixes - [kemonoparty] fix DMs extraction ([#2008](https://github.com/mikf/gallery-dl/issues/2008)) -- [twitter] fix crash ob Tweets with deleted quotes ([#2225](https://github.com/mikf/gallery-dl/issues/2225)) +- [twitter] fix crash on Tweets with deleted quotes ([#2225](https://github.com/mikf/gallery-dl/issues/2225)) - [twitter] fix crash on suspended Tweets without `legacy` entry ([#2216](https://github.com/mikf/gallery-dl/issues/2216)) - [twitter] fix crash on unified cards without `type` - [twitter] prevent crash on invalid/deleted Retweets ([#2225](https://github.com/mikf/gallery-dl/issues/2225)) @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.20.3 +Version: 1.20.4 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -98,8 +98,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.3/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.3/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.4/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.4/gallery-dl.bin>`__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml @@ -64,8 +64,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.3/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.3/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.4/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.4/gallery-dl.bin>`__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 49b1af8..9c1ed7f 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2022-01-26" "1.20.3" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2022-02-06" "1.20.4" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 564368c..e44f008 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2022-01-26" "1.20.3" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2022-02-06" "1.20.4" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -1479,6 +1479,22 @@ Possible values are You can use \f[I]"all"\f[] instead of listing all values separately. +.SS extractor.hitomi.format +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"webp"\f[] + +.IP "Description:" 4 +Selects which image format to download. + +Available formats are \f[I]"webp"\f[] and \f[I]"avif"\f[]. + +\f[I]"original"\f[] will try to download the original \f[I]jpg\f[] or \f[I]png\f[] versions, +but is most likely going to fail with \f[I]403 Forbidden\f[] errors. + + .SS extractor.hitomi.metadata .IP "Type:" 6 \f[I]bool\f[] @@ -2477,6 +2493,18 @@ Control video download behavior. * \f[I]false\f[]: Skip video Tweets +.SS extractor.twitter.warnings +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Emit \f[I]logging messages\f[] +for non-fatal errors reported by Twitter's API. + + .SS extractor.unsplash.format .IP "Type:" 6 \f[I]string\f[] diff --git a/docs/gallery-dl-example.conf b/docs/gallery-dl-example.conf index 72e7465..29b2507 100644 --- a/docs/gallery-dl-example.conf +++ b/docs/gallery-dl-example.conf @@ -6,6 +6,12 @@ "#": "set global archive file for all extractors", "archive": "~/gallery-dl/archive.sqlite3", + "#": "add two custom keywords into the metadata dictionary", + "#": "these can be used to further refine your output directories or filenames", + "keywords": {"bkey": "", "ckey": ""}, + "#": "make sure that custom keywords are empty, i.e. they don't appear unless specified by the user", + "keywords-default": "", + "#": "replace invalid path characters with unicode alternatives", "path-restrict": { "\\": "⧹", @@ -212,6 +218,74 @@ "#": "add two other foolfuuka 4chan archives", "fireden-onion": {"root": "http://ydt6jy2ng3s3xg2e.onion"}, "scalearchive" : {"root": "https://archive.scaled.team" } + }, + + "gelbooru_v01": + { + "#": "add a custom gelbooru_v01 instance", + "#": "this is just an example, this specific instance is already included!", + "allgirlbooru": {"root": "https://allgirl.booru.org"}, + + "#": "the following options are used for all gelbooru_v01 instances", + "tag": + { + "directory": { + "locals().get('bkey')": ["Booru", "AllGirlBooru", "Tags", "{bkey}", "{ckey}", "{search_tags}"], + "" : ["Booru", "AllGirlBooru", "Tags", "_Unsorted", "{search_tags}"] + } + }, + "post": + { + "directory": ["Booru", "AllGirlBooru", "Posts"] + }, + "archive": "~/gallery-dl/custom-archive-file-for-gelbooru_v01_instances.db", + "filename": "{tags}_{id}_{md5}.{extension}", + "sleep-request": [0, 1.2] + }, + + "gelbooru_v02": + { + "#": "add a custom gelbooru_v02 instance", + "#": "this is just an example, this specific instance is already included!", + "tbib": + { + "root": "https://tbib.org", + "#": "some sites have different domains for API access", + "#": "use the 'api_root' option in addition to the 'root' setting here" + } + }, + + "tbib": { + "#": "the following options are only used for TBIB", + "#": "gelbooru_v02 has four subcategories at the moment, use custom directory settings for all of these", + "tag": + { + "directory": { + "locals().get('bkey')": ["Other Boorus", "TBIB", "Tags", "{bkey}", "{ckey}", "{search_tags}"], + "" : ["Other Boorus", "TBIB", "Tags", "_Unsorted", "{search_tags}"] + } + }, + "pool": + { + "directory": { + "locals().get('bkey')": ["Other Boorus", "TBIB", "Pools", "{bkey}", "{ckey}", "{pool}"], + "" : ["Other Boorus", "TBIB", "Pools", "_Unsorted", "{pool}"] + } + }, + "favorite": + { + "directory": { + "locals().get('bkey')": ["Other Boorus", "TBIB", "Favorites", "{bkey}", "{ckey}", "{favorite_id}"], + "" : ["Other Boorus", "TBIB", "Favorites", "_Unsorted", "{favorite_id}"] + } + }, + "post": + { + "directory": ["Other Boorus", "TBIB", "Posts"] + }, + "archive": "~/gallery-dl/custom-archive-file-for-TBIB.db", + "filename": "{id}_{md5}.{extension}", + "sleep-request": [0, 1.2] } }, diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 30be840..ab5f6f9 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -114,6 +114,7 @@ }, "hitomi": { + "format": "webp", "metadata": false }, "idolcomplex": diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 6a91b03..c086512 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.20.3 +Version: 1.20.4 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -98,8 +98,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.3/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.3/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.4/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.4/gallery-dl.bin>`__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 8ddae52..69a8b70 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -103,6 +103,7 @@ gallery_dl/extractor/kabeuchi.py gallery_dl/extractor/keenspot.py gallery_dl/extractor/kemonoparty.py gallery_dl/extractor/khinsider.py +gallery_dl/extractor/kohlchan.py gallery_dl/extractor/komikcast.py gallery_dl/extractor/lineblog.py gallery_dl/extractor/livedoor.py diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index d2efd3f..91ce731 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2021 Mike Fährmann +# Copyright 2014-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -148,9 +148,15 @@ class HttpDownloader(DownloaderBase): # check for invalid responses validate = kwdict.get("_http_validate") - if validate and not validate(response): - self.log.warning("Invalid response") - return False + if validate: + result = validate(response) + if isinstance(result, str): + url = result + tries -= 1 + continue + if not result: + self.log.warning("Invalid response") + return False # set missing filename extension from MIME type if not pathfmt.extension: diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 65c994d..e7d71d6 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -68,6 +68,7 @@ modules = [ "keenspot", "kemonoparty", "khinsider", + "kohlchan", "komikcast", "lineblog", "livedoor", diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index 71d3320..994a701 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2021 Mike Fährmann +# Copyright 2018-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -84,7 +84,7 @@ class BehanceGalleryExtractor(BehanceExtractor): }), # 'video' modules (#1282) ("https://www.behance.net/gallery/101185577/COLCCI", { - "pattern": r"ytdl:https://adobeprod-a\.akamaihd\.net/", + "pattern": r"ytdl:https://cdn-prod-ccv\.adobe\.com/", "count": 3, }), ) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 52e5199..1d81dfc 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2021 Mike Fährmann +# Copyright 2014-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -38,6 +38,7 @@ class Extractor(): request_interval = 0.0 request_interval_min = 0.0 request_timestamp = 0.0 + tls12 = True def __init__(self, match): self.log = logging.getLogger(self.category) @@ -219,14 +220,7 @@ class Extractor(): self.session = session = requests.Session() headers = session.headers headers.clear() - - source_address = self.config("source-address") - if source_address: - if isinstance(source_address, str): - source_address = (source_address, 0) - else: - source_address = (source_address[0], source_address[1]) - session.mount("http://", SourceAdapter(source_address)) + ssl_options = ssl_ciphers = 0 browser = self.config("browser") or self.browser if browser and isinstance(browser, str): @@ -243,12 +237,21 @@ class Extractor(): platform = "Macintosh; Intel Mac OS X 11.5" if browser == "chrome": - _emulate_browser_chrome(session, platform, source_address) + if platform.startswith("Macintosh"): + platform = platform.replace(".", "_") + "_2" else: - _emulate_browser_firefox(session, platform, source_address) + browser = "firefox" + + for key, value in HTTP_HEADERS[browser]: + if value and "{}" in value: + headers[key] = value.format(platform) + else: + headers[key] = value + + ssl_options |= (ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 | + ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1) + ssl_ciphers = SSL_CIPHERS[browser] else: - if source_address: - session.mount("https://", SourceAdapter(source_address)) headers["User-Agent"] = self.config("user-agent", ( "Mozilla/5.0 (Windows NT 10.0; Win64; x64; " "rv:91.0) Gecko/20100101 Firefox/91.0")) @@ -260,11 +263,31 @@ class Extractor(): if custom_headers: headers.update(custom_headers) - ciphers = self.config("ciphers") - if ciphers: - if isinstance(ciphers, list): - ciphers = ":".join(ciphers) - session.mount("https://", HTTPSAdapter(ciphers)) + custom_ciphers = self.config("ciphers") + if custom_ciphers: + if isinstance(custom_ciphers, list): + ssl_ciphers = ":".join(custom_ciphers) + else: + ssl_ciphers = custom_ciphers + + source_address = self.config("source-address") + if source_address: + if isinstance(source_address, str): + source_address = (source_address, 0) + else: + source_address = (source_address[0], source_address[1]) + + tls12 = self.config("tls12") + if tls12 is None: + tls12 = self.tls12 + if not tls12: + ssl_options |= ssl.OP_NO_TLSv1_2 + self.log.debug("TLS 1.2 disabled.") + + adapter = _build_requests_adapter( + ssl_options, ssl_ciphers, source_address) + session.mount("https://", adapter) + session.mount("http://", adapter) def _init_proxies(self): """Update the session's proxy map""" @@ -615,29 +638,10 @@ class BaseExtractor(Extractor): ) -class SourceAdapter(HTTPAdapter): +class RequestsAdapter(HTTPAdapter): - def __init__(self, source_address): - self.source_address = source_address - HTTPAdapter.__init__(self) - - def init_poolmanager(self, *args, **kwargs): - kwargs["source_address"] = self.source_address - return HTTPAdapter.init_poolmanager(self, *args, **kwargs) - - def proxy_manager_for(self, *args, **kwargs): - kwargs["source_address"] = self.source_address - return HTTPAdapter.proxy_manager_for(self, *args, **kwargs) - - -class HTTPSAdapter(HTTPAdapter): - - def __init__(self, ciphers, source_address=None): - context = self.ssl_context = ssl.create_default_context() - context.options |= (ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 | - ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1) - context.set_ecdh_curve("prime256v1") - context.set_ciphers(ciphers) + def __init__(self, ssl_context=None, source_address=None): + self.ssl_context = ssl_context self.source_address = source_address HTTPAdapter.__init__(self) @@ -652,19 +656,59 @@ class HTTPSAdapter(HTTPAdapter): return HTTPAdapter.proxy_manager_for(self, *args, **kwargs) -def _emulate_browser_firefox(session, platform, source_address): - headers = session.headers - headers["User-Agent"] = ("Mozilla/5.0 (" + platform + "; rv:91.0) " - "Gecko/20100101 Firefox/91.0") - headers["Accept"] = ("text/html,application/xhtml+xml," - "application/xml;q=0.9,image/webp,*/*;q=0.8") - headers["Accept-Language"] = "en-US,en;q=0.5" - headers["Accept-Encoding"] = "gzip, deflate" - headers["Referer"] = None - headers["Upgrade-Insecure-Requests"] = "1" - headers["Cookie"] = None +def _build_requests_adapter(ssl_options, ssl_ciphers, source_address): + key = (ssl_options, ssl_ciphers, source_address) + try: + return _adapter_cache[key] + except KeyError: + pass - session.mount("https://", HTTPSAdapter( + if ssl_options or ssl_ciphers: + ssl_context = ssl.create_default_context() + if ssl_options: + ssl_context.options |= ssl_options + if ssl_ciphers: + ssl_context.set_ecdh_curve("prime256v1") + ssl_context.set_ciphers(ssl_ciphers) + else: + ssl_context = None + + adapter = _adapter_cache[key] = RequestsAdapter( + ssl_context, source_address) + return adapter + + +_adapter_cache = {} + + +HTTP_HEADERS = { + "firefox": ( + ("User-Agent", "Mozilla/5.0 ({}; rv:91.0) " + "Gecko/20100101 Firefox/91.0"), + ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9," + "image/avif,*/*;q=0.8"), + ("Accept-Language", "en-US,en;q=0.5"), + ("Accept-Encoding", "gzip, deflate"), + ("Referer", None), + ("Connection", "keep-alive"), + ("Upgrade-Insecure-Requests", "1"), + ("Cookie", None), + ), + "chrome": ( + ("Upgrade-Insecure-Requests", "1"), + ("User-Agent", "Mozilla/5.0 ({}) AppleWebKit/537.36 (KHTML, " + "like Gecko) Chrome/92.0.4515.131 Safari/537.36"), + ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9," + "image/webp,image/apng,*/*;q=0.8"), + ("Referer", None), + ("Accept-Encoding", "gzip, deflate"), + ("Accept-Language", "en-US,en;q=0.9"), + ("Cookie", None), + ), +} + +SSL_CIPHERS = { + "firefox": ( "TLS_AES_128_GCM_SHA256:" "TLS_CHACHA20_POLY1305_SHA256:" "TLS_AES_256_GCM_SHA384:" @@ -678,32 +722,13 @@ def _emulate_browser_firefox(session, platform, source_address): "ECDHE-ECDSA-AES128-SHA:" "ECDHE-RSA-AES128-SHA:" "ECDHE-RSA-AES256-SHA:" - "DHE-RSA-AES128-SHA:" - "DHE-RSA-AES256-SHA:" + "AES128-GCM-SHA256:" + "AES256-GCM-SHA384:" "AES128-SHA:" "AES256-SHA:" - "DES-CBC3-SHA", - source_address - )) - - -def _emulate_browser_chrome(session, platform, source_address): - if platform.startswith("Macintosh"): - platform = platform.replace(".", "_") + "_2" - - headers = session.headers - headers["Upgrade-Insecure-Requests"] = "1" - headers["User-Agent"] = ( - "Mozilla/5.0 (" + platform + ") AppleWebKit/537.36 " - "(KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36") - headers["Accept"] = ("text/html,application/xhtml+xml,application/xml;" - "q=0.9,image/webp,image/apng,*/*;q=0.8") - headers["Referer"] = None - headers["Accept-Encoding"] = "gzip, deflate" - headers["Accept-Language"] = "en-US,en;q=0.9" - headers["Cookie"] = None - - session.mount("https://", HTTPSAdapter( + "DES-CBC3-SHA" + ), + "chrome": ( "TLS_AES_128_GCM_SHA256:" "TLS_AES_256_GCM_SHA384:" "TLS_CHACHA20_POLY1305_SHA256:" @@ -719,9 +744,9 @@ def _emulate_browser_chrome(session, platform, source_address): "AES256-GCM-SHA384:" "AES128-SHA:" "AES256-SHA:" - "DES-CBC3-SHA", - source_address - )) + "DES-CBC3-SHA" + ), +} # Undo automatic pyOpenSSL injection by requests diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py index ab1044f..e5c5c01 100644 --- a/gallery_dl/extractor/dynastyscans.py +++ b/gallery_dl/extractor/dynastyscans.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2021 Mike Fährmann +# Copyright 2015-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -115,8 +115,8 @@ class DynastyscansSearchExtractor(DynastyscansBase, Extractor): pattern = BASE_PATTERN + r"/images/?(?:\?([^#]+))?$" test = ( ("https://dynasty-scans.com/images?with[]=4930&with[]=5211", { - "url": "6b570eedd8a741c2cd34fb98b22a49d772f84191", - "keyword": "fa7ff94f82cdf942f7734741d758f160a6b0905a", + "url": "22cf0fb64e12b29e79b0a3d26666086a48f9916a", + "keyword": "11cbc555a15528d25567977b8808e10369c4c3ee", }), ("https://dynasty-scans.com/images", { "range": "1", diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py index 4ad19cd..213178c 100644 --- a/gallery_dl/extractor/e621.py +++ b/gallery_dl/extractor/e621.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2020 Mike Fährmann +# Copyright 2014-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,6 +10,7 @@ from .common import Extractor, Message from . import danbooru +from .. import text BASE_PATTERN = r"(?:https?://)?e(621|926)\.net" @@ -119,3 +120,30 @@ class E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor): "count": ">= 70", }) ) + + +class E621FavoriteExtractor(E621Extractor): + """Extractor for e621 favorites""" + subcategory = "favorite" + directory_fmt = ("{category}", "Favorites", "{user_id}") + archive_fmt = "f_{user_id}_{id}" + pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?" + test = ( + ("https://e621.net/favorites"), + ("https://e621.net/favorites?page=2&user_id=53275", { + "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+", + "count": "> 260", + }) + ) + + def __init__(self, match): + super().__init__(match) + self.query = text.parse_query(match.group(2)) + + def metadata(self): + return {"user_id": self.query.get("user_id", "")} + + def posts(self): + if self.page_start is None: + self.page_start = 1 + return self._pagination("/favorites.json", self.query, True) diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index cf9706b..c23c36f 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2021 Mike Fährmann +# Copyright 2014-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -122,7 +122,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): "date": "dt:2018-03-18 20:15:00", "eh_category": "Non-H", "expunged": False, - "favorites": "19", + "favorites": "20", "filecount": "4", "filesize": 1488978, "gid": 1200119, @@ -137,7 +137,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): "parody:komi-san wa komyushou desu.", "character:shouko komi", "group:seventh lowlife", - "sample", + "other:sample", ], "thumb": "https://exhentai.org/t/ce/0a/ce0a5bcb583229a9b07c0f8" "3bcb1630ab1350640-624622-736-1036-jpg_250.jpg", diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 6ddd689..04e5926 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2021 Mike Fährmann +# Copyright 2019-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -122,7 +122,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): "url": "d309713d2f838797096b3e9cb44fe514a9c9d07a", }), ("https://desuarchive.org/a/thread/159542679/", { - "url": "2bddbe03b01b4630337f6916f6df36d1d443b7b8", + "url": "e7d624aded15a069194e38dc731ec23217a422fb", }), ("https://boards.fireden.net/sci/thread/11264294/", { "url": "61cab625c95584a12a30049d054931d64f8d20aa", @@ -131,10 +131,10 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): "url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f", }), ("https://rbt.asia/g/thread/61487650/", { - "url": "b4692707cddb4ad1c9ba1cde77c4703025cb86e5", + "url": "fadd274b25150a1bdf03a40c58db320fa3b617c4", }), ("https://archive.rebeccablacktech.com/g/thread/61487650/", { - "url": "b4692707cddb4ad1c9ba1cde77c4703025cb86e5", + "url": "fadd274b25150a1bdf03a40c58db320fa3b617c4", }), ("https://thebarchive.com/b/thread/739772332/", { "url": "e8b18001307d130d67db31740ce57c8561b5d80c", diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index fd26192..e8bee37 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -33,28 +33,20 @@ class GelbooruBase(): def _pagination(self, params): params["pid"] = self.page_start params["limit"] = self.per_page + limit = self.per_page // 2 - post = None while True: - try: - posts = self._api_request(params) - except ValueError: - if "tags" not in params or post is None: - raise - taglist = [tag for tag in params["tags"].split() - if not tag.startswith("id:<")] - taglist.append("id:<" + str(post.attrib["id"])) - params["tags"] = " ".join(taglist) - params["pid"] = 0 - continue - - post = None + posts = self._api_request(params) + for post in posts: yield post - if len(posts) < self.per_page: + if len(posts) < limit: return - params["pid"] += 1 + + if "pid" in params: + del params["pid"] + params["tags"] = "{} id:<{}".format(self.tags, post["id"]) @staticmethod def _file_url(post): @@ -81,9 +73,12 @@ class GelbooruTagExtractor(GelbooruBase, ("https://gelbooru.com/index.php?page=post&s=list&tags=bonocho", { "count": 5, }), - ("https://gelbooru.com/index.php?page=post&s=list&tags=bonocho", { - "options": (("api", False),), - "count": 5, + ("https://gelbooru.com/index.php?page=post&s=list&tags=meiya_neon", { + "range": "196-204", + "url": "845a61aa1f90fb4ced841e8b7e62098be2e967bf", + "pattern": r"https://img\d\.gelbooru\.com" + r"/images/../../[0-9a-f]{32}\.jpg", + "count": 9, }), ) diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py index 8da0bde..7e16a51 100644 --- a/gallery_dl/extractor/gelbooru_v02.py +++ b/gallery_dl/extractor/gelbooru_v02.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann +# Copyright 2021-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -122,9 +122,9 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor): pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)" test = ( ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", { - "content": "97e4bbf86c3860be18de384d02d544251afe1d45", + "content": "622e80be3f496672c44aab5c47fbc6941c61bc79", "pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg", - "count": 1, + "count": 2, }), ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", { "url": "17c61b386530cf4c30842c9f580d15ef1cd09586", diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index e132bf9..34eaaab 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2021 Mike Fährmann +# Copyright 2015-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -26,34 +26,38 @@ class HitomiGalleryExtractor(GalleryExtractor): r"/(?:[^/?#]+-)?(\d+)") test = ( ("https://hitomi.la/galleries/867789.html", { - "pattern": r"https://[a-c]b.hitomi.la/images/1641140516/\d+" - r"/[0-9a-f]{64}\.jpg", - "keyword": "4873ef9a523621fc857b114e0b2820ba4066e9ae", + "pattern": r"https://[a-c]a\.hitomi\.la/webp/\d+/\d+" + r"/[0-9a-f]{64}\.webp", + "keyword": "4b584d09d535694d7d757c47daf5c15d116420d2", "options": (("metadata", True),), "count": 16, }), # download test ("https://hitomi.la/galleries/1401410.html", { "range": "1", - "content": "b3ca8c6c8cc5826cf8b4ceb7252943abad7b8b4c", + "content": "d75d5a3d1302a48469016b20e53c26b714d17745", }), # Game CG with scenes (#321) ("https://hitomi.la/galleries/733697.html", { - "url": "d4854175da2b5fa4ae62749266c7be0bf237dc99", "count": 210, }), # fallback for galleries only available through /reader/ URLs ("https://hitomi.la/galleries/1045954.html", { - "url": "eea99c3745719a7a392150335e6ae3f73faa0b85", "count": 1413, }), # gallery with "broken" redirect ("https://hitomi.la/cg/scathacha-sama-okuchi-ecchi-1291900.html", { "count": 10, + "options": (("format", "original"),), + "pattern": r"https://[a-c]b\.hitomi\.la/images/\d+/\d+" + r"/[0-9a-f]{64}\.jpg", }), # no tags ("https://hitomi.la/cg/1615823.html", { "count": 22, + "options": (("format", "avif"),), + "pattern": r"https://[a-c]a\.hitomi\.la/avif/\d+/\d+" + r"/[0-9a-f]{64}\.avif", }), ("https://hitomi.la/manga/amazon-no-hiyaku-867789.html"), ("https://hitomi.la/manga/867789.html"), @@ -140,16 +144,24 @@ class HitomiGalleryExtractor(GalleryExtractor): # see https://ltn.hitomi.la/gg.js gg_m, gg_b, gg_default = _parse_gg(self) + fmt = self.config("format") or "webp" + if fmt == "original": + subdomain, fmt, ext = "b", "images", None + else: + subdomain, ext = "a", fmt + result = [] for image in self.info["files"]: ihash = image["hash"] idata = text.nameext_from_url(image["name"]) + if ext: + idata["extension"] = ext # see https://ltn.hitomi.la/common.js inum = int(ihash[-1] + ihash[-3:-1], 16) - url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format( + url = "https://{}{}.hitomi.la/{}/{}/{}/{}.{}".format( chr(97 + gg_m.get(inum, gg_default)), - gg_b, inum, ihash, idata["extension"], + subdomain, fmt, gg_b, inum, ihash, idata["extension"], ) result.append((url, idata)) return result diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index beb992c..e8fcd1a 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -45,11 +45,8 @@ class KemonopartyExtractor(Extractor): comments = self.config("comments") username = dms = None - # prevent files from coomer.party to be sent with gzip compression - if "coomer" in self.root: - headers = {"Accept-Encoding": "identity"} - else: - headers = None + # prevent files to be sent with gzip compression + headers = {"Accept-Encoding": "identity"} if self.config("metadata"): username = text.unescape(text.extract( diff --git a/gallery_dl/extractor/kohlchan.py b/gallery_dl/extractor/kohlchan.py new file mode 100644 index 0000000..c96dedc --- /dev/null +++ b/gallery_dl/extractor/kohlchan.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://kohlchan.net/""" + +from .common import Extractor, Message +from .. import text +import itertools + + +class KohlchanThreadExtractor(Extractor): + """Extractor for Kohlchan threads""" + category = "kohlchan" + subcategory = "thread" + directory_fmt = ("{category}", "{boardUri}", + "{threadId} {subject|message[:50]}") + filename_fmt = "{postId}{num:?-//} {filename}.{extension}" + archive_fmt = "{boardUri}_{postId}_{num}" + pattern = r"(?:https?://)?kohlchan\.net/([^/?#]+)/res/(\d+)" + test = ("https://kohlchan.net/a/res/4594.html", { + "pattern": r"https://kohlchan\.net/\.media/[0-9a-f]{64}(\.\w+)?$", + "count": ">= 80", + }) + + def __init__(self, match): + Extractor.__init__(self, match) + self.board, self.thread = match.groups() + + def items(self): + url = "https://kohlchan.net/{}/res/{}.json".format( + self.board, self.thread) + thread = self.request(url).json() + thread["postId"] = thread["threadId"] + posts = thread.pop("posts") + + yield Message.Directory, thread + + for post in itertools.chain((thread,), posts): + files = post.pop("files", ()) + if files: + thread.update(post) + for num, file in enumerate(files): + file.update(thread) + file["num"] = num + url = "https://kohlchan.net" + file["path"] + text.nameext_from_url(file["originalName"], file) + yield Message.Url, url, file + + +class KohlchanBoardExtractor(Extractor): + """Extractor for Kohlchan boards""" + category = "kohlchan" + subcategory = "board" + pattern = (r"(?:https?://)?kohlchan\.net" + r"/([^/?#]+)/(?:(?:catalog|\d+)\.html)?$") + test = ( + ("https://kohlchan.net/a/", { + "pattern": KohlchanThreadExtractor.pattern, + "count": ">= 100", + }), + ("https://kohlchan.net/a/2.html"), + ("https://kohlchan.net/a/catalog.html"), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.board = match.group(1) + + def items(self): + url = "https://kohlchan.net/{}/catalog.json".format(self.board) + for thread in self.request(url).json(): + url = "https://kohlchan.net/{}/res/{}.html".format( + self.board, thread["threadId"]) + thread["_extractor"] = KohlchanThreadExtractor + yield Message.Queue, url, thread diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py index cdaf22b..c63fa51 100644 --- a/gallery_dl/extractor/lolisafe.py +++ b/gallery_dl/extractor/lolisafe.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann +# Copyright 2021-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -42,6 +42,11 @@ class LolisafelbumExtractor(LolisafeExtractor): "num": int, }, }), + # mp4 (#2239) + ("https://bunkr.is/a/ptRHaCn2", { + "pattern": r"https://cdn\.bunkr\.is/_-RnHoW69L\.mp4", + "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471", + }), ("https://bunkr.to/a/Lktg9Keq"), ("https://zz.ht/a/lop7W6EZ", { "pattern": r"https://z\.zz\.fo/(4anuY|ih560)\.png", @@ -66,6 +71,11 @@ class LolisafelbumExtractor(LolisafeExtractor): url = file["file"] text.nameext_from_url(url, data) data["name"], sep, data["id"] = data["filename"].rpartition("-") + + if data["extension"] == "mp4": + data["_http_validate"] = self._check_rewrite + else: + data["_http_validate"] = None yield Message.Url, url, data def fetch_album(self, album_id): @@ -77,3 +87,13 @@ class LolisafelbumExtractor(LolisafeExtractor): "album_name": text.unescape(data["title"]), "count" : data["count"], } + + @staticmethod + def _check_rewrite(response): + if response.history and response.headers.get( + "Content-Type").startswith("text/html"): + # consume content to reuse connection + response.content + # rewrite to download URL + return response.url.replace("/v/", "/d/", 1) + return True diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index 6761b55..b5db3dd 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2021 Mike Fährmann +# Copyright 2016-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -72,7 +72,7 @@ class LusciousAlbumExtractor(LusciousExtractor): "permissions" : list, "rating" : float, "slug" : "okinami-no-koigokoro", - "status" : str, + "status" : None, "tags" : list, "title" : "Okinami no Koigokoro", "url" : "/albums/okinami-no-koigokoro_277031/", @@ -92,7 +92,7 @@ class LusciousAlbumExtractor(LusciousExtractor): "like_status" : "none", "position" : int, "resolution" : r"re:\d+x\d+", - "status" : str, + "status" : None, "tags" : list, "thumbnail" : str, "title" : str, diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py index 653c61a..f655f94 100644 --- a/gallery_dl/extractor/mangahere.py +++ b/gallery_dl/extractor/mangahere.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2020 Mike Fährmann +# Copyright 2015-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract manga-chapters and entire manga from https://www.mangahere.cc/""" +"""Extractors for https://www.mangahere.cc/""" from .common import ChapterExtractor, MangaExtractor from .. import text @@ -91,11 +91,12 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor): """Extractor for manga from mangahere.cc""" chapterclass = MangahereChapterExtractor pattern = (r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]" - r"(/manga/[^/]+)/?(?:#.*)?$") + r"(/manga/[^/?#]+/?)(?:#.*)?$") test = ( ("https://www.mangahere.cc/manga/aria/", { - "url": "23ad9256f7392de5973b79a36f6875e9fdcb7563", - "keyword": "79e326641e7d5d2fed43a1eb9949471b8162a9e0", + "url": "dc7f8954efbe87d9fd670c54e5edb5230c01f767", + "keyword": "864524eed2dc6a73e366f6ba400b80d894f99b5a", + "count": 69, }), ("https://www.mangahere.cc/manga/hiyokoi/#50", { "url": "654850570aa03825cd57e2ae2904af489602c523", diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index a7e0ff1..051f1ef 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -25,6 +25,7 @@ class PatreonExtractor(Extractor): filename_fmt = "{id}_{title}_{num:>02}.{extension}" archive_fmt = "{id}_{num}" browser = "firefox" + tls12 = False _warning = True def items(self): diff --git a/gallery_dl/extractor/pillowfort.py b/gallery_dl/extractor/pillowfort.py index 3c3fcd4..bdd9f21 100644 --- a/gallery_dl/extractor/pillowfort.py +++ b/gallery_dl/extractor/pillowfort.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann +# Copyright 2021-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -145,7 +145,7 @@ class PillowfortPostExtractor(PillowfortExtractor): "id": int, "last_activity": str, "last_activity_elapsed": str, - "last_edited_at": None, + "last_edited_at": str, "likes_count": int, "media_type": "picture", "nsfw": False, @@ -169,7 +169,7 @@ class PillowfortPostExtractor(PillowfortExtractor): "tags": list, "time_elapsed": str, "timestamp": str, - "title": "What is Pillowfort.io? ", + "title": "What is Pillowfort.social?", "updated_at": str, "url": r"re:https://img3.pillowfort.social/posts/.*\.png", "user_id": 5, diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py index 9f4bfc3..edf35da 100644 --- a/gallery_dl/extractor/sexcom.py +++ b/gallery_dl/extractor/sexcom.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -167,6 +167,27 @@ class SexcomRelatedPinExtractor(SexcomPinExtractor): return self._pagination(url) +class SexcomPinsExtractor(SexcomExtractor): + """Extractor for a user's pins on www.sex.com""" + subcategory = "pins" + directory_fmt = ("{category}", "{user}") + pattern = r"(?:https?://)?(?:www\.)?sex\.com/user/([^/?#]+)/pins/" + test = ("https://www.sex.com/user/sirjuan79/pins/", { + "count": ">= 15", + }) + + def __init__(self, match): + SexcomExtractor.__init__(self, match) + self.user = match.group(1) + + def metadata(self): + return {"user": text.unquote(self.user)} + + def pins(self): + url = "{}/user/{}/pins/".format(self.root, self.user) + return self._pagination(url) + + class SexcomBoardExtractor(SexcomExtractor): """Extractor for pins from a board on www.sex.com""" subcategory = "board" diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py index 9b06f92..fcdf18f 100644 --- a/gallery_dl/extractor/tapas.py +++ b/gallery_dl/extractor/tapas.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann +# Copyright 2021-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -150,7 +150,7 @@ class TapasEpisodeExtractor(TapasExtractor): subcategory = "episode" pattern = BASE_PATTERN + r"/episode/(\d+)" test = ("https://tapas.io/episode/2068651", { - "url": "f122b05648a9f53c2ddb2f6854a7a80ab946e9e8", + "url": "0b53644c864a0a097f65accea6bb620be9671078", "pattern": "^text:", "keyword": { "book": True, @@ -173,7 +173,7 @@ class TapasEpisodeExtractor(TapasExtractor): "publish_date": "2021-02-23T16:02:07Z", "read": bool, "related_ep_id": None, - "relative_publish_date": "Feb 23", + "relative_publish_date": "Feb 23, 2021", "scene": 2, "scheduled": False, "title": "You are a Tomb Raider (2)", diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index f924292..f459fba 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -168,6 +168,11 @@ class TwitterExtractor(Extractor): if key in bvals: value = bvals[key].get("image_value") if value and "url" in value: + base, sep, size = value["url"].rpartition("&name=") + if sep: + base += sep + value["url"] = base + self._size_image + value["_fallback"] = self._image_fallback(base) files.append(value) return elif name == "unified_card": @@ -759,7 +764,10 @@ class TwitterAPI(): "__fs_interactive_text": False, "__fs_dont_mention_me_view_api_enabled": False, } + + self._log_warnings = extractor.config("warnings") self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode + self._user = None cookies = extractor.session.cookies cookiedomain = extractor.cookiedomain @@ -898,6 +906,15 @@ class TwitterAPI(): } return self._pagination_users(endpoint, variables) + def user_by_rest_id(self, rest_id): + endpoint = "/graphql/I5nvpI91ljifos1Y3Lltyg/UserByRestId" + params = {"variables": self._json_dumps({ + "userId": rest_id, + "withSafetyModeUserFields": True, + "withSuperFollowsUserFields": True, + })} + return self._call(endpoint, params)["data"]["user"]["result"] + def user_by_screen_name(self, screen_name): endpoint = "/graphql/7mjxD3-C6BxitPMVQ6w0-Q/UserByScreenName" params = {"variables": self._json_dumps({ @@ -909,11 +926,12 @@ class TwitterAPI(): def _user_id_by_screen_name(self, screen_name): if screen_name.startswith("id:"): + self._user = util.SENTINEL return screen_name[3:] user = () try: - user = self.user_by_screen_name(screen_name) + user = self._user = self.user_by_screen_name(screen_name) return user["rest_id"] except KeyError: if "unavailable_message" in user: @@ -929,7 +947,7 @@ class TwitterAPI(): endpoint = "/1.1/guest/activate.json" return str(self._call(endpoint, None, root, "POST")["guest_token"]) - def _call(self, endpoint, params, root=None, method="GET", warning=True): + def _call(self, endpoint, params, root=None, method="GET"): if root is None: root = self.root @@ -954,7 +972,7 @@ class TwitterAPI(): if response.status_code < 400: # success - if errors and warning: + if errors and self._log_warnings: self.extractor.log.warning(errors) return data @@ -965,22 +983,6 @@ class TwitterAPI(): self.extractor.wait(until=until, seconds=seconds) continue - if response.status_code == 401 and \ - "have been blocked from viewing" in errors: - # account blocked - extr = self.extractor - if self.headers["x-twitter-auth-type"] and \ - extr.config("logout"): - guest_token = self._guest_token() - extr.session.cookies.set( - "gt", guest_token, domain=extr.cookiedomain) - extr._cookiefile = None - del extr.session.cookies["auth_token"] - self.headers["x-guest-token"] = guest_token - self.headers["x-twitter-auth-type"] = None - extr.log.info("Retrying API request as guest") - continue - # error raise exception.StopExtraction( "%s %s (%s)", response.status_code, response.reason, errors) @@ -1070,9 +1072,10 @@ class TwitterAPI(): params["cursor"] = cursor def _pagination_tweets(self, endpoint, variables, path=None): + extr = self.extractor variables.update(self.variables) - original_retweets = (self.extractor.retweets == "original") - pinned_tweet = self.extractor.pinned + original_retweets = (extr.retweets == "original") + pinned_tweet = extr.pinned while True: params = {"variables": self._json_dumps(variables)} @@ -1083,13 +1086,47 @@ class TwitterAPI(): instructions = (data["user"]["result"]["timeline"] ["timeline"]["instructions"]) else: + instructions = data for key in path: - data = data[key] - instructions = data["instructions"] + instructions = instructions[key] + instructions = instructions["instructions"] entries = instructions[0]["entries"] except (KeyError, IndexError): - return + extr.log.debug(data) + + if self._user: + user = self._user + if user is util.SENTINEL: + try: + user = self.user_by_rest_id(variables["userId"]) + except KeyError: + raise exception.NotFoundError("user") + user = user.get("legacy") + if not user: + pass + elif user.get("blocked_by"): + if self.headers["x-twitter-auth-type"] and \ + extr.config("logout"): + guest_token = self._guest_token() + extr.session.cookies.set( + "gt", guest_token, domain=extr.cookiedomain) + extr._cookiefile = None + del extr.session.cookies["auth_token"] + self.headers["x-guest-token"] = guest_token + self.headers["x-twitter-auth-type"] = None + extr.log.info("Retrying API request as guest") + continue + raise exception.AuthorizationError( + "{} blocked your account".format( + user["screen_name"])) + elif user.get("protected"): + raise exception.AuthorizationError( + "{}'s Tweets are protected".format( + user["screen_name"])) + + raise exception.StopExtraction( + "Unable to retrieve Tweets from this timeline") tweets = [] tweet = cursor = None @@ -1121,7 +1158,7 @@ class TwitterAPI(): ["itemContent"]["tweet_results"]["result"]) legacy = tweet["legacy"] except KeyError: - self.extractor.log.debug( + extr.log.debug( "Skipping %s (deleted)", (entry.get("entryId") or "").rpartition("-")[2]) continue @@ -1160,7 +1197,7 @@ class TwitterAPI(): quoted["legacy"]["quoted_by_id_str"] = tweet["rest_id"] yield quoted except KeyError: - self.extractor.log.debug( + extr.log.debug( "Skipping quote of %s (deleted)", tweet.get("rest_id")) continue diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py index 1677929..2405dc3 100644 --- a/gallery_dl/extractor/unsplash.py +++ b/gallery_dl/extractor/unsplash.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann +# Copyright 2021-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -121,7 +121,7 @@ class UnsplashImageExtractor(UnsplashExtractor): "total_collections": int, "total_likes": int, "total_photos": int, - "twitter_username": "dave_hoefler", + "twitter_username": None, "updated_at": str, "username": "davehoefler", }, diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py index 9724c4b..ed565bc 100644 --- a/gallery_dl/extractor/vk.py +++ b/gallery_dl/extractor/vk.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann +# Copyright 2021-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -75,8 +75,8 @@ class VkPhotosExtractor(VkExtractor): r"|(?!album-?\d+_)([^/?#]+))") test = ( ("https://vk.com/id398982326", { - "pattern": r"https://sun\d+-\d+\.userapi\.com/c\d+/v\d+" - r"/[0-9a-f]+/[\w-]+\.jpg", + "pattern": r"https://sun\d+-\d+\.userapi\.com/sun\d+-\d+" + r"/c\d+/v\d+/[0-9a-f]+/[\w-]+\.jpg", "count": ">= 35", "keywords": { "id": r"re:\d+", diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index 0b6a153..81ca87f 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2021 Mike Fährmann +# Copyright 2019-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -20,6 +20,7 @@ class WeiboExtractor(Extractor): filename_fmt = "{status[id]}_{num:>02}.{extension}" archive_fmt = "{status[id]}_{num}" root = "https://m.weibo.cn" + request_interval = (1.0, 2.0) def __init__(self, match): Extractor.__init__(self, match) @@ -111,22 +112,39 @@ class WeiboUserExtractor(WeiboExtractor): def __init__(self, match): WeiboExtractor.__init__(self, match) - self.user_id = match.group(1) + self.user_id = match.group(1)[-10:] def statuses(self): url = self.root + "/api/container/getIndex" - params = {"page": 1, "containerid": "107603" + self.user_id[-10:]} + headers = { + "Accept": "application/json, text/plain, */*", + "X-Requested-With": "XMLHttpRequest", + "MWeibo-Pwa": "1", + "X-XSRF-TOKEN": None, + "Referer": "{}/u/{}".format(self.root, self.user_id), + } + params = { + "type": "uid", + "value": self.user_id, + "containerid": "107603" + self.user_id, + } while True: - data = self.request(url, params=params).json() - cards = data["data"]["cards"] + response = self.request(url, params=params, headers=headers) + headers["X-XSRF-TOKEN"] = response.cookies.get("XSRF-TOKEN") + data = response.json()["data"] - if not cards: - return - for card in cards: + for card in data["cards"]: if "mblog" in card: yield card["mblog"] - params["page"] += 1 + + info = data.get("cardlistInfo") + if not info: + continue + + params["since_id"] = sid = info.get("since_id") + if not sid: + return class WeiboStatusExtractor(WeiboExtractor): diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py index f68cb85..05f27f1 100644 --- a/gallery_dl/extractor/wikiart.py +++ b/gallery_dl/extractor/wikiart.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2021 Mike Fährmann +# Copyright 2019-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -70,8 +70,8 @@ class WikiartArtistExtractor(WikiartExtractor): directory_fmt = ("{category}", "{artist[artistName]}") pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)/?$" test = ("https://www.wikiart.org/en/thomas-cole", { - "url": "deabec0ed7efa97e2a729ff9d08b539143106bac", - "keyword": "751a5457b71c8704982d3bb6485a214cd3d07bf9", + "url": "8514d743382720e6fdab7c9a73faf9e1ec940cfb", + "keyword": "58037afba35bfd7b4101c2316975a75d4ee92a68", }) def __init__(self, match): diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 3e72e9c..3eebf0b 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2021 Mike Fährmann +# Copyright 2015-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -53,9 +53,6 @@ class Job(): extr.category = pextr.category extr.subcategory = pextr.subcategory - # reuse connection adapters - extr.session.adapters = pextr.session.adapters - # user-supplied metadata kwdict = extr.config("keywords") if kwdict: diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 1a399fa..cedbfa0 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.20.3" +__version__ = "1.20.4" |
