| author    | 2023-12-11 01:12:30 -0500 |
|-----------|---------------------------|
| committer | 2023-12-11 01:12:30 -0500 |
| commit    | 30dee4697019389ef29458b2e3931adc976389b2 (patch) |
| tree      | 9c627d0f873d3d2efe5a1c3881a5feaec3acf5d4 |
| parent    | 2a817af4fe41289fa705bdc5ee61372333f43996 (diff) |
New upstream version 1.26.4 (upstream/1.26.4)
28 files changed, 344 insertions, 97 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 39f5884..88dbc44 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,25 @@
 # Changelog
 
+## 1.26.4 - 2023-12-10
+### Extractors
+#### Additions
+- [exhentai] add `fallback-retries` option ([#4792](https://github.com/mikf/gallery-dl/issues/4792))
+- [urlgalleries] add `gallery` extractor ([#919](https://github.com/mikf/gallery-dl/issues/919), [#1184](https://github.com/mikf/gallery-dl/issues/1184), [#2905](https://github.com/mikf/gallery-dl/issues/2905), [#4886](https://github.com/mikf/gallery-dl/issues/4886))
+#### Fixes
+- [nijie] fix image URLs of multi-image posts ([#4876](https://github.com/mikf/gallery-dl/issues/4876))
+- [patreon] fix bootstrap data extraction ([#4904](https://github.com/mikf/gallery-dl/issues/4904), [#4906](https://github.com/mikf/gallery-dl/issues/4906))
+- [twitter] fix `/media` timelines ([#4898](https://github.com/mikf/gallery-dl/issues/4898), [#4899](https://github.com/mikf/gallery-dl/issues/4899))
+- [twitter] retry API requests when response contains incomplete results ([#4811](https://github.com/mikf/gallery-dl/issues/4811))
+#### Improvements
+- [exhentai] store more cookies when logging in with username & password ([#4881](https://github.com/mikf/gallery-dl/issues/4881))
+- [twitter] generalize "Login Required" errors ([#4734](https://github.com/mikf/gallery-dl/issues/4734), [#4324](https://github.com/mikf/gallery-dl/issues/4324))
+### Options
+- add `-e/--error-file` command-line and `output.errorfile` config option ([#4732](https://github.com/mikf/gallery-dl/issues/4732))
+### Miscellaneous
+- automatically build and push Docker images
+- prompt for passwords on login when necessary
+- fix `util.dump_response()` to work with `bytes` header values
+
 ## 1.26.3 - 2023-11-27
 ### Extractors
 #### Additions
diff --git a/PKG-INFO b/PKG-INFO
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery_dl
-Version: 1.26.3
+Version: 1.26.4
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -26,6 +26,7 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
 Classifier: Topic :: Internet :: WWW/HTTP
@@ -111,9 +112,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.3/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.3/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.bin>`__
 
 
 Nightly Builds
diff --git a/README.rst b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -72,9 +72,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.3/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.3/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.bin>`__
 
 
 Nightly Builds
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index f3b17a2..39fabcc 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -26,6 +26,7 @@ _arguments -C -S \
 {-s,--simulate}'[Simulate data extraction; do not download anything]' \
 {-E,--extractor-info}'[Print extractor defaults and settings]' \
 {-K,--list-keywords}'[Print a list of available keywords and example values for the given URLs]' \
+{-e,--error-file}'[Add input URLs which returned an error to FILE]':'<file>':_files \
 --list-modules'[Print a list of available extractor modules]' \
 --list-extractors'[Print a list of extractor classes with description, (sub)category and example URL]' \
 --write-log'[Write logging output to FILE]':'<file>':_files \
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index e5bc6c1..d280ab4 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -5,12 +5,12 @@ _gallery_dl()
 {
     cur="${COMP_WORDS[COMP_CWORD]}"
     prev="${COMP_WORDS[COMP_CWORD-1]}"
 
-    if [[ "${prev}" =~ ^(-i|--input-file|-I|--input-file-comment|-x|--input-file-delete|--write-log|--write-unsupported|-c|--config|--config-yaml|--config-toml|-C|--cookies|--cookies-export|--download-archive)$ ]]; then
+    if [[ "${prev}" =~ ^(-i|--input-file|-I|--input-file-comment|-x|--input-file-delete|-e|--error-file|--write-log|--write-unsupported|-c|--config|--config-yaml|--config-toml|-C|--cookies|--cookies-export|--download-archive)$ ]]; then
         COMPREPLY=( $(compgen -f -- "${cur}") )
     elif [[ "${prev}" =~ ^()$ ]]; then
         COMPREPLY=( $(compgen -d -- "${cur}") )
     else
-        COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --proxy --source-address --user-agent --clear-cache --input-file --input-file-comment --input-file-delete --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --chunk-size --no-part --no-skip --no-mtime --no-download --no-postprocessors --no-check-certificate --option --config --config-yaml --config-toml --config-create --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --postprocessor --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime --mtime-from-date --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") )
+        COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --proxy --source-address --user-agent --clear-cache --input-file --input-file-comment --input-file-delete --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --error-file --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --chunk-size --no-part --no-skip --no-mtime --no-download --no-postprocessors --no-check-certificate --option --config --config-yaml --config-toml --config-create --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --postprocessor --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime --mtime-from-date --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") )
     fi
 }
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index 5cb60d4..135dfb7 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -20,6 +20,7 @@ complete -c gallery-dl -s 'j' -l 'dump-json' -d 'Print JSON information'
 complete -c gallery-dl -s 's' -l 'simulate' -d 'Simulate data extraction; do not download anything'
 complete -c gallery-dl -s 'E' -l 'extractor-info' -d 'Print extractor defaults and settings'
 complete -c gallery-dl -s 'K' -l 'list-keywords' -d 'Print a list of available keywords and example values for the given URLs'
+complete -c gallery-dl -r -F -s 'e' -l 'error-file' -d 'Add input URLs which returned an error to FILE'
 complete -c gallery-dl -l 'list-modules' -d 'Print a list of available extractor modules'
 complete -c gallery-dl -l 'list-extractors' -d 'Print a list of extractor classes with description, (sub)category and example URL'
 complete -c gallery-dl -r -F -l 'write-log' -d 'Write logging output to FILE'
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 2789154..277b227 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-11-27" "1.26.3" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-12-10" "1.26.4" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
 
@@ -80,6 +80,9 @@ Print extractor defaults and settings
 .B "\-K, \-\-list\-keywords"
 Print a list of available keywords and example values for the given URLs
 .TP
+.B "\-e, \-\-error\-file" \f[I]FILE\f[]
+Add input URLs which returned an error to FILE
+.TP
 .B "\-\-list\-modules"
 Print a list of available extractor modules
 .TP
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index adf9885..95e9627 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-11-27" "1.26.3" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-12-10" "1.26.4" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
 .\" disable justification (adjust text to left margin only)
@@ -450,7 +450,7 @@ during data extraction.
 
 The username and password to use when attempting to log in to
 another site.
 
-Specifying a username and password is required for
+Specifying username and password is required for
 .br
 * \f[I]nijie\f[]
@@ -509,6 +509,10 @@ by using a \f[I].netrc\f[] file. (see Authentication_)
 
 (*) The password value for these sites should be
 the API key found in your user profile, not the actual account password.
 
+Note: Leave the \f[I]password\f[] value empty or undefined
+to get prompted for a passeword when performing a login
+(see \f[I]getpass()\f[]).
+
 .SS extractor.*.netrc
 .IP "Type:" 6
@@ -1802,6 +1806,18 @@ depending on the input URL
 * \f[I]"exhentai.org"\f[]: Use \f[I]exhentai.org\f[] for all URLs
 
 
+.SS extractor.exhentai.fallback-retries
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]2\f[]
+
+.IP "Description:" 4
+Number of times a failed image gets retried.
+Use \f[I]-1\f[] for infinite retries
+
+
 .SS extractor.exhentai.fav
 .IP "Type:" 6
 \f[I]string\f[]
@@ -5070,6 +5086,26 @@ File to write external URLs unsupported by *gallery-dl* to.
 
 The default format string here is \f[I]"{message}"\f[].
 
+.SS output.errorfile
+.IP "Type:" 6
+.br
+* \f[I]Path\f[]
+.br
+* \f[I]Logging Configuration\f[]
+
+.IP "Description:" 4
+File to write input URLs which returned an error to.
+
+The default format string here is also \f[I]"{message}"\f[].
+
+When combined with
+\f[I]-I\f[]/\f[I]--input-file-comment\f[] or
+\f[I]-x\f[]/\f[I]--input-file-delete\f[],
+this option will cause *all* input URLs from these files
+to be commented/deleted after processing them
+and not just successful ones.
+
+
 .SS output.num-to-str
 .IP "Type:" 6
 \f[I]bool\f[]
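
Note: the two options documented above can also be set programmatically. A minimal sketch using gallery-dl's Python config API (the config.set(path, key, value) signature is assumed from gallery_dl/config.py; file names and values are examples):

    # Sketch: enable the new 1.26.4 options without editing gallery-dl.conf.
    from gallery_dl import config

    # output.errorfile: collect failed input URLs, appended across runs
    config.set(("output",), "errorfile", "failed-urls.txt")

    # extractor.exhentai.fallback-retries: retry a failed image 4 times
    config.set(("extractor", "exhentai"), "fallback-retries", 4)

On the command line, the equivalent of the first call is the new -e/--error-file FILE option.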
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 5fc537b..e1b709b 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery-dl
-Version: 1.26.3
+Version: 1.26.4
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -26,6 +26,7 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
 Classifier: Topic :: Internet :: WWW/HTTP
@@ -111,9 +112,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.3/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.3/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.bin>`__
 
 
 Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 255edc7..9bcf0b2 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -203,6 +203,7 @@ gallery_dl/extractor/twibooru.py
 gallery_dl/extractor/twitter.py
 gallery_dl/extractor/unsplash.py
 gallery_dl/extractor/uploadir.py
+gallery_dl/extractor/urlgalleries.py
 gallery_dl/extractor/urlshortener.py
 gallery_dl/extractor/vanillarock.py
 gallery_dl/extractor/vichan.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 287faf1..fff53eb 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -226,18 +226,26 @@ def main():
         else:
             jobtype = args.jobtype or job.DownloadJob
 
+        input_manager = InputManager()
+        input_manager.log = input_log = logging.getLogger("inputfile")
+
         # unsupported file logging handler
         handler = output.setup_logging_handler(
             "unsupportedfile", fmt="{message}")
         if handler:
-            ulog = logging.getLogger("unsupported")
+            ulog = job.Job.ulog = logging.getLogger("unsupported")
             ulog.addHandler(handler)
             ulog.propagate = False
-            job.Job.ulog = ulog
+
+        # error file logging handler
+        handler = output.setup_logging_handler(
+            "errorfile", fmt="{message}", mode="a")
+        if handler:
+            elog = input_manager.err = logging.getLogger("errorfile")
+            elog.addHandler(handler)
+            elog.propagate = False
 
         # collect input URLs
-        input_manager = InputManager()
-        input_manager.log = input_log = logging.getLogger("inputfile")
         input_manager.add_list(args.urls)
 
         if args.input_files:
@@ -270,6 +278,7 @@ def main():
 
                 if status:
                     retval |= status
+                    input_manager.error()
                 else:
                     input_manager.success()
 
@@ -281,6 +290,7 @@ def main():
             except exception.NoExtractorError:
                 log.error("Unsupported URL '%s'", url)
                 retval |= 64
+                input_manager.error()
 
             input_manager.next()
         return retval
@@ -301,8 +311,11 @@ class InputManager():
     def __init__(self):
        self.urls = []
        self.files = ()
+       self.log = self.err = None
+
+       self._url = ""
+       self._item = None
        self._index = 0
-       self._current = None
        self._pformat = None
 
     def add_url(self, url):
@@ -439,17 +452,33 @@ class InputManager():
         self._index += 1
 
     def success(self):
-        if self._current:
-            url, path, action, indicies = self._current
-            lines = self.files[path]
-            action(lines, indicies)
-            try:
-                with open(path, "w", encoding="utf-8") as fp:
-                    fp.writelines(lines)
-            except Exception as exc:
-                self.log.warning(
-                    "Unable to update '%s' (%s: %s)",
-                    path, exc.__class__.__name__, exc)
+        if self._item:
+            self._rewrite()
+
+    def error(self):
+        if self.err:
+            if self._item:
+                url, path, action, indicies = self._item
+                lines = self.files[path]
+                out = "".join(lines[i] for i in indicies)
+                if out and out[-1] == "\n":
+                    out = out[:-1]
+                self._rewrite()
+            else:
+                out = str(self._url)
+            self.err.info(out)
+
+    def _rewrite(self):
+        url, path, action, indicies = self._item
+        lines = self.files[path]
+        action(lines, indicies)
+        try:
+            with open(path, "w", encoding="utf-8") as fp:
+                fp.writelines(lines)
+        except Exception as exc:
+            self.log.warning(
+                "Unable to update '%s' (%s: %s)",
+                path, exc.__class__.__name__, exc)
 
     @staticmethod
     def _action_comment(lines, indicies):
@@ -467,23 +496,24 @@ class InputManager():
 
     def __next__(self):
         try:
-            item = self.urls[self._index]
+            url = self.urls[self._index]
         except IndexError:
             raise StopIteration
 
-        if isinstance(item, tuple):
-            self._current = item
-            item = item[0]
+        if isinstance(url, tuple):
+            self._item = url
+            url = url[0]
         else:
-            self._current = None
+            self._item = None
+        self._url = url
 
         if self._pformat:
             output.stderr_write(self._pformat({
                 "total"  : len(self.urls),
                 "current": self._index + 1,
-                "url"    : item,
+                "url"    : url,
             }))
-        return item
+        return url
 
 
 class ExtendedUrl():
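
Note: the wiring above routes failed input URLs through a dedicated "errorfile" logger opened in append mode. A stand-alone sketch of the same pattern using only the standard library (the file name and helper function are illustrative, not part of gallery-dl):

    import logging

    # Bare "{message}" lines, mode="a" so URLs accumulate across runs,
    # mirroring setup_logging_handler("errorfile", fmt="{message}", mode="a").
    elog = logging.getLogger("errorfile")
    handler = logging.FileHandler("errors.txt", mode="a", encoding="utf-8")
    handler.setFormatter(logging.Formatter("{message}", style="{"))
    elog.addHandler(handler)
    elog.propagate = False

    def record_failure(url):
        # InputManager.error() does the equivalent via self.err.info(...)
        elog.info(url)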
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 72239d5..d074de2 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -155,6 +155,7 @@ modules = [
     "tumblrgallery",
     "twibooru",
     "twitter",
+    "urlgalleries",
     "unsplash",
     "uploadir",
     "urlshortener",
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index f378427..9b010c5 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -136,6 +136,18 @@ class Extractor():
             kwargs["timeout"] = self._timeout
         if "verify" not in kwargs:
             kwargs["verify"] = self._verify
+
+        if "json" in kwargs:
+            json = kwargs["json"]
+            if json is not None:
+                kwargs["data"] = util.json_dumps(json).encode()
+                del kwargs["json"]
+                headers = kwargs.get("headers")
+                if headers:
+                    headers["Content-Type"] = "application/json"
+                else:
+                    kwargs["headers"] = {"Content-Type": "application/json"}
+
         response = None
         tries = 1
 
@@ -233,7 +245,7 @@ class Extractor():
 
         password = None
         if username:
-            password = self.config("password")
+            password = self.config("password") or util.LazyPrompt()
         elif self.config("netrc", False):
             try:
                 info = netrc.netrc().authenticators(self.category)
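
Note: the `or util.LazyPrompt()` fallback works because LazyPrompt (defined in gallery_dl/util.py further down) defers the prompt until the value is first converted to a string. A minimal demonstration of the pattern:

    import getpass

    class LazyPrompt:
        """Prompt for a password only once the value is stringified."""
        __slots__ = ()

        def __str__(self):
            return getpass.getpass()

    password = "" or LazyPrompt()  # empty config value, prompt deferred
    # No prompt has appeared yet; getpass.getpass() only runs when a login
    # routine formats the value, e.g. str(password) in the POST payload.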
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 56d81e5..9e6516e 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -36,7 +36,7 @@ class DanbooruExtractor(BaseExtractor):
         username, api_key = self._get_auth_info()
         if username:
             self.log.debug("Using HTTP Basic Auth for user '%s'", username)
-            self.session.auth = (username, api_key)
+            self.session.auth = util.HTTPBasicAuth(username, api_key)
 
     def skip(self, num):
         pages = num // self.per_page
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 2c37ef1..1852dc1 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1239,7 +1239,7 @@ class DeviantartOAuthAPI():
         self.log.info("Requesting public access token")
         data = {"grant_type": "client_credentials"}
-        auth = (self.client_id, self.client_secret)
+        auth = util.HTTPBasicAuth(self.client_id, self.client_secret)
 
         response = self.extractor.request(
             url, method="POST", data=data, auth=auth, fatal=False)
         data = response.json()
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 5dc498f..a479d00 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -47,14 +47,6 @@ class ExhentaiExtractor(Extractor):
         if self.version != "ex":
             self.cookies.set("nw", "1", domain=self.cookies_domain)
 
-        self.original = self.config("original", True)
-
-        limits = self.config("limits", False)
-        if limits and limits.__class__ is int:
-            self.limits = limits
-            self._remaining = 0
-        else:
-            self.limits = False
 
     def request(self, url, **kwargs):
         response = Extractor.request(self, url, **kwargs)
@@ -85,6 +77,7 @@ class ExhentaiExtractor(Extractor):
     @cache(maxage=90*24*3600, keyarg=1)
     def _login_impl(self, username, password):
         self.log.info("Logging in as %s", username)
+
         url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01"
         headers = {
             "Referer": "https://e-hentai.org/bounce_login.php?b=d&bt=1-1",
@@ -98,10 +91,19 @@ class ExhentaiExtractor(Extractor):
             "ipb_login_submit": "Login!",
         }
 
+        self.cookies.clear()
+
        response = self.request(url, method="POST", headers=headers, data=data)
        if b"You are now logged in as:" not in response.content:
            raise exception.AuthenticationError()
-       return {c: response.cookies[c] for c in self.cookies_names}
+
+       # collect more cookies
+       url = self.root + "/favorites.php"
+       response = self.request(url)
+       if response.history:
+           self.request(url)
+
+       return self.cookies
 
 
 class ExhentaiGalleryExtractor(ExhentaiExtractor):
@@ -128,6 +130,19 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
         if source == "hitomi":
             self.items = self._items_hitomi
 
+        limits = self.config("limits", False)
+        if limits and limits.__class__ is int:
+            self.limits = limits
+            self._remaining = 0
+        else:
+            self.limits = False
+
+        self.fallback_retries = self.config("fallback-retries", 2)
+        if self.fallback_retries < 0:
+            self.fallback_retries = float("inf")
+
+        self.original = self.config("original", True)
+
     def favorite(self, slot="0"):
         url = self.root + "/gallerypopups.php"
         params = {
@@ -301,12 +316,11 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             if self.original and orig:
                 url = self.root + "/fullimg" + text.unescape(orig)
                 data = self._parse_original_info(extr('ownload original', '<'))
-                data["_fallback"] = ("{}?nl={}".format(url, nl),)
+                data["_fallback"] = self._fallback_original(nl, url)
             else:
                 url = iurl
                 data = self._parse_image_info(url)
-                data["_fallback"] = self._fallback(
-                    None, self.image_num, nl)
+                data["_fallback"] = self._fallback_1280(nl, self.image_num)
         except IndexError:
             self.log.debug("Page content:\n%s", page)
             raise exception.StopExtraction(
@@ -315,6 +329,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
         data["num"] = self.image_num
         data["image_token"] = self.key_start = extr('var startkey="', '";')
         data["_url_1280"] = iurl
+        data["_nl"] = nl
         self.key_show = extr('var showkey="', '";')
 
         self._check_509(iurl, data)
@@ -351,12 +366,12 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
                 url = text.unescape(origurl)
                 data = self._parse_original_info(text.extract(
                     i6, "ownload original", "<", pos)[0])
-                data["_fallback"] = ("{}?nl={}".format(url, nl),)
+                data["_fallback"] = self._fallback_original(nl, url)
             else:
                 url = imgurl
                 data = self._parse_image_info(url)
-                data["_fallback"] = self._fallback(
-                    imgkey, request["page"], nl)
+                data["_fallback"] = self._fallback_1280(
+                    nl, request["page"], imgkey)
         except IndexError:
             self.log.debug("Page content:\n%s", page)
             raise exception.StopExtraction(
@@ -365,6 +380,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             data["num"] = request["page"]
             data["image_token"] = imgkey
             data["_url_1280"] = imgurl
+            data["_nl"] = nl
 
             self._check_509(imgurl, data)
             yield url, text.nameext_from_url(url, data)
@@ -431,13 +447,26 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             raise exception.NotFoundError("image page")
         return page
 
-    def _fallback(self, imgkey, num, nl):
-        url = "{}/s/{}/{}-{}?nl={}".format(
-            self.root, imgkey or self.key_start, self.gallery_id, num, nl)
-        page = self.request(url, fatal=False).text
-        if page.startswith(("Invalid page", "Keep trying")):
-            return
-        yield self.image_from_page(page)[0]
+    def _fallback_original(self, nl, fullimg):
+        url = "{}?nl={}".format(fullimg, nl)
+        for _ in range(self.fallback_retries):
+            yield url
+
+    def _fallback_1280(self, nl, num, token=None):
+        if not token:
+            token = self.key_start
+
+        for _ in range(self.fallback_retries):
+            url = "{}/s/{}/{}-{}?nl={}".format(
+                self.root, token, self.gallery_id, num, nl)
+
+            page = self.request(url, fatal=False).text
+            if page.startswith(("Invalid page", "Keep trying")):
+                return
+            url, data = self.image_from_page(page)
+            yield url
+
+            nl = data["_nl"]
 
     @staticmethod
     def _parse_image_info(url):
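
Note: the rewritten fallbacks replace a one-shot tuple with lazy generators stored in data["_fallback"]; the download code is assumed to pull one candidate URL at a time, only after the primary download fails, so up to fallback-retries attempts are made. A simplified sketch of the generator idea (URL and values are made up):

    def fallback_original(fullimg, nl, retries=2):
        # Yield the same retry URL "retries" times; nothing is requested
        # until the consumer actually asks for the next candidate.
        url = "{}?nl={}".format(fullimg, nl)
        for _ in range(retries):
            yield url

    candidates = fallback_original("https://example.org/fullimg/x", "abc123")
    for url in candidates:  # tried in order until one attempt succeeds
        print("retrying with", url)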
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 76c5404..54f2942 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -57,7 +57,11 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
             data["user_name"] = data["artist_name"]
         yield Message.Directory, data
 
-        for image in self._extract_images(page):
+        for num, url in enumerate(self._extract_images(image_id, page)):
+            image = text.nameext_from_url(url, {
+                "num": num,
+                "url": "https:" + url,
+            })
             image.update(data)
             if not image["extension"]:
                 image["extension"] = "jpg"
@@ -72,7 +76,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
         extr = text.extract_from(page)
         keywords = text.unescape(extr(
             'name="keywords" content="', '" />')).split(",")
-        data = {
+        return {
             "title"      : keywords[0].strip(),
             "description": text.unescape(extr(
                 '"description": "', '"').replace("&amp;", "&")),
@@ -82,7 +86,6 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
             "artist_name": keywords[1],
             "tags"       : keywords[2:-1],
         }
-        return data
 
     @staticmethod
     def _extract_data_horne(page):
@@ -90,7 +93,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
         extr = text.extract_from(page)
         keywords = text.unescape(extr(
             'name="keywords" content="', '" />')).split(",")
-        data = {
+        return {
             "title"      : keywords[0].strip(),
             "description": text.unescape(extr(
                 'property="og:description" content="', '"')),
@@ -101,21 +104,16 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
                 "itemprop='datePublished' content=", "<").rpartition(">")[2],
                 "%Y-%m-%d %H:%M:%S", 9),
         }
-        return data
 
-    @staticmethod
-    def _extract_images(page):
-        """Extract image URLs from 'page'"""
-        images = text.extract_iter(page, "/view_popup.php", "</a>")
-        for num, image in enumerate(images):
-            src = text.extr(image, 'src="', '"')
-            if not src:
-                continue
-            url = ("https:" + src).replace("/__rs_l120x120/", "/")
-            yield text.nameext_from_url(url, {
-                "num": num,
-                "url": url,
-            })
+    def _extract_images(self, image_id, page):
+        if '&#diff_1" ' in page:
+            # multiple images
+            url = "{}/view_popup.php?id={}".format(self.root, image_id)
+            page = self.request(url).text
+            yield from text.extract_iter(
+                page, 'href="javascript:void(0);"><img src="', '"')
+        else:
+            yield text.extr(page, 'itemprop="image" src="', '"')
 
     @staticmethod
     def _extract_user_name(page):
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index d1f135d..65db94d 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -183,7 +183,7 @@ class OAuthBase(Extractor):
         }
 
         if auth:
-            auth = (client_id, client_secret)
+            auth = util.HTTPBasicAuth(client_id, client_secret)
         else:
             auth = None
             data["client_id"] = client_id
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 6aef9cb..fb560e9 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -254,6 +254,13 @@ class PatreonExtractor(Extractor):
         if bootstrap:
             return util.json_loads(bootstrap + "}")
 
+        bootstrap = text.extr(
+            page,
+            'window.patreon = wrapInProxy({"bootstrap":',
+            '},"apiServer"')
+        if bootstrap:
+            return util.json_loads(bootstrap + "}")
+
         bootstrap = text.extr(page, "window.patreon.bootstrap,", "});")
         if bootstrap:
             return util.json_loads(bootstrap + "}")
diff --git a/gallery_dl/extractor/pixeldrain.py b/gallery_dl/extractor/pixeldrain.py
index 34b4ebf..5cfdc43 100644
--- a/gallery_dl/extractor/pixeldrain.py
+++ b/gallery_dl/extractor/pixeldrain.py
@@ -9,7 +9,7 @@
 """Extractors for https://pixeldrain.com/"""
 
 from .common import Extractor, Message
-from .. import text
+from .. import text, util
 
 BASE_PATTERN = r"(?:https?://)?pixeldrain\.com"
 
@@ -23,7 +23,7 @@ class PixeldrainExtractor(Extractor):
     def _init(self):
         api_key = self.config("api-key")
         if api_key:
-            self.session.auth = ("", api_key)
+            self.session.auth = util.HTTPBasicAuth("", api_key)
 
     def parse_datetime(self, date_string):
         return text.parse_datetime(
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index c0bf5b3..feb6d1f 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -423,9 +423,10 @@ class RedditAPI():
                 "grants/installed_client"),
             "device_id": "DO_NOT_TRACK_THIS_DEVICE"}
 
+        auth = util.HTTPBasicAuth(self.client_id, "")
         response = self.extractor.request(
             url, method="POST", headers=self.headers,
-            data=data, auth=(self.client_id, ""), fatal=False)
+            data=data, auth=auth, fatal=False)
         data = response.json()
 
         if response.status_code != 200:
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ca1e906..f874f12 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1276,8 +1276,21 @@ class TwitterAPI():
                 self.headers["x-csrf-token"] = csrf_token
 
             if response.status_code < 400:
-                # success
-                return response.json()
+                data = response.json()
+                if not data.get("errors") or not any(
+                        (e.get("message") or "").lower().startswith("timeout")
+                        for e in data["errors"]):
+                    return data  # success or non-timeout errors
+
+                msg = data["errors"][0].get("message") or "Unspecified"
+                self.extractor.log.debug("Internal Twitter error: '%s'", msg)
+
+                if self.headers["x-twitter-auth-type"]:
+                    self.extractor.log.debug("Retrying API request")
+                    continue  # retry
+
+                # fall through to "Login Required"
+                response.status_code = 404
 
             if response.status_code == 429:
                 # rate limit exceeded
@@ -1289,11 +1302,9 @@ class TwitterAPI():
                     self.extractor.wait(until=until, seconds=seconds)
                     continue
 
-            if response.status_code == 403 and \
-                    not self.headers["x-twitter-auth-type"] and \
-                    endpoint == "/2/search/adaptive.json":
-                raise exception.AuthorizationError(
-                    "Login required to access search results")
+            if response.status_code in (403, 404) and \
+                    not self.headers["x-twitter-auth-type"]:
+                raise exception.AuthorizationError("Login required")
 
             # error
             try:
@@ -1431,7 +1442,12 @@ class TwitterAPI():
         for instr in instructions:
             instr_type = instr.get("type")
             if instr_type == "TimelineAddEntries":
-                entries = instr["entries"]
+                if entries:
+                    entries.extend(instr["entries"])
+                else:
+                    entries = instr["entries"]
+            elif instr_type == "TimelineAddToModule":
+                entries = instr["moduleItems"]
             elif instr_type == "TimelineReplaceEntry":
                 entry = instr["entry"]
                 if entry["entryId"].startswith("cursor-bottom-"):
@@ -1479,6 +1495,11 @@ class TwitterAPI():
 
                 if esw("tweet-"):
                     tweets.append(entry)
+                elif esw("profile-grid-"):
+                    if "content" in entry:
+                        tweets.extend(entry["content"]["items"])
+                    else:
+                        tweets.append(entry)
                 elif esw(("homeConversation-",
                           "profile-conversation-",
                           "conversationthread-")):
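
Note: the new retry path treats an HTTP 200 whose errors array reports a timeout as an incomplete result and retries the request for logged-in sessions. The check in isolation (function name is illustrative):

    def has_timeout_error(data):
        # Retry condition from TwitterAPI above: the body parsed fine, but
        # at least one reported error message starts with "timeout".
        errors = data.get("errors")
        return bool(errors) and any(
            (e.get("message") or "").lower().startswith("timeout")
            for e in errors
        )

    assert has_timeout_error({"errors": [{"message": "Timeout: ..."}]})
    assert not has_timeout_error({"data": {}})
    assert not has_timeout_error({"errors": [{"message": "Over capacity"}]})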
diff --git a/gallery_dl/extractor/urlgalleries.py b/gallery_dl/extractor/urlgalleries.py
new file mode 100644
index 0000000..b21709a
--- /dev/null
+++ b/gallery_dl/extractor/urlgalleries.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://urlgalleries.net/"""
+
+from .common import GalleryExtractor, Message
+from .. import text
+
+
+class UrlgalleriesGalleryExtractor(GalleryExtractor):
+    """Base class for Urlgalleries extractors"""
+    category = "urlgalleries"
+    root = "urlgalleries.net"
+    request_interval = (0.5, 1.0)
+    pattern = r"(?:https?://)(?:(\w+)\.)?urlgalleries\.net/(?:[\w-]+-)?(\d+)"
+    example = "https://blog.urlgalleries.net/gallery-12345/TITLE"
+
+    def __init__(self, match):
+        self.blog, self.gallery_id = match.groups()
+        url = "https://{}.urlgalleries.net/porn-gallery-{}/?a=10000".format(
+            self.blog, self.gallery_id)
+        GalleryExtractor.__init__(self, match, url)
+
+    def items(self):
+        page = self.request(self.gallery_url).text
+        imgs = self.images(page)
+        data = self.metadata(page)
+        data["count"] = len(imgs)
+        del page
+
+        root = "https://{}.urlgalleries.net".format(self.blog)
+        yield Message.Directory, data
+        for data["num"], img in enumerate(imgs, 1):
+            response = self.request(
+                root + img, method="HEAD", allow_redirects=False)
+            yield Message.Queue, response.headers["Location"], data
+
+    def metadata(self, page):
+        extr = text.extract_from(page)
+        return {
+            "gallery_id": self.gallery_id,
+            "_site": extr(' title="', '"'),  # site name
+            "blog" : text.unescape(extr(' title="', '"')),
+            "_rprt": extr(' title="', '"'),  # report button
+            "title": text.unescape(extr(' title="', '"').strip()),
+            "date" : text.parse_datetime(
+                extr(" images in gallery | ", "<"), "%B %d, %Y %H:%M"),
+        }
+
+    def images(self, page):
+        imgs = text.extr(page, 'id="wtf"', "</div>")
+        return list(text.extract_iter(imgs, " href='", "'"))
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 255d9f2..72a602f 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -287,6 +287,11 @@ def build_parser():
             "for the given URLs"),
     )
     output.add_argument(
+        "-e", "--error-file",
+        dest="errorfile", metavar="FILE", action=ConfigAction,
+        help="Add input URLs which returned an error to FILE",
+    )
+    output.add_argument(
         "--list-modules",
         dest="list_modules", action="store_true",
         help="Print a list of available extractor modules",
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index 9508ff3..c0971f0 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -210,7 +210,7 @@ def configure_logging(loglevel):
     root.setLevel(minlevel)
 
 
-def setup_logging_handler(key, fmt=LOG_FORMAT, lvl=LOG_LEVEL):
+def setup_logging_handler(key, fmt=LOG_FORMAT, lvl=LOG_LEVEL, mode="w"):
     """Setup a new logging handler"""
     opts = config.interpolate(("output",), key)
     if not opts:
@@ -219,7 +219,7 @@ def setup_logging_handler(key, fmt=LOG_FORMAT, lvl=LOG_LEVEL):
         opts = {"path": opts}
 
     path = opts.get("path")
-    mode = opts.get("mode", "w")
+    mode = opts.get("mode", mode)
     encoding = opts.get("encoding", "utf-8")
     try:
         path = util.expand_path(path)
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 62aa12d..53502ef 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -14,6 +14,7 @@ import sys
 import json
 import time
 import random
+import getpass
 import hashlib
 import sqlite3
 import binascii
@@ -274,7 +275,7 @@ Response Headers
     if hide_auth:
         authorization = req_headers.get("Authorization")
         if authorization:
-            atype, sep, _ = authorization.partition(" ")
+            atype, sep, _ = str(authorization).partition(" ")
             req_headers["Authorization"] = atype + " ***" if sep else "***"
 
         cookie = req_headers.get("Cookie")
@@ -290,15 +291,17 @@ Response Headers
             r"(^|, )([^ =]+)=[^,;]*", r"\1\2=***", set_cookie,
         )
 
+    fmt_nv = "{}: {}".format
+
     fp.write(outfmt.format(
         request=request,
         response=response,
         request_headers="\n".join(
-            name + ": " + value
+            fmt_nv(name, value)
             for name, value in req_headers.items()
         ),
         response_headers="\n".join(
-            name + ": " + value
+            fmt_nv(name, value)
             for name, value in res_headers.items()
         ),
     ).encode())
@@ -487,6 +490,26 @@ CODES = {
 }
 
 
+class HTTPBasicAuth():
+    __slots__ = ("authorization",)
+
+    def __init__(self, username, password):
+        self.authorization = b"Basic " + binascii.b2a_base64(
+            username.encode("latin1") + b":" + str(password).encode("latin1")
+        )[:-1]
+
+    def __call__(self, request):
+        request.headers["Authorization"] = self.authorization
+        return request
+
+
+class LazyPrompt():
+    __slots__ = ()
+
+    def __str__(self):
+        return getpass.getpass()
+
+
 class CustomNone():
     """None-style type that supports more operations than regular None"""
     __slots__ = ()
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 5034fb2..f0d55f6 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-__version__ = "1.26.3"
+__version__ = "1.26.4"

@@ -135,6 +135,7 @@ def build_setuptools():
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
         "Programming Language :: Python :: Implementation :: CPython",
         "Programming Language :: Python :: Implementation :: PyPy",
         "Topic :: Internet :: WWW/HTTP",

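Note: several call sites in this release swap requests' (username, password) tuples for util.HTTPBasicAuth, which precomputes the Authorization header once and funnels LazyPrompt values through a single str() call. A usage sketch (credentials and URL are placeholders; requests accepts any callable that mutates and returns the prepared request):

    import requests
    from gallery_dl import util

    session = requests.Session()
    session.auth = util.HTTPBasicAuth("username", "api-key-or-password")
    # Every request on this session now carries the precomputed
    # "Authorization: Basic ..." header.
    # response = session.get("https://danbooru.donmai.us/posts.json")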