From 4d7a4f1ecef2c96269f3590335d2834ebcdd50bf Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Mon, 25 Dec 2023 01:27:47 -0500 Subject: New upstream version 1.26.5. --- CHANGELOG.md | 28 +++ PKG-INFO | 43 ++++++- README.rst | 41 ++++++- data/man/gallery-dl.1 | 2 +- data/man/gallery-dl.conf.5 | 66 +++++------ gallery_dl.egg-info/PKG-INFO | 43 ++++++- gallery_dl.egg-info/SOURCES.txt | 1 + gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/aryion.py | 2 +- gallery_dl/extractor/danbooru.py | 21 +++- gallery_dl/extractor/deviantart.py | 13 +- gallery_dl/extractor/exhentai.py | 93 ++++++++------- gallery_dl/extractor/foolfuuka.py | 2 +- gallery_dl/extractor/idolcomplex.py | 4 +- gallery_dl/extractor/imgbb.py | 2 +- gallery_dl/extractor/inkbunny.py | 38 +++++- gallery_dl/extractor/instagram.py | 2 +- gallery_dl/extractor/kemonoparty.py | 2 +- gallery_dl/extractor/mangadex.py | 2 +- gallery_dl/extractor/mastodon.py | 4 +- gallery_dl/extractor/myhentaigallery.py | 6 +- gallery_dl/extractor/newgrounds.py | 4 +- gallery_dl/extractor/nijie.py | 12 +- gallery_dl/extractor/oauth.py | 2 +- gallery_dl/extractor/patreon.py | 9 ++ gallery_dl/extractor/philomena.py | 2 +- gallery_dl/extractor/pillowfort.py | 2 +- gallery_dl/extractor/pinterest.py | 37 ------ gallery_dl/extractor/pixiv.py | 4 +- gallery_dl/extractor/plurk.py | 2 +- gallery_dl/extractor/postmill.py | 203 ++++++++++++++++++++++++++++++++ gallery_dl/extractor/reactor.py | 2 +- gallery_dl/extractor/readcomiconline.py | 2 +- gallery_dl/extractor/reddit.py | 4 +- gallery_dl/extractor/sankaku.py | 2 +- gallery_dl/extractor/shimmie2.py | 41 +++++-- gallery_dl/extractor/subscribestar.py | 2 +- gallery_dl/extractor/tapas.py | 2 +- gallery_dl/extractor/tsumino.py | 2 +- gallery_dl/extractor/tumblr.py | 118 ++++++++++--------- gallery_dl/extractor/twibooru.py | 4 +- gallery_dl/extractor/twitter.py | 72 +---------- gallery_dl/extractor/vipergirls.py | 2 +- gallery_dl/extractor/vk.py | 2 +- gallery_dl/job.py | 2 +- gallery_dl/oauth.py | 2 +- gallery_dl/postprocessor/common.py | 2 +- gallery_dl/util.py | 7 ++ gallery_dl/version.py | 2 +- 49 files changed, 643 insertions(+), 320 deletions(-) create mode 100644 gallery_dl/extractor/postmill.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 88dbc44..8907e07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,33 @@ # Changelog +## 1.26.5 - 2023-12-23 +### Extractors +#### Additions +- [deviantart] add `intermediary` option ([#4955](https://github.com/mikf/gallery-dl/issues/4955)) +- [inkbunny] add `unread` extractor ([#4934](https://github.com/mikf/gallery-dl/issues/4934)) +- [mastodon] support non-numeric status IDs ([#4936](https://github.com/mikf/gallery-dl/issues/4936)) +- [myhentaigallery] recognize `/g/` URLs ([#4920](https://github.com/mikf/gallery-dl/issues/4920)) +- [postmill] add support ([#4917](https://github.com/mikf/gallery-dl/issues/4917), [#4919](https://github.com/mikf/gallery-dl/issues/4919)) +- [shimmie2] support `rule34hentai.net` ([#861](https://github.com/mikf/gallery-dl/issues/861), [#4789](https://github.com/mikf/gallery-dl/issues/4789), [#4945](https://github.com/mikf/gallery-dl/issues/4945)) +#### Fixes +- [deviantart] add workaround for integer `client-id` values ([#4924](https://github.com/mikf/gallery-dl/issues/4924)) +- [exhentai] fix error for infinite `fallback-retries` ([#4911](https://github.com/mikf/gallery-dl/issues/4911)) +- [inkbunny] stop pagination on empty results +- [patreon] fix bootstrap data extraction again 
([#4904](https://github.com/mikf/gallery-dl/issues/4904)) +- [tumblr] fix exception after waiting for rate limit ([#4916](https://github.com/mikf/gallery-dl/issues/4916)) +#### Improvements +- [exhentai] output continuation URL when interrupted ([#4782](https://github.com/mikf/gallery-dl/issues/4782)) +- [inkbunny] improve `/submissionsviewall.php` patterns ([#4934](https://github.com/mikf/gallery-dl/issues/4934)) +- [tumblr] support infinite `fallback-retries` +- [twitter] default to `tweets` timeline when `replies` are enabled ([#4953](https://github.com/mikf/gallery-dl/issues/4953)) +#### Metadata +- [danbooru] provide `tags` as list ([#4942](https://github.com/mikf/gallery-dl/issues/4942)) +- [deviantart] set `is_original` for intermediary URLs to `false` +- [twitter] remove `date_liked` ([#3850](https://github.com/mikf/gallery-dl/issues/3850), [#4108](https://github.com/mikf/gallery-dl/issues/4108), [#4657](https://github.com/mikf/gallery-dl/issues/4657)) +### Docker +- add Docker instructions to README ([#4850](https://github.com/mikf/gallery-dl/issues/4850)) +- fix auto-generation of `latest` tags + ## 1.26.4 - 2023-12-10 ### Extractors #### Additions diff --git a/PKG-INFO b/PKG-INFO index e75fd05..bdd0025 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.26.4 +Version: 1.26.5 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -112,9 +112,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds @@ -172,6 +172,43 @@ For macOS users with MacPorts: sudo port install gallery-dl +Docker +-------- +Using the Dockerfile in the repository: + +.. code:: bash + + git clone https://github.com/mikf/gallery-dl.git + cd gallery-dl/ + docker build -t gallery-dl:latest . + +Pulling image from `Docker Hub `__: + +.. code:: bash + + docker pull mikf123/gallery-dl + docker tag mikf123/gallery-dl gallery-dl + +Pulling image from `GitHub Container Registry `__: + +.. code:: bash + + docker pull ghcr.io/mikf/gallery-dl + docker tag ghcr.io/mikf/gallery-dl gallery-dl + +To run the container you will probably want to attach some directories on the host so that the config file and downloads can persist across runs. + +Make sure to either download the example config file reference in the repo and place it in the mounted volume location or touch an empty file there. + +If you gave the container a different tag or are using podman then make sure you adjust. Run ``docker image ls`` to check the name if you are not sure. + +This will remove the container after every use so you will always have a fresh environment for it to run. If you setup a ci-cd pipeline to autobuild the container you can also add a ``--pull=newer`` flag so that when you run it docker will check to see if there is a newer container and download it before running. + +.. code:: bash + + docker run --rm -v $HOME/Downloads/:/gallery-dl/ -v $HOME/.config/gallery-dl/gallery-dl.conf:/etc/gallery-dl.conf -it gallery-dl:latest + +You can also add an alias to your shell for "gallery-dl" or create a simple bash script and drop it somewhere in your $PATH to act as a shim for this command. 
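For instance, a minimal shim script along the following lines could be dropped into ``~/.local/bin/gallery-dl``; the script path, image tag, and mount locations are assumptions taken from the commands above, so adjust them to your setup:

.. code:: bash

    #!/bin/sh
    # Forward all arguments to a throwaway gallery-dl container,
    # mounting the download directory and config file from the host.
    exec docker run --rm -it \
        -v "$HOME/Downloads/:/gallery-dl/" \
        -v "$HOME/.config/gallery-dl/gallery-dl.conf:/etc/gallery-dl.conf" \
        gallery-dl:latest "$@"

Mark the script executable with ``chmod +x`` and it behaves like a locally installed ``gallery-dl``.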
Usage ===== diff --git a/README.rst b/README.rst index 5603929..782c8de 100644 --- a/README.rst +++ b/README.rst @@ -72,9 +72,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds @@ -132,6 +132,43 @@ For macOS users with MacPorts: sudo port install gallery-dl +Docker +-------- +Using the Dockerfile in the repository: + +.. code:: bash + + git clone https://github.com/mikf/gallery-dl.git + cd gallery-dl/ + docker build -t gallery-dl:latest . + +Pulling image from `Docker Hub `__: + +.. code:: bash + + docker pull mikf123/gallery-dl + docker tag mikf123/gallery-dl gallery-dl + +Pulling image from `GitHub Container Registry `__: + +.. code:: bash + + docker pull ghcr.io/mikf/gallery-dl + docker tag ghcr.io/mikf/gallery-dl gallery-dl + +To run the container you will probably want to attach some directories on the host so that the config file and downloads can persist across runs. + +Make sure to either download the example config file reference in the repo and place it in the mounted volume location or touch an empty file there. + +If you gave the container a different tag or are using podman then make sure you adjust. Run ``docker image ls`` to check the name if you are not sure. + +This will remove the container after every use so you will always have a fresh environment for it to run. If you setup a ci-cd pipeline to autobuild the container you can also add a ``--pull=newer`` flag so that when you run it docker will check to see if there is a newer container and download it before running. + +.. code:: bash + + docker run --rm -v $HOME/Downloads/:/gallery-dl/ -v $HOME/.config/gallery-dl/gallery-dl.conf:/etc/gallery-dl.conf -it gallery-dl:latest + +You can also add an alias to your shell for "gallery-dl" or create a simple bash script and drop it somewhere in your $PATH to act as a shim for this command. Usage ===== diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 277b227..caa0d4a 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2023-12-10" "1.26.4" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2023-12-23" "1.26.5" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 95e9627..b641f29 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2023-12-10" "1.26.4" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2023-12-23" "1.26.5" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -1592,6 +1592,18 @@ Possible values are It is possible to use \f[I]"all"\f[] instead of listing all values separately. +.SS extractor.deviantart.intermediary +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +For older non-downloadable images, +download a higher-quality \f[I]/intermediary/\f[] version. + + .SS extractor.deviantart.journals .IP "Type:" 6 \f[I]string\f[] @@ -1814,8 +1826,8 @@ depending on the input URL \f[I]2\f[] .IP "Description:" 4 -Number of times a failed image gets retried. -Use \f[I]-1\f[] for infinite retries +Number of times a failed image gets retried +or \f[I]-1\f[] for infinite retries. .SS extractor.exhentai.fav @@ -3158,6 +3170,17 @@ A value of \f[I]0\f[] means no limit. 
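To illustrate the man-page options updated above, both the new ``extractor.deviantart.intermediary`` switch and the extended ``fallback-retries`` semantics can be set per invocation. A sketch, assuming ``-o`` accepts dotted configuration paths (the same keys can equally be placed in ``gallery-dl.conf``); the URLs and user name are placeholders:

.. code:: bash

    # skip the higher-quality /intermediary/ versions on DeviantArt
    gallery-dl -o extractor.deviantart.intermediary=false "https://www.deviantart.com/USER/gallery"

    # retry failed full-resolution images forever on ExHentai (-1 = infinite)
    gallery-dl -o extractor.exhentai.fallback-retries=-1 "https://exhentai.org/g/12345/67890abcdef/"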
Also search Plurk comments for URLs. +.SS extractor.[postmill].save-link-post-body +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Whether or not to save the body for link/image posts. + + .SS extractor.reactor.gif .IP "Type:" 6 \f[I]bool\f[] @@ -3626,7 +3649,8 @@ for fetching full-resolution images. \f[I]2\f[] .IP "Description:" 4 -Number of retries for fetching full-resolution images. +Number of retries for fetching full-resolution images +or \f[I]-1\f[] for infinite retries. .SS extractor.twibooru.api-key @@ -3763,8 +3787,6 @@ with enabled \f[I]conversations\f[] option for each Tweet in said timeline. Note: This requires at least 1 additional API call per initial Tweet. -Age-restricted replies cannot be expanded when using the -\f[I]syndication\f[] API. .SS extractor.twitter.include @@ -3844,36 +3866,6 @@ Known available sizes are \f[I]4096x4096\f[], \f[I]orig\f[], \f[I]large\f[], \f[I]medium\f[], and \f[I]small\f[]. -.SS extractor.twitter.syndication -.IP "Type:" 6 -.br -* \f[I]bool\f[] -.br -* \f[I]string\f[] - -.IP "Default:" 9 -\f[I]false\f[] - -.IP "Description:" 4 -Controls how to retrieve age-restricted content when not logged in. - -.br -* \f[I]false\f[]: Skip age-restricted Tweets. -.br -* \f[I]true\f[]: Download using Twitter's syndication API. -.br -* \f[I]"extended"\f[]: Try to fetch Tweet metadata using the normal API -in addition to the syndication API. This requires additional HTTP -requests in some cases (e.g. when \f[I]retweets\f[] -are enabled). - -Note: This does not apply to search results (including -\f[I]timeline strategies\f[]). -To retrieve such content from search results, you must log in and -disable "Hide sensitive content" in your \f[I]search settings -\f[]. - - .SS extractor.twitter.logout .IP "Type:" 6 \f[I]bool\f[] @@ -3979,7 +3971,7 @@ Controls the strategy / tweet source used for timeline URLs .br * \f[I]"with_replies"\f[]: \f[I]/with_replies\f[] timeline + search .br -* \f[I]"auto"\f[]: \f[I]"tweets"\f[] or \f[I]"media"\f[], depending on \f[I]retweets\f[] and \f[I]text-tweets\f[] settings +* \f[I]"auto"\f[]: \f[I]"tweets"\f[] or \f[I]"media"\f[], depending on \f[I]retweets\f[], \f[I]replies\f[], and \f[I]text-tweets\f[] settings .SS extractor.twitter.text-tweets diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index e1b709b..934609a 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.26.4 +Version: 1.26.5 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -112,9 +112,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds @@ -172,6 +172,43 @@ For macOS users with MacPorts: sudo port install gallery-dl +Docker +-------- +Using the Dockerfile in the repository: + +.. code:: bash + + git clone https://github.com/mikf/gallery-dl.git + cd gallery-dl/ + docker build -t gallery-dl:latest . + +Pulling image from `Docker Hub `__: + +.. code:: bash + + docker pull mikf123/gallery-dl + docker tag mikf123/gallery-dl gallery-dl + +Pulling image from `GitHub Container Registry `__: + +.. 
code:: bash + + docker pull ghcr.io/mikf/gallery-dl + docker tag ghcr.io/mikf/gallery-dl gallery-dl + +To run the container you will probably want to attach some directories on the host so that the config file and downloads can persist across runs. + +Make sure to either download the example config file reference in the repo and place it in the mounted volume location or touch an empty file there. + +If you gave the container a different tag or are using podman then make sure you adjust. Run ``docker image ls`` to check the name if you are not sure. + +This will remove the container after every use so you will always have a fresh environment for it to run. If you setup a ci-cd pipeline to autobuild the container you can also add a ``--pull=newer`` flag so that when you run it docker will check to see if there is a newer container and download it before running. + +.. code:: bash + + docker run --rm -v $HOME/Downloads/:/gallery-dl/ -v $HOME/.config/gallery-dl/gallery-dl.conf:/etc/gallery-dl.conf -it gallery-dl:latest + +You can also add an alias to your shell for "gallery-dl" or create a simple bash script and drop it somewhere in your $PATH to act as a shim for this command. Usage ===== diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 9bcf0b2..30cda54 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -167,6 +167,7 @@ gallery_dl/extractor/plurk.py gallery_dl/extractor/poipiku.py gallery_dl/extractor/pornhub.py gallery_dl/extractor/pornpics.py +gallery_dl/extractor/postmill.py gallery_dl/extractor/pururin.py gallery_dl/extractor/reactor.py gallery_dl/extractor/readcomiconline.py diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index d074de2..695b8b2 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -124,6 +124,7 @@ modules = [ "poipiku", "pornhub", "pornpics", + "postmill", "pururin", "reactor", "readcomiconline", diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py index 576bc83..ec86263 100644 --- a/gallery_dl/extractor/aryion.py +++ b/gallery_dl/extractor/aryion.py @@ -40,7 +40,7 @@ class AryionExtractor(Extractor): if username: self.cookies_update(self._login_impl(username, password)) - @cache(maxage=14*24*3600, keyarg=1) + @cache(maxage=14*86400, keyarg=1) def _login_impl(self, username, password): self.log.info("Logging in as %s", username) diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 9e6516e..09beb5f 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -20,7 +20,7 @@ class DanbooruExtractor(BaseExtractor): page_limit = 1000 page_start = None per_page = 200 - request_interval = 1.0 + request_interval = (0.5, 1.5) def _init(self): self.ugoira = self.config("ugoira", False) @@ -72,6 +72,25 @@ class DanbooruExtractor(BaseExtractor): post["date"] = text.parse_datetime( post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") + post["tags"] = ( + post["tag_string"].split(" ") + if post["tag_string"] else ()) + post["tags_artist"] = ( + post["tag_string_artist"].split(" ") + if post["tag_string_artist"] else ()) + post["tags_character"] = ( + post["tag_string_character"].split(" ") + if post["tag_string_character"] else ()) + post["tags_copyright"] = ( + post["tag_string_copyright"].split(" ") + if post["tag_string_copyright"] else ()) + post["tags_general"] = ( + post["tag_string_general"].split(" ") + if post["tag_string_general"] else ()) + post["tags_meta"] = 
( + post["tag_string_meta"].split(" ") + if post["tag_string_meta"] else ()) + if post["extension"] == "zip": if self.ugoira: post["frames"] = self._ugoira_frames(post) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 1852dc1..2ba47e1 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -48,6 +48,7 @@ class DeviantartExtractor(Extractor): self.quality = self.config("quality", "100") self.original = self.config("original", True) self.comments = self.config("comments", False) + self.intermediary = self.config("intermediary", True) self.api = DeviantartOAuthAPI(self) self.group = False @@ -136,12 +137,13 @@ class DeviantartExtractor(Extractor): elif self.jwt: self._update_token(deviation, content) elif content["src"].startswith("https://images-wixmp-"): - if deviation["index"] <= 790677560: + if self.intermediary and deviation["index"] <= 790677560: # https://github.com/r888888888/danbooru/issues/4069 intermediary, count = re.subn( r"(/f/[^/]+/[^/]+)/v\d+/.*", r"/intermediary\1", content["src"], 1) if count: + deviation["is_original"] = False deviation["_fallback"] = (content["src"],) content["src"] = intermediary if self.quality: @@ -1003,8 +1005,9 @@ class DeviantartOAuthAPI(): self.strategy = extractor.config("pagination") self.public = extractor.config("public", True) - self.client_id = extractor.config("client-id") - if self.client_id: + client_id = extractor.config("client-id") + if client_id: + self.client_id = str(client_id) self.client_secret = extractor.config("client-secret") else: self.client_id = self.CLIENT_ID @@ -1012,7 +1015,7 @@ class DeviantartOAuthAPI(): token = extractor.config("refresh-token") if token is None or token == "cache": - token = "#" + str(self.client_id) + token = "#" + self.client_id if not _refresh_token_cache(token): token = None self.refresh_token_key = token @@ -1578,7 +1581,7 @@ class DeviantartEclipseAPI(): return token -@cache(maxage=100*365*86400, keyarg=0) +@cache(maxage=36500*86400, keyarg=0) def _refresh_token_cache(token): if token and token[0] == "#": return None diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index a479d00..acad95c 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -26,7 +26,7 @@ class ExhentaiExtractor(Extractor): cookies_domain = ".exhentai.org" cookies_names = ("ipb_member_id", "ipb_pass_hash") root = "https://exhentai.org" - request_interval = 5.0 + request_interval = (3.0, 6.0) ciphers = "DEFAULT:!DH" LIMIT = False @@ -67,14 +67,15 @@ class ExhentaiExtractor(Extractor): if username: return self.cookies_update(self._login_impl(username, password)) - self.log.info("no username given; using e-hentai.org") - self.root = "https://e-hentai.org" - self.cookies_domain = ".e-hentai.org" - self.cookies.set("nw", "1", domain=self.cookies_domain) + if self.version == "ex": + self.log.info("No username or cookies given; using e-hentai.org") + self.root = "https://e-hentai.org" + self.cookies_domain = ".e-hentai.org" + self.cookies.set("nw", "1", domain=self.cookies_domain) self.original = False self.limits = False - @cache(maxage=90*24*3600, keyarg=1) + @cache(maxage=90*86400, keyarg=1) def _login_impl(self, username, password): self.log.info("Logging in as %s", username) @@ -124,6 +125,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): self.key_show = None self.key_next = None self.count = 0 + self.data = None def _init(self): source = self.config("source") @@ -138,11 +140,15 @@ 
class ExhentaiGalleryExtractor(ExhentaiExtractor): self.limits = False self.fallback_retries = self.config("fallback-retries", 2) - if self.fallback_retries < 0: - self.fallback_retries = float("inf") - self.original = self.config("original", True) + def finalize(self): + if self.data: + self.log.info("Use '%s/s/%s/%s-%s' as input URL " + "to continue downloading from the current position", + self.root, self.data["image_token"], + self.gallery_id, self.data["num"]) + def favorite(self, slot="0"): url = self.root + "/gallerypopups.php" params = { @@ -178,32 +184,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): self.gallery_token = part.split("/")[1] gpage = self._gallery_page() - data = self.get_metadata(gpage) + self.data = data = self.get_metadata(gpage) self.count = text.parse_int(data["filecount"]) yield Message.Directory, data - def _validate_response(response): - # declared inside 'items()' to be able to access 'data' - if not response.history and response.headers.get( - "content-type", "").startswith("text/html"): - page = response.text - self.log.warning("'%s'", page) - - if " requires GP" in page: - gp = self.config("gp") - if gp == "stop": - raise exception.StopExtraction("Not enough GP") - elif gp == "wait": - input("Press ENTER to continue.") - return response.url - - self.log.info("Falling back to non-original downloads") - self.original = False - return data["_url_1280"] - - self._report_limits(data) - return True - images = itertools.chain( (self.image_from_page(ipage),), self.images_from_api()) for url, image in images: @@ -211,7 +195,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): if self.limits: self._check_limits(data) if "/fullimg" in url: - data["_http_validate"] = _validate_response + data["_http_validate"] = self._validate_response else: data["_http_validate"] = None yield Message.Url, url, data @@ -219,6 +203,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): fav = self.config("fav") if fav is not None: self.favorite(fav) + self.data = None def _items_hitomi(self): if self.config("metadata", False): @@ -332,7 +317,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): data["_nl"] = nl self.key_show = extr('var showkey="', '";') - self._check_509(iurl, data) + self._check_509(iurl) return url, text.nameext_from_url(url, data) def images_from_api(self): @@ -382,33 +367,51 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): data["_url_1280"] = imgurl data["_nl"] = nl - self._check_509(imgurl, data) + self._check_509(imgurl) yield url, text.nameext_from_url(url, data) request["imgkey"] = nextkey - def _report_limits(self, data): + def _validate_response(self, response): + if not response.history and response.headers.get( + "content-type", "").startswith("text/html"): + page = response.text + self.log.warning("'%s'", page) + + if " requires GP" in page: + gp = self.config("gp") + if gp == "stop": + raise exception.StopExtraction("Not enough GP") + elif gp == "wait": + input("Press ENTER to continue.") + return response.url + + self.log.info("Falling back to non-original downloads") + self.original = False + return self.data["_url_1280"] + + self._report_limits() + return True + + def _report_limits(self): ExhentaiExtractor.LIMIT = True - raise exception.StopExtraction( - "Image limit reached! 
" - "Continue with '%s/s/%s/%s-%s' as URL after resetting it.", - self.root, data["image_token"], self.gallery_id, data["num"]) + raise exception.StopExtraction("Image limit reached!") def _check_limits(self, data): if not self._remaining or data["num"] % 25 == 0: self._update_limits() self._remaining -= data["cost"] if self._remaining <= 0: - self._report_limits(data) + self._report_limits() - def _check_509(self, url, data): + def _check_509(self, url): # full 509.gif URLs # - https://exhentai.org/img/509.gif # - https://ehgt.org/g/509.gif if url.endswith(("hentai.org/img/509.gif", "ehgt.org/g/509.gif")): self.log.debug(url) - self._report_limits(data) + self._report_limits() def _update_limits(self): url = "https://e-hentai.org/home.php" @@ -449,14 +452,14 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): def _fallback_original(self, nl, fullimg): url = "{}?nl={}".format(fullimg, nl) - for _ in range(self.fallback_retries): + for _ in util.repeat(self.fallback_retries): yield url def _fallback_1280(self, nl, num, token=None): if not token: token = self.key_start - for _ in range(self.fallback_retries): + for _ in util.repeat(self.fallback_retries): url = "{}/s/{}/{}-{}?nl={}".format( self.root, token, self.gallery_id, num, nl) diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 93ac541..cedac0c 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -169,7 +169,7 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor): directory_fmt = ("{category}", "search", "{search}") pattern = BASE_PATTERN + r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)" example = "https://archived.moe/_/search/text/QUERY/" - request_interval = 1.0 + request_interval = (0.5, 1.5) def __init__(self, match): FoolfuukaExtractor.__init__(self, match) diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py index 5c7a1b3..b9e2c3d 100644 --- a/gallery_dl/extractor/idolcomplex.py +++ b/gallery_dl/extractor/idolcomplex.py @@ -25,7 +25,7 @@ class IdolcomplexExtractor(SankakuExtractor): cookies_domain = "idol.sankakucomplex.com" cookies_names = ("_idolcomplex_session",) referer = False - request_interval = (4.0, 6.0) + request_interval = (3.0, 6.0) def __init__(self, match): SankakuExtractor.__init__(self, match) @@ -67,7 +67,7 @@ class IdolcomplexExtractor(SankakuExtractor): self.logged_in = False - @cache(maxage=90*24*3600, keyarg=1) + @cache(maxage=90*86400, keyarg=1) def _login_impl(self, username, password): self.log.info("Logging in as %s", username) diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py index 6c0684e..b926cb2 100644 --- a/gallery_dl/extractor/imgbb.py +++ b/gallery_dl/extractor/imgbb.py @@ -64,7 +64,7 @@ class ImgbbExtractor(Extractor): if username: self.cookies_update(self._login_impl(username, password)) - @cache(maxage=360*24*3600, keyarg=1) + @cache(maxage=365*86400, keyarg=1) def _login_impl(self, username, password): self.log.info("Logging in as %s", username) diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py index 4ad37fc..62586af 100644 --- a/gallery_dl/extractor/inkbunny.py +++ b/gallery_dl/extractor/inkbunny.py @@ -103,7 +103,8 @@ class InkbunnyPoolExtractor(InkbunnyExtractor): subcategory = "pool" pattern = (BASE_PATTERN + r"/(?:" r"poolview_process\.php\?pool_id=(\d+)|" - r"submissionsviewall\.php\?([^#]+&mode=pool&[^#]+))") + r"submissionsviewall\.php" + r"\?((?:[^#]+&)?mode=pool(?:&[^#]+)?))") example = 
"https://inkbunny.net/poolview_process.php?pool_id=12345" def __init__(self, match): @@ -133,7 +134,8 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor): subcategory = "favorite" pattern = (BASE_PATTERN + r"/(?:" r"userfavorites_process\.php\?favs_user_id=(\d+)|" - r"submissionsviewall\.php\?([^#]+&mode=userfavs&[^#]+))") + r"submissionsviewall\.php" + r"\?((?:[^#]+&)?mode=userfavs(?:&[^#]+)?))") example = ("https://inkbunny.net/userfavorites_process.php" "?favs_user_id=12345") @@ -161,11 +163,31 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor): return self.api.search(params) +class InkbunnyUnreadExtractor(InkbunnyExtractor): + """Extractor for unread inkbunny submissions""" + subcategory = "unread" + pattern = (BASE_PATTERN + r"/submissionsviewall\.php" + r"\?((?:[^#]+&)?mode=unreadsubs(?:&[^#]+)?)") + example = ("https://inkbunny.net/submissionsviewall.php" + "?text=&mode=unreadsubs&type=") + + def __init__(self, match): + InkbunnyExtractor.__init__(self, match) + self.params = text.parse_query(match.group(1)) + + def posts(self): + params = self.params.copy() + params.pop("rid", None) + params.pop("mode", None) + params["unread_submissions"] = "yes" + return self.api.search(params) + + class InkbunnySearchExtractor(InkbunnyExtractor): """Extractor for inkbunny search results""" subcategory = "search" - pattern = (BASE_PATTERN + - r"/submissionsviewall\.php\?([^#]+&mode=search&[^#]+)") + pattern = (BASE_PATTERN + r"/submissionsviewall\.php" + r"\?((?:[^#]+&)?mode=search(?:&[^#]+)?)") example = ("https://inkbunny.net/submissionsviewall.php" "?text=TAG&mode=search&type=") @@ -201,7 +223,8 @@ class InkbunnyFollowingExtractor(InkbunnyExtractor): subcategory = "following" pattern = (BASE_PATTERN + r"/(?:" r"watchlist_process\.php\?mode=watching&user_id=(\d+)|" - r"usersviewall\.php\?([^#]+&mode=watching&[^#]+))") + r"usersviewall\.php" + r"\?((?:[^#]+&)?mode=watching(?:&[^#]+)?))") example = ("https://inkbunny.net/watchlist_process.php" "?mode=watching&user_id=12345") @@ -324,6 +347,9 @@ class InkbunnyAPI(): while True: data = self._call("search", params) + if not data["submissions"]: + return + yield from self.detail(data["submissions"]) if data["page"] >= data["pages_count"]: @@ -334,7 +360,7 @@ class InkbunnyAPI(): params["page"] += 1 -@cache(maxage=360*24*3600, keyarg=1) +@cache(maxage=365*86400, keyarg=1) def _authenticate_impl(api, username, password): api.extractor.log.info("Logging in as %s", username) diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 8ec6741..6eae7db 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -977,7 +977,7 @@ class InstagramGraphqlAPI(): variables["after"] = extr._update_cursor(info["end_cursor"]) -@cache(maxage=90*24*3600, keyarg=1) +@cache(maxage=90*86400, keyarg=1) def _login_impl(extr, username, password): extr.log.error("Login with username & password is no longer supported. 
" "Use browser cookies instead.") diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index cba6211..c24e57d 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -129,7 +129,7 @@ class KemonopartyExtractor(Extractor): self.cookies_update(self._login_impl( (username, self.cookies_domain), password)) - @cache(maxage=28*24*3600, keyarg=1) + @cache(maxage=28*86400, keyarg=1) def _login_impl(self, username, password): username = username[0] self.log.info("Logging in as %s", username) diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index dbaf4cb..94bea57 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -266,6 +266,6 @@ class MangadexAPI(): return -@cache(maxage=28*24*3600, keyarg=0) +@cache(maxage=28*86400, keyarg=0) def _refresh_token_cache(username): return None diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py index c5fe840..0b63d6c 100644 --- a/gallery_dl/extractor/mastodon.py +++ b/gallery_dl/extractor/mastodon.py @@ -152,7 +152,7 @@ class MastodonFollowingExtractor(MastodonExtractor): class MastodonStatusExtractor(MastodonExtractor): """Extractor for images from a status""" subcategory = "status" - pattern = BASE_PATTERN + r"/@[^/?#]+/(\d+)" + pattern = BASE_PATTERN + r"/@[^/?#]+/(?!following)([^/?#]+)" example = "https://mastodon.social/@USER/12345" def statuses(self): @@ -277,6 +277,6 @@ class MastodonAPI(): params = None -@cache(maxage=100*365*24*3600, keyarg=0) +@cache(maxage=36500*86400, keyarg=0) def _access_token_cache(instance): return None diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py index 33a2284..5e8179e 100644 --- a/gallery_dl/extractor/myhentaigallery.py +++ b/gallery_dl/extractor/myhentaigallery.py @@ -16,12 +16,12 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor): root = "https://myhentaigallery.com" directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}") pattern = (r"(?:https?://)?myhentaigallery\.com" - r"/gallery/(?:thumbnails|show)/(\d+)") - example = "https://myhentaigallery.com/gallery/thumbnails/12345" + r"/g(?:allery/(?:thumbnails|show))?/(\d+)") + example = "https://myhentaigallery.com/g/12345" def __init__(self, match): self.gallery_id = match.group(1) - url = "{}/gallery/thumbnails/{}".format(self.root, self.gallery_id) + url = "{}/g/{}".format(self.root, self.gallery_id) GalleryExtractor.__init__(self, match, url) def _init(self): diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index a6971e8..4cdcf87 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -23,7 +23,7 @@ class NewgroundsExtractor(Extractor): root = "https://www.newgrounds.com" cookies_domain = ".newgrounds.com" cookies_names = ("NG_GG_username", "vmk1du5I8m") - request_interval = 1.0 + request_interval = (0.5, 1.5) def __init__(self, match): Extractor.__init__(self, match) @@ -98,7 +98,7 @@ class NewgroundsExtractor(Extractor): if username: self.cookies_update(self._login_impl(username, password)) - @cache(maxage=360*24*3600, keyarg=1) + @cache(maxage=365*86400, keyarg=1) def _login_impl(self, username, password): self.log.info("Logging in as %s", username) diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index 54f2942..57c3118 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -124,15 +124,15 @@ class 
NijieExtractor(AsynchronousMixin, BaseExtractor): return username, password = self._get_auth_info() - self.cookies_update(self._login_impl(username, password)) + if username: + return self.cookies_update(self._login_impl(username, password)) - @cache(maxage=90*24*3600, keyarg=1) - def _login_impl(self, username, password): - if not username or not password: - raise exception.AuthenticationError( - "Username and password required") + raise exception.AuthenticationError("Username and password required") + @cache(maxage=90*86400, keyarg=1) + def _login_impl(self, username, password): self.log.info("Logging in as %s", username) + url = "{}/login_int.php".format(self.root) data = {"email": username, "password": password, "save": "on"} diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index 65db94d..1690160 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -376,7 +376,7 @@ class OAuthMastodon(OAuthBase): cache=mastodon._access_token_cache, ) - @cache(maxage=10*365*24*3600, keyarg=1) + @cache(maxage=36500*86400, keyarg=1) def _register(self, instance): self.log.info("Registering application for '%s'", instance) diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index fb560e9..6c2f39d 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -249,6 +249,15 @@ class PatreonExtractor(Extractor): return [genmap[ft] for ft in filetypes] def _extract_bootstrap(self, page): + data = text.extr( + page, 'id="__NEXT_DATA__" type="application/json">', '')) + date = text.parse_datetime(extr( + '')) + username = extr( + '') + post_canonical_url = text.unescape(extr( + '')) + + url = text.unescape(extr( + '

', + '') + + match = self._search_canonical_url(post_canonical_url) + forum = match.group(1) + id = int(match.group(2)) + + is_text_post = url.startswith("/") + is_image_post = self._search_image_tag(page) is not None + data = { + "title": title, + "date": date, + "username": username, + "forum": forum, + "id": id, + "flair": [text.unescape(i) for i in text.extract_iter( + page, '', '')], + "instance": self.instance, + } + + urls = [] + if is_text_post or self.save_link_post_body: + urls.append((Message.Url, "text:" + body)) + + if is_image_post: + urls.append((Message.Url, url)) + elif not is_text_post: + urls.append((Message.Queue, url)) + + data["count"] = len(urls) + yield Message.Directory, data + for data["num"], (msg, url) in enumerate(urls, 1): + if url.startswith("text:"): + data["filename"], data["extension"] = "", "htm" + else: + data = text.nameext_from_url(url, data) + + yield msg, url, data + + +class PostmillSubmissionsExtractor(PostmillExtractor): + """Base class for Postmill submissions extractors""" + whitelisted_parameters = () + + def __init__(self, match): + PostmillExtractor.__init__(self, match) + groups = match.groups() + self.base = groups[-3] + self.sorting_path = groups[-2] or "" + self.query = {key: value for key, value in text.parse_query( + groups[-1]).items() if self.acceptable_query(key)} + + def items(self): + url = self.root + self.base + self.sorting_path + + while url: + response = self.request(url, params=self.query) + if response.history: + redirect_url = response.url + if redirect_url == self.root + "/login": + raise exception.StopExtraction( + "HTTP redirect to login page (%s)", redirect_url) + page = response.text + + for nav in text.extract_iter(page, + ''): + post_url = text.unescape(text.extr(nav, '