| field | value |
|---|---|
| author | 2023-12-25 01:27:52 -0500 |
| committer | 2023-12-25 01:27:52 -0500 |
| commit | 98c6e469f590f6891c51c5febe1f813b2dc977da (patch) |
| tree | 9d6edc52e25f4360179bbe8b1c276bbc42b5ab16 |
| parent | db32caa91551d41b9af8180dbf039d42856e2c47 (diff) |
| parent | 4d7a4f1ecef2c96269f3590335d2834ebcdd50bf (diff) |
Update upstream source from tag 'upstream/1.26.5'
Update to upstream version '1.26.5'
with Debian dir 1dc7837c0ebb3638f03ef761990f5066c0499e14
49 files changed, 643 insertions, 320 deletions
```diff
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 88dbc44..8907e07 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,33 @@
 # Changelog
+## 1.26.5 - 2023-12-23
+### Extractors
+#### Additions
+- [deviantart] add `intermediary` option ([#4955](https://github.com/mikf/gallery-dl/issues/4955))
+- [inkbunny] add `unread` extractor ([#4934](https://github.com/mikf/gallery-dl/issues/4934))
+- [mastodon] support non-numeric status IDs ([#4936](https://github.com/mikf/gallery-dl/issues/4936))
+- [myhentaigallery] recognize `/g/` URLs ([#4920](https://github.com/mikf/gallery-dl/issues/4920))
+- [postmill] add support ([#4917](https://github.com/mikf/gallery-dl/issues/4917), [#4919](https://github.com/mikf/gallery-dl/issues/4919))
+- [shimmie2] support `rule34hentai.net` ([#861](https://github.com/mikf/gallery-dl/issues/861), [#4789](https://github.com/mikf/gallery-dl/issues/4789), [#4945](https://github.com/mikf/gallery-dl/issues/4945))
+#### Fixes
+- [deviantart] add workaround for integer `client-id` values ([#4924](https://github.com/mikf/gallery-dl/issues/4924))
+- [exhentai] fix error for infinite `fallback-retries` ([#4911](https://github.com/mikf/gallery-dl/issues/4911))
+- [inkbunny] stop pagination on empty results
+- [patreon] fix bootstrap data extraction again ([#4904](https://github.com/mikf/gallery-dl/issues/4904))
+- [tumblr] fix exception after waiting for rate limit ([#4916](https://github.com/mikf/gallery-dl/issues/4916))
+#### Improvements
+- [exhentai] output continuation URL when interrupted ([#4782](https://github.com/mikf/gallery-dl/issues/4782))
+- [inkbunny] improve `/submissionsviewall.php` patterns ([#4934](https://github.com/mikf/gallery-dl/issues/4934))
+- [tumblr] support infinite `fallback-retries`
+- [twitter] default to `tweets` timeline when `replies` are enabled ([#4953](https://github.com/mikf/gallery-dl/issues/4953))
+#### Metadata
+- [danbooru] provide `tags` as list ([#4942](https://github.com/mikf/gallery-dl/issues/4942))
+- [deviantart] set `is_original` for intermediary URLs to `false`
+- [twitter] remove `date_liked` ([#3850](https://github.com/mikf/gallery-dl/issues/3850), [#4108](https://github.com/mikf/gallery-dl/issues/4108), [#4657](https://github.com/mikf/gallery-dl/issues/4657))
+### Docker
+- add Docker instructions to README ([#4850](https://github.com/mikf/gallery-dl/issues/4850))
+- fix auto-generation of `latest` tags
+
 ## 1.26.4 - 2023-12-10
 ### Extractors
 #### Additions
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery_dl
-Version: 1.26.4
+Version: 1.26.5
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -112,9 +112,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.bin>`__
 
 
 Nightly Builds
@@ -172,6 +172,43 @@ For macOS users with MacPorts:
 
     sudo port install gallery-dl
 
 
+Docker
+--------
+Using the Dockerfile in the repository:
+
+.. code:: bash
+
+    git clone https://github.com/mikf/gallery-dl.git
+    cd gallery-dl/
+    docker build -t gallery-dl:latest .
+
+Pulling image from `Docker Hub <https://hub.docker.com/r/mikf123/gallery-dl>`__:
+
+.. code:: bash
+
+    docker pull mikf123/gallery-dl
+    docker tag mikf123/gallery-dl gallery-dl
+
+Pulling image from `GitHub Container Registry <https://github.com/mikf/gallery-dl/pkgs/container/gallery-dl>`__:
+
+.. code:: bash
+
+    docker pull ghcr.io/mikf/gallery-dl
+    docker tag ghcr.io/mikf/gallery-dl gallery-dl
+
+To run the container, you will probably want to attach some directories on the host so that the config file and downloads can persist across runs.
+
+Make sure to either download the example config file referenced in the repo and place it in the mounted volume location, or touch an empty file there.
+
+If you gave the container a different tag, or are using podman, adjust the commands accordingly. Run ``docker image ls`` to check the image name if you are not sure.
+
+The ``--rm`` flag in the command below removes the container after every use, so you always start from a fresh environment. If you set up a CI/CD pipeline to auto-build the container, you can also add a ``--pull=newer`` flag so that docker checks for a newer image and downloads it before running.
+
+.. code:: bash
+
+    docker run --rm -v $HOME/Downloads/:/gallery-dl/ -v $HOME/.config/gallery-dl/gallery-dl.conf:/etc/gallery-dl.conf -it gallery-dl:latest
+
+You can also add a shell alias for ``gallery-dl``, or create a simple bash script and drop it somewhere in your ``$PATH``, to act as a shim for this command.
 
 Usage
 =====
```
```diff
@@ -72,9 +72,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.bin>`__
 
 
 Nightly Builds
@@ -132,6 +132,43 @@ For macOS users with MacPorts:
 
     sudo port install gallery-dl
 
 
+Docker
+--------
+Using the Dockerfile in the repository:
+
+.. code:: bash
+
+    git clone https://github.com/mikf/gallery-dl.git
+    cd gallery-dl/
+    docker build -t gallery-dl:latest .
+
+Pulling image from `Docker Hub <https://hub.docker.com/r/mikf123/gallery-dl>`__:
+
+.. code:: bash
+
+    docker pull mikf123/gallery-dl
+    docker tag mikf123/gallery-dl gallery-dl
+
+Pulling image from `GitHub Container Registry <https://github.com/mikf/gallery-dl/pkgs/container/gallery-dl>`__:
+
+.. code:: bash
+
+    docker pull ghcr.io/mikf/gallery-dl
+    docker tag ghcr.io/mikf/gallery-dl gallery-dl
+
+To run the container, you will probably want to attach some directories on the host so that the config file and downloads can persist across runs.
+
+Make sure to either download the example config file referenced in the repo and place it in the mounted volume location, or touch an empty file there.
+
+If you gave the container a different tag, or are using podman, adjust the commands accordingly. Run ``docker image ls`` to check the image name if you are not sure.
+
+The ``--rm`` flag in the command below removes the container after every use, so you always start from a fresh environment. If you set up a CI/CD pipeline to auto-build the container, you can also add a ``--pull=newer`` flag so that docker checks for a newer image and downloads it before running.
+
+.. code:: bash
+
+    docker run --rm -v $HOME/Downloads/:/gallery-dl/ -v $HOME/.config/gallery-dl/gallery-dl.conf:/etc/gallery-dl.conf -it gallery-dl:latest
+
+You can also add a shell alias for ``gallery-dl``, or create a simple bash script and drop it somewhere in your ``$PATH``, to act as a shim for this command.
 
 Usage
 =====
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 277b227..caa0d4a 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-12-10" "1.26.4" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-12-23" "1.26.5" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 95e9627..b641f29 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-12-10" "1.26.4" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-12-23" "1.26.5" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
 .\" disable justification (adjust text to left margin only)
@@ -1592,6 +1592,18 @@ Possible values are
 It is possible to use \f[I]"all"\f[] instead of listing all values separately.
 
+.SS extractor.deviantart.intermediary
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+For older non-downloadable images,
+download a higher-quality \f[I]/intermediary/\f[] version.
+
+
 .SS extractor.deviantart.journals
 .IP "Type:" 6
 \f[I]string\f[]
@@ -1814,8 +1826,8 @@ depending on the input URL
 \f[I]2\f[]
 
 .IP "Description:" 4
-Number of times a failed image gets retried.
-Use \f[I]-1\f[] for infinite retries
+Number of times a failed image gets retried
+or \f[I]-1\f[] for infinite retries.
 
 .SS extractor.exhentai.fav
@@ -3158,6 +3170,17 @@ A value of \f[I]0\f[] means no limit.
 Also search Plurk comments for URLs.
 
 
+.SS extractor.[postmill].save-link-post-body
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Whether or not to save the body for link/image posts.
+
+
 .SS extractor.reactor.gif
 .IP "Type:" 6
 \f[I]bool\f[]
@@ -3626,7 +3649,8 @@ for fetching full-resolution images.
 \f[I]2\f[]
 
 .IP "Description:" 4
-Number of retries for fetching full-resolution images.
+Number of retries for fetching full-resolution images
+or \f[I]-1\f[] for infinite retries.
 
 .SS extractor.twibooru.api-key
@@ -3763,8 +3787,6 @@ with enabled \f[I]conversations\f[] option
 for each Tweet in said timeline.
 
 Note: This requires at least 1 additional API call per initial Tweet.
-Age-restricted replies cannot be expanded when using the
-\f[I]syndication\f[] API.
 
 .SS extractor.twitter.include
@@ -3844,36 +3866,6 @@ Known available sizes are
 \f[I]4096x4096\f[], \f[I]orig\f[], \f[I]large\f[],
 \f[I]medium\f[], and \f[I]small\f[].
 
-.SS extractor.twitter.syndication
-.IP "Type:" 6
-.br
-* \f[I]bool\f[]
-.br
-* \f[I]string\f[]
-
-.IP "Default:" 9
-\f[I]false\f[]
-
-.IP "Description:" 4
-Controls how to retrieve age-restricted content when not logged in.
-
-.br
-* \f[I]false\f[]: Skip age-restricted Tweets.
-.br
-* \f[I]true\f[]: Download using Twitter's syndication API.
-.br
-* \f[I]"extended"\f[]: Try to fetch Tweet metadata using the normal API
-in addition to the syndication API. This requires additional HTTP
-requests in some cases (e.g. when \f[I]retweets\f[]
-are enabled).
-
-Note: This does not apply to search results (including
-\f[I]timeline strategies\f[]).
-To retrieve such content from search results, you must log in and
-disable "Hide sensitive content" in your \f[I]search settings
-<https://twitter.com/settings/search>\f[].
-
-
 .SS extractor.twitter.logout
 .IP "Type:" 6
 \f[I]bool\f[]
@@ -3979,7 +3971,7 @@ Controls the strategy / tweet source used for timeline URLs
 .br
 * \f[I]"with_replies"\f[]: \f[I]/with_replies\f[] timeline + search
 .br
-* \f[I]"auto"\f[]: \f[I]"tweets"\f[] or \f[I]"media"\f[], depending on \f[I]retweets\f[] and \f[I]text-tweets\f[] settings
+* \f[I]"auto"\f[]: \f[I]"tweets"\f[] or \f[I]"media"\f[], depending on \f[I]retweets\f[], \f[I]replies\f[], and \f[I]text-tweets\f[] settings
 
 .SS extractor.twitter.text-tweets
```
```diff
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index e1b709b..934609a 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery-dl
-Version: 1.26.4
+Version: 1.26.5
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -112,9 +112,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.bin>`__
 
 
 Nightly Builds
@@ -172,6 +172,43 @@ For macOS users with MacPorts:
 
     sudo port install gallery-dl
 
 
+Docker
+--------
+Using the Dockerfile in the repository:
+
+.. code:: bash
+
+    git clone https://github.com/mikf/gallery-dl.git
+    cd gallery-dl/
+    docker build -t gallery-dl:latest .
+
+Pulling image from `Docker Hub <https://hub.docker.com/r/mikf123/gallery-dl>`__:
+
+.. code:: bash
+
+    docker pull mikf123/gallery-dl
+    docker tag mikf123/gallery-dl gallery-dl
+
+Pulling image from `GitHub Container Registry <https://github.com/mikf/gallery-dl/pkgs/container/gallery-dl>`__:
+
+.. code:: bash
+
+    docker pull ghcr.io/mikf/gallery-dl
+    docker tag ghcr.io/mikf/gallery-dl gallery-dl
+
+To run the container, you will probably want to attach some directories on the host so that the config file and downloads can persist across runs.
+
+Make sure to either download the example config file referenced in the repo and place it in the mounted volume location, or touch an empty file there.
+
+If you gave the container a different tag, or are using podman, adjust the commands accordingly. Run ``docker image ls`` to check the image name if you are not sure.
+
+The ``--rm`` flag in the command below removes the container after every use, so you always start from a fresh environment. If you set up a CI/CD pipeline to auto-build the container, you can also add a ``--pull=newer`` flag so that docker checks for a newer image and downloads it before running.
+
+.. code:: bash
+
+    docker run --rm -v $HOME/Downloads/:/gallery-dl/ -v $HOME/.config/gallery-dl/gallery-dl.conf:/etc/gallery-dl.conf -it gallery-dl:latest
+
+You can also add a shell alias for ``gallery-dl``, or create a simple bash script and drop it somewhere in your ``$PATH``, to act as a shim for this command.
 
 Usage
 =====
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 9bcf0b2..30cda54 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -167,6 +167,7 @@ gallery_dl/extractor/plurk.py
 gallery_dl/extractor/poipiku.py
 gallery_dl/extractor/pornhub.py
 gallery_dl/extractor/pornpics.py
+gallery_dl/extractor/postmill.py
 gallery_dl/extractor/pururin.py
 gallery_dl/extractor/reactor.py
 gallery_dl/extractor/readcomiconline.py
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index d074de2..695b8b2 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -124,6 +124,7 @@ modules = [
     "poipiku",
     "pornhub",
     "pornpics",
+    "postmill",
     "pururin",
     "reactor",
     "readcomiconline",
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 576bc83..ec86263 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -40,7 +40,7 @@ class AryionExtractor(Extractor):
         if username:
             self.cookies_update(self._login_impl(username, password))
 
-    @cache(maxage=14*24*3600, keyarg=1)
+    @cache(maxage=14*86400, keyarg=1)
     def _login_impl(self, username, password):
         self.log.info("Logging in as %s", username)
```
```diff
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 9e6516e..09beb5f 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -20,7 +20,7 @@ class DanbooruExtractor(BaseExtractor):
     page_limit = 1000
     page_start = None
     per_page = 200
-    request_interval = 1.0
+    request_interval = (0.5, 1.5)
 
     def _init(self):
         self.ugoira = self.config("ugoira", False)
@@ -72,6 +72,25 @@ class DanbooruExtractor(BaseExtractor):
             post["date"] = text.parse_datetime(
                 post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
 
+            post["tags"] = (
+                post["tag_string"].split(" ")
+                if post["tag_string"] else ())
+            post["tags_artist"] = (
+                post["tag_string_artist"].split(" ")
+                if post["tag_string_artist"] else ())
+            post["tags_character"] = (
+                post["tag_string_character"].split(" ")
+                if post["tag_string_character"] else ())
+            post["tags_copyright"] = (
+                post["tag_string_copyright"].split(" ")
+                if post["tag_string_copyright"] else ())
+            post["tags_general"] = (
+                post["tag_string_general"].split(" ")
+                if post["tag_string_general"] else ())
+            post["tags_meta"] = (
+                post["tag_string_meta"].split(" ")
+                if post["tag_string_meta"] else ())
+
             if post["extension"] == "zip":
                 if self.ugoira:
                     post["frames"] = self._ugoira_frames(post)
```
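All six new `tags*` fields are derived the same way from Danbooru's space-separated `tag_string*` fields. As a compact illustration, the unrolled assignments above are equivalent to this loop (the `split_tags` helper is hypothetical, not part of the codebase):

```python
def split_tags(post):
    # Derive list-valued "tags*" fields from the space-separated
    # "tag_string*" fields; empty strings become empty tuples,
    # mirroring the unrolled code in the diff above.
    for category in ("", "_artist", "_character",
                     "_copyright", "_general", "_meta"):
        value = post["tag_string" + category]
        post["tags" + category] = value.split(" ") if value else ()

post = {"tag_string": "1girl solo", "tag_string_artist": "",
        "tag_string_character": "", "tag_string_copyright": "",
        "tag_string_general": "1girl solo", "tag_string_meta": ""}
split_tags(post)
print(post["tags"])         # ['1girl', 'solo']
print(post["tags_artist"])  # ()
```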
```diff
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 1852dc1..2ba47e1 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -48,6 +48,7 @@ class DeviantartExtractor(Extractor):
         self.quality = self.config("quality", "100")
         self.original = self.config("original", True)
         self.comments = self.config("comments", False)
+        self.intermediary = self.config("intermediary", True)
         self.api = DeviantartOAuthAPI(self)
         self.group = False
@@ -136,12 +137,13 @@ class DeviantartExtractor(Extractor):
             elif self.jwt:
                 self._update_token(deviation, content)
             elif content["src"].startswith("https://images-wixmp-"):
-                if deviation["index"] <= 790677560:
+                if self.intermediary and deviation["index"] <= 790677560:
                     # https://github.com/r888888888/danbooru/issues/4069
                     intermediary, count = re.subn(
                         r"(/f/[^/]+/[^/]+)/v\d+/.*",
                         r"/intermediary\1", content["src"], 1)
                     if count:
+                        deviation["is_original"] = False
                         deviation["_fallback"] = (content["src"],)
                         content["src"] = intermediary
             if self.quality:
@@ -1003,8 +1005,9 @@ class DeviantartOAuthAPI():
         self.strategy = extractor.config("pagination")
         self.public = extractor.config("public", True)
 
-        self.client_id = extractor.config("client-id")
-        if self.client_id:
+        client_id = extractor.config("client-id")
+        if client_id:
+            self.client_id = str(client_id)
             self.client_secret = extractor.config("client-secret")
         else:
             self.client_id = self.CLIENT_ID
@@ -1012,7 +1015,7 @@
 
         token = extractor.config("refresh-token")
         if token is None or token == "cache":
-            token = "#" + str(self.client_id)
+            token = "#" + self.client_id
             if not _refresh_token_cache(token):
                 token = None
         self.refresh_token_key = token
@@ -1578,7 +1581,7 @@ class DeviantartEclipseAPI():
         return token
 
 
-@cache(maxage=100*365*86400, keyarg=0)
+@cache(maxage=36500*86400, keyarg=0)
 def _refresh_token_cache(token):
     if token and token[0] == "#":
         return None
```
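For reference, this is what the `re.subn` rewrite above does to a wixmp CDN URL; the URL here is made up for illustration:

```python
import re

# Hypothetical images-wixmp URL of the form the code above targets:
src = ("https://images-wixmp-1234.wixmp.com/f/abcd-1234/example-token"
       "/v1/fill/w_1280,h_720,q_70/sample.jpg")

# Replace the "/v.../..." rendering suffix with an /intermediary/ prefix:
intermediary, count = re.subn(
    r"(/f/[^/]+/[^/]+)/v\d+/.*", r"/intermediary\1", src, 1)

print(count)         # 1
print(intermediary)
# https://images-wixmp-1234.wixmp.com/intermediary/f/abcd-1234/example-token
```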
```diff
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index a479d00..acad95c 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -26,7 +26,7 @@ class ExhentaiExtractor(Extractor):
     cookies_domain = ".exhentai.org"
     cookies_names = ("ipb_member_id", "ipb_pass_hash")
     root = "https://exhentai.org"
-    request_interval = 5.0
+    request_interval = (3.0, 6.0)
     ciphers = "DEFAULT:!DH"
 
     LIMIT = False
@@ -67,14 +67,15 @@ class ExhentaiExtractor(Extractor):
         if username:
             return self.cookies_update(self._login_impl(username, password))
 
-        self.log.info("no username given; using e-hentai.org")
-        self.root = "https://e-hentai.org"
-        self.cookies_domain = ".e-hentai.org"
-        self.cookies.set("nw", "1", domain=self.cookies_domain)
+        if self.version == "ex":
+            self.log.info("No username or cookies given; using e-hentai.org")
+            self.root = "https://e-hentai.org"
+            self.cookies_domain = ".e-hentai.org"
+            self.cookies.set("nw", "1", domain=self.cookies_domain)
         self.original = False
         self.limits = False
 
-    @cache(maxage=90*24*3600, keyarg=1)
+    @cache(maxage=90*86400, keyarg=1)
     def _login_impl(self, username, password):
         self.log.info("Logging in as %s", username)
@@ -124,6 +125,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
         self.key_show = None
         self.key_next = None
         self.count = 0
+        self.data = None
 
     def _init(self):
         source = self.config("source")
@@ -138,11 +140,15 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             self.limits = False
 
         self.fallback_retries = self.config("fallback-retries", 2)
-        if self.fallback_retries < 0:
-            self.fallback_retries = float("inf")
         self.original = self.config("original", True)
 
+    def finalize(self):
+        if self.data:
+            self.log.info("Use '%s/s/%s/%s-%s' as input URL "
+                          "to continue downloading from the current position",
+                          self.root, self.data["image_token"],
+                          self.gallery_id, self.data["num"])
+
     def favorite(self, slot="0"):
         url = self.root + "/gallerypopups.php"
         params = {
@@ -178,32 +184,10 @@
         self.gallery_token = part.split("/")[1]
 
         gpage = self._gallery_page()
-        data = self.get_metadata(gpage)
+        self.data = data = self.get_metadata(gpage)
         self.count = text.parse_int(data["filecount"])
         yield Message.Directory, data
 
-        def _validate_response(response):
-            # declared inside 'items()' to be able to access 'data'
-            if not response.history and response.headers.get(
-                    "content-type", "").startswith("text/html"):
-                page = response.text
-                self.log.warning("'%s'", page)
-
-                if " requires GP" in page:
-                    gp = self.config("gp")
-                    if gp == "stop":
-                        raise exception.StopExtraction("Not enough GP")
-                    elif gp == "wait":
-                        input("Press ENTER to continue.")
-                        return response.url
-
-                self.log.info("Falling back to non-original downloads")
-                self.original = False
-                return data["_url_1280"]
-
-            self._report_limits(data)
-            return True
-
         images = itertools.chain(
             (self.image_from_page(ipage),), self.images_from_api())
         for url, image in images:
@@ -211,7 +195,7 @@
             if self.limits:
                 self._check_limits(data)
             if "/fullimg" in url:
-                data["_http_validate"] = _validate_response
+                data["_http_validate"] = self._validate_response
             else:
                 data["_http_validate"] = None
             yield Message.Url, url, data
@@ -219,6 +203,7 @@
         fav = self.config("fav")
         if fav is not None:
             self.favorite(fav)
+        self.data = None
 
     def _items_hitomi(self):
         if self.config("metadata", False):
@@ -332,7 +317,7 @@
             data["_nl"] = nl
         self.key_show = extr('var showkey="', '";')
 
-        self._check_509(iurl, data)
+        self._check_509(iurl)
         return url, text.nameext_from_url(url, data)
 
     def images_from_api(self):
@@ -382,33 +367,51 @@
             data["_url_1280"] = imgurl
             data["_nl"] = nl
 
-            self._check_509(imgurl, data)
+            self._check_509(imgurl)
             yield url, text.nameext_from_url(url, data)
 
             request["imgkey"] = nextkey
 
-    def _report_limits(self, data):
+    def _validate_response(self, response):
+        if not response.history and response.headers.get(
+                "content-type", "").startswith("text/html"):
+            page = response.text
+            self.log.warning("'%s'", page)
+
+            if " requires GP" in page:
+                gp = self.config("gp")
+                if gp == "stop":
+                    raise exception.StopExtraction("Not enough GP")
+                elif gp == "wait":
+                    input("Press ENTER to continue.")
+                    return response.url
+
+            self.log.info("Falling back to non-original downloads")
+            self.original = False
+            return self.data["_url_1280"]
+
+        self._report_limits()
+        return True
+
+    def _report_limits(self):
         ExhentaiExtractor.LIMIT = True
-        raise exception.StopExtraction(
-            "Image limit reached! "
-            "Continue with '%s/s/%s/%s-%s' as URL after resetting it.",
-            self.root, data["image_token"], self.gallery_id, data["num"])
+        raise exception.StopExtraction("Image limit reached!")
 
     def _check_limits(self, data):
         if not self._remaining or data["num"] % 25 == 0:
             self._update_limits()
         self._remaining -= data["cost"]
         if self._remaining <= 0:
-            self._report_limits(data)
+            self._report_limits()
 
-    def _check_509(self, url, data):
+    def _check_509(self, url):
         # full 509.gif URLs
         # - https://exhentai.org/img/509.gif
         # - https://ehgt.org/g/509.gif
         if url.endswith(("hentai.org/img/509.gif",
                          "ehgt.org/g/509.gif")):
             self.log.debug(url)
-            self._report_limits(data)
+            self._report_limits()
 
     def _update_limits(self):
         url = "https://e-hentai.org/home.php"
@@ -449,14 +452,14 @@
     def _fallback_original(self, nl, fullimg):
         url = "{}?nl={}".format(fullimg, nl)
-        for _ in range(self.fallback_retries):
+        for _ in util.repeat(self.fallback_retries):
             yield url
 
     def _fallback_1280(self, nl, num, token=None):
         if not token:
             token = self.key_start
-        for _ in range(self.fallback_retries):
+        for _ in util.repeat(self.fallback_retries):
            url = "{}/s/{}/{}-{}?nl={}".format(
                self.root, token, self.gallery_id, num, nl)
```
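Both `_fallback_*` generators now iterate over `util.repeat(...)` instead of `range(...)`, which is what makes a `fallback-retries` value of `-1` mean "retry forever" (#4911): the old code converted `-1` to `float("inf")`, which `range()` cannot accept. A plausible minimal implementation of such a helper, assuming only the semantics implied by this diff (the real one lives in `gallery_dl/util.py`):

```python
import itertools

def repeat(times):
    # Negative values repeat indefinitely ("-1 for infinite retries");
    # otherwise yield exactly 'times' items, like range() would.
    if times < 0:
        return itertools.repeat(None)
    return itertools.repeat(None, times)

print(list(repeat(2)))   # [None, None] -> two retries
# repeat(-1) yields endlessly, with no float("inf") workaround needed.
```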
```diff
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 93ac541..cedac0c 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -169,7 +169,7 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
     directory_fmt = ("{category}", "search", "{search}")
     pattern = BASE_PATTERN + r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
     example = "https://archived.moe/_/search/text/QUERY/"
-    request_interval = 1.0
+    request_interval = (0.5, 1.5)
 
     def __init__(self, match):
         FoolfuukaExtractor.__init__(self, match)
```
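Several extractors in this release (danbooru, foolfuuka, newgrounds, philomena, pixiv, plurk, twibooru, vk, and others) change a fixed `request_interval` into a `(min, max)` tuple. Presumably such a tuple is sampled per request to jitter the delay between requests; a sketch of that reading:

```python
import random

def wait_time(interval):
    # Assumption: a (min, max) tuple yields a random delay in that
    # range, while a plain number stays a fixed delay.
    if isinstance(interval, tuple):
        return random.uniform(*interval)
    return interval

print(wait_time(1.0))         # always 1.0
print(wait_time((0.5, 1.5)))  # e.g. 0.837..., varies per call
```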
" - "Continue with '%s/s/%s/%s-%s' as URL after resetting it.", - self.root, data["image_token"], self.gallery_id, data["num"]) + raise exception.StopExtraction("Image limit reached!") def _check_limits(self, data): if not self._remaining or data["num"] % 25 == 0: self._update_limits() self._remaining -= data["cost"] if self._remaining <= 0: - self._report_limits(data) + self._report_limits() - def _check_509(self, url, data): + def _check_509(self, url): # full 509.gif URLs # - https://exhentai.org/img/509.gif # - https://ehgt.org/g/509.gif if url.endswith(("hentai.org/img/509.gif", "ehgt.org/g/509.gif")): self.log.debug(url) - self._report_limits(data) + self._report_limits() def _update_limits(self): url = "https://e-hentai.org/home.php" @@ -449,14 +452,14 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): def _fallback_original(self, nl, fullimg): url = "{}?nl={}".format(fullimg, nl) - for _ in range(self.fallback_retries): + for _ in util.repeat(self.fallback_retries): yield url def _fallback_1280(self, nl, num, token=None): if not token: token = self.key_start - for _ in range(self.fallback_retries): + for _ in util.repeat(self.fallback_retries): url = "{}/s/{}/{}-{}?nl={}".format( self.root, token, self.gallery_id, num, nl) diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 93ac541..cedac0c 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -169,7 +169,7 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor): directory_fmt = ("{category}", "search", "{search}") pattern = BASE_PATTERN + r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)" example = "https://archived.moe/_/search/text/QUERY/" - request_interval = 1.0 + request_interval = (0.5, 1.5) def __init__(self, match): FoolfuukaExtractor.__init__(self, match) diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py index 5c7a1b3..b9e2c3d 100644 --- a/gallery_dl/extractor/idolcomplex.py +++ b/gallery_dl/extractor/idolcomplex.py @@ -25,7 +25,7 @@ class IdolcomplexExtractor(SankakuExtractor): cookies_domain = "idol.sankakucomplex.com" cookies_names = ("_idolcomplex_session",) referer = False - request_interval = (4.0, 6.0) + request_interval = (3.0, 6.0) def __init__(self, match): SankakuExtractor.__init__(self, match) @@ -67,7 +67,7 @@ class IdolcomplexExtractor(SankakuExtractor): self.logged_in = False - @cache(maxage=90*24*3600, keyarg=1) + @cache(maxage=90*86400, keyarg=1) def _login_impl(self, username, password): self.log.info("Logging in as %s", username) diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py index 6c0684e..b926cb2 100644 --- a/gallery_dl/extractor/imgbb.py +++ b/gallery_dl/extractor/imgbb.py @@ -64,7 +64,7 @@ class ImgbbExtractor(Extractor): if username: self.cookies_update(self._login_impl(username, password)) - @cache(maxage=360*24*3600, keyarg=1) + @cache(maxage=365*86400, keyarg=1) def _login_impl(self, username, password): self.log.info("Logging in as %s", username) diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py index 4ad37fc..62586af 100644 --- a/gallery_dl/extractor/inkbunny.py +++ b/gallery_dl/extractor/inkbunny.py @@ -103,7 +103,8 @@ class InkbunnyPoolExtractor(InkbunnyExtractor): subcategory = "pool" pattern = (BASE_PATTERN + r"/(?:" r"poolview_process\.php\?pool_id=(\d+)|" - r"submissionsviewall\.php\?([^#]+&mode=pool&[^#]+))") + r"submissionsviewall\.php" + r"\?((?:[^#]+&)?mode=pool(?:&[^#]+)?))") example = 
"https://inkbunny.net/poolview_process.php?pool_id=12345" def __init__(self, match): @@ -133,7 +134,8 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor): subcategory = "favorite" pattern = (BASE_PATTERN + r"/(?:" r"userfavorites_process\.php\?favs_user_id=(\d+)|" - r"submissionsviewall\.php\?([^#]+&mode=userfavs&[^#]+))") + r"submissionsviewall\.php" + r"\?((?:[^#]+&)?mode=userfavs(?:&[^#]+)?))") example = ("https://inkbunny.net/userfavorites_process.php" "?favs_user_id=12345") @@ -161,11 +163,31 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor): return self.api.search(params) +class InkbunnyUnreadExtractor(InkbunnyExtractor): + """Extractor for unread inkbunny submissions""" + subcategory = "unread" + pattern = (BASE_PATTERN + r"/submissionsviewall\.php" + r"\?((?:[^#]+&)?mode=unreadsubs(?:&[^#]+)?)") + example = ("https://inkbunny.net/submissionsviewall.php" + "?text=&mode=unreadsubs&type=") + + def __init__(self, match): + InkbunnyExtractor.__init__(self, match) + self.params = text.parse_query(match.group(1)) + + def posts(self): + params = self.params.copy() + params.pop("rid", None) + params.pop("mode", None) + params["unread_submissions"] = "yes" + return self.api.search(params) + + class InkbunnySearchExtractor(InkbunnyExtractor): """Extractor for inkbunny search results""" subcategory = "search" - pattern = (BASE_PATTERN + - r"/submissionsviewall\.php\?([^#]+&mode=search&[^#]+)") + pattern = (BASE_PATTERN + r"/submissionsviewall\.php" + r"\?((?:[^#]+&)?mode=search(?:&[^#]+)?)") example = ("https://inkbunny.net/submissionsviewall.php" "?text=TAG&mode=search&type=") @@ -201,7 +223,8 @@ class InkbunnyFollowingExtractor(InkbunnyExtractor): subcategory = "following" pattern = (BASE_PATTERN + r"/(?:" r"watchlist_process\.php\?mode=watching&user_id=(\d+)|" - r"usersviewall\.php\?([^#]+&mode=watching&[^#]+))") + r"usersviewall\.php" + r"\?((?:[^#]+&)?mode=watching(?:&[^#]+)?))") example = ("https://inkbunny.net/watchlist_process.php" "?mode=watching&user_id=12345") @@ -324,6 +347,9 @@ class InkbunnyAPI(): while True: data = self._call("search", params) + if not data["submissions"]: + return + yield from self.detail(data["submissions"]) if data["page"] >= data["pages_count"]: @@ -334,7 +360,7 @@ class InkbunnyAPI(): params["page"] += 1 -@cache(maxage=360*24*3600, keyarg=1) +@cache(maxage=365*86400, keyarg=1) def _authenticate_impl(api, username, password): api.extractor.log.info("Logging in as %s", username) diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 8ec6741..6eae7db 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -977,7 +977,7 @@ class InstagramGraphqlAPI(): variables["after"] = extr._update_cursor(info["end_cursor"]) -@cache(maxage=90*24*3600, keyarg=1) +@cache(maxage=90*86400, keyarg=1) def _login_impl(extr, username, password): extr.log.error("Login with username & password is no longer supported. 
" "Use browser cookies instead.") diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index cba6211..c24e57d 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -129,7 +129,7 @@ class KemonopartyExtractor(Extractor): self.cookies_update(self._login_impl( (username, self.cookies_domain), password)) - @cache(maxage=28*24*3600, keyarg=1) + @cache(maxage=28*86400, keyarg=1) def _login_impl(self, username, password): username = username[0] self.log.info("Logging in as %s", username) diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index dbaf4cb..94bea57 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -266,6 +266,6 @@ class MangadexAPI(): return -@cache(maxage=28*24*3600, keyarg=0) +@cache(maxage=28*86400, keyarg=0) def _refresh_token_cache(username): return None diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py index c5fe840..0b63d6c 100644 --- a/gallery_dl/extractor/mastodon.py +++ b/gallery_dl/extractor/mastodon.py @@ -152,7 +152,7 @@ class MastodonFollowingExtractor(MastodonExtractor): class MastodonStatusExtractor(MastodonExtractor): """Extractor for images from a status""" subcategory = "status" - pattern = BASE_PATTERN + r"/@[^/?#]+/(\d+)" + pattern = BASE_PATTERN + r"/@[^/?#]+/(?!following)([^/?#]+)" example = "https://mastodon.social/@USER/12345" def statuses(self): @@ -277,6 +277,6 @@ class MastodonAPI(): params = None -@cache(maxage=100*365*24*3600, keyarg=0) +@cache(maxage=36500*86400, keyarg=0) def _access_token_cache(instance): return None diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py index 33a2284..5e8179e 100644 --- a/gallery_dl/extractor/myhentaigallery.py +++ b/gallery_dl/extractor/myhentaigallery.py @@ -16,12 +16,12 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor): root = "https://myhentaigallery.com" directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}") pattern = (r"(?:https?://)?myhentaigallery\.com" - r"/gallery/(?:thumbnails|show)/(\d+)") - example = "https://myhentaigallery.com/gallery/thumbnails/12345" + r"/g(?:allery/(?:thumbnails|show))?/(\d+)") + example = "https://myhentaigallery.com/g/12345" def __init__(self, match): self.gallery_id = match.group(1) - url = "{}/gallery/thumbnails/{}".format(self.root, self.gallery_id) + url = "{}/g/{}".format(self.root, self.gallery_id) GalleryExtractor.__init__(self, match, url) def _init(self): diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index a6971e8..4cdcf87 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -23,7 +23,7 @@ class NewgroundsExtractor(Extractor): root = "https://www.newgrounds.com" cookies_domain = ".newgrounds.com" cookies_names = ("NG_GG_username", "vmk1du5I8m") - request_interval = 1.0 + request_interval = (0.5, 1.5) def __init__(self, match): Extractor.__init__(self, match) @@ -98,7 +98,7 @@ class NewgroundsExtractor(Extractor): if username: self.cookies_update(self._login_impl(username, password)) - @cache(maxage=360*24*3600, keyarg=1) + @cache(maxage=365*86400, keyarg=1) def _login_impl(self, username, password): self.log.info("Logging in as %s", username) diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index 54f2942..57c3118 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -124,15 +124,15 @@ class 
```diff
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index fb560e9..6c2f39d 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -249,6 +249,15 @@ class PatreonExtractor(Extractor):
         return [genmap[ft] for ft in filetypes]
 
     def _extract_bootstrap(self, page):
+        data = text.extr(
+            page, 'id="__NEXT_DATA__" type="application/json">', '</script')
+        if data:
+            try:
+                return (util.json_loads(data)["props"]["pageProps"]
+                        ["bootstrapEnvelope"]["bootstrap"])
+            except Exception as exc:
+                self.log.debug("%s: %s", exc.__class__.__name__, exc)
+
         bootstrap = text.extr(
             page, 'window.patreon = {"bootstrap":', '},"apiServer"')
         if bootstrap:
```
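Patreon pages now ship their bootstrap data in a Next.js `__NEXT_DATA__` script tag, which the code above tries first before falling back to the old `window.patreon` blob (#4904). A self-contained sketch of the same extraction, using a made-up page fragment and plain `json` string slicing in place of gallery-dl's `text.extr`/`util.json_loads` helpers:

```python
import json

# Hypothetical page fragment in the shape the code above expects:
page = (
    '<script id="__NEXT_DATA__" type="application/json">'
    '{"props":{"pageProps":{"bootstrapEnvelope":'
    '{"bootstrap":{"campaign":{"data":{"id":"12345"}}}}}}}'
    '</script>'
)

# Equivalent of text.extr(page, '...application/json">', '</script'):
marker = 'type="application/json">'
start = page.index(marker) + len(marker)
end = page.index('</script')
data = json.loads(page[start:end])

bootstrap = data["props"]["pageProps"]["bootstrapEnvelope"]["bootstrap"]
print(bootstrap["campaign"]["data"]["id"])  # 12345
```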
```diff
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index 3a0f5b0..ac6a391 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -18,7 +18,7 @@ class PhilomenaExtractor(BooruExtractor):
     basecategory = "philomena"
     filename_fmt = "{filename}.{extension}"
     archive_fmt = "{id}"
-    request_interval = 1.0
+    request_interval = (0.5, 1.5)
     page_start = 1
     per_page = 50
diff --git a/gallery_dl/extractor/pillowfort.py b/gallery_dl/extractor/pillowfort.py
index ff591fb..5362f13 100644
--- a/gallery_dl/extractor/pillowfort.py
+++ b/gallery_dl/extractor/pillowfort.py
@@ -91,7 +91,7 @@ class PillowfortExtractor(Extractor):
         if username:
             self.cookies_update(self._login_impl(username, password))
 
-    @cache(maxage=14*24*3600, keyarg=1)
+    @cache(maxage=14*86400, keyarg=1)
     def _login_impl(self, username, password):
         self.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index e9f124f..4b26393 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -10,7 +10,6 @@
 
 from .common import Extractor, Message
 from .. import text, util, exception
-from ..cache import cache
 import itertools
 
 BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"
@@ -33,7 +32,6 @@ class PinterestExtractor(Extractor):
         self.api = PinterestAPI(self)
 
     def items(self):
-        self.api.login()
         data = self.metadata()
         videos = self.config("videos", True)
@@ -416,41 +414,6 @@ class PinterestAPI():
         options = {"query": query, "scope": "pins", "rs": "typed"}
         return self._pagination("BaseSearch", options)
 
-    def login(self):
-        """Login and obtain session cookies"""
-        username, password = self.extractor._get_auth_info()
-        if username:
-            self.cookies.update(self._login_impl(username, password))
-
-    @cache(maxage=180*24*3600, keyarg=1)
-    def _login_impl(self, username, password):
-        self.extractor.log.info("Logging in as %s", username)
-
-        url = self.root + "/resource/UserSessionResource/create/"
-        options = {
-            "username_or_email": username,
-            "password"         : password,
-        }
-        data = {
-            "data"      : util.json_dumps({"options": options}),
-            "source_url": "",
-        }
-
-        try:
-            response = self.extractor.request(
-                url, method="POST", headers=self.headers,
-                cookies=self.cookies, data=data)
-            resource = response.json()["resource_response"]
-        except (exception.HttpError, ValueError, KeyError):
-            raise exception.AuthenticationError()
-
-        if resource["status"] != "success":
-            raise exception.AuthenticationError()
-        return {
-            cookie.name: cookie.value
-            for cookie in response.cookies
-        }
-
     def _call(self, resource, options):
         url = "{}/resource/{}Resource/get/".format(self.root, resource)
         params = {
```
```diff
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 411d191..4414c71 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -594,7 +594,7 @@ class PixivSeriesExtractor(PixivExtractor):
 class PixivNovelExtractor(PixivExtractor):
     """Extractor for pixiv novels"""
     subcategory = "novel"
-    request_interval = 1.0
+    request_interval = (0.5, 1.5)
     pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)"
     example = "https://www.pixiv.net/novel/show.php?id=12345"
@@ -996,6 +996,6 @@ class PixivAppAPI():
         params = text.parse_query(query)
 
 
-@cache(maxage=10*365*24*3600, keyarg=0)
+@cache(maxage=36500*86400, keyarg=0)
 def _refresh_token_cache(username):
     return None
diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py
index 5a3bf5a..be0dbde 100644
--- a/gallery_dl/extractor/plurk.py
+++ b/gallery_dl/extractor/plurk.py
@@ -18,7 +18,7 @@ class PlurkExtractor(Extractor):
     """Base class for plurk extractors"""
     category = "plurk"
     root = "https://www.plurk.com"
-    request_interval = 1.0
+    request_interval = (0.5, 1.5)
 
     def items(self):
         urls = self._urls_ex if self.config("comments", False) else self._urls
diff --git a/gallery_dl/extractor/postmill.py b/gallery_dl/extractor/postmill.py
new file mode 100644
index 0000000..29b351b
--- /dev/null
+++ b/gallery_dl/extractor/postmill.py
@@ -0,0 +1,203 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for Postmill instances"""
+
+import re
+from .common import BaseExtractor, Message
+from .. import text, exception
+
+
+class PostmillExtractor(BaseExtractor):
+    """Base class for Postmill extractors"""
+    basecategory = "postmill"
+    directory_fmt = ("{category}", "{instance}", "{forum}")
+    filename_fmt = "{id}_{title[:220]}.{extension}"
+    archive_fmt = "{filename}"
+
+    def _init(self):
+        self.instance = self.root.partition("://")[2]
+        self.save_link_post_body = self.config("save-link-post-body", False)
+        self._search_canonical_url = re.compile(r"/f/([\w\d_]+)/(\d+)/").search
+        self._search_image_tag = re.compile(
+            r'<a href="[^"]+"\n +class="submission__image-link"').search
+
+    def items(self):
+        for post_url in self.post_urls():
+            page = self.request(post_url).text
+            extr = text.extract_from(page)
+
+            title = text.unescape(extr(
+                '<meta property="og:title" content="', '">'))
+            date = text.parse_datetime(extr(
+                '<meta property="og:article:published_time" content="', '">'))
+            username = extr(
+                '<meta property="og:article:author" content="', '">')
+            post_canonical_url = text.unescape(extr(
+                '<link rel="canonical" href="', '">'))
+
+            url = text.unescape(extr(
+                '<h1 class="submission__title unheaderize inline"><a href="',
+                '"'))
+            body = extr(
+                '<div class="submission__body break-text text-flow">',
+                '</div>')
+
+            match = self._search_canonical_url(post_canonical_url)
+            forum = match.group(1)
+            id = int(match.group(2))
+
+            is_text_post = url.startswith("/")
+            is_image_post = self._search_image_tag(page) is not None
+            data = {
+                "title": title,
+                "date": date,
+                "username": username,
+                "forum": forum,
+                "id": id,
+                "flair": [text.unescape(i) for i in text.extract_iter(
+                    page, '<span class="flair__label">', '</span>')],
+                "instance": self.instance,
+            }
+
+            urls = []
+            if is_text_post or self.save_link_post_body:
+                urls.append((Message.Url, "text:" + body))
+
+            if is_image_post:
+                urls.append((Message.Url, url))
+            elif not is_text_post:
+                urls.append((Message.Queue, url))
+
+            data["count"] = len(urls)
+            yield Message.Directory, data
+            for data["num"], (msg, url) in enumerate(urls, 1):
+                if url.startswith("text:"):
+                    data["filename"], data["extension"] = "", "htm"
+                else:
+                    data = text.nameext_from_url(url, data)
+
+                yield msg, url, data
```
r"c32zjeghcp5tj3kb72pltz56piei66drc63vkhn5yixiyk4cmerrjtid" + r"\.onion)"), + } +}) +QUERY_RE = r"(?:\?([^#]+))?$" +SORTING_RE = r"(/(?:hot|new|active|top|controversial|most_commented))?" + \ + QUERY_RE + + +class PostmillPostExtractor(PostmillExtractor): + """Extractor for a single submission URL""" + subcategory = "post" + pattern = BASE_PATTERN + r"/f/(\w+)/(\d+)" + example = "https://raddle.me/f/FORUM/123/TITLE" + + def __init__(self, match): + PostmillExtractor.__init__(self, match) + self.forum = match.group(3) + self.post_id = match.group(4) + + def post_urls(self): + return (self.root + "/f/" + self.forum + "/" + self.post_id,) + + +class PostmillShortURLExtractor(PostmillExtractor): + """Extractor for short submission URLs""" + subcategory = "shorturl" + pattern = BASE_PATTERN + r"/(\d+)$" + example = "https://raddle.me/123" + + def __init__(self, match): + PostmillExtractor.__init__(self, match) + self.post_id = match.group(3) + + def items(self): + url = self.root + "/" + self.post_id + response = self.request(url, method="HEAD", allow_redirects=False) + full_url = text.urljoin(url, response.headers["Location"]) + yield Message.Queue, full_url, {"_extractor": PostmillPostExtractor} + + +class PostmillHomeExtractor(PostmillSubmissionsExtractor): + """Extractor for the home page""" + subcategory = "home" + pattern = BASE_PATTERN + r"(/(?:featured|subscribed|all)?)" + SORTING_RE + example = "https://raddle.me/" + + +class PostmillForumExtractor(PostmillSubmissionsExtractor): + """Extractor for submissions on a forum""" + subcategory = "forum" + pattern = BASE_PATTERN + r"(/f/\w+)" + SORTING_RE + example = "https://raddle.me/f/FORUM" + + +class PostmillUserSubmissionsExtractor(PostmillSubmissionsExtractor): + """Extractor for submissions made by a user""" + subcategory = "usersubmissions" + pattern = BASE_PATTERN + r"(/user/\w+/submissions)()" + QUERY_RE + example = "https://raddle.me/user/USER/submissions" + + +class PostmillTagExtractor(PostmillSubmissionsExtractor): + """Extractor for submissions on a forum with a specific tag""" + subcategory = "tag" + pattern = BASE_PATTERN + r"(/tag/\w+)" + SORTING_RE + example = "https://raddle.me/tag/TAG" + + +class PostmillSearchExtractor(PostmillSubmissionsExtractor): + """Extractor for search results""" + subcategory = "search" + pattern = BASE_PATTERN + r"(/search)()\?(q=[^#]+)$" + example = "https://raddle.me/search?q=QUERY" + whitelisted_parameters = ("q",) diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py index 9a6c8a5..ab555d8 100644 --- a/gallery_dl/extractor/reactor.py +++ b/gallery_dl/extractor/reactor.py @@ -18,7 +18,7 @@ class ReactorExtractor(BaseExtractor): basecategory = "reactor" filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}" archive_fmt = "{post_id}_{num}" - request_interval = 5.0 + request_interval = (3.0, 6.0) def __init__(self, match): BaseExtractor.__init__(self, match) diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py index 93e41be..3569860 100644 --- a/gallery_dl/extractor/readcomiconline.py +++ b/gallery_dl/extractor/readcomiconline.py @@ -23,7 +23,7 @@ class ReadcomiconlineBase(): filename_fmt = "{comic}_{issue:>03}_{page:>03}.{extension}" archive_fmt = "{issue_id}_{page}" root = "https://readcomiconline.li" - request_interval = (3.0, 7.0) + request_interval = (3.0, 6.0) def request(self, url, **kwargs): """Detect and handle redirects to CAPTCHA pages""" diff --git a/gallery_dl/extractor/reddit.py 
```diff
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index 9a6c8a5..ab555d8 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -18,7 +18,7 @@ class ReactorExtractor(BaseExtractor):
     basecategory = "reactor"
     filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}"
     archive_fmt = "{post_id}_{num}"
-    request_interval = 5.0
+    request_interval = (3.0, 6.0)
 
     def __init__(self, match):
         BaseExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index 93e41be..3569860 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -23,7 +23,7 @@ class ReadcomiconlineBase():
     filename_fmt = "{comic}_{issue:>03}_{page:>03}.{extension}"
     archive_fmt = "{issue_id}_{page}"
     root = "https://readcomiconline.li"
-    request_interval = (3.0, 7.0)
+    request_interval = (3.0, 6.0)
 
     def request(self, url, **kwargs):
         """Detect and handle redirects to CAPTCHA pages"""
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index feb6d1f..2ef0f9f 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -159,7 +159,7 @@ class RedditExtractor(Extractor):
                     data = meta[item["media_id"]]
                     if data["status"] != "valid" or "s" not in data:
                         self.log.warning(
-                            "gallery %s: skipping item %s ('status: %s')",
+                            "gallery %s: skipping item %s (status: %s)",
                             submission["id"], item["media_id"],
                             data.get("status"))
                         continue
                     src = data["s"]
@@ -531,7 +531,7 @@ class RedditAPI():
         return util.bdecode(sid, "0123456789abcdefghijklmnopqrstuvwxyz")
 
 
-@cache(maxage=100*365*24*3600, keyarg=0)
+@cache(maxage=36500*86400, keyarg=0)
 def _refresh_token_cache(token):
     if token and token[0] == "#":
         return None
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 8941258..602895c 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -285,7 +285,7 @@ class SankakuAPI():
             return
 
 
-@cache(maxage=365*24*3600, keyarg=1)
+@cache(maxage=365*86400, keyarg=1)
 def _authenticate_impl(extr, username, password):
     extr.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py
index 912e601..8a08fab 100644
--- a/gallery_dl/extractor/shimmie2.py
+++ b/gallery_dl/extractor/shimmie2.py
@@ -41,8 +41,9 @@ class Shimmie2Extractor(BaseExtractor):
 
         for post in self.posts():
 
-            for key in ("id", "width", "height"):
-                post[key] = text.parse_int(post[key])
+            post["id"] = text.parse_int(post["id"])
+            post["width"] = text.parse_int(post["width"])
+            post["height"] = text.parse_int(post["height"])
             post["tags"] = text.unquote(post["tags"])
             post.update(data)
@@ -64,6 +65,13 @@
         """Return an iterable containing data of all relevant posts"""
         return ()
 
+    def _quote_type(self, page):
+        """Return quoting character used in 'page' (' or ")"""
+        try:
+            return page[page.index("<link rel=")+10]
+        except Exception:
+            return "'"
+
 
 INSTANCES = {
     "loudbooru": {
@@ -85,6 +93,10 @@
         "pattern": r"booru\.cavemanon\.xyz",
         "file_url": "{0}/index.php?q=image/{2}.{4}",
     },
+    "rule34hentai": {
+        "root": "https://rule34hentai.net",
+        "pattern": r"rule34hentai\.net",
+    },
 }
 
 BASE_PATTERN = Shimmie2Extractor.update(INSTANCES) + r"/(?:index\.php\?q=/?)?"
```
```diff
@@ -121,21 +133,26 @@ class Shimmie2TagExtractor(Shimmie2Extractor):
 
             if init:
                 init = False
-                has_mime = ("data-mime='" in page)
-                has_pid = ("data-post-id='" in page)
+                quote = self._quote_type(page)
+                has_mime = (" data-mime=" in page)
+                has_pid = (" data-post-id=" in page)
 
             while True:
                 if has_mime:
-                    mime = extr("data-mime='", "'")
+                    mime = extr(" data-mime="+quote, quote)
                 if has_pid:
-                    pid = extr("data-post-id='", "'")
+                    pid = extr(" data-post-id="+quote, quote)
                 else:
-                    pid = extr("href='/post/view/", "?")
+                    pid = extr(" href='/post/view/", quote)
 
                 if not pid:
                     break
 
-                tags, dimensions, size = extr("title='", "'").split(" // ")
+                data = extr("title="+quote, quote).split(" // ")
+                tags = data[0]
+                dimensions = data[1]
+                size = data[2]
+
                 width, _, height = dimensions.partition("x")
                 md5 = extr("/_thumbs/", "/")
@@ -200,15 +217,17 @@ class Shimmie2PostExtractor(Shimmie2Extractor):
 
     def posts(self):
         url = "{}/post/view/{}".format(self.root, self.post_id)
-        extr = text.extract_from(self.request(url).text)
+        page = self.request(url).text
+        extr = text.extract_from(page)
+        quote = self._quote_type(page)
 
         post = {
             "id"      : self.post_id,
             "tags"    : extr(": ", "<").partition(" - ")[0].rstrip(")"),
             "md5"     : extr("/_thumbs/", "/"),
             "file_url": self.root + (
-                extr("id='main_image' src='", "'") or
-                extr("<source src='", "'")).lstrip("."),
+                extr("id={0}main_image{0} src={0}".format(quote), quote) or
+                extr("<source src="+quote, quote)).lstrip("."),
             "width"   : extr("data-width=", " ").strip("\"'"),
             "height"  : extr("data-height=", ">").partition(
                 " ")[0].strip("\"'"),
```
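`_quote_type` exists because different Shimmie2 installations emit HTML attributes with either single or double quotes; it peeks at the character 10 positions past the first `<link rel=`, i.e. whatever quote follows the `=`, and that character is then used for all later attribute extraction. A quick illustration:

```python
def quote_type(page):
    # "<link rel=" is 10 characters long, so page[index + 10]
    # is the quote character that follows the "=".
    try:
        return page[page.index("<link rel=") + 10]
    except Exception:
        return "'"

print(quote_type("<link rel='stylesheet' href='/style.css'>"))  # '
print(quote_type('<link rel="stylesheet" href="/style.css">'))  # "
print(quote_type("<html></html>"))  # fallback: '
```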
```diff
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 6b4cba2..31fb891 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -56,7 +56,7 @@ class SubscribestarExtractor(Extractor):
         if username:
             self.cookies_update(self._login_impl(username, password))
 
-    @cache(maxage=28*24*3600, keyarg=1)
+    @cache(maxage=28*86400, keyarg=1)
     def _login_impl(self, username, password):
         self.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py
index bfca7a6..0a9df20 100644
--- a/gallery_dl/extractor/tapas.py
+++ b/gallery_dl/extractor/tapas.py
@@ -81,7 +81,7 @@ class TapasExtractor(Extractor):
             self.cookies.set(
                 "adjustedBirthDate", "1981-02-03", domain=self.cookies_domain)
 
-    @cache(maxage=14*24*3600, keyarg=1)
+    @cache(maxage=14*86400, keyarg=1)
     def _login_impl(self, username, password):
         self.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/tsumino.py b/gallery_dl/extractor/tsumino.py
index de7cdfc..bce661a 100644
--- a/gallery_dl/extractor/tsumino.py
+++ b/gallery_dl/extractor/tsumino.py
@@ -27,7 +27,7 @@ class TsuminoBase():
         self.cookies.setdefault(
             "ASP.NET_SessionId", "x1drgggilez4cpkttneukrc5")
 
-    @cache(maxage=14*24*3600, keyarg=1)
+    @cache(maxage=14*86400, keyarg=1)
     def _login_impl(self, username, password):
         self.log.info("Logging in as %s", username)
         url = "{}/Account/Login".format(self.root)
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index f50ddb7..fee0145 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -9,7 +9,7 @@
 """Extractors for https://www.tumblr.com/"""
 
 from .common import Extractor, Message
-from .. import text, oauth, exception
+from .. import text, util, oauth, exception
 from datetime import datetime, date, timedelta
 import re
@@ -262,7 +262,7 @@ class TumblrExtractor(Extractor):
         return updated, (resized == updated)
 
     def _original_image_fallback(self, url, post_id):
-        for _ in range(self.fallback_retries):
+        for _ in util.repeat(self.fallback_retries):
             self.sleep(self.fallback_delay, "image token")
             yield self._update_image_token(url)[0]
 
         self.log.warning("Unable to fetch higher-resolution "
@@ -404,66 +404,70 @@ class TumblrAPI(oauth.OAuth1API):
     def _call(self, endpoint, params, **kwargs):
         url = self.ROOT + endpoint
         kwargs["params"] = params
-        response = self.request(url, **kwargs)
-
-        try:
-            data = response.json()
-        except ValueError:
-            data = response.text
-            status = response.status_code
-        else:
-            status = data["meta"]["status"]
-            if 200 <= status < 400:
-                return data["response"]
-
-        self.log.debug(data)
-        if status == 403:
-            raise exception.AuthorizationError()
-
-        elif status == 404:
-            try:
-                error = data["errors"][0]["detail"]
-                board = ("only viewable within the Tumblr dashboard" in error)
-            except Exception:
-                board = False
-
-            if board:
-                self.log.info("Run 'gallery-dl oauth:tumblr' "
-                              "to access dashboard-only blogs")
-                raise exception.AuthorizationError(error)
-            raise exception.NotFoundError("user or post")
-
-        elif status == 429:
-            # daily rate limit
-            if response.headers.get("x-ratelimit-perday-remaining") == "0":
-                self.log.info("Daily API rate limit exceeded")
-                reset = response.headers.get("x-ratelimit-perday-reset")
-
-                api_key = self.api_key or self.session.auth.consumer_key
-                if api_key == self.API_KEY:
-                    self.log.info("Register your own OAuth application and "
-                                  "use its credentials to prevent this error: "
-                                  "https://github.com/mikf/gallery-dl/blob/mas"
-                                  "ter/docs/configuration.rst#extractortumblra"
-                                  "pi-key--api-secret")
-
-                if self.extractor.config("ratelimit") == "wait":
-                    self.extractor.wait(seconds=reset)
-                    return self._call(endpoint, params, **kwargs)
-
-                t = (datetime.now() + timedelta(seconds=float(reset))).time()
-                raise exception.StopExtraction(
-                    "Aborting - Rate limit will reset at %s",
-                    "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second))
-
-            # hourly rate limit
-            reset = response.headers.get("x-ratelimit-perhour-reset")
-            if reset:
-                self.log.info("Hourly API rate limit exceeded")
-                self.extractor.wait(seconds=reset)
-                return self._call(endpoint, params, **kwargs)
-
-        raise exception.StopExtraction(data)
+        while True:
+            response = self.request(url, **kwargs)
+
+            try:
+                data = response.json()
+            except ValueError:
+                data = response.text
+                status = response.status_code
+            else:
+                status = data["meta"]["status"]
+                if 200 <= status < 400:
+                    return data["response"]
+
+            self.log.debug(data)
+
+            if status == 403:
+                raise exception.AuthorizationError()
+
+            elif status == 404:
+                try:
+                    error = data["errors"][0]["detail"]
+                    board = ("only viewable within the Tumblr dashboard"
+                             in error)
+                except Exception:
+                    board = False
+
+                if board:
+                    self.log.info("Run 'gallery-dl oauth:tumblr' "
+                                  "to access dashboard-only blogs")
+                    raise exception.AuthorizationError(error)
+                raise exception.NotFoundError("user or post")
+
+            elif status == 429:
+                # daily rate limit
+                if response.headers.get(
+                        "x-ratelimit-perday-remaining") == "0":
+                    self.log.info("Daily API rate limit exceeded")
+                    reset = response.headers.get("x-ratelimit-perday-reset")
+
+                    api_key = self.api_key or self.session.auth.consumer_key
+                    if api_key == self.API_KEY:
+                        self.log.info(
+                            "Register your own OAuth application and use its "
+                            "credentials to prevent this error: https://githu"
+                            "b.com/mikf/gallery-dl/blob/master/docs/configurat"
+                            "ion.rst#extractortumblrapi-key--api-secret")
+
+                    if self.extractor.config("ratelimit") == "wait":
+                        self.extractor.wait(seconds=reset)
+                        continue
+
+                    t = (datetime.now() + timedelta(0, float(reset))).time()
+                    raise exception.StopExtraction(
+                        "Aborting - Rate limit will reset at %s",
+                        "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second))
+
+                # hourly rate limit
+                reset = response.headers.get("x-ratelimit-perhour-reset")
+                if reset:
+                    self.log.info("Hourly API rate limit exceeded")
+                    self.extractor.wait(seconds=reset)
+                    continue
+
+            raise exception.StopExtraction(data)
 
     def _pagination(self, blog, endpoint, params, key="posts", cache=False):
         endpoint = "/v2/blog/{}{}".format(blog, endpoint)
```
```diff
diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py
index 49c8419..f57f479 100644
--- a/gallery_dl/extractor/twibooru.py
+++ b/gallery_dl/extractor/twibooru.py
@@ -22,7 +22,7 @@ class TwibooruExtractor(BooruExtractor):
     root = "https://twibooru.org"
     filename_fmt = "{id}_{filename}.{extension}"
     archive_fmt = "{id}"
-    request_interval = 6.05
+    request_interval = (6.0, 6.1)
     page_start = 1
     per_page = 50
@@ -44,7 +44,7 @@ class TwibooruExtractor(BooruExtractor):
 class TwibooruPostExtractor(TwibooruExtractor):
     """Extractor for single twibooru posts"""
     subcategory = "post"
-    request_interval = 1.0
+    request_interval = (0.5, 1.5)
     pattern = BASE_PATTERN + r"/(\d+)"
     example = "https://twibooru.org/12345"
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index f874f12..fdcefdd 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -45,7 +45,6 @@ class TwitterExtractor(Extractor):
         self.cards = self.config("cards", False)
         self.ads = self.config("ads", False)
         self.cards_blacklist = self.config("cards-blacklist")
-        self.syndication = self.config("syndication")
 
         if not self.config("transform", True):
             self._transform_user = util.identity
@@ -367,9 +366,6 @@
 
         if "legacy" in user:
             user = user["legacy"]
-        elif "statuses_count" not in user and self.syndication == "extended":
-            # try to fetch extended user data
-            user = self.api.user_by_screen_name(user["screen_name"])["legacy"]
 
         uget = user.get
         if uget("withheld_scope"):
@@ -550,7 +546,7 @@ class TwitterTimelineExtractor(TwitterExtractor):
     def _select_tweet_source(self):
         strategy = self.config("strategy")
         if strategy is None or strategy == "auto":
-            if self.retweets or self.textonly:
+            if self.retweets or self.replies or self.textonly:
                 return self.api.user_tweets
             else:
                 return self.api.user_media
@@ -603,12 +599,6 @@ class TwitterLikesExtractor(TwitterExtractor):
     def tweets(self):
         return self.api.user_likes(self.user)
 
-    def _transform_tweet(self, tweet):
-        tdata = TwitterExtractor._transform_tweet(self, tweet)
-        tdata["date_liked"] = text.parse_timestamp(
-            (int(tweet["sortIndex"] or 0) >> 20) // 1000)
-        return tdata
-
 
 class TwitterBookmarkExtractor(TwitterExtractor):
     """Extractor for bookmarked tweets"""
@@ -871,7 +861,6 @@ class TwitterAPI():
 
         self.root = "https://twitter.com/i/api"
         self._nsfw_warning = True
-        self._syndication = self.extractor.syndication
         self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
 
         cookies = extractor.cookies
@@ -1651,69 +1640,14 @@
             tweet_id = entry["entryId"].rpartition("-")[2]
 
             if text.startswith("Age-restricted"):
-                if self._syndication:
-                    return self._syndication_tweet(tweet_id)
-                elif self._nsfw_warning:
+                if self._nsfw_warning:
                     self._nsfw_warning = False
                     self.extractor.log.warning('"%s"', text)
 
             self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text)
```
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index f874f12..fdcefdd 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -45,7 +45,6 @@ class TwitterExtractor(Extractor):
         self.cards = self.config("cards", False)
         self.ads = self.config("ads", False)
         self.cards_blacklist = self.config("cards-blacklist")
-        self.syndication = self.config("syndication")
 
         if not self.config("transform", True):
             self._transform_user = util.identity
@@ -367,9 +366,6 @@ class TwitterExtractor(Extractor):
 
         if "legacy" in user:
             user = user["legacy"]
-        elif "statuses_count" not in user and self.syndication == "extended":
-            # try to fetch extended user data
-            user = self.api.user_by_screen_name(user["screen_name"])["legacy"]
 
         uget = user.get
         if uget("withheld_scope"):
@@ -550,7 +546,7 @@ class TwitterTimelineExtractor(TwitterExtractor):
     def _select_tweet_source(self):
         strategy = self.config("strategy")
         if strategy is None or strategy == "auto":
-            if self.retweets or self.textonly:
+            if self.retweets or self.replies or self.textonly:
                 return self.api.user_tweets
             else:
                 return self.api.user_media
@@ -603,12 +599,6 @@ class TwitterLikesExtractor(TwitterExtractor):
     def tweets(self):
         return self.api.user_likes(self.user)
 
-    def _transform_tweet(self, tweet):
-        tdata = TwitterExtractor._transform_tweet(self, tweet)
-        tdata["date_liked"] = text.parse_timestamp(
-            (int(tweet["sortIndex"] or 0) >> 20) // 1000)
-        return tdata
-
 
 class TwitterBookmarkExtractor(TwitterExtractor):
     """Extractor for bookmarked tweets"""
@@ -871,7 +861,6 @@ class TwitterAPI():
         self.root = "https://twitter.com/i/api"
         self._nsfw_warning = True
-        self._syndication = self.extractor.syndication
         self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
 
         cookies = extractor.cookies
@@ -1651,69 +1640,14 @@ class TwitterAPI():
             tweet_id = entry["entryId"].rpartition("-")[2]
             if text.startswith("Age-restricted"):
-                if self._syndication:
-                    return self._syndication_tweet(tweet_id)
-                elif self._nsfw_warning:
+                if self._nsfw_warning:
                     self._nsfw_warning = False
                     self.extractor.log.warning('"%s"', text)
 
         self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text)
 
-    def _syndication_tweet(self, tweet_id):
-        base_url = "https://cdn.syndication.twimg.com/tweet-result?id="
-        tweet = self.extractor.request(base_url + tweet_id).json()
-
-        tweet["user"]["description"] = ""
-        tweet["user"]["entities"] = {"description": {}}
-        tweet["user_id_str"] = tweet["user"]["id_str"]
-
-        if tweet["id_str"] != tweet_id:
-            tweet["retweeted_status_id_str"] = tweet["id_str"]
-            tweet["id_str"] = retweet_id = tweet_id
-        else:
-            retweet_id = None
-
-        # assume 'conversation_id' is the same as 'id' when the tweet
-        # is not a reply
-        if "conversation_id_str" not in tweet and \
-                "in_reply_to_status_id_str" not in tweet:
-            tweet["conversation_id_str"] = tweet["id_str"]
-
-        if int(tweet_id) < 300000000000000:
-            tweet["created_at"] = text.parse_datetime(
-                tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime(
-                "%a %b %d %H:%M:%S +0000 %Y")
-
-        if "video" in tweet:
-            video = tweet["video"]
-            video["variants"] = (max(
-                (v for v in video["variants"] if v["type"] == "video/mp4"),
-                key=lambda v: text.parse_int(
-                    v["src"].split("/")[-2].partition("x")[0])
-            ),)
-            video["variants"][0]["url"] = video["variants"][0]["src"]
-            tweet["extended_entities"] = {"media": [{
-                "video_info"   : video,
-                "original_info": {"width" : 0, "height": 0},
-            }]}
-        elif "photos" in tweet:
-            for p in tweet["photos"]:
-                p["media_url_https"] = p["url"]
-                p["original_info"] = {
-                    "width" : p["width"],
-                    "height": p["height"],
-                }
-            tweet["extended_entities"] = {"media": tweet["photos"]}
-
-        return {
-            "rest_id": tweet["id_str"],
-            "legacy" : tweet,
-            "core"   : {"user_results": {"result": tweet["user"]}},
-            "_retweet_id_str": retweet_id,
-        }
-
 
-@cache(maxage=360*86400, keyarg=1)
+@cache(maxage=365*86400, keyarg=1)
 def _login_impl(extr, username, password):
     import re
diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py
index 4ee252e..5374f1c 100644
--- a/gallery_dl/extractor/vipergirls.py
+++ b/gallery_dl/extractor/vipergirls.py
@@ -45,7 +45,7 @@ class VipergirlsExtractor(Extractor):
         if username:
             self.cookies_update(self._login_impl(username, password))
 
-    @cache(maxage=90*24*3600, keyarg=1)
+    @cache(maxage=90*86400, keyarg=1)
     def _login_impl(self, username, password):
         self.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index c9cd02f..c22e67e 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -21,7 +21,7 @@ class VkExtractor(Extractor):
     filename_fmt = "{id}.{extension}"
     archive_fmt = "{id}"
     root = "https://vk.com"
-    request_interval = 1.0
+    request_interval = (0.5, 1.5)
 
     def items(self):
         sizes = "wzyxrqpo"
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index ac2ac7a..eb10a0c 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -520,7 +520,7 @@ class DownloadJob(Job):
                     archive, archive_format, archive_pragma)
             except Exception as exc:
                 extr.log.warning(
-                    "Failed to open download archive at '%s' ('%s: %s')",
+                    "Failed to open download archive at '%s' (%s: %s)",
                     archive, exc.__class__.__name__, exc)
             else:
                 extr.log.debug("Using download archive '%s'", archive)
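Note: the job.py hunk above and the postprocessor/common.py hunk below drop the stray quotes that wrapped the `%s: %s` placeholders, so a failure now renders as `(FileNotFoundError: ...)` instead of `('FileNotFoundError: ...')`. A quick stdlib-only sketch of the updated message format; the archive path here is hypothetical:

.. code:: python

    import logging

    logging.basicConfig(level=logging.WARNING)
    log = logging.getLogger("example")

    archive = "/no/such/dir/archive.sqlite3"  # hypothetical path
    try:
        open(archive)
    except Exception as exc:
        # lazy %-style formatting, as in the updated warning call
        log.warning("Failed to open download archive at '%s' (%s: %s)",
                    archive, exc.__class__.__name__, exc)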
diff --git a/gallery_dl/oauth.py b/gallery_dl/oauth.py
index ac38c4d..8508ee1 100644
--- a/gallery_dl/oauth.py
+++ b/gallery_dl/oauth.py
@@ -138,6 +138,6 @@ class OAuth1API():
         return self.extractor.request(url, **kwargs)
 
 
-@cache(maxage=100*365*24*3600, keyarg=0)
+@cache(maxage=36500*86400, keyarg=0)
 def _token_cache(key):
     return None, None
diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py
index 10d9fba..1d2fba8 100644
--- a/gallery_dl/postprocessor/common.py
+++ b/gallery_dl/postprocessor/common.py
@@ -41,7 +41,7 @@ class PostProcessor():
                 "_archive_" + self.name)
         except Exception as exc:
             self.log.warning(
-                "Failed to open %s archive at '%s' ('%s: %s')",
+                "Failed to open %s archive at '%s' (%s: %s)",
                 self.name, archive, exc.__class__.__name__, exc)
         else:
             self.log.debug("Using %s archive '%s'", self.name, archive)
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 53502ef..751c398 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -55,6 +55,13 @@ def advance(iterable, num):
     return iterator
 
 
+def repeat(times):
+    """Return an iterator that returns None"""
+    if times < 0:
+        return itertools.repeat(None)
+    return itertools.repeat(None, times)
+
+
 def unique(iterable):
     """Yield unique elements from 'iterable' while preserving order"""
     seen = set()
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index f0d55f6..b74d977 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-__version__ = "1.26.4"
+__version__ = "1.26.5"
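Note: the new `util.repeat()` helper added above backs the "infinite fallback-retries" changelog entries: a negative count yields an endless iterator, any other count a bounded one. A usage sketch (the retry loop shape is illustrative, not gallery-dl's actual code):

.. code:: python

    import itertools

    def repeat(times):
        # negative 'times' means repeat forever, mirroring util.repeat()
        if times < 0:
            return itertools.repeat(None)
        return itertools.repeat(None, times)

    retries = 2              # a value of -1 would retry indefinitely
    for _ in repeat(retries):
        print("retrying fallback URL ...")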

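Note: the `@cache(maxage=...)` rewrites scattered through this commit (twitter, vipergirls, oauth) normalize every lifetime to the form `days * 86400`. Checking the arithmetic shows that only twitter's login cache actually changes behavior, growing from 360 to 365 days:

.. code:: python

    assert 90*24*3600 == 90*86400              # vipergirls: 90 days, unchanged
    assert 100*365*24*3600 == 36500*86400      # oauth: 36500 days, unchanged
    assert 365*86400 - 360*86400 == 5*86400    # twitter: 5 days longer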