author    Unit 193 <unit193@unit193.net>  2023-12-25 01:27:47 -0500
committer Unit 193 <unit193@unit193.net>  2023-12-25 01:27:47 -0500
commit    4d7a4f1ecef2c96269f3590335d2834ebcdd50bf
tree      c66c0b829ed69c7424befddc193eaa51054b1410
parent    30dee4697019389ef29458b2e3931adc976389b2
New upstream version 1.26.5 (tag: upstream/1.26.5)
-rw-r--r--  CHANGELOG.md                              |  28
-rw-r--r--  PKG-INFO                                  |  43
-rw-r--r--  README.rst                                |  41
-rw-r--r--  data/man/gallery-dl.1                     |   2
-rw-r--r--  data/man/gallery-dl.conf.5                |  66
-rw-r--r--  gallery_dl.egg-info/PKG-INFO              |  43
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt           |   1
-rw-r--r--  gallery_dl/extractor/__init__.py          |   1
-rw-r--r--  gallery_dl/extractor/aryion.py            |   2
-rw-r--r--  gallery_dl/extractor/danbooru.py          |  21
-rw-r--r--  gallery_dl/extractor/deviantart.py        |  13
-rw-r--r--  gallery_dl/extractor/exhentai.py          |  93
-rw-r--r--  gallery_dl/extractor/foolfuuka.py         |   2
-rw-r--r--  gallery_dl/extractor/idolcomplex.py       |   4
-rw-r--r--  gallery_dl/extractor/imgbb.py             |   2
-rw-r--r--  gallery_dl/extractor/inkbunny.py          |  38
-rw-r--r--  gallery_dl/extractor/instagram.py         |   2
-rw-r--r--  gallery_dl/extractor/kemonoparty.py       |   2
-rw-r--r--  gallery_dl/extractor/mangadex.py          |   2
-rw-r--r--  gallery_dl/extractor/mastodon.py          |   4
-rw-r--r--  gallery_dl/extractor/myhentaigallery.py   |   6
-rw-r--r--  gallery_dl/extractor/newgrounds.py        |   4
-rw-r--r--  gallery_dl/extractor/nijie.py             |  12
-rw-r--r--  gallery_dl/extractor/oauth.py             |   2
-rw-r--r--  gallery_dl/extractor/patreon.py           |   9
-rw-r--r--  gallery_dl/extractor/philomena.py         |   2
-rw-r--r--  gallery_dl/extractor/pillowfort.py        |   2
-rw-r--r--  gallery_dl/extractor/pinterest.py         |  37
-rw-r--r--  gallery_dl/extractor/pixiv.py             |   4
-rw-r--r--  gallery_dl/extractor/plurk.py             |   2
-rw-r--r--  gallery_dl/extractor/postmill.py          | 203
-rw-r--r--  gallery_dl/extractor/reactor.py           |   2
-rw-r--r--  gallery_dl/extractor/readcomiconline.py   |   2
-rw-r--r--  gallery_dl/extractor/reddit.py            |   4
-rw-r--r--  gallery_dl/extractor/sankaku.py           |   2
-rw-r--r--  gallery_dl/extractor/shimmie2.py          |  41
-rw-r--r--  gallery_dl/extractor/subscribestar.py     |   2
-rw-r--r--  gallery_dl/extractor/tapas.py             |   2
-rw-r--r--  gallery_dl/extractor/tsumino.py           |   2
-rw-r--r--  gallery_dl/extractor/tumblr.py            | 118
-rw-r--r--  gallery_dl/extractor/twibooru.py          |   4
-rw-r--r--  gallery_dl/extractor/twitter.py           |  72
-rw-r--r--  gallery_dl/extractor/vipergirls.py        |   2
-rw-r--r--  gallery_dl/extractor/vk.py                |   2
-rw-r--r--  gallery_dl/job.py                         |   2
-rw-r--r--  gallery_dl/oauth.py                       |   2
-rw-r--r--  gallery_dl/postprocessor/common.py        |   2
-rw-r--r--  gallery_dl/util.py                        |   7
-rw-r--r--  gallery_dl/version.py                     |   2
49 files changed, 643 insertions, 320 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 88dbc44..8907e07 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,33 @@
# Changelog
+## 1.26.5 - 2023-12-23
+### Extractors
+#### Additions
+- [deviantart] add `intermediary` option ([#4955](https://github.com/mikf/gallery-dl/issues/4955))
+- [inkbunny] add `unread` extractor ([#4934](https://github.com/mikf/gallery-dl/issues/4934))
+- [mastodon] support non-numeric status IDs ([#4936](https://github.com/mikf/gallery-dl/issues/4936))
+- [myhentaigallery] recognize `/g/` URLs ([#4920](https://github.com/mikf/gallery-dl/issues/4920))
+- [postmill] add support ([#4917](https://github.com/mikf/gallery-dl/issues/4917), [#4919](https://github.com/mikf/gallery-dl/issues/4919))
+- [shimmie2] support `rule34hentai.net` ([#861](https://github.com/mikf/gallery-dl/issues/861), [#4789](https://github.com/mikf/gallery-dl/issues/4789), [#4945](https://github.com/mikf/gallery-dl/issues/4945))
+#### Fixes
+- [deviantart] add workaround for integer `client-id` values ([#4924](https://github.com/mikf/gallery-dl/issues/4924))
+- [exhentai] fix error for infinite `fallback-retries` ([#4911](https://github.com/mikf/gallery-dl/issues/4911))
+- [inkbunny] stop pagination on empty results
+- [patreon] fix bootstrap data extraction again ([#4904](https://github.com/mikf/gallery-dl/issues/4904))
+- [tumblr] fix exception after waiting for rate limit ([#4916](https://github.com/mikf/gallery-dl/issues/4916))
+#### Improvements
+- [exhentai] output continuation URL when interrupted ([#4782](https://github.com/mikf/gallery-dl/issues/4782))
+- [inkbunny] improve `/submissionsviewall.php` patterns ([#4934](https://github.com/mikf/gallery-dl/issues/4934))
+- [tumblr] support infinite `fallback-retries`
+- [twitter] default to `tweets` timeline when `replies` are enabled ([#4953](https://github.com/mikf/gallery-dl/issues/4953))
+#### Metadata
+- [danbooru] provide `tags` as list ([#4942](https://github.com/mikf/gallery-dl/issues/4942))
+- [deviantart] set `is_original` for intermediary URLs to `false`
+- [twitter] remove `date_liked` ([#3850](https://github.com/mikf/gallery-dl/issues/3850), [#4108](https://github.com/mikf/gallery-dl/issues/4108), [#4657](https://github.com/mikf/gallery-dl/issues/4657))
+### Docker
+- add Docker instructions to README ([#4850](https://github.com/mikf/gallery-dl/issues/4850))
+- fix auto-generation of `latest` tags
+
## 1.26.4 - 2023-12-10
### Extractors
#### Additions
diff --git a/PKG-INFO b/PKG-INFO
index e75fd05..bdd0025 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.26.4
+Version: 1.26.5
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -112,9 +112,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.bin>`__
Nightly Builds
@@ -172,6 +172,43 @@ For macOS users with MacPorts:
sudo port install gallery-dl
+Docker
+--------
+Using the Dockerfile in the repository:
+
+.. code:: bash
+
+ git clone https://github.com/mikf/gallery-dl.git
+ cd gallery-dl/
+ docker build -t gallery-dl:latest .
+
+Pulling image from `Docker Hub <https://hub.docker.com/r/mikf123/gallery-dl>`__:
+
+.. code:: bash
+
+ docker pull mikf123/gallery-dl
+ docker tag mikf123/gallery-dl gallery-dl
+
+Pulling image from `GitHub Container Registry <https://github.com/mikf/gallery-dl/pkgs/container/gallery-dl>`__:
+
+.. code:: bash
+
+ docker pull ghcr.io/mikf/gallery-dl
+ docker tag ghcr.io/mikf/gallery-dl gallery-dl
+
+To run the container, you will probably want to mount some host directories so that the config file and downloads persist across runs.
+
+Make sure to either download the example config file referenced in the repo and place it in the mounted volume location, or touch an empty file there.
+
+If you gave the container a different tag or are using podman, adjust the commands accordingly. Run ``docker image ls`` to check the image name if you are not sure.
+
+The following command removes the container after every use, so you always get a fresh environment to run in. If you set up a CI/CD pipeline to automatically rebuild the container, you can also add a ``--pull=newer`` flag so that Docker checks for a newer image and downloads it before running.
+
+.. code:: bash
+
+    docker run --rm -v $HOME/Downloads/:/gallery-dl/ -v $HOME/.config/gallery-dl/gallery-dl.conf:/etc/gallery-dl.conf -it gallery-dl:latest
+
+You can also add a shell alias for ``gallery-dl`` or create a simple bash script and drop it somewhere in your ``$PATH`` to act as a shim for this command, as sketched below.
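+For example, a minimal wrapper script along these lines could serve as the shim (a sketch; the volume mounts mirror the command above and may need adjusting for your setup):
+
+.. code:: bash
+
+    #!/bin/sh
+    # Run gallery-dl in a throwaway container, forwarding all arguments.
+    # -it matches the documented invocation; drop it for non-interactive use.
+    exec docker run --rm \
+        -v "$HOME/Downloads/:/gallery-dl/" \
+        -v "$HOME/.config/gallery-dl/gallery-dl.conf:/etc/gallery-dl.conf" \
+        -it gallery-dl:latest "$@"
+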
Usage
=====
diff --git a/README.rst b/README.rst
index 5603929..782c8de 100644
--- a/README.rst
+++ b/README.rst
@@ -72,9 +72,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.bin>`__
Nightly Builds
@@ -132,6 +132,43 @@ For macOS users with MacPorts:
sudo port install gallery-dl
+Docker
+--------
+Using the Dockerfile in the repository:
+
+.. code:: bash
+
+ git clone https://github.com/mikf/gallery-dl.git
+ cd gallery-dl/
+ docker build -t gallery-dl:latest .
+
+Pulling image from `Docker Hub <https://hub.docker.com/r/mikf123/gallery-dl>`__:
+
+.. code:: bash
+
+ docker pull mikf123/gallery-dl
+ docker tag mikf123/gallery-dl gallery-dl
+
+Pulling image from `GitHub Container Registry <https://github.com/mikf/gallery-dl/pkgs/container/gallery-dl>`__:
+
+.. code:: bash
+
+ docker pull ghcr.io/mikf/gallery-dl
+ docker tag ghcr.io/mikf/gallery-dl gallery-dl
+
+To run the container, you will probably want to mount some host directories so that the config file and downloads persist across runs.
+
+Make sure to either download the example config file referenced in the repo and place it in the mounted volume location, or touch an empty file there.
+
+If you gave the container a different tag or are using podman, adjust the commands accordingly. Run ``docker image ls`` to check the image name if you are not sure.
+
+The following command removes the container after every use, so you always get a fresh environment to run in. If you set up a CI/CD pipeline to automatically rebuild the container, you can also add a ``--pull=newer`` flag so that Docker checks for a newer image and downloads it before running.
+
+.. code:: bash
+
+    docker run --rm -v $HOME/Downloads/:/gallery-dl/ -v $HOME/.config/gallery-dl/gallery-dl.conf:/etc/gallery-dl.conf -it gallery-dl:latest
+
+You can also add a shell alias for ``gallery-dl`` or create a simple bash script and drop it somewhere in your ``$PATH`` to act as a shim for this command, as sketched below.
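+For example, a minimal wrapper script along these lines could serve as the shim (a sketch; the volume mounts mirror the command above and may need adjusting for your setup):
+
+.. code:: bash
+
+    #!/bin/sh
+    # Run gallery-dl in a throwaway container, forwarding all arguments.
+    # -it matches the documented invocation; drop it for non-interactive use.
+    exec docker run --rm \
+        -v "$HOME/Downloads/:/gallery-dl/" \
+        -v "$HOME/.config/gallery-dl/gallery-dl.conf:/etc/gallery-dl.conf" \
+        -it gallery-dl:latest "$@"
+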
Usage
=====
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 277b227..caa0d4a 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-12-10" "1.26.4" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-12-23" "1.26.5" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 95e9627..b641f29 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-12-10" "1.26.4" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-12-23" "1.26.5" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1592,6 +1592,18 @@ Possible values are
It is possible to use \f[I]"all"\f[] instead of listing all values separately.
+.SS extractor.deviantart.intermediary
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+For older non-downloadable images,
+download a higher-quality \f[I]/intermediary/\f[] version.
+
+
.SS extractor.deviantart.journals
.IP "Type:" 6
\f[I]string\f[]
@@ -1814,8 +1826,8 @@ depending on the input URL
\f[I]2\f[]
.IP "Description:" 4
-Number of times a failed image gets retried.
-Use \f[I]-1\f[] for infinite retries
+Number of times a failed image gets retried
+or \f[I]-1\f[] for infinite retries.
.SS extractor.exhentai.fav
@@ -3158,6 +3170,17 @@ A value of \f[I]0\f[] means no limit.
Also search Plurk comments for URLs.
+.SS extractor.[postmill].save-link-post-body
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Whether or not to save the body for link/image posts.
+
+
.SS extractor.reactor.gif
.IP "Type:" 6
\f[I]bool\f[]
@@ -3626,7 +3649,8 @@ for fetching full-resolution images.
\f[I]2\f[]
.IP "Description:" 4
-Number of retries for fetching full-resolution images.
+Number of retries for fetching full-resolution images
+or \f[I]-1\f[] for infinite retries.
.SS extractor.twibooru.api-key
@@ -3763,8 +3787,6 @@ with enabled \f[I]conversations\f[] option
for each Tweet in said timeline.
Note: This requires at least 1 additional API call per initial Tweet.
-Age-restricted replies cannot be expanded when using the
-\f[I]syndication\f[] API.
.SS extractor.twitter.include
@@ -3844,36 +3866,6 @@ Known available sizes are
\f[I]4096x4096\f[], \f[I]orig\f[], \f[I]large\f[], \f[I]medium\f[], and \f[I]small\f[].
-.SS extractor.twitter.syndication
-.IP "Type:" 6
-.br
-* \f[I]bool\f[]
-.br
-* \f[I]string\f[]
-
-.IP "Default:" 9
-\f[I]false\f[]
-
-.IP "Description:" 4
-Controls how to retrieve age-restricted content when not logged in.
-
-.br
-* \f[I]false\f[]: Skip age-restricted Tweets.
-.br
-* \f[I]true\f[]: Download using Twitter's syndication API.
-.br
-* \f[I]"extended"\f[]: Try to fetch Tweet metadata using the normal API
-in addition to the syndication API. This requires additional HTTP
-requests in some cases (e.g. when \f[I]retweets\f[]
-are enabled).
-
-Note: This does not apply to search results (including
-\f[I]timeline strategies\f[]).
-To retrieve such content from search results, you must log in and
-disable "Hide sensitive content" in your \f[I]search settings
-<https://twitter.com/settings/search>\f[].
-
-
.SS extractor.twitter.logout
.IP "Type:" 6
\f[I]bool\f[]
@@ -3979,7 +3971,7 @@ Controls the strategy / tweet source used for timeline URLs
.br
* \f[I]"with_replies"\f[]: \f[I]/with_replies\f[] timeline + search
.br
-* \f[I]"auto"\f[]: \f[I]"tweets"\f[] or \f[I]"media"\f[], depending on \f[I]retweets\f[] and \f[I]text-tweets\f[] settings
+* \f[I]"auto"\f[]: \f[I]"tweets"\f[] or \f[I]"media"\f[], depending on \f[I]retweets\f[], \f[I]replies\f[], and \f[I]text-tweets\f[] settings
.SS extractor.twitter.text-tweets
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index e1b709b..934609a 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.26.4
+Version: 1.26.5
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -112,9 +112,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.bin>`__
Nightly Builds
@@ -172,6 +172,43 @@ For macOS users with MacPorts:
sudo port install gallery-dl
+Docker
+--------
+Using the Dockerfile in the repository:
+
+.. code:: bash
+
+ git clone https://github.com/mikf/gallery-dl.git
+ cd gallery-dl/
+ docker build -t gallery-dl:latest .
+
+Pulling image from `Docker Hub <https://hub.docker.com/r/mikf123/gallery-dl>`__:
+
+.. code:: bash
+
+ docker pull mikf123/gallery-dl
+ docker tag mikf123/gallery-dl gallery-dl
+
+Pulling image from `GitHub Container Registry <https://github.com/mikf/gallery-dl/pkgs/container/gallery-dl>`__:
+
+.. code:: bash
+
+ docker pull ghcr.io/mikf/gallery-dl
+ docker tag ghcr.io/mikf/gallery-dl gallery-dl
+
+To run the container, you will probably want to mount some host directories so that the config file and downloads persist across runs.
+
+Make sure to either download the example config file referenced in the repo and place it in the mounted volume location, or touch an empty file there.
+
+If you gave the container a different tag or are using podman, adjust the commands accordingly. Run ``docker image ls`` to check the image name if you are not sure.
+
+The following command removes the container after every use, so you always get a fresh environment to run in. If you set up a CI/CD pipeline to automatically rebuild the container, you can also add a ``--pull=newer`` flag so that Docker checks for a newer image and downloads it before running.
+
+.. code:: bash
+
+    docker run --rm -v $HOME/Downloads/:/gallery-dl/ -v $HOME/.config/gallery-dl/gallery-dl.conf:/etc/gallery-dl.conf -it gallery-dl:latest
+
+You can also add a shell alias for ``gallery-dl`` or create a simple bash script and drop it somewhere in your ``$PATH`` to act as a shim for this command, as sketched below.
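+For example, a minimal wrapper script along these lines could serve as the shim (a sketch; the volume mounts mirror the command above and may need adjusting for your setup):
+
+.. code:: bash
+
+    #!/bin/sh
+    # Run gallery-dl in a throwaway container, forwarding all arguments.
+    # -it matches the documented invocation; drop it for non-interactive use.
+    exec docker run --rm \
+        -v "$HOME/Downloads/:/gallery-dl/" \
+        -v "$HOME/.config/gallery-dl/gallery-dl.conf:/etc/gallery-dl.conf" \
+        -it gallery-dl:latest "$@"
+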
Usage
=====
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 9bcf0b2..30cda54 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -167,6 +167,7 @@ gallery_dl/extractor/plurk.py
gallery_dl/extractor/poipiku.py
gallery_dl/extractor/pornhub.py
gallery_dl/extractor/pornpics.py
+gallery_dl/extractor/postmill.py
gallery_dl/extractor/pururin.py
gallery_dl/extractor/reactor.py
gallery_dl/extractor/readcomiconline.py
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index d074de2..695b8b2 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -124,6 +124,7 @@ modules = [
"poipiku",
"pornhub",
"pornpics",
+ "postmill",
"pururin",
"reactor",
"readcomiconline",
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 576bc83..ec86263 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -40,7 +40,7 @@ class AryionExtractor(Extractor):
if username:
self.cookies_update(self._login_impl(username, password))
- @cache(maxage=14*24*3600, keyarg=1)
+ @cache(maxage=14*86400, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 9e6516e..09beb5f 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -20,7 +20,7 @@ class DanbooruExtractor(BaseExtractor):
page_limit = 1000
page_start = None
per_page = 200
- request_interval = 1.0
+ request_interval = (0.5, 1.5)
def _init(self):
self.ugoira = self.config("ugoira", False)
@@ -72,6 +72,25 @@ class DanbooruExtractor(BaseExtractor):
post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ post["tags"] = (
+ post["tag_string"].split(" ")
+ if post["tag_string"] else ())
+ post["tags_artist"] = (
+ post["tag_string_artist"].split(" ")
+ if post["tag_string_artist"] else ())
+ post["tags_character"] = (
+ post["tag_string_character"].split(" ")
+ if post["tag_string_character"] else ())
+ post["tags_copyright"] = (
+ post["tag_string_copyright"].split(" ")
+ if post["tag_string_copyright"] else ())
+ post["tags_general"] = (
+ post["tag_string_general"].split(" ")
+ if post["tag_string_general"] else ())
+ post["tags_meta"] = (
+ post["tag_string_meta"].split(" ")
+ if post["tag_string_meta"] else ())
+
if post["extension"] == "zip":
if self.ugoira:
post["frames"] = self._ugoira_frames(post)
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 1852dc1..2ba47e1 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -48,6 +48,7 @@ class DeviantartExtractor(Extractor):
self.quality = self.config("quality", "100")
self.original = self.config("original", True)
self.comments = self.config("comments", False)
+ self.intermediary = self.config("intermediary", True)
self.api = DeviantartOAuthAPI(self)
self.group = False
@@ -136,12 +137,13 @@ class DeviantartExtractor(Extractor):
elif self.jwt:
self._update_token(deviation, content)
elif content["src"].startswith("https://images-wixmp-"):
- if deviation["index"] <= 790677560:
+ if self.intermediary and deviation["index"] <= 790677560:
# https://github.com/r888888888/danbooru/issues/4069
intermediary, count = re.subn(
r"(/f/[^/]+/[^/]+)/v\d+/.*",
r"/intermediary\1", content["src"], 1)
if count:
+ deviation["is_original"] = False
deviation["_fallback"] = (content["src"],)
content["src"] = intermediary
if self.quality:
@@ -1003,8 +1005,9 @@ class DeviantartOAuthAPI():
self.strategy = extractor.config("pagination")
self.public = extractor.config("public", True)
- self.client_id = extractor.config("client-id")
- if self.client_id:
+ client_id = extractor.config("client-id")
+ if client_id:
+ self.client_id = str(client_id)
self.client_secret = extractor.config("client-secret")
else:
self.client_id = self.CLIENT_ID
@@ -1012,7 +1015,7 @@ class DeviantartOAuthAPI():
token = extractor.config("refresh-token")
if token is None or token == "cache":
- token = "#" + str(self.client_id)
+ token = "#" + self.client_id
if not _refresh_token_cache(token):
token = None
self.refresh_token_key = token
@@ -1578,7 +1581,7 @@ class DeviantartEclipseAPI():
return token
-@cache(maxage=100*365*86400, keyarg=0)
+@cache(maxage=36500*86400, keyarg=0)
def _refresh_token_cache(token):
if token and token[0] == "#":
return None
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index a479d00..acad95c 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -26,7 +26,7 @@ class ExhentaiExtractor(Extractor):
cookies_domain = ".exhentai.org"
cookies_names = ("ipb_member_id", "ipb_pass_hash")
root = "https://exhentai.org"
- request_interval = 5.0
+ request_interval = (3.0, 6.0)
ciphers = "DEFAULT:!DH"
LIMIT = False
@@ -67,14 +67,15 @@ class ExhentaiExtractor(Extractor):
if username:
return self.cookies_update(self._login_impl(username, password))
- self.log.info("no username given; using e-hentai.org")
- self.root = "https://e-hentai.org"
- self.cookies_domain = ".e-hentai.org"
- self.cookies.set("nw", "1", domain=self.cookies_domain)
+ if self.version == "ex":
+ self.log.info("No username or cookies given; using e-hentai.org")
+ self.root = "https://e-hentai.org"
+ self.cookies_domain = ".e-hentai.org"
+ self.cookies.set("nw", "1", domain=self.cookies_domain)
self.original = False
self.limits = False
- @cache(maxage=90*24*3600, keyarg=1)
+ @cache(maxage=90*86400, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
@@ -124,6 +125,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self.key_show = None
self.key_next = None
self.count = 0
+ self.data = None
def _init(self):
source = self.config("source")
@@ -138,11 +140,15 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self.limits = False
self.fallback_retries = self.config("fallback-retries", 2)
- if self.fallback_retries < 0:
- self.fallback_retries = float("inf")
-
self.original = self.config("original", True)
+ def finalize(self):
+ if self.data:
+ self.log.info("Use '%s/s/%s/%s-%s' as input URL "
+ "to continue downloading from the current position",
+ self.root, self.data["image_token"],
+ self.gallery_id, self.data["num"])
+
def favorite(self, slot="0"):
url = self.root + "/gallerypopups.php"
params = {
@@ -178,32 +184,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self.gallery_token = part.split("/")[1]
gpage = self._gallery_page()
- data = self.get_metadata(gpage)
+ self.data = data = self.get_metadata(gpage)
self.count = text.parse_int(data["filecount"])
yield Message.Directory, data
- def _validate_response(response):
- # declared inside 'items()' to be able to access 'data'
- if not response.history and response.headers.get(
- "content-type", "").startswith("text/html"):
- page = response.text
- self.log.warning("'%s'", page)
-
- if " requires GP" in page:
- gp = self.config("gp")
- if gp == "stop":
- raise exception.StopExtraction("Not enough GP")
- elif gp == "wait":
- input("Press ENTER to continue.")
- return response.url
-
- self.log.info("Falling back to non-original downloads")
- self.original = False
- return data["_url_1280"]
-
- self._report_limits(data)
- return True
-
images = itertools.chain(
(self.image_from_page(ipage),), self.images_from_api())
for url, image in images:
@@ -211,7 +195,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
if self.limits:
self._check_limits(data)
if "/fullimg" in url:
- data["_http_validate"] = _validate_response
+ data["_http_validate"] = self._validate_response
else:
data["_http_validate"] = None
yield Message.Url, url, data
@@ -219,6 +203,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
fav = self.config("fav")
if fav is not None:
self.favorite(fav)
+ self.data = None
def _items_hitomi(self):
if self.config("metadata", False):
@@ -332,7 +317,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
data["_nl"] = nl
self.key_show = extr('var showkey="', '";')
- self._check_509(iurl, data)
+ self._check_509(iurl)
return url, text.nameext_from_url(url, data)
def images_from_api(self):
@@ -382,33 +367,51 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
data["_url_1280"] = imgurl
data["_nl"] = nl
- self._check_509(imgurl, data)
+ self._check_509(imgurl)
yield url, text.nameext_from_url(url, data)
request["imgkey"] = nextkey
- def _report_limits(self, data):
+ def _validate_response(self, response):
+ if not response.history and response.headers.get(
+ "content-type", "").startswith("text/html"):
+ page = response.text
+ self.log.warning("'%s'", page)
+
+ if " requires GP" in page:
+ gp = self.config("gp")
+ if gp == "stop":
+ raise exception.StopExtraction("Not enough GP")
+ elif gp == "wait":
+ input("Press ENTER to continue.")
+ return response.url
+
+ self.log.info("Falling back to non-original downloads")
+ self.original = False
+ return self.data["_url_1280"]
+
+ self._report_limits()
+ return True
+
+ def _report_limits(self):
ExhentaiExtractor.LIMIT = True
- raise exception.StopExtraction(
- "Image limit reached! "
- "Continue with '%s/s/%s/%s-%s' as URL after resetting it.",
- self.root, data["image_token"], self.gallery_id, data["num"])
+ raise exception.StopExtraction("Image limit reached!")
def _check_limits(self, data):
if not self._remaining or data["num"] % 25 == 0:
self._update_limits()
self._remaining -= data["cost"]
if self._remaining <= 0:
- self._report_limits(data)
+ self._report_limits()
- def _check_509(self, url, data):
+ def _check_509(self, url):
# full 509.gif URLs
# - https://exhentai.org/img/509.gif
# - https://ehgt.org/g/509.gif
if url.endswith(("hentai.org/img/509.gif",
"ehgt.org/g/509.gif")):
self.log.debug(url)
- self._report_limits(data)
+ self._report_limits()
def _update_limits(self):
url = "https://e-hentai.org/home.php"
@@ -449,14 +452,14 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
def _fallback_original(self, nl, fullimg):
url = "{}?nl={}".format(fullimg, nl)
- for _ in range(self.fallback_retries):
+ for _ in util.repeat(self.fallback_retries):
yield url
def _fallback_1280(self, nl, num, token=None):
if not token:
token = self.key_start
- for _ in range(self.fallback_retries):
+ for _ in util.repeat(self.fallback_retries):
url = "{}/s/{}/{}-{}?nl={}".format(
self.root, token, self.gallery_id, num, nl)
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 93ac541..cedac0c 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -169,7 +169,7 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
directory_fmt = ("{category}", "search", "{search}")
pattern = BASE_PATTERN + r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
example = "https://archived.moe/_/search/text/QUERY/"
- request_interval = 1.0
+ request_interval = (0.5, 1.5)
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py
index 5c7a1b3..b9e2c3d 100644
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -25,7 +25,7 @@ class IdolcomplexExtractor(SankakuExtractor):
cookies_domain = "idol.sankakucomplex.com"
cookies_names = ("_idolcomplex_session",)
referer = False
- request_interval = (4.0, 6.0)
+ request_interval = (3.0, 6.0)
def __init__(self, match):
SankakuExtractor.__init__(self, match)
@@ -67,7 +67,7 @@ class IdolcomplexExtractor(SankakuExtractor):
self.logged_in = False
- @cache(maxage=90*24*3600, keyarg=1)
+ @cache(maxage=90*86400, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
index 6c0684e..b926cb2 100644
--- a/gallery_dl/extractor/imgbb.py
+++ b/gallery_dl/extractor/imgbb.py
@@ -64,7 +64,7 @@ class ImgbbExtractor(Extractor):
if username:
self.cookies_update(self._login_impl(username, password))
- @cache(maxage=360*24*3600, keyarg=1)
+ @cache(maxage=365*86400, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index 4ad37fc..62586af 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -103,7 +103,8 @@ class InkbunnyPoolExtractor(InkbunnyExtractor):
subcategory = "pool"
pattern = (BASE_PATTERN + r"/(?:"
r"poolview_process\.php\?pool_id=(\d+)|"
- r"submissionsviewall\.php\?([^#]+&mode=pool&[^#]+))")
+ r"submissionsviewall\.php"
+ r"\?((?:[^#]+&)?mode=pool(?:&[^#]+)?))")
example = "https://inkbunny.net/poolview_process.php?pool_id=12345"
def __init__(self, match):
@@ -133,7 +134,8 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor):
subcategory = "favorite"
pattern = (BASE_PATTERN + r"/(?:"
r"userfavorites_process\.php\?favs_user_id=(\d+)|"
- r"submissionsviewall\.php\?([^#]+&mode=userfavs&[^#]+))")
+ r"submissionsviewall\.php"
+ r"\?((?:[^#]+&)?mode=userfavs(?:&[^#]+)?))")
example = ("https://inkbunny.net/userfavorites_process.php"
"?favs_user_id=12345")
@@ -161,11 +163,31 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor):
return self.api.search(params)
+class InkbunnyUnreadExtractor(InkbunnyExtractor):
+ """Extractor for unread inkbunny submissions"""
+ subcategory = "unread"
+ pattern = (BASE_PATTERN + r"/submissionsviewall\.php"
+ r"\?((?:[^#]+&)?mode=unreadsubs(?:&[^#]+)?)")
+ example = ("https://inkbunny.net/submissionsviewall.php"
+ "?text=&mode=unreadsubs&type=")
+
+ def __init__(self, match):
+ InkbunnyExtractor.__init__(self, match)
+ self.params = text.parse_query(match.group(1))
+
+ def posts(self):
+ params = self.params.copy()
+ params.pop("rid", None)
+ params.pop("mode", None)
+ params["unread_submissions"] = "yes"
+ return self.api.search(params)
+
+
class InkbunnySearchExtractor(InkbunnyExtractor):
"""Extractor for inkbunny search results"""
subcategory = "search"
- pattern = (BASE_PATTERN +
- r"/submissionsviewall\.php\?([^#]+&mode=search&[^#]+)")
+ pattern = (BASE_PATTERN + r"/submissionsviewall\.php"
+ r"\?((?:[^#]+&)?mode=search(?:&[^#]+)?)")
example = ("https://inkbunny.net/submissionsviewall.php"
"?text=TAG&mode=search&type=")
@@ -201,7 +223,8 @@ class InkbunnyFollowingExtractor(InkbunnyExtractor):
subcategory = "following"
pattern = (BASE_PATTERN + r"/(?:"
r"watchlist_process\.php\?mode=watching&user_id=(\d+)|"
- r"usersviewall\.php\?([^#]+&mode=watching&[^#]+))")
+ r"usersviewall\.php"
+ r"\?((?:[^#]+&)?mode=watching(?:&[^#]+)?))")
example = ("https://inkbunny.net/watchlist_process.php"
"?mode=watching&user_id=12345")
@@ -324,6 +347,9 @@ class InkbunnyAPI():
while True:
data = self._call("search", params)
+ if not data["submissions"]:
+ return
+
yield from self.detail(data["submissions"])
if data["page"] >= data["pages_count"]:
@@ -334,7 +360,7 @@ class InkbunnyAPI():
params["page"] += 1
-@cache(maxage=360*24*3600, keyarg=1)
+@cache(maxage=365*86400, keyarg=1)
def _authenticate_impl(api, username, password):
api.extractor.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 8ec6741..6eae7db 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -977,7 +977,7 @@ class InstagramGraphqlAPI():
variables["after"] = extr._update_cursor(info["end_cursor"])
-@cache(maxage=90*24*3600, keyarg=1)
+@cache(maxage=90*86400, keyarg=1)
def _login_impl(extr, username, password):
extr.log.error("Login with username & password is no longer supported. "
"Use browser cookies instead.")
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index cba6211..c24e57d 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -129,7 +129,7 @@ class KemonopartyExtractor(Extractor):
self.cookies_update(self._login_impl(
(username, self.cookies_domain), password))
- @cache(maxage=28*24*3600, keyarg=1)
+ @cache(maxage=28*86400, keyarg=1)
def _login_impl(self, username, password):
username = username[0]
self.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index dbaf4cb..94bea57 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -266,6 +266,6 @@ class MangadexAPI():
return
-@cache(maxage=28*24*3600, keyarg=0)
+@cache(maxage=28*86400, keyarg=0)
def _refresh_token_cache(username):
return None
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index c5fe840..0b63d6c 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -152,7 +152,7 @@ class MastodonFollowingExtractor(MastodonExtractor):
class MastodonStatusExtractor(MastodonExtractor):
"""Extractor for images from a status"""
subcategory = "status"
- pattern = BASE_PATTERN + r"/@[^/?#]+/(\d+)"
+ pattern = BASE_PATTERN + r"/@[^/?#]+/(?!following)([^/?#]+)"
example = "https://mastodon.social/@USER/12345"
def statuses(self):
@@ -277,6 +277,6 @@ class MastodonAPI():
params = None
-@cache(maxage=100*365*24*3600, keyarg=0)
+@cache(maxage=36500*86400, keyarg=0)
def _access_token_cache(instance):
return None
diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py
index 33a2284..5e8179e 100644
--- a/gallery_dl/extractor/myhentaigallery.py
+++ b/gallery_dl/extractor/myhentaigallery.py
@@ -16,12 +16,12 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor):
root = "https://myhentaigallery.com"
directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}")
pattern = (r"(?:https?://)?myhentaigallery\.com"
- r"/gallery/(?:thumbnails|show)/(\d+)")
- example = "https://myhentaigallery.com/gallery/thumbnails/12345"
+ r"/g(?:allery/(?:thumbnails|show))?/(\d+)")
+ example = "https://myhentaigallery.com/g/12345"
def __init__(self, match):
self.gallery_id = match.group(1)
- url = "{}/gallery/thumbnails/{}".format(self.root, self.gallery_id)
+ url = "{}/g/{}".format(self.root, self.gallery_id)
GalleryExtractor.__init__(self, match, url)
def _init(self):
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index a6971e8..4cdcf87 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -23,7 +23,7 @@ class NewgroundsExtractor(Extractor):
root = "https://www.newgrounds.com"
cookies_domain = ".newgrounds.com"
cookies_names = ("NG_GG_username", "vmk1du5I8m")
- request_interval = 1.0
+ request_interval = (0.5, 1.5)
def __init__(self, match):
Extractor.__init__(self, match)
@@ -98,7 +98,7 @@ class NewgroundsExtractor(Extractor):
if username:
self.cookies_update(self._login_impl(username, password))
- @cache(maxage=360*24*3600, keyarg=1)
+ @cache(maxage=365*86400, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 54f2942..57c3118 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -124,15 +124,15 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
return
username, password = self._get_auth_info()
- self.cookies_update(self._login_impl(username, password))
+ if username:
+ return self.cookies_update(self._login_impl(username, password))
- @cache(maxage=90*24*3600, keyarg=1)
- def _login_impl(self, username, password):
- if not username or not password:
- raise exception.AuthenticationError(
- "Username and password required")
+ raise exception.AuthenticationError("Username and password required")
+ @cache(maxage=90*86400, keyarg=1)
+ def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
+
url = "{}/login_int.php".format(self.root)
data = {"email": username, "password": password, "save": "on"}
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 65db94d..1690160 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -376,7 +376,7 @@ class OAuthMastodon(OAuthBase):
cache=mastodon._access_token_cache,
)
- @cache(maxage=10*365*24*3600, keyarg=1)
+ @cache(maxage=36500*86400, keyarg=1)
def _register(self, instance):
self.log.info("Registering application for '%s'", instance)
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index fb560e9..6c2f39d 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -249,6 +249,15 @@ class PatreonExtractor(Extractor):
return [genmap[ft] for ft in filetypes]
def _extract_bootstrap(self, page):
+ data = text.extr(
+ page, 'id="__NEXT_DATA__" type="application/json">', '</script')
+ if data:
+ try:
+ return (util.json_loads(data)["props"]["pageProps"]
+ ["bootstrapEnvelope"]["bootstrap"])
+ except Exception as exc:
+ self.log.debug("%s: %s", exc.__class__.__name__, exc)
+
bootstrap = text.extr(
page, 'window.patreon = {"bootstrap":', '},"apiServer"')
if bootstrap:
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index 3a0f5b0..ac6a391 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -18,7 +18,7 @@ class PhilomenaExtractor(BooruExtractor):
basecategory = "philomena"
filename_fmt = "{filename}.{extension}"
archive_fmt = "{id}"
- request_interval = 1.0
+ request_interval = (0.5, 1.5)
page_start = 1
per_page = 50
diff --git a/gallery_dl/extractor/pillowfort.py b/gallery_dl/extractor/pillowfort.py
index ff591fb..5362f13 100644
--- a/gallery_dl/extractor/pillowfort.py
+++ b/gallery_dl/extractor/pillowfort.py
@@ -91,7 +91,7 @@ class PillowfortExtractor(Extractor):
if username:
self.cookies_update(self._login_impl(username, password))
- @cache(maxage=14*24*3600, keyarg=1)
+ @cache(maxage=14*86400, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index e9f124f..4b26393 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -10,7 +10,6 @@
from .common import Extractor, Message
from .. import text, util, exception
-from ..cache import cache
import itertools
BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"
@@ -33,7 +32,6 @@ class PinterestExtractor(Extractor):
self.api = PinterestAPI(self)
def items(self):
- self.api.login()
data = self.metadata()
videos = self.config("videos", True)
@@ -416,41 +414,6 @@ class PinterestAPI():
options = {"query": query, "scope": "pins", "rs": "typed"}
return self._pagination("BaseSearch", options)
- def login(self):
- """Login and obtain session cookies"""
- username, password = self.extractor._get_auth_info()
- if username:
- self.cookies.update(self._login_impl(username, password))
-
- @cache(maxage=180*24*3600, keyarg=1)
- def _login_impl(self, username, password):
- self.extractor.log.info("Logging in as %s", username)
-
- url = self.root + "/resource/UserSessionResource/create/"
- options = {
- "username_or_email": username,
- "password" : password,
- }
- data = {
- "data" : util.json_dumps({"options": options}),
- "source_url": "",
- }
-
- try:
- response = self.extractor.request(
- url, method="POST", headers=self.headers,
- cookies=self.cookies, data=data)
- resource = response.json()["resource_response"]
- except (exception.HttpError, ValueError, KeyError):
- raise exception.AuthenticationError()
-
- if resource["status"] != "success":
- raise exception.AuthenticationError()
- return {
- cookie.name: cookie.value
- for cookie in response.cookies
- }
-
def _call(self, resource, options):
url = "{}/resource/{}Resource/get/".format(self.root, resource)
params = {
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 411d191..4414c71 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -594,7 +594,7 @@ class PixivSeriesExtractor(PixivExtractor):
class PixivNovelExtractor(PixivExtractor):
"""Extractor for pixiv novels"""
subcategory = "novel"
- request_interval = 1.0
+ request_interval = (0.5, 1.5)
pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)"
example = "https://www.pixiv.net/novel/show.php?id=12345"
@@ -996,6 +996,6 @@ class PixivAppAPI():
params = text.parse_query(query)
-@cache(maxage=10*365*24*3600, keyarg=0)
+@cache(maxage=36500*86400, keyarg=0)
def _refresh_token_cache(username):
return None
diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py
index 5a3bf5a..be0dbde 100644
--- a/gallery_dl/extractor/plurk.py
+++ b/gallery_dl/extractor/plurk.py
@@ -18,7 +18,7 @@ class PlurkExtractor(Extractor):
"""Base class for plurk extractors"""
category = "plurk"
root = "https://www.plurk.com"
- request_interval = 1.0
+ request_interval = (0.5, 1.5)
def items(self):
urls = self._urls_ex if self.config("comments", False) else self._urls
diff --git a/gallery_dl/extractor/postmill.py b/gallery_dl/extractor/postmill.py
new file mode 100644
index 0000000..29b351b
--- /dev/null
+++ b/gallery_dl/extractor/postmill.py
@@ -0,0 +1,203 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for Postmill instances"""
+
+import re
+from .common import BaseExtractor, Message
+from .. import text, exception
+
+
+class PostmillExtractor(BaseExtractor):
+ """Base class for Postmill extractors"""
+ basecategory = "postmill"
+ directory_fmt = ("{category}", "{instance}", "{forum}")
+ filename_fmt = "{id}_{title[:220]}.{extension}"
+ archive_fmt = "{filename}"
+
+ def _init(self):
+ self.instance = self.root.partition("://")[2]
+ self.save_link_post_body = self.config("save-link-post-body", False)
+ self._search_canonical_url = re.compile(r"/f/([\w\d_]+)/(\d+)/").search
+ self._search_image_tag = re.compile(
+ r'<a href="[^"]+"\n +class="submission__image-link"').search
+
+ def items(self):
+ for post_url in self.post_urls():
+ page = self.request(post_url).text
+ extr = text.extract_from(page)
+
+ title = text.unescape(extr(
+ '<meta property="og:title" content="', '">'))
+ date = text.parse_datetime(extr(
+ '<meta property="og:article:published_time" content="', '">'))
+ username = extr(
+ '<meta property="og:article:author" content="', '">')
+ post_canonical_url = text.unescape(extr(
+ '<link rel="canonical" href="', '">'))
+
+ url = text.unescape(extr(
+ '<h1 class="submission__title unheaderize inline"><a href="',
+ '"'))
+ body = extr(
+ '<div class="submission__body break-text text-flow">',
+ '</div>')
+
+ match = self._search_canonical_url(post_canonical_url)
+ forum = match.group(1)
+ id = int(match.group(2))
+
+ is_text_post = url.startswith("/")
+ is_image_post = self._search_image_tag(page) is not None
+ data = {
+ "title": title,
+ "date": date,
+ "username": username,
+ "forum": forum,
+ "id": id,
+ "flair": [text.unescape(i) for i in text.extract_iter(
+ page, '<span class="flair__label">', '</span>')],
+ "instance": self.instance,
+ }
+
+ urls = []
+ if is_text_post or self.save_link_post_body:
+ urls.append((Message.Url, "text:" + body))
+
+ if is_image_post:
+ urls.append((Message.Url, url))
+ elif not is_text_post:
+ urls.append((Message.Queue, url))
+
+ data["count"] = len(urls)
+ yield Message.Directory, data
+ for data["num"], (msg, url) in enumerate(urls, 1):
+ if url.startswith("text:"):
+ data["filename"], data["extension"] = "", "htm"
+ else:
+ data = text.nameext_from_url(url, data)
+
+ yield msg, url, data
+
+
+class PostmillSubmissionsExtractor(PostmillExtractor):
+ """Base class for Postmill submissions extractors"""
+ whitelisted_parameters = ()
+
+ def __init__(self, match):
+ PostmillExtractor.__init__(self, match)
+ groups = match.groups()
+ self.base = groups[-3]
+ self.sorting_path = groups[-2] or ""
+ self.query = {key: value for key, value in text.parse_query(
+ groups[-1]).items() if self.acceptable_query(key)}
+
+ def items(self):
+ url = self.root + self.base + self.sorting_path
+
+ while url:
+ response = self.request(url, params=self.query)
+ if response.history:
+ redirect_url = response.url
+ if redirect_url == self.root + "/login":
+ raise exception.StopExtraction(
+ "HTTP redirect to login page (%s)", redirect_url)
+ page = response.text
+
+ for nav in text.extract_iter(page,
+ '<nav class="submission__nav">',
+ '</nav>'):
+ post_url = text.unescape(text.extr(nav, '<a href="', '"'))
+ yield Message.Queue, text.urljoin(url, post_url), \
+ {"_extractor": PostmillPostExtractor}
+
+ url = text.unescape(text.extr(page,
+ '<link rel="next" href="', '">'))
+
+ def acceptable_query(self, key):
+ return key in self.whitelisted_parameters or key == "t" or \
+ (key.startswith("next[") and key.endswith("]"))
+
+
+BASE_PATTERN = PostmillExtractor.update({
+ "raddle": {
+ "root" : None,
+ "pattern": (r"(?:raddle\.me|"
+ r"c32zjeghcp5tj3kb72pltz56piei66drc63vkhn5yixiyk4cmerrjtid"
+ r"\.onion)"),
+ }
+})
+QUERY_RE = r"(?:\?([^#]+))?$"
+SORTING_RE = r"(/(?:hot|new|active|top|controversial|most_commented))?" + \
+ QUERY_RE
+
+
+class PostmillPostExtractor(PostmillExtractor):
+ """Extractor for a single submission URL"""
+ subcategory = "post"
+ pattern = BASE_PATTERN + r"/f/(\w+)/(\d+)"
+ example = "https://raddle.me/f/FORUM/123/TITLE"
+
+ def __init__(self, match):
+ PostmillExtractor.__init__(self, match)
+ self.forum = match.group(3)
+ self.post_id = match.group(4)
+
+ def post_urls(self):
+ return (self.root + "/f/" + self.forum + "/" + self.post_id,)
+
+
+class PostmillShortURLExtractor(PostmillExtractor):
+ """Extractor for short submission URLs"""
+ subcategory = "shorturl"
+ pattern = BASE_PATTERN + r"/(\d+)$"
+ example = "https://raddle.me/123"
+
+ def __init__(self, match):
+ PostmillExtractor.__init__(self, match)
+ self.post_id = match.group(3)
+
+ def items(self):
+ url = self.root + "/" + self.post_id
+ response = self.request(url, method="HEAD", allow_redirects=False)
+ full_url = text.urljoin(url, response.headers["Location"])
+ yield Message.Queue, full_url, {"_extractor": PostmillPostExtractor}
+
+
+class PostmillHomeExtractor(PostmillSubmissionsExtractor):
+ """Extractor for the home page"""
+ subcategory = "home"
+ pattern = BASE_PATTERN + r"(/(?:featured|subscribed|all)?)" + SORTING_RE
+ example = "https://raddle.me/"
+
+
+class PostmillForumExtractor(PostmillSubmissionsExtractor):
+ """Extractor for submissions on a forum"""
+ subcategory = "forum"
+ pattern = BASE_PATTERN + r"(/f/\w+)" + SORTING_RE
+ example = "https://raddle.me/f/FORUM"
+
+
+class PostmillUserSubmissionsExtractor(PostmillSubmissionsExtractor):
+ """Extractor for submissions made by a user"""
+ subcategory = "usersubmissions"
+ pattern = BASE_PATTERN + r"(/user/\w+/submissions)()" + QUERY_RE
+ example = "https://raddle.me/user/USER/submissions"
+
+
+class PostmillTagExtractor(PostmillSubmissionsExtractor):
+ """Extractor for submissions on a forum with a specific tag"""
+ subcategory = "tag"
+ pattern = BASE_PATTERN + r"(/tag/\w+)" + SORTING_RE
+ example = "https://raddle.me/tag/TAG"
+
+
+class PostmillSearchExtractor(PostmillSubmissionsExtractor):
+ """Extractor for search results"""
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"(/search)()\?(q=[^#]+)$"
+ example = "https://raddle.me/search?q=QUERY"
+ whitelisted_parameters = ("q",)
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index 9a6c8a5..ab555d8 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -18,7 +18,7 @@ class ReactorExtractor(BaseExtractor):
basecategory = "reactor"
filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}"
archive_fmt = "{post_id}_{num}"
- request_interval = 5.0
+ request_interval = (3.0, 6.0)
def __init__(self, match):
BaseExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index 93e41be..3569860 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -23,7 +23,7 @@ class ReadcomiconlineBase():
filename_fmt = "{comic}_{issue:>03}_{page:>03}.{extension}"
archive_fmt = "{issue_id}_{page}"
root = "https://readcomiconline.li"
- request_interval = (3.0, 7.0)
+ request_interval = (3.0, 6.0)
def request(self, url, **kwargs):
"""Detect and handle redirects to CAPTCHA pages"""
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index feb6d1f..2ef0f9f 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -159,7 +159,7 @@ class RedditExtractor(Extractor):
data = meta[item["media_id"]]
if data["status"] != "valid" or "s" not in data:
self.log.warning(
- "gallery %s: skipping item %s ('status: %s')",
+ "gallery %s: skipping item %s (status: %s)",
submission["id"], item["media_id"], data.get("status"))
continue
src = data["s"]
@@ -531,7 +531,7 @@ class RedditAPI():
return util.bdecode(sid, "0123456789abcdefghijklmnopqrstuvwxyz")
-@cache(maxage=100*365*24*3600, keyarg=0)
+@cache(maxage=36500*86400, keyarg=0)
def _refresh_token_cache(token):
if token and token[0] == "#":
return None
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 8941258..602895c 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -285,7 +285,7 @@ class SankakuAPI():
return
-@cache(maxage=365*24*3600, keyarg=1)
+@cache(maxage=365*86400, keyarg=1)
def _authenticate_impl(extr, username, password):
extr.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py
index 912e601..8a08fab 100644
--- a/gallery_dl/extractor/shimmie2.py
+++ b/gallery_dl/extractor/shimmie2.py
@@ -41,8 +41,9 @@ class Shimmie2Extractor(BaseExtractor):
for post in self.posts():
- for key in ("id", "width", "height"):
- post[key] = text.parse_int(post[key])
+ post["id"] = text.parse_int(post["id"])
+ post["width"] = text.parse_int(post["width"])
+ post["height"] = text.parse_int(post["height"])
post["tags"] = text.unquote(post["tags"])
post.update(data)
@@ -64,6 +65,13 @@ class Shimmie2Extractor(BaseExtractor):
"""Return an iterable containing data of all relevant posts"""
return ()
+ def _quote_type(self, page):
+ """Return quoting character used in 'page' (' or ")"""
+ try:
+ return page[page.index("<link rel=")+10]
+ except Exception:
+ return "'"
+
INSTANCES = {
"loudbooru": {
@@ -85,6 +93,10 @@ INSTANCES = {
"pattern": r"booru\.cavemanon\.xyz",
"file_url": "{0}/index.php?q=image/{2}.{4}",
},
+ "rule34hentai": {
+ "root": "https://rule34hentai.net",
+ "pattern": r"rule34hentai\.net",
+ },
}
BASE_PATTERN = Shimmie2Extractor.update(INSTANCES) + r"/(?:index\.php\?q=/?)?"
@@ -121,21 +133,26 @@ class Shimmie2TagExtractor(Shimmie2Extractor):
if init:
init = False
- has_mime = ("data-mime='" in page)
- has_pid = ("data-post-id='" in page)
+ quote = self._quote_type(page)
+ has_mime = (" data-mime=" in page)
+ has_pid = (" data-post-id=" in page)
while True:
if has_mime:
- mime = extr("data-mime='", "'")
+ mime = extr(" data-mime="+quote, quote)
if has_pid:
- pid = extr("data-post-id='", "'")
+ pid = extr(" data-post-id="+quote, quote)
else:
- pid = extr("href='/post/view/", "?")
+ pid = extr(" href='/post/view/", quote)
if not pid:
break
- tags, dimensions, size = extr("title='", "'").split(" // ")
+ data = extr("title="+quote, quote).split(" // ")
+ tags = data[0]
+ dimensions = data[1]
+ size = data[2]
+
width, _, height = dimensions.partition("x")
md5 = extr("/_thumbs/", "/")
@@ -200,15 +217,17 @@ class Shimmie2PostExtractor(Shimmie2Extractor):
def posts(self):
url = "{}/post/view/{}".format(self.root, self.post_id)
- extr = text.extract_from(self.request(url).text)
+ page = self.request(url).text
+ extr = text.extract_from(page)
+ quote = self._quote_type(page)
post = {
"id" : self.post_id,
"tags" : extr(": ", "<").partition(" - ")[0].rstrip(")"),
"md5" : extr("/_thumbs/", "/"),
"file_url": self.root + (
- extr("id='main_image' src='", "'") or
- extr("<source src='", "'")).lstrip("."),
+ extr("id={0}main_image{0} src={0}".format(quote), quote) or
+ extr("<source src="+quote, quote)).lstrip("."),
"width" : extr("data-width=", " ").strip("\"'"),
"height" : extr("data-height=", ">").partition(
" ")[0].strip("\"'"),
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 6b4cba2..31fb891 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -56,7 +56,7 @@ class SubscribestarExtractor(Extractor):
if username:
self.cookies_update(self._login_impl(username, password))
- @cache(maxage=28*24*3600, keyarg=1)
+ @cache(maxage=28*86400, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py
index bfca7a6..0a9df20 100644
--- a/gallery_dl/extractor/tapas.py
+++ b/gallery_dl/extractor/tapas.py
@@ -81,7 +81,7 @@ class TapasExtractor(Extractor):
self.cookies.set(
"adjustedBirthDate", "1981-02-03", domain=self.cookies_domain)
- @cache(maxage=14*24*3600, keyarg=1)
+ @cache(maxage=14*86400, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/tsumino.py b/gallery_dl/extractor/tsumino.py
index de7cdfc..bce661a 100644
--- a/gallery_dl/extractor/tsumino.py
+++ b/gallery_dl/extractor/tsumino.py
@@ -27,7 +27,7 @@ class TsuminoBase():
self.cookies.setdefault(
"ASP.NET_SessionId", "x1drgggilez4cpkttneukrc5")
- @cache(maxage=14*24*3600, keyarg=1)
+ @cache(maxage=14*86400, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
url = "{}/Account/Login".format(self.root)
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index f50ddb7..fee0145 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -9,7 +9,7 @@
"""Extractors for https://www.tumblr.com/"""
from .common import Extractor, Message
-from .. import text, oauth, exception
+from .. import text, util, oauth, exception
from datetime import datetime, date, timedelta
import re
@@ -262,7 +262,7 @@ class TumblrExtractor(Extractor):
return updated, (resized == updated)
def _original_image_fallback(self, url, post_id):
- for _ in range(self.fallback_retries):
+ for _ in util.repeat(self.fallback_retries):
self.sleep(self.fallback_delay, "image token")
yield self._update_image_token(url)[0]
self.log.warning("Unable to fetch higher-resolution "
@@ -404,66 +404,70 @@ class TumblrAPI(oauth.OAuth1API):
def _call(self, endpoint, params, **kwargs):
url = self.ROOT + endpoint
kwargs["params"] = params
- response = self.request(url, **kwargs)
- try:
- data = response.json()
- except ValueError:
- data = response.text
- status = response.status_code
- else:
- status = data["meta"]["status"]
- if 200 <= status < 400:
- return data["response"]
-
- self.log.debug(data)
- if status == 403:
- raise exception.AuthorizationError()
+ while True:
+ response = self.request(url, **kwargs)
- elif status == 404:
try:
- error = data["errors"][0]["detail"]
- board = ("only viewable within the Tumblr dashboard" in error)
- except Exception:
- board = False
-
- if board:
- self.log.info("Run 'gallery-dl oauth:tumblr' "
- "to access dashboard-only blogs")
- raise exception.AuthorizationError(error)
- raise exception.NotFoundError("user or post")
-
- elif status == 429:
- # daily rate limit
- if response.headers.get("x-ratelimit-perday-remaining") == "0":
- self.log.info("Daily API rate limit exceeded")
- reset = response.headers.get("x-ratelimit-perday-reset")
-
- api_key = self.api_key or self.session.auth.consumer_key
- if api_key == self.API_KEY:
- self.log.info("Register your own OAuth application and "
- "use its credentials to prevent this error: "
- "https://github.com/mikf/gallery-dl/blob/mas"
- "ter/docs/configuration.rst#extractortumblra"
- "pi-key--api-secret")
-
- if self.extractor.config("ratelimit") == "wait":
+ data = response.json()
+ except ValueError:
+ data = response.text
+ status = response.status_code
+ else:
+ status = data["meta"]["status"]
+ if 200 <= status < 400:
+ return data["response"]
+
+ self.log.debug(data)
+
+ if status == 403:
+ raise exception.AuthorizationError()
+
+ elif status == 404:
+ try:
+ error = data["errors"][0]["detail"]
+ board = ("only viewable within the Tumblr dashboard"
+ in error)
+ except Exception:
+ board = False
+
+ if board:
+ self.log.info("Run 'gallery-dl oauth:tumblr' "
+ "to access dashboard-only blogs")
+ raise exception.AuthorizationError(error)
+ raise exception.NotFoundError("user or post")
+
+ elif status == 429:
+ # daily rate limit
+ if response.headers.get("x-ratelimit-perday-remaining") == "0":
+ self.log.info("Daily API rate limit exceeded")
+ reset = response.headers.get("x-ratelimit-perday-reset")
+
+ api_key = self.api_key or self.session.auth.consumer_key
+ if api_key == self.API_KEY:
+ self.log.info(
+ "Register your own OAuth application and use its "
+ "credentials to prevent this error: https://githu"
+ "b.com/mikf/gallery-dl/blob/master/docs/configurat"
+ "ion.rst#extractortumblrapi-key--api-secret")
+
+ if self.extractor.config("ratelimit") == "wait":
+ self.extractor.wait(seconds=reset)
+ continue
+
+ t = (datetime.now() + timedelta(0, float(reset))).time()
+ raise exception.StopExtraction(
+ "Aborting - Rate limit will reset at %s",
+ "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second))
+
+ # hourly rate limit
+ reset = response.headers.get("x-ratelimit-perhour-reset")
+ if reset:
+ self.log.info("Hourly API rate limit exceeded")
self.extractor.wait(seconds=reset)
- return self._call(endpoint, params, **kwargs)
-
- t = (datetime.now() + timedelta(seconds=float(reset))).time()
- raise exception.StopExtraction(
- "Aborting - Rate limit will reset at %s",
- "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second))
-
- # hourly rate limit
- reset = response.headers.get("x-ratelimit-perhour-reset")
- if reset:
- self.log.info("Hourly API rate limit exceeded")
- self.extractor.wait(seconds=reset)
- return self._call(endpoint, params, **kwargs)
+ continue
- raise exception.StopExtraction(data)
+ raise exception.StopExtraction(data)
def _pagination(self, blog, endpoint, params, key="posts", cache=False):
endpoint = "/v2/blog/{}{}".format(blog, endpoint)
diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py
index 49c8419..f57f479 100644
--- a/gallery_dl/extractor/twibooru.py
+++ b/gallery_dl/extractor/twibooru.py
@@ -22,7 +22,7 @@ class TwibooruExtractor(BooruExtractor):
root = "https://twibooru.org"
filename_fmt = "{id}_{filename}.{extension}"
archive_fmt = "{id}"
- request_interval = 6.05
+ request_interval = (6.0, 6.1)
page_start = 1
per_page = 50
@@ -44,7 +44,7 @@ class TwibooruExtractor(BooruExtractor):
class TwibooruPostExtractor(TwibooruExtractor):
"""Extractor for single twibooru posts"""
subcategory = "post"
- request_interval = 1.0
+ request_interval = (0.5, 1.5)
pattern = BASE_PATTERN + r"/(\d+)"
example = "https://twibooru.org/12345"
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index f874f12..fdcefdd 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -45,7 +45,6 @@ class TwitterExtractor(Extractor):
self.cards = self.config("cards", False)
self.ads = self.config("ads", False)
self.cards_blacklist = self.config("cards-blacklist")
- self.syndication = self.config("syndication")
if not self.config("transform", True):
self._transform_user = util.identity
@@ -367,9 +366,6 @@ class TwitterExtractor(Extractor):
if "legacy" in user:
user = user["legacy"]
- elif "statuses_count" not in user and self.syndication == "extended":
- # try to fetch extended user data
- user = self.api.user_by_screen_name(user["screen_name"])["legacy"]
uget = user.get
if uget("withheld_scope"):
@@ -550,7 +546,7 @@ class TwitterTimelineExtractor(TwitterExtractor):
def _select_tweet_source(self):
strategy = self.config("strategy")
if strategy is None or strategy == "auto":
- if self.retweets or self.textonly:
+ if self.retweets or self.replies or self.textonly:
return self.api.user_tweets
else:
return self.api.user_media
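The "auto" strategy now also selects the full user_tweets timeline when replies are requested, presumably because the media-only timeline omits replies just as it omits retweets and text-only tweets. The decision, reduced to a standalone sketch:

def pick_timeline(retweets, replies, textonly):
    # anything the media timeline cannot contain forces the
    # full timeline, which is then filtered client-side
    if retweets or replies or textonly:
        return "user_tweets"
    return "user_media"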
@@ -603,12 +599,6 @@ class TwitterLikesExtractor(TwitterExtractor):
def tweets(self):
return self.api.user_likes(self.user)
- def _transform_tweet(self, tweet):
- tdata = TwitterExtractor._transform_tweet(self, tweet)
- tdata["date_liked"] = text.parse_timestamp(
- (int(tweet["sortIndex"] or 0) >> 20) // 1000)
- return tdata
-
class TwitterBookmarkExtractor(TwitterExtractor):
"""Extractor for bookmarked tweets"""
@@ -871,7 +861,6 @@ class TwitterAPI():
self.root = "https://twitter.com/i/api"
self._nsfw_warning = True
- self._syndication = self.extractor.syndication
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
cookies = extractor.cookies
@@ -1651,69 +1640,14 @@ class TwitterAPI():
tweet_id = entry["entryId"].rpartition("-")[2]
if text.startswith("Age-restricted"):
- if self._syndication:
- return self._syndication_tweet(tweet_id)
- elif self._nsfw_warning:
+ if self._nsfw_warning:
self._nsfw_warning = False
self.extractor.log.warning('"%s"', text)
self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text)
- def _syndication_tweet(self, tweet_id):
- base_url = "https://cdn.syndication.twimg.com/tweet-result?id="
- tweet = self.extractor.request(base_url + tweet_id).json()
-
- tweet["user"]["description"] = ""
- tweet["user"]["entities"] = {"description": {}}
- tweet["user_id_str"] = tweet["user"]["id_str"]
-
- if tweet["id_str"] != tweet_id:
- tweet["retweeted_status_id_str"] = tweet["id_str"]
- tweet["id_str"] = retweet_id = tweet_id
- else:
- retweet_id = None
-
- # assume 'conversation_id' is the same as 'id' when the tweet
- # is not a reply
- if "conversation_id_str" not in tweet and \
- "in_reply_to_status_id_str" not in tweet:
- tweet["conversation_id_str"] = tweet["id_str"]
-
- if int(tweet_id) < 300000000000000:
- tweet["created_at"] = text.parse_datetime(
- tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime(
- "%a %b %d %H:%M:%S +0000 %Y")
-
- if "video" in tweet:
- video = tweet["video"]
- video["variants"] = (max(
- (v for v in video["variants"] if v["type"] == "video/mp4"),
- key=lambda v: text.parse_int(
- v["src"].split("/")[-2].partition("x")[0])
- ),)
- video["variants"][0]["url"] = video["variants"][0]["src"]
- tweet["extended_entities"] = {"media": [{
- "video_info" : video,
- "original_info": {"width" : 0, "height": 0},
- }]}
- elif "photos" in tweet:
- for p in tweet["photos"]:
- p["media_url_https"] = p["url"]
- p["original_info"] = {
- "width" : p["width"],
- "height": p["height"],
- }
- tweet["extended_entities"] = {"media": tweet["photos"]}
-
- return {
- "rest_id": tweet["id_str"],
- "legacy" : tweet,
- "core" : {"user_results": {"result": tweet["user"]}},
- "_retweet_id_str": retweet_id,
- }
-
-@cache(maxage=360*86400, keyarg=1)
+@cache(maxage=365*86400, keyarg=1)
def _login_impl(extr, username, password):
import re
diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py
index 4ee252e..5374f1c 100644
--- a/gallery_dl/extractor/vipergirls.py
+++ b/gallery_dl/extractor/vipergirls.py
@@ -45,7 +45,7 @@ class VipergirlsExtractor(Extractor):
if username:
self.cookies_update(self._login_impl(username, password))
- @cache(maxage=90*24*3600, keyarg=1)
+ @cache(maxage=90*86400, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index c9cd02f..c22e67e 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -21,7 +21,7 @@ class VkExtractor(Extractor):
filename_fmt = "{id}.{extension}"
archive_fmt = "{id}"
root = "https://vk.com"
- request_interval = 1.0
+ request_interval = (0.5, 1.5)
def items(self):
sizes = "wzyxrqpo"
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index ac2ac7a..eb10a0c 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -520,7 +520,7 @@ class DownloadJob(Job):
archive, archive_format, archive_pragma)
except Exception as exc:
extr.log.warning(
- "Failed to open download archive at '%s' ('%s: %s')",
+ "Failed to open download archive at '%s' (%s: %s)",
archive, exc.__class__.__name__, exc)
else:
extr.log.debug("Using download archive '%s'", archive)
diff --git a/gallery_dl/oauth.py b/gallery_dl/oauth.py
index ac38c4d..8508ee1 100644
--- a/gallery_dl/oauth.py
+++ b/gallery_dl/oauth.py
@@ -138,6 +138,6 @@ class OAuth1API():
return self.extractor.request(url, **kwargs)
-@cache(maxage=100*365*24*3600, keyarg=0)
+@cache(maxage=36500*86400, keyarg=0)
def _token_cache(key):
return None, None
diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py
index 10d9fba..1d2fba8 100644
--- a/gallery_dl/postprocessor/common.py
+++ b/gallery_dl/postprocessor/common.py
@@ -41,7 +41,7 @@ class PostProcessor():
"_archive_" + self.name)
except Exception as exc:
self.log.warning(
- "Failed to open %s archive at '%s' ('%s: %s')",
+ "Failed to open %s archive at '%s' (%s: %s)",
self.name, archive, exc.__class__.__name__, exc)
else:
self.log.debug("Using %s archive '%s'", self.name, archive)
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 53502ef..751c398 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -55,6 +55,13 @@ def advance(iterable, num):
return iterator
+def repeat(times):
+ """Return an iterator that returns None"""
+ if times < 0:
+ return itertools.repeat(None)
+ return itertools.repeat(None, times)
+
+
def unique(iterable):
"""Yield unique elements from 'iterable' while preserving order"""
seen = set()
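A minimal usage sketch of the new helper, with a hypothetical fetch() callable that returns None on failure; a negative retry count now means "retry indefinitely" rather than "never retry":

from gallery_dl import util

def fetch_with_retries(fetch, retries):
    for _ in util.repeat(retries):  # retries < 0: loop forever
        result = fetch()
        if result is not None:
            return result
    return None  # all attempts exhausted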
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index f0d55f6..b74d977 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.26.4"
+__version__ = "1.26.5"