From a768930761f7f20587ae40a8cacca0e55c85290a Mon Sep 17 00:00:00 2001
From: Unit 193
Date: Mon, 29 Aug 2022 02:17:16 -0400
Subject: New upstream version 1.23.0.
---
CHANGELOG.md | 54 ++++++++++++
PKG-INFO | 9 +-
README.rst | 7 +-
data/man/gallery-dl.1 | 2 +-
data/man/gallery-dl.conf.5 | 152 ++++++++++++++++++++++++++++++++--
docs/gallery-dl.conf | 21 ++++-
gallery_dl.egg-info/PKG-INFO | 9 +-
gallery_dl.egg-info/SOURCES.txt | 2 +
gallery_dl/__init__.py | 13 ++-
gallery_dl/extractor/__init__.py | 2 +
gallery_dl/extractor/artstation.py | 5 +-
gallery_dl/extractor/blogger.py | 3 -
gallery_dl/extractor/bunkr.py | 14 ++--
gallery_dl/extractor/catbox.py | 56 +++++++++++++
gallery_dl/extractor/common.py | 13 ++-
gallery_dl/extractor/danbooru.py | 9 +-
gallery_dl/extractor/deviantart.py | 11 ++-
gallery_dl/extractor/fanbox.py | 2 +
gallery_dl/extractor/foolfuuka.py | 28 +++++--
gallery_dl/extractor/gelbooru.py | 48 +++++++----
gallery_dl/extractor/gelbooru_v02.py | 55 +++++++-----
gallery_dl/extractor/hitomi.py | 14 ++--
gallery_dl/extractor/instagram.py | 29 ++++++-
gallery_dl/extractor/itaku.py | 11 +--
gallery_dl/extractor/kemonoparty.py | 48 ++++++++---
gallery_dl/extractor/luscious.py | 4 +-
gallery_dl/extractor/mastodon.py | 16 ++--
gallery_dl/extractor/nijie.py | 2 +-
gallery_dl/extractor/oauth.py | 3 +-
gallery_dl/extractor/philomena.py | 2 +-
gallery_dl/extractor/poipiku.py | 8 +-
gallery_dl/extractor/skeb.py | 16 +++-
gallery_dl/extractor/slideshare.py | 8 +-
gallery_dl/extractor/smugmug.py | 4 +-
gallery_dl/extractor/tapas.py | 2 +-
gallery_dl/extractor/tumblr.py | 58 ++++++++++---
gallery_dl/extractor/twitter.py | 155 ++++++++++++++++++++--------------
gallery_dl/extractor/unsplash.py | 4 +-
gallery_dl/extractor/vk.py | 7 +-
gallery_dl/extractor/vsco.py | 2 +-
gallery_dl/extractor/wallhaven.py | 13 ++-
gallery_dl/extractor/weibo.py | 32 ++++---
gallery_dl/extractor/zerochan.py | 156 +++++++++++++++++++++++++++++++++++
gallery_dl/formatter.py | 11 ++-
gallery_dl/job.py | 7 +-
gallery_dl/output.py | 6 ++
gallery_dl/postprocessor/metadata.py | 39 ++++++++-
gallery_dl/text.py | 10 +++
gallery_dl/util.py | 13 +--
gallery_dl/version.py | 2 +-
test/test_formatter.py | 3 +-
test/test_postprocessor.py | 46 +++++++++++
test/test_text.py | 19 ++++-
test/test_util.py | 36 ++++++++
54 files changed, 1063 insertions(+), 238 deletions(-)
create mode 100644 gallery_dl/extractor/catbox.py
create mode 100644 gallery_dl/extractor/zerochan.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index be9a4f7..61987d9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,59 @@
# Changelog
+## 1.23.0 - 2022-08-28
+### Changes
+- [twitter] update `user` and `author` metadata fields
+ - for URLs with a single username or ID like `https://twitter.com/USER` or a search with a single `from:` statement, `user` will now always refer to the user referenced in the URL.
+ - for all other URLs like `https://twitter.com/i/bookmarks`, `user` and `author` refer to the same user
+ - `author` will always refer to the original Tweet author
+- [twitter] update `quote_id` and `quote_by` metadata fields
+ - `quote_id` is now non-zero for quoted Tweets and contains the Tweet ID of the quoting Tweet (was the other way round before)
+ - `quote_by` is only defined for quoted Tweets like before, but now contains the screen name of the user quoting this Tweet
+- [skeb] improve archive IDs for thumbnails and article images
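
For example, a filename format string that relies on the distinction above
(a sketch; "author", "user", "tweet_id", and "num" are regular Twitter
extractor metadata fields, the exact format is illustrative):

    "extractor": {
        "twitter": {
            "filename": "{author[name]}_{tweet_id}_{num}.{extension}"
        }
    }

With this, files from https://twitter.com/USER keep "user" fixed to USER,
while media by other authors (e.g. in retweets) is still named after the
original Tweet author.
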
+### Additions
+- [artstation] add `num` and `count` metadata fields ([#2764](https://github.com/mikf/gallery-dl/issues/2764))
+- [catbox] add `album` extractor ([#2410](https://github.com/mikf/gallery-dl/issues/2410))
+- [blogger] emit metadata for posts without files ([#2789](https://github.com/mikf/gallery-dl/issues/2789))
+- [foolfuuka] update supported domains
+- [gelbooru] add support for `api_key` and `user_id` ([#2767](https://github.com/mikf/gallery-dl/issues/2767))
+- [gelbooru] implement pagination for `pool` results ([#2853](https://github.com/mikf/gallery-dl/issues/2853))
+- [instagram] add support for a user's saved collections ([#2769](https://github.com/mikf/gallery-dl/issues/2769))
+- [instagram] provide `date` for directory format strings ([#2830](https://github.com/mikf/gallery-dl/issues/2830))
+- [kemonoparty] add `favorites` option ([#2826](https://github.com/mikf/gallery-dl/issues/2826), [#2831](https://github.com/mikf/gallery-dl/issues/2831))
+- [oauth] add `host` config option ([#2806](https://github.com/mikf/gallery-dl/issues/2806))
+- [rule34] implement pagination for `pool` results ([#2853](https://github.com/mikf/gallery-dl/issues/2853))
+- [skeb] add option to download `article` images ([#1031](https://github.com/mikf/gallery-dl/issues/1031))
+- [tumblr] download higher-quality images ([#2761](https://github.com/mikf/gallery-dl/issues/2761))
+- [tumblr] add `count` metadata field ([#2804](https://github.com/mikf/gallery-dl/issues/2804))
+- [wallhaven] implement `metadata` option ([#2803](https://github.com/mikf/gallery-dl/issues/2803))
+- [zerochan] add `tag` and `image` extractors ([#1434](https://github.com/mikf/gallery-dl/issues/1434))
+- [zerochan] implement login with username & password ([#1434](https://github.com/mikf/gallery-dl/issues/1434))
+- [postprocessor:metadata] implement `mode: modify` and `mode: delete` ([#2640](https://github.com/mikf/gallery-dl/issues/2640))
+- [formatter] add `g` conversion for slugifying a string ([#2410](https://github.com/mikf/gallery-dl/issues/2410))
+- [formatter] apply `:J` only to lists ([#2833](https://github.com/mikf/gallery-dl/issues/2833))
+- implement `path-metadata` option ([#2734](https://github.com/mikf/gallery-dl/issues/2734))
+- allow comments after input file URLs ([#2808](https://github.com/mikf/gallery-dl/issues/2808))
+- add global `warnings` option to control `urllib3` warning behavior ([#2762](https://github.com/mikf/gallery-dl/issues/2762))
+### Fixes
+- [bunkr] fix extraction ([#2788](https://github.com/mikf/gallery-dl/issues/2788))
+- [deviantart] use public access token for journals ([#2702](https://github.com/mikf/gallery-dl/issues/2702))
+- [e621] fix extraction of `popular` posts
+- [fanbox] download cover images in original size ([#2784](https://github.com/mikf/gallery-dl/issues/2784))
+- [mastodon] allow downloading without access token ([#2782](https://github.com/mikf/gallery-dl/issues/2782))
+- [hitomi] update cache expiry time ([#2863](https://github.com/mikf/gallery-dl/issues/2863))
+- [hitomi] fix error when number of tag results is a multiple of 25 ([#2870](https://github.com/mikf/gallery-dl/issues/2870))
+- [mangahere] fix `page-reverse` option ([#2795](https://github.com/mikf/gallery-dl/issues/2795))
+- [poipiku] fix posts with more than one image ([#2796](https://github.com/mikf/gallery-dl/issues/2796))
+- [poipiku] update filter for static images ([#2796](https://github.com/mikf/gallery-dl/issues/2796))
+- [slideshare] fix metadata extraction
+- [twitter] unescape `+` in search queries ([#2226](https://github.com/mikf/gallery-dl/issues/2226))
+- [twitter] fall back to unfiltered search ([#2766](https://github.com/mikf/gallery-dl/issues/2766))
+- [twitter] ignore invalid user entries ([#2850](https://github.com/mikf/gallery-dl/issues/2850))
+- [vk] prevent exceptions for broken/invalid photos ([#2774](https://github.com/mikf/gallery-dl/issues/2774))
+- [vsco] fix `collection` extraction
+- [weibo] prevent exception for missing `playback_list` ([#2792](https://github.com/mikf/gallery-dl/issues/2792))
+- [weibo] prevent errors when paginating over album entries ([#2817](https://github.com/mikf/gallery-dl/issues/2817))
+
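As an illustration of the two formatter changes above (field names are
examples): "{title!g}" turns a title like "Foo Bar!" into the slug
"foo-bar", and "{tags:J, /}" joins a list-valued "tags" field with ", ",
while a plain string is now passed through unchanged instead of having its
characters joined.
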
## 1.22.4 - 2022-07-15
### Additions
- [instagram] add `pinned` metadata field ([#2752](https://github.com/mikf/gallery-dl/issues/2752))
diff --git a/PKG-INFO b/PKG-INFO
index aaf3516..60a798f 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.22.4
+Version: 1.23.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -99,8 +99,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.23.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.23.0/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -251,7 +251,8 @@ and optional for
``subscribestar``,
``tapas``,
``tsumino``,
-and ``twitter``.
+``twitter``,
+and ``zerochan``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
diff --git a/README.rst b/README.rst
index 1d25a83..2b45b27 100644
--- a/README.rst
+++ b/README.rst
@@ -66,8 +66,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.23.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.23.0/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -218,7 +218,8 @@ and optional for
``subscribestar``,
``tapas``,
``tsumino``,
-and ``twitter``.
+``twitter``,
+and ``zerochan``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 751d470..d4efeed 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2022-07-15" "1.22.4" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2022-08-28" "1.23.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 39550ad..642cb78 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2022-07-15" "1.22.4" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2022-08-28" "1.23.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -458,6 +458,8 @@ and optional for
* \f[I]tsumino\f[]
.br
* \f[I]twitter\f[]
+.br
+* \f[I]zerochan\f[]
These values can also be specified via the
\f[I]-u/--username\f[] and \f[I]-p/--password\f[] command-line options or
@@ -667,6 +669,21 @@ This can then be used in \f[I]filenames\f[],
with a \f[I]metadata\f[] post processor, etc.
+.SS extractor.*.path-metadata
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Insert a reference to the current \f[I]PathFormat\f[]
+data structure into metadata dictionaries as the given name.
+
+For example, setting this option to \f[I]"gdl_path"\f[] would make it possible
+to access the current file's filename as \f[I]"{gdl_path.filename}"\f[].
+
+
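A minimal configuration sketch for this option, combining it with the
metadata post processor described further below (the "gdl_path" name is
just the example value from the description):

    {
        "extractor": {
            "path-metadata": "gdl_path",
            "postprocessors": [{
                "name": "metadata",
                "mode": "custom",
                "content-format": "{gdl_path.filename}\n"
            }]
        }
    }
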
.SS extractor.*.category-transfer
.IP "Type:" 6
\f[I]bool\f[]
@@ -1516,6 +1533,19 @@ Selects which site layout to expect when parsing posts.
* \f[I]"new"\f[]: Expect the *new* site layout
+.SS extractor.gelbooru.api-key & .user-id
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Values from the API Access Credentials section found at the bottom of your
+\f[I]Account Options\f[]
+page.
+
+
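For example, in a configuration file (the values shown are placeholders
for the actual credentials from that page):

    "gelbooru": {
        "api-key": "a1b2c3d4e5...",
        "user-id": "123456"
    }
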
.SS extractor.generic.enabled
.IP "Type:" 6
\f[I]bool\f[]
@@ -1751,6 +1781,19 @@ Controls how to handle duplicate files in a post.
Extract a user's direct messages as \f[I]dms\f[] metadata.
+.SS extractor.kemonoparty.favorites
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]artist\f[]
+
+.IP "Description:" 4
+Determines the type of favorites to be downloaded.
+
+Available types are \f[I]artist\f[] and \f[I]post\f[].
+
+
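Both selection paths in one sketch (per the extractor change further
below, a "type" URL parameter overrides the config value):

    "kemonoparty": {
        "favorites": "post"
    }

    gallery-dl "https://kemono.party/favorites?type=post"
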
.SS extractor.kemonoparty.files
.IP "Type:" 6
\f[I]list\f[] of \f[I]strings\f[]
@@ -2007,6 +2050,17 @@ Store tokens received during OAuth authorizations
in \f[I]cache\f[].
+.SS extractor.oauth.host
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"localhost"\f[]
+
+.IP "Description:" 4
+Host name / IP address to bind to during OAuth authorization.
+
+
.SS extractor.oauth.port
.IP "Type:" 6
\f[I]integer\f[]
@@ -2424,6 +2478,17 @@ Download video embeds from external sites.
Download videos.
+.SS extractor.skeb.article
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download article images.
+
+
.SS extractor.skeb.sent-requests
.IP "Type:" 6
\f[I]bool\f[]
@@ -2502,6 +2567,21 @@ images from them.
Search posts for inline images and videos.
+.SS extractor.tumblr.original
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download full-resolution \f[I]photo\f[] images.
+
+For each photo with "maximum" resolution
+(width equal to 2048 or height equal to 3072),
+use an extra HTTP request to find the URL to its full-resolution version.
+
+
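The trigger condition can be read directly from the extractor change
further below; as a standalone sketch:

    def needs_highres_lookup(photo):
        # A photo served at the 2048x3072 cap may have a larger
        # original available behind one extra HTTP request.
        return ("/s2048x3072/" in photo["url"]
                and (photo["width"] == 2048 or photo["height"] == 3072))
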
.SS extractor.tumblr.reblogs
.IP "Type:" 6
\f[I]bool\f[] or \f[I]string\f[]
@@ -2846,6 +2926,19 @@ to use your account's browsing settings and default filters when searching.
See https://wallhaven.cc/help/api for more information.
+.SS extractor.wallhaven.metadata
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Extract additional metadata (tags, uploader)
+
+Note: This requires 1 additional HTTP request for each post.
+
+
.SS extractor.weasyl.api-key
.IP "Type:" 6
\f[I]string\f[]
@@ -3714,16 +3807,20 @@ See \f[I]metadata.event\f[] for a list of available events.
\f[I]"json"\f[]
.IP "Description:" 4
-Select how to write metadata.
+Selects how to process metadata.
.br
-* \f[I]"json"\f[]: all metadata using \f[I]json.dump()
+* \f[I]"json"\f[]: write metadata using \f[I]json.dump()
\f[]
.br
-* \f[I]"tags"\f[]: \f[I]tags\f[] separated by newlines
+* \f[I]"tags"\f[]: write \f[I]tags\f[] separated by newlines
.br
-* \f[I]"custom"\f[]: result of applying \f[I]metadata.content-format\f[]
+* \f[I]"custom"\f[]: write the result of applying \f[I]metadata.content-format\f[]
to a file's metadata dictionary
+.br
+* \f[I]"modify"\f[]: add or modify metadata entries
+.br
+* \f[I]"delete"\f[]: remove metadata entries
.SS metadata.filename
@@ -3821,6 +3918,39 @@ When starting to download all files of a post,
e.g. a Tweet on Twitter or a post on Patreon.
+.SS metadata.fields
+.IP "Type:" 6
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+.br
+* \f[I]object\f[] (field name -> \f[I]format string\f[])
+
+.IP "Example:" 4
+.br
+* .. code:: json
+
+["blocked", "watching", "status[creator][name]"]
+
+.br
+* .. code:: json
+
+  {
+      "blocked"         : "***",
+      "watching"        : "\\fE 'yes' if watching else 'no'",
+      "status[username]": "{status[creator][name]!l}"
+  }
+
+
+.IP "Description:" 4
+.br
+* \f[I]"mode": "delete"\f[]:
+A list of metadata field names to remove.
+.br
+* \f[I]"mode": "modify"\f[]:
+An object with metadata field names mapping to a \f[I]format string\f[]
+whose result is assigned to said field name.
+
+
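Two complete post processor objects using the examples above (standard
"metadata" post processor syntax; the field names are illustrative):

    {"name": "metadata", "mode": "delete",
     "fields": ["blocked", "watching", "status[creator][name]"]}

    {"name": "metadata", "mode": "modify",
     "fields": {"status[username]": "{status[creator][name]!l}"}}
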
.SS metadata.content-format
.IP "Type:" 6
\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
@@ -4190,6 +4320,18 @@ The list of signal names to ignore, i.e. set
as signal handler for.
+.SS warnings
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"default"\f[]
+
+.IP "Description:" 4
+The \f[I]Warnings Filter action\f[]
+used for (urllib3) warnings.
+
+
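Accepted values are the standard Python warnings-filter actions
("default", "error", "ignore", "always", "module", "once"); for example,
to silence urllib3's warnings entirely:

    "warnings": "ignore"
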
.SS pyopenssl
.IP "Type:" 6
\f[I]bool\f[]
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 1492653..1e485ee 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -109,6 +109,11 @@
"include": "gallery",
"layout": "auto"
},
+ "gelbooru":
+ {
+ "api-key": null,
+ "user-id": null
+ },
"gfycat":
{
"format": ["mp4", "webm", "mobile", "gif"]
@@ -193,6 +198,7 @@
{
"browser": true,
"cache": true,
+ "host": "localhost",
"port": 6414
},
"paheal":
@@ -248,6 +254,12 @@
"username": null,
"password": null
},
+ "skeb":
+ {
+ "article": false,
+ "sent-requests": false,
+ "thumbnails": false
+ },
"smugmug":
{
"videos": true
@@ -273,6 +285,7 @@
"external": false,
"inline": true,
"posts": "all",
+ "original": true,
"reblogs": true
},
"twitter":
@@ -302,7 +315,8 @@
},
"wallhaven":
{
- "api-key": null
+ "api-key": null,
+ "metadata": false
},
"weasyl":
{
@@ -324,6 +338,11 @@
"module": null,
"raw-options": null
},
+ "zerochan":
+ {
+ "username": null,
+ "password": null
+ },
"booru":
{
"tags": false,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 1e1d74d..6b9d68b 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.22.4
+Version: 1.23.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -99,8 +99,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.23.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.23.0/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -251,7 +251,8 @@ and optional for
``subscribestar``,
``tapas``,
``tsumino``,
-and ``twitter``.
+``twitter``,
+and ``zerochan``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index b323e38..5f5084b 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -58,6 +58,7 @@ gallery_dl/extractor/behance.py
gallery_dl/extractor/blogger.py
gallery_dl/extractor/booru.py
gallery_dl/extractor/bunkr.py
+gallery_dl/extractor/catbox.py
gallery_dl/extractor/comicvine.py
gallery_dl/extractor/common.py
gallery_dl/extractor/cyberdrop.py
@@ -197,6 +198,7 @@ gallery_dl/extractor/wikieat.py
gallery_dl/extractor/xhamster.py
gallery_dl/extractor/xvideos.py
gallery_dl/extractor/ytdl.py
+gallery_dl/extractor/zerochan.py
gallery_dl/postprocessor/__init__.py
gallery_dl/postprocessor/classify.py
gallery_dl/postprocessor/common.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 04ea54c..329e7ab 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -38,11 +38,11 @@ def parse_inputfile(file, log):
Lines starting with '#' and empty lines will be ignored.
Lines starting with '-' will be interpreted as a key-value pair separated
by an '='. where 'key' is a dot-separated option name and 'value' is a
- JSON-parsable value for it. These config options will be applied while
+ JSON-parsable value. These configuration options will be applied while
processing the next URL.
Lines starting with '-G' are the same as above, except these options will
- be valid for all following URLs, i.e. they are Global.
- Everything else will be used as potential URL.
+ be applied for *all* following URLs, i.e. they are Global.
+ Everything else will be used as a potential URL.
Example input file:
@@ -57,7 +57,8 @@ def parse_inputfile(file, log):
https://example.org/
# next URL uses default filename and 'skip' is false.
- https://example.com/index.htm
+ https://example.com/index.htm # comment1
+ https://example.com/404.htm # comment2
"""
gconf = []
lconf = []
@@ -94,6 +95,10 @@ def parse_inputfile(file, log):
else:
# url
+ if " #" in line:
+ line = line.partition(" #")[0]
+ elif "\t#" in line:
+ line = line.partition("\t#")[0]
if gconf or lconf:
yield util.ExtendedUrl(line, gconf, lconf)
gconf = []
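
A standalone restatement of the comment handling above: only a '#'
preceded by a space or tab starts a comment, so a bare '#' inside a URL
(e.g. a fragment) is kept.

    def strip_comment(line):
        # ' #' and a tab followed by '#' start a comment;
        # a '#' directly attached to the URL does not
        if " #" in line:
            return line.partition(" #")[0]
        if "\t#" in line:
            return line.partition("\t#")[0]
        return line

    assert (strip_comment("https://example.com/index.htm # comment1")
            == "https://example.com/index.htm")
    assert (strip_comment("https://catbox.moe/c/w7tm47#")
            == "https://catbox.moe/c/w7tm47#")
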
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 70cebb3..9e4507a 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -26,6 +26,7 @@ modules = [
"behance",
"blogger",
"bunkr",
+ "catbox",
"comicvine",
"cyberdrop",
"danbooru",
@@ -150,6 +151,7 @@ modules = [
"wikieat",
"xhamster",
"xvideos",
+ "zerochan",
"booru",
"moebooru",
"foolfuuka",
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index 19b9d97..c0e8e67 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -32,9 +32,11 @@ class ArtstationExtractor(Extractor):
data = self.metadata()
for project in self.projects():
- for asset in self.get_project_assets(project["hash_id"]):
+ for num, asset in enumerate(
+ self.get_project_assets(project["hash_id"]), 1):
asset.update(data)
adict = asset["asset"]
+ asset["num"] = num
yield Message.Directory, asset
if adict["has_embedded_player"] and self.external:
@@ -85,6 +87,7 @@ class ArtstationExtractor(Extractor):
assets = data["assets"]
del data["assets"]
+ data["count"] = len(assets)
if len(assets) == 1:
data["asset"] = assets[0]
yield data
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 21ca991..e0885d2 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -67,9 +67,6 @@ class BloggerExtractor(Extractor):
key=lambda x: x["format_id"],
)["play_url"])
- if not files:
- continue
-
post["author"] = post["author"]["displayName"]
post["replies"] = post["replies"]["totalItems"]
post["content"] = text.remove_html(content)
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 9904d0a..3091f57 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -16,10 +16,10 @@ import json
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"""Extractor for bunkr.is albums"""
category = "bunkr"
- root = "https://app.bunkr.is"
+ root = "https://bunkr.is"
pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:is|to)/a/([^/?#]+)"
test = (
- ("https://app.bunkr.is/a/Lktg9Keq", {
+ ("https://bunkr.is/a/Lktg9Keq", {
"pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
"keyword": {
@@ -33,7 +33,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
},
}),
# mp4 (#2239)
- ("https://bunkr.is/a/ptRHaCn2", {
+ ("https://app.bunkr.is/a/ptRHaCn2", {
"pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4",
"content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
}),
@@ -70,16 +70,16 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
album = props["album"]
files = props["files"]
except Exception as exc:
- self.log.debug(exc)
+ self.log.debug(exc.__class__.__name__, exc)
self.root = self.root.replace("bunkr", "app.bunkr", 1)
return self._fetch_album_api(album_id)
for file in files:
name = file["name"]
+ cdn = file["cdn"]
if name.endswith(".mp4"):
- file["file"] = "https://media-files.bunkr.is/" + name
- else:
- file["file"] = file["cdn"] + "/" + name
+ cdn = cdn.replace("//cdn", "//media-files")
+ file["file"] = cdn + "/" + name
return files, {
"album_id" : self.album_id,
diff --git a/gallery_dl/extractor/catbox.py b/gallery_dl/extractor/catbox.py
new file mode 100644
index 0000000..509108f
--- /dev/null
+++ b/gallery_dl/extractor/catbox.py
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://catbox.moe/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class CatboxAlbumExtractor(GalleryExtractor):
+ """Extractor for catbox albums"""
+ category = "catbox"
+ subcategory = "album"
+ root = "https://catbox.moe"
+ filename_fmt = "{filename}.{extension}"
+ directory_fmt = ("{category}", "{album_name} ({album_id})")
+ archive_fmt = "{album_id}_{filename}"
+ pattern = r"(?:https?://)?(?:www\.)?catbox\.moe(/c/[^/?#]+)"
+ test = (
+ ("https://catbox.moe/c/1igcbe", {
+ "url": "35866a88c29462814f103bc22ec031eaeb380f8a",
+ "content": "70ddb9de3872e2d17cc27e48e6bf395e5c8c0b32",
+ "pattern": r"https://files\.catbox\.moe/\w+\.\w{3}$",
+ "count": 3,
+ "keyword": {
+ "album_id": "1igcbe",
+ "album_name": "test",
+ "date": "dt:2022-08-18 00:00:00",
+ "description": "album test &>",
+ },
+ }),
+ ("https://www.catbox.moe/c/cd90s1"),
+ ("https://catbox.moe/c/w7tm47#"),
+ )
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ return {
+ "album_id" : self.gallery_url.rpartition("/")[2],
+ "album_name" : text.unescape(extr("", "<")),
+ "date" : text.parse_datetime(extr(
+ "
Created ", "<"), "%B %d %Y"),
+ "description": text.unescape(extr("
", "<")),
+ }
+
+ def images(self, page):
+ return [
+ ("https://files.catbox.moe/" + path, None)
+ for path in text.extract_iter(
+ page, ">https://files.catbox.moe/", "<")
+ ]
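
Given the formats defined above, a command-line run such as

    gallery-dl "https://catbox.moe/c/1igcbe"

would be expected to place the album's three files in a
"catbox/test (1igcbe)" directory under the configured base directory.
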
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 6ccae7f..1b41101 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -795,12 +795,23 @@ SSL_CIPHERS = {
}
+urllib3 = requests.packages.urllib3
+
# detect brotli support
try:
- BROTLI = requests.packages.urllib3.response.brotli is not None
+ BROTLI = urllib3.response.brotli is not None
except AttributeError:
BROTLI = False
+# set (urllib3) warnings filter
+action = config.get((), "warnings", "default")
+if action:
+ try:
+ import warnings
+ warnings.simplefilter(action, urllib3.exceptions.HTTPWarning)
+ except Exception:
+ pass
+del action
# Undo automatic pyOpenSSL injection by requests
pyopenssl = config.get((), "pyopenssl", False)
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index ec0db68..8c2ed53 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -34,6 +34,7 @@ class DanbooruExtractor(BaseExtractor):
self.per_page = iget("per-page", 200)
self.request_interval_min = iget("request-interval-min", 0.0)
self._pools = iget("pools")
+ self._popular_endpoint = iget("popular", "/explore/posts/popular.json")
BaseExtractor.__init__(self, match)
@@ -150,6 +151,7 @@ INSTANCES = {
"headers": {"User-Agent": "gallery-dl/{} (by mikf)".format(
__version__)},
"pools": "sort",
+ "popular": "/popular.json",
"page-limit": 750,
"per-page": 320,
"request-interval-min": 1.0,
@@ -308,7 +310,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
subcategory = "popular"
directory_fmt = ("{category}", "popular", "{scale}", "{date}")
archive_fmt = "P_{scale[0]}_{date}_{id}"
- pattern = BASE_PATTERN + r"/explore/posts/popular(?:\?([^#]*))?"
+ pattern = BASE_PATTERN + r"/(?:explore/posts/)?popular(?:\?([^#]*))?"
test = (
("https://danbooru.donmai.us/explore/posts/popular"),
(("https://danbooru.donmai.us/explore/posts/popular"
@@ -316,7 +318,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
"range": "1-120",
"count": 120,
}),
- ("https://e621.net/explore/posts/popular"),
+ ("https://e621.net/popular"),
(("https://e621.net/explore/posts/popular"
"?date=2019-06-01&scale=month"), {
"pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
@@ -345,8 +347,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
def posts(self):
if self.page_start is None:
self.page_start = 1
- return self._pagination(
- "/explore/posts/popular.json", self.params, True)
+ return self._pagination(self._popular_endpoint, self.params, True)
class DanbooruFavoriteExtractor(DanbooruExtractor):
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 39ae484..60f644d 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1128,11 +1128,18 @@ class DeviantartOAuthAPI():
self._folders((deviation,))
return deviation
- def deviation_content(self, deviation_id, public=False):
+ def deviation_content(self, deviation_id, public=True):
"""Get extended content of a single Deviation"""
endpoint = "/deviation/content"
params = {"deviationid": deviation_id}
- return self._call(endpoint, params=params, public=public)
+ content = self._call(endpoint, params=params, public=public)
+ if public and content["html"].startswith(
+ ' <span class="username-with-symbol'):
+ if self.refresh_token_key:
+ content = self._call(endpoint, params=params, public=False)
+ else:
+ self.log.warning("Private Journal")
+ return content
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
pattern = BASE_PATTERN + r"page=pool&s=show&id=(?P<pool>\d+)"
test = (
("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
"count": 6,
}),
- ("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
- "options": (("api", False),),
- "count": 6,
- }),
)
def metadata(self):
- url = "{}/index.php?page=pool&s=show&id={}".format(
- self.root, self.pool_id)
- page = self.request(url).text
+ url = self.root + "/index.php"
+ self._params = {
+ "page": "pool",
+ "s" : "show",
+ "id" : self.pool_id,
+ "pid" : self.page_start,
+ }
+ self._page = self.request(url, params=self._params).text
- name, pos = text.extract(page, "Now Viewing: ", "<br>")
+ name, pos = text.extract(self._page, "Now Viewing: ", "<br>")
if not name:
raise exception.NotFoundError("pool")
- self.post_ids = text.extract_iter(page, 'class="" id="p', '"', pos)
return {
"pool": text.parse_int(self.pool_id),
@@ -114,9 +120,23 @@ class GelbooruPoolExtractor(GelbooruBase,
}
def posts(self):
- params = {}
- for params["id"] in util.advance(self.post_ids, self.page_start):
- yield from self._api_request(params)
+ url = self.root + "/index.php"
+ params = self._params
+
+ page = self._page
+ del self._page
+ data = {}
+
+ while True:
+ num_ids = 0
+ for data["id"] in text.extract_iter(page, '" id="p', '"'):
+ num_ids += 1
+ yield from self._api_request(data)
+
+ if num_ids < self.per_page:
+ return
+ params["pid"] += self.per_page
+ page = self.request(url, params=params).text
class GelbooruPostExtractor(GelbooruBase,
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 35a3448..8214614 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -21,6 +21,9 @@ class GelbooruV02Extractor(booru.BooruExtractor):
def __init__(self, match):
booru.BooruExtractor.__init__(self, match)
+ self.api_key = self.config("api-key")
+ self.user_id = self.config("user-id")
+
try:
self.api_root = INSTANCES[self.category]["api_root"]
except KeyError:
@@ -59,6 +62,24 @@ class GelbooruV02Extractor(booru.BooruExtractor):
return
params["pid"] += 1
+ def _pagination_html(self, params):
+ url = self.root + "/index.php"
+ params["pid"] = self.page_start * self.per_page
+
+ data = {}
+ while True:
+ num_ids = 0
+ page = self.request(url, params=params).text
+
+ for data["id"] in text.extract_iter(page, '" id="p', '"'):
+ num_ids += 1
+ for post in self._api_request(data):
+ yield post.attrib
+
+ if num_ids < self.per_page:
+ return
+ params["pid"] += self.per_page
+
@staticmethod
def _prepare(post):
post["date"] = text.parse_datetime(
@@ -204,7 +225,12 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
def __init__(self, match):
GelbooruV02Extractor.__init__(self, match)
self.pool_id = match.group(match.lastindex)
- self.post_ids = ()
+
+ if self.category == "rule34":
+ self.posts = self._posts_pages
+ self.per_page = 45
+ else:
+ self.post_ids = ()
def skip(self, num):
self.page_start += num
@@ -232,6 +258,13 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
for post in self._api_request(params):
yield post.attrib
+ def _posts_pages(self):
+ return self._pagination_html({
+ "page": "pool",
+ "s" : "show",
+ "id" : self.pool_id,
+ })
+
class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
subcategory = "favorite"
@@ -265,27 +298,11 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
return {"favorite_id": text.parse_int(self.favorite_id)}
def posts(self):
- url = self.root + "/index.php"
- params = {
+ return self._pagination_html({
"page": "favorites",
"s" : "view",
"id" : self.favorite_id,
- "pid" : self.page_start * self.per_page,
- }
-
- data = {}
- while True:
- num_ids = 0
- page = self.request(url, params=params).text
-
- for data["id"] in text.extract_iter(page, '" id="p', '"'):
- num_ids += 1
- for post in self._api_request(data):
- yield post.attrib
-
- if num_ids < self.per_page:
- return
- params["pid"] += self.per_page
+ })
class GelbooruV02PostExtractor(GelbooruV02Extractor):
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index ca7e692..f8b0c3b 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -174,23 +174,27 @@ class HitomiTagExtractor(Extractor):
}
offset = 0
+ total = None
while True:
headers["Referer"] = "{}/{}/{}.html?page={}".format(
self.root, self.type, self.tag, offset // 100 + 1)
headers["Range"] = "bytes={}-{}".format(offset, offset+99)
- nozomi = self.request(nozomi_url, headers=headers).content
+ response = self.request(nozomi_url, headers=headers)
- for gallery_id in decode_nozomi(nozomi):
+ for gallery_id in decode_nozomi(response.content):
gallery_url = "{}/galleries/{}.html".format(
self.root, gallery_id)
yield Message.Queue, gallery_url, data
- if len(nozomi) < 100:
- return
offset += 100
+ if total is None:
+ total = text.parse_int(
+ response.headers["content-range"].rpartition("/")[2])
+ if offset >= total:
+ return
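
The loop above now derives its end condition from the Content-Range
header instead of a short read. A generic sketch of the same Range-based
pagination, using requests directly with a placeholder URL:

    import requests

    def iter_chunks(url, size=100):
        # Request 'size'-byte slices until the offset reaches the total
        # length reported as e.g. "bytes 0-99/31415" in Content-Range.
        offset = 0
        while True:
            headers = {"Range": "bytes={}-{}".format(
                offset, offset + size - 1)}
            response = requests.get(url, headers=headers)
            yield response.content
            offset += size
            total = int(
                response.headers["Content-Range"].rpartition("/")[2])
            if offset >= total:
                return
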
-@memcache()
+@memcache(maxage=1800)
def _parse_gg(extr):
page = extr.request("https://ltn.hitomi.la/gg.js").text
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 4a2c3bb..d56af8b 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -270,6 +270,7 @@ class InstagramExtractor(Extractor):
"post_shortcode": post["code"],
"likes": post["like_count"],
"pinned": post.get("timeline_pinned_user_ids", ()),
+ "date": text.parse_timestamp(post.get("taken_at")),
}
caption = post["caption"]
@@ -399,6 +400,8 @@ class InstagramExtractor(Extractor):
self.log.debug("Cursor: %s", self._cursor)
def _pagination_api(self, endpoint, params=None):
+ if params is None:
+ params = {}
while True:
data = self._request_api(endpoint, params=params)
yield from data["items"]
@@ -509,7 +512,7 @@ class InstagramChannelExtractor(InstagramExtractor):
class InstagramSavedExtractor(InstagramExtractor):
"""Extractor for ProfilePage saved media"""
subcategory = "saved"
- pattern = USER_PATTERN + r"/saved"
+ pattern = USER_PATTERN + r"/saved/?$"
test = ("https://www.instagram.com/instagram/saved/",)
def posts(self):
@@ -518,6 +521,30 @@ class InstagramSavedExtractor(InstagramExtractor):
return self._pagination_graphql(query_hash, variables)
+class InstagramCollectionExtractor(InstagramExtractor):
+ """Extractor for ProfilePage saved collection media"""
+ subcategory = "collection"
+ pattern = USER_PATTERN + r"/saved/([^/?#]+)/([^/?#]+)"
+ test = (
+ "https://www.instagram.com/instagram/saved/collection_name/123456789/",
+ )
+
+ def __init__(self, match):
+ InstagramExtractor.__init__(self, match)
+ self.user, self.collection_name, self.collection_id = match.groups()
+
+ def metadata(self):
+ return {
+ "collection_id" : self.collection_id,
+ "collection_name": text.unescape(self.collection_name),
+ }
+
+ def posts(self):
+ endpoint = "/v1/feed/collection/{}/posts/".format(self.collection_id)
+ for item in self._pagination_api(endpoint):
+ yield item["media"]
+
+
class InstagramTagExtractor(InstagramExtractor):
"""Extractor for TagPage"""
subcategory = "tag"
diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py
index 6b2cf4c..00a32cd 100644
--- a/gallery_dl/extractor/itaku.py
+++ b/gallery_dl/extractor/itaku.py
@@ -101,9 +101,9 @@ class ItakuImageExtractor(ItakuExtractor):
"/gallery_imgs/220504_oUNIAFT/xl.jpg",
"liked_by_you": False,
"maturity_rating": "SFW",
- "num_comments": 2,
- "num_likes": 80,
- "num_reshares": 2,
+ "num_comments": int,
+ "num_likes": int,
+ "num_reshares": int,
"obj_tags": 136446,
"owner": 16775,
"owner_avatar": "https://d1wmr8tlk3viaj.cloudfront.net"
@@ -115,8 +115,9 @@ class ItakuImageExtractor(ItakuExtractor):
"tags": list,
"tags_character": ["hatsune_miku"],
"tags_copyright": ["vocaloid"],
- "tags_general" : ["twintails", "green_hair", "flag", "gloves",
- "green_eyes", "female", "racing_miku"],
+ "tags_general" : ["female", "green_eyes", "twintails",
+ "green_hair", "gloves", "flag",
+ "racing_miku"],
"title": "Racing Miku 2022 Ver.",
"too_mature": False,
"uncompressed_filesize": "0.62",
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index f1eb79f..816b561 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -440,20 +440,44 @@ class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
class KemonopartyFavoriteExtractor(KemonopartyExtractor):
"""Extractor for kemono.party favorites"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/favorites"
- test = ("https://kemono.party/favorites", {
- "pattern": KemonopartyUserExtractor.pattern,
- "url": "f4b5b796979bcba824af84206578c79101c7f0e1",
- "count": 3,
- })
+ pattern = BASE_PATTERN + r"/favorites(?:/?\?([^#]+))?"
+ test = (
+ ("https://kemono.party/favorites", {
+ "pattern": KemonopartyUserExtractor.pattern,
+ "url": "f4b5b796979bcba824af84206578c79101c7f0e1",
+ "count": 3,
+ }),
+ ("https://kemono.party/favorites?type=post", {
+ "pattern": KemonopartyPostExtractor.pattern,
+ "url": "ecfccf5f0d50b8d14caa7bbdcf071de5c1e5b90f",
+ "count": 3,
+ }),
+ )
+
+ def __init__(self, match):
+ KemonopartyExtractor.__init__(self, match)
+ self.favorites = (text.parse_query(match.group(2)).get("type") or
+ self.config("favorites") or
+ "artist")
def items(self):
self._prepare_ddosguard_cookies()
self.login()
- users = self.request(self.root + "/api/favorites").json()
- for user in users:
- user["_extractor"] = KemonopartyUserExtractor
- url = "{}/{}/user/{}".format(
- self.root, user["service"], user["id"])
- yield Message.Queue, url, user
+ if self.favorites == "artist":
+ users = self.request(
+ self.root + "/api/v1/account/favorites?type=artist").json()
+ for user in users:
+ user["_extractor"] = KemonopartyUserExtractor
+ url = "{}/{}/user/{}".format(
+ self.root, user["service"], user["id"])
+ yield Message.Queue, url, user
+
+ elif self.favorites == "post":
+ posts = self.request(
+ self.root + "/api/v1/account/favorites?type=post").json()
+ for post in posts:
+ post["_extractor"] = KemonopartyPostExtractor
+ url = "{}/{}/user/{}/post/{}".format(
+ self.root, post["service"], post["user"], post["id"])
+ yield Message.Queue, url, post
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index b5db3dd..57db0c9 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -49,7 +49,9 @@ class LusciousAlbumExtractor(LusciousExtractor):
r"/(?:albums|pictures/c/[^/?#]+/album)/[^/?#]+_(\d+)")
test = (
("https://luscious.net/albums/okinami-no-koigokoro_277031/", {
- "url": "7e4984a271a1072ac6483e4228a045895aff86f3",
+ "pattern": r"https://storage\.bhs\.cloud\.ovh\.net/v1/AUTH_\w+"
+ r"/images/NTRshouldbeillegal/277031"
+ r"/luscious_net_\d+_\d+\.jpg$",
# "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
"keyword": {
"album": {
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 6e780e8..493a8ef 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -179,12 +179,11 @@ class MastodonAPI():
try:
access_token = INSTANCES[extractor.category]["access-token"]
except (KeyError, TypeError):
- raise exception.StopExtraction(
- "Missing access token.\n"
- "Run 'gallery-dl oauth:mastodon:%s' to obtain one.",
- extractor.instance)
-
- self.headers = {"Authorization": "Bearer " + access_token}
+ pass
+ if access_token:
+ self.headers = {"Authorization": "Bearer " + access_token}
+ else:
+ self.headers = None
def account_id_by_username(self, username):
if username.startswith("id:"):
@@ -232,6 +231,11 @@ class MastodonAPI():
if code < 400:
return response
+ if code == 401:
+ raise exception.StopExtraction(
+ "Invalid or missing access token.\n"
+ "Run 'gallery-dl oauth:mastodon:%s' to obtain one.",
+ self.extractor.instance)
if code == 404:
raise exception.NotFoundError()
if code == 429:
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 122ea46..2c8e72c 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -126,7 +126,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
username, password = self._get_auth_info()
self._update_cookies(self._login_impl(username, password))
- @cache(maxage=150*24*3600, keyarg=1)
+ @cache(maxage=90*24*3600, keyarg=1)
def _login_impl(self, username, password):
if not username or not password:
raise exception.AuthenticationError(
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 653822f..d6628c4 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -41,7 +41,8 @@ class OAuthBase(Extractor):
stdout_write("Waiting for response. (Cancel with Ctrl+c)\n")
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
- server.bind(("localhost", self.config("port", 6414)))
+ server.bind((self.config("host", "localhost"),
+ self.config("port", 6414)))
server.listen(1)
# workaround for ctrl+c not working during server.accept on Windows
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index fba1312..225f0ff 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -122,7 +122,7 @@ class PhilomenaPostExtractor(PhilomenaExtractor):
"tag_ids": list,
"tags": list,
"thumbnails_generated": True,
- "updated_at": "2022-04-25T09:30:57Z",
+ "updated_at": r"re:\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ",
"uploader": "Clover the Clever",
"uploader_id": 211188,
"upvotes": int,
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index e1846cc..8203885 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -51,13 +51,13 @@ class PoipikuExtractor(Extractor):
thumb = extr('class="IllustItemThumbImg" src="', '"')
if not thumb:
break
- elif thumb.startswith("/img/"):
+ elif thumb.startswith(("//img.poipiku.com/img/", "/img/")):
continue
post["num"] += 1
url = text.ensure_http_scheme(thumb[:-8])
yield Message.Url, url, text.nameext_from_url(url, post)
- if not extr(' show all', '<'):
+ if not extr('> show all', '<'):
continue
url = self.root + "/f/ShowAppendFileF.jsp"
@@ -131,7 +131,7 @@ class PoipikuPostExtractor(PoipikuExtractor):
pattern = BASE_PATTERN + r"/(\d+)/(\d+)"
test = (
("https://poipiku.com/25049/5864576.html", {
- "pattern": r"https://img\.poipiku\.com/user_img03/000025049"
+ "pattern": r"https://img\.poipiku\.com/user_img\d+/000025049"
r"/005864576_EWN1Y65gQ\.png$",
"keyword": {
"count": "1",
@@ -146,7 +146,7 @@ class PoipikuPostExtractor(PoipikuExtractor):
},
}),
("https://poipiku.com/2166245/6411749.html", {
- "pattern": r"https://img\.poipiku\.com/user_img01/002166245"
+ "pattern": r"https://img\.poipiku\.com/user_img\d+/002166245"
r"/006411749_\w+\.jpeg$",
"count": 4,
"keyword": {
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 6dfc907..cd8c238 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -16,13 +16,14 @@ class SkebExtractor(Extractor):
category = "skeb"
directory_fmt = ("{category}", "{creator[screen_name]}")
filename_fmt = "{post_num}_{file_id}.{extension}"
- archive_fmt = "{post_num}_{file_id}_{content_category}"
+ archive_fmt = "{post_num}_{_file_id}_{content_category}"
root = "https://skeb.jp"
def __init__(self, match):
Extractor.__init__(self, match)
self.user_name = match.group(1)
self.thumbnails = self.config("thumbnails", False)
+ self.article = self.config("article", False)
def items(self):
for user_name, post_num in self.posts():
@@ -64,6 +65,7 @@ class SkebExtractor(Extractor):
resp = self.request(url, headers=headers).json()
creator = resp["creator"]
post = {
+ "post_id" : resp["id"],
"post_num" : post_num,
"post_url" : self.root + resp["path"],
"body" : resp["body"],
@@ -102,12 +104,22 @@ class SkebExtractor(Extractor):
if self.thumbnails and "og_image_url" in resp:
post["content_category"] = "thumb"
post["file_id"] = "thumb"
+ post["_file_id"] = str(resp["id"]) + "t"
post["file_url"] = resp["og_image_url"]
yield post
+ if self.article and "article_image_url" in resp:
+ url = resp["article_image_url"]
+ if url:
+ post["content_category"] = "article"
+ post["file_id"] = "article"
+ post["_file_id"] = str(resp["id"]) + "a"
+ post["file_url"] = url
+ yield post
+
for preview in resp["previews"]:
post["content_category"] = "preview"
- post["file_id"] = preview["id"]
+ post["file_id"] = post["_file_id"] = preview["id"]
post["file_url"] = preview["url"]
info = preview["information"]
post["original"] = {
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index b0b8f3b..506db26 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -59,7 +59,7 @@ class SlidesharePresentationExtractor(GalleryExtractor):
# mobile URL
(("https://www.slideshare.net"
"/mobile/uqudent/introduction-to-fixed-prosthodontics"), {
- "url": "59993ad7b0cb93c73011547eedcd02c622649e9d",
+ "url": "43eda2adf4dd221a251c8df794dfb82649e94647",
}),
)
@@ -72,14 +72,14 @@ class SlidesharePresentationExtractor(GalleryExtractor):
def metadata(self, page):
extr = text.extract_from(page)
descr = extr('', '')
- published = extr('
', '
')
comments = extr('content="UserComments:', '"')
likes = extr('content="UserLikes:', '"')
views = extr('content="UserPageVisits:', '"')
+ title = extr('', '')
+ published = extr('', '
')
if descr.endswith("…"):
- alt_descr = extr('id="slideshow-description-text"', '
')
+ alt_descr = extr('slideshow-description-text"', '')
if alt_descr:
descr = text.remove_html(alt_descr.partition(">")[2]).strip()
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index 98e914e..4010da3 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -111,13 +111,13 @@ class SmugmugImageExtractor(SmugmugExtractor):
test = (
("https://tdm.smugmug.com/Nature/Dove/i-kCsLJT6", {
"url": "e6408fd2c64e721fd146130dceb56a971ceb4259",
- "keyword": "460a773f5addadd3e216bda346fc524fe4eedc52",
+ "keyword": "b31a63d07c9c26eb0f79f52d60d171a98938f99b",
"content": "ecbd9d7b4f75a637abc8d35319be9ec065a44eb0",
}),
# video
("https://tstravels.smugmug.com/Dailies/Daily-Dose-2015/i-39JFNzB", {
"url": "04d0ab1ff829ca7d78f5acb5548953df08e9a5ee",
- "keyword": "eb74e5cf6780d5152ab8f11b431ec1b17fa8f69b",
+ "keyword": "4cef98133ace511adc874c9d9abac5817ba0d856",
}),
)
diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py
index fcdf18f..545a95b 100644
--- a/gallery_dl/extractor/tapas.py
+++ b/gallery_dl/extractor/tapas.py
@@ -108,7 +108,7 @@ class TapasSeriesExtractor(TapasExtractor):
test = (
("https://tapas.io/series/just-leave-me-be", {
"pattern": r"https://\w+\.cloudfront\.net/pc/\w\w/[0-9a-f-]+\.jpg",
- "count": 127,
+ "count": 132,
}),
("https://tapas.io/series/yona", { # mature
"count": 26,
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index ded7fd1..b694fa0 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -64,6 +64,7 @@ class TumblrExtractor(Extractor):
self.inline = self.config("inline", True)
self.reblogs = self.config("reblogs", True)
self.external = self.config("external", False)
+ self.original = self.config("original", True)
if len(self.types) == 1:
self.api.posts_type = next(iter(self.types))
@@ -101,8 +102,7 @@ class TumblrExtractor(Extractor):
del post["trail"]
post["blog"] = blog
post["date"] = text.parse_timestamp(post["timestamp"])
- yield Message.Directory, post
- post["num"] = 0
+ posts = []
if "photos" in post: # type "photo" or "link"
photos = post["photos"]
@@ -110,18 +110,31 @@ class TumblrExtractor(Extractor):
for photo in photos:
post["photo"] = photo
- photo.update(photo["original_size"])
+
+ best_photo = photo["original_size"]
+ for alt_photo in photo["alt_sizes"]:
+ if (alt_photo["height"] > best_photo["height"] or
+ alt_photo["width"] > best_photo["width"]):
+ best_photo = alt_photo
+ photo.update(best_photo)
+
+ if self.original and "/s2048x3072/" in photo["url"] and (
+ photo["width"] == 2048 or photo["height"] == 3072):
+ photo["url"] = self._original_image(photo["url"])
+
del photo["original_size"]
del photo["alt_sizes"]
- yield self._prepare_image(photo["url"], post)
+ posts.append(
+ self._prepare_image(photo["url"], post.copy()))
+ del post["photo"]
url = post.get("audio_url") # type "audio"
if url and url.startswith("https://a.tumblr.com/"):
- yield self._prepare(url, post)
+ posts.append(self._prepare(url, post.copy()))
url = post.get("video_url") # type "video"
if url:
- yield self._prepare(_original_video(url), post)
+ posts.append(self._prepare(_original_video(url), post.copy()))
if self.inline and "reblog" in post: # inline media
# only "chat" posts are missing a "reblog" key in their
@@ -129,16 +142,25 @@ class TumblrExtractor(Extractor):
body = post["reblog"]["comment"] + post["reblog"]["tree_html"]
+ for url in re.findall('<img src="([^"]+)"', body):
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index bdbdc8c..189c0c5 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -99,13 +99,14 @@ class WeiboExtractor(Extractor):
else:
yield pic["largest"].copy()
- if "page_info" in status:
- page_info = status["page_info"]
- if "media_info" not in page_info or not self.videos:
- return
- media = max(page_info["media_info"]["playback_list"],
- key=lambda m: m["meta"]["quality_index"])
- yield media["play_info"].copy()
+ if "page_info" in status and self.videos:
+ try:
+ media = max(status["page_info"]["media_info"]["playback_list"],
+ key=lambda m: m["meta"]["quality_index"])
+ except KeyError:
+ pass
+ else:
+ yield media["play_info"].copy()
def _status_by_id(self, status_id):
url = "{}/ajax/statuses/show?id={}".format(self.root, status_id)
@@ -147,14 +148,17 @@ class WeiboExtractor(Extractor):
return
yield from statuses
- if "next_cursor" in data:
+ if "next_cursor" in data: # videos, newvideo
params["cursor"] = data["next_cursor"]
- elif "page" in params:
+ elif "page" in params: # home, article
params["page"] += 1
- elif data["since_id"]:
+ elif data["since_id"]: # album
params["sinceid"] = data["since_id"]
- else:
- params["since_id"] = statuses[-1]["id"] - 1
+ else: # feed, last album page
+ try:
+ params["since_id"] = statuses[-1]["id"] - 1
+ except KeyError:
+ return
def _sina_visitor_system(self, response):
self.log.info("Sina Visitor System")
@@ -366,6 +370,10 @@ class WeiboStatusExtractor(WeiboExtractor):
"pattern": r"https://g\.us\.sinaimg.cn/o0/qNZcaAAglx07Wuf921CM0104"
r"120005tc0E010\.mp4\?label=gif_mp4",
}),
+ # missing 'playback_list' (#2792)
+ ("https://weibo.com/2909128931/4409545658754086", {
+ "count": 9,
+ }),
("https://m.weibo.cn/status/4339748116375525"),
("https://m.weibo.cn/5746766133/4339748116375525"),
)
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
new file mode 100644
index 0000000..2b5acd8
--- /dev/null
+++ b/gallery_dl/extractor/zerochan.py
@@ -0,0 +1,156 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.zerochan.net/"""
+
+from .booru import BooruExtractor
+from ..cache import cache
+from .. import text, exception
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
+
+
+class ZerochanExtractor(BooruExtractor):
+ """Base class for zerochan extractors"""
+ category = "zerochan"
+ root = "https://www.zerochan.net"
+ filename_fmt = "{id}.{extension}"
+ archive_fmt = "{id}"
+ cookiedomain = ".zerochan.net"
+ cookienames = ("z_id", "z_hash")
+
+ def login(self):
+ if not self._check_cookies(self.cookienames):
+ username, password = self._get_auth_info()
+ if username:
+ self._update_cookies(self._login_impl(username, password))
+ # force legacy layout
+ self.session.cookies.set("v3", "0", domain=self.cookiedomain)
+
+ @cache(maxage=90*86400, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = self.root + "/login"
+ headers = {
+ "Origin" : self.root,
+ "Referer" : url,
+ }
+ data = {
+ "ref" : "/",
+ "name" : username,
+ "password": password,
+ "login" : "Login",
+ }
+
+ response = self.request(url, method="POST", headers=headers, data=data)
+ if not response.history:
+ raise exception.AuthenticationError()
+
+ return response.cookies
+
+ def _parse_entry_page(self, entry_id):
+ url = "{}/{}".format(self.root, entry_id)
+ extr = text.extract_from(self.request(url).text)
+
+ return {
+ "id" : entry_id,
+ "author": extr('"author": "', '"'),
+ "file_url": extr('"contentUrl": "', '"'),
+ "date" : text.parse_datetime(extr(
+ '"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y"),
+ "width" : extr('"width": "', ' '),
+ "height": extr('"height": "', ' '),
+ "size" : extr('"contentSize": "', 'B'),
+ "path" : text.split_html(extr(
+ 'class="breadcrumbs', ''))[3::2],
+ "tags" : extr('alt="Tags: ', '"').split(", ")
+ }
+
+
+class ZerochanTagExtractor(ZerochanExtractor):
+ subcategory = "tag"
+ directory_fmt = ("{category}", "{search_tags}")
+ pattern = BASE_PATTERN + r"/(?!\d+$)([^/?#]+)/?(?:\?([^#]+))?"
+ test = ("https://www.zerochan.net/Perth+%28Kantai+Collection%29", {
+ "pattern": r"https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)",
+ "count": "> 24",
+ "keywords": {
+ "extension": r"re:jpg|png",
+ "file_url": "",
+ "filename": r"re:Perth.\(Kantai.Collection\).full.\d+",
+ "height": r"re:^\d+$",
+ "id": r"re:^\d+$",
+ "name": "Perth (Kantai Collection)",
+ "search_tags": "Perth (Kantai Collection)",
+ "size": r"re:^\d+k$",
+ "width": r"re:^\d+$",
+ },
+ })
+
+ def __init__(self, match):
+ ZerochanExtractor.__init__(self, match)
+ self.search_tag, self.query = match.groups()
+
+ def metadata(self):
+ return {"search_tags": text.unquote(
+ self.search_tag.replace("+", " "))}
+
+ def posts(self):
+ url = self.root + "/" + self.search_tag
+ params = text.parse_query(self.query)
+ params["p"] = text.parse_int(params.get("p"), 1)
+
+ while True:
+ page = self.request(url, params=params).text
+ thumbs = text.extract(page, '