 CHANGELOG.md                          |  31
 PKG-INFO                              |  21
 README.rst                            |  19
 data/man/gallery-dl.1                 |   2
 data/man/gallery-dl.conf.5            |  47
 gallery_dl.egg-info/PKG-INFO          |  21
 gallery_dl.egg-info/SOURCES.txt       |   3
 gallery_dl/downloader/http.py         |  30
 gallery_dl/extractor/2chen.py         |  35
 gallery_dl/extractor/__init__.py      |   4
 gallery_dl/extractor/behance.py       |  42
 gallery_dl/extractor/deviantart.py    |  23
 gallery_dl/extractor/imagefap.py      |  26
 gallery_dl/extractor/imagehosts.py    |  23
 gallery_dl/extractor/imgur.py         |   6
 gallery_dl/extractor/itchio.py        |  82
 gallery_dl/extractor/manganelo.py     |  23
 gallery_dl/extractor/nana.py          |  20
 gallery_dl/extractor/nitter.py        |  11
 gallery_dl/extractor/nozomi.py        |  44
 gallery_dl/extractor/oauth.py         |   7
 gallery_dl/extractor/paheal.py        |   4
 gallery_dl/extractor/pixiv.py         |   5
 gallery_dl/extractor/reddit.py        |  28
 gallery_dl/extractor/sankaku.py       |  30
 gallery_dl/extractor/shimmie2.py      | 326
 gallery_dl/extractor/tumblr.py        | 141
 gallery_dl/extractor/twitter.py       |  19
 gallery_dl/extractor/vipergirls.py    | 108
 gallery_dl/postprocessor/metadata.py  |  23
 gallery_dl/version.py                 |   2
 gallery_dl/ytdl.py                    |   9
 test/test_postprocessor.py            |  47
 test/test_ytdl.py                     |  16
 34 files changed, 1105 insertions(+), 173 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a67e3ab..a76a0dd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,36 @@
# Changelog
+## 1.25.3 - 2023-04-30
+### Additions
+- [imagefap] extract `description` and `categories` metadata ([#3905](https://github.com/mikf/gallery-dl/issues/3905))
+- [imxto] add `gallery` extractor ([#1289](https://github.com/mikf/gallery-dl/issues/1289))
+- [itchio] add `game` extractor ([#3923](https://github.com/mikf/gallery-dl/issues/3923))
+- [nitter] extract user IDs from encoded banner URLs
+- [pixiv] allow sorting search results by popularity ([#3970](https://github.com/mikf/gallery-dl/issues/3970))
+- [reddit] match `preview.redd.it` URLs ([#3935](https://github.com/mikf/gallery-dl/issues/3935))
+- [sankaku] support post URLs with MD5 hashes ([#3952](https://github.com/mikf/gallery-dl/issues/3952))
+- [shimmie2] add generic extractors for Shimmie2 sites ([#3734](https://github.com/mikf/gallery-dl/issues/3734), [#943](https://github.com/mikf/gallery-dl/issues/943))
+- [tumblr] add `day` extractor ([#3951](https://github.com/mikf/gallery-dl/issues/3951))
+- [twitter] support `profile-conversation` entries ([#3938](https://github.com/mikf/gallery-dl/issues/3938))
+- [vipergirls] add `thread` and `post` extractors ([#3812](https://github.com/mikf/gallery-dl/issues/3812), [#2720](https://github.com/mikf/gallery-dl/issues/2720), [#731](https://github.com/mikf/gallery-dl/issues/731))
+- [downloader:http] add `consume-content` option ([#3748](https://github.com/mikf/gallery-dl/issues/3748))
+### Fixes
+- [2chen] update domain to sturdychan.help
+- [behance] fix extraction ([#3980](https://github.com/mikf/gallery-dl/issues/3980))
+- [deviantart] retry downloads with private token ([#3941](https://github.com/mikf/gallery-dl/issues/3941))
+- [imagefap] fix empty `tags` metadata
+- [manganelo] support arbitrary minor version separators ([#3972](https://github.com/mikf/gallery-dl/issues/3972))
+- [nozomi] fix file URLs ([#3925](https://github.com/mikf/gallery-dl/issues/3925))
+- [oauth] catch exceptions from `webbrowser.get()` ([#3947](https://github.com/mikf/gallery-dl/issues/3947))
+- [pixiv] fix `pixivision` extraction
+- [reddit] ignore `id-max` value `"zik0zj"`/`2147483647` ([#3939](https://github.com/mikf/gallery-dl/issues/3939), [#3862](https://github.com/mikf/gallery-dl/issues/3862), [#3697](https://github.com/mikf/gallery-dl/issues/3697), [#3606](https://github.com/mikf/gallery-dl/issues/3606), [#3546](https://github.com/mikf/gallery-dl/issues/3546), [#3521](https://github.com/mikf/gallery-dl/issues/3521), [#3412](https://github.com/mikf/gallery-dl/issues/3412))
+- [sankaku] sanitize `date:` tags ([#1790](https://github.com/mikf/gallery-dl/issues/1790))
+- [tumblr] fix and update pagination logic ([#2191](https://github.com/mikf/gallery-dl/issues/2191))
+- [twitter] fix `user` metadata when downloading quoted Tweets ([#3922](https://github.com/mikf/gallery-dl/issues/3922))
+- [ytdl] fix crash due to `--geo-bypass` deprecation ([#3975](https://github.com/mikf/gallery-dl/issues/3975))
+- [postprocessor:metadata] support putting keys in quotes
+- include more optional dependencies in executables ([#3907](https://github.com/mikf/gallery-dl/issues/3907))
+
## 1.25.2 - 2023-04-15
### Additions
- [deviantart] add `public` option
diff --git a/PKG-INFO b/PKG-INFO
index cb01fca..f4807ab 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.25.2
+Version: 1.25.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -66,6 +66,9 @@ Optional
- yt-dlp_ or youtube-dl_: Video downloads
- PySocks_: SOCKS proxy support
- brotli_ or brotlicffi_: Brotli compression support
+- PyYAML_: YAML configuration file support
+- toml_: TOML configuration file support for Python<3.11
+- SecretStorage_: GNOME keyring passwords for ``--cookies-from-browser``
Installation
@@ -106,9 +109,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.2/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.2/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.bin>`__
Nightly Builds
@@ -157,6 +160,15 @@ For macOS or Linux users using Homebrew:
brew install gallery-dl
+MacPorts
+--------
+
+For macOS users with MacPorts:
+
+.. code:: bash
+
+ sudo port install gallery-dl
+
Usage
=====
@@ -410,6 +422,9 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _PySocks: https://pypi.org/project/PySocks/
.. _brotli: https://github.com/google/brotli
.. _brotlicffi: https://github.com/python-hyper/brotlicffi
+.. _PyYAML: https://pyyaml.org/
+.. _toml: https://pypi.org/project/toml/
+.. _SecretStorage: https://pypi.org/project/SecretStorage/
.. _Snapd: https://docs.snapcraft.io/installing-snapd
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
.. _Chocolatey: https://chocolatey.org/install
diff --git a/README.rst b/README.rst
index 8472d2d..1f4b692 100644
--- a/README.rst
+++ b/README.rst
@@ -29,6 +29,9 @@ Optional
- yt-dlp_ or youtube-dl_: Video downloads
- PySocks_: SOCKS proxy support
- brotli_ or brotlicffi_: Brotli compression support
+- PyYAML_: YAML configuration file support
+- toml_: TOML configuration file support for Python<3.11
+- SecretStorage_: GNOME keyring passwords for ``--cookies-from-browser``
Installation
@@ -69,9 +72,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.2/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.2/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.bin>`__
Nightly Builds
@@ -120,6 +123,15 @@ For macOS or Linux users using Homebrew:
brew install gallery-dl
+MacPorts
+--------
+
+For macOS users with MacPorts:
+
+.. code:: bash
+
+ sudo port install gallery-dl
+
Usage
=====
@@ -373,6 +385,9 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _PySocks: https://pypi.org/project/PySocks/
.. _brotli: https://github.com/google/brotli
.. _brotlicffi: https://github.com/python-hyper/brotlicffi
+.. _PyYAML: https://pyyaml.org/
+.. _toml: https://pypi.org/project/toml/
+.. _SecretStorage: https://pypi.org/project/SecretStorage/
.. _Snapd: https://docs.snapcraft.io/installing-snapd
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
.. _Chocolatey: https://chocolatey.org/install
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 8aa419d..b5ad7f2 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-04-15" "1.25.2" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-04-30" "1.25.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 63d78f0..3bd9f17 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-04-15" "1.25.2" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-04-30" "1.25.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1935,7 +1935,11 @@ even ones without a \f[I]generic:\f[] prefix.
.IP "Description:" 4
List of names of the preferred animation format, which can be
-\f[I]"mp4"\f[], \f[I]"webm"\f[], \f[I]"mobile"\f[], \f[I]"gif"\f[], or \f[I]"webp"\f[].
+\f[I]"mp4"\f[],
+\f[I]"webm"\f[],
+\f[I]"mobile"\f[],
+\f[I]"gif"\f[], or
+\f[I]"webp"\f[].
If a selected format is not available, the next one in the list will be
tried until an available format is found.
@@ -2027,6 +2031,14 @@ Available formats are \f[I]"webp"\f[] and \f[I]"avif"\f[].
but is most likely going to fail with \f[I]403 Forbidden\f[] errors.
+.SS extractor.imgur.client-id
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Description:" 4
+Custom Client ID value for API requests.
+
+
.SS extractor.imgur.mp4
.IP "Type:" 6
.br
@@ -3002,13 +3014,18 @@ but it will not always get the best video quality available.
.IP "Description:" 4
List of names of the preferred animation format, which can be
-\f[I]"hd"\f[], \f[I]"sd"\f[], "gif", "vthumbnail"`, "thumbnail"\f[I], or \f[]"poster"\f[I].
+\f[I]"hd"\f[],
+\f[I]"sd"\f[],
+\f[I]"gif"\f[],
+\f[I]"thumbnail"\f[],
+\f[I]"vthumbnail"\f[], or
+\f[I]"poster"\f[].
If a selected format is not available, the next one in the list will be
tried until an available format is found.
-If the format is given as \f[]string\f[I], it will be extended with
-\f[]["hd", "sd", "gif"]``. Use a list with one element to
+If the format is given as \f[I]string\f[], it will be extended with
+\f[I]["hd", "sd", "gif"]\f[]. Use a list with one element to
restrict it to only one possible format.
@@ -4178,6 +4195,26 @@ of a file called \f[I]example.png\f[] from \f[I]png\f[] to \f[I]jpg\f[] when sai
contains JPEG/JFIF data.
+.SS downloader.http.consume-content
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Controls the behavior when an HTTP response is considered
+unsuccessful.
+
+If the value is \f[I]true\f[], consume the response body. This
+avoids closing the connection and therefore improves connection
+reuse.
+
+If the value is \f[I]false\f[], immediately close the connection
+without reading the response. This can be useful if the server
+is known to send large bodies for error responses.
+
+
.SS downloader.http.chunk-size
.IP "Type:" 6
.br
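
A minimal sketch of enabling the ``consume-content`` option documented above
from Python, assuming gallery-dl's ``config.set()`` helper (the same key can
equally be set in ``gallery-dl.conf``):

.. code:: python

    from gallery_dl import config

    # drain the bodies of unsuccessful responses so that the underlying
    # HTTP connections can be released back to the pool and reused
    config.set(("downloader", "http"), "consume-content", True)
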
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 25c9619..73bcd92 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.25.2
+Version: 1.25.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -66,6 +66,9 @@ Optional
- yt-dlp_ or youtube-dl_: Video downloads
- PySocks_: SOCKS proxy support
- brotli_ or brotlicffi_: Brotli compression support
+- PyYAML_: YAML configuration file support
+- toml_: TOML configuration file support for Python<3.11
+- SecretStorage_: GNOME keyring passwords for ``--cookies-from-browser``
Installation
@@ -106,9 +109,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.2/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.2/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.bin>`__
Nightly Builds
@@ -157,6 +160,15 @@ For macOS or Linux users using Homebrew:
brew install gallery-dl
+MacPorts
+--------
+
+For macOS users with MacPorts:
+
+.. code:: bash
+
+ sudo port install gallery-dl
+
Usage
=====
@@ -410,6 +422,9 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _PySocks: https://pypi.org/project/PySocks/
.. _brotli: https://github.com/google/brotli
.. _brotlicffi: https://github.com/python-hyper/brotlicffi
+.. _PyYAML: https://pyyaml.org/
+.. _toml: https://pypi.org/project/toml/
+.. _SecretStorage: https://pypi.org/project/SecretStorage/
.. _Snapd: https://docs.snapcraft.io/installing-snapd
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
.. _Chocolatey: https://chocolatey.org/install
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index bb2ff51..2e66ea6 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -112,6 +112,7 @@ gallery_dl/extractor/inkbunny.py
gallery_dl/extractor/instagram.py
gallery_dl/extractor/issuu.py
gallery_dl/extractor/itaku.py
+gallery_dl/extractor/itchio.py
gallery_dl/extractor/kabeuchi.py
gallery_dl/extractor/keenspot.py
gallery_dl/extractor/kemonoparty.py
@@ -177,6 +178,7 @@ gallery_dl/extractor/sankakucomplex.py
gallery_dl/extractor/seiga.py
gallery_dl/extractor/senmanga.py
gallery_dl/extractor/sexcom.py
+gallery_dl/extractor/shimmie2.py
gallery_dl/extractor/shopify.py
gallery_dl/extractor/simplyhentai.py
gallery_dl/extractor/skeb.py
@@ -202,6 +204,7 @@ gallery_dl/extractor/uploadir.py
gallery_dl/extractor/urlshortener.py
gallery_dl/extractor/vanillarock.py
gallery_dl/extractor/vichan.py
+gallery_dl/extractor/vipergirls.py
gallery_dl/extractor/vk.py
gallery_dl/extractor/vsco.py
gallery_dl/extractor/wallhaven.py
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 88e86e9..4ec0398 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -44,6 +44,12 @@ class HttpDownloader(DownloaderBase):
self.mtime = self.config("mtime", True)
self.rate = self.config("rate")
+ if not self.config("consume-content", False):
+ # this resets the underlying TCP connection, and therefore
+ # if the program makes another request to the same domain,
+ # a new connection (either TLS or plain TCP) must be made
+ self.release_conn = lambda resp: resp.close()
+
if self.retries < 0:
self.retries = float("inf")
if self.minsize:
@@ -106,7 +112,7 @@ class HttpDownloader(DownloaderBase):
while True:
if tries:
if response:
- response.close()
+ self.release_conn(response)
response = None
self.log.warning("%s (%s/%s)", msg, tries, self.retries+1)
if tries > self.retries:
@@ -165,18 +171,24 @@ class HttpDownloader(DownloaderBase):
retry = kwdict.get("_http_retry")
if retry and retry(response):
continue
+ self.release_conn(response)
self.log.warning(msg)
return False
# check for invalid responses
validate = kwdict.get("_http_validate")
if validate and self.validate:
- result = validate(response)
+ try:
+ result = validate(response)
+ except Exception:
+ self.release_conn(response)
+ raise
if isinstance(result, str):
url = result
tries -= 1
continue
if not result:
+ self.release_conn(response)
self.log.warning("Invalid response")
return False
@@ -184,11 +196,13 @@ class HttpDownloader(DownloaderBase):
size = text.parse_int(size, None)
if size is not None:
if self.minsize and size < self.minsize:
+ self.release_conn(response)
self.log.warning(
"File size smaller than allowed minimum (%s < %s)",
size, self.minsize)
return False
if self.maxsize and size > self.maxsize:
+ self.release_conn(response)
self.log.warning(
"File size larger than allowed maximum (%s > %s)",
size, self.maxsize)
@@ -280,6 +294,18 @@ class HttpDownloader(DownloaderBase):
return True
+ def release_conn(self, response):
+ """Release connection back to pool by consuming response body"""
+ try:
+ for _ in response.iter_content(self.chunk_size):
+ pass
+ except (RequestException, SSLError, OpenSSLError) as exc:
+ print()
+ self.log.debug(
+ "Unable to consume response body (%s: %s); "
+ "closing the connection anyway", exc.__class__.__name__, exc)
+ response.close()
+
@staticmethod
def receive(fp, content, bytes_total, bytes_start):
write = fp.write
diff --git a/gallery_dl/extractor/2chen.py b/gallery_dl/extractor/2chen.py
index d9674d8..f142690 100644
--- a/gallery_dl/extractor/2chen.py
+++ b/gallery_dl/extractor/2chen.py
@@ -4,35 +4,46 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://2chen.moe/"""
+"""Extractors for https://sturdychan.help/"""
from .common import Extractor, Message
from .. import text
+BASE_PATTERN = r"(?:https?://)?(?:sturdychan.help|2chen\.(?:moe|club))"
+
class _2chenThreadExtractor(Extractor):
"""Extractor for 2chen threads"""
category = "2chen"
subcategory = "thread"
+ root = "https://sturdychan.help"
directory_fmt = ("{category}", "{board}", "{thread} {title}")
filename_fmt = "{time} {filename}.{extension}"
archive_fmt = "{board}_{thread}_{hash}_{time}"
- pattern = r"(?:https?://)?2chen\.(?:moe|club)/([^/?#]+)/(\d+)"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/(\d+)"
test = (
- ("https://2chen.moe/tv/496715", {
- "pattern": r"https://2chen\.su/assets/images/src/\w{40}\.\w+$",
+ ("https://sturdychan.help/tv/268929", {
+ "pattern": r"https://sturdychan\.help/assets/images"
+ r"/src/\w{40}\.\w+$",
"count": ">= 179",
+ "keyword": {
+ "board": "tv",
+ "date": "type:datetime",
+ "hash": r"re:[0-9a-f]{40}",
+ "name": "Anonymous",
+ "no": r"re:\d+",
+ "thread": "268929",
+ "time": int,
+ "title": "「/ttg/ #118: 🇧🇷 edition」",
+ "url": str,
+ },
}),
- ("https://2chen.club/tv/1", {
- "count": 5,
- }),
- # 404
+ ("https://2chen.club/tv/1"),
("https://2chen.moe/jp/303786"),
)
def __init__(self, match):
Extractor.__init__(self, match)
- self.root = text.root_from_url(match.group(0))
self.board, self.thread = match.groups()
def items(self):
@@ -88,9 +99,10 @@ class _2chenBoardExtractor(Extractor):
"""Extractor for 2chen boards"""
category = "2chen"
subcategory = "board"
- pattern = r"(?:https?://)?2chen\.(?:moe|club)/([^/?#]+)(?:/catalog|/?$)"
+ root = "https://sturdychan.help"
+ pattern = BASE_PATTERN + r"/([^/?#]+)(?:/catalog|/?$)"
test = (
- ("https://2chen.moe/co/", {
+ ("https://sturdychan.help/co/", {
"pattern": _2chenThreadExtractor.pattern
}),
("https://2chen.moe/co"),
@@ -100,7 +112,6 @@ class _2chenBoardExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.root = text.root_from_url(match.group(0))
self.board = match.group(1)
def items(self):
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 553a110..9841ca7 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -74,6 +74,7 @@ modules = [
"instagram",
"issuu",
"itaku",
+ "itchio",
"kabeuchi",
"keenspot",
"kemonoparty",
@@ -93,7 +94,6 @@ modules = [
"mangapark",
"mangasee",
"mangoxo",
- "mememuseum",
"misskey",
"myhentaigallery",
"myportfolio",
@@ -133,6 +133,7 @@ modules = [
"seiga",
"senmanga",
"sexcom",
+ "shimmie2",
"simplyhentai",
"skeb",
"slickpic",
@@ -156,6 +157,7 @@ modules = [
"urlshortener",
"vanillarock",
"vichan",
+ "vipergirls",
"vk",
"vsco",
"wallhaven",
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index 1469aad..d8cc51d 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -81,10 +81,13 @@ class BehanceGalleryExtractor(BehanceExtractor):
("https://www.behance.net/gallery/88276087/Audi-R8-RWD", {
"count": 20,
"url": "6bebff0d37f85349f9ad28bd8b76fd66627c1e2f",
+ "pattern": r"https://mir-s3-cdn-cf\.behance\.net/project_modules"
+                       r"/source/[0-9a-f]+\.[0-9a-f]+\.jpg"
}),
# 'video' modules (#1282)
("https://www.behance.net/gallery/101185577/COLCCI", {
- "pattern": r"ytdl:https://cdn-prod-ccv\.adobe\.com/",
+ "pattern": r"https://cdn-prod-ccv\.adobe\.com/\w+"
+ r"/rend/\w+_720\.mp4\?",
"count": 3,
}),
)
@@ -129,26 +132,35 @@ class BehanceGalleryExtractor(BehanceExtractor):
append = result.append
for module in data["modules"]:
- mtype = module["type"]
+ mtype = module["__typename"]
- if mtype == "image":
- url = module["sizes"]["original"]
+ if mtype == "ImageModule":
+ url = module["imageSizes"]["size_original"]["url"]
append((url, module))
- elif mtype == "video":
- page = self.request(module["src"]).text
- url = text.extr(page, '<source src="', '"')
- if text.ext_from_url(url) == "m3u8":
- url = "ytdl:" + url
+ elif mtype == "VideoModule":
+ renditions = module["videoData"]["renditions"]
+ try:
+ url = [
+ r["url"] for r in renditions
+ if text.ext_from_url(r["url"]) != "m3u8"
+ ][-1]
+ except Exception as exc:
+ self.log.debug("%s: %s", exc.__class__.__name__, exc)
+ url = "ytdl:" + renditions[-1]["url"]
append((url, module))
- elif mtype == "media_collection":
+ elif mtype == "MediaCollectionModule":
for component in module["components"]:
- url = component["sizes"]["source"]
- append((url, module))
-
- elif mtype == "embed":
- embed = module.get("original_embed") or module.get("embed")
+ for size in component["imageSizes"].values():
+ if size:
+ parts = size["url"].split("/")
+ parts[4] = "source"
+ append(("/".join(parts), module))
+ break
+
+ elif mtype == "EmbedModule":
+ embed = module.get("originalEmbed") or module.get("fluidEmbed")
if embed:
append(("ytdl:" + text.extr(embed, 'src="', '"'), module))
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index f532a97..18d9867 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1417,7 +1417,14 @@ class DeviantartOAuthAPI():
"""Get the original file download (if allowed)"""
endpoint = "/deviation/download/" + deviation_id
params = {"mature_content": self.mature}
- return self._call(endpoint, params=params, public=public)
+
+ try:
+ return self._call(
+ endpoint, params=params, public=public, log=False)
+ except Exception:
+ if not self.refresh_token_key:
+ raise
+ return self._call(endpoint, params=params, public=False)
def deviation_metadata(self, deviations):
""" Fetch deviation metadata for a set of deviations"""
@@ -1518,7 +1525,7 @@ class DeviantartOAuthAPI():
refresh_token_key, data["refresh_token"])
return "Bearer " + data["access_token"]
- def _call(self, endpoint, fatal=True, public=None, **kwargs):
+ def _call(self, endpoint, fatal=True, log=True, public=None, **kwargs):
"""Call an API endpoint"""
url = "https://www.deviantart.com/api/v1/oauth2" + endpoint
kwargs["fatal"] = None
@@ -1563,7 +1570,8 @@ class DeviantartOAuthAPI():
"cs/configuration.rst#extractordeviantartclient-id"
"--client-secret")
else:
- self.log.error(msg)
+ if log:
+ self.log.error(msg)
return data
def _pagination(self, endpoint, params,
@@ -1571,15 +1579,14 @@ class DeviantartOAuthAPI():
warn = True
if public is None:
public = self.public
- elif not public:
- self.public = False
while True:
data = self._call(endpoint, params=params, public=public)
- if key not in data:
+ try:
+ results = data[key]
+ except KeyError:
self.log.error("Unexpected API response: %s", data)
return
- results = data[key]
if unpack:
results = [item["journal"] for item in results
@@ -1588,7 +1595,7 @@ class DeviantartOAuthAPI():
if public and len(results) < params["limit"]:
if self.refresh_token_key:
self.log.debug("Switching to private access token")
- self.public = public = False
+ public = False
continue
elif data["has_more"] and warn:
warn = False
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index 497f1ef..c91347e 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -49,14 +49,16 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
("https://www.imagefap.com/gallery/7102714", {
"pattern": r"https://cdnh?\.imagefap\.com"
r"/images/full/\d+/\d+/\d+\.jpg",
- "keyword": "2ba96e84c2952c4750e9fa94a3f2b1f965cec2f3",
+ "keyword": "bdcb75b1e4b9dddc718f3d66e1a58afa9d81a38b",
"content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab",
}),
("https://www.imagefap.com/gallery/7876223", {
"pattern": r"https://cdnh?\.imagefap\.com"
r"/images/full/\d+/\d+/\d+\.jpg",
"keyword": {
+ "categories": ["Asses", "Softcore", "Pornstars"],
"count": 44,
+ "description": "",
"gallery_id": 7876223,
"image_id": int,
"num": int,
@@ -67,6 +69,21 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
},
"count": 44,
}),
+ # description (#3905)
+ ("https://www.imagefap.com/gallery/6180555", {
+ "range": "1",
+ "keyword": {
+ "categories": ["Amateur", "Softcore", "Homemade"],
+ "count": 36,
+ "description": "Nude and dressed sluts showing off the goods",
+ "gallery_id": 6180555,
+ "image_id": int,
+ "num": int,
+ "tags": [] ,
+ "title": "Dressed or Undressed MG*",
+ "uploader": "splitopen",
+ },
+ }),
("https://www.imagefap.com/pictures/7102714"),
("https://www.imagefap.com/gallery.php?gid=7102714"),
("https://beta.imagefap.com/gallery.php?gid=7102714"),
@@ -92,9 +109,14 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
data = {
"gallery_id": text.parse_int(self.gid),
- "tags": extr('name="keywords" content="', '"').split(", "),
"uploader": extr("porn picture gallery by ", " to see hottest"),
"title": text.unescape(extr("<title>", "<")),
+ "description": text.unescape(extr(
+ 'id="gdesc_text"', '<').partition(">")[2]),
+ "categories": text.split_html(extr(
+ 'id="cnt_cats"', '</div>'))[1::2],
+ "tags": text.split_html(extr(
+ 'id="cnt_tags"', '</div>'))[1::2],
"count": text.parse_int(extr(' 1 of ', ' pics"')),
}
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index d57ec89..df4ff26 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -135,6 +135,29 @@ class ImxtoImageExtractor(ImagehostImageExtractor):
}
+class ImxtoGalleryExtractor(ImagehostImageExtractor):
+ """Extractor for image galleries from imx.to"""
+ category = "imxto"
+ subcategory = "gallery"
+ pattern = r"(?:https?://)?(?:www\.)?(imx\.to/g/([^/?#]+))"
+ test = ("https://imx.to/g/ozdy", {
+ "pattern": ImxtoImageExtractor.pattern,
+ "keyword": {"title": "untitled gallery"},
+ "count": 40,
+ })
+
+ def items(self):
+ page = self.request(self.page_url).text
+ title, pos = text.extract(page, '<div class="title', '<')
+ data = {
+ "_extractor": ImxtoImageExtractor,
+ "title": text.unescape(title.partition(">")[2]).strip(),
+ }
+
+ for url in text.extract_iter(page, "<a href=", " ", pos):
+ yield Message.Queue, url.strip("\"'"), data
+
+
class AcidimgImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from acidimg.cc"""
category = "acidimg"
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 42d0a7b..f8f1600 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -347,8 +347,8 @@ class ImgurAPI():
def __init__(self, extractor):
self.extractor = extractor
self.headers = {
- "Authorization": "Client-ID " + extractor.config(
- "client-id", "546c25a59c58ad7"),
+ "Authorization": "Client-ID " + (
+ extractor.config("client-id") or "546c25a59c58ad7"),
}
def account_favorites(self, account):
diff --git a/gallery_dl/extractor/itchio.py b/gallery_dl/extractor/itchio.py
new file mode 100644
index 0000000..6034d12
--- /dev/null
+++ b/gallery_dl/extractor/itchio.py
@@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2023 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://itch.io/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class ItchioGameExtractor(Extractor):
+ """Extractor for itch.io games"""
+ category = "itchio"
+ subcategory = "game"
+ root = "https://itch.io"
+ directory_fmt = ("{category}", "{user[name]}")
+ filename_fmt = "{game[title]} ({id}).{extension}"
+ archive_fmt = "{id}"
+ pattern = r"(?:https?://)?(\w+).itch\.io/([\w-]+)"
+ test = (
+ ("https://sirtartarus.itch.io/a-craft-of-mine", {
+ "pattern": r"https://\w+\.ssl\.hwcdn\.net/upload2"
+ r"/game/1983311/7723751\?",
+ "count": 1,
+ "keyword": {
+ "extension": "",
+ "filename": "7723751",
+ "game": {
+ "id": 1983311,
+ "noun": "game",
+ "title": "A Craft Of Mine",
+ "url": "https://sirtartarus.itch.io/a-craft-of-mine",
+ },
+ "user": {
+ "id": 4060052,
+ "name": "SirTartarus",
+ "url": "https://sirtartarus.itch.io",
+ },
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ self.user, self.slug = match.groups()
+ Extractor.__init__(self, match)
+
+ def items(self):
+ game_url = "https://{}.itch.io/{}".format(self.user, self.slug)
+ page = self.request(game_url).text
+
+ params = {
+ "source": "view_game",
+ "as_props": "1",
+ "after_download_lightbox": "true",
+ }
+ headers = {
+ "Referer": game_url,
+ "X-Requested-With": "XMLHttpRequest",
+ "Origin": "https://{}.itch.io".format(self.user),
+ }
+ data = {
+ "csrf_token": text.unquote(self.session.cookies["itchio_token"]),
+ }
+
+ for upload_id in text.extract_iter(page, 'data-upload_id="', '"'):
+ file_url = "{}/file/{}".format(game_url, upload_id)
+ info = self.request(file_url, method="POST", params=params,
+ headers=headers, data=data).json()
+
+ game = info["lightbox"]["game"]
+ user = info["lightbox"]["user"]
+ game["url"] = game_url
+ user.pop("follow_button", None)
+ game = {"game": game, "user": user, "id": upload_id}
+
+ url = info["url"]
+ yield Message.Directory, game
+ yield Message.Url, url, text.nameext_from_url(url, game)
diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py
index 5ba18a3..6fd9f49 100644
--- a/gallery_dl/extractor/manganelo.py
+++ b/gallery_dl/extractor/manganelo.py
@@ -16,21 +16,26 @@ BASE_PATTERN = r"(?:https?://)?((?:chap|read|www\.|m\.)?mangan(?:at|el)o\.com)"
class ManganeloBase():
category = "manganelo"
root = "https://chapmanganato.com"
+ _match_chapter = None
def __init__(self, match):
domain, path = match.groups()
super().__init__(match, "https://" + domain + path)
self.session.headers['Referer'] = self.root
- self._match_chapter = re.compile(
- r"(?:[Vv]ol\.?\s*(\d+)\s?)?"
- r"[Cc]hapter\s*([^:]+)"
- r"(?::\s*(.+))?").match
+ if self._match_chapter is None:
+ ManganeloBase._match_chapter = re.compile(
+ r"(?:[Vv]ol\.?\s*(\d+)\s?)?"
+ r"[Cc]hapter\s*(\d+)([^:]*)"
+ r"(?::\s*(.+))?").match
def _parse_chapter(self, info, manga, author, date=None):
match = self._match_chapter(info)
- volume, chapter, title = match.groups() if match else ("", "", info)
- chapter, sep, minor = chapter.partition(".")
+ if match:
+ volume, chapter, minor, title = match.groups()
+ else:
+ volume = chapter = minor = ""
+ title = info
return {
"manga" : manga,
@@ -39,7 +44,7 @@ class ManganeloBase():
"title" : text.unescape(title) if title else "",
"volume" : text.parse_int(volume),
"chapter" : text.parse_int(chapter),
- "chapter_minor": sep + minor,
+ "chapter_minor": minor,
"lang" : "en",
"language" : "English",
}
@@ -61,6 +66,10 @@ class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
"keyword": "06e01fa9b3fc9b5b954c0d4a98f0153b40922ded",
"count": 45,
}),
+ ("https://chapmanganato.com/manga-no991297/chapter-8", {
+ "keyword": {"chapter": 8, "chapter_minor": "-1"},
+ "count": 20,
+ }),
("https://readmanganato.com/manga-gn983696/chapter-23"),
("https://manganelo.com/chapter/gamers/chapter_15"),
("https://manganelo.com/chapter/gq921227/chapter_23"),
diff --git a/gallery_dl/extractor/nana.py b/gallery_dl/extractor/nana.py
index 0f79d7f..24e676f 100644
--- a/gallery_dl/extractor/nana.py
+++ b/gallery_dl/extractor/nana.py
@@ -20,19 +20,23 @@ class NanaGalleryExtractor(GalleryExtractor):
"059f7de55a4297413bfbd432ce7d6e724dd42bae"), {
"pattern": r"https://nana\.my\.id/reader/"
r"\w+/image/page\?path=.*\.\w+",
- "title" : "Everybody Loves Shion",
- "artist" : "fuzui",
- "tags" : list,
- "count" : 29,
+ "keyword": {
+ "title" : "Everybody Loves Shion",
+ "artist": "fuzui",
+ "tags" : list,
+ "count" : 29,
+ },
}),
(("https://nana.my.id/reader/"
"77c8712b67013e427923573379f5bafcc0c72e46"), {
"pattern": r"https://nana\.my\.id/reader/"
r"\w+/image/page\?path=.*\.\w+",
- "title" : "Lovey-Dovey With an Otaku-Friendly Gyaru",
- "artist" : "Sueyuu",
- "tags" : ["Sueyuu"],
- "count" : 58,
+ "keyword": {
+ "title" : "Lovey-Dovey With an Otaku-Friendly Gyaru",
+ "artist": "Sueyuu",
+ "tags" : ["Sueyuu"],
+ "count" : 58,
+ },
}),
)
diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py
index 5f4ceea..beb3da2 100644
--- a/gallery_dl/extractor/nitter.py
+++ b/gallery_dl/extractor/nitter.py
@@ -162,7 +162,11 @@ class NitterExtractor(BaseExtractor):
banner = extr('class="profile-banner"><a href="', '"')
try:
- uid = banner.split("%2F")[4]
+ if "/enc/" in banner:
+ uid = binascii.a2b_base64(banner.rpartition(
+ "/")[2]).decode().split("/")[4]
+ else:
+ uid = banner.split("%2F")[4]
except Exception:
uid = 0
@@ -302,7 +306,10 @@ class NitterTweetsExtractor(NitterExtractor):
r"/media%2FCGMNYZvW0AIVoom\.jpg",
"range": "1",
}),
- ("https://nitter.1d4.us/supernaturepics"),
+ ("https://nitter.1d4.us/supernaturepics", {
+ "range": "1",
+ "keyword": {"user": {"id": "2976459548"}},
+ }),
("https://nitter.kavin.rocks/id:2976459548"),
("https://nitter.unixfox.eu/supernaturepics"),
)
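
The new branch above handles Nitter's ``/enc/`` banner links, which embed the
upstream banner URL as base64 instead of percent-encoding it; either way the
user ID is the fifth path segment. A round-trip sketch with a hypothetical
banner URL:

.. code:: python

    import binascii

    # hypothetical upstream URL; the numeric user ID sits at index 4
    banner = "https://pbs.twimg.com/profile_banners/2976459548/1500x500"
    encoded = binascii.b2a_base64(banner.encode()).decode()
    uid = binascii.a2b_base64(encoded).decode().split("/")[4]
    assert uid == "2976459548"
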
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index f381f12..af2a367 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -63,10 +63,20 @@ class NozomiExtractor(Extractor):
yield Message.Directory, post
for post["num"], image in enumerate(images, 1):
post["filename"] = post["dataid"] = did = image["dataid"]
- post["extension"] = ext = image["type"]
post["is_video"] = video = bool(image.get("is_video"))
+
+ ext = image["type"]
+ if video:
+ subdomain = "v"
+ elif ext == "gif":
+ subdomain = "g"
+ else:
+ subdomain = "w"
+ ext = "webp"
+
+ post["extension"] = ext
post["url"] = url = "https://{}.nozomi.la/{}/{}/{}.{}".format(
- "v" if video else "i", did[-1], did[-3:-1], did, ext)
+ subdomain, did[-1], did[-3:-1], did, ext)
yield Message.Url, url, post
def posts(self):
@@ -97,15 +107,17 @@ class NozomiPostExtractor(NozomiExtractor):
pattern = r"(?:https?://)?nozomi\.la/post/(\d+)"
test = (
("https://nozomi.la/post/3649262.html", {
- "url": "f4522adfc8159355fd0476de28761b5be0f02068",
- "content": "cd20d2c5149871a0b80a1b0ce356526278964999",
+ "url": "e5525e717aec712843be8b88592d6406ae9e60ba",
+ "pattern": r"https://w\.nozomi\.la/2/15/aaa9f7c632cde1e1a5baaff3fb"
+ r"6a6d857ec73df7fdc5cf5a358caf604bf73152\.webp",
+ "content": "6d62c4a7fea50c0a89d499603c4e7a2b4b9bffa8",
"keyword": {
"artist" : ["hammer (sunset beach)"],
"character": ["patchouli knowledge"],
"copyright": ["touhou"],
"dataid" : "re:aaa9f7c632cde1e1a5baaff3fb6a6d857ec73df7fdc5",
"date" : "dt:2016-07-26 02:32:03",
- "extension": "jpg",
+ "extension": "webp",
"filename" : str,
"height" : 768,
"is_video" : False,
@@ -118,14 +130,26 @@ class NozomiPostExtractor(NozomiExtractor):
}),
# multiple images per post
("https://nozomi.la/post/25588032.html", {
- "url": "6aa3b7db385abcc9d374bdffd19187bccbf8f228",
- "keyword": "2a2998af93c6438863c4077bd386b613b8bc2957",
+ "url": "fb956ccedcf2cf509739d26e2609e910244aa56c",
+ "keyword": "516ca5cbd0d2a46a8ce26679d6e08de5ac42184b",
"count": 7,
}),
# empty 'date' (#1163)
("https://nozomi.la/post/130309.html", {
"keyword": {"date": None},
- })
+ }),
+ # gif
+ ("https://nozomi.la/post/1647.html", {
+ "pattern": r"https://g\.nozomi\.la/a/f0/d1b06469e00d72e4f6346209c1"
+ r"49db459d76b58a074416c260ed93cc31fa9f0a\.gif",
+ "content": "952efb78252bbc9fb56df2e8fafb68d5e6364181",
+ }),
+ # video
+ ("https://nozomi.la/post/2269847.html", {
+ "pattern": r"https://v\.nozomi\.la/d/0e/ff88398862669783691b31519f"
+ r"2bea3a35c24b6e62e3ba2d89b4409e41c660ed\.webm",
+ "content": "57065e6c16da7b1c7098a63b36fb0c6c6f1b9bca",
+ }),
)
def __init__(self, match):
@@ -160,7 +184,7 @@ class NozomiTagExtractor(NozomiExtractor):
archive_fmt = "t_{search_tags}_{dataid}"
pattern = r"(?:https?://)?nozomi\.la/tag/([^/?#]+)-(\d+)\."
test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", {
- "pattern": r"^https://[iv]\.nozomi\.la/\w/\w\w/\w+\.\w+$",
+ "pattern": r"^https://[wgv]\.nozomi\.la/\w/\w\w/\w+\.\w+$",
"count": ">= 25",
"range": "1-25",
})
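
The fix above routes videos to the ``v`` subdomain, GIFs to ``g``, and all
other images to ``w`` with a forced ``webp`` extension, while keeping the
directory scheme derived from the dataid. A sketch using the dataid from the
first test case:

.. code:: python

    did = ("aaa9f7c632cde1e1a5baaff3fb"
           "6a6d857ec73df7fdc5cf5a358caf604bf73152")
    url = "https://{}.nozomi.la/{}/{}/{}.{}".format(
        "w", did[-1], did[-3:-1], did, "webp")
    assert url.startswith("https://w.nozomi.la/2/15/aaa9")
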
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index ec46ca3..404f296 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -71,8 +71,11 @@ class OAuthBase(Extractor):
browser = self.config("browser", True)
if browser:
- import webbrowser
- browser = webbrowser.get()
+ try:
+ import webbrowser
+ browser = webbrowser.get()
+ except Exception:
+ browser = None
if browser and browser.open(url):
name = getattr(browser, "name", "Browser")
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 56e3b39..f0a50c8 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2022 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -14,7 +14,7 @@ from .. import text
class PahealExtractor(Extractor):
"""Base class for paheal extractors"""
- basecategory = "booru"
+ basecategory = "shimmie2"
category = "paheal"
filename_fmt = "{category}_{id}_{md5}.{extension}"
archive_fmt = "{id}"
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index a17518f..b704031 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -596,6 +596,9 @@ class PixivSearchExtractor(PixivExtractor):
sort_map = {
"date": "date_asc",
"date_d": "date_desc",
+ "popular_d": "popular_desc",
+ "popular_male_d": "popular_male_desc",
+ "popular_female_d": "popular_female_desc",
}
try:
self.sort = sort = sort_map[sort]
@@ -670,7 +673,7 @@ class PixivPixivisionExtractor(PixivExtractor):
def works(self):
return (
- self.api.illust_detail(illust_id)
+ self.api.illust_detail(illust_id.partition("?")[0])
for illust_id in util.unique_sequence(text.extract_iter(
self.page, '<a href="https://www.pixiv.net/en/artworks/', '"'))
)
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 305de2a..cefe8d3 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -303,8 +303,8 @@ class RedditImageExtractor(Extractor):
category = "reddit"
subcategory = "image"
archive_fmt = "{filename}"
- pattern = (r"(?:https?://)?i\.redd(?:\.it|ituploads\.com)"
- r"/[^/?#]+(?:\?[^#]*)?")
+ pattern = (r"(?:https?://)?((?:i|preview)\.redd\.it|i\.reddituploads\.com)"
+ r"/([^/?#]+)(\?[^#]*)?")
test = (
("https://i.redd.it/upjtjcx2npzz.jpg", {
"url": "0de614900feef103e580b632190458c0b62b641a",
@@ -315,12 +315,29 @@ class RedditImageExtractor(Extractor):
"url": "f24f25efcedaddeec802e46c60d77ef975dc52a5",
"content": "541dbcc3ad77aa01ee21ca49843c5e382371fae7",
}),
+ # preview.redd.it -> i.redd.it
+ (("https://preview.redd.it/00af44lpn0u51.jpg?width=960&crop=smart"
+ "&auto=webp&v=enabled&s=dbca8ab84033f4a433772d9c15dbe0429c74e8ac"), {
+ "pattern": r"^https://i\.redd\.it/00af44lpn0u51\.jpg$"
+ }),
)
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ domain = match.group(1)
+ self.path = match.group(2)
+ if domain == "preview.redd.it":
+ self.domain = "i.redd.it"
+ self.query = ""
+ else:
+ self.domain = domain
+ self.query = match.group(3) or ""
+
def items(self):
- data = text.nameext_from_url(self.url)
+ url = "https://{}/{}{}".format(self.domain, self.path, self.query)
+ data = text.nameext_from_url(url)
yield Message.Directory, data
- yield Message.Url, self.url, data
+ yield Message.Url, url, data
class RedditAPI():
@@ -459,6 +476,9 @@ class RedditAPI():
def _pagination(self, endpoint, params):
id_min = self._parse_id("id-min", 0)
id_max = self._parse_id("id-max", float("inf"))
+ if id_max == 2147483647:
+ self.log.debug("Ignoring 'id-max' setting \"zik0zj\"")
+ id_max = float("inf")
date_min, date_max = self.extractor._get_date_min_max(0, 253402210800)
while True:
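
The ignored ``id-max`` value is no coincidence: Reddit post IDs are base36
numbers, and ``zik0zj`` decodes to the largest 32-bit signed integer, so a
config carrying it effectively means "no upper bound". A one-line check:

.. code:: python

    # "zik0zj" in base36 equals 2**31 - 1
    assert int("zik0zj", 36) == 2147483647 == 2**31 - 1
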
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index ea4cf43..f36051b 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -13,6 +13,7 @@ from .common import Message
from .. import text, util, exception
from ..cache import cache
import collections
+import re
BASE_PATTERN = r"(?:https?://)?" \
r"(?:(?:chan|beta|black|white)\.sankakucomplex\.com|sankaku\.app)" \
@@ -101,6 +102,11 @@ class SankakuTagExtractor(SankakuExtractor):
# match arbitrary query parameters
("https://chan.sankakucomplex.com"
"/?tags=marie_rose&page=98&next=3874906&commit=Search"),
+ # 'date:' tags (#1790)
+ ("https://chan.sankakucomplex.com/?tags=date:2023-03-20", {
+ "range": "1",
+ "count": 1,
+ }),
)
def __init__(self, match):
@@ -108,6 +114,15 @@ class SankakuTagExtractor(SankakuExtractor):
query = text.parse_query(match.group(1))
self.tags = text.unquote(query.get("tags", "").replace("+", " "))
+ if "date:" in self.tags:
+ # rewrite 'date:' tags (#1790)
+ self.tags = re.sub(
+ r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)",
+ r"date:\3.\2.\1", self.tags)
+ self.tags = re.sub(
+ r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)",
+ r"date:\1.\2.\3", self.tags)
+
def metadata(self):
return {"search_tags": self.tags}
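
The two substitutions above rewrite day-first ``date:`` tags into the
year-first, dot-separated form, presumably the one the search API accepts.
A quick sketch:

.. code:: python

    import re

    tags = "marie_rose date:20-03-2023"
    tags = re.sub(r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)",
                  r"date:\3.\2.\1", tags)
    assert tags == "marie_rose date:2023.03.20"
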
@@ -153,7 +168,7 @@ class SankakuPostExtractor(SankakuExtractor):
"""Extractor for single posts from sankaku.app"""
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/post/show/(\d+)"
+ pattern = BASE_PATTERN + r"/post/show/([0-9a-f]+)"
test = (
("https://sankaku.app/post/show/360451", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
@@ -181,6 +196,17 @@ class SankakuPostExtractor(SankakuExtractor):
"tags_general": ["key(mangaka)", "key(mangaka)"],
},
}),
+ # md5 hexdigest instead of ID (#3952)
+ (("https://chan.sankakucomplex.com/post/show"
+ "/f8ba89043078f0e4be2d9c46550b840a"), {
+ "pattern": r"https://s\.sankakucomplex\.com"
+ r"/data/f8/ba/f8ba89043078f0e4be2d9c46550b840a\.jpg",
+ "count": 1,
+ "keyword": {
+ "id": 33195194,
+ "md5": "f8ba89043078f0e4be2d9c46550b840a",
+ },
+ }),
("https://chan.sankakucomplex.com/post/show/360451"),
("https://chan.sankakucomplex.com/ja/post/show/360451"),
("https://beta.sankakucomplex.com/post/show/360451"),
@@ -248,7 +274,7 @@ class SankakuAPI():
"lang" : "en",
"page" : "1",
"limit": "1",
- "tags" : "id_range:" + post_id,
+ "tags" : ("md5:" if len(post_id) == 32 else "id_range:") + post_id,
}
return self._call("/posts", params)
diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py
new file mode 100644
index 0000000..285cd8f
--- /dev/null
+++ b/gallery_dl/extractor/shimmie2.py
@@ -0,0 +1,326 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2023 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for Shimmie2 instances"""
+
+from .common import BaseExtractor, Message
+from .. import text
+
+
+class Shimmie2Extractor(BaseExtractor):
+ """Base class for shimmie2 extractors"""
+ basecategory = "shimmie2"
+ filename_fmt = "{category}_{id}{md5:?_//}.{extension}"
+ archive_fmt = "{id}"
+
+ def __init__(self, match):
+ BaseExtractor.__init__(self, match)
+
+ try:
+ instance = INSTANCES[self.category]
+ except KeyError:
+ pass
+ else:
+ cookies = instance.get("cookies")
+ if cookies:
+ domain = self.root.rpartition("/")[2]
+ self._update_cookies_dict(cookies, domain=domain)
+ file_url = instance.get("file_url")
+ if file_url:
+ self.file_url_fmt = file_url
+
+ def items(self):
+ data = self.metadata()
+
+ for post in self.posts():
+
+ for key in ("id", "width", "height"):
+ post[key] = text.parse_int(post[key])
+ post["tags"] = text.unquote(post["tags"])
+ post.update(data)
+
+ url = post["file_url"]
+ if "/index.php?" in url:
+ post["filename"], _, post["extension"] = \
+ url.rpartition("/")[2].rpartition(".")
+ else:
+ text.nameext_from_url(url, post)
+
+ yield Message.Directory, post
+ yield Message.Url, url, post
+
+ def metadata(self):
+ """Return general metadata"""
+ return ()
+
+ def posts(self):
+ """Return an iterable containing data of all relevant posts"""
+ return ()
+
+
+INSTANCES = {
+ "mememuseum": {
+ "root": "https://meme.museum",
+ "pattern": r"meme\.museum",
+ },
+ "loudbooru": {
+ "root": "https://loudbooru.com",
+ "pattern": r"loudbooru\.com",
+ "cookies": {"ui-tnc-agreed": "true"},
+ },
+ "giantessbooru": {
+ "root": "https://giantessbooru.com",
+ "pattern": r"giantessbooru\.com",
+ "cookies": {"agreed": "true"},
+ },
+ "tentaclerape": {
+ "root": "https://tentaclerape.net",
+ "pattern": r"tentaclerape\.net",
+ },
+ "cavemanon": {
+ "root": "https://booru.cavemanon.xyz",
+ "pattern": r"booru\.cavemanon\.xyz",
+ "file_url": "{0}/index.php?q=image/{2}.{4}"
+ },
+}
+
+BASE_PATTERN = Shimmie2Extractor.update(INSTANCES) + r"/(?:index\.php\?q=)?"
+
+
+class Shimmie2TagExtractor(Shimmie2Extractor):
+ """Extractor for shimmie2 posts by tag search"""
+ subcategory = "tag"
+ directory_fmt = ("{category}", "{search_tags}")
+ file_url_fmt = "{}/_images/{}/{}%20-%20{}.{}"
+ pattern = BASE_PATTERN + r"post/list/([^/?#]+)(?:/(\d+))?()"
+ test = (
+ ("https://meme.museum/post/list/animated/1", {
+ "pattern": r"https://meme\.museum/_images/\w+/\d+%20-%20",
+ "count": ">= 30"
+ }),
+ ("https://loudbooru.com/post/list/original_character/1", {
+ "pattern": r"https://loudbooru\.com/_images/[0-9a-f]{32}/\d+",
+ "range": "1-100",
+ "count": 100,
+ }),
+ ("https://giantessbooru.com/post/list/smiling/1", {
+ "pattern": r"https://giantessbooru\.com/_images/[0-9a-f]{32}/\d+",
+ "range": "1-100",
+ "count": 100,
+ }),
+ ("https://tentaclerape.net/post/list/comic/1", {
+ "pattern": r"https://tentaclerape\.net/_images/[0-9a-f]{32}/\d+",
+ "range": "1-100",
+ "count": 100,
+ }),
+ ("https://booru.cavemanon.xyz/index.php?q=post/list/Amber/1", {
+ "pattern": r"https://booru\.cavemanon\.xyz"
+ r"/index\.php\?q=image/\d+\.\w+",
+ "range": "1-100",
+ "count": 100,
+ }),
+ )
+
+ def __init__(self, match):
+ Shimmie2Extractor.__init__(self, match)
+ lastindex = match.lastindex
+ self.tags = text.unquote(match.group(lastindex-2))
+ self.page = match.group(lastindex-1)
+
+ def metadata(self):
+ return {"search_tags": self.tags}
+
+ def posts(self):
+ pnum = text.parse_int(self.page, 1)
+ file_url_fmt = self.file_url_fmt.format
+
+ init = True
+ mime = ""
+
+ while True:
+ url = "{}/post/list/{}/{}".format(self.root, self.tags, pnum)
+ page = self.request(url).text
+ extr = text.extract_from(page)
+
+ if init:
+ init = False
+ has_mime = ("data-mime='" in page)
+ has_pid = ("data-post-id='" in page)
+
+ while True:
+ if has_mime:
+ mime = extr("data-mime='", "'")
+ if has_pid:
+ pid = extr("data-post-id='", "'")
+ else:
+ pid = extr("href='/post/view/", "?")
+
+ if not pid:
+ break
+
+ tags, dimensions, size = extr("title='", "'").split(" // ")
+ width, _, height = dimensions.partition("x")
+ md5 = extr("/_thumbs/", "/")
+
+ yield {
+ "file_url": file_url_fmt(
+ self.root, md5, pid, text.quote(tags),
+ mime.rpartition("/")[2] if mime else "jpg"),
+ "id": pid,
+ "md5": md5,
+ "tags": tags,
+ "width": width,
+ "height": height,
+ "size": text.parse_bytes(size[:-1]),
+ }
+
+ pnum += 1
+ if not extr(">Next<", ">"):
+ if not extr("/{}'>{}<".format(pnum, pnum), ">"):
+ return
+
+
+class Shimmie2PostExtractor(Shimmie2Extractor):
+ """Extractor for single shimmie2 posts"""
+ subcategory = "post"
+ pattern = BASE_PATTERN + r"post/view/(\d+)"
+ test = (
+ ("https://meme.museum/post/view/10243", {
+ "pattern": r"https://meme\.museum/_images/105febebcd5ca791ee332adc"
+ r"49971f78/10243%20-%20g%20beard%20open_source%20richar"
+ r"d_stallman%20stallman%20tagme%20text\.jpg",
+ "content": "45565f3f141fc960a8ae1168b80e718a494c52d2",
+ "keyword": {
+ "extension": "jpg",
+ "file_url": "https://meme.museum/_images/105febebcd5ca791ee332"
+ "adc49971f78/10243%20-%20g%20beard%20open_source%2"
+ "0richard_stallman%20stallman%20tagme%20text.jpg",
+ "filename": "10243 - g beard open_source richard_stallman "
+ "stallman tagme text",
+ "height": 451,
+ "id": 10243,
+ "md5": "105febebcd5ca791ee332adc49971f78",
+ "size": 0,
+ "subcategory": "post",
+ "tags": "/g/ beard open_source "
+ "richard_stallman stallman tagme text",
+ "width": 480,
+ },
+ }),
+ ("https://loudbooru.com/post/view/33828", {
+ "pattern": r"https://loudbooru\.com/_images/.+\.png",
+ "content": "a4755f787ba23ae2aa297a46810f802ca9032739",
+ "keyword": {
+ "extension": "png",
+ "file_url": "https://loudbooru.com/_images/ca2638d903c86e8337f"
+ "e9aeb4974be88/33828%20-%202020%20artist%3Astikyfi"
+ "nkaz%20character%3Alisa_loud%20cover%20fanfiction"
+ "%3Aplatz_eins%20frowning%20half-closed_eyes%20sol"
+ "o%20text%20title_card.png",
+ "filename": "33828 - 2020 artist:stikyfinkaz character:lisa_"
+ "loud cover fanfiction:platz_eins frowning "
+ "half-closed_eyes solo text title_card",
+ "height": 1920,
+ "id": 33828,
+ "md5": "ca2638d903c86e8337fe9aeb4974be88",
+ "tags": "2020 artist:stikyfinkaz character:lisa_loud cover "
+ "fanfiction:platz_eins frowning half-closed_eyes "
+ "solo text title_card",
+ "width": 1078,
+ },
+ }),
+ ("https://giantessbooru.com/post/view/41", {
+ "pattern": r"https://giantessbooru\.com/_images"
+ r"/3f67e1986496806b7b14ff3e82ac5af4/41\.jpg",
+ "content": "79115ed309d1f4e82e7bead6948760e889139c91",
+ "keyword": {
+ "extension": "jpg",
+ "file_url": "https://giantessbooru.com/_images"
+ "/3f67e1986496806b7b14ff3e82ac5af4/41.jpg",
+ "filename": "41",
+ "height": 0,
+ "id": 41,
+ "md5": "3f67e1986496806b7b14ff3e82ac5af4",
+ "size": 0,
+ "tags": "anime bare_midriff color drawing gentle giantess "
+ "karbo looking_at_tinies negeyari outdoors smiling "
+ "snake_girl white_hair",
+ "width": 0
+
+
+ },
+ }),
+ ("https://tentaclerape.net/post/view/10", {
+ "pattern": r"https://tentaclerape\.net/\./index\.php"
+ r"\?q=/image/10\.jpg",
+ "content": "d0fd8f0f6517a76cb5e23ba09f3844950bf2c516",
+ "keyword": {
+ "extension": "jpg",
+ "file_url": "https://tentaclerape.net/./index.php"
+ "?q=/image/10.jpg",
+ "filename": "10",
+ "height": 427,
+ "id": 10,
+ "md5": "945db71eeccaef82ce44b77564260c0b",
+ "size": 0,
+ "subcategory": "post",
+ "tags": "Deviant_Art Pet Tentacle artist_sche blonde_hair "
+ "blouse boots green_eyes highheels leash miniskirt "
+ "octopus schoolgirl white_skin willing",
+ "width": 300,
+ },
+ }),
+ # video
+ ("https://tentaclerape.net/post/view/91267", {
+ "pattern": r"https://tentaclerape\.net/\./index\.php"
+ r"\?q=/image/91267\.mp4",
+ }),
+ ("https://booru.cavemanon.xyz/index.php?q=post/view/8335", {
+ "pattern": r"https://booru\.cavemanon\.xyz"
+ r"/index\.php\?q=image/8335\.png",
+ "content": "7158f7e4abbbf143bad5835eb93dbe4d68c1d4ab",
+ "keyword": {
+ "extension": "png",
+ "file_url": "https://booru.cavemanon.xyz"
+ "/index.php?q=image/8335.png",
+ "filename": "8335",
+ "height": 460,
+ "id": 8335,
+ "md5": "",
+ "size": 0,
+ "tags": "Color Fang",
+ "width": 459,
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ Shimmie2Extractor.__init__(self, match)
+ self.post_id = match.group(match.lastindex)
+
+ def posts(self):
+ url = "{}/post/view/{}".format(self.root, self.post_id)
+ extr = text.extract_from(self.request(url).text)
+
+ post = {
+ "id" : self.post_id,
+ "tags" : extr(": ", "<").partition(" - ")[0].rstrip(")"),
+ "md5" : extr("/_thumbs/", "/"),
+ "file_url": self.root + (
+ extr("id='main_image' src='", "'") or
+ extr("<source src='", "'")),
+ "width" : extr("data-width=", " ").strip("\"'"),
+ "height" : extr("data-height=", ">").partition(
+ " ")[0].strip("\"'"),
+ "size" : 0,
+ }
+
+ if not post["md5"]:
+ post["md5"] = text.extr(post["file_url"], "/_images/", "/")
+
+ return (post,)
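
For orientation, both ``file_url`` templates above are filled positionally
with ``(root, md5, post_id, tags, extension)``; the cavemanon override only
consumes indices 0, 2, and 4. A sketch with hypothetical values:

.. code:: python

    fmt = "{0}/index.php?q=image/{2}.{4}"  # cavemanon "file_url" override
    url = fmt.format(
        "https://booru.cavemanon.xyz",  # 0: root
        "f" * 32,                       # 1: md5 (unused here)
        8335,                           # 2: post id
        "Color%20Fang",                 # 3: quoted tags (unused here)
        "png")                          # 4: extension
    assert url == "https://booru.cavemanon.xyz/index.php?q=image/8335.png"
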
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 155db1e..b45609d 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,7 +10,7 @@
from .common import Extractor, Message
from .. import text, oauth, exception
-from datetime import datetime, timedelta
+from datetime import datetime, date, timedelta
import re
@@ -269,7 +269,7 @@ class TumblrExtractor(Extractor):
class TumblrUserExtractor(TumblrExtractor):
- """Extractor for all images from a tumblr-user"""
+ """Extractor for a Tumblr user's posts"""
subcategory = "user"
pattern = BASE_PATTERN + r"(?:/page/\d+|/archive)?/?$"
test = (
@@ -307,6 +307,16 @@ class TumblrUserExtractor(TumblrExtractor):
"options": (("date-min", "201804"), ("date-max", "201805"),
("date-format", "%Y%m"))
}),
+ # pagination with 'date-max' (#2191) and 'api-key'
+ ("https://donttrustthetits.tumblr.com/", {
+ "options": (
+ ("access-token", None),
+ ("original", False),
+ ("date-max", "2015-04-25T00:00:00"),
+ ("date-min", "2015-04-01T00:00:00"),
+ ),
+ "count": 316,
+ }),
("https://demo.tumblr.com/page/2"),
("https://demo.tumblr.com/archive"),
("tumblr:http://www.b-authentique.com/"),
@@ -321,7 +331,7 @@ class TumblrUserExtractor(TumblrExtractor):
class TumblrPostExtractor(TumblrExtractor):
- """Extractor for images from a single post on tumblr"""
+ """Extractor for a single Tumblr post"""
subcategory = "post"
pattern = BASE_PATTERN + r"/(?:post/|image/)?(\d+)"
test = (
@@ -389,7 +399,7 @@ class TumblrPostExtractor(TumblrExtractor):
class TumblrTagExtractor(TumblrExtractor):
- """Extractor for images from a tumblr-user by tag"""
+ """Extractor for Tumblr user's posts by tag"""
subcategory = "tag"
pattern = BASE_PATTERN + r"/tagged/([^/?#]+)"
test = (
@@ -411,8 +421,37 @@ class TumblrTagExtractor(TumblrExtractor):
return self.api.posts(self.blog, {"tag": self.tag})
+class TumblrDayExtractor(TumblrExtractor):
+ """Extractor for Tumblr user's posts by day"""
+ subcategory = "day"
+ pattern = BASE_PATTERN + r"/day/(\d\d\d\d/\d\d/\d\d)"
+ test = (
+ ("https://mikf123.tumblr.com/day/2018/01/05", {
+ "pattern": r"https://64\.media\.tumblr\.com"
+ r"/1a2be8c63f1df58abd2622861696c72a"
+ r"/tumblr_ozm9nqst9t1wgha4yo1_1280\.jpg",
+ "keyword": {"id": 169341068404},
+ "count": 1,
+ }),
+ ("https://www.tumblr.com/blog/view/mikf123/day/2018/01/05"),
+ ("https://www.tumblr.com/blog/mikf123/day/2018/01/05"),
+ ("https://www.tumblr.com/mikf123/day/2018/01/05"),
+ )
+
+ def __init__(self, match):
+ TumblrExtractor.__init__(self, match)
+ year, month, day = match.group(4).split("/")
+ self.date_min = ts = (
+ # 719163 == date(1970, 1, 1).toordinal()
+ date(int(year), int(month), int(day)).toordinal() - 719163) * 86400
+ self.api.before = ts + 86400
+
+ def posts(self):
+ return self.api.posts(self.blog, {})
+
+
class TumblrLikesExtractor(TumblrExtractor):
- """Extractor for images from a tumblr-user's liked posts"""
+ """Extractor for a Tumblr user's liked posts"""
subcategory = "likes"
directory_fmt = ("{category}", "{blog_name}", "likes")
archive_fmt = "f_{blog[name]}_{id}_{num}"
@@ -431,7 +470,11 @@ class TumblrLikesExtractor(TumblrExtractor):
class TumblrAPI(oauth.OAuth1API):
- """Minimal interface for the Tumblr API v2"""
+ """Interface for the Tumblr API v2
+
+ https://github.com/tumblr/docs/blob/master/api.md
+ """
+ ROOT = "https://api.tumblr.com"
API_KEY = "O3hU2tMi5e4Qs5t3vezEi6L0qRORJ5y9oUpSGsrWu8iA3UCc3B"
API_SECRET = "sFdsK3PDdP2QpYMRAoq0oDnw0sFS24XigXmdfnaeNZpJpqAn03"
BLOG_CACHE = {}
@@ -442,55 +485,46 @@ class TumblrAPI(oauth.OAuth1API):
def info(self, blog):
"""Return general information about a blog"""
- if blog not in self.BLOG_CACHE:
- self.BLOG_CACHE[blog] = self._call(blog, "info", {})["blog"]
- return self.BLOG_CACHE[blog]
+ try:
+ return self.BLOG_CACHE[blog]
+ except KeyError:
+ endpoint = "/v2/blog/{}/info".format(blog)
+ params = {"api_key": self.api_key} if self.api_key else None
+ self.BLOG_CACHE[blog] = blog = self._call(endpoint, params)["blog"]
+ return blog
def avatar(self, blog, size="512"):
"""Retrieve a blog avatar"""
if self.api_key:
- url_fmt = "https://api.tumblr.com/v2/blog/{}/avatar/{}?api_key={}"
- return url_fmt.format(blog, size, self.api_key)
+ return "{}/v2/blog/{}/avatar/{}?api_key={}".format(
+ self.ROOT, blog, size, self.api_key)
+ endpoint = "/v2/blog/{}/avatar".format(blog)
params = {"size": size}
- data = self._call(blog, "avatar", params, allow_redirects=False)
- return data["avatar_url"]
+ return self._call(
+ endpoint, params, allow_redirects=False)["avatar_url"]
def posts(self, blog, params):
"""Retrieve published posts"""
- params["offset"] = self.extractor.config("offset") or 0
- params["limit"] = 50
+ params["offset"] = self.extractor.config("offset")
+ params["limit"] = "50"
params["reblog_info"] = "true"
-        if self.posts_type:
-            params["type"] = self.posts_type
-        if self.before:
-            params["before"] = self.before
-        while True:
-            data = self._call(blog, "posts", params)
-            self.BLOG_CACHE[blog] = data["blog"]
-            yield from data["posts"]
-            params["offset"] += params["limit"]
-            if params["offset"] >= data["total_posts"]:
-                return
+        params["type"] = self.posts_type
+        params["before"] = self.before
+
+        if self.before and params["offset"]:
+            self.log.warning("'offset' and 'date-max' cannot be used together")
+
+        return self._pagination(blog, "/posts", params, cache=True)
def likes(self, blog):
"""Retrieve liked posts"""
params = {"limit": "50", "before": self.before}
-        while True:
-            posts = self._call(blog, "likes", params)["liked_posts"]
-            if not posts:
-                return
-            yield from posts
-            params["before"] = posts[-1]["liked_timestamp"]
-
-    def _call(self, blog, endpoint, params, **kwargs):
-        if self.api_key:
-            params["api_key"] = self.api_key
-        url = "https://api.tumblr.com/v2/blog/{}/{}".format(
-            blog, endpoint)
-        response = self.request(url, params=params, **kwargs)
+        return self._pagination(blog, "/likes", params, key="liked_posts")
+
+    def _call(self, endpoint, params, **kwargs):
+        url = self.ROOT + endpoint
+        kwargs["params"] = params
+        response = self.request(url, **kwargs)
try:
data = response.json()
@@ -535,7 +569,7 @@ class TumblrAPI(oauth.OAuth1API):
if self.extractor.config("ratelimit") == "wait":
self.extractor.wait(seconds=reset)
- return self._call(blog, endpoint, params)
+ return self._call(endpoint, params, **kwargs)
t = (datetime.now() + timedelta(seconds=float(reset))).time()
raise exception.StopExtraction(
@@ -547,6 +581,29 @@ class TumblrAPI(oauth.OAuth1API):
if reset:
self.log.info("Hourly API rate limit exceeded")
self.extractor.wait(seconds=reset)
- return self._call(blog, endpoint, params)
+ return self._call(endpoint, params, **kwargs)
raise exception.StopExtraction(data)
+
+ def _pagination(self, blog, endpoint, params, key="posts", cache=False):
+ endpoint = "/v2/blog/{}{}".format(blog, endpoint)
+ if self.api_key:
+ params["api_key"] = self.api_key
+
+ while True:
+ data = self._call(endpoint, params)
+
+ if cache:
+ self.BLOG_CACHE[blog] = data["blog"]
+ cache = False
+
+ yield from data[key]
+
+ try:
+ endpoint = data["_links"]["next"]["href"]
+ except KeyError:
+ return
+
+ params = None
+ if self.api_key:
+ endpoint += "&api_key=" + self.api_key
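
Two details of the Tumblr changes are easy to miss. TumblrDayExtractor maps a
calendar day to an epoch-seconds window (719163 is simply
date(1970, 1, 1).toordinal()), and the listing endpoints now page by following
the cursor in the API's "_links.next.href" instead of incrementing an offset.
A self-contained sketch of both, with fetch_json() standing in for the
authenticated TumblrAPI._call():

from datetime import date

EPOCH_ORDINAL = date(1970, 1, 1).toordinal()   # == 719163

def day_window(year, month, day):
    # midnight UTC of the given day in epoch seconds, plus the
    # exclusive upper bound one day (86400 seconds) later
    ts = (date(year, month, day).toordinal() - EPOCH_ORDINAL) * 86400
    return ts, ts + 86400

print(day_window(2018, 1, 5))   # (1515110400, 1515196800)

def paginate(fetch_json, endpoint, params, key="posts"):
    # follow the cursor the API returns; a "next" href already carries
    # the full query string, so the explicit params are dropped
    while True:
        data = fetch_json(endpoint, params)
        yield from data[key]
        try:
            endpoint = data["_links"]["next"]["href"]
        except KeyError:
            return
        params = None
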
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 2ccc7e5..5e68f13 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -45,7 +45,8 @@ class TwitterExtractor(Extractor):
if not self.config("transform", True):
self._transform_user = util.identity
self._transform_tweet = util.identity
- self._user = self._user_obj = None
+ self._user = None
+ self._user_obj = None
self._user_cache = {}
self._init_sizes()
@@ -769,6 +770,13 @@ class TwitterTweetExtractor(TwitterExtractor):
"pattern": r"https://pbs\.twimg\.com/media/EaK.+=jpg",
"count": 4,
}),
+ # different 'user' and 'author' in quoted Tweet (#3922)
+ ("https://twitter.com/web/status/1644907989109751810", {
+ "keyword": {
+ "author": {"id": 321629993 , "name": "Cakes_Comics"},
+ "user" : {"id": 718928225360080897, "name": "StobiesGalaxy"},
+ },
+ }),
# TwitPic embeds (#579)
("https://twitter.com/i/web/status/112900228289540096", {
"options": (("twitpic", True), ("cards", False)),
@@ -897,7 +905,8 @@ Your reaction.""",
for tweet in self.api.tweet_detail(tweet_id):
if tweet["rest_id"] == tweet_id or \
tweet.get("_retweet_id_str") == tweet_id:
- self._assign_user(tweet["core"]["user_results"]["result"])
+ if self._user_obj is None:
+ self._assign_user(tweet["core"]["user_results"]["result"])
tweets.append(tweet)
tweet_id = tweet["legacy"].get("quoted_status_id_str")
@@ -1561,9 +1570,9 @@ class TwitterAPI():
if esw("tweet-"):
tweets.append(entry)
- elif esw("homeConversation-"):
- tweets.extend(entry["content"]["items"])
- elif esw("conversationthread-"):
+ elif esw(("homeConversation-",
+ "profile-conversation-",
+ "conversationthread-")):
tweets.extend(entry["content"]["items"])
elif esw("tombstone-"):
item = entry["content"]["itemContent"]
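
The twitter.py change works because str.startswith() accepts a tuple of
prefixes, letting three timeline entry kinds share one branch (esw in the
diff is assumed to be a bound entryId.startswith method):

entry_ids = [
    "tweet-123",
    "homeConversation-456",
    "profile-conversation-789",
    "conversationthread-012",
    "tombstone-345",
]
prefixes = ("homeConversation-",
            "profile-conversation-",
            "conversationthread-")
for entry_id in entry_ids:
    if entry_id.startswith(prefixes):
        print(entry_id, "-> handled as a conversation module")
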
diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py
new file mode 100644
index 0000000..1cebdf7
--- /dev/null
+++ b/gallery_dl/extractor/vipergirls.py
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2023 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://vipergirls.to/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?vipergirls\.to"
+
+
+class VipergirlsExtractor(Extractor):
+ """Base class for vipergirls extractors"""
+ category = "vipergirls"
+ root = "https://vipergirls.to"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.session.headers["Referer"] = self.root
+
+ def items(self):
+ for html in self.posts():
+
+ pos = html.find('<a href="')
+ if pos < 0:
+ continue
+
+ title = text.extr(html, '<h2 class="title', '<')
+ data = {
+ "title": text.unescape(title.partition(">")[2].strip()),
+ }
+
+ yield Message.Directory, data
+ for href in text.extract_iter(html, '<a href="', '"', pos):
+ yield Message.Queue, href, data
+
+
+class VipergirlsThreadExtractor(VipergirlsExtractor):
+ """Extractor for vipergirls threads"""
+ subcategory = "thread"
+ pattern = BASE_PATTERN + r"/threads/(\d+)(?:-[^/?#]+)?(/page\d+)?$"
+ test = (
+ (("https://vipergirls.to/threads/4328304"
+ "-2011-05-28-Danica-Simply-Beautiful-x112-4500x3000"), {
+ "url": "b22feaa35a358bb36086c2b9353aee28989e1d7a",
+ "count": 227,
+ }),
+ ("https://vipergirls.to/threads/6858916-Karina/page4", {
+ "count": 1294,
+ }),
+ ("https://vipergirls.to/threads/4328304"),
+ )
+
+ def __init__(self, match):
+ VipergirlsExtractor.__init__(self, match)
+ self.thread_id, self.page = match.groups()
+
+ def posts(self):
+ url = "{}/threads/{}{}".format(
+ self.root, self.thread_id, self.page or "")
+
+ while True:
+ page = self.request(url).text
+ yield from text.extract_iter(
+ page, '<div class="postbody">', '</blockquote>')
+
+ url = text.extr(page, '<a rel="next" href="', '"')
+ if not url:
+ return
+ url = "{}/{}".format(self.root, url)
+
+
+class VipergirlsPostExtractor(VipergirlsExtractor):
+ """Extractor for vipergirls posts"""
+ subcategory = "post"
+ pattern = (BASE_PATTERN +
+ r"/threads/(\d+)(?:-[^/?#]+)?\?(p=\d+[^#]*)#post(\d+)")
+ test = (
+ (("https://vipergirls.to/threads/4328304-2011-05-28-Danica-Simply-"
+ "Beautiful-x112-4500x3000?p=116038081&viewfull=1#post116038081"), {
+ "pattern": r"https://vipr\.im/\w{12}$",
+ "range": "2-113",
+ "count": 112,
+ "keyword": {
+ "title": "FemJoy Danica - Simply Beautiful (x112) 3000x4500",
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ VipergirlsExtractor.__init__(self, match)
+ self.thread_id, self.query, self.post_id = match.groups()
+
+ def posts(self):
+ url = "{}/threads/{}?{}".format(self.root, self.thread_id, self.query)
+ page = self.request(url).text
+
+ try:
+ pos = page.index('id="post_' + self.post_id + '"')
+ return (text.extract(
+ page, '<div class="postbody">', '</blockquote>', pos)[0],)
+ except Exception:
+ raise exception.NotFoundError("post")
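
VipergirlsThreadExtractor pages through a thread by re-requesting whatever URL
the <a rel="next" href="..."> link points to and stopping once no such link
remains. A stand-alone sketch of that loop using requests (URL and markup
details mirror the extractor above but are illustrative only):

import requests

def thread_pages(root, path):
    # yield the HTML of every page of a thread, following rel="next";
    # the site emits root-relative hrefs, hence the root + "/" join
    url = root + path
    while url:
        html = requests.get(url, headers={"Referer": root}).text
        yield html
        _, sep, rest = html.partition('<a rel="next" href="')
        url = root + "/" + rest.partition('"')[0] if sep else None

# for page in thread_pages("https://vipergirls.to", "/threads/4328304"):
#     ...
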
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 714f4fe..5004bed 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -124,10 +124,8 @@ class MetadataPP(PostProcessor):
for key, func in self.fields.items():
obj = kwdict
try:
- while "[" in key:
- name, _, key = key.partition("[")
- obj = obj[name]
- key = key.rstrip("]")
+ if "[" in key:
+ obj, key = _traverse(obj, key)
obj[key] = func(kwdict)
except Exception:
pass
@@ -137,10 +135,8 @@ class MetadataPP(PostProcessor):
for key in self.fields:
obj = kwdict
try:
- while "[" in key:
- name, _, key = key.partition("[")
- obj = obj[name]
- key = key.rstrip("]")
+ if "[" in key:
+ obj, key = _traverse(obj, key)
del obj[key]
except Exception:
pass
@@ -214,4 +210,15 @@ class MetadataPP(PostProcessor):
)
+def _traverse(obj, key):
+ name, _, key = key.partition("[")
+ obj = obj[name]
+
+ while "[" in key:
+ name, _, key = key.partition("[")
+ obj = obj[name.strip("\"']")]
+
+ return obj, key.strip("\"']")
+
+
__postprocessor__ = MetadataPP
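
The new _traverse() helper lets "fields" names address nested dicts with
optional quoting around each bracketed key. A worked example of the same
parsing, runnable on its own:

def _traverse(obj, key):
    # split off the leading name before the first '[' ...
    name, _, key = key.partition("[")
    obj = obj[name]
    # ... then walk one bracketed segment at a time, accepting
    # unquoted, single-quoted, and double-quoted keys alike
    while "[" in key:
        name, _, key = key.partition("[")
        obj = obj[name.strip("\"']")]
    return obj, key.strip("\"']")

kwdict = {"bar": {"baz": {"a": 3}}}
obj, key = _traverse(kwdict, 'bar["baz"][a]')
print(obj, key)    # {'a': 3} a
del obj[key]       # removes kwdict["bar"]["baz"]["a"]
print(kwdict)      # {'bar': {'baz': {}}}
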
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index c40736a..d4ef532 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.25.2"
+__version__ = "1.25.3"
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index b4638b7..eb09b9b 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -409,9 +409,12 @@ def parse_command_line(module, argv):
"postprocessor_args": opts.postprocessor_args,
"cn_verification_proxy": opts.cn_verification_proxy,
"geo_verification_proxy": opts.geo_verification_proxy,
- "geo_bypass": opts.geo_bypass,
- "geo_bypass_country": opts.geo_bypass_country,
- "geo_bypass_ip_block": opts.geo_bypass_ip_block,
+ "geo_bypass": getattr(
+ opts, "geo_bypass", "default"),
+ "geo_bypass_country": getattr(
+ opts, "geo_bypass_country", None),
+ "geo_bypass_ip_block": getattr(
+ opts, "geo_bypass_ip_block", None),
"compat_opts": compat_opts,
}
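
getattr() with a default keeps this option mapping working whether or not the
parsed-options object still defines the geo-bypass attributes, which newer
yt-dlp releases no longer provide (they were folded into an "xff" mechanism,
per the test rename below). A tiny sketch, with Opts standing in for the
namespace returned by the option parser:

class Opts:
    pass   # a newer parser that no longer defines geo_bypass*

opts = Opts()
ydl_opts = {
    "geo_bypass":          getattr(opts, "geo_bypass", "default"),
    "geo_bypass_country":  getattr(opts, "geo_bypass_country", None),
    "geo_bypass_ip_block": getattr(opts, "geo_bypass_ip_block", None),
}
print(ydl_opts)
# {'geo_bypass': 'default', 'geo_bypass_country': None,
#  'geo_bypass_ip_block': None}
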
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index c78d7b0..ac89b55 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -388,51 +388,60 @@ class MetadataTest(BasePostprocessorTest):
""")
def test_metadata_modify(self):
- kwdict = {"foo": 0, "bar": {"bax": 1, "bay": 2, "baz": 3}}
+ kwdict = {"foo": 0, "bar": {"bax": 1, "bay": 2, "baz": 3, "ba2": {}}}
self._create({
"mode": "modify",
"fields": {
- "foo" : "{filename}-{foo!s}",
- "foo2" : "\fE bar['bax'] + 122",
- "bar[baz]": "{_now}",
- "bar[ba2]": "test",
+ "foo" : "{filename}-{foo!s}",
+ "foo2" : "\fE bar['bax'] + 122",
+ "bar[\"baz\"]" : "{_now}",
+ "bar['ba2'][a]": "test",
},
}, kwdict)
- pdict = self.pathfmt.kwdict
+ pdict = self.pathfmt.kwdict
self.assertIsNot(kwdict, pdict)
self.assertEqual(pdict["foo"], kwdict["foo"])
self.assertEqual(pdict["bar"], kwdict["bar"])
self._trigger()
- self.assertEqual(pdict["foo"] , "file-0")
- self.assertEqual(pdict["foo2"] , 123)
- self.assertEqual(pdict["bar"]["ba2"], "test")
+ self.assertEqual(pdict["foo"] , "file-0")
+ self.assertEqual(pdict["foo2"], 123)
+ self.assertEqual(pdict["bar"]["ba2"]["a"], "test")
self.assertIsInstance(pdict["bar"]["baz"], datetime)
def test_metadata_delete(self):
- kwdict = {"foo": 0, "bar": {"bax": 1, "bay": 2, "baz": 3}}
- self._create({"mode": "delete", "fields": ["foo", "bar[baz]"]}, kwdict)
- pdict = self.pathfmt.kwdict
+ kwdict = {
+ "foo": 0,
+ "bar": {
+ "bax": 1,
+ "bay": 2,
+ "baz": {"a": 3, "b": 4},
+ },
+ }
+ self._create({
+ "mode": "delete",
+ "fields": ["foo", "bar['bax']", "bar[\"baz\"][a]"],
+ }, kwdict)
+ pdict = self.pathfmt.kwdict
self.assertIsNot(kwdict, pdict)
+
self.assertEqual(pdict["foo"], kwdict["foo"])
self.assertEqual(pdict["bar"], kwdict["bar"])
- del kwdict["foo"]
- del kwdict["bar"]["baz"]
-
self._trigger()
+
self.assertNotIn("foo", pdict)
- self.assertNotIn("baz", pdict["bar"])
- self.assertEqual(kwdict["bar"], pdict["bar"])
+ self.assertNotIn("bax", pdict["bar"])
+ self.assertNotIn("a", pdict["bar"]["baz"])
# no errors for deleted/undefined fields
self._trigger()
self.assertNotIn("foo", pdict)
- self.assertNotIn("baz", pdict["bar"])
- self.assertEqual(kwdict["bar"], pdict["bar"])
+ self.assertNotIn("bax", pdict["bar"])
+ self.assertNotIn("a", pdict["bar"]["baz"])
def test_metadata_option_skip(self):
self._create({"skip": True})
diff --git a/test/test_ytdl.py b/test/test_ytdl.py
index 7b82a0f..4c20f67 100644
--- a/test/test_ytdl.py
+++ b/test/test_ytdl.py
@@ -235,6 +235,12 @@ class Test_CommandlineArguments(unittest.TestCase):
class Test_CommandlineArguments_YtDlp(Test_CommandlineArguments):
module_name = "yt_dlp"
+ @classmethod
+ def setUpClass(cls):
+ super().setUpClass()
+ if cls.module.version.__version__ > "2023.03.04":
+ cls.test_geo_bypass = cls._test_geo_bypass_xff
+
def test_retries_extractor(self):
inf = float("inf")
@@ -269,6 +275,16 @@ class Test_CommandlineArguments_YtDlp(Test_CommandlineArguments):
"title:%(artist)s - %(title)s")],
})
+ def _test_geo_bypass_xff(self):
+ self._("--geo-bypass",
+ "geo_bypass", "default")
+ self._("--no-geo-bypass",
+ "geo_bypass", "never")
+ self._(["--geo-bypass-country", "EN"],
+ "geo_bypass", "EN")
+ self._(["--geo-bypass-ip-block", "198.51.100.14/24"],
+ "geo_bypass", "198.51.100.14/24")
+
if __name__ == "__main__":
unittest.main(warnings="ignore")
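
The version gate in setUpClass() compares yt-dlp's version as a plain string,
which is sound as long as the components stay zero-padded YYYY.MM.DD dates,
since those order correctly lexicographically:

versions = ["2023.03.04", "2023.03.03", "2023.11.14", "2022.12.31"]
print(sorted(versions))
# ['2022.12.31', '2023.03.03', '2023.03.04', '2023.11.14']
print("2023.11.14" > "2023.03.04")   # True
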