Diffstat
-rw-r--r--  CHANGELOG.md                            50
-rw-r--r--  PKG-INFO                                 9
-rw-r--r--  README.rst                               4
-rw-r--r--  data/man/gallery-dl.1                    2
-rw-r--r--  data/man/gallery-dl.conf.5              96
-rw-r--r--  docs/gallery-dl.conf                     9
-rw-r--r--  gallery_dl.egg-info/PKG-INFO             9
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt          1
-rw-r--r--  gallery_dl/aes.py                       10
-rw-r--r--  gallery_dl/config.py                     1
-rw-r--r--  gallery_dl/downloader/ytdl.py           62
-rw-r--r--  gallery_dl/extractor/__init__.py         1
-rw-r--r--  gallery_dl/extractor/arcalive.py        14
-rw-r--r--  gallery_dl/extractor/bbc.py             33
-rw-r--r--  gallery_dl/extractor/bunkr.py            3
-rw-r--r--  gallery_dl/extractor/common.py          47
-rw-r--r--  gallery_dl/extractor/danbooru.py       101
-rw-r--r--  gallery_dl/extractor/deviantart.py      84
-rw-r--r--  gallery_dl/extractor/hentaifox.py      119
-rw-r--r--  gallery_dl/extractor/hitomi.py          69
-rw-r--r--  gallery_dl/extractor/imhentai.py        50
-rw-r--r--  gallery_dl/extractor/instagram.py       11
-rw-r--r--  gallery_dl/extractor/kemonoparty.py     68
-rw-r--r--  gallery_dl/extractor/mangapark.py      280
-rw-r--r--  gallery_dl/extractor/mastodon.py         3
-rw-r--r--  gallery_dl/extractor/nozomi.py          11
-rw-r--r--  gallery_dl/extractor/patreon.py          9
-rw-r--r--  gallery_dl/extractor/pinterest.py        3
-rw-r--r--  gallery_dl/extractor/sexcom.py         121
-rw-r--r--  gallery_dl/extractor/skeb.py             7
-rw-r--r--  gallery_dl/extractor/subscribestar.py    6
-rw-r--r--  gallery_dl/extractor/tiktok.py         110
-rw-r--r--  gallery_dl/extractor/zerochan.py        22
-rw-r--r--  gallery_dl/util.py                       4
-rw-r--r--  gallery_dl/version.py                    2
-rw-r--r--  test/test_config.py                      1
-rw-r--r--  test/test_cookies.py                    48
-rw-r--r--  test/test_extractor.py                  55
-rw-r--r--  test/test_results.py                     9
39 files changed, 879 insertions, 665 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 484ddeb..257f47b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,15 +1,43 @@
-## 1.29.2 - 2025-03-15
+## 1.29.3 - 2025-03-29
### Extractors
#### Additions
-- [arcalive] add support ([#5657](https://github.com/mikf/gallery-dl/issues/5657) [#7100](https://github.com/mikf/gallery-dl/issues/7100))
-- [furaffinity] add `folder` extractor ([#1817](https://github.com/mikf/gallery-dl/issues/1817) [#7159](https://github.com/mikf/gallery-dl/issues/7159))
+- [danbooru] add `favgroup` extractor
+- [imhentai] support `hentaienvy.com` and `hentaizap.com` ([#7192](https://github.com/mikf/gallery-dl/issues/7192) [#7218](https://github.com/mikf/gallery-dl/issues/7218))
#### Fixes
-- [civitai] fix/improve query parameter handling ([#7138](https://github.com/mikf/gallery-dl/issues/7138))
-- [facebook] improve `date` extraction ([#7151](https://github.com/mikf/gallery-dl/issues/7151))
-- [sankaku] update API URLs ([#7154](https://github.com/mikf/gallery-dl/issues/7154) [#7155](https://github.com/mikf/gallery-dl/issues/7155) [#7163](https://github.com/mikf/gallery-dl/issues/7163))
-- [twitter] prevent exception in `_extract_components()` ([#7139](https://github.com/mikf/gallery-dl/issues/7139))
+- [bunkr] fix `filename` extraction ([#7237](https://github.com/mikf/gallery-dl/issues/7237))
+- [deviantart:stash] fix legacy `sta.sh` links ([#7181](https://github.com/mikf/gallery-dl/issues/7181))
+- [hitomi] fix extractors ([#7230](https://github.com/mikf/gallery-dl/issues/7230))
+- [mangapark] fix extractors ([#4999](https://github.com/mikf/gallery-dl/issues/4999) [#5883](https://github.com/mikf/gallery-dl/issues/5883) [#6507](https://github.com/mikf/gallery-dl/issues/6507) [#6908](https://github.com/mikf/gallery-dl/issues/6908) [#7232](https://github.com/mikf/gallery-dl/issues/7232))
+- [nozomi] fix extractors ([#7242](https://github.com/mikf/gallery-dl/issues/7242))
+- [patreon] include subdomains in `session_id` cookie check ([#7188](https://github.com/mikf/gallery-dl/issues/7188))
+- [patreon] do not match `/messages` URLs as creator ([#7187](https://github.com/mikf/gallery-dl/issues/7187))
+- [pinterest] handle `story_pin_static_sticker_block` blocks ([#7251](https://github.com/mikf/gallery-dl/issues/7251))
+- [sexcom] fix `gif` pin extraction ([#7239](https://github.com/mikf/gallery-dl/issues/7239))
+- [skeb] make exceptions when extracting posts non-fatal ([#7250](https://github.com/mikf/gallery-dl/issues/7250))
+- [zerochan] parse `JSON-LD` data ([#7178](https://github.com/mikf/gallery-dl/issues/7178))
#### Improvements
-- [batoto] add `domain` option ([#7174](https://github.com/mikf/gallery-dl/issues/7174))
-- [furaffinity] extract `scraps` metadata ([#7015](https://github.com/mikf/gallery-dl/issues/7015))
-- [tiktok] implement audio extraction without `yt-dlp`
-- [wikimedia] add `subcategories` option ([#2340](https://github.com/mikf/gallery-dl/issues/2340))
+- [arcalive] extend `gifs` option
+- [deviantart] support multiple images for single posts ([#6653](https://github.com/mikf/gallery-dl/issues/6653) [#7261](https://github.com/mikf/gallery-dl/issues/7261))
+- [deviantart] add subfolder support ([#4988](https://github.com/mikf/gallery-dl/issues/4988) [#7185](https://github.com/mikf/gallery-dl/issues/7185) [#7220](https://github.com/mikf/gallery-dl/issues/7220))
+- [deviantart] match `/gallery/recommended-for-you` URLs ([#7168](https://github.com/mikf/gallery-dl/issues/7168) [#7243](https://github.com/mikf/gallery-dl/issues/7243))
+- [instagram] extract videos from `video_dash_manifest` data ([#6379](https://github.com/mikf/gallery-dl/issues/6379) [#7006](https://github.com/mikf/gallery-dl/issues/7006))
+- [mangapark] support mirror domains
+- [mangapark] support v3 URLs ([#2072](https://github.com/mikf/gallery-dl/issues/2072))
+- [mastodon] support `/statuses` URLs ([#7255](https://github.com/mikf/gallery-dl/issues/7255))
+- [sexcom] support new-style `/gifs` and `/videos` URLs ([#7239](https://github.com/mikf/gallery-dl/issues/7239))
+- [subscribestar] detect redirects to `/age_confirmation_warning` pages
+- [tiktok] add retry mechanism to rehydration data extraction ([#7191](https://github.com/mikf/gallery-dl/issues/7191))
+#### Metadata
+- [bbc] extract more metadata ([#6582](https://github.com/mikf/gallery-dl/issues/6582))
+- [kemonoparty] extract `archives` metadata ([#7195](https://github.com/mikf/gallery-dl/issues/7195))
+- [kemonoparty] enable `username`/`user_profile` metadata by default
+- [kemonoparty:discord] always provide `channel_name` metadata ([#7245](https://github.com/mikf/gallery-dl/issues/7245))
+- [sexcom] extract `date_url` metadata ([#7239](https://github.com/mikf/gallery-dl/issues/7239))
+- [subscribestar] extract `title` metadata ([#7219](https://github.com/mikf/gallery-dl/issues/7219))
+### Downloaders
+- [ytdl] support processing inline HLS/DASH manifest data ([#6379](https://github.com/mikf/gallery-dl/issues/6379) [#7006](https://github.com/mikf/gallery-dl/issues/7006))
+### Miscellaneous
+- [aes] simplify `block_count` calculation
+- [common] add `subdomains` argument to `cookies_check()` ([#7188](https://github.com/mikf/gallery-dl/issues/7188))
+- [config] fix using the same key multiple times with `apply` ([#7127](https://github.com/mikf/gallery-dl/issues/7127))
+- [tests] implement expected failures
diff --git a/PKG-INFO b/PKG-INFO
index 1d71036..4481e14 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
Name: gallery_dl
-Version: 1.29.2
+Version: 1.29.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -46,6 +46,7 @@ Dynamic: download-url
Dynamic: home-page
Dynamic: keywords
Dynamic: license
+Dynamic: license-file
Dynamic: maintainer
Dynamic: maintainer-email
Dynamic: provides-extra
@@ -132,9 +133,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.2/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.3/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.2/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.3/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index ae51968..43f18a9 100644
--- a/README.rst
+++ b/README.rst
@@ -77,9 +77,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.2/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.3/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.2/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.3/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 8c34ff3..5b0e7e7 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2025-03-15" "1.29.2" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2025-03-29" "1.29.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index aaf94b3..d032f25 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2025-03-15" "1.29.2" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2025-03-29" "1.29.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1651,14 +1651,26 @@ Download emoticon images.
.SS extractor.arcalive.gifs
.IP "Type:" 6
-\f[I]bool\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]true\f[]
.IP "Description:" 4
-Check if \f[I].mp4\f[] videos have a \f[I].gif\f[] version
-and download those instead.
+Try to download \f[I].gif\f[] versions of \f[I].mp4\f[] videos.
+
+\f[I]true\f[] | \f[I]"fallback"\f[]
+Use the \f[I].gif\f[] version as primary URL
+and provide the \f[I].mp4\f[] one as
+\f[I]fallback\f[].
+\f[I]"check"\f[]
+Check whether a \f[I].gif\f[] version is available
+by sending an extra HEAD request.
+\f[I]false\f[]
+Always download the \f[I].mp4\f[] version.
.SS extractor.artstation.external
@@ -2219,7 +2231,7 @@ For unavailable or restricted posts,
follow the \f[I]source\f[] and download from there if possible.
-.SS extractor.[Danbooru].pool.order-posts
+.SS extractor.[Danbooru].favgroup.order-posts
.IP "Type:" 6
\f[I]string\f[]
@@ -2227,7 +2239,7 @@ follow the \f[I]source\f[] and download from there if possible.
\f[I]"pool"\f[]
.IP "Description:" 4
-Controls the order in which pool posts are returned.
+Controls the order in which \f[I]pool\f[]/\f[I]favgroup\f[] posts are returned.
\f[I]"pool"\f[] \f[I] \f[I]"pool_asc"\f[] \f[] \f[I]"asc"\f[] \f[I] \f[I]"asc_pool"\f[]
Pool order
@@ -2689,6 +2701,17 @@ Leave \f[I]SIZE\f[] empty to download the regular, small avatar format.
.br
+.SS extractor.deviantart.folder.subfolders
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Also extract subfolder content.
+
+
.SS extractor.discord.embeds
.IP "Type:" 6
\f[I]list\f[] of \f[I]strings\f[]
@@ -3293,9 +3316,6 @@ Selects which image format to download.
Available formats are \f[I]"webp"\f[] and \f[I]"avif"\f[].
-\f[I]"original"\f[] will try to download the original \f[I]jpg\f[] or \f[I]png\f[] versions,
-but is most likely going to fail with \f[I]403 Forbidden\f[] errors.
-
.SS extractor.imagechest.access-token
.IP "Type:" 6
@@ -3513,13 +3533,23 @@ Download video previews.
.SS extractor.instagram.videos
.IP "Type:" 6
-\f[I]bool\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]true\f[]
.IP "Description:" 4
-Download video files.
+Controls video download behavior.
+
+\f[I]true\f[] | \f[I]"dash"\f[] | \f[I]"ytdl"\f[]
+Download videos from \f[I]video_dash_manifest\f[] data using \f[I]ytdl\f[]
+\f[I]"merged"\f[]
+Download pre-merged video formats
+\f[I]false\f[]
+Do not download videos
.SS extractor.itaku.videos
@@ -3533,6 +3563,20 @@ Download video files.
Download video files.
+.SS extractor.kemonoparty.archives
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Extract additional metadata for \f[I]archives\f[] files, including
+\f[I]file\f[], \f[I]file_list\f[], and \f[I]password\f[].
+
+Note: This requires 1 additional HTTP request per \f[I]archives\f[] file.
+
+
.SS extractor.kemonoparty.comments
.IP "Type:" 6
\f[I]bool\f[]
@@ -3626,10 +3670,10 @@ Limit the number of posts to download.
\f[I]bool\f[]
.IP "Default:" 9
-\f[I]false\f[]
+\f[I]true\f[]
.IP "Description:" 4
-Extract \f[I]username\f[] metadata.
+Extract \f[I]username\f[] and \f[I]user_profile\f[] metadata.
.SS extractor.kemonoparty.revisions
@@ -5187,31 +5231,31 @@ tried until a format is found.
Possible formats include
.br
-* \f[I]"gif"\f[]
+* \f[I]gif\f[]
.br
-* \f[I]"gif_transparent"\f[]
+* \f[I]gif_transparent\f[]
.br
-* \f[I]"gifpreview"\f[]
+* \f[I]mediumgif\f[]
.br
-* \f[I]"mediumgif"\f[]
+* \f[I]gifpreview\f[]
.br
-* \f[I]"tinygif"\f[]
+* \f[I]tinygif\f[]
.br
-* \f[I]"tinygif_transparent"\f[]
+* \f[I]tinygif_transparent\f[]
.br
-* \f[I]"mp4"\f[]
+* \f[I]mp4\f[]
.br
-* \f[I]"tinymp4"\f[]
+* \f[I]tinymp4\f[]
.br
-* \f[I]"webm"\f[]
+* \f[I]webm\f[]
.br
-* \f[I]"webp"\f[]
+* \f[I]webp\f[]
.br
-* \f[I]"webp_transparent"\f[]
+* \f[I]webp_transparent\f[]
.br
-* \f[I]"tinywebp"\f[]
+* \f[I]tinywebp\f[]
.br
-* \f[I]"tinywebp_transparent"\f[]
+* \f[I]tinywebp_transparent\f[]
.SS extractor.tiktok.audio
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 7887fd5..8ede568 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -235,6 +235,9 @@
"avatar": {
"formats": null
+ },
+ "folder": {
+ "subfolders": true
}
},
"exhentai":
@@ -368,13 +371,14 @@
"password": "",
"announcements": false,
+ "archives" : false,
"comments" : false,
"dms" : false,
"duplicates" : false,
"favorites" : "artist",
"files" : ["attachments", "file", "inline"],
"max-posts" : null,
- "metadata" : false,
+ "metadata" : true,
"revisions" : false,
"order-revisions": "desc"
},
@@ -788,6 +792,9 @@
"threshold": "auto",
"ugoira" : false,
+ "favgroup": {
+ "order-posts": "pool"
+ },
"pool": {
"order-posts": "pool"
}
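Note: the JSON defaults above map one-to-one onto programmatic configuration. A minimal sketch, assuming only the public `gallery_dl.config.set(path, key, value)` helper, of setting the same new options from Python instead of editing gallery-dl.conf:

```python
# Setting the new 1.29.3 options programmatically via gallery_dl.config.
# set() takes a path tuple, a key, and a value.
from gallery_dl import config

config.set(("extractor", "deviantart", "folder"), "subfolders", True)
config.set(("extractor", "kemonoparty"), "archives", False)
config.set(("extractor", "kemonoparty"), "metadata", True)   # new default
config.set(("extractor", "danbooru", "favgroup"), "order-posts", "pool")
```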
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 1d71036..4481e14 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
Name: gallery_dl
-Version: 1.29.2
+Version: 1.29.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -46,6 +46,7 @@ Dynamic: download-url
Dynamic: home-page
Dynamic: keywords
Dynamic: license
+Dynamic: license-file
Dynamic: maintainer
Dynamic: maintainer-email
Dynamic: provides-extra
@@ -132,9 +133,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.2/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.3/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.2/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.3/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 3e8f365..2f4a87c 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -108,7 +108,6 @@ gallery_dl/extractor/hatenablog.py
gallery_dl/extractor/hentai2read.py
gallery_dl/extractor/hentaicosplays.py
gallery_dl/extractor/hentaifoundry.py
-gallery_dl/extractor/hentaifox.py
gallery_dl/extractor/hentaihand.py
gallery_dl/extractor/hentaihere.py
gallery_dl/extractor/hentainexus.py
diff --git a/gallery_dl/aes.py b/gallery_dl/aes.py
index 6727541..3fd1d5e 100644
--- a/gallery_dl/aes.py
+++ b/gallery_dl/aes.py
@@ -78,7 +78,7 @@ def aes_ecb_encrypt(data, key, iv=None):
@returns {int[]} encrypted data
"""
expanded_key = key_expansion(key)
- block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+ block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
encrypted_data = []
for i in range(block_count):
@@ -99,7 +99,7 @@ def aes_ecb_decrypt(data, key, iv=None):
@returns {int[]} decrypted data
"""
expanded_key = key_expansion(key)
- block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+ block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
encrypted_data = []
for i in range(block_count):
@@ -132,7 +132,7 @@ def aes_ctr_encrypt(data, key, iv):
@returns {int[]} encrypted data
"""
expanded_key = key_expansion(key)
- block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+ block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
counter = iter_vector(iv)
encrypted_data = []
@@ -158,7 +158,7 @@ def aes_cbc_decrypt(data, key, iv):
@returns {int[]} decrypted data
"""
expanded_key = key_expansion(key)
- block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+ block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
decrypted_data = []
previous_cipher_block = iv
@@ -184,7 +184,7 @@ def aes_cbc_encrypt(data, key, iv):
@returns {int[]} encrypted data
"""
expanded_key = key_expansion(key)
- block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+ block_count = ceil(len(data) / BLOCK_SIZE_BYTES)
encrypted_data = []
previous_cipher_block = iv
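Note: the simplification works because `math.ceil()` already returns an `int` in Python 3, so the old `int(ceil(float(...)))` wrappers were redundant. A quick sketch, including an integer-only ceiling division that avoids float rounding entirely for very large inputs:

```python
# Equivalent ways to compute the AES block count.
from math import ceil

BLOCK_SIZE_BYTES = 16

def block_count(data):
    # Python 3: ceil() on a float division already yields an int
    return ceil(len(data) / BLOCK_SIZE_BYTES)

def block_count_int(data):
    # pure-integer ceiling division; no float involved
    return -(-len(data) // BLOCK_SIZE_BYTES)

assert block_count(b"x" * 17) == block_count_int(b"x" * 17) == 2
```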
diff --git a/gallery_dl/config.py b/gallery_dl/config.py
index f932e3a..92e55d3 100644
--- a/gallery_dl/config.py
+++ b/gallery_dl/config.py
@@ -322,6 +322,7 @@ class apply():
set(path, key, value)
def __exit__(self, exc_type, exc_value, traceback):
+ self.original.reverse()
for path, key, value in self.original:
if value is util.SENTINEL:
unset(path, key)
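Note: the one-line fix above matters when `apply` sets the same key twice: the second snapshot records the value the first `set()` just wrote, so restoring snapshots front-to-back would leave that intermediate value behind. Restoring in reverse ends with the true original, as this standalone sketch illustrates:

```python
# Why __exit__ must restore snapshots in reverse (LIFO) order.
saved = []

def set_key(store, key, value):
    saved.append((key, store.get(key)))  # snapshot before overwriting
    store[key] = value

store = {"k": "original"}
set_key(store, "k", "a")   # snapshot: ("k", "original")
set_key(store, "k", "b")   # snapshot: ("k", "a")

for key, value in reversed(saved):   # back-to-front restore
    store[key] = value
assert store["k"] == "original"     # front-to-back would leave "a"
```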
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 1242098..9d653b3 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -10,6 +10,7 @@
from .common import DownloaderBase
from .. import ytdl, text
+from xml.etree import ElementTree
import os
@@ -76,7 +77,8 @@ class YoutubeDLDownloader(DownloaderBase):
manifest = kwdict.pop("_ytdl_manifest", None)
if manifest:
info_dict = self._extract_manifest(
- ytdl_instance, url, manifest)
+ ytdl_instance, url, manifest,
+ kwdict.pop("_ytdl_manifest_data", None))
else:
info_dict = self._extract_info(ytdl_instance, url)
except Exception as exc:
@@ -154,37 +156,55 @@ class YoutubeDLDownloader(DownloaderBase):
def _extract_info(self, ytdl, url):
return ytdl.extract_info(url, download=False)
- def _extract_manifest(self, ytdl, url, manifest):
+ def _extract_manifest(self, ytdl, url, manifest_type, manifest_data=None):
extr = ytdl.get_info_extractor("Generic")
video_id = extr._generic_id(url)
- if manifest == "hls":
- try:
- formats, subtitles = extr._extract_m3u8_formats_and_subtitles(
- url, video_id, "mp4")
- except AttributeError:
- formats = extr._extract_m3u8_formats(url, video_id, "mp4")
- subtitles = None
-
- elif manifest == "dash":
- try:
- formats, subtitles = extr._extract_mpd_formats_and_subtitles(
- url, video_id)
- except AttributeError:
- formats = extr._extract_mpd_formats(url, video_id)
- subtitles = None
+ if manifest_type == "hls":
+ if manifest_data is None:
+ try:
+ fmts, subs = extr._extract_m3u8_formats_and_subtitles(
+ url, video_id, "mp4")
+ except AttributeError:
+ fmts = extr._extract_m3u8_formats(url, video_id, "mp4")
+ subs = None
+ else:
+ try:
+ fmts, subs = extr._parse_m3u8_formats_and_subtitles(
+ url, video_id, "mp4")
+ except AttributeError:
+ fmts = extr._parse_m3u8_formats(url, video_id, "mp4")
+ subs = None
+
+ elif manifest_type == "dash":
+ if manifest_data is None:
+ try:
+ fmts, subs = extr._extract_mpd_formats_and_subtitles(
+ url, video_id)
+ except AttributeError:
+ fmts = extr._extract_mpd_formats(url, video_id)
+ subs = None
+ else:
+ if isinstance(manifest_data, str):
+ manifest_data = ElementTree.fromstring(manifest_data)
+ try:
+ fmts, subs = extr._parse_mpd_formats_and_subtitles(
+ manifest_data, mpd_id="dash")
+ except AttributeError:
+ fmts = extr._parse_mpd_formats(
+ manifest_data, mpd_id="dash")
+ subs = None
else:
- self.log.error("Unsupported manifest type '%s'", manifest)
+ self.log.error("Unsupported manifest type '%s'", manifest_type)
return None
info_dict = {
"id" : video_id,
"title" : video_id,
- "formats" : formats,
- "subtitles": subtitles,
+ "formats" : fmts,
+ "subtitles": subs,
}
- # extr._extra_manifest_info(info_dict, url)
return ytdl.process_ie_result(info_dict, download=False)
def _progress_hook(self, info):
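Note: the new `manifest_data` branches let extractors pass the manifest document itself instead of a URL; for DASH, yt-dlp's `_parse_mpd_formats_and_subtitles()` expects an XML element, hence the `ElementTree.fromstring()` conversion above. A minimal sketch of that step (the MPD string is a made-up stub):

```python
# An inline MPD manifest arrives as a string and must be parsed into
# an Element before yt-dlp's MPD parser can consume it.
from xml.etree import ElementTree

mpd_string = """<?xml version="1.0"?>
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" type="static">
  <Period><AdaptationSet mimeType="video/mp4"/></Period>
</MPD>"""

manifest_data = mpd_string
if isinstance(manifest_data, str):
    manifest_data = ElementTree.fromstring(manifest_data)

print(manifest_data.tag)  # '{urn:mpeg:dash:schema:mpd:2011}MPD'
```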
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 8198619..87c3798 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -68,7 +68,6 @@ modules = [
"hentai2read",
"hentaicosplays",
"hentaifoundry",
- "hentaifox",
"hentaihand",
"hentaihere",
"hentainexus",
diff --git a/gallery_dl/extractor/arcalive.py b/gallery_dl/extractor/arcalive.py
index 8e832fe..8c44256 100644
--- a/gallery_dl/extractor/arcalive.py
+++ b/gallery_dl/extractor/arcalive.py
@@ -41,7 +41,9 @@ class ArcalivePostExtractor(ArcaliveExtractor):
def items(self):
self.emoticons = self.config("emoticons", False)
- self.gifs = self.config("gifs", True)
+ self.gifs = gifs = self.config("gifs", True)
+ if gifs:
+ self.gifs_fallback = (gifs != "check")
post = self.api.post(self.groups[0])
files = self._extract_files(post)
@@ -90,11 +92,15 @@ class ArcalivePostExtractor(ArcaliveExtractor):
url = path + "." + orig
elif video and self.gifs:
url_gif = url.rpartition(".")[0] + ".gif"
- response = self.request(
- url_gif + "?type=orig", method="HEAD", fatal=False)
- if response.status_code < 400:
+ if self.gifs_fallback:
fallback = (url + "?type=orig",)
url = url_gif
+ else:
+ response = self.request(
+ url_gif + "?type=orig", method="HEAD", fatal=False)
+ if response.status_code < 400:
+ fallback = (url + "?type=orig",)
+ url = url_gif
files.append({
"url" : url + "?type=orig",
diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py
index 113a669..b398152 100644
--- a/gallery_dl/extractor/bbc.py
+++ b/gallery_dl/extractor/bbc.py
@@ -27,7 +27,12 @@ class BbcGalleryExtractor(GalleryExtractor):
def metadata(self, page):
data = self._extract_jsonld(page)
+
return {
+ "title": text.unescape(text.extr(
+ page, "<h1>", "</h1>").rpartition("</span>")[2]),
+ "description": text.unescape(text.extr(
+ page, 'property="og:description" content="', '"')),
"programme": self.gallery_url.split("/")[4],
"path": list(util.unique_sequence(
element["name"]
@@ -40,11 +45,20 @@ class BbcGalleryExtractor(GalleryExtractor):
width = width - width % 16 if width else 1920
dimensions = "/{}xn/".format(width)
- return [
- (src.replace("/320x180_b/", dimensions),
- {"_fallback": self._fallback_urls(src, width)})
- for src in text.extract_iter(page, 'data-image-src="', '"')
- ]
+ results = []
+ for img in text.extract_iter(page, 'class="gallery__thumbnail', ">"):
+ src = text.extr(img, 'data-image-src="', '"')
+ results.append((
+ src.replace("/320x180_b/", dimensions),
+ {
+ "title_image": text.unescape(text.extr(
+ img, 'data-gallery-title="', '"')),
+ "synopsis": text.unescape(text.extr(
+ img, 'data-gallery-synopsis="', '"')),
+ "_fallback": self._fallback_urls(src, width),
+ },
+ ))
+ return results
@staticmethod
def _fallback_urls(src, max_width):
@@ -62,14 +76,11 @@ class BbcProgrammeExtractor(Extractor):
pattern = BASE_PATTERN + r"[^/?#]+/galleries)(?:/?\?page=(\d+))?"
example = "https://www.bbc.co.uk/programmes/ID/galleries"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.path, self.page = match.groups()
-
def items(self):
+ path, pnum = self.groups
data = {"_extractor": BbcGalleryExtractor}
- params = {"page": text.parse_int(self.page, 1)}
- galleries_url = self.root + self.path
+ params = {"page": text.parse_int(pnum, 1)}
+ galleries_url = self.root + path
while True:
page = self.request(galleries_url, params=params).text
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index d74f59c..481e962 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -189,8 +189,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
else:
file_url = data["url"]
- file_name = (text.extr(page, 'property="og:title" content="', '"') or
- text.extr(page, "<title>", " | Bunkr<"))
+ file_name = text.extr(page, "<h1", "<").rpartition(">")[2]
fallback = text.extr(page, 'property="og:url" content="', '"')
return {
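Note: the new one-liner works because the slice between `<h1` and the following `<` still carries the tag's attributes, and `rpartition(">")` discards them. Demonstrated below with a simplified stand-in for `text.extr()` (not the real implementation):

```python
# Why text.extr(page, "<h1", "<").rpartition(">")[2] yields the filename.
def extr(txt, begin, end):
    # simplified stand-in for gallery_dl.text.extr
    try:
        first = txt.index(begin) + len(begin)
        return txt[first:txt.index(end, first)]
    except ValueError:
        return ""

page = '<h1 class="truncate">photo 01.jpg</h1>'
name = extr(page, "<h1", "<").rpartition(">")[2]  # attributes stripped
assert name == "photo 01.jpg"
```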
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index a85eedd..995505f 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -539,7 +539,7 @@ class Extractor():
for name, value in cookiedict.items():
set_cookie(name, value, domain=domain)
- def cookies_check(self, cookies_names, domain=None):
+ def cookies_check(self, cookies_names, domain=None, subdomains=False):
"""Check if all 'cookies_names' are in the session's cookiejar"""
if not self.cookies:
return False
@@ -550,26 +550,31 @@ class Extractor():
now = time.time()
for cookie in self.cookies:
- if cookie.name in names and (
- not domain or cookie.domain == domain):
-
- if cookie.expires:
- diff = int(cookie.expires - now)
-
- if diff <= 0:
- self.log.warning(
- "Cookie '%s' has expired", cookie.name)
- continue
-
- elif diff <= 86400:
- hours = diff // 3600
- self.log.warning(
- "Cookie '%s' will expire in less than %s hour%s",
- cookie.name, hours + 1, "s" if hours else "")
-
- names.discard(cookie.name)
- if not names:
- return True
+ if cookie.name not in names:
+ continue
+
+ if not domain or cookie.domain == domain:
+ pass
+ elif not subdomains or not cookie.domain.endswith(domain):
+ continue
+
+ if cookie.expires:
+ diff = int(cookie.expires - now)
+
+ if diff <= 0:
+ self.log.warning(
+ "Cookie '%s' has expired", cookie.name)
+ continue
+
+ elif diff <= 86400:
+ hours = diff // 3600
+ self.log.warning(
+ "Cookie '%s' will expire in less than %s hour%s",
+ cookie.name, hours + 1, "s" if hours else "")
+
+ names.discard(cookie.name)
+ if not names:
+ return True
return False
def _extract_jsonld(self, page):
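Note: with `subdomains=True`, the relaxed match accepts any cookie whose domain ends with the requested one, which is what lets a `session_id` cookie stored for `www.patreon.com` pass a check against `.patreon.com`. A minimal sketch of just the domain test:

```python
# The domain check from cookies_check(), reduced to a pure function.
def domain_ok(cookie_domain, domain, subdomains):
    if not domain or cookie_domain == domain:
        return True
    # subdomain match: cookie domain must end with the requested domain
    return subdomains and cookie_domain.endswith(domain)

assert domain_ok("www.patreon.com", ".patreon.com", subdomains=True)
assert not domain_ok("www.patreon.com", ".patreon.com", subdomains=False)
```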
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 8d00728..741800c 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -175,6 +175,51 @@ class DanbooruExtractor(BaseExtractor):
return [{"file": fmt(index), "delay": delay}
for index, delay in enumerate(delays)]
+ def _collection_posts(self, cid, ctype):
+ reverse = prefix = None
+
+ order = self.config("order-posts")
+ if not order or order in {"asc", "pool", "pool_asc", "asc_pool"}:
+ params = {"tags": "ord{}:{}".format(ctype, cid)}
+ elif order in {"id", "desc_id", "id_desc"}:
+ params = {"tags": "{}:{}".format(ctype, cid)}
+ prefix = "b"
+ elif order in {"desc", "desc_pool", "pool_desc"}:
+ params = {"tags": "ord{}:{}".format(ctype, cid)}
+ reverse = True
+ elif order in {"asc_id", "id_asc"}:
+ params = {"tags": "{}:{}".format(ctype, cid)}
+ reverse = True
+
+ posts = self._pagination("/posts.json", params, prefix)
+ if reverse:
+ self.log.info("Collecting posts of %s %s", ctype, cid)
+ return self._collection_enumerate_reverse(posts)
+ else:
+ return self._collection_enumerate(posts)
+
+ def _collection_metadata(self, cid, ctype, cname=None):
+ url = "{}/{}s/{}.json".format(self.root, cname or ctype, cid)
+ collection = self.request(url).json()
+ collection["name"] = collection["name"].replace("_", " ")
+ self.post_ids = collection.pop("post_ids", ())
+ return {ctype: collection}
+
+ def _collection_enumerate(self, posts):
+ pid_to_num = {pid: num for num, pid in enumerate(self.post_ids, 1)}
+ for post in posts:
+ post["num"] = pid_to_num[post["id"]]
+ yield post
+
+ def _collection_enumerate_reverse(self, posts):
+ posts = list(posts)
+ posts.reverse()
+
+ pid_to_num = {pid: num for num, pid in enumerate(self.post_ids, 1)}
+ for post in posts:
+ post["num"] = pid_to_num[post["id"]]
+ return posts
+
BASE_PATTERN = DanbooruExtractor.update({
"danbooru": {
@@ -228,7 +273,7 @@ class DanbooruTagExtractor(DanbooruExtractor):
class DanbooruPoolExtractor(DanbooruExtractor):
- """Extractor for posts from danbooru pools"""
+ """Extractor for Danbooru pools"""
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name]}")
filename_fmt = "{num:>04}_{id}_{filename}.{extension}"
@@ -237,50 +282,28 @@ class DanbooruPoolExtractor(DanbooruExtractor):
example = "https://danbooru.donmai.us/pools/12345"
def metadata(self):
- self.pool_id = self.groups[-1]
- url = "{}/pools/{}.json".format(self.root, self.pool_id)
- pool = self.request(url).json()
- pool["name"] = pool["name"].replace("_", " ")
- self.post_ids = pool.pop("post_ids", ())
- return {"pool": pool}
+ return self._collection_metadata(self.groups[-1], "pool")
def posts(self):
- reverse = prefix = None
+ return self._collection_posts(self.groups[-1], "pool")
- order = self.config("order-posts")
- if not order or order in ("asc", "pool", "pool_asc", "asc_pool"):
- params = {"tags": "ordpool:" + self.pool_id}
- elif order in ("id", "desc_id", "id_desc"):
- params = {"tags": "pool:" + self.pool_id}
- prefix = "b"
- elif order in ("desc", "desc_pool", "pool_desc"):
- params = {"tags": "ordpool:" + self.pool_id}
- reverse = True
- elif order in ("asc_id", "id_asc"):
- params = {"tags": "pool:" + self.pool_id}
- reverse = True
- posts = self._pagination("/posts.json", params, prefix)
- if reverse:
- return self._enumerate_posts_reverse(posts)
- else:
- return self._enumerate_posts(posts)
-
- def _enumerate_posts(self, posts):
- pid_to_num = {pid: num+1 for num, pid in enumerate(self.post_ids)}
- for post in posts:
- post["num"] = pid_to_num[post["id"]]
- yield post
+class DanbooruFavgroupExtractor(DanbooruExtractor):
+ """Extractor for Danbooru favorite groups"""
+ subcategory = "favgroup"
+ directory_fmt = ("{category}", "Favorite Groups",
+ "{favgroup[id]} {favgroup[name]}")
+ filename_fmt = "{num:>04}_{id}_{filename}.{extension}"
+ archive_fmt = "fg_{favgroup[id]}_{id}"
+ pattern = BASE_PATTERN + r"/favorite_group(?:s|/show)/(\d+)"
+ example = "https://danbooru.donmai.us/favorite_groups/12345"
- def _enumerate_posts_reverse(self, posts):
- self.log.info("Collecting posts of pool %s", self.pool_id)
- posts = list(posts)
- posts.reverse()
+ def metadata(self):
+ return self._collection_metadata(
+ self.groups[-1], "favgroup", "favorite_group")
- pid_to_num = {pid: num+1 for num, pid in enumerate(self.post_ids)}
- for post in posts:
- post["num"] = pid_to_num[post["id"]]
- return posts
+ def posts(self):
+ return self._collection_posts(self.groups[-1], "favgroup")
class DanbooruPostExtractor(DanbooruExtractor):
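Note: the shared `_collection_posts()` translates `order-posts` into Danbooru tag syntax (`ordpool:ID`/`ordfavgroup:ID` preserve collection order; the plain `pool:`/`favgroup:` forms page by post id) and then numbers each post by its position in the collection's `post_ids`. A sketch of that numbering step with placeholder ids:

```python
# Post numbering as in _collection_enumerate(): "num" is the post's
# 1-based position in the pool/favgroup's post_ids list.
post_ids = [401, 99, 1234]                       # collection order
posts = [{"id": 1234}, {"id": 401}, {"id": 99}]  # API return order

pid_to_num = {pid: num for num, pid in enumerate(post_ids, 1)}
for post in posts:
    post["num"] = pid_to_num[post["id"]]

assert [p["num"] for p in posts] == [3, 1, 2]
```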
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 59b2d6d..3a862c1 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -687,10 +687,18 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
for folder in folders:
if match(folder["name"]):
return folder
+ elif folder["has_subfolders"]:
+ for subfolder in folder["subfolders"]:
+ if match(subfolder["name"]):
+ return subfolder
else:
for folder in folders:
if folder["folderid"] == uuid:
return folder
+ elif folder["has_subfolders"]:
+ for subfolder in folder["subfolders"]:
+ if subfolder["folderid"] == uuid:
+ return subfolder
raise exception.NotFoundError("folder")
def _folder_urls(self, folders, category, extractor):
@@ -891,7 +899,8 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
"""Extractor for all deviations from an artist's gallery"""
subcategory = "gallery"
archive_fmt = "g_{_username}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$"
+ pattern = (BASE_PATTERN + r"/gallery"
+ r"(?:/all|/recommended-for-you|/?\?catpath=)?/?$")
example = "https://www.deviantart.com/USER/gallery/"
def deviations(self):
@@ -987,13 +996,36 @@ class DeviantartFolderExtractor(DeviantartExtractor):
def deviations(self):
folders = self.api.gallery_folders(self.user)
folder = self._find_folder(folders, self.folder_name, self.folder_id)
+
+ # Leaving this here for backwards compatibility
self.folder = {
"title": folder["name"],
"uuid" : folder["folderid"],
"index": self.folder_id,
"owner": self.user,
+ "parent_uuid": folder["parent"],
}
- return self.api.gallery(self.user, folder["folderid"], self.offset)
+
+ if folder.get("subfolder"):
+ self.folder["parent_folder"] = folder["parent_folder"]
+ self.archive_fmt = "F_{folder[parent_uuid]}_{index}.{extension}"
+
+ if self.flat:
+ self.directory_fmt = ("{category}", "{username}",
+ "{folder[parent_folder]}")
+ else:
+ self.directory_fmt = ("{category}", "{username}",
+ "{folder[parent_folder]}",
+ "{folder[title]}")
+
+ if folder.get("has_subfolders") and self.config("subfolders", True):
+ for subfolder in folder["subfolders"]:
+ subfolder["parent_folder"] = folder["name"]
+ subfolder["subfolder"] = True
+ yield from self._folder_urls(
+ folder["subfolders"], "gallery", DeviantartFolderExtractor)
+
+ yield from self.api.gallery(self.user, folder["folderid"], self.offset)
def prepare(self, deviation):
DeviantartExtractor.prepare(self, deviation)
@@ -1004,7 +1036,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
"""Extractor for sta.sh-ed deviations"""
subcategory = "stash"
archive_fmt = "{index}.{extension}"
- pattern = (r"(?:https?://)?(?:(?:www\.)?deviantart\.com/stash|sta\.sh)"
+ pattern = (r"(?:https?://)?(?:(?:www\.)?deviantart\.com/stash|sta\.s(h))"
r"/([a-z0-9]+)")
example = "https://www.deviantart.com/stash/abcde"
@@ -1016,9 +1048,18 @@ class DeviantartStashExtractor(DeviantartExtractor):
def deviations(self, stash_id=None):
if stash_id is None:
- stash_id = self.groups[0]
- url = "https://www.deviantart.com/stash/" + stash_id
- page = self._limited_request(url).text
+ legacy_url, stash_id = self.groups
+ else:
+ legacy_url = False
+
+ if legacy_url and stash_id[0] == "2":
+ url = "https://sta.sh/" + stash_id
+ response = self._limited_request(url)
+ stash_id = response.url.rpartition("/")[2]
+ page = response.text
+ else:
+ url = "https://www.deviantart.com/stash/" + stash_id
+ page = self._limited_request(url).text
if stash_id[0] == "0":
uuid = text.extr(page, '//deviation/', '"')
@@ -1235,7 +1276,34 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
deviation = self.api.deviation(uuid)
deviation["_page"] = page
- return (deviation,)
+
+ _dev_info = text.extr(
+ page, '\\"deviationExtended\\":', ',\\"deviation\\":', None)
+ # Clean up escaped quotes
+ _json_str = re.sub(
+ r'(?<!\\)\\{1}"', '"', _dev_info).replace("\\'", "'")
+ _extended_info = util.json_loads(_json_str)[self.deviation_id]
+ additional_media = _extended_info.get("additionalMedia") or ()
+
+ if additional_media:
+ self.filename_fmt = ("{category}_{index}_{index_file}_{title}_"
+ "{num:>02}.{extension}")
+ self.archive_fmt = ("g_{_username}_{index}{index_file:?_//}."
+ "{extension}")
+
+ deviation["index_file"] = 0
+ deviation["count"] = 1 + len(additional_media)
+ deviation["num"] = 1
+ yield deviation
+
+ for index, post in enumerate(additional_media):
+ uri = post["media"]["baseUri"].encode().decode("unicode-escape")
+ deviation["content"]["src"] = uri
+ deviation["num"] += 1
+ deviation["index_file"] = post["fileId"]
+ # Download only works on purchased materials - no way to check
+ deviation["is_downloadable"] = False
+ yield deviation
class DeviantartScrapsExtractor(DeviantartExtractor):
@@ -1366,7 +1434,7 @@ class DeviantartOAuthAPI():
def __init__(self, extractor):
self.extractor = extractor
self.log = extractor.log
- self.headers = {"dA-minor-version": "20200519"}
+ self.headers = {"dA-minor-version": "20210526"}
self._warn_429 = True
self.delay = extractor.config("wait-min", 0)
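Note: the multi-image support reads `deviationExtended` out of the page's escaped inline JSON; the `re.sub()` pass turns `\"` escapes back into plain quotes, while the negative lookbehind leaves escaped backslashes alone. A small demonstration with a made-up payload:

```python
# The escaped-quote cleanup used on the embedded deviation JSON.
import json
import re

raw = r'{\"additionalMedia\":[{\"fileId\":7}]}'
clean = re.sub(r'(?<!\\)\\{1}"', '"', raw).replace("\\'", "'")
assert json.loads(clean) == {"additionalMedia": [{"fileId": 7}]}
```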
diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py
deleted file mode 100644
index 31a302d..0000000
--- a/gallery_dl/extractor/hentaifox.py
+++ /dev/null
@@ -1,119 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2019-2023 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://hentaifox.com/"""
-
-from .common import GalleryExtractor, Extractor, Message
-from .. import text, util
-
-
-class HentaifoxBase():
- """Base class for hentaifox extractors"""
- category = "hentaifox"
- root = "https://hentaifox.com"
-
-
-class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
- """Extractor for image galleries on hentaifox.com"""
- pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com(/gallery/(\d+))"
- example = "https://hentaifox.com/gallery/12345/"
-
- def __init__(self, match):
- GalleryExtractor.__init__(self, match)
- self.gallery_id = match.group(2)
-
- @staticmethod
- def _split(txt):
- return [
- text.remove_html(tag.partition(">")[2], "", "")
- for tag in text.extract_iter(
- txt, "class='tag_btn", "<span class='t_badge")
- ]
-
- def metadata(self, page):
- extr = text.extract_from(page)
- split = self._split
-
- return {
- "gallery_id": text.parse_int(self.gallery_id),
- "parody" : split(extr(">Parodies:" , "</ul>")),
- "characters": split(extr(">Characters:", "</ul>")),
- "tags" : split(extr(">Tags:" , "</ul>")),
- "artist" : split(extr(">Artists:" , "</ul>")),
- "group" : split(extr(">Groups:" , "</ul>")),
- "type" : text.remove_html(extr(">Category:", "<span")),
- "title" : text.unescape(extr(
- 'id="gallery_title" value="', '"')),
- "language" : "English",
- "lang" : "en",
- }
-
- def images(self, page):
- cover, pos = text.extract(page, '<img src="', '"')
- data , pos = text.extract(page, "$.parseJSON('", "');", pos)
- path = "/".join(cover.split("/")[3:-1])
-
- result = []
- append = result.append
- extmap = {"j": "jpg", "p": "png", "g": "gif"}
- urlfmt = ("/" + path + "/{}.{}").format
-
- server1 = "https://i.hentaifox.com"
- server2 = "https://i2.hentaifox.com"
-
- for num, image in util.json_loads(data).items():
- ext, width, height = image.split(",")
- path = urlfmt(num, extmap[ext])
- append((server1 + path, {
- "width" : width,
- "height" : height,
- "_fallback": (server2 + path,),
- }))
-
- return result
-
-
-class HentaifoxSearchExtractor(HentaifoxBase, Extractor):
- """Extractor for search results and listings on hentaifox.com"""
- subcategory = "search"
- pattern = (r"(?:https?://)?(?:www\.)?hentaifox\.com"
- r"(/(?:parody|tag|artist|character|search|group)/[^/?%#]+)")
- example = "https://hentaifox.com/tag/TAG/"
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.path = match.group(1)
-
- def items(self):
- for gallery in self.galleries():
- yield Message.Queue, gallery["url"], gallery
-
- def galleries(self):
- num = 1
-
- while True:
- url = "{}{}/pag/{}/".format(self.root, self.path, num)
- page = self.request(url).text
-
- for info in text.extract_iter(
- page, 'class="g_title"><a href="', '</a>'):
- url, _, title = info.partition('">')
-
- yield {
- "url" : text.urljoin(self.root, url),
- "gallery_id": text.parse_int(
- url.strip("/").rpartition("/")[2]),
- "title" : text.unescape(title),
- "_extractor": HentaifoxGalleryExtractor,
- }
-
- pos = page.find(">Next<")
- url = text.rextract(page, "href=", ">", pos)[0]
- if pos == -1 or "/pag" not in url:
- return
- num += 1
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index e15e13c..086b77c 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -16,19 +16,25 @@ import string
import re
-class HitomiGalleryExtractor(GalleryExtractor):
- """Extractor for image galleries from hitomi.la"""
+class HitomiExtractor(Extractor):
+ """Base class for hitomi extractors"""
category = "hitomi"
root = "https://hitomi.la"
+ domain = "gold-usergeneratedcontent.net"
+
+
+class HitomiGalleryExtractor(HitomiExtractor, GalleryExtractor):
+ """Extractor for hitomi.la galleries"""
pattern = (r"(?:https?://)?hitomi\.la"
r"/(?:manga|doujinshi|cg|gamecg|imageset|galleries|reader)"
r"/(?:[^/?#]+-)?(\d+)")
example = "https://hitomi.la/manga/TITLE-867789.html"
def __init__(self, match):
- self.gid = match.group(1)
- url = "https://ltn.hitomi.la/galleries/{}.js".format(self.gid)
- GalleryExtractor.__init__(self, match, url)
+ GalleryExtractor.__init__(self, match, False)
+ self.gid = gid = self.groups[0]
+ self.gallery_url = "https://ltn.{}/galleries/{}.js".format(
+ self.domain, gid)
def _init(self):
self.session.headers["Referer"] = "{}/reader/{}.html".format(
@@ -71,43 +77,34 @@ class HitomiGalleryExtractor(GalleryExtractor):
}
def images(self, _):
- # see https://ltn.hitomi.la/gg.js
+ # https://ltn.gold-usergeneratedcontent.net/gg.js
gg_m, gg_b, gg_default = _parse_gg(self)
- fmt = self.config("format") or "webp"
- if fmt == "original":
- subdomain, path, ext, check = "b", "images", None, False
- else:
- subdomain, path, ext, check = "a", fmt, fmt, (fmt != "webp")
+ fmt = ext = self.config("format") or "webp"
+ check = (fmt != "webp")
result = []
for image in self.info["files"]:
if check:
- if image.get("has" + fmt):
- path = ext = fmt
- else:
- path = ext = "webp"
+ ext = fmt if image.get("has" + fmt) else "webp"
ihash = image["hash"]
idata = text.nameext_from_url(image["name"])
idata["extension_original"] = idata["extension"]
- if ext:
- idata["extension"] = ext
+ idata["extension"] = ext
- # see https://ltn.hitomi.la/common.js
+ # https://ltn.gold-usergeneratedcontent.net/common.js
inum = int(ihash[-1] + ihash[-3:-1], 16)
- url = "https://{}{}.hitomi.la/{}/{}/{}/{}.{}".format(
- chr(97 + gg_m.get(inum, gg_default)),
- subdomain, path, gg_b, inum, ihash, idata["extension"],
+ url = "https://{}{}.{}/{}/{}/{}.{}".format(
+ ext[0], gg_m.get(inum, gg_default) + 1, self.domain,
+ gg_b, inum, ihash, ext,
)
result.append((url, idata))
return result
-class HitomiTagExtractor(Extractor):
+class HitomiTagExtractor(HitomiExtractor):
"""Extractor for galleries from tag searches on hitomi.la"""
- category = "hitomi"
subcategory = "tag"
- root = "https://hitomi.la"
pattern = (r"(?:https?://)?hitomi\.la"
r"/(tag|artist|group|series|type|character)"
r"/([^/?#]+)\.html")
@@ -126,8 +123,8 @@ class HitomiTagExtractor(Extractor):
"_extractor": HitomiGalleryExtractor,
"search_tags": text.unquote(self.tag.rpartition("-")[0]),
}
- nozomi_url = "https://ltn.hitomi.la/{}/{}.nozomi".format(
- self.type, self.tag)
+ nozomi_url = "https://ltn.{}/{}/{}.nozomi".format(
+ self.domain, self.type, self.tag)
headers = {
"Origin": self.root,
"Cache-Control": "max-age=0",
@@ -166,8 +163,8 @@ class HitomiIndexExtractor(HitomiTagExtractor):
def items(self):
data = {"_extractor": HitomiGalleryExtractor}
- nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
- self.tag, self.language)
+ nozomi_url = "https://ltn.{}/{}-{}.nozomi".format(
+ self.domain, self.tag, self.language)
headers = {
"Origin": self.root,
"Cache-Control": "max-age=0",
@@ -194,11 +191,9 @@ class HitomiIndexExtractor(HitomiTagExtractor):
return
-class HitomiSearchExtractor(Extractor):
+class HitomiSearchExtractor(HitomiExtractor):
"""Extractor for galleries from multiple tag searches on hitomi.la"""
- category = "hitomi"
subcategory = "search"
- root = "https://hitomi.la"
pattern = r"(?:https?://)?hitomi\.la/search\.html\?([^/?#]+)"
example = "https://hitomi.la/search.html?QUERY"
@@ -224,11 +219,11 @@ class HitomiSearchExtractor(Extractor):
area, tag, language = self.get_nozomi_args(full_tag)
if area:
- nozomi_url = "https://ltn.hitomi.la/n/{}/{}-{}.nozomi".format(
- area, tag, language)
+ nozomi_url = "https://ltn.{}/n/{}/{}-{}.nozomi".format(
+ self.domain, area, tag, language)
else:
- nozomi_url = "https://ltn.hitomi.la/n/{}-{}.nozomi".format(
- tag, language)
+ nozomi_url = "https://ltn.{}/n/{}-{}.nozomi".format(
+ self.domain, tag, language)
headers = {
"Origin": self.root,
@@ -257,7 +252,7 @@ class HitomiSearchExtractor(Extractor):
@memcache(maxage=1800)
def _parse_gg(extr):
- page = extr.request("https://ltn.hitomi.la/gg.js").text
+ page = extr.request("https://ltn.gold-usergeneratedcontent.net/gg.js").text
m = {}
@@ -280,4 +275,4 @@ def _parse_gg(extr):
d = re.search(r"(?:var\s|default:)\s*o\s*=\s*(\d+)", page)
b = re.search(r"b:\s*[\"'](.+)[\"']", page)
- return m, b.group(1).strip("/"), int(d.group(1)) if d else 1
+ return m, b.group(1).strip("/"), int(d.group(1)) if d else 0
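Note: image URLs now live on `gold-usergeneratedcontent.net`: the subdomain is the format's first letter plus a `gg.js`-derived number + 1, and `inum` still comes from the trailing hex digits of the file hash. A sketch of the construction, with placeholder `gg` values where `_parse_gg()` would normally supply them:

```python
# Hitomi image URL construction; gg_m/gg_b/gg_default are placeholders
# for what _parse_gg() returns.
domain = "gold-usergeneratedcontent.net"
gg_m, gg_b, gg_default = {}, "1742371200", 0

ihash = ("0123456789abcdef" * 3) + "01234567deadbeef"  # 64 hex chars
ext = "webp"

inum = int(ihash[-1] + ihash[-3:-1], 16)  # 'f' + 'be' -> 0xfbe == 4030
url = "https://{}{}.{}/{}/{}/{}.{}".format(
    ext[0], gg_m.get(inum, gg_default) + 1, domain, gg_b, inum, ihash, ext)
print(url)  # https://w1.gold-usergeneratedcontent.net/1742371200/4030/...webp
```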
diff --git a/gallery_dl/extractor/imhentai.py b/gallery_dl/extractor/imhentai.py
index 0439f5b..1b0fba3 100644
--- a/gallery_dl/extractor/imhentai.py
+++ b/gallery_dl/extractor/imhentai.py
@@ -22,10 +22,15 @@ class ImhentaiExtractor(BaseExtractor):
while True:
page = self.request(url).text
+
+ pos = page.find('class="ranking_list"')
+ if pos >= 0:
+ page = page[:pos]
+
extr = text.extract_from(page)
while True:
- gallery_id = extr('<a href="/gallery/', '"')
+ gallery_id = extr('href="/gallery/', '"')
if gallery_id == prev:
continue
if not gallery_id:
@@ -57,6 +62,18 @@ BASE_PATTERN = ImhentaiExtractor.update({
"root": "https://hentairox.com",
"pattern": r"(?:www\.)?hentairox\.com",
},
+ "hentaifox": {
+ "root": "https://hentaifox.com",
+ "pattern": r"(?:www\.)?hentaifox\.com",
+ },
+ "hentaienvy": {
+ "root": "https://hentaienvy.com",
+ "pattern": r"(?:www\.)?hentaienvy\.com",
+ },
+ "hentaizap": {
+ "root": "https://hentaizap.com",
+ "pattern": r"(?:www\.)?hentaizap\.com",
+ },
})
@@ -72,17 +89,20 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
def metadata(self, page):
extr = text.extract_from(page)
+ title = extr("<h1>", "<")
+ title_alt = extr('class="subtitle">', "<")
+ end = "</li>" if extr('<ul class="galleries_info', ">") else "</ul>"
data = {
"gallery_id": text.parse_int(self.gallery_id),
- "title" : text.unescape(extr("<h1>", "<")),
- "title_alt" : text.unescape(extr('class="subtitle">', "<")),
- "parody" : self._split(extr(">Parodies", "</li>")),
- "character" : self._split(extr(">Characters", "</li>")),
- "tags" : self._split(extr(">Tags", "</li>")),
- "artist" : self._split(extr(">Artists", "</li>")),
- "group" : self._split(extr(">Groups", "</li>")),
- "language" : self._split(extr(">Languages", "</li>")),
+ "title" : text.unescape(title),
+ "title_alt" : text.unescape(title_alt),
+ "parody" : self._split(extr(">Parodies", end)),
+ "character" : self._split(extr(">Characters", end)),
+ "tags" : self._split(extr(">Tags", end)),
+ "artist" : self._split(extr(">Artists", end)),
+ "group" : self._split(extr(">Groups", end)),
+ "language" : self._split(extr(">Languages", end)),
"type" : extr("href='/category/", "/"),
}
@@ -94,10 +114,12 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
def _split(self, html):
results = []
for tag in text.extract_iter(html, ">", "</a>"):
- tag = tag.partition(" <span class='badge'>")[0]
- if "<" in tag:
- tag = text.remove_html(tag)
+ badge = ("badge'>" in tag or "class='badge" in tag)
+ tag = text.remove_html(tag)
+ if badge:
+ tag = tag.rpartition(" ")[0]
results.append(tag)
+ results.sort()
return results
def images(self, page):
@@ -132,9 +154,9 @@ class ImhentaiTagExtractor(ImhentaiExtractor):
class ImhentaiSearchExtractor(ImhentaiExtractor):
"""Extractor for imhentai search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
+ pattern = BASE_PATTERN + r"/search(/?\?[^#]+|/[^/?#]+/?)"
example = "https://imhentai.xxx/search/?key=QUERY"
def items(self):
- url = self.root + "/search/?" + self.groups[-1]
+ url = self.root + "/search" + self.groups[-1]
return self._pagination(url)
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index e344b2f..aa26408 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -56,9 +56,11 @@ class InstagramExtractor(Extractor):
data = self.metadata()
videos = self.config("videos", True)
+ if videos:
+ videos_dash = (videos != "merged")
+ videos_headers = {"User-Agent": "Mozilla/5.0"}
previews = self.config("previews", False)
max_posts = self.config("max-posts")
- video_headers = {"User-Agent": "Mozilla/5.0"}
order = self.config("order-files")
reverse = order[0] in ("r", "d") if order else False
@@ -92,8 +94,12 @@ class InstagramExtractor(Extractor):
url = file.get("video_url")
if url:
if videos:
- file["_http_headers"] = video_headers
+ file["_http_headers"] = videos_headers
text.nameext_from_url(url, file)
+ if videos_dash:
+ file["_fallback"] = (url,)
+ file["_ytdl_manifest"] = "dash"
+ url = "ytdl:dash"
yield Message.Url, url, file
if previews:
file["media_id"] += "p"
@@ -246,6 +252,7 @@ class InstagramExtractor(Extractor):
"video_url" : video["url"] if video else None,
"width" : media["width"],
"height" : media["height"],
+ "_ytdl_manifest_data": item.get("video_dash_manifest"),
}
if "expiring_at" in item:
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 788b5d9..860e771 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -57,11 +57,13 @@ class KemonopartyExtractor(Extractor):
find_hash = re.compile(HASH_PATTERN).match
generators = self._build_file_generators(self.config("files"))
announcements = True if self.config("announcements") else None
+ archives = True if self.config("archives") else False
comments = True if self.config("comments") else False
duplicates = True if self.config("duplicates") else False
dms = True if self.config("dms") else None
max_posts = self.config("max-posts")
- creator_info = {} if self.config("metadata") else None
+ creator_info = {} if self.config("metadata", True) else None
+ exts_archive = {"zip", "rar", "7z"}
# prevent files from being sent with gzip compression
headers = {"Accept-Encoding": "identity"}
@@ -115,6 +117,7 @@ class KemonopartyExtractor(Extractor):
files = []
hashes = set()
+ post_archives = post["archives"] = []
for file in itertools.chain.from_iterable(
g(post) for g in generators):
@@ -129,31 +132,45 @@ class KemonopartyExtractor(Extractor):
continue
hashes.add(hash)
else:
- file["hash"] = ""
+ file["hash"] = hash = ""
+
+ if url[0] == "/":
+ url = self.root + "/data" + url
+ elif url.startswith(self.root):
+ url = self.root + "/data" + url[20:]
+ file["url"] = url
+
+ text.nameext_from_url(file.get("name", url), file)
+ ext = text.ext_from_url(url)
+ if not file["extension"]:
+ file["extension"] = ext
+ elif ext == "txt" and file["extension"] != "txt":
+ file["_http_validate"] = _validate
+ elif ext in exts_archive:
+ file["type"] = "archive"
+ if archives:
+ try:
+ data = self.api.posts_archives(file["hash"])
+ data.update(file)
+ post_archives.append(data)
+ except Exception as exc:
+ self.log.warning(
+ "%s: Failed to retrieve archive metadata of "
+ "'%s' (%s: %s)", post["id"], file.get("name"),
+ exc.__class__.__name__, exc)
+ post_archives.append(file.copy())
+ else:
+ post_archives.append(file.copy())
files.append(file)
post["count"] = len(files)
yield Message.Directory, post
-
for post["num"], file in enumerate(files, 1):
- post["_http_validate"] = None
- post["hash"] = file["hash"]
- post["type"] = file["type"]
- url = file["path"]
-
- text.nameext_from_url(file.get("name", url), post)
- ext = text.ext_from_url(url)
- if not post["extension"]:
- post["extension"] = ext
- elif ext == "txt" and post["extension"] != "txt":
- post["_http_validate"] = _validate
-
- if url[0] == "/":
- url = self.root + "/data" + url
- elif url.startswith(self.root):
- url = self.root + "/data" + url[20:]
- yield Message.Url, url, post
+ if "id" in file:
+ del file["id"]
+ post.update(file)
+ yield Message.Url, file["url"], post
def login(self):
username, password = self._get_auth_info()
@@ -368,17 +385,18 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
key = "id"
else:
key = "name"
+ else:
+ key = "id"
+ channel = channel_id
+ if not channel_name or not channel_id:
for ch in self.api.discord_server(server_id):
if ch[key] == channel:
break
else:
raise exception.NotFoundError("channel")
-
channel_id = ch["id"]
channel_name = ch["name"]
- elif channel_name is None:
- channel_name = ""
find_inline = re.compile(
r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
@@ -503,6 +521,10 @@ class KemonoAPI():
params = {"q": query, "o": offset, "tag": tags}
return self._pagination(endpoint, params, 50, "posts")
+ def posts_archives(self, file_hash):
+ endpoint = "/posts/archives/" + file_hash
+ return self._call(endpoint)["archive"]
+
def creator_posts(self, service, creator_id, offset=0, query=None):
endpoint = "/{}/user/{}".format(service, creator_id)
params = {"q": query, "o": offset}
diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index 6f7a238..b11f81d 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -10,9 +10,13 @@
from .common import ChapterExtractor, Extractor, Message
from .. import text, util, exception
+from ..cache import memcache
import re
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangapark\.(?:net|com|org|io|me)"
+BASE_PATTERN = (r"(?:https?://)?(?:www\.)?(?:"
+ r"(?:manga|comic|read)park\.(?:com|net|org|me|io|to)|"
+ r"parkmanga\.(?:com|net|org)|"
+ r"mpark\.to)")
class MangaparkBase():
@@ -31,57 +35,87 @@ class MangaparkBase():
match = self._match_title(title)
return match.groups() if match else (0, 0, "", "")
+ @memcache(keyarg=1)
+ def _extract_manga(self, manga_id):
+ variables = {
+ "getComicNodeId": manga_id,
+ }
+ return self._request_graphql("Get_comicNode", variables)["data"]
+
+ def _extract_chapter(self, chapter_id):
+ variables = {
+ "getChapterNodeId": chapter_id,
+ }
+ return self._request_graphql("Get_chapterNode", variables)["data"]
+
+ def _extract_chapters_all(self, manga_id):
+ variables = {
+ "comicId": manga_id,
+ }
+ return self._request_graphql("Get_comicChapterList", variables)
+
+ def _extract_chapters_source(self, source_id):
+ variables = {
+ "sourceId": source_id,
+ }
+ return self._request_graphql(
+ "get_content_source_chapterList", variables)
+
+ def _request_graphql(self, opname, variables):
+ url = self.root + "/apo/"
+ data = {
+ "query" : QUERIES[opname],
+ "variables" : variables,
+ "operationName": opname,
+ }
+ return self.request(
+ url, method="POST", json=data).json()["data"].popitem()[1]
+
class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
"""Extractor for manga-chapters from mangapark.net"""
- pattern = BASE_PATTERN + r"/title/[^/?#]+/(\d+)"
+ pattern = (BASE_PATTERN +
+ r"/(?:title/[^/?#]+/|comic/\d+/[^/?#]+/[^/?#]+-i)(\d+)")
example = "https://mangapark.net/title/MANGA/12345-en-ch.01"
def __init__(self, match):
self.root = text.root_from_url(match.group(0))
- url = "{}/title/_/{}".format(self.root, match.group(1))
- ChapterExtractor.__init__(self, match, url)
-
- def metadata(self, page):
- data = self._extract_nextdata(page)
- chapter = (data["props"]["pageProps"]["dehydratedState"]
- ["queries"][0]["state"]["data"]["data"])
- manga = chapter["comicNode"]["data"]
- source = chapter["sourceNode"]["data"]
-
- self._urls = chapter["imageSet"]["httpLis"]
- self._params = chapter["imageSet"]["wordLis"]
+ ChapterExtractor.__init__(self, match, False)
+
+ def metadata(self, _):
+ chapter = self._extract_chapter(self.groups[0])
+ manga = self._extract_manga(chapter["comicNode"]["id"])
+
+ self._urls = chapter["imageFile"]["urlList"]
vol, ch, minor, title = self._parse_chapter_title(chapter["dname"])
+ lang = chapter.get("lang") or "en"
return {
"manga" : manga["name"],
- "manga_id" : manga["id"],
- "artist" : source["artists"],
- "author" : source["authors"],
- "genre" : source["genres"],
+ "manga_id" : text.parse_int(manga["id"]),
+ "artist" : manga["artists"],
+ "author" : manga["authors"],
+ "genre" : manga["genres"],
"volume" : text.parse_int(vol),
"chapter" : text.parse_int(ch),
"chapter_minor": minor,
- "chapter_id": chapter["id"],
- "title" : chapter["title"] or title or "",
- "lang" : chapter["lang"],
- "language" : util.code_to_language(chapter["lang"]),
- "source" : source["srcTitle"],
- "source_id" : source["id"],
+ "chapter_id": text.parse_int(chapter["id"]),
+ "title" : title or "",
+ "lang" : lang,
+ "language" : util.code_to_language(lang),
+ "source" : chapter["srcTitle"],
+ "source_id" : chapter["sourceId"],
"date" : text.parse_timestamp(chapter["dateCreate"] // 1000),
}
- def images(self, page):
- return [
- (url + "?" + params, None)
- for url, params in zip(self._urls, self._params)
- ]
+ def images(self, _):
+ return [(url, None) for url in self._urls]
class MangaparkMangaExtractor(MangaparkBase, Extractor):
"""Extractor for manga from mangapark.net"""
subcategory = "manga"
- pattern = BASE_PATTERN + r"/title/(\d+)(?:-[^/?#]*)?/?$"
+ pattern = BASE_PATTERN + r"/(?:title|comic)/(\d+)(?:[/-][^/?#]*)?/?$"
example = "https://mangapark.net/title/12345-MANGA"
def __init__(self, match):
@@ -95,6 +129,8 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
url = self.root + chapter["urlPath"]
vol, ch, minor, title = self._parse_chapter_title(chapter["dname"])
+ lang = chapter.get("lang") or "en"
+
data = {
"manga_id" : self.manga_id,
"volume" : text.parse_int(vol),
@@ -102,8 +138,8 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
"chapter_minor": minor,
"chapter_id": chapter["id"],
"title" : chapter["title"] or title or "",
- "lang" : chapter["lang"],
- "language" : util.code_to_language(chapter["lang"]),
+ "lang" : lang,
+ "language" : util.code_to_language(lang),
"source" : chapter["srcTitle"],
"source_id" : chapter["sourceId"],
"date" : text.parse_timestamp(
@@ -114,45 +150,12 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
def chapters(self):
source = self.config("source")
- if not source:
- return self.chapters_all()
-
- source_id = self._select_source(source)
- self.log.debug("Requesting chapters for source_id %s", source_id)
- return self.chapters_source(source_id)
-
- def chapters_all(self):
- pnum = 0
- variables = {
- "select": {
- "comicId": self.manga_id,
- "range" : None,
- "isAsc" : not self.config("chapter-reverse"),
- }
- }
-
- while True:
- data = self._request_graphql(
- "get_content_comicChapterRangeList", variables)
-
- for item in data["items"]:
- yield from item["chapterNodes"]
-
- if not pnum:
- pager = data["pager"]
- pnum += 1
-
- try:
- variables["select"]["range"] = pager[pnum]
- except IndexError:
- return
-
- def chapters_source(self, source_id):
- variables = {
- "sourceId": source_id,
- }
- chapters = self._request_graphql(
- "get_content_source_chapterList", variables)
+ if source:
+ source_id = self._select_source(source)
+ self.log.debug("Requesting chapters for source_id %s", source_id)
+ chapters = self._extract_chapters_source(source_id)
+ else:
+ chapters = self._extract_chapters_all(self.groups[0])
if self.config("chapter-reverse"):
chapters.reverse()
@@ -180,101 +183,58 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
raise exception.StopExtraction(
"'%s' does not match any available source", source)
- def _request_graphql(self, opname, variables):
- url = self.root + "/apo/"
- data = {
- "query" : QUERIES[opname],
- "variables" : util.json_dumps(variables),
- "operationName": opname,
- }
- return self.request(
- url, method="POST", json=data).json()["data"][opname]
-
QUERIES = {
- "get_content_comicChapterRangeList": """
- query get_content_comicChapterRangeList($select: Content_ComicChapterRangeList_Select) {
- get_content_comicChapterRangeList(
- select: $select
- ) {
- reqRange{x y}
- missing
- pager {x y}
- items{
- serial
- chapterNodes {
-
- id
- data {
-
-
- id
- sourceId
-
- dbStatus
- isNormal
- isHidden
- isDeleted
- isFinal
-
- dateCreate
- datePublic
- dateModify
- lang
- volume
- serial
- dname
- title
- urlPath
-
- srcTitle srcColor
-
- count_images
-
- stat_count_post_child
- stat_count_post_reply
- stat_count_views_login
- stat_count_views_guest
-
- userId
- userNode {
-
- id
- data {
-
-id
-name
-uniq
-avatarUrl
-urlPath
-
-verified
-deleted
-banned
-
-dateCreate
-dateOnline
-
-stat_count_chapters_normal
-stat_count_chapters_others
-
-is_adm is_mod is_vip is_upr
-
- }
-
- }
-
- disqusId
-
-
- }
+ "Get_comicChapterList": """
+query Get_comicChapterList($comicId: ID!) {
+ get_comicChapterList(comicId: $comicId) {
+ data {
+ id
+ dname
+ title
+ lang
+ urlPath
+ srcTitle
+ sourceId
+ dateCreate
+ }
+ }
+}
+""",
- sser_read
+ "Get_chapterNode": """
+query Get_chapterNode($getChapterNodeId: ID!) {
+ get_chapterNode(id: $getChapterNodeId) {
+ data {
+ id
+ dname
+ lang
+ sourceId
+ srcTitle
+ dateCreate
+ comicNode{
+ id
+ }
+ imageFile {
+ urlList
+ }
}
- }
+ }
+}
+""",
+ "Get_comicNode": """
+query Get_comicNode($getComicNodeId: ID!) {
+ get_comicNode(id: $getComicNodeId) {
+ data {
+ id
+ name
+ artists
+ authors
+ genres
+ }
}
- }
+}
""",
"get_content_source_chapterList": """
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 5b354ac..5e78ad4 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -196,7 +196,8 @@ class MastodonFollowingExtractor(MastodonExtractor):
class MastodonStatusExtractor(MastodonExtractor):
"""Extractor for images from a status"""
subcategory = "status"
- pattern = BASE_PATTERN + r"/@[^/?#]+/(?!following)([^/?#]+)"
+ pattern = (BASE_PATTERN + r"/(?:@[^/?#]+|(?:users/[^/?#]+/)?statuses)"
+ r"/(?!following)([^/?#]+)")
example = "https://mastodon.social/@USER/12345"
def statuses(self):
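
The widened status pattern accepts ActivityPub-style `/users/NAME/statuses/ID` paths (and bare `/statuses/ID`) in addition to the usual `/@NAME/ID` form. A quick check, with a single-instance stand-in for the real multi-instance BASE_PATTERN:

import re

BASE = r"(?:https?://)?mastodon\.social"  # stand-in for BASE_PATTERN
pattern = re.compile(BASE + r"/(?:@[^/?#]+|(?:users/[^/?#]+/)?statuses)"
                            r"/(?!following)([^/?#]+)")

assert pattern.match("https://mastodon.social/@USER/12345")
assert pattern.match("https://mastodon.social/users/USER/statuses/12345")
assert not pattern.match("https://mastodon.social/@USER/following")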
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index 851f663..3d1722a 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -21,6 +21,7 @@ class NozomiExtractor(Extractor):
"""Base class for nozomi extractors"""
category = "nozomi"
root = "https://nozomi.la"
+ domain = "gold-usergeneratedcontent.net"
filename_fmt = "{postid} {dataid}.{extension}"
archive_fmt = "{dataid}"
@@ -31,8 +32,8 @@ class NozomiExtractor(Extractor):
data = self.metadata()
for post_id in map(str, self.posts()):
- url = "https://j.nozomi.la/post/{}/{}/{}.json".format(
- post_id[-1], post_id[-3:-1], post_id)
+ url = "https://j.{}/post/{}/{}/{}.json".format(
+ self.domain, post_id[-1], post_id[-3:-1], post_id)
response = self.request(url, fatal=False)
if response.status_code >= 400:
@@ -76,8 +77,8 @@ class NozomiExtractor(Extractor):
ext = "webp"
post["extension"] = ext
- post["url"] = url = "https://{}.nozomi.la/{}/{}/{}.{}".format(
- subdomain, did[-1], did[-3:-1], did, ext)
+ post["url"] = url = "https://{}.{}/{}/{}/{}.{}".format(
+ subdomain, self.domain, did[-1], did[-3:-1], did, ext)
yield Message.Url, url, post
def posts(self):
@@ -168,7 +169,7 @@ class NozomiSearchExtractor(NozomiExtractor):
negative = []
def nozomi(path):
- url = "https://j.nozomi.la/" + path + ".nozomi"
+ url = "https://j.{}/{}.nozomi".format(self.domain, path)
return decode_nozomi(self.request(url).content)
for tag in self.tags:
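
Only the media domain changes here; nozomi.la's two-level URL sharding stays intact: the last digit of the post ID names the first directory, the two digits before it the second. Worked through for a hypothetical post ID:

domain = "gold-usergeneratedcontent.net"
post_id = "12345678"  # hypothetical post ID

url = "https://j.{}/post/{}/{}/{}.json".format(
    domain, post_id[-1], post_id[-3:-1], post_id)
# -> https://j.gold-usergeneratedcontent.net/post/8/67/12345678.json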
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index f5a33d5..b8c6acb 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -27,7 +27,7 @@ class PatreonExtractor(Extractor):
_warning = True
def _init(self):
- if not self.cookies_check(("session_id",)):
+ if not self.cookies_check(("session_id",), subdomains=True):
if self._warning:
PatreonExtractor._warning = False
self.log.warning("no 'session_id' cookie set")
@@ -329,10 +329,11 @@ class PatreonCreatorExtractor(PatreonExtractor):
"""Extractor for a creator's works"""
subcategory = "creator"
pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
- r"/(?!(?:home|join|posts|login|signup)(?:$|[/?#]))"
+ r"/(?!(?:home|create|login|signup|search|posts|messages)"
+ r"(?:$|[/?#]))"
r"(?:profile/creators|(?:c/)?([^/?#]+)(?:/posts)?)"
r"/?(?:\?([^#]+))?")
- example = "https://www.patreon.com/USER"
+ example = "https://www.patreon.com/c/USER"
def posts(self):
creator, query = self.groups
@@ -370,7 +371,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
data = None
data = self._extract_bootstrap(page)
return data["campaign"]["data"]["id"]
- except (KeyError, ValueError) as exc:
+ except Exception as exc:
if data:
self.log.debug(data)
raise exception.StopExtraction(
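
Two separate hardening changes in patreon.py: the creator pattern's negative lookahead now rejects more reserved top-level paths, and campaign-ID extraction catches any exception instead of only KeyError/ValueError. The lookahead behavior in isolation:

import re

pattern = re.compile(
    r"(?:https?://)?(?:www\.)?patreon\.com"
    r"/(?!(?:home|create|login|signup|search|posts|messages)"
    r"(?:$|[/?#]))"
    r"(?:profile/creators|(?:c/)?([^/?#]+)(?:/posts)?)"
    r"/?(?:\?([^#]+))?")

assert pattern.match("https://www.patreon.com/c/USER")        # new example form
assert pattern.match("https://www.patreon.com/USER/posts")
assert not pattern.match("https://www.patreon.com/messages")  # reserved path
assert not pattern.match("https://www.patreon.com/search?q=x")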
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 121c7bf..1a299c1 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -132,6 +132,9 @@ class PinterestExtractor(Extractor):
"extension": "txt",
"media_id": block.get("id")}
+ elif type == "story_pin_static_sticker_block":
+ continue
+
else:
self.log.warning("%s: Unsupported story block '%s'",
pin.get("id"), type)
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index 7708b5c..9e7d75d 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -10,6 +10,9 @@
from .common import Extractor, Message
from .. import text
+from datetime import datetime
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?sex\.com"
class SexcomExtractor(Extractor):
@@ -23,8 +26,20 @@ class SexcomExtractor(Extractor):
def items(self):
yield Message.Directory, self.metadata()
for pin in map(self._parse_pin, self.pins()):
- if pin:
- yield Message.Url, pin["url"], pin
+ if not pin:
+ continue
+
+ url = pin["url"]
+ parts = url.rsplit("/", 4)
+ try:
+ pin["date_url"] = dt = datetime(
+ int(parts[1]), int(parts[2]), int(parts[3]))
+ if "date" not in pin:
+ pin["date"] = dt
+ except Exception:
+ pass
+
+ yield Message.Url, url, pin
def metadata(self):
return {}
@@ -53,10 +68,18 @@ class SexcomExtractor(Extractor):
self.log.warning('Unable to fetch %s ("%s %s")',
url, response.status_code, response.reason)
return None
+
+ if "/pin/" in response.url:
+ return self._parse_pin_legacy(response)
+ if "/videos/" in response.url:
+ return self._parse_pin_video(response)
+ return self._parse_pin_gifs(response)
+
+ def _parse_pin_legacy(self, response):
extr = text.extract_from(response.text)
data = {}
- data["_http_headers"] = {"Referer": url}
+ data["_http_headers"] = {"Referer": response.url}
data["thumbnail"] = extr('itemprop="thumbnail" content="', '"')
data["type"] = extr('<h1>' , '<').rstrip(" -").strip().lower()
data["title"] = text.unescape(extr('itemprop="name">' , '<'))
@@ -82,7 +105,8 @@ class SexcomExtractor(Extractor):
src = (text.extr(iframe, ' src="', '"') or
text.extr(iframe, " src='", "'"))
if not src:
- self.log.warning("Unable to fetch media from %s", url)
+ self.log.warning(
+ "Unable to fetch media from %s", response.url)
return None
data["extension"] = None
data["url"] = "ytdl:" + src
@@ -100,27 +124,60 @@ class SexcomExtractor(Extractor):
return data
+ def _parse_pin_gifs(self, response):
+ extr = text.extract_from(response.text)
+
+ data = {
+ "_http_headers": {"Referer": response.url},
+ "type": "gif",
+ "url": extr(' href="', '"'),
+ "title": text.unescape(extr("<title>", " Gif | Sex.com<")),
+ "pin_id": text.parse_int(extr(
+ 'rel="canonical" href="', '"').rpartition("/")[2]),
+ "tags": text.split_html(extr("</h1>", "</section>")),
+ }
+
+ return text.nameext_from_url(data["url"], data)
+
+ def _parse_pin_video(self, response):
+ extr = text.extract_from(response.text)
+
+ if not self.cookies.get("CloudFront-Key-Pair-Id", domain=".sex.com"):
+ self.log.warning("CloudFront cookies required for video downloads")
+
+ data = {
+ "_ytdl_manifest": "hls",
+ "extension": "mp4",
+ "type": "video",
+ "title": text.unescape(extr("<title>", " | Sex.com<")),
+ "pin_id": text.parse_int(extr(
+ 'rel="canonical" href="', '"').rpartition("/")[2]),
+ "tags": text.split_html(extr(
+ 'event_name="video_tags_click"', "<div data-testid=")
+ .partition(">")[2]),
+ "url": "ytdl:" + extr('<source src="', '"'),
+ }
+
+ return data
+
class SexcomPinExtractor(SexcomExtractor):
"""Extractor for a pinned image or video on www.sex.com"""
subcategory = "pin"
directory_fmt = ("{category}",)
- pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+)(?!.*#related$)"
+ pattern = (BASE_PATTERN +
+ r"(/(?:pin|\w\w/(?:gif|video)s)/\d+/?)(?!.*#related$)")
example = "https://www.sex.com/pin/12345-TITLE/"
- def __init__(self, match):
- SexcomExtractor.__init__(self, match)
- self.pin_id = match.group(1)
-
def pins(self):
- return ("{}/pin/{}/".format(self.root, self.pin_id),)
+ return (self.root + self.groups[0],)
class SexcomRelatedPinExtractor(SexcomPinExtractor):
"""Extractor for related pins on www.sex.com"""
subcategory = "related-pin"
directory_fmt = ("{category}", "related {original_pin[pin_id]}")
- pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+).*#related$"
+ pattern = BASE_PATTERN + r"(/pin/(\d+)/?).*#related$"
example = "https://www.sex.com/pin/12345#related"
def metadata(self):
@@ -129,7 +186,7 @@ class SexcomRelatedPinExtractor(SexcomPinExtractor):
def pins(self):
url = "{}/pin/related?pinId={}&limit=24&offset=0".format(
- self.root, self.pin_id)
+ self.root, self.groups[1])
return self._pagination(url)
@@ -137,18 +194,14 @@ class SexcomPinsExtractor(SexcomExtractor):
"""Extractor for a user's pins on www.sex.com"""
subcategory = "pins"
directory_fmt = ("{category}", "{user}")
- pattern = r"(?:https?://)?(?:www\.)?sex\.com/user/([^/?#]+)/pins/"
+ pattern = BASE_PATTERN + r"/user/([^/?#]+)/pins/"
example = "https://www.sex.com/user/USER/pins/"
- def __init__(self, match):
- SexcomExtractor.__init__(self, match)
- self.user = match.group(1)
-
def metadata(self):
- return {"user": text.unquote(self.user)}
+ return {"user": text.unquote(self.groups[0])}
def pins(self):
- url = "{}/user/{}/pins/".format(self.root, self.user)
+ url = "{}/user/{}/pins/".format(self.root, self.groups[0])
return self._pagination(url)
@@ -156,18 +209,14 @@ class SexcomLikesExtractor(SexcomExtractor):
"""Extractor for a user's liked pins on www.sex.com"""
subcategory = "likes"
directory_fmt = ("{category}", "{user}", "Likes")
- pattern = r"(?:https?://)?(?:www\.)?sex\.com/user/([^/?#]+)/likes/"
+ pattern = BASE_PATTERN + r"/user/([^/?#]+)/likes/"
example = "https://www.sex.com/user/USER/likes/"
- def __init__(self, match):
- SexcomExtractor.__init__(self, match)
- self.user = match.group(1)
-
def metadata(self):
- return {"user": text.unquote(self.user)}
+ return {"user": text.unquote(self.groups[0])}
def pins(self):
- url = "{}/user/{}/likes/".format(self.root, self.user)
+ url = "{}/user/{}/likes/".format(self.root, self.groups[0])
return self._pagination(url)
@@ -175,15 +224,12 @@ class SexcomBoardExtractor(SexcomExtractor):
"""Extractor for pins from a board on www.sex.com"""
subcategory = "board"
directory_fmt = ("{category}", "{user}", "{board}")
- pattern = (r"(?:https?://)?(?:www\.)?sex\.com/user"
+ pattern = (BASE_PATTERN + r"/user"
r"/([^/?#]+)/(?!(?:following|pins|repins|likes)/)([^/?#]+)")
example = "https://www.sex.com/user/USER/BOARD/"
- def __init__(self, match):
- SexcomExtractor.__init__(self, match)
- self.user, self.board = match.groups()
-
def metadata(self):
+ self.user, self.board = self.groups
return {
"user" : text.unquote(self.user),
"board": text.unquote(self.board),
@@ -198,19 +244,18 @@ class SexcomSearchExtractor(SexcomExtractor):
"""Extractor for search results on www.sex.com"""
subcategory = "search"
directory_fmt = ("{category}", "search", "{search[query]}")
- pattern = (r"(?:https?://)?(?:www\.)?sex\.com/((?:"
+ pattern = (BASE_PATTERN + r"/((?:"
r"(pic|gif|video)s/([^/?#]*)|search/(pic|gif|video)s"
r")/?(?:\?([^#]+))?)")
example = "https://www.sex.com/search/pics?query=QUERY"
- def __init__(self, match):
- SexcomExtractor.__init__(self, match)
- self.path = match.group(1)
+ def _init(self):
+ self.path, t1, query_alt, t2, query = self.groups
- self.search = text.parse_query(match.group(5))
- self.search["type"] = match.group(2) or match.group(4)
+ self.search = text.parse_query(query)
+ self.search["type"] = t1 or t2
if "query" not in self.search:
- self.search["query"] = match.group(3) or ""
+ self.search["query"] = query_alt or ""
def metadata(self):
return {"search": self.search}
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 07c9b21..cdccd4c 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -48,7 +48,12 @@ class SkebExtractor(Extractor):
def items(self):
metadata = self.metadata()
for user_name, post_num in self.posts():
- response, post = self._get_post_data(user_name, post_num)
+ try:
+ response, post = self._get_post_data(user_name, post_num)
+ except Exception as exc:
+ self.log.error("@%s/%s: %s: %s", user_name, post_num,
+ exc.__class__.__name__, exc)
+ continue
if metadata:
post.update(metadata)
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 6c43941..5d0ec46 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -39,6 +39,8 @@ class SubscribestarExtractor(Extractor):
for post_html in self.posts():
media = self._media_from_post(post_html)
data = self._data_from_post(post_html)
+ data["title"] = text.unescape(text.extr(
+ data["content"], "<h1>", "</h1>"))
yield Message.Directory, data
for num, item in enumerate(media, 1):
item.update(data)
@@ -55,7 +57,9 @@ class SubscribestarExtractor(Extractor):
while True:
response = Extractor.request(self, url, **kwargs)
- if response.history and "/verify_subscriber" in response.url:
+ if response.history and (
+ "/verify_subscriber" in response.url or
+ "/age_confirmation_warning" in response.url):
raise exception.StopExtraction(
"HTTP redirect to %s", response.url)
diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py
index 30f310d..4c1da7a 100644
--- a/gallery_dl/extractor/tiktok.py
+++ b/gallery_dl/extractor/tiktok.py
@@ -25,14 +25,8 @@ class TiktokExtractor(Extractor):
def _init(self):
self.audio = self.config("audio", True)
self.video = self.config("videos", True)
- if not self.config("avatar", True):
- self.avatar = util.false
def items(self):
- # We assume that all of the URLs served by urls() come from the same
- # author.
- downloaded_avatar = not self.avatar()
-
for tiktok_url in self.urls():
tiktok_url = self._sanitize_url(tiktok_url)
data = self._extract_rehydration_data(tiktok_url)
@@ -49,18 +43,10 @@ class TiktokExtractor(Extractor):
post = video_detail["itemInfo"]["itemStruct"]
author = post["author"]
- post["user"] = user = author["uniqueId"]
+ post["user"] = author["uniqueId"]
post["date"] = text.parse_timestamp(post["createTime"])
original_title = title = post["desc"]
- if not downloaded_avatar:
- avatar_url = author["avatarLarger"]
- avatar = self._generate_avatar(
- avatar_url, post, user, author["id"])
- yield Message.Directory, avatar
- yield Message.Url, avatar_url, avatar
- downloaded_avatar = True
-
yield Message.Directory, post
ytdl_media = False
@@ -111,44 +97,29 @@ class TiktokExtractor(Extractor):
})
yield Message.Url, "ytdl:" + tiktok_url, post
- # If we couldn't download the avatar because the given user has no
- # posts, we'll need to make a separate request for the user's page
- # and download the avatar that way.
- if not downloaded_avatar:
- user_name = self.avatar()
- profile_url = "https://www.tiktok.com/@{}".format(user_name)
- data = self._extract_rehydration_data(profile_url)
- data = data["webapp.user-detail"]["userInfo"]["user"]
- data["user"] = user_name
- avatar_url = data["avatarLarger"]
- avatar = self._generate_avatar(
- avatar_url, data, user_name, data["id"])
- yield Message.Directory, avatar
- yield Message.Url, avatar_url, avatar
-
- def avatar(self):
- return False
-
- def _generate_avatar(self, avatar_url, data, user_name, user_id):
- avatar = text.nameext_from_url(avatar_url, data.copy())
- avatar.update({
- "type" : "avatar",
- "title" : "@" + user_name,
- "id" : user_id,
- "img_id": avatar["filename"].partition("~")[0],
- "num" : 0,
- })
- return avatar
-
def _sanitize_url(self, url):
return text.ensure_http_scheme(url.replace("/photo/", "/video/", 1))
def _extract_rehydration_data(self, url):
- html = self.request(url).text
- data = text.extr(
- html, '<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" '
- 'type="application/json">', '</script>')
- return util.json_loads(data)["__DEFAULT_SCOPE__"]
+ tries = 0
+ while True:
+ try:
+ html = self.request(url).text
+ data = text.extr(
+ html, '<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" '
+ 'type="application/json">', '</script>')
+ return util.json_loads(data)["__DEFAULT_SCOPE__"]
+ except ValueError:
+ # We failed to retrieve rehydration data. This happens
+ # relatively frequently when making many requests, so
+ # retry.
+ if tries >= self._retries:
+ raise
+ tries += 1
+ self.log.warning("%s: Failed to retrieve rehydration data "
+ "(%s/%s)", url.rpartition("/")[2], tries,
+ self._retries)
+ self.sleep(self._timeout, "retry")
def _extract_audio(self, post):
audio = post["music"]
@@ -179,7 +150,7 @@ class TiktokExtractor(Extractor):
elif status == 10204:
self.log.error("%s: Requested post not available", url)
elif status == 10231:
- self.log.error("%s: Region locked - Try downloading with a"
+ self.log.error("%s: Region locked - Try downloading with a "
"VPN/proxy connection", url)
else:
self.log.error(
@@ -230,7 +201,10 @@ class TiktokUserExtractor(TiktokExtractor):
pattern = BASE_PATTERN + r"/@([\w_.-]+)/?(?:$|\?|#)"
example = "https://www.tiktok.com/@USER"
- def urls(self):
+ def _init(self):
+ self.avatar = self.config("avatar", True)
+
+ def items(self):
"""Attempt to use yt-dlp/youtube-dl to extract links from a
user's page"""
@@ -263,19 +237,39 @@ class TiktokUserExtractor(TiktokExtractor):
ytdl_instance = ytdl.construct_YoutubeDL(
module, self, user_opts, extr_opts)
- # transfer cookies to ytdl
+ # Transfer cookies to ytdl.
if self.cookies:
set_cookie = ytdl_instance.cookiejar.set_cookie
for cookie in self.cookies:
set_cookie(cookie)
+ user_name = self.groups[0]
+ profile_url = "{}/@{}".format(self.root, user_name)
+ if self.avatar:
+ avatar_url, avatar = self._generate_avatar(user_name, profile_url)
+ yield Message.Directory, avatar
+ yield Message.Url, avatar_url, avatar
+
with ytdl_instance as ydl:
info_dict = ydl._YoutubeDL__extract_info(
- "{}/@{}".format(self.root, self.groups[0]),
- ydl.get_info_extractor("TikTokUser"),
+ profile_url, ydl.get_info_extractor("TikTokUser"),
False, {}, True)
# This should include video and photo posts in /video/ URL form.
- return [video["url"] for video in info_dict["entries"]]
-
- def avatar(self):
- return self.groups[0]
+ for video in info_dict["entries"]:
+ data = {"_extractor": TiktokPostExtractor}
+ yield Message.Queue, video["url"].partition("?")[0], data
+
+ def _generate_avatar(self, user_name, profile_url):
+ data = self._extract_rehydration_data(profile_url)
+ data = data["webapp.user-detail"]["userInfo"]["user"]
+ data["user"] = user_name
+ avatar_url = data["avatarLarger"]
+ avatar = text.nameext_from_url(avatar_url, data.copy())
+ avatar.update({
+ "type" : "avatar",
+ "title" : "@" + user_name,
+ "id" : data["id"],
+ "img_id": avatar["filename"].partition("~")[0],
+ "num" : 0,
+ })
+ return (avatar_url, avatar)
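
`_extract_rehydration_data` now retries when the `__UNIVERSAL_DATA_FOR_REHYDRATION__` payload is missing or truncated, which TikTok produces intermittently under heavy request volume. A self-contained sketch of the same shape, using plain `requests` and hypothetical retry/delay values in place of the extractor's `_retries`/`_timeout`; like the diff, it retries only on `ValueError`:

import json
import time

import requests

MARKER = ('<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" '
          'type="application/json">')

def extract_rehydration_data(url, retries=3, delay=5.0):
    tries = 0
    while True:
        try:
            html = requests.get(url, timeout=30).text
            start = html.index(MARKER) + len(MARKER)
            payload = html[start:html.index("</script>", start)]
            return json.loads(payload)["__DEFAULT_SCOPE__"]
        except ValueError:
            # str.index() raises ValueError when the marker is missing;
            # json.loads() raises JSONDecodeError (a ValueError subclass)
            # on a truncated payload
            if tries >= retries:
                raise
            tries += 1
            time.sleep(delay)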
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index bc135ad..ac1400e 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -64,16 +64,22 @@ class ZerochanExtractor(BooruExtractor):
def _parse_entry_html(self, entry_id):
url = "{}/{}".format(self.root, entry_id)
- extr = text.extract_from(self.request(url).text)
+ page = self.request(url).text
+ try:
+ jsonld = self._extract_jsonld(page)
+ except Exception:
+ return {"id": entry_id}
+
+ extr = text.extract_from(page)
data = {
"id" : text.parse_int(entry_id),
- "author" : text.parse_unicode_escapes(extr(' "name": "', '"')),
- "file_url": extr('"contentUrl": "', '"'),
- "date" : text.parse_datetime(extr('"datePublished": "', '"')),
- "width" : text.parse_int(extr('"width": "', ' ')),
- "height" : text.parse_int(extr('"height": "', ' ')),
- "size" : text.parse_bytes(extr('"contentSize": "', 'B')),
+ "author" : jsonld["author"]["name"],
+ "file_url": jsonld["contentUrl"],
+ "date" : text.parse_datetime(jsonld["datePublished"]),
+ "width" : text.parse_int(jsonld["width"][:-3]),
+ "height" : text.parse_int(jsonld["height"][:-3]),
+ "size" : text.parse_bytes(jsonld["contentSize"][:-1]),
"path" : text.split_html(extr(
'class="breadcrumbs', '</nav>'))[2:],
"uploader": extr('href="/user/', '"'),
@@ -86,7 +92,7 @@ class ZerochanExtractor(BooruExtractor):
tags = data["tags"] = []
for tag in html.split("<li class=")[1:]:
category = text.extr(tag, '"', '"')
- name = text.extr(tag, 'data-tag="', '"')
+ name = text.unescape(text.extr(tag, 'data-tag="', '"'))
tags.append(category.partition(" ")[0].capitalize() + ":" + name)
return data
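
The zerochan entry parser now reads the page's JSON-LD block instead of scraping each `"key": "value"` pair, degrading to a bare `{"id": ...}` record when the block can't be parsed. A sketch of that fallback, assuming the data sits in a standard `<script type="application/ld+json">` tag (the `_extract_jsonld` helper itself is defined outside this excerpt):

import json

from gallery_dl import text

def parse_entry(page, entry_id):
    try:
        jsonld = json.loads(text.extr(
            page, '<script type="application/ld+json">', "</script>"))
    except ValueError:
        return {"id": entry_id}
    return {
        "id"      : text.parse_int(entry_id),
        "author"  : jsonld["author"]["name"],
        "file_url": jsonld["contentUrl"],
        "date"    : text.parse_datetime(jsonld["datePublished"]),
        "width"   : text.parse_int(jsonld["width"][:-3]),   # strip " px"
        "height"  : text.parse_int(jsonld["height"][:-3]),
        "size"    : text.parse_bytes(jsonld["contentSize"][:-1]),  # strip "B"
    }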
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 06a580b..76e6517 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -686,8 +686,10 @@ class CustomNone():
# v128.0 release on 2024-07-09 has ordinal 739076
+# v137.0 release on 2025-04-01 has ordinal 739342
# 735492 == 739076 - 128 * 28
-_ff_ver = (datetime.date.today().toordinal() - 735492) // 28
+# 735506 == 739342 - 137 * 28
+_ff_ver = (datetime.date.today().toordinal() - 735506) // 28
NONE = CustomNone()
EPOCH = datetime.datetime(1970, 1, 1)
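
Rebasing the offset on the v137.0 release keeps the spoofed Firefox version advancing by one major release every 28 days; anchoring the constant to a known release date makes it easy to verify:

import datetime

# v137.0 shipped 2025-04-01; subtracting 137 releases of 28 days
# each from its ordinal yields the new base constant
assert datetime.date(2025, 4, 1).toordinal() == 739342
assert 739342 - 137 * 28 == 735506

def ff_version(today):
    return (today.toordinal() - 735506) // 28

assert ff_version(datetime.date(2025, 4, 1)) == 137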
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 558b02e..43b234d 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.29.2"
+__version__ = "1.29.3"
__variant__ = None
diff --git a/test/test_config.py b/test/test_config.py
index 1d49d77..be58456 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -168,6 +168,7 @@ class TestConfig(unittest.TestCase):
options = (
(("b",) , "c", [1, 2, 3]),
(("e", "f"), "g", 234),
+ (("e", "f"), "g", 234),
)
self.assertEqual(config.get(("b",) , "c"), "text")
diff --git a/test/test_cookies.py b/test/test_cookies.py
index 60c83ff..9ba562c 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -153,19 +153,49 @@ class TestCookieUtils(unittest.TestCase):
self.assertFalse(extr.cookies_check(("a",)))
self.assertFalse(extr.cookies_check(("a", "b")))
- extr.cookies.set("a", "1")
- self.assertFalse(extr.cookies_check(("a",)))
+ extr.cookies.set("nd_a", "1")
+ self.assertFalse(extr.cookies_check(("nd_a",)))
- extr.cookies.set("a", "1", domain=extr.cookies_domain)
- self.assertTrue(extr.cookies_check(("a",)))
+ extr.cookies.set("cd_a", "1", domain=extr.cookies_domain)
+ self.assertTrue(extr.cookies_check(("cd_a",)))
- extr.cookies.set("a", "1", domain="www" + extr.cookies_domain)
+ extr.cookies.set("wd_a", "1", domain="www" + extr.cookies_domain)
+ self.assertFalse(extr.cookies_check(("wd_a",)))
self.assertEqual(len(extr.cookies), 3)
- self.assertTrue(extr.cookies_check(("a",)))
- extr.cookies.set("b", "2", domain=extr.cookies_domain)
- extr.cookies.set("c", "3", domain=extr.cookies_domain)
- self.assertTrue(extr.cookies_check(("a", "b", "c")))
+ extr.cookies.set("cd_b", "2", domain=extr.cookies_domain)
+ extr.cookies.set("cd_c", "3", domain=extr.cookies_domain)
+ self.assertFalse(extr.cookies_check(("nd_a", "cd_b", "cd_c")))
+ self.assertTrue(extr.cookies_check(("cd_a", "cd_b", "cd_c")))
+ self.assertFalse(extr.cookies_check(("wd_a", "cd_b", "cd_c")))
+ self.assertEqual(len(extr.cookies), 5)
+
+ def test_check_cookies_domain_sub(self):
+ extr = _get_extractor("test")
+ self.assertFalse(extr.cookies, "empty")
+ extr.cookies_domain = ".example.org"
+
+ self.assertFalse(extr.cookies_check(("a",), subdomains=True))
+ self.assertFalse(extr.cookies_check(("a", "b"), subdomains=True))
+
+ extr.cookies.set("nd_a", "1")
+ self.assertFalse(extr.cookies_check(("nd_a",), subdomains=True))
+
+ extr.cookies.set("cd_a", "1", domain=extr.cookies_domain)
+ self.assertTrue(extr.cookies_check(("cd_a",), subdomains=True))
+
+ extr.cookies.set("wd_a", "1", domain="www" + extr.cookies_domain)
+ self.assertTrue(extr.cookies_check(("wd_a",), subdomains=True))
+
+ extr.cookies.set("cd_b", "2", domain=extr.cookies_domain)
+ extr.cookies.set("cd_c", "3", domain=extr.cookies_domain)
+ self.assertEqual(len(extr.cookies), 5)
+ self.assertFalse(extr.cookies_check(
+ ("nd_a", "cd_b", "cd_c"), subdomains=True))
+ self.assertTrue(extr.cookies_check(
+ ("cd_a", "cd_b", "cd_c"), subdomains=True))
+ self.assertTrue(extr.cookies_check(
+ ("wd_a", "cd_b", "cd_c"), subdomains=True))
def test_check_cookies_expires(self):
extr = _get_extractor("test")
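
Together, the renamed cookies (`nd_` no domain, `cd_` cookie domain, `wd_` www sub-domain) and the new `test_check_cookies_domain_sub` pin down the intended semantics: by default a cookie only counts when its domain equals `cookies_domain` exactly, while `subdomains=True` also accepts cookies set for sub-domains. A sketch of the domain check these assertions imply (the actual `cookies_check` change lives in extractor/common.py, outside this excerpt):

def domain_matches(cookie_domain, cookies_domain, subdomains=False):
    # exact match always counts; with subdomains=True, a cookie for
    # "www.example.org" also satisfies a ".example.org" requirement
    if cookie_domain == cookies_domain:
        return True
    return subdomains and cookie_domain.endswith(cookies_domain)

assert not domain_matches("www.example.org", ".example.org")
assert domain_matches("www.example.org", ".example.org", subdomains=True)
assert not domain_matches("", ".example.org", subdomains=True)  # no domain set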
diff --git a/test/test_extractor.py b/test/test_extractor.py
index cc85fb2..dfc5ff8 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -104,27 +104,39 @@ class TestExtractorModule(unittest.TestCase):
@unittest.skipIf(not results, "no test data")
def test_categories(self):
for result in results.all():
- url = result["#url"]
- cls = result["#class"]
- try:
- extr = cls.from_url(url)
- except ImportError as exc:
- if exc.name in ("youtube_dl", "yt_dlp"):
- print("Skipping '{}' category checks".format(cls.category))
- continue
- raise
- self.assertTrue(extr, url)
-
- categories = result.get("#category")
- if categories:
- base, cat, sub = categories
+ if result.get("#fail"):
+ try:
+ self.assertCategories(result)
+ except AssertionError:
+ pass
+ else:
+ self.fail(result["#url"] + ": Test did not fail")
else:
- cat = cls.category
- sub = cls.subcategory
- base = cls.basecategory
- self.assertEqual(extr.category, cat, url)
- self.assertEqual(extr.subcategory, sub, url)
- self.assertEqual(extr.basecategory, base, url)
+ self.assertCategories(result)
+
+ def assertCategories(self, result):
+ url = result["#url"]
+ cls = result["#class"]
+
+ try:
+ extr = cls.from_url(url)
+ except ImportError as exc:
+ if exc.name in ("youtube_dl", "yt_dlp"):
+ print("Skipping '{}' category checks".format(cls.category))
+ return
+ raise
+ self.assertTrue(extr, url)
+
+ categories = result.get("#category")
+ if categories:
+ base, cat, sub = categories
+ else:
+ cat = cls.category
+ sub = cls.subcategory
+ base = cls.basecategory
+ self.assertEqual(extr.category, cat, url)
+ self.assertEqual(extr.subcategory, sub, url)
+ self.assertEqual(extr.basecategory, base, url)
@unittest.skipIf(not results, "no test data")
def test_unique_pattern_matches(self):
@@ -133,7 +145,8 @@ class TestExtractorModule(unittest.TestCase):
append = test_urls.append
for result in results.all():
- append((result["#url"], result["#class"]))
+ if not result.get("#fail"):
+ append((result["#url"], result["#class"]))
# iterate over all testcase URLs
for url, extr1 in test_urls:
diff --git a/test/test_results.py b/test/test_results.py
index 3136743..28db6c3 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -92,6 +92,15 @@ class TestExtractorResults(unittest.TestCase):
self.assertGreaterEqual(value, range.start, msg=msg)
def _run_test(self, result):
+ if result.get("#fail"):
+ del result["#fail"]
+ try:
+ self._run_test(result)
+ except AssertionError:
+ return
+ else:
+ self.fail("Test did not fail")
+
base, cat, sub = result_categories(result)
result.pop("#comment", None)
result.pop("#category", None)