author    Unit 193 <unit193@unit193.net>    2025-09-07 20:40:45 -0400
committer Unit 193 <unit193@unit193.net>    2025-09-07 20:40:45 -0400
commit    243b2597edb922fe7e0b0d887e80bb7ebbe72ab7 (patch)
tree      1a42ddec8ae1f21e3c4c88849818e1ea9140aaaa
parent    1df55d9de48105dace9cc16f1511dba3c9a6da6f (diff)
New upstream version 1.30.6 (tag: upstream/1.30.6)
-rw-r--r--  CHANGELOG.md                           |  65
-rw-r--r--  PKG-INFO                               |   6
-rw-r--r--  README.rst                             |   4
-rw-r--r--  data/completion/_gallery-dl            |   2
-rw-r--r--  data/completion/gallery-dl.fish        |   2
-rw-r--r--  data/man/gallery-dl.1                  |   4
-rw-r--r--  data/man/gallery-dl.conf.5             |  88
-rw-r--r--  docs/gallery-dl.conf                   |  16
-rw-r--r--  gallery_dl.egg-info/PKG-INFO           |   6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt        |   2
-rw-r--r--  gallery_dl/downloader/ytdl.py          |  17
-rw-r--r--  gallery_dl/extractor/__init__.py       |   2
-rw-r--r--  gallery_dl/extractor/bunkr.py          |  54
-rw-r--r--  gallery_dl/extractor/chevereto.py      |  77
-rw-r--r--  gallery_dl/extractor/comick.py         |  47
-rw-r--r--  gallery_dl/extractor/common.py         |   2
-rw-r--r--  gallery_dl/extractor/deviantart.py     |  12
-rw-r--r--  gallery_dl/extractor/erome.py          |  21
-rw-r--r--  gallery_dl/extractor/fansly.py         | 318
-rw-r--r--  gallery_dl/extractor/hentaifoundry.py  |   5
-rw-r--r--  gallery_dl/extractor/imagebam.py       |   1
-rw-r--r--  gallery_dl/extractor/instagram.py      |  23
-rw-r--r--  gallery_dl/extractor/kemono.py         |   3
-rw-r--r--  gallery_dl/extractor/khinsider.py      |   1
-rw-r--r--  gallery_dl/extractor/reddit.py         | 110
-rw-r--r--  gallery_dl/extractor/shimmie2.py       |  11
-rw-r--r--  gallery_dl/extractor/tumblr.py         |  61
-rw-r--r--  gallery_dl/extractor/tungsten.py       | 100
-rw-r--r--  gallery_dl/extractor/twitter.py        |  44
-rw-r--r--  gallery_dl/extractor/vk.py             |  79
-rw-r--r--  gallery_dl/extractor/zerochan.py       |   3
-rw-r--r--  gallery_dl/formatter.py                |   1
-rw-r--r--  gallery_dl/option.py                   |   6
-rw-r--r--  gallery_dl/version.py                  |   2
-rw-r--r--  test/test_formatter.py                 |  20
-rw-r--r--  test/test_results.py                   |   6
36 files changed, 995 insertions(+), 226 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 227e251..8fb17a4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,36 +1,43 @@
-## 1.30.5 - 2025-08-24
+## 1.30.6 - 2025-09-06
### Extractors
#### Additions
-- [shimmie2] support `noz.rip/booru` ([#8101](https://github.com/mikf/gallery-dl/issues/8101))
-- [sizebooru] add support ([#7667](https://github.com/mikf/gallery-dl/issues/7667))
-- [twitter] add `highlights` extractor ([#7826](https://github.com/mikf/gallery-dl/issues/7826))
-- [twitter] add `home` extractor ([#7974](https://github.com/mikf/gallery-dl/issues/7974))
+- [chevereto] add `video` extractor ([#8149](https://github.com/mikf/gallery-dl/issues/8149))
+- [comick] add `covers` extractor
+- [fansly] add support ([#4401](https://github.com/mikf/gallery-dl/issues/4401))
+- [instagram] add `stories-tray` extractor ([#6582](https://github.com/mikf/gallery-dl/issues/6582))
+- [shimmie2] support `co.llection.pics` ([#8166](https://github.com/mikf/gallery-dl/issues/8166))
+- [tungsten] add support ([#8061](https://github.com/mikf/gallery-dl/issues/8061))
+- [vk] add `wall-post` extractor ([#474](https://github.com/mikf/gallery-dl/issues/474) [#6378](https://github.com/mikf/gallery-dl/issues/6378) [#8159](https://github.com/mikf/gallery-dl/issues/8159))
#### Fixes
-- [aryion] fix pagination ([#8091](https://github.com/mikf/gallery-dl/issues/8091))
-- [rule34] support using `api-key` & `user-id` ([#8077](https://github.com/mikf/gallery-dl/issues/8077) [#8088](https://github.com/mikf/gallery-dl/issues/8088) [#8098](https://github.com/mikf/gallery-dl/issues/8098))
-- [tumblr:search] fix `ValueError: not enough values to unpack` ([#8079](https://github.com/mikf/gallery-dl/issues/8079))
-- [twitter] handle `KeyError: 'result'` for retweets ([#8072](https://github.com/mikf/gallery-dl/issues/8072))
-- [zerochan] expect `500 Internal Server Error` responses for HTML requests ([#8097](https://github.com/mikf/gallery-dl/issues/8097))
+- [bunkr] fix downloading albums with more than 100 files ([#8150](https://github.com/mikf/gallery-dl/issues/8150) [#8155](https://github.com/mikf/gallery-dl/issues/8155) [#8175](https://github.com/mikf/gallery-dl/issues/8175))
+- [chevereto:user] fix names starting with an `a` ([#8149](https://github.com/mikf/gallery-dl/issues/8149))
+- [common] prevent exception when using empty `user-agent` ([#8116](https://github.com/mikf/gallery-dl/issues/8116))
+- [deviantart:search] fix extraction ([#8083](https://github.com/mikf/gallery-dl/issues/8083))
+- [hentaifoundry:story] fix `src` & `description` extraction ([#8163](https://github.com/mikf/gallery-dl/issues/8163))
+- [imagebam] update guard page bypass cookies ([#8123](https://github.com/mikf/gallery-dl/issues/8123))
+- [kemono] fix `.bin` archive files not being added to archives list ([#8156](https://github.com/mikf/gallery-dl/issues/8156))
+- [reddit] fix `TypeError` when processing comments ([#8139](https://github.com/mikf/gallery-dl/issues/8139))
+- [tumblr] fix pagination when using `date-max`
+- [twitter] prevent exceptions in `_transform_community()` ([#8134](https://github.com/mikf/gallery-dl/issues/8134))
+- [twitter] prevent `KeyError: 'name'` in `_transform_user()` ([#8154](https://github.com/mikf/gallery-dl/issues/8154))
+- [twitter] fix `KeyError: 'core'` when processing communities ([#8141](https://github.com/mikf/gallery-dl/issues/8141))
+- [zerochan] fix `500 Internal Server Error` during login ([#8097](https://github.com/mikf/gallery-dl/issues/8097) [#8114](https://github.com/mikf/gallery-dl/issues/8114))
#### Improvements
-- [civitai:search] add `token` option ([#8093](https://github.com/mikf/gallery-dl/issues/8093))
-- [instagram] warn about lower quality video downloads ([#7921](https://github.com/mikf/gallery-dl/issues/7921) [#8078](https://github.com/mikf/gallery-dl/issues/8078))
-- [instagram] remove `candidates` warning ([#7921](https://github.com/mikf/gallery-dl/issues/7921) [#7989](https://github.com/mikf/gallery-dl/issues/7989) [#8071](https://github.com/mikf/gallery-dl/issues/8071))
-- [oauth] improve error messages ([#8086](https://github.com/mikf/gallery-dl/issues/8086))
-- [pixiv] distinguish empty from deleted profiles ([#8066](https://github.com/mikf/gallery-dl/issues/8066))
-- [twitter] update API endpoint query hashes & parameters
+- [comick] detect broken chapters ([#8054](https://github.com/mikf/gallery-dl/issues/8054))
+- [erome] handle reposts on user profiles ([#6582](https://github.com/mikf/gallery-dl/issues/6582))
+- [instagram] improve video quality warning regex ([#8078](https://github.com/mikf/gallery-dl/issues/8078))
+- [jpgfish] update domain to `jpg6.su`
+- [reddit] add `api` & `limit` options ([#7997](https://github.com/mikf/gallery-dl/issues/7997) [#8012](https://github.com/mikf/gallery-dl/issues/8012) [#8092](https://github.com/mikf/gallery-dl/issues/8092))
+- [reddit] support video embeds ([#8139](https://github.com/mikf/gallery-dl/issues/8139))
+- [tumblr:tagged] support `/archive/tagged/` URLs ([#8160](https://github.com/mikf/gallery-dl/issues/8160))
#### Metadata
-- [batoto] extract more metadata ([#7994](https://github.com/mikf/gallery-dl/issues/7994))
-- [instagram:highlights] extract `author` & `owner` & `user` metadata ([#7846](https://github.com/mikf/gallery-dl/issues/7846))
-- [newgrounds] extract `slug` metadata ([#8064](https://github.com/mikf/gallery-dl/issues/8064))
-- [twitter] extract `community` metadata ([#7424](https://github.com/mikf/gallery-dl/issues/7424))
-#### Removals
-- [shimmie2] remove `sizechangebooru.com` ([#7667](https://github.com/mikf/gallery-dl/issues/7667))
-- [zzup] remove module ([#4604](https://github.com/mikf/gallery-dl/issues/4604))
+- [khinsider] extract `description` metadata
+- [tumblr:tagged] provide `search_tags` metadata ([#8160](https://github.com/mikf/gallery-dl/issues/8160))
+- [vk] parse `date` & `description` metadata ([#8029](https://github.com/mikf/gallery-dl/issues/8029))
+- [vk:album] extract more metadata ([#8029](https://github.com/mikf/gallery-dl/issues/8029))
### Downloaders
-- [ytdl] improve playlist handling ([#8085](https://github.com/mikf/gallery-dl/issues/8085))
-### Scripts
-- implement `rm` helper script
-- add `-g/--git` command-line options
-- [util] add `git()` & `lines()` helper functions
+- [ytdl] implement `_ytdl_manifest_cookies`
### Miscellaneous
-- [config] add `conf` argument to `config.load()` ([#8084](https://github.com/mikf/gallery-dl/issues/8084))
+- [formatter] add `R` conversion - extract URLs ([#8125](https://github.com/mikf/gallery-dl/issues/8125))
+- [options] add `-a` as short option for `--user-agent`
+- [scripts/init] implement `-s/--subcategory`
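
Two of the additions above can be exercised directly from the command line. A minimal usage sketch, assuming the new `R` conversion is applied with the formatter's usual `{field!c}` syntax and that `description` exists as a metadata field for the given site:

    # -a is the new short form of --user-agent
    gallery-dl -a "Mozilla/5.0" --print "{description!R}" URL

Here `--print` emits the formatted string for each file, so `!R` would print whatever URLs it extracts from the description text.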
diff --git a/PKG-INFO b/PKG-INFO
index 559c580..353bfae 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: gallery_dl
-Version: 1.30.5
+Version: 1.30.6
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -139,9 +139,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.6/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.6/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 9790cac..71c7cf8 100644
--- a/README.rst
+++ b/README.rst
@@ -79,9 +79,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.6/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.6/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 07cfcd9..539ec1b 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -11,7 +11,7 @@ _arguments -s -S \
{-d,--destination}'[Target location for file downloads]':'<path>' \
{-D,--directory}'[Exact location for file downloads]':'<path>' \
{-X,--extractors}'[Load external extractors from PATH]':'<path>' \
---user-agent'[User-Agent request header]':'<ua>' \
+{-a,--user-agent}'[User-Agent request header]':'<ua>' \
--clear-cache'[Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)]':'<module>' \
--compat'[Restore legacy '\''category'\'' names]' \
{-U,--update-check}'[Check if a newer version is available]' \
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index b7e4fe4..84ff5b5 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -5,7 +5,7 @@ complete -c gallery-dl -x -s 'f' -l 'filename' -d 'Filename format string for do
complete -c gallery-dl -x -a '(__fish_complete_directories)' -s 'd' -l 'destination' -d 'Target location for file downloads'
complete -c gallery-dl -x -a '(__fish_complete_directories)' -s 'D' -l 'directory' -d 'Exact location for file downloads'
complete -c gallery-dl -x -a '(__fish_complete_directories)' -s 'X' -l 'extractors' -d 'Load external extractors from PATH'
-complete -c gallery-dl -x -l 'user-agent' -d 'User-Agent request header'
+complete -c gallery-dl -x -s 'a' -l 'user-agent' -d 'User-Agent request header'
complete -c gallery-dl -x -l 'clear-cache' -d 'Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)'
complete -c gallery-dl -l 'compat' -d 'Restore legacy "category" names'
complete -c gallery-dl -s 'U' -l 'update-check' -d 'Check if a newer version is available'
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 89d7116..3cda42a 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2025-08-24" "1.30.5" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2025-09-06" "1.30.6" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -35,7 +35,7 @@ Exact location for file downloads
.B "\-X, \-\-extractors" \f[I]PATH\f[]
Load external extractors from PATH
.TP
-.B "\-\-user\-agent" \f[I]UA\f[]
+.B "\-a, \-\-user\-agent" \f[I]UA\f[]
User-Agent request header
.TP
.B "\-\-clear\-cache" \f[I]MODULE\f[]
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 4870130..335195f 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2025-08-24" "1.30.5" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2025-09-06" "1.30.6" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -3020,6 +3020,17 @@ greater than the per-page limit, gallery-dl will stop after the first
batch. The value cannot be less than 1.
+.SS extractor.erome.user.reposts
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Include reposts when extracting albums from a user profile.
+
+
.SS extractor.exhentai.domain
.IP "Type:" 6
\f[I]string\f[]
@@ -3321,6 +3332,32 @@ Note: \f[I]comments\f[] can also be enabled via
\f[I]fanbox.comments\f[]
+.SS extractor.fansly.format
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]integers\f[]
+
+.IP "Default:" 9
+\f[I][303, 302, 1, 2, 4]\f[]
+
+.IP "Description:" 4
+Selects the file format to extract.
+
+When more than one format is given, the first available one is selected.
+
+
+.SS extractor.fansly.token
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Example:" 4
+"kX7pL9qW3zT2rY8mB5nJ4vC6xF1tA0hD8uE2wG9yR3sQ7iZ4oM5jN6cP8lV0bK2tU9aL1eW"
+
+.IP "Description:" 4
+\f[I]authorization\f[] header value
+used for requests to \f[I]https://apiv3.fansly.com/api\f[]
+to access locked content.
+
+
.SS extractor.flickr.access-token & .access-token-secret
.IP "Type:" 6
\f[I]string\f[]
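
The matching configuration block (added to docs/gallery-dl.conf further down in this commit) looks as follows; the token value is a placeholder:

    "fansly":
    {
        "token" : "<authorization header value>",
        "format": [303, 302, 1, 2, 4]
    }

When several formats are listed, the first one the API reports as available wins.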
@@ -5311,6 +5348,33 @@ Sets the \f[I]quality\f[] query parameter of issue pages. (\f[I]"lq"\f[] or \f[I
or \f[I]"hq"\f[] if not present.
+.SS extractor.reddit.api
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"oauth"\f[]
+
+.IP "Description:" 4
+Selects which API endpoints to use.
+
+\f[I]"oauth"\f[]
+Use the OAuth API at \f[I]https://oauth.reddit.com\f[]
+
+Requires
+\f[I]client-id & user-agent\f[]
+and uses a
+\f[I]refresh token\f[]
+for authentication.
+
+\f[I]"rest"\f[]
+Use the REST API at \f[I]https://www.reddit.com\f[]
+
+Uses
+\f[I]cookies\f[]
+for authentication.
+
+
.SS extractor.reddit.comments
.IP "Type:" 6
\f[I]integer\f[]
@@ -5378,6 +5442,23 @@ Ignore all submissions posted before/after this date.
Ignore all submissions posted before/after the submission with this ID.
+.SS extractor.reddit.limit
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Number of results to return in a single API query.
+
+This value specifies the \f[I]limit\f[] parameter
+used for API requests when retrieving paginated results.
+
+\f[I]null\f[] means not including this parameter at all
+and letting Reddit choose a default.
+
+
.SS extractor.reddit.previews
.IP "Type:" 6
\f[I]bool\f[]
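
A configuration sketch combining the two new options: switch to the cookie-based REST endpoints and restore the per-request page size of 100 that used to be hard-coded (see the reddit.py hunks below):

    "reddit":
    {
        "api"  : "rest",
        "limit": 100
    }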
@@ -6181,7 +6262,10 @@ use an extra HTTP request to find the URL to its full-resolution version.
\f[I]string\f[]
.IP "Default:" 9
-\f[I]"offset"\f[]
+.br
+* \f[I]"before"\f[] if \f[I]date-max\f[] is set
+.br
+* \f[I]"offset"\f[] otherwise
.IP "Description:" 4
Controls how to paginate over blog posts.
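
In practice the following is enough to page backwards through a blog by date; "pagination" can stay at its new null default, because a set date-max now implies the "before" strategy (the value format here assumes the default "%Y-%m-%dT%H:%M:%S" date-format):

    "tumblr":
    {
        "date-max"  : "2025-01-01T00:00:00",
        "pagination": null
    }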
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 1890b72..f1e3833 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -287,6 +287,12 @@
"metadata": false
}
},
+ "erome":
+ {
+ "user": {
+ "reposts": false
+ }
+ },
"exhentai":
{
"username": "",
@@ -322,6 +328,12 @@
"fee-max" : null,
"metadata": false
},
+ "fansly":
+ {
+ "token": "",
+
+ "format": [303, 302, 1, 2, 4]
+ },
"flickr":
{
"access-token" : null,
@@ -617,6 +629,7 @@
"user-agent" : null,
"refresh-token": null,
+ "api" : "oauth",
"comments" : 0,
"morecomments": false,
"embeds" : true,
@@ -625,6 +638,7 @@
"date-format" : "%Y-%m-%dT%H:%M:%S",
"id-min" : null,
"id-max" : null,
+ "limit" : null,
"previews" : true,
"recursion" : 0,
"selftext" : null,
@@ -771,7 +785,7 @@
"inline" : true,
"offset" : 0,
"original" : true,
- "pagination": "offset",
+ "pagination": null,
"posts" : "all",
"ratelimit" : "abort",
"reblogs" : true,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 559c580..353bfae 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: gallery_dl
-Version: 1.30.5
+Version: 1.30.6
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -139,9 +139,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.6/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.6/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 6f7a991..1a0bc19 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -94,6 +94,7 @@ gallery_dl/extractor/everia.py
gallery_dl/extractor/exhentai.py
gallery_dl/extractor/facebook.py
gallery_dl/extractor/fanbox.py
+gallery_dl/extractor/fansly.py
gallery_dl/extractor/fantia.py
gallery_dl/extractor/fapachi.py
gallery_dl/extractor/fapello.py
@@ -240,6 +241,7 @@ gallery_dl/extractor/toyhouse.py
gallery_dl/extractor/tsumino.py
gallery_dl/extractor/tumblr.py
gallery_dl/extractor/tumblrgallery.py
+gallery_dl/extractor/tungsten.py
gallery_dl/extractor/twibooru.py
gallery_dl/extractor/twitter.py
gallery_dl/extractor/unsplash.py
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 9ef8816..a56a6be 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -11,6 +11,7 @@
from .common import DownloaderBase
from .. import ytdl, text
from xml.etree import ElementTree
+from http.cookiejar import Cookie
import os
@@ -85,7 +86,8 @@ class YoutubeDLDownloader(DownloaderBase):
info_dict = self._extract_manifest(
ytdl_instance, url, manifest,
kwdict.pop("_ytdl_manifest_data", None),
- kwdict.pop("_ytdl_manifest_headers", None))
+ kwdict.pop("_ytdl_manifest_headers", None),
+ kwdict.pop("_ytdl_manifest_cookies", None))
else:
info_dict = self._extract_info(ytdl_instance, url)
except Exception as exc:
@@ -194,10 +196,21 @@ class YoutubeDLDownloader(DownloaderBase):
return ytdl.extract_info(url, download=False)
def _extract_manifest(self, ytdl, url, manifest_type, manifest_data=None,
- headers=None):
+ headers=None, cookies=None):
extr = ytdl.get_info_extractor("Generic")
video_id = extr._generic_id(url)
+ if cookies is not None:
+ if isinstance(cookies, dict):
+ cookies = cookies.items()
+ set_cookie = ytdl.cookiejar.set_cookie
+ for name, value in cookies:
+ set_cookie(Cookie(
+ 0, name, value, None, False,
+ "", False, False, "/", False,
+ False, None, False, None, None, {},
+ ))
+
if manifest_type == "hls":
if manifest_data is None:
try:
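
For readability, the same Cookie construction with its keyword arguments spelled out; this is stdlib http.cookiejar only, no gallery-dl internals:

    from http.cookiejar import Cookie

    def make_cookie(name, value):
        # equivalent to the positional Cookie(...) call in the hunk above
        return Cookie(
            version=0,
            name=name, value=value,
            port=None, port_specified=False,
            domain="", domain_specified=False, domain_initial_dot=False,
            path="/", path_specified=False,
            secure=False,
            expires=None, discard=False,
            comment=None, comment_url=None,
            rest={},
        )

Extractors hand these values in through the `_ytdl_manifest_cookies` kwdict entry, either as a dict or as (name, value) pairs, as the fansly extractor below does.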
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index aabaa93..574d1e2 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -56,6 +56,7 @@ modules = [
"exhentai",
"facebook",
"fanbox",
+ "fansly",
"fantia",
"fapello",
"fapachi",
@@ -190,6 +191,7 @@ modules = [
"tsumino",
"tumblr",
"tumblrgallery",
+ "tungsten",
"twibooru",
"twitter",
"urlgalleries",
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index eba1678..cf5bce1 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -62,7 +62,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
root = "https://bunkr.si"
root_dl = "https://get.bunkrr.su"
root_api = "https://apidl.bunkr.ru"
- archive_fmt = "{album_id}_{id|id_url}"
+ archive_fmt = "{album_id}_{id|id_url|slug}"
pattern = BASE_PATTERN + r"/a/([^/?#]+)"
example = "https://bunkr.si/a/ID"
@@ -134,13 +134,13 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
def fetch_album(self, album_id):
# album metadata
- page = self.request(self.root + "/a/" + album_id).text
+ page = self.request(f"{self.root}/a/{album_id}?advanced=1").text
title = text.unescape(text.unescape(text.extr(
page, 'property="og:title" content="', '"')))
# files
- items = list(text.extract_iter(
- page, '<div class="grid-images_box', "</a>"))
+ items = text.extr(
+ page, "window.albumFiles = [", "</script>").split("\n},\n")
return self._extract_files(items), {
"album_id" : album_id,
@@ -156,30 +156,29 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
for item in items:
try:
- url = text.unescape(text.extr(item, ' href="', '"'))
- if url[0] == "/":
- url = self.root + url
-
- file = self._extract_file(url)
- info = text.split_html(item)
- if not file["name"]:
- file["name"] = info[-3]
- file["size"] = info[-2]
- file["date"] = text.parse_datetime(
- info[-1], "%H:%M:%S %d/%m/%Y")
+ data_id = text.extr(item, " id: ", ",").strip()
+ file = self._extract_file(data_id)
+
+ file["name"] = util.json_loads(text.extr(
+ item, 'original:', ',\n').replace("\\'", "'"))
+ file["slug"] = util.json_loads(text.extr(
+ item, 'slug: ', ',\n'))
+ file["uuid"] = text.extr(
+ item, 'name: "', ".")
+ file["size"] = text.parse_int(text.extr(
+ item, "size: ", " ,\n"))
+ file["date"] = text.parse_datetime(text.extr(
+ item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y")
yield file
except exception.ControlException:
raise
except Exception as exc:
self.log.error("%s: %s", exc.__class__.__name__, exc)
- self.log.debug("", exc_info=exc)
-
- def _extract_file(self, webpage_url):
- page = self.request(webpage_url).text
- data_id = text.extr(page, 'data-file-id="', '"')
- referer = self.root_dl + "/file/" + data_id
+ self.log.debug("%s", item, exc_info=exc)
+ def _extract_file(self, data_id):
+ referer = f"{self.root_dl}/file/{data_id}"
headers = {"Referer": referer, "Origin": self.root_dl}
data = self.request_json(self.endpoint, method="POST", headers=headers,
json={"id": data_id})
@@ -190,14 +189,9 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
else:
file_url = data["url"]
- file_name = text.extr(page, "<h1", "<").rpartition(">")[2]
- fallback = text.extr(page, 'property="og:url" content="', '"')
-
return {
"file" : file_url,
- "name" : text.unescape(file_name),
"id_url" : data_id,
- "_fallback" : (fallback,) if fallback else (),
"_http_headers" : {"Referer": referer},
"_http_validate": self._validate,
}
@@ -222,7 +216,13 @@ class BunkrMediaExtractor(BunkrAlbumExtractor):
def fetch_album(self, album_id):
try:
- file = self._extract_file(self.root + album_id)
+ page = self.request(f"{self.root}{album_id}").text
+ data_id = text.extr(page, 'data-file-id="', '"')
+ file = self._extract_file(data_id)
+ file["name"] = text.unquote(text.unescape(text.extr(
+ page, "<h1", "<").rpartition(">")[2]))
+ file["slug"] = album_id.rpartition("/")[2]
+ file["uuid"] = text.extr(page, "/thumbs/", ".")
except Exception as exc:
self.log.error("%s: %s", exc.__class__.__name__, exc)
return (), {}
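
The album page now inlines file metadata as JavaScript object literals instead of HTML boxes. A self-contained sketch of the slicing approach used above; the sample entry is invented, the marker strings mirror the diff:

    # hypothetical window.albumFiles entry
    item = '  id: 12345 ,\n  name: "a1b2c3d4.mp4",\n  slug: "xyz",\n'

    def extr(txt, begin, end, default=""):
        # minimal stand-in for gallery_dl.text.extr
        try:
            first = txt.index(begin) + len(begin)
            return txt[first:txt.index(end, first)]
        except ValueError:
            return default

    data_id = extr(item, " id: ", ",").strip()   # '12345'
    uuid    = extr(item, 'name: "', ".")         # 'a1b2c3d4'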
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index 1da7e23..6ba4d08 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -15,7 +15,7 @@ from .. import text, util
class CheveretoExtractor(BaseExtractor):
"""Base class for chevereto extractors"""
basecategory = "chevereto"
- directory_fmt = ("{category}", "{user}", "{album}",)
+ directory_fmt = ("{category}", "{user}", "{album}")
archive_fmt = "{id}"
def _init(self):
@@ -39,7 +39,7 @@ class CheveretoExtractor(BaseExtractor):
BASE_PATTERN = CheveretoExtractor.update({
"jpgfish": {
- "root": "https://jpg5.su",
+ "root": "https://jpg6.su",
"pattern": r"jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)",
},
"imgkiwi": {
@@ -54,7 +54,7 @@ BASE_PATTERN = CheveretoExtractor.update({
class CheveretoImageExtractor(CheveretoExtractor):
- """Extractor for chevereto Images"""
+ """Extractor for chevereto images"""
subcategory = "image"
pattern = BASE_PATTERN + r"(/im(?:g|age)/[^/?#]+)"
example = "https://jpg2.su/img/TITLE.ID"
@@ -74,22 +74,66 @@ class CheveretoImageExtractor(CheveretoExtractor):
url, b"seltilovessimpcity@simpcityhatesscrapers",
fromhex=True)
- image = {
+ file = {
"id" : self.path.rpartition(".")[2],
"url" : url,
- "album": text.extr(extr("Added to <a", "/a>"), ">", "<"),
+ "album": text.remove_html(extr(
+ "Added to <a", "</a>").rpartition(">")[2]),
"date" : text.parse_datetime(extr(
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"user" : extr('username: "', '"'),
}
- text.nameext_from_url(image["url"], image)
- yield Message.Directory, image
- yield Message.Url, image["url"], image
+ text.nameext_from_url(file["url"], file)
+ yield Message.Directory, file
+ yield Message.Url, file["url"], file
+
+
+class CheveretoVideoExtractor(CheveretoExtractor):
+ """Extractor for chevereto videos"""
+ subcategory = "video"
+ pattern = BASE_PATTERN + r"(/video/[^/?#]+)"
+ example = "https://imagepond.net/video/TITLE.ID"
+
+ def items(self):
+ url = self.root + self.path
+ page = self.request(url).text
+ extr = text.extract_from(page)
+
+ file = {
+ "id" : self.path.rpartition(".")[2],
+ "title" : text.unescape(extr(
+ 'property="og:title" content="', '"')),
+ "thumbnail": extr(
+ 'property="og:image" content="', '"'),
+ "url" : extr(
+ 'property="og:video" content="', '"'),
+ "width" : text.parse_int(extr(
+ 'property="video:width" content="', '"')),
+ "height" : text.parse_int(extr(
+ 'property="video:height" content="', '"')),
+ "duration" : extr(
+ 'class="far fa-clock"></i>', "—"),
+ "album": text.remove_html(extr(
+ "Added to <a", "</a>").rpartition(">")[2]),
+ "date" : text.parse_datetime(extr(
+ '<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
+ "user" : extr('username: "', '"'),
+ }
+
+ try:
+ min, _, sec = file["duration"].partition(":")
+ file["duration"] = int(min) * 60 + int(sec)
+ except Exception:
+ pass
+
+ text.nameext_from_url(file["url"], file)
+ yield Message.Directory, file
+ yield Message.Url, file["url"], file
class CheveretoAlbumExtractor(CheveretoExtractor):
- """Extractor for chevereto Albums"""
+ """Extractor for chevereto albums"""
subcategory = "album"
pattern = BASE_PATTERN + r"(/a(?:lbum)?/[^/?#]+(?:/sub)?)"
example = "https://jpg2.su/album/TITLE.ID"
@@ -109,9 +153,9 @@ class CheveretoAlbumExtractor(CheveretoExtractor):
class CheveretoUserExtractor(CheveretoExtractor):
- """Extractor for chevereto Users"""
+ """Extractor for chevereto users"""
subcategory = "user"
- pattern = BASE_PATTERN + r"(/(?!img|image|a(?:lbum)?)[^/?#]+(?:/albums)?)"
+ pattern = BASE_PATTERN + r"(/[^/?#]+(?:/albums)?)"
example = "https://jpg2.su/USER"
def items(self):
@@ -119,8 +163,11 @@ class CheveretoUserExtractor(CheveretoExtractor):
if self.path.endswith("/albums"):
data = {"_extractor": CheveretoAlbumExtractor}
+ for url in self._pagination(url):
+ yield Message.Queue, url, data
else:
- data = {"_extractor": CheveretoImageExtractor}
-
- for url in self._pagination(url):
- yield Message.Queue, url, data
+ data_image = {"_extractor": CheveretoImageExtractor}
+ data_video = {"_extractor": CheveretoVideoExtractor}
+ for url in self._pagination(url):
+ data = data_video if "/video/" in url else data_image
+ yield Message.Queue, url, data
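
Usage of the new video extractor follows its example attribute; user profiles now queue image and video links to the appropriate extractor automatically:

    gallery-dl https://imagepond.net/video/TITLE.ID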
diff --git a/gallery_dl/extractor/comick.py b/gallery_dl/extractor/comick.py
index 6c54156..a6aec38 100644
--- a/gallery_dl/extractor/comick.py
+++ b/gallery_dl/extractor/comick.py
@@ -8,7 +8,7 @@
"""Extractors for https://comick.io/"""
-from .common import ChapterExtractor, MangaExtractor, Message
+from .common import GalleryExtractor, ChapterExtractor, MangaExtractor, Message
from .. import text
from ..cache import memcache
@@ -21,6 +21,42 @@ class ComickBase():
root = "https://comick.io"
+class ComickCoversExtractor(ComickBase, GalleryExtractor):
+ """Extractor for comick.io manga covers"""
+ subcategory = "covers"
+ directory_fmt = ("{category}", "{manga}", "Covers")
+ filename_fmt = "{volume:>02}_{lang}.{extension}"
+ archive_fmt = "c_{id}"
+ pattern = BASE_PATTERN + r"/comic/([\w-]+)/cover"
+ example = "https://comick.io/comic/MANGA/cover"
+
+ def metadata(self, page):
+ manga = _manga_info(self, self.groups[0])
+ self.slug = manga['manga_slug']
+ return manga
+
+ def images(self, page):
+ url = f"{self.root}/comic/{self.slug}/cover"
+ page = self.request(url).text
+ data = self._extract_nextdata(page)
+
+ covers = data["props"]["pageProps"]["comic"]["md_covers"]
+ covers.reverse()
+
+ return [
+ (f"https://meo.comick.pictures/{cover['b2key']}", {
+ "id" : cover["id"],
+ "width" : cover["w"],
+ "height": cover["h"],
+ "size" : cover["s"],
+ "lang" : cover["locale"],
+ "volume": text.parse_int(cover["vol"]),
+ "cover" : cover,
+ })
+ for cover in covers
+ ]
+
+
class ComickChapterExtractor(ComickBase, ChapterExtractor):
"""Extractor for comick.io manga chapters"""
archive_fmt = "{chapter_hid}_{page}"
@@ -60,8 +96,15 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
}
def images(self, page):
+ if not self._images[0].get("b2key") and all(
+ not img.get("b2key") for img in self._images):
+ self.log.error(
+ "%s: Broken Chapter (missing 'b2key' for all pages)",
+ self.groups[1])
+ return ()
+
return [
- ("https://meo.comick.pictures/" + img["b2key"], {
+ (f"https://meo.comick.pictures/{img['b2key']}", {
"width" : img["w"],
"height" : img["h"],
"size" : img["s"],
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 719fc62..568f435 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -462,7 +462,7 @@ class Extractor():
headers["Referer"] = self.root + "/"
custom_ua = self.config("user-agent")
- if custom_ua is None or custom_ua == "auto":
+ if not custom_ua or custom_ua == "auto":
pass
elif custom_ua == "browser":
headers["User-Agent"] = _browser_useragent(None)
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index d900f4c..39690da 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1356,7 +1356,8 @@ class DeviantartSearchExtractor(DeviantartExtractor):
def _search_html(self, params):
url = self.root + "/search"
-
+ find = text.re(r'''href="https://www.deviantart.com/([^/?#]+)'''
+ r'''/(art|journal)/(?:[^"]+-)?(\d+)''').findall
while True:
response = self.request(url, params=params)
@@ -1364,12 +1365,11 @@ class DeviantartSearchExtractor(DeviantartExtractor):
raise exception.AbortExtraction("HTTP redirect to login page")
page = response.text
- for dev in DeviantartDeviationExtractor.pattern.findall(
- page)[2::3]:
+ for user, type, did in find(page)[:-3:3]:
yield {
- "deviationId": dev[3],
- "author": {"username": dev[0]},
- "isJournal": dev[2] == "journal",
+ "deviationId": did,
+ "author": {"username": user},
+ "isJournal": type == "journal",
}
cursor = text.extr(page, r'\"cursor\":\"', '\\',)
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index 7beeac5..68cfdbc 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -48,10 +48,13 @@ class EromeExtractor(Extractor):
self.sleep(5.0, "check")
def _pagination(self, url, params):
- for params["page"] in itertools.count(1):
+ find_albums = EromeAlbumExtractor.pattern.findall
+
+ for params["page"] in itertools.count(
+ text.parse_int(params.get("page"), 1)):
page = self.request(url, params=params).text
- album_ids = EromeAlbumExtractor.pattern.findall(page)[::2]
+ album_ids = find_albums(page)[::2]
yield from album_ids
if len(album_ids) < 36:
@@ -114,12 +117,18 @@ class EromeAlbumExtractor(EromeExtractor):
class EromeUserExtractor(EromeExtractor):
subcategory = "user"
- pattern = BASE_PATTERN + r"/(?!a/|search\?)([^/?#]+)"
+ pattern = BASE_PATTERN + r"/(?!a/|search\?)([^/?#]+)(?:/?\?([^#]+))?"
example = "https://www.erome.com/USER"
def albums(self):
- url = f"{self.root}/{self.groups[0]}"
- return self._pagination(url, {})
+ user, qs = self.groups
+ url = f"{self.root}/{user}"
+
+ params = text.parse_query(qs)
+ if "t" not in params and not self.config("reposts", False):
+ params["t"] = "posts"
+
+ return self._pagination(url, params)
class EromeSearchExtractor(EromeExtractor):
@@ -128,7 +137,7 @@ class EromeSearchExtractor(EromeExtractor):
example = "https://www.erome.com/search?q=QUERY"
def albums(self):
- url = self.root + "/search"
+ url = f"{self.root}/search"
params = text.parse_query(self.groups[0])
return self._pagination(url, params)
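
Reposts remain excluded by default. For a one-off run they can be enabled without a config file, assuming the usual -o shorthand that applies a key to the current extractor:

    gallery-dl -o reposts=true https://www.erome.com/USER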
diff --git a/gallery_dl/extractor/fansly.py b/gallery_dl/extractor/fansly.py
new file mode 100644
index 0000000..31d242f
--- /dev/null
+++ b/gallery_dl/extractor/fansly.py
@@ -0,0 +1,318 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://fansly.com/"""
+
+from .common import Extractor, Message
+from .. import text, util
+import time
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?fansly\.com"
+
+
+class FanslyExtractor(Extractor):
+ """Base class for fansly extractors"""
+ category = "fansly"
+ root = "https://fansly.com"
+ directory_fmt = ("{category}", "{account[username]} ({account[id]})")
+ filename_fmt = "{id}_{num}_{file[id]}.{extension}"
+ archive_fmt = "{file[id]}"
+
+ def _init(self):
+ self.api = FanslyAPI(self)
+ self.formats = self.config("format") or (303, 302, 1, 2, 4)
+
+ def items(self):
+ for post in self.posts():
+ files = self._extract_files(post)
+ post["count"] = len(files)
+ post["date"] = text.parse_timestamp(post["createdAt"])
+
+ yield Message.Directory, post
+ for post["num"], file in enumerate(files, 1):
+ post.update(file)
+ url = file["url"]
+ yield Message.Url, url, text.nameext_from_url(url, post)
+
+ def _extract_files(self, post):
+ files = []
+ for attachment in post.pop("attachments"):
+ try:
+ self._extract_attachment(files, post, attachment)
+ except Exception as exc:
+ self.log.debug("", exc_info=exc)
+ self.log.error(
+ "%s/%s, Failed to extract media (%s: %s)",
+ post["id"], attachment.get("id"),
+ exc.__class__.__name__, exc)
+ return files
+
+ def _extract_attachment(self, files, post, attachment):
+ media = attachment["media"]
+ variants = {
+ variant["type"]: variant
+ for variant in media.pop("variants", ())
+ }
+ variants[media["type"]] = media
+
+ for fmt in self.formats:
+ if fmt in variants and (variant := variants[fmt]).get("locations"):
+ break
+ else:
+ return self.log.warning(
+ "%s/%s: Requested format not available",
+ post["id"], attachment["id"])
+
+ mime = variant["mimetype"]
+ location = variant.pop("locations")[0]
+ if "metadata" in variant:
+ try:
+ variant.update(util.json_loads(variant.pop("metadata")))
+ except Exception:
+ pass
+
+ file = {
+ **variant,
+ "format": fmt,
+ "date": text.parse_timestamp(media["createdAt"]),
+ "date_updated": text.parse_timestamp(media["updatedAt"]),
+ }
+
+ if "metadata" in location:
+ # manifest
+ meta = location["metadata"]
+
+ file["type"] = "video"
+ files.append({
+ "file": file,
+ "url": f"ytdl:{location['location']}",
+ # "_fallback": (media["locations"][0]["location"],),
+ "_ytdl_manifest":
+ "dash" if mime == "application/dash+xml" else "hls",
+ "_ytdl_manifest_cookies": (
+ ("CloudFront-Key-Pair-Id", meta["Key-Pair-Id"]),
+ ("CloudFront-Signature" , meta["Signature"]),
+ ("CloudFront-Policy" , meta["Policy"]),
+ ),
+ })
+ else:
+ file["type"] = "image" if mime.startswith("image/") else "video"
+ files.append({
+ "file": file,
+ "url" : location["location"],
+ })
+
+
+class FanslyPostExtractor(FanslyExtractor):
+ subcategory = "post"
+ pattern = rf"{BASE_PATTERN}/post/(\d+)"
+ example = "https://fansly.com/post/1234567890"
+
+ def posts(self):
+ return self.api.post(self.groups[0])
+
+
+class FanslyHomeExtractor(FanslyExtractor):
+ subcategory = "home"
+ pattern = rf"{BASE_PATTERN}/home(?:/(?:subscribed()|list/(\d+)))?"
+ example = "https://fansly.com/home"
+
+ def posts(self):
+ subscribed, list_id = self.groups
+ if subscribed is not None:
+ mode = "1"
+ elif list_id is not None:
+ mode = None
+ else:
+ mode = "0"
+ return self.api.timeline_home(mode, list_id)
+
+
+class FanslyListExtractor(FanslyExtractor):
+ subcategory = "list"
+ pattern = rf"{BASE_PATTERN}/lists/(\d+)"
+ example = "https://fansly.com/lists/1234567890"
+
+ def items(self):
+ base = f"{self.root}/"
+ for account in self.api.lists_itemsnew(self.groups[0]):
+ account["_extractor"] = FanslyCreatorPostsExtractor
+ url = f"{base}{account['username']}/posts"
+ yield Message.Queue, url, account
+
+
+class FanslyListsExtractor(FanslyExtractor):
+ subcategory = "lists"
+ pattern = rf"{BASE_PATTERN}/lists"
+ example = "https://fansly.com/lists"
+
+ def items(self):
+ base = f"{self.root}/lists/"
+ for list in self.api.lists_account():
+ list["_extractor"] = FanslyListExtractor
+ url = f"{base}{list['id']}#{list['label']}"
+ yield Message.Queue, url, list
+
+
+class FanslyCreatorPostsExtractor(FanslyExtractor):
+ subcategory = "creator-posts"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/posts"
+ example = "https://fansly.com/CREATOR/posts"
+
+ def posts(self):
+ creator = self.groups[0]
+ if creator.startswith("id:"):
+ account = self.api.account_by_id(creator[3:])
+ else:
+ account = self.api.account(creator)
+ wall_id = account["walls"][0]["id"]
+ return self.api.timeline_new(account["id"], wall_id)
+
+
+class FanslyAPI():
+ ROOT = "https://apiv3.fansly.com"
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+
+ token = extractor.config("token")
+ if not token:
+ self.extractor.log.warning("No 'token' provided")
+
+ self.headers = {
+ "fansly-client-ts": None,
+ "Origin" : extractor.root,
+ "authorization" : token,
+ }
+
+ def account(self, username):
+ endpoint = "/v1/account"
+ params = {"usernames": username}
+ return self._call(endpoint, params)[0]
+
+ def account_by_id(self, account_id):
+ endpoint = "/v1/account"
+ params = {"ids": account_id}
+ return self._call(endpoint, params)[0]
+
+ def accounts_by_id(self, account_ids):
+ endpoint = "/v1/account"
+ params = {"ids": ",".join(map(str, account_ids))}
+ return self._call(endpoint, params)
+
+ def lists_account(self):
+ endpoint = "/v1/lists/account"
+ params = {"itemId": ""}
+ return self._call(endpoint, params)
+
+ def lists_itemsnew(self, list_id, sort="3"):
+ endpoint = "/v1/lists/itemsnew"
+ params = {
+ "listId" : list_id,
+ "limit" : 50,
+ "after" : None,
+ "sortMode": sort,
+ }
+ return self._pagination(endpoint, params)
+
+ def post(self, post_id):
+ endpoint = "/v1/post"
+ params = {"ids": post_id}
+ return self._update_posts(self._call(endpoint, params))
+
+ def timeline_home(self, mode="0", list_id=None):
+ endpoint = "/v1/timeline/home"
+ params = {"before": "0", "after": "0"}
+ if list_id is None:
+ params["mode"] = mode
+ else:
+ params["listId"] = list_id
+ return self._pagination(endpoint, params)
+
+ def timeline_new(self, account_id, wall_id):
+ endpoint = f"/v1/timelinenew/{account_id}"
+ params = {
+ "before" : "0",
+ "after" : "0",
+ "wallId" : wall_id,
+ "contentSearch": "",
+ }
+ return self._pagination(endpoint, params)
+
+ def _update_posts(self, response):
+ accounts = {
+ account["id"]: account
+ for account in response["accounts"]
+ }
+ media = {
+ media["id"]: media
+ for media in response["accountMedia"]
+ }
+ bundles = {
+ bundle["id"]: bundle
+ for bundle in response["accountMediaBundles"]
+ }
+
+ posts = response["posts"]
+ for post in posts:
+ post["account"] = accounts[post.pop("accountId")]
+
+ attachments = []
+ for attachment in post["attachments"]:
+ cid = attachment["contentId"]
+ if cid in media:
+ attachments.append(media[cid])
+ elif cid in bundles:
+ bundle = bundles[cid]["bundleContent"]
+ bundle.sort(key=lambda c: c["pos"])
+ attachments.extend(
+ media[m["accountMediaId"]]
+ for m in bundle
+ if m["accountMediaId"] in media
+ )
+ else:
+ self.extractor.log.warning(
+ "%s: Unhandled 'contentId' %s",
+ post["id"], cid)
+ post["attachments"] = attachments
+ return posts
+
+ def _update_items(self, items):
+ ids = [item["id"] for item in items]
+ accounts = {
+ account["id"]: account
+ for account in self.accounts_by_id(ids)
+ }
+ return [accounts[id] for id in ids]
+
+ def _call(self, endpoint, params):
+ url = f"{self.ROOT}/api{endpoint}"
+ params["ngsw-bypass"] = "true"
+ headers = self.headers.copy()
+ headers["fansly-client-ts"] = str(int(time.time() * 1000))
+
+ data = self.extractor.request_json(
+ url, params=params, headers=headers)
+ return data["response"]
+
+ def _pagination(self, endpoint, params):
+ while True:
+ response = self._call(endpoint, params)
+
+ if isinstance(response, list):
+ if not response:
+ return
+ yield from self._update_items(response)
+ params["after"] = response[-1]["sortId"]
+
+ else:
+ if not response.get("posts"):
+ return
+ posts = self._update_posts(response)
+ yield from posts
+ params["before"] = min(p["id"] for p in posts)
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index 91bcd38..a08f7bb 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -136,8 +136,9 @@ class HentaifoundryExtractor(Extractor):
">" + c + ":</span>", "<").replace(",", ""))
data["description"] = text.unescape(extr(
- "class='storyDescript'>", "<div"))
- path = extr('href="', '"')
+ "class='storyDescript'>", '<div class="storyRead">')).replace(
+ "\r\n", "\n")
+ path = extr('class="pdfLink" href="', '"')
data["src"] = self.root + path
data["index"] = text.parse_int(path.rsplit("/", 2)[1])
data["ratings"] = [text.unescape(r) for r in text.extract_iter(extr(
diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py
index 171feea..abba9df 100644
--- a/gallery_dl/extractor/imagebam.py
+++ b/gallery_dl/extractor/imagebam.py
@@ -23,6 +23,7 @@ class ImagebamExtractor(Extractor):
def _init(self):
self.cookies.set("nsfw_inter", "1", domain="www.imagebam.com")
+ self.cookies.set("sfw_inter", "1", domain="www.imagebam.com")
def _parse_image_page(self, path):
page = self.request(self.root + path).text
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index fa60f91..9b8f8c9 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -242,8 +242,7 @@ class InstagramExtractor(Extractor):
if self._warn_video_ua:
self._warn_video_ua = False
pattern = text.re(
- r"AppleWebKit/537\.36 \(KHTML, like Gecko\) "
- r"Chrome/\d+\.\d+\.\d+\.\d+ Safari/537\.36$")
+ r"Chrome/\d{3,}\.\d+\.\d+\.\d+(?!\d* Mobile)")
if not pattern.search(self.session.headers["User-Agent"]):
self.log.warning("Potentially lowered video quality "
"due to non-Chrome User-Agent")
@@ -568,6 +567,20 @@ class InstagramCollectionExtractor(InstagramExtractor):
return self.api.user_collection(self.collection_id)
+class InstagramStoriesTrayExtractor(InstagramExtractor):
+ """Extractor for your Instagram account's stories tray"""
+ subcategory = "stories-tray"
+ pattern = rf"{BASE_PATTERN}/stories/me/?$()"
+ example = "https://www.instagram.com/stories/me/"
+
+ def items(self):
+ base = f"{self.root}/stories/id:"
+ for story in self.api.reels_tray():
+ story["date"] = text.parse_timestamp(story["latest_reel_media"])
+ story["_extractor"] = InstagramStoriesExtractor
+ yield Message.Queue, f"{base}{story['id']}/", story
+
+
class InstagramStoriesExtractor(InstagramExtractor):
"""Extractor for Instagram stories"""
subcategory = "stories"
@@ -793,7 +806,11 @@ class InstagramRestAPI():
try:
return self._call(endpoint, params=params)["reels_media"]
except KeyError:
- raise exception.AuthorizationError("Login required")
+ raise exception.AuthRequired("authenticated cookies")
+
+ def reels_tray(self):
+ endpoint = "/v1/feed/reels_tray/"
+ return self._call(endpoint)["tray"]
def tags_media(self, tag):
for section in self.tags_sections(tag):
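
The stories tray endpoint needs authenticated cookies (note the AuthRequired change above); with those exported from a browser session:

    gallery-dl --cookies cookies.txt https://www.instagram.com/stories/me/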
diff --git a/gallery_dl/extractor/kemono.py b/gallery_dl/extractor/kemono.py
index a5e1f6d..fc5972c 100644
--- a/gallery_dl/extractor/kemono.py
+++ b/gallery_dl/extractor/kemono.py
@@ -151,7 +151,8 @@ class KemonoExtractor(Extractor):
file["extension"] = ext
elif ext == "txt" and file["extension"] != "txt":
file["_http_validate"] = _validate
- elif ext in exts_archive:
+ elif ext in exts_archive or \
+ ext == "bin" and file["extension"] in exts_archive:
file["type"] = "archive"
if archives:
try:
diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py
index 9c33d4f..f22d54e 100644
--- a/gallery_dl/extractor/khinsider.py
+++ b/gallery_dl/extractor/khinsider.py
@@ -63,6 +63,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
"date" : extr("Date Added: <b>", "<"),
"type" : text.remove_html(extr("Album type: <b>", "</b>")),
"uploader": text.remove_html(extr("Uploaded by: ", "</")),
+ "description": extr("<h2>Description</h2>", "<h2>").strip(),
}}
def _extract_tracks(self, page):
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index c87430b..9febda9 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -17,7 +17,7 @@ class RedditExtractor(Extractor):
"""Base class for reddit extractors"""
category = "reddit"
directory_fmt = ("{category}", "{subreddit}")
- filename_fmt = "{id}{num:? //>02} {title[:220]}.{extension}"
+ filename_fmt = "{id}{num:? //>02} {title|link_title:[:220]}.{extension}"
archive_fmt = "{filename}"
cookies_domain = ".reddit.com"
request_interval = 0.6
@@ -84,6 +84,12 @@ class RedditExtractor(Extractor):
text.nameext_from_url(url, submission)
yield Message.Url, url, submission
+ elif embeds and "media_metadata" in media:
+ for embed in self._extract_embed(submission):
+ submission["num"] += 1
+ text.nameext_from_url(embed, submission)
+ yield Message.Url, embed, submission
+
elif media["is_video"]:
if videos:
text.nameext_from_url(url, submission)
@@ -102,6 +108,12 @@ class RedditExtractor(Extractor):
urls.append((url, submission))
if self.api.comments:
+ if comments and not submission:
+ submission = comments[0]
+ submission.setdefault("num", 0)
+ if not parentdir:
+ yield Message.Directory, submission
+
for comment in comments:
html = comment["body_html"] or ""
href = (' href="' in html)
@@ -193,19 +205,26 @@ class RedditExtractor(Extractor):
return
for mid, data in meta.items():
- if data["status"] != "valid" or "s" not in data:
+ if data["status"] != "valid":
self.log.warning(
"embed %s: skipping item %s (status: %s)",
submission["id"], mid, data.get("status"))
continue
- src = data["s"]
- if url := src.get("u") or src.get("gif") or src.get("mp4"):
- yield url.partition("?")[0].replace("/preview.", "/i.", 1)
- else:
- self.log.error(
- "embed %s: unable to fetch download URL for item %s",
- submission["id"], mid)
- self.log.debug(src)
+
+ if src := data.get("s"):
+ if url := src.get("u") or src.get("gif") or src.get("mp4"):
+ yield url.partition("?")[0].replace("/preview.", "/i.", 1)
+ else:
+ self.log.error(
+ "embed %s: unable to fetch download URL for item %s",
+ submission["id"], mid)
+ self.log.debug(src)
+ elif url := data.get("dashUrl"):
+ submission["_ytdl_manifest"] = "dash"
+ yield f"ytdl:{url}"
+ elif url := data.get("hlsUrl"):
+ submission["_ytdl_manifest"] = "hls"
+ yield f"ytdl:{url}"
def _extract_video_ytdl(self, submission):
return "https://www.reddit.com" + submission["permalink"]
@@ -361,6 +380,7 @@ class RedditAPI():
Ref: https://www.reddit.com/dev/api/
"""
+ ROOT = "https://oauth.reddit.com"
CLIENT_ID = "6N9uN0krSDE-ig"
USER_AGENT = "Python:gallery-dl:0.8.4 (by /u/mikf1)"
@@ -369,41 +389,50 @@ class RedditAPI():
self.log = extractor.log
config = extractor.config
+
self.comments = text.parse_int(config("comments", 0))
self.morecomments = config("morecomments", False)
+ self._warn_429 = False
- client_id = config("client-id")
- if client_id is None:
- self.client_id = self.CLIENT_ID
- self.headers = {"User-Agent": self.USER_AGENT}
+ if config("api") == "rest":
+ self.root = "https://www.reddit.com"
+ self.headers = None
+ self.authenticate = util.noop
+ self.log.debug("Using REST API")
else:
- self.client_id = client_id
- self.headers = {"User-Agent": config("user-agent")}
+ self.root = self.ROOT
- if self.client_id == self.CLIENT_ID:
- client_id = self.client_id
- self._warn_429 = True
- kind = "default"
- else:
- client_id = client_id[:5] + "*" * (len(client_id)-5)
- self._warn_429 = False
- kind = "custom"
+ client_id = config("client-id")
+ if client_id is None:
+ self.client_id = self.CLIENT_ID
+ self.headers = {"User-Agent": self.USER_AGENT}
+ else:
+ self.client_id = client_id
+ self.headers = {"User-Agent": config("user-agent")}
- self.log.debug(
- "Using %s API credentials (client-id %s)", kind, client_id)
+ if self.client_id == self.CLIENT_ID:
+ client_id = self.client_id
+ self._warn_429 = True
+ kind = "default"
+ else:
+ client_id = client_id[:5] + "*" * (len(client_id)-5)
+ kind = "custom"
- token = config("refresh-token")
- if token is None or token == "cache":
- key = "#" + self.client_id
- self.refresh_token = _refresh_token_cache(key)
- else:
- self.refresh_token = token
+ self.log.debug(
+ "Using %s API credentials (client-id %s)", kind, client_id)
- if not self.refresh_token:
- # allow downloading from quarantined subreddits (#2180)
- extractor.cookies.set(
- "_options", '%7B%22pref_quarantine_optin%22%3A%20true%7D',
- domain=extractor.cookies_domain)
+ token = config("refresh-token")
+ if token is None or token == "cache":
+ key = "#" + self.client_id
+ self.refresh_token = _refresh_token_cache(key)
+ else:
+ self.refresh_token = token
+
+ if not self.refresh_token:
+ # allow downloading from quarantined subreddits (#2180)
+ extractor.cookies.set(
+ "_options", '%7B%22pref_quarantine_optin%22%3A%20true%7D',
+ domain=extractor.cookies_domain)
def submission(self, submission_id):
"""Fetch the (submission, comments)=-tuple for a submission id"""
@@ -416,13 +445,11 @@ class RedditAPI():
def submissions_subreddit(self, subreddit, params):
"""Collect all (submission, comments)-tuples of a subreddit"""
endpoint = subreddit + "/.json"
- params["limit"] = 100
return self._pagination(endpoint, params)
def submissions_user(self, user, params):
"""Collect all (submission, comments)-tuples posted by a user"""
endpoint = "/user/" + user + "/.json"
- params["limit"] = 100
return self._pagination(endpoint, params)
def morechildren(self, link_id, children):
@@ -477,7 +504,7 @@ class RedditAPI():
return "Bearer " + data["access_token"]
def _call(self, endpoint, params):
- url = "https://oauth.reddit.com" + endpoint
+ url = f"{self.root}{endpoint}"
params["raw_json"] = "1"
while True:
@@ -522,6 +549,9 @@ class RedditAPI():
id_max = float("inf")
date_min, date_max = self.extractor._get_date_min_max(0, 253402210800)
+ if limit := self.extractor.config("limit"):
+ params["limit"] = limit
+
while True:
data = self._call(endpoint, params)["data"]
diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py
index b988646..36b083b 100644
--- a/gallery_dl/extractor/shimmie2.py
+++ b/gallery_dl/extractor/shimmie2.py
@@ -79,9 +79,12 @@ BASE_PATTERN = Shimmie2Extractor.update({
},
"nozrip": {
"root": "https://noz.rip/booru",
- "base": "https://noz.rip",
"pattern": r"noz\.rip/booru",
},
+ "thecollectionS": {
+ "root": "https://co.llection.pics",
+ "pattern": r"co\.llection\.pics",
+ },
}) + r"/(?:index\.php\?q=/?)?"
@@ -160,10 +163,12 @@ class Shimmie2PostExtractor(Shimmie2Extractor):
def posts(self):
post_id = self.groups[-1]
- url = f"{self.root}/post/view/{post_id}"
+ root = self.root
+ base = root if (pos := root.find("/", 8)) < 0 else root[:pos]
+
+ url = f"{root}/post/view/{post_id}"
page = self.request(url).text
extr = text.extract_from(page)
- base = self.config_instance("base", self.root)
qt = self._quote_type(page)
post = {
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 6eea76c..92fc831 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -31,15 +31,12 @@ class TumblrExtractor(Extractor):
filename_fmt = "{category}_{blog_name}_{id}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
- def __init__(self, match):
- Extractor.__init__(self, match)
-
- if name := match[2]:
- self.blog = name + ".tumblr.com"
+ def _init(self):
+ if name := self.groups[1]:
+ self.blog = f"{name}.tumblr.com"
else:
- self.blog = match[1] or match[3]
+ self.blog = self.groups[0] or self.groups[2]
- def _init(self):
self.api = TumblrAPI(self)
self.types = self._setup_posttypes()
self.avatar = self.config("avatar", False)
@@ -287,14 +284,10 @@ class TumblrPostExtractor(TumblrExtractor):
pattern = BASE_PATTERN + r"/(?:post/|image/)?(\d+)"
example = "https://www.tumblr.com/BLOG/12345"
- def __init__(self, match):
- TumblrExtractor.__init__(self, match)
- self.post_id = match[4]
+ def posts(self):
self.reblogs = True
self.date_min = 0
-
- def posts(self):
- return self.api.posts(self.blog, {"id": self.post_id})
+ return self.api.posts(self.blog, {"id": self.groups[3]})
def _setup_posttypes(self):
return POST_TYPES
@@ -303,15 +296,13 @@ class TumblrPostExtractor(TumblrExtractor):
class TumblrTagExtractor(TumblrExtractor):
"""Extractor for Tumblr user's posts by tag"""
subcategory = "tag"
- pattern = BASE_PATTERN + r"/tagged/([^/?#]+)"
+ pattern = BASE_PATTERN + r"(?:/archive)?/tagged/([^/?#]+)"
example = "https://www.tumblr.com/BLOG/tagged/TAG"
- def __init__(self, match):
- TumblrExtractor.__init__(self, match)
- self.tag = text.unquote(match[4].replace("-", " "))
-
def posts(self):
- return self.api.posts(self.blog, {"tag": self.tag})
+ self.kwdict["search_tags"] = tag = text.unquote(
+ self.groups[3].replace("-", " "))
+ return self.api.posts(self.blog, {"tag": tag})
class TumblrDayExtractor(TumblrExtractor):
@@ -320,21 +311,13 @@ class TumblrDayExtractor(TumblrExtractor):
pattern = BASE_PATTERN + r"/day/(\d\d\d\d/\d\d/\d\d)"
example = "https://www.tumblr.com/BLOG/day/1970/01/01"
- def __init__(self, match):
- TumblrExtractor.__init__(self, match)
- year, month, day = match[4].split("/")
- self.ordinal = date(int(year), int(month), int(day)).toordinal()
-
- def _init(self):
- TumblrExtractor._init(self)
-
- self.date_min = (
- # 719163 == date(1970, 1, 1).toordinal()
- (self.ordinal - 719163) * 86400)
+ def posts(self):
+ year, month, day = self.groups[3].split("/")
+ ordinal = date(int(year), int(month), int(day)).toordinal()
+ # 719163 == date(1970, 1, 1).toordinal()
+ self.date_min = (ordinal - 719163) * 86400
self.api.before = self.date_min + 86400
-
- def posts(self):
return self.api.posts(self.blog, {})
@@ -550,8 +533,11 @@ class TumblrAPI(oauth.OAuth1API):
params["api_key"] = self.api_key
strategy = self.extractor.config("pagination")
- if not strategy and "offset" not in params:
- strategy = "api"
+ if not strategy:
+ if params.get("before"):
+ strategy = "before"
+ elif "offset" not in params:
+ strategy = "api"
while True:
data = self._call(endpoint, params)
@@ -573,10 +559,9 @@ class TumblrAPI(oauth.OAuth1API):
endpoint = data["_links"]["next"]["href"]
except KeyError:
return
-
- params = None
- if self.api_key:
- endpoint += "&api_key=" + self.api_key
+ if params is not None and self.api_key:
+ endpoint = f"{endpoint}&api_key={self.api_key}"
+ params = None
elif strategy == "before":
if not posts:
diff --git a/gallery_dl/extractor/tungsten.py b/gallery_dl/extractor/tungsten.py
new file mode 100644
index 0000000..20d5a59
--- /dev/null
+++ b/gallery_dl/extractor/tungsten.py
@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://tungsten.run/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?tungsten\.run"
+
+
+class TungstenExtractor(Extractor):
+    """Base class for tungsten extractors"""
+    category = "tungsten"
+    root = "https://tungsten.run"
+    directory_fmt = ("{category}", "{user[username]}")
+    filename_fmt = "{date} {title:?/ /}{uuid}.{extension}"
+    archive_fmt = "{uuid}"
+
+    def items(self):
+        for post in self.posts():
+            url = post["original_url"]
+            post["date"] = text.parse_datetime(post["created_at"])
+            post["filename"] = url[url.rfind("/")+1:]
+            post["extension"] = "webp"
+            yield Message.Directory, post
+            yield Message.Url, url, post
+
+    def _pagination(self, url, params):
+        params["page"] = 1
+        params["per_page"] = 40
+
+        headers = {
+            "Origin": self.root,
+            "Sec-Fetch-Dest": "empty",
+            "Sec-Fetch-Mode": "cors",
+            "Sec-Fetch-Site": "same-site",
+        }
+
+        while True:
+            data = self.request_json(url, params=params, headers=headers)
+
+            yield from data
+
+            if len(data) < params["per_page"]:
+                break
+            params["page"] += 1
+
+
+class TungstenPostExtractor(TungstenExtractor):
+    subcategory = "post"
+    pattern = rf"{BASE_PATTERN}/post/(\w+)"
+    example = "https://tungsten.run/post/AbCdEfGhIjKlMnOp"
+
+    def posts(self):
+        url = f"{self.root}/post/{self.groups[0]}"
+        page = self.request(url).text
+        data = self._extract_nextdata(page)
+        return (data["props"]["pageProps"]["post"],)
+
+
+class TungstenModelExtractor(TungstenExtractor):
+    subcategory = "model"
+    pattern = rf"{BASE_PATTERN}/model/(\w+)(?:/?\?model_version=(\w+))?"
+    example = "https://tungsten.run/model/AbCdEfGhIjKlM"
+
+    def posts(self):
+        uuid_model, uuid_version = self.groups
+
+        if uuid_version is None:
+            url = f"{self.root}/model/{uuid_model}/"
+            page = self.request(url).text
+            uuid_version = text.extr(page, '"modelVersionUUID":"', '"')
+
+        url = "https://api.tungsten.run/v1/posts"
+        params = {
+            "sort" : "top_all_time",
+            "tweakable_only": "false",
+            "following" : "false",
+            "model_version_uuid": uuid_version,
+        }
+        return self._pagination(url, params)
+
+
+class TungstenUserExtractor(TungstenExtractor):
+    subcategory = "user"
+    pattern = rf"{BASE_PATTERN}/user/([^/?#]+)"
+    example = "https://tungsten.run/user/USER/posts"
+
+    def posts(self):
+        url = f"{self.root}/user/{self.groups[0]}"
+        page = self.request(url).text
+        uuid_user = text.extr(page, '"user":{"uuid":"', '"')
+
+        url = f"https://api.tungsten.run/v1/users/{uuid_user}/posts"
+        params = {"sort": "top_all_time"}
+        return self._pagination(url, params)
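tungsten.run's API is paged with simple `page`/`per_page` parameters, and a page shorter than `per_page` marks the end of the result set. The loop reduced to a self-contained sketch, with a `fetch` callable standing in for `request_json`:

    def paginate(fetch, per_page=40):
        """Yield items page by page; a short page signals the end."""
        page = 1
        while True:
            items = fetch(page, per_page)
            yield from items
            if len(items) < per_page:
                break
            page += 1

    # toy backend with 85 items -> pages of 40, 40, 5
    data = list(range(85))
    fetch = lambda page, n: data[(page - 1) * n:page * n]
    assert list(paginate(fetch)) == data
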
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index c928507..c919cb8 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -471,21 +471,35 @@ class TwitterExtractor(Extractor):
except KeyError:
pass
+ admin = creator = banner = None
+ try:
+ if results := com.get("admin_results"):
+ admin = results["result"]["core"]["screen_name"]
+ except Exception:
+ pass
+ try:
+ if results := com.get("creator_results"):
+ creator = results["result"]["core"]["screen_name"]
+ except Exception:
+ pass
+ try:
+ if results := com.get("custom_banner_media"):
+ banner = results["media_info"]["original_img_url"]
+ except Exception:
+ pass
+
self._user_cache[f"C#{cid}"] = cdata = {
"id": text.parse_int(cid),
- "name": com["name"],
- "description": com["description"],
- "date": text.parse_timestamp(com["created_at"] // 1000),
- "nsfw": com["is_nsfw"],
- "role": com["role"],
- "member_count": com["member_count"],
- "rules": [rule["name"] for rule in com["rules"]],
- "admin": (admin := com.get("admin_results")) and
- admin["result"]["core"]["screen_name"], # noqa: E131
- "creator": (creator := com.get("creator_results")) and
- creator["result"]["core"]["screen_name"], # noqa: E131
- "banner": (banner := com.get("custom_banner_media")) and
- banner["media_info"]["original_img_url"], # noqa: E131
+ "name": com.get("name"),
+ "description": com.get("description"),
+ "date": text.parse_timestamp(com.get("created_at", 0) // 1000),
+ "nsfw": com.get("is_nsfw"),
+ "role": com.get("role"),
+ "member_count": com.get("member_count"),
+ "rules": [rule["name"] for rule in com.get("rules", ())],
+ "admin" : admin,
+ "creator": creator,
+ "banner" : banner,
}
return cdata
@@ -512,8 +526,8 @@ class TwitterExtractor(Extractor):
entities = legacy["entities"]
self._user_cache[uid] = udata = {
"id" : text.parse_int(uid),
- "name" : core["screen_name"],
- "nick" : core["name"],
+ "name" : core.get("screen_name"),
+ "nick" : core.get("name"),
"location" : user["location"]["location"],
"date" : text.parse_datetime(
core["created_at"], "%a %b %d %H:%M:%S %z %Y"),
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 75a0137..22d4b9a 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -72,6 +72,10 @@ class VkExtractor(Extractor):
photo["width"] = photo["height"] = 0
photo["id"] = photo["id"].rpartition("_")[2]
+ photo["date"] = text.parse_timestamp(text.extr(
+ photo["date"], 'data-date="', '"'))
+ photo["description"] = text.unescape(text.extr(
+ photo.get("desc", ""), ">", "<"))
photo.update(data)
text.nameext_from_url(url, photo)
@@ -108,6 +112,10 @@ class VkExtractor(Extractor):
total = payload[1]
photos = payload[3]
+ for i in range(len(photos)):
+ photos[i]["num"] = self.offset + i + 1
+ photos[i]["count"] = total
+
offset_next = self.offset + len(photos)
if offset_next >= total:
# the last chunk of photos also contains the first few photos
@@ -128,7 +136,7 @@ class VkPhotosExtractor(VkExtractor):
subcategory = "photos"
pattern = (BASE_PATTERN + r"/(?:"
r"(?:albums|photos|id)(-?\d+)"
- r"|(?!(?:album|tag)-?\d+_?)([^/?#]+))")
+ r"|(?!(?:album|tag|wall)-?\d+_?)([^/?#]+))")
example = "https://vk.com/id12345"
def __init__(self, match):
@@ -179,17 +187,40 @@ class VkAlbumExtractor(VkExtractor):
pattern = BASE_PATTERN + r"/album(-?\d+)_(\d+)$"
example = "https://vk.com/album12345_00"
- def __init__(self, match):
- VkExtractor.__init__(self, match)
- self.user_id, self.album_id = match.groups()
-
def photos(self):
- return self._pagination(f"album{self.user_id}_{self.album_id}")
+ user_id, album_id = self.groups
+ return self._pagination(f"album{user_id}_{album_id}")
def metadata(self):
+ user_id, album_id = self.groups
+
+ url = f"{self.root}/album{user_id}_{album_id}"
+ page = self.request(url).text
+ desc = text.extr(page, 'name="og:description" value="', '"')
+ try:
+ album_name, user_name, photos = desc.rsplit(" - ", 2)
+ except ValueError:
+ if msg := text.extr(
+ page, '<div class="message_page_title">Error</div>',
+ "</div>"):
+ msg = f" ('{text.remove_html(msg)[:-5]}')"
+ self.log.warning("%s_%s: Failed to extract metadata%s",
+ user_id, album_id, msg)
+ return {"user": {"id": user_id}, "album": {"id": album_id}}
+
return {
- "user": {"id": self.user_id},
- "album": {"id": self.album_id},
+ "user": {
+ "id" : user_id,
+ "nick" : text.unescape(user_name),
+ "name" : text.unescape(text.extr(
+ page, 'class="ui_crumb" href="/', '"')),
+ "group": user_id[0] == "-",
+ },
+ "album": {
+ "id" : album_id,
+ "name" : text.unescape(album_name),
+ "count": text.parse_int(photos[:-7])
+ },
}
@@ -209,3 +240,35 @@ class VkTaggedExtractor(VkExtractor):
def metadata(self):
return {"user": {"id": self.user_id}}
+
+
+class VkWallPostExtractor(VkExtractor):
+ """Extractor for a vk wall post"""
+ subcategory = "wall-post"
+ directory_fmt = ("{category}", "{user[id]}", "wall")
+ filename_fmt = "{wall[id]}_{num}.{extension}"
+ pattern = BASE_PATTERN + r"/wall(-?\d+)_(\d+)"
+ example = "https://vk.com/wall12345_123"
+
+ def photos(self):
+ user_id, wall_id = self.groups
+ return self._pagination(f"wall{user_id}_{wall_id}")
+
+ def metadata(self):
+ user_id, wall_id = self.groups
+
+ url = f"{self.root}/wall{user_id}_{wall_id}"
+ page = self.request(url).text
+ desc = text.unescape(
+ text.extr(page, 'data-testid="post_description">', "</div>") or
+ text.extr(page, 'name="description" content="', '"'))
+
+ return {
+ "user": {
+ "id": user_id,
+ },
+ "wall": {
+ "id": wall_id,
+ "description": desc,
+ },
+ }
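VK album metadata is scraped from the page's `og:description`, which renders as `ALBUM - OWNER - N photos`; `rsplit(" - ", 2)` splits from the right so a ` - ` inside the album name survives, and the `ValueError` branch catches VK's error pages. The parse in isolation:

    desc = "Cats - and dogs - John Doe - 42 photos"

    try:
        album_name, user_name, photos = desc.rsplit(" - ", 2)
    except ValueError:
        album_name = user_name = photos = None   # error page fallback

    assert album_name == "Cats - and dogs"
    assert user_name == "John Doe"
    assert int(photos[:-7]) == 42                # strip the " photos" suffix
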
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index fca8911..e1b4897 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -55,7 +55,8 @@ class ZerochanExtractor(BooruExtractor):
"login" : "Login",
}
- response = self.request(url, method="POST", headers=headers, data=data)
+ response = self.request(
+ url, method="POST", headers=headers, data=data, expected=(500,))
if not response.history:
raise exception.AuthenticationError()
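zerochan's login endpoint can answer HTTP 500 even when authentication succeeded, so the request now whitelists that status via `expected` and success is judged by whether a redirect occurred. Roughly the same check with plain `requests` (the endpoint URL and the field names other than `login` are assumptions):

    import requests

    data = {"name": "USER", "password": "PASS", "login": "Login"}
    response = requests.post("https://www.zerochan.net/login", data=data)

    # success is signalled by the redirect chain, not by the status code,
    # which may be 500 even for a valid login
    if not response.history:
        raise RuntimeError("authentication failed")
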
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index 7a49049..b09203f 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -565,6 +565,7 @@ _CONVERSIONS = {
"U": text.unescape,
"H": lambda s: text.unescape(text.remove_html(s)),
"g": text.slugify,
+ "R": text.re(r"https?://[^\s\"']+").findall,
"W": text.sanitize_whitespace,
"S": util.to_string,
"s": str,
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index fd664e6..05cc9d3 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -222,10 +222,6 @@ class Formatter(argparse.HelpFormatter):
def _format_usage(self, usage, actions, groups, prefix):
return f"Usage: {self._prog} [OPTIONS] URL [URL...]\n"
- def format_help(self):
- return self._long_break_matcher.sub(
- "\n\n", self._root_section.format_help())
-
def _parse_option(opt):
key, _, value = opt.partition("=")
@@ -276,7 +272,7 @@ def build_parser():
help="Load external extractors from PATH",
)
general.add_argument(
- "--user-agent",
+ "-a", "--user-agent",
dest="user-agent", metavar="UA", action=ConfigAction,
help="User-Agent request header",
)
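`--user-agent` gains the short form `-a`, so e.g. `gallery-dl -a "Mozilla/5.0" URL` is now equivalent to `gallery-dl --user-agent "Mozilla/5.0" URL`. The removed `format_help` override is safe to drop because argparse's base `HelpFormatter.format_help` already collapses runs of blank lines the same way.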
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 8020352..187ef92 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.30.5"
+__version__ = "1.30.6"
__variant__ = None
diff --git a/test/test_formatter.py b/test/test_formatter.py
index f3ed9dd..8b35a2b 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -44,6 +44,10 @@ class TestFormatter(unittest.TestCase):
"s": " \n\r\tSPACE ",
"S": " \n\r\tS P A\tC\nE ",
"h": "<p>foo </p> &amp; bar <p> </p>",
+ "H": """<p>
+ <a href="http://www.example.com">Lorem ipsum dolor sit amet</a>.
+ Duis aute irure <a href="http://blog.example.org">dolor</a>.
+</p>""",
"u": "&#x27;&lt; / &gt;&#x27;",
"t": 1262304000,
"ds": "2010-01-01T01:00:00+01:00",
@@ -72,6 +76,9 @@ class TestFormatter(unittest.TestCase):
self._run_test("{h!H}", "foo & bar")
self._run_test("{u!H}", "'< / >'")
self._run_test("{n!H}", "")
+ self._run_test("{h!R}", [])
+ self._run_test("{H!R}", ["http://www.example.com",
+ "http://blog.example.org"])
self._run_test("{a!s}", self.kwdict["a"])
self._run_test("{a!r}", f"'{self.kwdict['a']}'")
self._run_test("{a!a}", f"'{self.kwdict['a']}'")
@@ -590,10 +597,11 @@ def gentext(kwdict):
def lengths(kwdict):
a = 0
for k, v in kwdict.items():
- try:
- a += len(v)
- except TypeError:
- pass
+ if k == k.lower():
+ try:
+ a += len(v)
+ except TypeError:
+ pass
return format(a)
def noarg():
@@ -616,10 +624,10 @@ def noarg():
fmt4 = formatter.parse(f"\fM {path}:lengths")
self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name")
- self.assertEqual(fmt2.format_map(self.kwdict), "168")
+ self.assertEqual(fmt2.format_map(self.kwdict), "139")
self.assertEqual(fmt3.format_map(self.kwdict), "'Title' by Name")
- self.assertEqual(fmt4.format_map(self.kwdict), "168")
+ self.assertEqual(fmt4.format_map(self.kwdict), "139")
with self.assertRaises(TypeError):
self.assertEqual(fmt0.format_map(self.kwdict), "")
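`lengths()` now skips any key that changes under `str.lower()`, i.e. keys with uppercase letters, so fixtures such as the new multi-line `H` entry no longer perturb the expected totals; excluding the uppercase fixtures is also why the baseline moves from 168 to 139. The filter in isolation:

    def lengths(kwdict):
        total = 0
        for key, value in kwdict.items():
            if key == key.lower():       # skip keys with uppercase letters
                try:
                    total += len(value)
                except TypeError:        # ints etc. have no len()
                    pass
        return format(total)

    assert lengths({"s": "abc", "S": "ABCDEF", "t": 1262304000}) == "3"
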
diff --git a/test/test_results.py b/test/test_results.py
index 05b98bf..7e024b8 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -361,6 +361,12 @@ class TestExtractorResults(unittest.TestCase):
msg = f"{path} / ISO 639-1"
self.assertIsInstance(value, str, msg=msg)
self.assertRegex(value, r"^[a-z]{2}(-\w+)?$", msg=msg)
+ elif iso in ("uuid", "11578", "11578:1996", "4122"):
+ msg = f"{path} / ISO 11578:1996"
+ pat = (r"(?i)[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-"
+ r"[0-9a-f]{4}-[0-9a-f]{12}")
+ self.assertIsInstance(value, str, msg=msg)
+ self.assertRegex(value, pat, msg=msg)
else:
self.fail(f"Unsupported ISO test '{test}'")
else: