author    Unit 193 <unit193@unit193.net>  2023-05-28 01:35:33 -0400
committer Unit 193 <unit193@unit193.net>  2023-05-28 01:35:33 -0400
commit    645863b3144f85f1830afe23c8d412f2703d5b81 (patch)
tree      4be5d8a509ba8fac7a9c565d2d0764287b73973b
parent    1d742a229479aa2c3cb6db253c90434414a6fea3 (diff)
parent    8950c0f2ef55ec2ed36b3fccc9fd85b64b877c3b (diff)
Update upstream source from tag 'upstream/1.25.5'
Update to upstream version '1.25.5' with Debian dir fb939ee149571d6a29a37441553f417ce143d419
-rw-r--r--  CHANGELOG.md                          33
-rw-r--r--  PKG-INFO                               6
-rw-r--r--  README.rst                             4
-rw-r--r--  data/man/gallery-dl.1                  2
-rw-r--r--  data/man/gallery-dl.conf.5           106
-rw-r--r--  docs/gallery-dl.conf                  10
-rw-r--r--  gallery_dl.egg-info/PKG-INFO           6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt        3
-rw-r--r--  gallery_dl/cookies.py                147
-rw-r--r--  gallery_dl/extractor/8muses.py         7
-rw-r--r--  gallery_dl/extractor/__init__.py       3
-rw-r--r--  gallery_dl/extractor/common.py         8
-rw-r--r--  gallery_dl/extractor/danbooru.py      79
-rw-r--r--  gallery_dl/extractor/e621.py          11
-rw-r--r--  gallery_dl/extractor/fanbox.py         7
-rw-r--r--  gallery_dl/extractor/gofile.py        23
-rw-r--r--  gallery_dl/extractor/imagechest.py    75
-rw-r--r--  gallery_dl/extractor/instagram.py     23
-rw-r--r--  gallery_dl/extractor/jpgfish.py      143
-rw-r--r--  gallery_dl/extractor/kemonoparty.py    4
-rw-r--r--  gallery_dl/extractor/lensdump.py     161
-rw-r--r--  gallery_dl/extractor/mangadex.py       5
-rw-r--r--  gallery_dl/extractor/mangaread.py    191
-rw-r--r--  gallery_dl/extractor/misskey.py       29
-rw-r--r--  gallery_dl/extractor/newgrounds.py     1
-rw-r--r--  gallery_dl/extractor/nsfwalbum.py      3
-rw-r--r--  gallery_dl/extractor/pixiv.py        198
-rw-r--r--  gallery_dl/extractor/poipiku.py        6
-rw-r--r--  gallery_dl/extractor/reddit.py        28
-rw-r--r--  gallery_dl/extractor/tcbscans.py      18
-rw-r--r--  gallery_dl/extractor/twitter.py       27
-rw-r--r--  gallery_dl/extractor/weibo.py          2
-rw-r--r--  gallery_dl/formatter.py               30
-rw-r--r--  gallery_dl/postprocessor/exec.py       2
-rw-r--r--  gallery_dl/version.py                  2
-rw-r--r--  gallery_dl/ytdl.py                     2
-rw-r--r--  test/test_formatter.py                29
37 files changed, 1257 insertions(+), 177 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 85c732d..405c117 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,38 @@
# Changelog
+## 1.25.5 - 2023-05-27
+### Additions
+- [8muses] add `parts` metadata field ([#3329](https://github.com/mikf/gallery-dl/issues/3329))
+- [danbooru] add `date` metadata field ([#4047](https://github.com/mikf/gallery-dl/issues/4047))
+- [e621] add `date` metadata field ([#4047](https://github.com/mikf/gallery-dl/issues/4047))
+- [gofile] add basic password support ([#4056](https://github.com/mikf/gallery-dl/issues/4056))
+- [imagechest] implement API support ([#4065](https://github.com/mikf/gallery-dl/issues/4065))
+- [instagram] add `order-files` option ([#3993](https://github.com/mikf/gallery-dl/issues/3993), [#4017](https://github.com/mikf/gallery-dl/issues/4017))
+- [instagram] add `order-posts` option ([#3993](https://github.com/mikf/gallery-dl/issues/3993), [#4017](https://github.com/mikf/gallery-dl/issues/4017))
+- [instagram] add `metadata` option ([#3107](https://github.com/mikf/gallery-dl/issues/3107))
+- [jpgfish] add `jpg.fishing` extractors ([#2657](https://github.com/mikf/gallery-dl/issues/2657), [#2719](https://github.com/mikf/gallery-dl/issues/2719))
+- [lensdump] add `lensdump.com` extractors ([#2078](https://github.com/mikf/gallery-dl/issues/2078), [#4104](https://github.com/mikf/gallery-dl/issues/4104))
+- [mangaread] add `mangaread.org` extractors ([#2425](https://github.com/mikf/gallery-dl/issues/2425), [#2781](https://github.com/mikf/gallery-dl/issues/2781))
+- [misskey] add `favorite` extractor ([#3950](https://github.com/mikf/gallery-dl/issues/3950))
+- [pixiv] add `novel` support ([#1241](https://github.com/mikf/gallery-dl/issues/1241), [#4044](https://github.com/mikf/gallery-dl/issues/4044))
+- [reddit] support cross-posted media ([#887](https://github.com/mikf/gallery-dl/issues/887), [#3586](https://github.com/mikf/gallery-dl/issues/3586), [#3976](https://github.com/mikf/gallery-dl/issues/3976))
+- [postprocessor:exec] support tilde expansion for `command`
+- [formatter] support slicing strings as bytes ([#4087](https://github.com/mikf/gallery-dl/issues/4087))
+### Fixes
+- [8muses] fix value of `album[url]` ([#3329](https://github.com/mikf/gallery-dl/issues/3329))
+- [danbooru] refactor pagination logic ([#4002](https://github.com/mikf/gallery-dl/issues/4002))
+- [fanbox] skip invalid posts ([#4088](https://github.com/mikf/gallery-dl/issues/4088))
+- [gofile] automatically fetch `website-token`
+- [kemonoparty] fix kemono and coomer logins sharing the same cache ([#4098](https://github.com/mikf/gallery-dl/issues/4098))
+- [newgrounds] add default delay between requests ([#4046](https://github.com/mikf/gallery-dl/issues/4046))
+- [nsfwalbum] detect placeholder images
+- [poipiku] extract full `descriptions` ([#4066](https://github.com/mikf/gallery-dl/issues/4066))
+- [tcbscans] update domain to `tcbscans.com` ([#4080](https://github.com/mikf/gallery-dl/issues/4080))
+- [twitter] extract TwitPic URLs in text ([#3792](https://github.com/mikf/gallery-dl/issues/3792), [#3796](https://github.com/mikf/gallery-dl/issues/3796))
+- [weibo] require numeric IDs to have length >= 10 ([#4059](https://github.com/mikf/gallery-dl/issues/4059))
+- [ytdl] fix crash due to removed `no_color` attribute
+- [cookies] improve logging behavior ([#4050](https://github.com/mikf/gallery-dl/issues/4050))
+
## 1.25.4 - 2023-05-07
### Additions
- [4chanarchives] add `thread` and `board` extractors ([#4012](https://github.com/mikf/gallery-dl/issues/4012))
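The formatter entry above ("support slicing strings as bytes", #4087) refers to slicing on encoded byte length rather than character count, which matters for filesystems that cap filename length in bytes. A minimal sketch of the idea in plain Python (not gallery-dl's actual format-spec syntax):

def slice_bytes(value, stop):
    # Truncate to at most `stop` UTF-8 bytes; errors="ignore" drops
    # any multi-byte character cut in half by the byte slice.
    return value.encode()[:stop].decode(errors="ignore")

print(slice_bytes("日本語テキスト", 10))  # -> '日本語' (3 chars, 9 bytes)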
diff --git a/PKG-INFO b/PKG-INFO
index 8b90cda..cadb98c 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.25.4
+Version: 1.25.5
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -109,9 +109,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 36f3ffa..ba745a8 100644
--- a/README.rst
+++ b/README.rst
@@ -72,9 +72,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index f018c63..3d5e4e8 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-05-07" "1.25.4" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-05-27" "1.25.5" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 8008451..be234ce 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-05-07" "1.25.4" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-05-27" "1.25.5" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1976,16 +1976,14 @@ If not set, a temporary guest token will be used.
.IP "Type:" 6
\f[I]string\f[]
-.IP "Default:" 9
-\f[I]"12345"\f[]
-
.IP "Description:" 4
API token value used during API requests.
-A not up-to-date value will result in \f[I]401 Unauthorized\f[] errors.
+An invalid or not up-to-date value
+will result in \f[I]401 Unauthorized\f[] errors.
-Setting this value to \f[I]null\f[] will do an extra HTTP request to fetch
-the current value used by gofile.
+Keeping this option unset will use an extra HTTP request
+to attempt to fetch the current value used by gofile.
.SS extractor.gofile.recursive
@@ -2041,6 +2039,21 @@ Available formats are \f[I]"webp"\f[] and \f[I]"avif"\f[].
but is most likely going to fail with \f[I]403 Forbidden\f[] errors.
+.SS extractor.imagechest.access-token
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Description:" 4
+Your personal Image Chest access token.
+
+These tokens allow using the API instead of having to scrape HTML pages,
+providing more detailed metadata.
+(\f[I]date\f[], \f[I]description\f[], etc)
+
+See https://imgchest.com/docs/api/1.0/general/authorization
+for instructions on how to generate such a token.
+
+
.SS extractor.imgur.client-id
.IP "Type:" 6
\f[I]string\f[]
@@ -2132,6 +2145,66 @@ Possible values are
It is possible to use \f[I]"all"\f[] instead of listing all values separately.
+.SS extractor.instagram.metadata
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Provide extended \f[I]user\f[] metadata even when referring to a user by ID,
+e.g. \f[I]instagram.com/id:12345678\f[].
+
+Note: This metadata is always available when referring to a user by name,
+e.g. \f[I]instagram.com/USERNAME\f[].
+
+
+.SS extractor.instagram.order-files
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"asc"\f[]
+
+.IP "Description:" 4
+Controls the order in which files of each post are returned.
+
+.br
+* \f[I]"asc"\f[]: Same order as displayed in a post
+.br
+* \f[I]"desc"\f[]: Reverse order as displayed in a post
+.br
+* \f[I]"reverse"\f[]: Same as \f[I]"desc"\f[]
+
+Note: This option does *not* affect \f[I]{num}\f[].
+To enumerate files in reverse order, use \f[I]count - num + 1\f[].
+
+
+.SS extractor.instagram.order-posts
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"asc"\f[]
+
+.IP "Description:" 4
+Controls the order in which posts are returned.
+
+.br
+* \f[I]"asc"\f[]: Same order as displayed
+.br
+* \f[I]"desc"\f[]: Reverse order as displayed
+.br
+* \f[I]"id"\f[] or \f[I]"id_asc"\f[]: Ascending order by ID
+.br
+* \f[I]"id_desc"\f[]: Descending order by ID
+.br
+* \f[I]"reverse"\f[]: Same as \f[I]"desc"\f[]
+
+Note: This option only affects \f[I]highlights\f[].
+
+
.SS extractor.instagram.previews
.IP "Type:" 6
\f[I]bool\f[]
@@ -2397,6 +2470,14 @@ Fetch media from replies to other posts.
Also emit metadata for text-only posts without media content.
+.SS extractor.[misskey].access-token
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Description:" 4
+Your access token, necessary to fetch favorited notes.
+
+
.SS extractor.[misskey].renotes
.IP "Type:" 6
\f[I]bool\f[]
@@ -2736,6 +2817,17 @@ by using a third-party tool like
\f[I]gppt\f[].
+.SS extractor.pixiv.embeds
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download images embedded in novels.
+
+
.SS extractor.pixiv.metadata
.IP "Type:" 6
\f[I]bool\f[]
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 92451fd..6a3c84f 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -129,7 +129,7 @@
},
"gofile": {
"api-token": null,
- "website-token": "12345"
+ "website-token": null
},
"hentaifoundry":
{
@@ -146,6 +146,9 @@
"password": null,
"sleep-request": 5.0
},
+ "imagechest": {
+ "access-token": null
+ },
"imgbb":
{
"username": null,
@@ -166,6 +169,9 @@
"api": "rest",
"cookies": null,
"include": "posts",
+ "order-files": "asc",
+ "order-posts": "asc",
+ "previews": false,
"sleep-request": [6.0, 12.0],
"videos": true
},
@@ -190,6 +196,7 @@
"password": null
},
"misskey": {
+ "access-token": null,
"renotes": false,
"replies": true
},
@@ -239,6 +246,7 @@
{
"refresh-token": null,
"include": "artworks",
+ "embeds": false,
"metadata": false,
"metadata-bookmark": false,
"tags": "japanese",
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 3554f49..c069128 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.25.4
+Version: 1.25.5
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -109,9 +109,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 8be6871..fde82b6 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -113,11 +113,13 @@ gallery_dl/extractor/instagram.py
gallery_dl/extractor/issuu.py
gallery_dl/extractor/itaku.py
gallery_dl/extractor/itchio.py
+gallery_dl/extractor/jpgfish.py
gallery_dl/extractor/kabeuchi.py
gallery_dl/extractor/keenspot.py
gallery_dl/extractor/kemonoparty.py
gallery_dl/extractor/khinsider.py
gallery_dl/extractor/komikcast.py
+gallery_dl/extractor/lensdump.py
gallery_dl/extractor/lexica.py
gallery_dl/extractor/lightroom.py
gallery_dl/extractor/lineblog.py
@@ -131,6 +133,7 @@ gallery_dl/extractor/mangahere.py
gallery_dl/extractor/mangakakalot.py
gallery_dl/extractor/manganelo.py
gallery_dl/extractor/mangapark.py
+gallery_dl/extractor/mangaread.py
gallery_dl/extractor/mangasee.py
gallery_dl/extractor/mangoxo.py
gallery_dl/extractor/mastodon.py
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index 32ba323..c5c5667 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -75,7 +75,7 @@ def load_cookies_firefox(cookiejar, profile=None, container=None, domain=None):
domain, bool(domain), domain.startswith("."),
path, bool(path), secure, expires, False, None, None, {},
))
- logger.info("Extracted %s cookies from Firefox", len(cookiejar))
+ _log_info("Extracted %s cookies from Firefox", len(cookiejar))
def load_cookies_safari(cookiejar, profile=None, domain=None):
@@ -98,7 +98,7 @@ def load_cookies_chrome(cookiejar, browser_name, profile=None,
keyring=None, domain=None):
config = _get_chromium_based_browser_settings(browser_name)
path = _chrome_cookies_database(profile, config)
- logger.debug("Extracting cookies from %s", path)
+ _log_debug("Extracting cookies from %s", path)
with DatabaseCopy(path) as db:
db.text_factory = bytes
@@ -155,11 +155,11 @@ def load_cookies_chrome(cookiejar, browser_name, profile=None,
else:
failed_message = ""
- logger.info("Extracted %s cookies from %s%s",
- len(cookiejar), browser_name.capitalize(), failed_message)
+ _log_info("Extracted %s cookies from %s%s",
+ len(cookiejar), browser_name.capitalize(), failed_message)
counts = decryptor.cookie_counts
counts["unencrypted"] = unencrypted_cookies
- logger.debug("cookie version breakdown: %s", counts)
+ _log_debug("Cookie version breakdown: %s", counts)
# --------------------------------------------------------------------
@@ -177,11 +177,11 @@ def _firefox_cookies_database(profile=None, container=None):
if path is None:
raise FileNotFoundError("Unable to find Firefox cookies database in "
"{}".format(search_root))
- logger.debug("Extracting cookies from %s", path)
+ _log_debug("Extracting cookies from %s", path)
if container == "none":
container_id = False
- logger.debug("Only loading cookies not belonging to any container")
+ _log_debug("Only loading cookies not belonging to any container")
elif container:
containers_path = os.path.join(
@@ -191,8 +191,8 @@ def _firefox_cookies_database(profile=None, container=None):
with open(containers_path) as file:
identities = util.json_loads(file.read())["identities"]
except OSError:
- logger.error("Unable to read Firefox container database at %s",
- containers_path)
+ _log_error("Unable to read Firefox container database at '%s'",
+ containers_path)
raise
except KeyError:
identities = ()
@@ -203,10 +203,10 @@ def _firefox_cookies_database(profile=None, container=None):
container_id = context["userContextId"]
break
else:
- raise ValueError("Unable to find Firefox container {}".format(
+ raise ValueError("Unable to find Firefox container '{}'".format(
container))
- logger.debug("Only loading cookies from container '%s' (ID %s)",
- container, container_id)
+ _log_debug("Only loading cookies from container '%s' (ID %s)",
+ container, container_id)
else:
container_id = None
@@ -229,7 +229,7 @@ def _safari_cookies_database():
path = os.path.expanduser("~/Library/Cookies/Cookies.binarycookies")
return open(path, "rb")
except FileNotFoundError:
- logger.debug("Trying secondary cookie location")
+ _log_debug("Trying secondary cookie location")
path = os.path.expanduser("~/Library/Containers/com.apple.Safari/Data"
"/Library/Cookies/Cookies.binarycookies")
return open(path, "rb")
@@ -250,7 +250,7 @@ def _safari_parse_cookies_page(data, cookiejar, domain=None):
number_of_cookies = p.read_uint()
record_offsets = [p.read_uint() for _ in range(number_of_cookies)]
if number_of_cookies == 0:
- logger.debug("a cookies page of size %s has no cookies", len(data))
+ _log_debug("Cookies page of size %s has no cookies", len(data))
return
p.skip_to(record_offsets[0], "unknown page header field")
@@ -299,8 +299,7 @@ def _safari_parse_cookies_record(data, cookiejar, host=None):
p.skip_to(value_offset)
value = p.read_cstring()
except UnicodeDecodeError:
- logger.warning("failed to parse Safari cookie "
- "because UTF-8 decoding failed")
+ _log_warning("Failed to parse Safari cookie")
return record_size
p.skip_to(record_size, "space at the end of the record")
@@ -328,7 +327,7 @@ def _chrome_cookies_database(profile, config):
elif config["profiles"]:
search_root = os.path.join(config["directory"], profile)
else:
- logger.warning("%s does not support profiles", config["browser"])
+ _log_warning("%s does not support profiles", config["browser"])
search_root = config["directory"]
path = _find_most_recently_used_file(search_root, "Cookies")
@@ -479,7 +478,7 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
elif version == b"v11":
self._cookie_counts["v11"] += 1
if self._v11_key is None:
- logger.warning("cannot decrypt v11 cookies: no key found")
+ _log_warning("Unable to decrypt v11 cookies: no key found")
return None
return _decrypt_aes_cbc(ciphertext, self._v11_key)
@@ -513,7 +512,7 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
if version == b"v10":
self._cookie_counts["v10"] += 1
if self._v10_key is None:
- logger.warning("cannot decrypt v10 cookies: no key found")
+ _log_warning("Unable to decrypt v10 cookies: no key found")
return None
return _decrypt_aes_cbc(ciphertext, self._v10_key)
@@ -543,7 +542,7 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
if version == b"v10":
self._cookie_counts["v10"] += 1
if self._v10_key is None:
- logger.warning("cannot decrypt v10 cookies: no key found")
+ _log_warning("Unable to decrypt v10 cookies: no key found")
return None
# https://chromium.googlesource.com/chromium/src/+/refs/heads
@@ -581,7 +580,7 @@ def _choose_linux_keyring():
SelectBackend
"""
desktop_environment = _get_linux_desktop_environment(os.environ)
- logger.debug("Detected desktop environment: %s", desktop_environment)
+ _log_debug("Detected desktop environment: %s", desktop_environment)
if desktop_environment == DE_KDE:
return KEYRING_KWALLET
if desktop_environment == DE_OTHER:
@@ -609,23 +608,23 @@ def _get_kwallet_network_wallet():
)
if proc.returncode != 0:
- logger.warning("failed to read NetworkWallet")
+ _log_warning("Failed to read NetworkWallet")
return default_wallet
else:
network_wallet = stdout.decode().strip()
- logger.debug("NetworkWallet = '%s'", network_wallet)
+ _log_debug("NetworkWallet = '%s'", network_wallet)
return network_wallet
except Exception as exc:
- logger.warning("exception while obtaining NetworkWallet (%s: %s)",
- exc.__class__.__name__, exc)
+ _log_warning("Error while obtaining NetworkWallet (%s: %s)",
+ exc.__class__.__name__, exc)
return default_wallet
def _get_kwallet_password(browser_keyring_name):
- logger.debug("using kwallet-query to obtain password from kwallet")
+ _log_debug("Using kwallet-query to obtain password from kwallet")
if shutil.which("kwallet-query") is None:
- logger.error(
+ _log_error(
"kwallet-query command not found. KWallet and kwallet-query "
"must be installed to read from KWallet. kwallet-query should be "
"included in the kwallet package for your distribution")
@@ -642,14 +641,14 @@ def _get_kwallet_password(browser_keyring_name):
)
if proc.returncode != 0:
- logger.error("kwallet-query failed with return code {}. "
- "Please consult the kwallet-query man page "
- "for details".format(proc.returncode))
+ _log_error("kwallet-query failed with return code {}. "
+ "Please consult the kwallet-query man page "
+ "for details".format(proc.returncode))
return b""
if stdout.lower().startswith(b"failed to read"):
- logger.debug("Failed to read password from kwallet. "
- "Using empty string instead")
+ _log_debug("Failed to read password from kwallet. "
+ "Using empty string instead")
# This sometimes occurs in KDE because chrome does not check
# hasEntry and instead just tries to read the value (which
# kwallet returns "") whereas kwallet-query checks hasEntry.
@@ -660,13 +659,12 @@ def _get_kwallet_password(browser_keyring_name):
# random password and store it, but that doesn't matter here.
return b""
else:
- logger.debug("password found")
if stdout[-1:] == b"\n":
stdout = stdout[:-1]
return stdout
except Exception as exc:
- logger.warning("exception running kwallet-query (%s: %s)",
- exc.__class__.__name__, exc)
+ _log_warning("Error when running kwallet-query (%s: %s)",
+ exc.__class__.__name__, exc)
return b""
@@ -674,7 +672,7 @@ def _get_gnome_keyring_password(browser_keyring_name):
try:
import secretstorage
except ImportError:
- logger.error("secretstorage not available")
+ _log_error("'secretstorage' Python package not available")
return b""
# Gnome keyring does not seem to organise keys in the same way as KWallet,
@@ -689,7 +687,7 @@ def _get_gnome_keyring_password(browser_keyring_name):
if item.get_label() == label:
return item.get_secret()
else:
- logger.error("failed to read from keyring")
+ _log_error("Failed to read from GNOME keyring")
return b""
@@ -703,7 +701,7 @@ def _get_linux_keyring_password(browser_keyring_name, keyring):
if not keyring:
keyring = _choose_linux_keyring()
- logger.debug("Chosen keyring: %s", keyring)
+ _log_debug("Chosen keyring: %s", keyring)
if keyring == KEYRING_KWALLET:
return _get_kwallet_password(browser_keyring_name)
@@ -717,8 +715,8 @@ def _get_linux_keyring_password(browser_keyring_name, keyring):
def _get_mac_keyring_password(browser_keyring_name):
- logger.debug("using find-generic-password to obtain "
- "password from OSX keychain")
+ _log_debug("Using find-generic-password to obtain "
+ "password from OSX keychain")
try:
proc, stdout = Popen_communicate(
"security", "find-generic-password",
@@ -731,28 +729,28 @@ def _get_mac_keyring_password(browser_keyring_name):
stdout = stdout[:-1]
return stdout
except Exception as exc:
- logger.warning("exception running find-generic-password (%s: %s)",
- exc.__class__.__name__, exc)
+ _log_warning("Error when using find-generic-password (%s: %s)",
+ exc.__class__.__name__, exc)
return None
def _get_windows_v10_key(browser_root):
path = _find_most_recently_used_file(browser_root, "Local State")
if path is None:
- logger.error("could not find local state file")
+ _log_error("Unable to find Local State file")
return None
- logger.debug("Found local state file at '%s'", path)
+ _log_debug("Found Local State file at '%s'", path)
with open(path, encoding="utf-8") as file:
data = util.json_loads(file.read())
try:
base64_key = data["os_crypt"]["encrypted_key"]
except KeyError:
- logger.error("no encrypted key in Local State")
+ _log_error("Unable to find encrypted key in Local State")
return None
encrypted_key = binascii.a2b_base64(base64_key)
prefix = b"DPAPI"
if not encrypted_key.startswith(prefix):
- logger.error("invalid key")
+ _log_error("Invalid Local State key")
return None
return _decrypt_windows_dpapi(encrypted_key[len(prefix):])
@@ -804,10 +802,10 @@ class DataParser:
def skip(self, num_bytes, description="unknown"):
if num_bytes > 0:
- logger.debug("skipping {} bytes ({}): {!r}".format(
+ _log_debug("Skipping {} bytes ({}): {!r}".format(
num_bytes, description, self.read_bytes(num_bytes)))
elif num_bytes < 0:
- raise ParserError("invalid skip of {} bytes".format(num_bytes))
+ raise ParserError("Invalid skip of {} bytes".format(num_bytes))
def skip_to(self, offset, description="unknown"):
self.skip(offset - self.cursor, description)
@@ -929,31 +927,25 @@ def pbkdf2_sha1(password, salt, iterations, key_length):
def _decrypt_aes_cbc(ciphertext, key, initialization_vector=b" " * 16):
- plaintext = aes.unpad_pkcs7(
- aes.aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
try:
- return plaintext.decode()
+ return aes.unpad_pkcs7(aes.aes_cbc_decrypt_bytes(
+ ciphertext, key, initialization_vector)).decode()
except UnicodeDecodeError:
- logger.warning("failed to decrypt cookie (AES-CBC) because UTF-8 "
- "decoding failed. Possibly the key is wrong?")
- return None
+ _log_warning("Failed to decrypt cookie (AES-CBC Unicode)")
+ except ValueError:
+ _log_warning("Failed to decrypt cookie (AES-CBC)")
+ return None
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag):
try:
- plaintext = aes.aes_gcm_decrypt_and_verify_bytes(
- ciphertext, key, authentication_tag, nonce)
- except ValueError:
- logger.warning("failed to decrypt cookie (AES-GCM) because MAC check "
- "failed. Possibly the key is wrong?")
- return None
-
- try:
- return plaintext.decode()
+ return aes.aes_gcm_decrypt_and_verify_bytes(
+ ciphertext, key, authentication_tag, nonce).decode()
except UnicodeDecodeError:
- logger.warning("failed to decrypt cookie (AES-GCM) because UTF-8 "
- "decoding failed. Possibly the key is wrong?")
- return None
+ _log_warning("Failed to decrypt cookie (AES-GCM Unicode)")
+ except ValueError:
+ _log_warning("Failed to decrypt cookie (AES-GCM MAC)")
+ return None
def _decrypt_windows_dpapi(ciphertext):
@@ -981,7 +973,7 @@ def _decrypt_windows_dpapi(ciphertext):
ctypes.byref(blob_out) # pDataOut
)
if not ret:
- logger.warning("failed to decrypt with DPAPI")
+ _log_warning("Failed to decrypt cookie (DPAPI)")
return None
result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
@@ -1009,9 +1001,26 @@ def _parse_browser_specification(
browser, profile=None, keyring=None, container=None, domain=None):
browser = browser.lower()
if browser not in SUPPORTED_BROWSERS:
- raise ValueError("unsupported browser '{}'".format(browser))
+ raise ValueError("Unsupported browser '{}'".format(browser))
if keyring and keyring not in SUPPORTED_KEYRINGS:
- raise ValueError("unsupported keyring '{}'".format(keyring))
+ raise ValueError("Unsupported keyring '{}'".format(keyring))
if profile and _is_path(profile):
profile = os.path.expanduser(profile)
return browser, profile, keyring, container, domain
+
+
+_log_cache = set()
+_log_debug = logger.debug
+_log_info = logger.info
+
+
+def _log_warning(msg, *args):
+ if msg not in _log_cache:
+ _log_cache.add(msg)
+ logger.warning(msg, *args)
+
+
+def _log_error(msg, *args):
+ if msg not in _log_cache:
+ _log_cache.add(msg)
+ logger.error(msg, *args)
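The `_log_warning`/`_log_error` helpers added at the bottom of cookies.py deduplicate messages: a warning that would otherwise fire once per cookie (for example, a failed decryption) is emitted only once per run. The same pattern in isolation:

import logging

logger = logging.getLogger("cookies")
_log_cache = set()

def log_once(msg, *args):
    # Keyed on the unformatted template, as in cookies.py, so the same
    # message with different arguments is still logged only once.
    if msg not in _log_cache:
        _log_cache.add(msg)
        logger.warning(msg, *args)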
diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py
index 26ac8b2..584c6d2 100644
--- a/gallery_dl/extractor/8muses.py
+++ b/gallery_dl/extractor/8muses.py
@@ -35,8 +35,10 @@ class _8musesAlbumExtractor(Extractor):
"id" : 10467,
"title" : "Liar",
"path" : "Fakku Comics/mogg/Liar",
+ "parts" : ["Fakku Comics", "mogg", "Liar"],
"private": False,
- "url" : str,
+ "url" : "https://comics.8muses.com/comics"
+ "/album/Fakku-Comics/mogg/Liar",
"parent" : 10464,
"views" : int,
"likes" : int,
@@ -118,9 +120,10 @@ class _8musesAlbumExtractor(Extractor):
return {
"id" : album["id"],
"path" : album["path"],
+ "parts" : album["path"].split("/"),
"title" : album["name"],
"private": album["isPrivate"],
- "url" : self.root + album["permalink"],
+ "url" : self.root + "/comics/album/" + album["permalink"],
"parent" : text.parse_int(album["parentId"]),
"views" : text.parse_int(album["numberViews"]),
"likes" : text.parse_int(album["numberLikes"]),
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 5475fea..3e47c3e 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -75,11 +75,13 @@ modules = [
"issuu",
"itaku",
"itchio",
+ "jpgfish",
"kabeuchi",
"keenspot",
"kemonoparty",
"khinsider",
"komikcast",
+ "lensdump",
"lexica",
"lightroom",
"lineblog",
@@ -92,6 +94,7 @@ modules = [
"mangakakalot",
"manganelo",
"mangapark",
+ "mangaread",
"mangasee",
"mangoxo",
"misskey",
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 09737ef..50d1026 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -555,7 +555,13 @@ class GalleryExtractor(Extractor):
def items(self):
self.login()
- page = self.request(self.gallery_url, notfound=self.subcategory).text
+
+ if self.gallery_url:
+ page = self.request(
+ self.gallery_url, notfound=self.subcategory).text
+ else:
+ page = None
+
data = self.metadata(page)
imgs = self.images(page)
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 326b53b..5cfbf5c 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -70,6 +70,8 @@ class DanbooruExtractor(BaseExtractor):
continue
text.nameext_from_url(url, post)
+ post["date"] = text.parse_datetime(
+ post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
if post["extension"] == "zip":
if self.ugoira:
@@ -92,42 +94,47 @@ class DanbooruExtractor(BaseExtractor):
def posts(self):
return ()
- def _pagination(self, endpoint, params, pages=False):
+ def _pagination(self, endpoint, params, prefix=None):
url = self.root + endpoint
params["limit"] = self.per_page
params["page"] = self.page_start
+ first = True
while True:
posts = self.request(url, params=params).json()
- if "posts" in posts:
+ if isinstance(posts, dict):
posts = posts["posts"]
- if self.includes and posts:
- if not pages and "only" not in params:
- params["page"] = "b{}".format(posts[0]["id"] + 1)
- params["only"] = self.includes
- data = {
- meta["id"]: meta
- for meta in self.request(url, params=params).json()
- }
- for post in posts:
- post.update(data[post["id"]])
- params["only"] = None
-
- yield from posts
+ if posts:
+ if self.includes:
+ params_meta = {
+ "only" : self.includes,
+ "limit": len(posts),
+ "tags" : "id:" + ",".join(str(p["id"]) for p in posts),
+ }
+ data = {
+ meta["id"]: meta
+ for meta in self.request(
+ url, params=params_meta).json()
+ }
+ for post in posts:
+ post.update(data[post["id"]])
+
+ if prefix == "a" and not first:
+ posts.reverse()
+
+ yield from posts
if len(posts) < self.threshold:
return
- if pages:
+ if prefix:
+ params["page"] = "{}{}".format(prefix, posts[-1]["id"])
+ elif params["page"]:
params["page"] += 1
else:
- for post in reversed(posts):
- if "id" in post:
- params["page"] = "b{}".format(post["id"])
- break
- else:
- return
+ params["page"] = 2
+ first = False
def _ugoira_frames(self, post):
data = self.request("{}/posts/{}.json?only=media_metadata".format(
@@ -153,7 +160,7 @@ BASE_PATTERN = DanbooruExtractor.update({
"aibooru": {
"root": None,
"pattern": r"(?:safe.)?aibooru\.online",
- }
+ },
})
@@ -181,7 +188,7 @@ class DanbooruTagExtractor(DanbooruExtractor):
"count": 12,
}),
("https://aibooru.online/posts?tags=center_frills&z=1", {
- "pattern": r"https://aibooru\.online/data/original"
+ "pattern": r"https://cdn\.aibooru\.online/original"
r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w+",
"count": ">= 3",
}),
@@ -200,7 +207,21 @@ class DanbooruTagExtractor(DanbooruExtractor):
return {"search_tags": self.tags}
def posts(self):
- return self._pagination("/posts.json", {"tags": self.tags})
+ prefix = "b"
+ for tag in self.tags.split():
+ if tag.startswith("order:"):
+ if tag == "order:id" or tag == "order:id_asc":
+ prefix = "a"
+ elif tag == "order:id_desc":
+ prefix = "b"
+ else:
+ prefix = None
+ elif tag.startswith(
+ ("id:", "md5", "ordfav:", "ordfavgroup:", "ordpool:")):
+ prefix = None
+ break
+
+ return self._pagination("/posts.json", {"tags": self.tags}, prefix)
class DanbooruPoolExtractor(DanbooruExtractor):
@@ -234,7 +255,7 @@ class DanbooruPoolExtractor(DanbooruExtractor):
def posts(self):
params = {"tags": "pool:" + self.pool_id}
- return self._pagination("/posts.json", params)
+ return self._pagination("/posts.json", params, "b")
class DanbooruPostExtractor(DanbooruExtractor):
@@ -245,6 +266,7 @@ class DanbooruPostExtractor(DanbooruExtractor):
test = (
("https://danbooru.donmai.us/posts/294929", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
+ "keyword": {"date": "dt:2008-08-12 04:46:05"},
}),
("https://danbooru.donmai.us/posts/3613024", {
"pattern": r"https?://.+\.zip$",
@@ -307,7 +329,4 @@ class DanbooruPopularExtractor(DanbooruExtractor):
return {"date": date, "scale": scale}
def posts(self):
- if self.page_start is None:
- self.page_start = 1
- return self._pagination(
- "/explore/posts/popular.json", self.params, True)
+ return self._pagination("/explore/posts/popular.json", self.params)
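The refactored `_pagination()` uses Danbooru's cursor-style page parameters instead of fixed page numbers where possible: `page=b<id>` requests posts with IDs below `<id>`, `page=a<id>` requests posts above it (those batches arrive in ascending order, hence the `posts.reverse()` for every batch after the first), and plain numeric pages remain the fallback. A simplified sketch of the descending `b` cursor, with the HTTP layer stubbed out as `fetch(params)`:

def paginate_desc(fetch, limit=200):
    # fetch(params) -> list of posts, newest first (hypothetical stand-in
    # for Extractor.request(...).json())
    params = {"limit": limit}
    while True:
        posts = fetch(params)
        yield from posts
        if len(posts) < limit:
            return  # short page: nothing older remains
        # continue below the oldest ID seen so far
        params["page"] = "b{}".format(posts[-1]["id"])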
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
index 8f2994e..d4f6cd4 100644
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -57,6 +57,8 @@ class E621Extractor(danbooru.DanbooruExtractor):
post["filename"] = file["md5"]
post["extension"] = file["ext"]
+ post["date"] = text.parse_datetime(
+ post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post.update(data)
yield Message.Directory, post
@@ -140,6 +142,7 @@ class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
("https://e621.net/posts/535", {
"url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
"content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
+ "keyword": {"date": "dt:2007-02-17 19:02:32"},
}),
("https://e621.net/posts/3181052", {
"options": (("metadata", "notes,pools"),),
@@ -216,9 +219,7 @@ class E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor):
)
def posts(self):
- if self.page_start is None:
- self.page_start = 1
- return self._pagination("/popular.json", self.params, True)
+ return self._pagination("/popular.json", self.params)
class E621FavoriteExtractor(E621Extractor):
@@ -249,6 +250,4 @@ class E621FavoriteExtractor(E621Extractor):
return {"user_id": self.query.get("user_id", "")}
def posts(self):
- if self.page_start is None:
- self.page_start = 1
- return self._pagination("/favorites.json", self.query, True)
+ return self._pagination("/favorites.json", self.query)
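Both danbooru and e621 now derive the new `date` field by parsing the API's `created_at` timestamp. The stdlib equivalent of that `text.parse_datetime()` call, using an illustrative timestamp (e621's test post resolves to `dt:2007-02-17 19:02:32` UTC):

from datetime import datetime, timezone

created_at = "2007-02-17T14:02:32.000-05:00"  # illustrative API value
date = datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%S.%f%z")
print(date.astimezone(timezone.utc))  # 2007-02-17 19:02:32+00:00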
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index 57c4333..4ca0852 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -52,8 +52,11 @@ class FanboxExtractor(Extractor):
url = text.ensure_http_scheme(url)
body = self.request(url, headers=headers).json()["body"]
for item in body["items"]:
- yield self._get_post_data(item["id"])
-
+ try:
+ yield self._get_post_data(item["id"])
+ except Exception as exc:
+ self.log.warning("Skipping post %s (%s: %s)",
+ item["id"], exc.__class__.__name__, exc)
url = body["nextUrl"]
def _get_post_data(self, post_id):
diff --git a/gallery_dl/extractor/gofile.py b/gallery_dl/extractor/gofile.py
index b53ebbe..044dddb 100644
--- a/gallery_dl/extractor/gofile.py
+++ b/gallery_dl/extractor/gofile.py
@@ -6,7 +6,8 @@
from .common import Extractor, Message
from .. import text, exception
-from ..cache import memcache
+from ..cache import cache, memcache
+import hashlib
class GofileFolderExtractor(Extractor):
@@ -66,6 +67,7 @@ class GofileFolderExtractor(Extractor):
def items(self):
recursive = self.config("recursive")
+ password = self.config("password")
token = self.config("api-token")
if not token:
@@ -73,12 +75,10 @@ class GofileFolderExtractor(Extractor):
self.session.cookies.set("accountToken", token, domain=".gofile.io")
self.api_token = token
- token = self.config("website-token", "12345")
- if not token:
- token = self._get_website_token()
- self.website_token = token
+ self.website_token = (self.config("website-token") or
+ self._get_website_token())
- folder = self._get_content(self.content_id)
+ folder = self._get_content(self.content_id, password)
yield Message.Directory, folder
num = 0
@@ -109,17 +109,20 @@ class GofileFolderExtractor(Extractor):
self.log.debug("Creating temporary account")
return self._api_request("createAccount")["token"]
- @memcache()
+ @cache(maxage=86400)
def _get_website_token(self):
self.log.debug("Fetching website token")
- page = self.request(self.root + "/contents/files.html").text
- return text.extract(page, "websiteToken:", ",")[0].strip("\" ")
+ page = self.request(self.root + "/dist/js/alljs.js").text
+ return text.extr(page, 'fetchData.websiteToken = "', '"')
- def _get_content(self, content_id):
+ def _get_content(self, content_id, password=None):
+ if password is not None:
+ password = hashlib.sha256(password.encode()).hexdigest()
return self._api_request("getContent", {
"contentId" : content_id,
"token" : self.api_token,
"websiteToken": self.website_token,
+ "password" : password,
})
def _api_request(self, endpoint, params=None):
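Gofile's basic password support sends the folder password as its SHA-256 hex digest rather than in plain text, matching the hashing in `_get_content()`:

import hashlib

password = "hunter2"  # illustrative value
hashed = hashlib.sha256(password.encode()).hexdigest()
# 64-character hex string passed as the 'password' API parameter
print(hashed)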
diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py
index 086b95d..9229617 100644
--- a/gallery_dl/extractor/imagechest.py
+++ b/gallery_dl/extractor/imagechest.py
@@ -37,6 +37,9 @@ class ImagechestGalleryExtractor(GalleryExtractor):
"url": "f5674e8ba79d336193c9f698708d9dcc10e78cc7",
"count": 52,
}),
+ ("https://imgchest.com/p/xxxxxxxxxxx", {
+ "exception": exception.NotFoundError,
+ }),
)
def __init__(self, match):
@@ -44,6 +47,12 @@ class ImagechestGalleryExtractor(GalleryExtractor):
url = self.root + "/p/" + self.gallery_id
GalleryExtractor.__init__(self, match, url)
+ self.access_token = self.config("access-token")
+ if self.access_token:
+ self.gallery_url = None
+ self.metadata = self._metadata_api
+ self.images = self._images_api
+
def metadata(self, page):
if "Sorry, but the page you requested could not be found." in page:
raise exception.NotFoundError("gallery")
@@ -71,3 +80,69 @@ class ImagechestGalleryExtractor(GalleryExtractor):
(url, None)
for url in text.extract_iter(page, 'data-url="', '"')
]
+
+ def _metadata_api(self, page):
+ api = ImagechestAPI(self, self.access_token)
+ post = api.post(self.gallery_id)
+
+ post["date"] = text.parse_datetime(
+ post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ for img in post["images"]:
+ img["date"] = text.parse_datetime(
+ img["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
+
+ post["gallery_id"] = self.gallery_id
+ post.pop("image_count", None)
+ self._image_list = post.pop("images")
+
+ return post
+
+ def _images_api(self, page):
+ return [
+ (img["link"], img)
+ for img in self._image_list
+ ]
+
+
+class ImagechestAPI():
+ """Interface for the Image Chest API
+
+ https://imgchest.com/docs/api/1.0/general/overview
+ """
+ root = "https://api.imgchest.com"
+
+ def __init__(self, extractor, access_token):
+ self.extractor = extractor
+ self.headers = {"Authorization": "Bearer " + access_token}
+
+ def file(self, file_id):
+ endpoint = "/v1/file/" + file_id
+ return self._call(endpoint)
+
+ def post(self, post_id):
+ endpoint = "/v1/post/" + post_id
+ return self._call(endpoint)
+
+ def user(self, username):
+ endpoint = "/v1/user/" + username
+ return self._call(endpoint)
+
+ def _call(self, endpoint):
+ url = self.root + endpoint
+
+ while True:
+ response = self.extractor.request(
+ url, headers=self.headers, fatal=None, allow_redirects=False)
+
+ if response.status_code < 300:
+ return response.json()["data"]
+
+ elif response.status_code < 400:
+ raise exception.AuthenticationError("Invalid API access token")
+
+ elif response.status_code == 429:
+ self.extractor.wait(seconds=600)
+
+ else:
+ self.extractor.log.debug(response.text)
+ raise exception.StopExtraction("API request failed")
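With an `access-token` configured, the extractor skips HTML scraping entirely (`gallery_url = None`, which the `GalleryExtractor.items()` change in common.py now tolerates) and reads everything through the API. Outside gallery-dl, the same endpoint can be exercised with `requests`; the token and post ID below are placeholders:

import requests

token = "YOUR_ACCESS_TOKEN"  # see imgchest.com/docs/api/1.0/general/authorization
resp = requests.get(
    "https://api.imgchest.com/v1/post/3na7kr3by8d",  # hypothetical post ID
    headers={"Authorization": "Bearer " + token},
)
resp.raise_for_status()
post = resp.json()["data"]  # 'data' envelope, as unwrapped in _call()
print(len(post["images"]))  # image list consumed by _images_api()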
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 4c1be0f..677cbdd 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -55,6 +55,9 @@ class InstagramExtractor(Extractor):
previews = self.config("previews", False)
video_headers = {"User-Agent": "Mozilla/5.0"}
+ order = self.config("order-files")
+ reverse = order[0] in ("r", "d") if order else False
+
for post in self.posts():
if "__typename" in post:
@@ -71,6 +74,8 @@ class InstagramExtractor(Extractor):
if "date" in post:
del post["date"]
+ if reverse:
+ files.reverse()
for file in files:
file.update(post)
@@ -756,10 +761,20 @@ class InstagramRestAPI():
endpoint = "/v1/guides/guide/{}/".format(guide_id)
return self._pagination_guides(endpoint)
- def highlights_media(self, user_id):
- chunk_size = 5
+ def highlights_media(self, user_id, chunk_size=5):
reel_ids = [hl["id"] for hl in self.highlights_tray(user_id)]
+ order = self.extractor.config("order-posts")
+ if order:
+ if order in ("desc", "reverse"):
+ reel_ids.reverse()
+ elif order in ("id", "id_asc"):
+ reel_ids.sort(key=lambda r: int(r[10:]))
+ elif order == "id_desc":
+ reel_ids.sort(key=lambda r: int(r[10:]), reverse=True)
+ elif order != "asc":
+ self.extractor.log.warning("Unknown posts order '%s'", order)
+
for offset in range(0, len(reel_ids), chunk_size):
yield from self.reels_media(
reel_ids[offset : offset+chunk_size])
@@ -799,13 +814,17 @@ class InstagramRestAPI():
params = {"username": screen_name}
return self._call(endpoint, params=params)["data"]["user"]
+ @memcache(keyarg=1)
def user_by_id(self, user_id):
endpoint = "/v1/users/{}/info/".format(user_id)
return self._call(endpoint)["user"]
def user_id(self, screen_name, check_private=True):
if screen_name.startswith("id:"):
+ if self.extractor.config("metadata"):
+ self.extractor._user = self.user_by_id(screen_name[3:])
return screen_name[3:]
+
user = self.user_by_name(screen_name)
if user is None:
raise exception.AuthorizationError(
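The `order-posts` handling sorts highlight reel IDs numerically; `r[10:]` strips the 10-character `"highlight:"` prefix before the integer comparison, e.g.:

reel_ids = ["highlight:17912", "highlight:210", "highlight:9134"]
reel_ids.sort(key=lambda r: int(r[10:]))  # "id" / "id_asc"
print(reel_ids)
# ['highlight:210', 'highlight:9134', 'highlight:17912']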
diff --git a/gallery_dl/extractor/jpgfish.py b/gallery_dl/extractor/jpgfish.py
new file mode 100644
index 0000000..cdcf35c
--- /dev/null
+++ b/gallery_dl/extractor/jpgfish.py
@@ -0,0 +1,143 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://jpg.fishing/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?jpg\.(?:fishing|church)"
+
+
+class JpgfishExtractor(Extractor):
+ """Base class for jpgfish extractors"""
+ category = "jpgfish"
+ root = "https://jpg.fishing"
+ directory_fmt = ("{category}", "{user}", "{album}",)
+ archive_fmt = "{id}"
+
+ def _pagination(self, url):
+ while url:
+ page = self.request(url).text
+
+ for item in text.extract_iter(
+ page, '<div class="list-item-image ', 'image-container'):
+ yield text.extract(item, '<a href="', '"')[0]
+
+ url = text.extract(
+ page, '<a data-pagination="next" href="', '" ><')[0]
+
+
+class JpgfishImageExtractor(JpgfishExtractor):
+ """Extractor for jpgfish Images"""
+ subcategory = "image"
+ pattern = BASE_PATTERN + r"/img/((?:[^/?#]+\.)?(\w+))"
+ test = (
+ ("https://jpg.fishing/img/funnymeme.LecXGS", {
+ "pattern": r"https://simp3\.jpg\.church/images/funnymeme\.jpg",
+ "content": "098e5e9b17ad634358426e0ffd1c93871474d13c",
+ "keyword": {
+ "album": "",
+ "extension": "jpg",
+ "filename": "funnymeme",
+ "id": "LecXGS",
+ "url": "https://simp3.jpg.church/images/funnymeme.jpg",
+ "user": "exearco",
+ },
+ }),
+ ("https://jpg.church/img/auCruA", {
+ "pattern": r"https://simp2\.jpg\.church/hannahowo_00457\.jpg",
+ "keyword": {"album": "401-500"},
+ }),
+ ("https://jpg.church/img/hannahowo-00424.au64iA"),
+ )
+
+ def __init__(self, match):
+ JpgfishExtractor.__init__(self, match)
+ self.path, self.image_id = match.groups()
+
+ def items(self):
+ url = "{}/img/{}".format(self.root, self.path)
+ extr = text.extract_from(self.request(url).text)
+
+ image = {
+ "id" : self.image_id,
+ "url" : extr('<meta property="og:image" content="', '"'),
+ "album": text.extract(extr(
+ "Added to <a", "/a>"), ">", "<")[0] or "",
+ "user" : extr('username: "', '"'),
+ }
+
+ text.nameext_from_url(image["url"], image)
+ yield Message.Directory, image
+ yield Message.Url, image["url"], image
+
+
+class JpgfishAlbumExtractor(JpgfishExtractor):
+ """Extractor for jpgfish Albums"""
+ subcategory = "album"
+ pattern = BASE_PATTERN + r"/a(?:lbum)?/([^/?#]+)(/sub)?"
+ test = (
+ ("https://jpg.fishing/album/CDilP/?sort=date_desc&page=1", {
+ "count": 2,
+ }),
+ ("https://jpg.church/a/gunggingnsk.N9OOI", {
+ "count": 114,
+ }),
+ ("https://jpg.church/a/101-200.aNJ6A/", {
+ "count": 100,
+ }),
+ ("https://jpg.church/a/hannahowo.aNTdH/sub", {
+ "count": 606,
+ }),
+ )
+
+ def __init__(self, match):
+ JpgfishExtractor.__init__(self, match)
+ self.album, self.sub_albums = match.groups()
+
+ def items(self):
+ url = "{}/a/{}".format(self.root, self.album)
+ data = {"_extractor": JpgfishImageExtractor}
+
+ if self.sub_albums:
+ albums = self._pagination(url + "/sub")
+ else:
+ albums = (url,)
+
+ for album in albums:
+ for image in self._pagination(album):
+ yield Message.Queue, image, data
+
+
+class JpgfishUserExtractor(JpgfishExtractor):
+ """Extractor for jpgfish Users"""
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/(?!img|a(?:lbum)?)([^/?#]+)(/albums)?"
+ test = (
+ ("https://jpg.fishing/exearco", {
+ "count": 3,
+ }),
+ ("https://jpg.church/exearco/albums", {
+ "count": 1,
+ }),
+ )
+
+ def __init__(self, match):
+ JpgfishExtractor.__init__(self, match)
+ self.user, self.albums = match.groups()
+
+ def items(self):
+ url = "{}/{}".format(self.root, self.user)
+
+ if self.albums:
+ url += "/albums"
+ data = {"_extractor": JpgfishAlbumExtractor}
+ else:
+ data = {"_extractor": JpgfishImageExtractor}
+
+ for url in self._pagination(url):
+ yield Message.Queue, url, data
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 33e8370..915fbe6 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -125,10 +125,12 @@ class KemonopartyExtractor(Extractor):
def login(self):
username, password = self._get_auth_info()
if username:
- self._update_cookies(self._login_impl(username, password))
+ self._update_cookies(self._login_impl(
+ (username, self.cookiedomain), password))
@cache(maxage=28*24*3600, keyarg=1)
def _login_impl(self, username, password):
+ username = username[0]
self.log.info("Logging in as %s", username)
url = self.root + "/account/login"
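The kemonoparty fix widens the cached login key from the bare username to a `(username, cookiedomain)` tuple, so kemono and coomer sessions under the same account name no longer collide in the cache. The pattern in isolation, with `functools.lru_cache` standing in for gallery-dl's `@cache` decorator:

import functools

@functools.lru_cache(maxsize=None)
def login(key, password):
    username, domain = key  # tuple key keeps per-domain sessions apart
    print("logging in as {} on {}".format(username, domain))
    return username + "@" + domain

login(("user", "kemono.party"), "pw")  # performs the login
login(("user", "coomer.party"), "pw")  # different key, logs in again
login(("user", "kemono.party"), "pw")  # cached, no third login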
diff --git a/gallery_dl/extractor/lensdump.py b/gallery_dl/extractor/lensdump.py
new file mode 100644
index 0000000..8990621
--- /dev/null
+++ b/gallery_dl/extractor/lensdump.py
@@ -0,0 +1,161 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://lensdump.com/"""
+
+from .common import GalleryExtractor, Extractor, Message
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?lensdump\.com"
+
+
+class LensdumpBase():
+ """Base class for lensdump extractors"""
+ category = "lensdump"
+ root = "https://lensdump.com"
+
+ def nodes(self, page=None):
+ if page is None:
+ page = self.request(self.url).text
+
+ # go through all pages starting from the oldest
+ page_url = text.urljoin(self.root, text.extr(
+ text.extr(page, ' id="list-most-oldest-link"', '>'),
+ 'href="', '"'))
+ while page_url is not None:
+ if page_url == self.url:
+ current_page = page
+ else:
+ current_page = self.request(page_url).text
+
+ for node in text.extract_iter(
+ current_page, ' class="list-item ', '>'):
+ yield node
+
+ # find url of next page
+ page_url = text.extr(
+ text.extr(current_page, ' data-pagination="next"', '>'),
+ 'href="', '"')
+ if page_url is not None and len(page_url) > 0:
+ page_url = text.urljoin(self.root, page_url)
+ else:
+ page_url = None
+
+
+class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor):
+ subcategory = "album"
+ pattern = BASE_PATTERN + r"/(?:((?!\w+/albums|a/|i/)\w+)|a/(\w+))"
+ test = (
+ ("https://lensdump.com/a/1IhJr", {
+ "url": "7428cc906e7b291c778d446a11c602b81ba72840",
+ "keyword": {
+ "extension": "png",
+ "name": str,
+ "num": int,
+ "title": str,
+ "url": str,
+ "width": int,
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ GalleryExtractor.__init__(self, match, match.string)
+ self.gallery_id = match.group(1) or match.group(2)
+
+ def metadata(self, page):
+ return {
+ "gallery_id": self.gallery_id,
+ "title": text.unescape(text.extr(
+ page, 'property="og:title" content="', '"').strip())
+ }
+
+ def images(self, page):
+ for node in self.nodes(page):
+ # get urls and filenames of images in current page
+ json_data = util.json_loads(text.unquote(
+ text.extr(node, 'data-object="', '"')))
+ image_id = json_data.get('name')
+ image_url = json_data.get('url')
+ image_title = json_data.get('title')
+ if image_title is not None:
+ image_title = text.unescape(image_title)
+ yield (image_url, {
+ 'id': image_id,
+ 'url': image_url,
+ 'title': image_title,
+ 'name': json_data.get('filename'),
+ 'filename': image_id,
+ 'extension': json_data.get('extension'),
+ 'height': text.parse_int(json_data.get('height')),
+ 'width': text.parse_int(json_data.get('width')),
+ })
+
+
+class LensdumpAlbumsExtractor(LensdumpBase, Extractor):
+ """Extractor for album list from lensdump.com"""
+ subcategory = "albums"
+ pattern = BASE_PATTERN + r"/\w+/albums"
+ test = ("https://lensdump.com/vstar925/albums",)
+
+ def items(self):
+ for node in self.nodes():
+ album_url = text.urljoin(self.root, text.extr(
+ node, 'data-url-short="', '"'))
+ yield Message.Queue, album_url, {
+ "_extractor": LensdumpAlbumExtractor}
+
+
+class LensdumpImageExtractor(LensdumpBase, Extractor):
+ """Extractor for individual images on lensdump.com"""
+ subcategory = "image"
+ filename_fmt = "{category}_{id}{title:?_//}.{extension}"
+ directory_fmt = ("{category}",)
+ archive_fmt = "{id}"
+ pattern = BASE_PATTERN + r"/i/(\w+)"
+ test = (
+ ("https://lensdump.com/i/tyoAyM", {
+ "pattern": r"https://i\d\.lensdump\.com/i/tyoAyM\.webp",
+ "url": "ae9933f5f3bd9497bfc34e3e70a0fbef6c562d38",
+ "content": "1aa749ed2c0cf679ec8e1df60068edaf3875de46",
+ "keyword": {
+ "date": "dt:2022-08-01 08:24:28",
+ "extension": "webp",
+ "filename": "tyoAyM",
+ "height": 400,
+ "id": "tyoAyM",
+ "title": "MYOBI clovis bookcaseset",
+ "url": "https://i2.lensdump.com/i/tyoAyM.webp",
+ "width": 620,
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.key = match.group(1)
+
+ def items(self):
+ url = "{}/i/{}".format(self.root, self.key)
+ extr = text.extract_from(self.request(url).text)
+
+ data = {
+ "id" : self.key,
+ "title" : text.unescape(extr(
+ 'property="og:title" content="', '"')),
+ "url" : extr(
+ 'property="og:image" content="', '"'),
+ "width" : text.parse_int(extr(
+ 'property="image:width" content="', '"')),
+ "height": text.parse_int(extr(
+ 'property="image:height" content="', '"')),
+ "date" : text.parse_datetime(extr(
+ '<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
+ }
+
+ text.nameext_from_url(data["url"], data)
+ yield Message.Directory, data
+ yield Message.Url, data["url"], data
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 12b8f39..e111fee 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -193,7 +193,10 @@ class MangadexFeedExtractor(MangadexExtractor):
class MangadexAPI():
- """Interface for the MangaDex API v5"""
+ """Interface for the MangaDex API v5
+
+ https://api.mangadex.org/docs/
+ """
def __init__(self, extr):
self.extractor = extr
diff --git a/gallery_dl/extractor/mangaread.py b/gallery_dl/extractor/mangaread.py
new file mode 100644
index 0000000..49d4d7d
--- /dev/null
+++ b/gallery_dl/extractor/mangaread.py
@@ -0,0 +1,191 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://mangaread.org/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text, exception
+import re
+
+
+class MangareadBase():
+ """Base class for Mangaread extractors"""
+ category = "mangaread"
+ root = "https://www.mangaread.org"
+
+ @staticmethod
+ def parse_chapter_string(chapter_string, data):
+ match = re.match(
+ r"(?:(.+)\s*-\s*)?[Cc]hapter\s*(\d+)(\.\d+)?(?:\s*-\s*(.+))?",
+ text.unescape(chapter_string).strip())
+ manga, chapter, minor, title = match.groups()
+ manga = manga.strip() if manga else ""
+ data["manga"] = data.pop("manga", manga)
+ data["chapter"] = text.parse_int(chapter)
+ data["chapter_minor"] = minor or ""
+ data["title"] = title or ""
+ data["lang"] = "en"
+ data["language"] = "English"
+
+
+class MangareadChapterExtractor(MangareadBase, ChapterExtractor):
+ """Extractor for manga-chapters from mangaread.org"""
+ pattern = (r"(?:https?://)?(?:www\.)?mangaread\.org"
+ r"(/manga/[^/?#]+/[^/?#]+)")
+ test = (
+ ("https://www.mangaread.org/manga/one-piece/chapter-1053-3/", {
+ "pattern": (r"https://www\.mangaread\.org/wp-content/uploads"
+ r"/WP-manga/data/manga_[^/]+/[^/]+/[^.]+\.\w+"),
+ "count": 11,
+ "keyword": {
+ "manga" : "One Piece",
+ "title" : "",
+ "chapter" : 1053,
+ "chapter_minor": ".3",
+ "tags" : ["Oda Eiichiro"],
+ "lang" : "en",
+ "language": "English",
+ }
+ }),
+ ("https://www.mangaread.org/manga/one-piece/chapter-1000000/", {
+ "exception": exception.NotFoundError,
+ }),
+ (("https://www.mangaread.org"
+ "/manga/kanan-sama-wa-akumade-choroi/chapter-10/"), {
+ "pattern": (r"https://www\.mangaread\.org/wp-content/uploads"
+ r"/WP-manga/data/manga_[^/]+/[^/]+/[^.]+\.\w+"),
+ "count": 9,
+ "keyword": {
+ "manga" : "Kanan-sama wa Akumade Choroi",
+ "title" : "",
+ "chapter" : 10,
+ "chapter_minor": "",
+ "tags" : list,
+ "lang" : "en",
+ "language": "English",
+ }
+ }),
+ # 'Chapter146.5'
+ # ^^ no whitespace
+ ("https://www.mangaread.org/manga/above-all-gods/chapter146-5/", {
+ "pattern": (r"https://www\.mangaread\.org/wp-content/uploads"
+ r"/WP-manga/data/manga_[^/]+/[^/]+/[^.]+\.\w+"),
+ "count": 6,
+ "keyword": {
+ "manga" : "Above All Gods",
+ "title" : "",
+ "chapter" : 146,
+ "chapter_minor": ".5",
+ "tags" : list,
+ "lang" : "en",
+ "language": "English",
+ }
+ }),
+ )
+
+ def metadata(self, page):
+ data = {"tags": list(text.extract_iter(page, "class>", "<"))}
+ info = text.extr(page, '<h1 id="chapter-heading">', "</h1>")
+ if not info:
+ raise exception.NotFoundError("chapter")
+ self.parse_chapter_string(info, data)
+ return data
+
+ def images(self, page):
+ page = text.extr(
+ page, '<div class="reading-content">', '<div class="entry-header')
+ return [
+ (url.strip(), None)
+ for url in text.extract_iter(page, 'data-src="', '"')
+ ]
+
+
+class MangareadMangaExtractor(MangareadBase, MangaExtractor):
+ """Extractor for manga from mangaread.org"""
+ chapterclass = MangareadChapterExtractor
+ pattern = r"(?:https?://)?(?:www\.)?mangaread\.org(/manga/[^/?#]+)/?$"
+ test = (
+ ("https://www.mangaread.org/manga/kanan-sama-wa-akumade-choroi", {
+ "pattern": (r"https://www\.mangaread\.org/manga"
+ r"/kanan-sama-wa-akumade-choroi"
+ r"/chapter-\d+(-.+)?/"),
+ "count" : ">= 13",
+ "keyword": {
+ "manga" : "Kanan-sama wa Akumade Choroi",
+ "author" : ["nonco"],
+ "artist" : ["nonco"],
+ "type" : "Manga",
+ "genres" : ["Comedy", "Romance", "Shounen", "Supernatural"],
+ "rating" : float,
+ "release": 2022,
+ "status" : "OnGoing",
+ "lang" : "en",
+ "language" : "English",
+ "manga_alt" : list,
+ "description": str,
+ }
+ }),
+ ("https://www.mangaread.org/manga/one-piece", {
+ "pattern": (r"https://www\.mangaread\.org/manga"
+ r"/one-piece/chapter-\d+(-.+)?/"),
+ "count" : ">= 1066",
+ "keyword": {
+ "manga" : "One Piece",
+ "author" : ["Oda Eiichiro"],
+ "artist" : ["Oda Eiichiro"],
+ "type" : "Manga",
+ "genres" : list,
+ "rating" : float,
+ "release": 1997,
+ "status" : "OnGoing",
+ "lang" : "en",
+ "language" : "English",
+ "manga_alt" : ["One Piece"],
+ "description": str,
+ }
+ }),
+ ("https://www.mangaread.org/manga/doesnotexist", {
+ "exception": exception.NotFoundError,
+ }),
+ )
+
+ def chapters(self, page):
+ if 'class="error404' in page:
+ raise exception.NotFoundError("manga")
+ data = self.metadata(page)
+ result = []
+ for chapter in text.extract_iter(
+ page, '<li class="wp-manga-chapter', "</li>"):
+ url , pos = text.extract(chapter, '<a href="', '"')
+ info, _ = text.extract(chapter, ">", "</a>", pos)
+ self.parse_chapter_string(info, data)
+ result.append((url, data.copy()))
+ return result
+
+ def metadata(self, page):
+ extr = text.extract_from(text.extr(
+ page, 'class="summary_content">', 'class="manga-action"'))
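+        # 'extr' advances through the page sequentially,
+        # so the order of the fields below matters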
+ return {
+ "manga" : text.extr(page, "<h1>", "</h1>").strip(),
+ "description": text.unescape(text.remove_html(text.extract(
+ page, ">", "</div>", page.index("summary__content"))[0])),
+ "rating" : text.parse_float(
+ extr('total_votes">', "</span>").strip()),
+ "manga_alt" : text.remove_html(
+ extr("Alternative </h5>\n</div>", "</div>")).split("; "),
+ "author" : list(text.extract_iter(
+ extr('class="author-content">', "</div>"), '"tag">', "</a>")),
+ "artist" : list(text.extract_iter(
+ extr('class="artist-content">', "</div>"), '"tag">', "</a>")),
+ "genres" : list(text.extract_iter(
+ extr('class="genres-content">', "</div>"), '"tag">', "</a>")),
+ "type" : text.remove_html(
+ extr("Type </h5>\n</div>", "</div>")),
+ "release" : text.parse_int(text.remove_html(
+ extr("Release </h5>\n</div>", "</div>"))),
+ "status" : text.remove_html(
+ extr("Status </h5>\n</div>", "</div>")),
+ }
diff --git a/gallery_dl/extractor/misskey.py b/gallery_dl/extractor/misskey.py
index 03e9104..37efac0 100644
--- a/gallery_dl/extractor/misskey.py
+++ b/gallery_dl/extractor/misskey.py
@@ -7,7 +7,7 @@
"""Extractors for Misskey instances"""
from .common import BaseExtractor, Message
-from .. import text
+from .. import text, exception
class MisskeyExtractor(BaseExtractor):
@@ -27,6 +27,8 @@ class MisskeyExtractor(BaseExtractor):
def items(self):
for note in self.notes():
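+            # favorites entries wrap the actual note object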
+ if "note" in note:
+ note = note["note"]
files = note.pop("files") or []
renote = note.get("renote")
if renote:
@@ -68,7 +70,7 @@ BASE_PATTERN = MisskeyExtractor.update({
},
"lesbian.energy": {
"root": "https://lesbian.energy",
- "pattern": r"lesbian\.energy"
+ "pattern": r"lesbian\.energy",
},
"sushi.ski": {
"root": "https://sushi.ski",
@@ -152,6 +154,21 @@ class MisskeyNoteExtractor(MisskeyExtractor):
return (self.api.notes_show(self.item),)
+class MisskeyFavoriteExtractor(MisskeyExtractor):
+ """Extractor for favorited notes"""
+ subcategory = "favorite"
+ pattern = BASE_PATTERN + r"/(?:my|api/i)/favorites"
+ test = (
+ ("https://misskey.io/my/favorites"),
+ ("https://misskey.io/api/i/favorites"),
+ ("https://lesbian.energy/my/favorites"),
+ ("https://sushi.ski/my/favorites"),
+ )
+
+ def notes(self):
+ return self.api.i_favorites()
+
+
class MisskeyAPI():
"""Interface for Misskey API
@@ -164,6 +181,7 @@ class MisskeyAPI():
self.root = extractor.root
self.extractor = extractor
self.headers = {"Content-Type": "application/json"}
+ self.access_token = extractor.config("access-token")
def user_id_by_username(self, username):
endpoint = "/users/show"
@@ -187,6 +205,13 @@ class MisskeyAPI():
data = {"noteId": note_id}
return self._call(endpoint, data)
+ def i_favorites(self):
+ endpoint = "/i/favorites"
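+        # favorites require authentication; Misskey expects the
+        # access token as the 'i' parameter of the request body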
+ if not self.access_token:
+ raise exception.AuthenticationError()
+ data = {"i": self.access_token}
+ return self._pagination(endpoint, data)
+
def _call(self, endpoint, data):
url = self.root + "/api" + endpoint
return self.extractor.request(
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 2b759ec..5d100a4 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -23,6 +23,7 @@ class NewgroundsExtractor(Extractor):
root = "https://www.newgrounds.com"
cookiedomain = ".newgrounds.com"
cookienames = ("NG_GG_username", "vmk1du5I8m")
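+    # wait at least 1 second between HTTP requests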
+ request_interval = 1.0
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/nsfwalbum.py b/gallery_dl/extractor/nsfwalbum.py
index be736d1..6433fbd 100644
--- a/gallery_dl/extractor/nsfwalbum.py
+++ b/gallery_dl/extractor/nsfwalbum.py
@@ -75,7 +75,8 @@ class NsfwalbumAlbumExtractor(GalleryExtractor):
@staticmethod
def _validate_response(response):
- return not response.request.url.endswith("/no_image.jpg")
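+        # treat '/placeholder.png' like '/no_image.jpg'
+        # as an invalid response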
+ return not response.request.url.endswith(
+ ("/no_image.jpg", "/placeholder.png"))
@staticmethod
def _annihilate(value, base=6):
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index b704031..cdaf595 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -172,6 +172,7 @@ class PixivUserExtractor(PixivExtractor):
(PixivBackgroundExtractor, base + "background"),
(PixivArtworksExtractor , base + "artworks"),
(PixivFavoriteExtractor , base + "bookmarks/artworks"),
+ (PixivNovelUserExtractor , base + "novels"),
), ("artworks",))
@@ -750,6 +751,182 @@ class PixivSeriesExtractor(PixivExtractor):
params["p"] += 1
+class PixivNovelExtractor(PixivExtractor):
+ """Extractor for pixiv novels"""
+ subcategory = "novel"
+ request_interval = 1.0
+ pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
+ r"/n(?:ovel/show\.php\?id=|/)(\d+)")
+ test = (
+ ("https://www.pixiv.net/novel/show.php?id=19612040", {
+ "count": 1,
+ "content": "8c818474153cbd2f221ee08766e1d634c821d8b4",
+ "keyword": {
+ "caption": r"re:「無能な名無し」と呼ばれ虐げられて育った鈴\(すず\)は、",
+ "comment_access_control": 0,
+ "create_date": "2023-04-02T15:18:58+09:00",
+ "date": "dt:2023-04-02 06:18:58",
+ "id": 19612040,
+ "is_bookmarked": False,
+ "is_muted": False,
+ "is_mypixiv_only": False,
+ "is_original": True,
+ "is_x_restricted": False,
+ "novel_ai_type": 1,
+ "page_count": 1,
+ "rating": "General",
+ "restrict": 0,
+ "series": {
+ "id": 10278364,
+ "title": "龍の贄嫁〜無能な名無しと虐げられていましたが、"
+ "どうやら異母妹に霊力を搾取されていたようです〜",
+ },
+ "tags": ["和風ファンタジー", "溺愛", "神様", "ヤンデレ", "執着",
+ "異能", "ざまぁ", "学園", "神嫁"],
+ "text_length": 5974,
+ "title": "異母妹から「無能な名無し」と虐げられていた私、"
+ "どうやら異母妹に霊力を搾取されていたようです(1)",
+ "user": {
+ "account": "yukinaga_chifuyu",
+ "id": 77055466,
+ },
+ "visible": True,
+ "x_restrict": 0,
+ },
+ }),
+ # embeds
+ ("https://www.pixiv.net/novel/show.php?id=16422450", {
+ "options": (("embeds", True),),
+ "count": 3,
+ }),
+ ("https://www.pixiv.net/n/19612040"),
+ )
+
+ def __init__(self, match):
+ PixivExtractor.__init__(self, match)
+ self.novel_id = match.group(1)
+
+ def items(self):
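+        # the "tags" option selects tag processing: "original" keeps
+        # the raw tag objects, "translated" prefers translated names,
+        # the default uses plain Japanese tag names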
+ tags = self.config("tags", "japanese")
+ if tags == "original":
+ transform_tags = None
+ elif tags == "translated":
+ def transform_tags(work):
+ work["tags"] = list(dict.fromkeys(
+ tag["translated_name"] or tag["name"]
+ for tag in work["tags"]))
+ else:
+ def transform_tags(work):
+ work["tags"] = [tag["name"] for tag in work["tags"]]
+
+ ratings = {0: "General", 1: "R-18", 2: "R-18G"}
+ meta_user = self.config("metadata")
+ meta_bookmark = self.config("metadata-bookmark")
+ embeds = self.config("embeds")
+
+ if embeds:
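+            # embedded images are fetched from the regular website,
+            # so drop the app-API authorization headers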
+ headers = {
+ "User-Agent" : "Mozilla/5.0",
+ "App-OS" : None,
+ "App-OS-Version": None,
+ "App-Version" : None,
+ "Referer" : self.root + "/",
+ "Authorization" : None,
+ }
+
+ novels = self.novels()
+ if self.max_posts:
+ novels = itertools.islice(novels, self.max_posts)
+ for novel in novels:
+ if meta_user:
+ novel.update(self.api.user_detail(novel["user"]["id"]))
+ if meta_bookmark and novel["is_bookmarked"]:
+ detail = self.api.novel_bookmark_detail(novel["id"])
+ novel["tags_bookmark"] = [tag["name"] for tag in detail["tags"]
+ if tag["is_registered"]]
+ if transform_tags:
+ transform_tags(novel)
+ novel["num"] = 0
+ novel["date"] = text.parse_datetime(novel["create_date"])
+ novel["rating"] = ratings.get(novel["x_restrict"])
+ novel["suffix"] = ""
+
+ yield Message.Directory, novel
+
+ novel["extension"] = "txt"
+ content = self.api.novel_text(novel["id"])["novel_text"]
+ yield Message.Url, "text:" + content, novel
+
+ if embeds:
+ desktop = False
+ illusts = {}
+
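+                # scan the novel text for embed markers:
+                # "uploadedimage:<ID>" needs the desktop page,
+                # "pixivimage:<ID>" references a separate artwork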
+ for marker in text.extract_iter(content, "[", "]"):
+                    if marker.startswith("uploadedimage:"):
+ desktop = True
+ elif marker.startswith("pixivimage:"):
+ illusts[marker[11:].partition("-")[0]] = None
+
+ if desktop:
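+                    # the desktop page embeds image URLs in its
+                    # 'meta-preload-data' JSON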
+ novel_id = str(novel["id"])
+ url = "{}/novel/show.php?id={}".format(
+ self.root, novel_id)
+ data = util.json_loads(text.extr(
+ self.request(url, headers=headers).text,
+ "id=\"meta-preload-data\" content='", "'"))
+
+ for image in (data["novel"][novel_id]
+ ["textEmbeddedImages"]).values():
+ url = image.pop("urls")["original"]
+ novel.update(image)
+ novel["date_url"] = self._date_from_url(url)
+ novel["num"] += 1
+ novel["suffix"] = "_p{:02}".format(novel["num"])
+ text.nameext_from_url(url, novel)
+ yield Message.Url, url, novel
+
+ if illusts:
+ novel["_extractor"] = PixivWorkExtractor
+ novel["date_url"] = None
+ for illust_id in illusts:
+ novel["num"] += 1
+ novel["suffix"] = "_p{:02}".format(novel["num"])
+ url = "{}/artworks/{}".format(self.root, illust_id)
+ yield Message.Queue, url, novel
+
+ def novels(self):
+ return (self.api.novel_detail(self.novel_id),)
+
+
+class PixivNovelUserExtractor(PixivNovelExtractor):
+ """Extractor for pixiv users' novels"""
+ subcategory = "novel-user"
+ pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
+ r"/(?:en/)?users/(\d+)/novels")
+ test = ("https://www.pixiv.net/en/users/77055466/novels", {
+ "pattern": "^text:",
+ "range": "1-5",
+ "count": 5,
+ })
+
+ def novels(self):
+ return self.api.user_novels(self.novel_id)
+
+
+class PixivNovelSeriesExtractor(PixivNovelExtractor):
+ """Extractor for pixiv novel series"""
+ subcategory = "novel-series"
+ pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
+ r"/novel/series/(\d+)")
+ test = ("https://www.pixiv.net/novel/series/10278364", {
+ "count": 4,
+ "content": "b06abed001b3f6ccfb1579699e9a238b46d38ea2",
+ })
+
+ def novels(self):
+ return self.api.novel_series(self.novel_id)
+
+
class PixivSketchExtractor(Extractor):
"""Extractor for user pages on sketch.pixiv.net"""
category = "pixiv"
@@ -907,6 +1084,23 @@ class PixivAppAPI():
params = {"illust_id": illust_id}
return self._pagination("/v2/illust/related", params)
+ def novel_bookmark_detail(self, novel_id):
+ params = {"novel_id": novel_id}
+ return self._call(
+ "/v2/novel/bookmark/detail", params)["bookmark_detail"]
+
+ def novel_detail(self, novel_id):
+ params = {"novel_id": novel_id}
+ return self._call("/v2/novel/detail", params)["novel"]
+
+ def novel_series(self, series_id):
+ params = {"series_id": series_id}
+ return self._pagination("/v1/novel/series", params, "novels")
+
+ def novel_text(self, novel_id):
+ params = {"novel_id": novel_id}
+ return self._call("/v1/novel/text", params)
+
def search_illust(self, word, sort=None, target=None, duration=None,
date_start=None, date_end=None):
params = {"word": word, "search_target": target,
@@ -938,6 +1132,10 @@ class PixivAppAPI():
params = {"user_id": user_id}
return self._pagination("/v1/user/illusts", params)
+ def user_novels(self, user_id):
+ params = {"user_id": user_id}
+ return self._pagination("/v1/user/novels", params, "novels")
+
def ugoira_metadata(self, illust_id):
params = {"illust_id": illust_id}
return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"]
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index 49da9ce..14c25c4 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -41,7 +41,7 @@ class PoipikuExtractor(Extractor):
"user_name" : text.unescape(extr(
'<h2 class="UserInfoUserName">', '</').rpartition(">")[2]),
"description": text.unescape(extr(
- 'class="IllustItemDesc" >', '<')),
+ 'class="IllustItemDesc" >', '</h1>')),
"_http_headers": {"Referer": post_url},
}
@@ -172,7 +172,9 @@ class PoipikuPostExtractor(PoipikuExtractor):
"count": 3,
"keyword": {
"count": "3",
- "description": "ORANGE OASISボスネタバレ",
+ "description": "ORANGE OASISボスネタバレ<br />曲も大好き<br />"
+ "2枚目以降はほとんど見えなかった1枚目背景"
+ "のヒエログリフ小ネタです𓀀",
"num": int,
"post_category": "SPOILER",
"post_id": "5776587",
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index cefe8d3..3f09e13 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -55,21 +55,26 @@ class RedditExtractor(Extractor):
visited.add(submission["id"])
submission["num"] = 0
- url = submission["url"]
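+        # media of a crosspost is stored with the original submission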
+ if "crosspost_parent_list" in submission:
+ media = submission["crosspost_parent_list"][-1]
+ else:
+ media = submission
+
+ url = media["url"]
if url and url.startswith("https://i.redd.it/"):
text.nameext_from_url(url, submission)
yield Message.Url, url, submission
- elif "gallery_data" in submission:
+ elif "gallery_data" in media:
for submission["num"], url in enumerate(
- self._extract_gallery(submission), 1):
+ self._extract_gallery(media), 1):
text.nameext_from_url(url, submission)
yield Message.Url, url, submission
- elif submission["is_video"]:
+ elif media["is_video"]:
if videos:
text.nameext_from_url(url, submission)
- url = "ytdl:" + self._extract_video(submission)
+ url = "ytdl:" + self._extract_video(media)
yield Message.Url, url, submission
elif not submission["is_self"]:
@@ -280,14 +285,19 @@ class RedditSubmissionExtractor(RedditExtractor):
("https://www.reddit.com/r/kpopfap/comments/qjj04q/", {
"count": 0,
}),
- ("https://old.reddit.com/r/lavaporn/comments/2a00np/"),
- ("https://np.reddit.com/r/lavaporn/comments/2a00np/"),
- ("https://m.reddit.com/r/lavaporn/comments/2a00np/"),
- ("https://redd.it/2a00np/"),
+ # user page submission (#2301)
("https://www.reddit.com/user/TheSpiritTree/comments/srilyf/", {
"pattern": r"https://i.redd.it/8fpgv17yqlh81.jpg",
"count": 1,
}),
+ # cross-posted video (#887, #3586, #3976)
+ ("https://www.reddit.com/r/kittengifs/comments/12m0b8d", {
+ "pattern": r"ytdl:https://v\.redd\.it/cvabpjacrvta1",
+ }),
+ ("https://old.reddit.com/r/lavaporn/comments/2a00np/"),
+ ("https://np.reddit.com/r/lavaporn/comments/2a00np/"),
+ ("https://m.reddit.com/r/lavaporn/comments/2a00np/"),
+ ("https://redd.it/2a00np/"),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/tcbscans.py b/gallery_dl/extractor/tcbscans.py
index cac5a54..b5a730a 100644
--- a/gallery_dl/extractor/tcbscans.py
+++ b/gallery_dl/extractor/tcbscans.py
@@ -4,19 +4,20 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://onepiecechapters.com/"""
+"""Extractors for https://tcbscans.com/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
+BASE_PATTERN = r"(?:https?://)?(?:tcbscans|onepiecechapters)\.com"
+
class TcbscansChapterExtractor(ChapterExtractor):
category = "tcbscans"
- pattern = (r"(?:https?://)?onepiecechapters\.com"
- r"(/chapters/\d+/[^/?#]+)")
- root = "https://onepiecechapters.com"
+ root = "https://tcbscans.com"
+ pattern = BASE_PATTERN + r"(/chapters/\d+/[^/?#]+)"
test = (
- (("https://onepiecechapters.com"
+ (("https://tcbscans.com"
"/chapters/4708/chainsaw-man-chapter-108"), {
"pattern": (r"https://cdn\.[^/]+"
r"/(file|attachments/[^/]+)/[^/]+/[^.]+\.\w+"),
@@ -66,12 +67,11 @@ class TcbscansChapterExtractor(ChapterExtractor):
class TcbscansMangaExtractor(MangaExtractor):
category = "tcbscans"
+ root = "https://tcbscans.com"
chapterclass = TcbscansChapterExtractor
- pattern = (r"(?:https?://)?onepiecechapters\.com"
- r"(/mangas/\d+/[^/?#]+)")
- root = "https://onepiecechapters.com"
+ pattern = BASE_PATTERN + r"(/mangas/\d+/[^/?#]+)"
test = (
- ("https://onepiecechapters.com/mangas/13/chainsaw-man", {
+ ("https://tcbscans.com/mangas/13/chainsaw-man", {
"pattern": TcbscansChapterExtractor.pattern,
"range" : "1-50",
"count" : 50,
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 5e68f13..c47021e 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -13,6 +13,7 @@ from .. import text, util, exception
from ..cache import cache
import itertools
import json
+import re
BASE_PATTERN = r"(?:https?://)?(?:www\.|mobile\.)?(?:[fv]x)?twitter\.com"
@@ -75,6 +76,10 @@ class TwitterExtractor(Extractor):
else:
seen_tweets = None
+ if self.twitpic:
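+            # pattern for TwitPic links in a tweet's text (#3792)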
+ self._find_twitpic = re.compile(
+ r"https?(://twitpic\.com/(?!photos/)\w+)").findall
+
for tweet in self.tweets():
if "legacy" in tweet:
@@ -231,12 +236,24 @@ class TwitterExtractor(Extractor):
files.append({"url": url})
def _extract_twitpic(self, tweet, files):
- for url in tweet["entities"].get("urls", ()):
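+        # collect TwitPic URLs in a dict to deduplicate them
+        # while preserving their order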
+ urls = {}
+
+ # collect URLs from entities
+ for url in tweet["entities"].get("urls") or ():
url = url["expanded_url"]
if "//twitpic.com/" not in url or "/photos/" in url:
continue
if url.startswith("http:"):
url = "https" + url[4:]
+ urls[url] = None
+
+ # collect URLs from text
+ for url in self._find_twitpic(
+ tweet.get("full_text") or tweet.get("text") or ""):
+ urls["https" + url] = None
+
+ # extract actual URLs
+ for url in urls:
response = self.request(url, fatal=False)
if response.status_code >= 400:
continue
@@ -781,7 +798,13 @@ class TwitterTweetExtractor(TwitterExtractor):
("https://twitter.com/i/web/status/112900228289540096", {
"options": (("twitpic", True), ("cards", False)),
"pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg",
- "count": 3,
+ "count": 2, # 1 duplicate
+ }),
+ # TwitPic URL not in 'urls' (#3792)
+ ("https://twitter.com/shimoigusaP/status/8138669971", {
+ "options": (("twitpic", True),),
+ "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.png",
+ "count": 1,
}),
# Twitter card (#1005)
("https://twitter.com/billboard/status/1306599586602135555", {
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 388ee03..2cbfad6 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -132,7 +132,7 @@ class WeiboExtractor(Extractor):
return self.request(url).json()
def _user_id(self):
- if self.user.isdecimal():
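+        # numeric user IDs are 10 digits long; treat shorter
+        # all-digit input as a user name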
+ if len(self.user) >= 10 and self.user.isdecimal():
return self.user[-10:]
else:
url = "{}/ajax/profile/info?{}={}".format(
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index fc36fa2..2ff48c3 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -9,6 +9,7 @@
"""String formatters"""
import os
+import sys
import time
import string
import _string
@@ -255,7 +256,11 @@ def parse_field_name(field_name):
func = operator.itemgetter
try:
if ":" in key:
- key = _slice(key)
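+                # a leading 'b' selects bytes-based slicing,
+                # e.g. "{a[b1:10]}"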
+ if key[0] == "b":
+ func = _bytesgetter
+ key = _slice(key[1:])
+ else:
+ key = _slice(key)
else:
key = key.strip("\"'")
except TypeError:
@@ -276,6 +281,14 @@ def _slice(indices):
)
+def _bytesgetter(slice, encoding=sys.getfilesystemencoding()):
+
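+    # slice the string's encoded bytes and decode again, dropping
+    # any characters cut apart at the slice boundaries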
+ def apply_slice_bytes(obj):
+ return obj.encode(encoding)[slice].decode(encoding, "ignore")
+
+ return apply_slice_bytes
+
+
def _build_format_func(format_spec, default):
if format_spec:
return _FORMAT_SPECIFIERS.get(
@@ -295,11 +308,20 @@ def _parse_optional(format_spec, default):
def _parse_slice(format_spec, default):
indices, _, format_spec = format_spec.partition("]")
- slice = _slice(indices[1:])
fmt = _build_format_func(format_spec, default)
- def apply_slice(obj):
- return fmt(obj[slice])
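+    # "[b...]" applies the slice to the string's bytes ("{a:[b1:10]}")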
+ if indices[1] == "b":
+ slice_bytes = _bytesgetter(_slice(indices[2:]))
+
+ def apply_slice(obj):
+ return fmt(slice_bytes(obj))
+
+ else:
+ slice = _slice(indices[1:])
+
+ def apply_slice(obj):
+ return fmt(obj[slice])
+
return apply_slice
diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py
index e81c6cf..39188f1 100644
--- a/gallery_dl/postprocessor/exec.py
+++ b/gallery_dl/postprocessor/exec.py
@@ -11,6 +11,7 @@
from .common import PostProcessor
from .. import util, formatter
import subprocess
+import os
if util.WINDOWS:
@@ -60,6 +61,7 @@ class ExecPP(PostProcessor):
kwdict["_path"] = pathfmt.realpath
args = [arg.format_map(kwdict) for arg in self.args]
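+        # expand '~' in the executable path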
+ args[0] = os.path.expanduser(args[0])
self._exec(args, False)
if archive:
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 4f9e49a..3e0290c 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.25.4"
+__version__ = "1.25.5"
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index eb09b9b..0a0bf86 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -399,7 +399,7 @@ def parse_command_line(module, argv):
"playlist_items": opts.playlist_items,
"xattr_set_filesize": opts.xattr_set_filesize,
"match_filter": match_filter,
- "no_color": opts.no_color,
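+        # 'no_color' may be absent in newer yt-dlp versions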
+ "no_color": getattr(opts, "no_color", None),
"ffmpeg_location": opts.ffmpeg_location,
"hls_prefer_native": opts.hls_prefer_native,
"hls_use_mpegts": opts.hls_use_mpegts,
diff --git a/test/test_formatter.py b/test/test_formatter.py
index 2258966..1bda9d9 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -23,6 +23,7 @@ class TestFormatter(unittest.TestCase):
kwdict = {
"a": "hElLo wOrLd",
"b": "äöü",
+ "j": "げんそうきょう",
"d": {"a": "foo", "b": 0, "c": None},
"l": ["a", "b", "c"],
"n": None,
@@ -133,7 +134,7 @@ class TestFormatter(unittest.TestCase):
self._run_test("{d['a']}", "foo")
self._run_test('{d["a"]}', "foo")
- def test_slicing(self):
+ def test_slice_str(self):
v = self.kwdict["a"]
self._run_test("{a[1:10]}" , v[1:10])
self._run_test("{a[-10:-1]}", v[-10:-1])
@@ -165,6 +166,26 @@ class TestFormatter(unittest.TestCase):
self._run_test("{a:[:50:2]}", v[:50:2])
self._run_test("{a:[::]}" , v)
+ def test_slice_bytes(self):
+ v = self.kwdict["j"]
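+        # assuming UTF-8, each character of 'j' encodes to 3 bytes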
+ self._run_test("{j[b1:10]}" , v[1:3])
+ self._run_test("{j[b-10:-1]}", v[-3:-1])
+ self._run_test("{j[b5:]}" , v[2:])
+ self._run_test("{j[b50:]}" , v[50:])
+ self._run_test("{j[b:5]}" , v[:1])
+ self._run_test("{j[b:50]}" , v[:50])
+ self._run_test("{j[b:]}" , v)
+ self._run_test("{j[b::]}" , v)
+
+ self._run_test("{j:[b1:10]}" , v[1:3])
+ self._run_test("{j:[b-10:-1]}", v[-3:-1])
+ self._run_test("{j:[b5:]}" , v[2:])
+ self._run_test("{j:[b50:]}" , v[50:])
+ self._run_test("{j:[b:5]}" , v[:1])
+ self._run_test("{j:[b:50]}" , v[:50])
+ self._run_test("{j:[b:]}" , v)
+ self._run_test("{j:[b::]}" , v)
+
def test_maxlen(self):
v = self.kwdict["a"]
self._run_test("{a:L5/foo/}" , "foo")
@@ -413,10 +434,10 @@ def noarg():
fmt4 = formatter.parse("\fM " + path + ":lengths")
self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name")
- self.assertEqual(fmt2.format_map(self.kwdict), "89")
+ self.assertEqual(fmt2.format_map(self.kwdict), "96")
self.assertEqual(fmt3.format_map(self.kwdict), "'Title' by Name")
- self.assertEqual(fmt4.format_map(self.kwdict), "89")
+ self.assertEqual(fmt4.format_map(self.kwdict), "96")
with self.assertRaises(TypeError):
self.assertEqual(fmt0.format_map(self.kwdict), "")