author    Unit 193 <unit193@unit193.net>  2022-06-28 19:54:18 -0400
committer Unit 193 <unit193@unit193.net>  2022-06-28 19:54:18 -0400
commit    ce35450b5308adab049c5bd99095986d4c607027 (patch)
tree      f0c2b600f8ef720941bdf615164b942c6c4a5d07
parent    25442ea49f031d4d2df3353dd7e9ad2080e332da (diff)
New upstream version 1.22.3 (tag: upstream/1.22.3)
-rw-r--r--  CHANGELOG.md                              |  32
-rw-r--r--  PKG-INFO                                  |   6
-rw-r--r--  README.rst                                |   4
-rw-r--r--  data/man/gallery-dl.1                     |   2
-rw-r--r--  data/man/gallery-dl.conf.5                |  33
-rw-r--r--  docs/gallery-dl.conf                      |   2
-rw-r--r--  gallery_dl.egg-info/PKG-INFO              |   6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt           |   2
-rw-r--r--  gallery_dl/cookies.py                     |  43
-rw-r--r--  gallery_dl/downloader/ytdl.py             |   9
-rw-r--r--  gallery_dl/extractor/__init__.py          |   2
-rw-r--r--  gallery_dl/extractor/common.py            |  18
-rw-r--r--  gallery_dl/extractor/cyberdrop.py         |   5
-rw-r--r--  gallery_dl/extractor/instagram.py         | 223
-rw-r--r--  gallery_dl/extractor/itaku.py             | 183
-rw-r--r--  gallery_dl/extractor/lolisafe.py          |   2
-rw-r--r--  gallery_dl/extractor/nijie.py             | 194
-rw-r--r--  gallery_dl/extractor/poipiku.py           | 169
-rw-r--r--  gallery_dl/extractor/readcomiconline.py   |   9
-rw-r--r--  gallery_dl/extractor/skeb.py              |  34
-rw-r--r--  gallery_dl/extractor/twitter.py           |  88
-rw-r--r--  gallery_dl/extractor/unsplash.py          |  20
-rw-r--r--  gallery_dl/extractor/vk.py                |   4
-rw-r--r--  gallery_dl/extractor/weibo.py             |  48
-rw-r--r--  gallery_dl/formatter.py                   |  32
-rw-r--r--  gallery_dl/version.py                     |   2
-rw-r--r--  test/test_cookies.py                      |  16
-rw-r--r--  test/test_formatter.py                    |  21
-rw-r--r--  test/test_results.py                      |   1
29 files changed, 920 insertions, 290 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ea14b35..403149e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,37 @@
# Changelog
+## 1.22.3 - 2022-06-28
+### Changes
+- [twitter] revert strategy changes for user URLs ([#2712](https://github.com/mikf/gallery-dl/issues/2712), [#2710](https://github.com/mikf/gallery-dl/issues/2710))
+- update default User-Agent headers
+
+## 1.22.2 - 2022-06-27
+### Additions
+- [cyberdrop] add fallback URLs ([#2668](https://github.com/mikf/gallery-dl/issues/2668))
+- [horne] add support for horne.red ([#2700](https://github.com/mikf/gallery-dl/issues/2700))
+- [itaku] add `gallery` and `image` extractors ([#1842](https://github.com/mikf/gallery-dl/issues/1842))
+- [poipiku] add `user` and `post` extractors ([#1602](https://github.com/mikf/gallery-dl/issues/1602))
+- [skeb] add `following` extractor ([#2698](https://github.com/mikf/gallery-dl/issues/2698))
+- [twitter] implement `expand` option ([#2665](https://github.com/mikf/gallery-dl/issues/2665))
+- [twitter] implement `csrf` option ([#2676](https://github.com/mikf/gallery-dl/issues/2676))
+- [unsplash] add `collection_title` and `collection_id` metadata fields ([#2670](https://github.com/mikf/gallery-dl/issues/2670))
+- [weibo] support `tabtype=video` listings ([#2601](https://github.com/mikf/gallery-dl/issues/2601))
+- [formatter] implement slice operator as format specifier
+- support cygwin/BSD/etc for `--cookies-from-browser`
+### Fixes
+- [instagram] improve metadata generated by `_parse_post_api()` ([#2695](https://github.com/mikf/gallery-dl/issues/2695), [#2660](https://github.com/mikf/gallery-dl/issues/2660))
+- [instagram] fix `tag` extractor ([#2659](https://github.com/mikf/gallery-dl/issues/2659))
+- [instagram] automatically invalidate expired login sessions
+- [twitter] fix pagination for conversation tweets
+- [twitter] improve `"replies": "self"` ([#2665](https://github.com/mikf/gallery-dl/issues/2665))
+- [twitter] improve strategy for user URLs ([#2665](https://github.com/mikf/gallery-dl/issues/2665))
+- [vk] take URLs from `*_src` entries ([#2535](https://github.com/mikf/gallery-dl/issues/2535))
+- [weibo] fix URLs generated by `user` extractor ([#2601](https://github.com/mikf/gallery-dl/issues/2601))
+- [weibo] fix retweets ([#2601](https://github.com/mikf/gallery-dl/issues/2601))
+- [downloader:ytdl] update `_set_outtmpl()` ([#2692](https://github.com/mikf/gallery-dl/issues/2692))
+- [formatter] fix `!j` conversion for non-serializable types ([#2624](https://github.com/mikf/gallery-dl/issues/2624))
+- [snap] Fix missing libslang dependency ([#2655](https://github.com/mikf/gallery-dl/issues/2655))
+
## 1.22.1 - 2022-06-04
### Additions
- [gfycat] add support for collections ([#2629](https://github.com/mikf/gallery-dl/issues/2629))
diff --git a/PKG-INFO b/PKG-INFO
index 8704ca3..59bfe09 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.22.1
+Version: 1.22.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -98,8 +98,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.1/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.1/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/README.rst b/README.rst
index f165f1b..cd6e9ae 100644
--- a/README.rst
+++ b/README.rst
@@ -65,8 +65,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.1/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.1/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index c2c4577..2c8757d 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2022-06-04" "1.22.1" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2022-06-28" "1.22.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index b1528c9..1139e2e 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2022-06-04" "1.22.1" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2022-06-28" "1.22.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -596,7 +596,7 @@ or a \f[I]list\f[] with IP and explicit port number as elements.
\f[I]string\f[]
.IP "Default:" 9
-\f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"\f[]
+\f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0"\f[]
.IP "Description:" 4
User-Agent header value to be used for HTTP requests.
@@ -1702,6 +1702,17 @@ Download video previews.
Download video files.
+.SS extractor.itaku.videos
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download video files.
+
+
.SS extractor.kemonoparty.comments
.IP "Type:" 6
\f[I]bool\f[]
@@ -2585,6 +2596,22 @@ Fetch media from all Tweets and replies in a \f[I]conversation
<https://help.twitter.com/en/using-twitter/twitter-conversations>\f[].
+.SS extractor.twitter.csrf
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"cookies"\f[]
+
+.IP "Description:" 4
+Controls how to handle Cross Site Request Forgery (CSRF) tokens.
+
+.br
+* \f[I]"auto"\f[]: Always auto-generate a token.
+.br
+* \f[I]"cookies"\f[]: Use token given by the \f[I]ct0\f[] cookie if present.
+
+
.SS extractor.twitter.size
.IP "Type:" 6
\f[I]list\f[] of \f[I]strings\f[]
@@ -2831,7 +2858,7 @@ A (comma-separated) list of subcategories to include
when processing a user profile.
Possible values are
-\f[I]"home"\f[], \f[I]"feed"\f[], \f[I]"videos"\f[], \f[I]"article"\f[], \f[I]"album"\f[].
+\f[I]"home"\f[], \f[I]"feed"\f[], \f[I]"videos"\f[], \f[I]"newvideo"\f[], \f[I]"article"\f[], \f[I]"album"\f[].
It is possible to use \f[I]"all"\f[] instead of listing all values separately.
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index cf71949..78550b5 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -10,7 +10,7 @@
"proxy": null,
"skip": true,
- "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0",
+ "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0",
"retries": 4,
"timeout": 30.0,
"verify": true,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 7bcd2d8..5eb7939 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.22.1
+Version: 1.22.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -98,8 +98,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.1/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.1/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 954dafb..eb62cb3 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -102,6 +102,7 @@ gallery_dl/extractor/imgur.py
gallery_dl/extractor/inkbunny.py
gallery_dl/extractor/instagram.py
gallery_dl/extractor/issuu.py
+gallery_dl/extractor/itaku.py
gallery_dl/extractor/kabeuchi.py
gallery_dl/extractor/keenspot.py
gallery_dl/extractor/kemonoparty.py
@@ -149,6 +150,7 @@ gallery_dl/extractor/pinterest.py
gallery_dl/extractor/pixiv.py
gallery_dl/extractor/pixnet.py
gallery_dl/extractor/plurk.py
+gallery_dl/extractor/poipiku.py
gallery_dl/extractor/pornhub.py
gallery_dl/extractor/pururin.py
gallery_dl/extractor/reactor.py
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index b173a30..579f755 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -152,13 +152,11 @@ def _firefox_cookies_database(profile=None):
def _firefox_browser_directory():
- if sys.platform in ("linux", "linux2"):
- return os.path.expanduser("~/.mozilla/firefox")
- if sys.platform == "win32":
+ if sys.platform in ("win32", "cygwin"):
return os.path.expandvars(R"%APPDATA%\Mozilla\Firefox\Profiles")
if sys.platform == "darwin":
return os.path.expanduser("~/Library/Application Support/Firefox")
- raise ValueError("unsupported platform '{}'".format(sys.platform))
+ return os.path.expanduser("~/.mozilla/firefox")
# --------------------------------------------------------------------
@@ -277,20 +275,7 @@ def _get_chromium_based_browser_settings(browser_name):
# /src/+/HEAD/docs/user_data_dir.md
join = os.path.join
- if sys.platform in ("linux", "linux2"):
- config = (os.environ.get("XDG_CONFIG_HOME") or
- os.path.expanduser("~/.config"))
-
- browser_dir = {
- "brave" : join(config, "BraveSoftware/Brave-Browser"),
- "chrome" : join(config, "google-chrome"),
- "chromium": join(config, "chromium"),
- "edge" : join(config, "microsoft-edge"),
- "opera" : join(config, "opera"),
- "vivaldi" : join(config, "vivaldi"),
- }[browser_name]
-
- elif sys.platform == "win32":
+ if sys.platform in ("win32", "cygwin"):
appdata_local = os.path.expandvars("%LOCALAPPDATA%")
appdata_roaming = os.path.expandvars("%APPDATA%")
browser_dir = {
@@ -315,7 +300,16 @@ def _get_chromium_based_browser_settings(browser_name):
}[browser_name]
else:
- raise ValueError("unsupported platform '{}'".format(sys.platform))
+ config = (os.environ.get("XDG_CONFIG_HOME") or
+ os.path.expanduser("~/.config"))
+ browser_dir = {
+ "brave" : join(config, "BraveSoftware/Brave-Browser"),
+ "chrome" : join(config, "google-chrome"),
+ "chromium": join(config, "chromium"),
+ "edge" : join(config, "microsoft-edge"),
+ "opera" : join(config, "opera"),
+ "vivaldi" : join(config, "vivaldi"),
+ }[browser_name]
# Linux keyring names can be determined by snooping on dbus
# while opening the browser in KDE:
@@ -379,16 +373,13 @@ class ChromeCookieDecryptor:
def get_cookie_decryptor(browser_root, browser_keyring_name, *, keyring=None):
- if sys.platform in ("linux", "linux2"):
- return LinuxChromeCookieDecryptor(
- browser_keyring_name, keyring=keyring)
+ if sys.platform in ("win32", "cygwin"):
+ return WindowsChromeCookieDecryptor(browser_root)
elif sys.platform == "darwin":
return MacChromeCookieDecryptor(browser_keyring_name)
- elif sys.platform == "win32":
- return WindowsChromeCookieDecryptor(browser_root)
else:
- raise NotImplementedError("Chrome cookie decryption is not supported "
- "on {}".format(sys.platform))
+ return LinuxChromeCookieDecryptor(
+ browser_keyring_name, keyring=keyring)
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
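The refactored platform checks above invert the old whitelist: Windows and Cygwin are matched explicitly, macOS separately, and every other platform now falls through to the Linux-style paths instead of raising, which is what makes `--cookies-from-browser` work on Cygwin, the BSDs, and similar systems. A quick way to see the new behavior (a sketch, assuming gallery_dl 1.22.2+ is importable):

```python
import sys
from unittest import mock

from gallery_dl import cookies

# On FreeBSD, sys.platform is e.g. "freebsd13". The old whitelist matched
# no branch and raised ValueError; the new fallthrough returns the
# conventional dot-directory instead.
with mock.patch.object(sys, "platform", "freebsd13"):
    print(cookies._firefox_browser_directory())
    # -> ~/.mozilla/firefox (expanded to an absolute path)
```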
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 2badccf..efa957b 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -138,9 +138,14 @@ class YoutubeDLDownloader(DownloaderBase):
@staticmethod
def _set_outtmpl(ytdl_instance, outtmpl):
try:
- ytdl_instance.outtmpl_dict["default"] = outtmpl
+ ytdl_instance._parse_outtmpl
except AttributeError:
- ytdl_instance.params["outtmpl"] = outtmpl
+ try:
+ ytdl_instance.outtmpl_dict["default"] = outtmpl
+ except AttributeError:
+ ytdl_instance.params["outtmpl"] = outtmpl
+ else:
+ ytdl_instance.params["outtmpl"] = {"default": outtmpl}
def compatible_formats(formats):
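The new `_set_outtmpl()` is feature detection across three generations of the ytdl API; the bare attribute access is the probe (it is never called). Annotated sketch of the logic:

```python
@staticmethod
def _set_outtmpl(ytdl_instance, outtmpl):
    try:
        # probe only: recent yt-dlp exposes _parse_outtmpl and expects
        # params["outtmpl"] to be a dict of named templates
        ytdl_instance._parse_outtmpl
    except AttributeError:
        try:
            # older yt-dlp kept a separate outtmpl_dict attribute
            ytdl_instance.outtmpl_dict["default"] = outtmpl
        except AttributeError:
            # plain youtube-dl: a single template string in params
            ytdl_instance.params["outtmpl"] = outtmpl
    else:
        ytdl_instance.params["outtmpl"] = {"default": outtmpl}
```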
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 6d6c7ee..e273f84 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -64,6 +64,7 @@ modules = [
"inkbunny",
"instagram",
"issuu",
+ "itaku",
"kabeuchi",
"keenspot",
"kemonoparty",
@@ -106,6 +107,7 @@ modules = [
"pixiv",
"pixnet",
"plurk",
+ "poipiku",
"pornhub",
"pururin",
"reactor",
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 9cd9059..5c5e29e 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -256,7 +256,7 @@ class Extractor():
else:
headers["User-Agent"] = self.config("user-agent", (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
- "rv:91.0) Gecko/20100101 Firefox/91.0"))
+ "rv:102.0) Gecko/20100101 Firefox/102.0"))
headers["Accept"] = "*/*"
headers["Accept-Language"] = "en-US,en;q=0.5"
headers["Accept-Encoding"] = "gzip, deflate"
@@ -713,16 +713,21 @@ _browser_cookies = {}
HTTP_HEADERS = {
"firefox": (
- ("User-Agent", "Mozilla/5.0 ({}; rv:91.0) "
- "Gecko/20100101 Firefox/91.0"),
+ ("User-Agent", "Mozilla/5.0 ({}; rv:102.0) "
+ "Gecko/20100101 Firefox/102.0"),
("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,"
- "image/avif,*/*;q=0.8"),
+ "image/avif,image/webp,*/*;q=0.8"),
("Accept-Language", "en-US,en;q=0.5"),
- ("Accept-Encoding", "gzip, deflate"),
+ ("Accept-Encoding", "gzip, deflate, br"),
("Referer", None),
+ ("DNT", "1"),
("Connection", "keep-alive"),
("Upgrade-Insecure-Requests", "1"),
("Cookie", None),
+ ("Sec-Fetch-Dest", "empty"),
+ ("Sec-Fetch-Mode", "no-cors"),
+ ("Sec-Fetch-Site", "same-origin"),
+ ("TE", "trailers"),
),
"chrome": (
("Upgrade-Insecure-Requests", "1"),
@@ -755,8 +760,7 @@ SSL_CIPHERS = {
"AES128-GCM-SHA256:"
"AES256-GCM-SHA384:"
"AES128-SHA:"
- "AES256-SHA:"
- "DES-CBC3-SHA"
+ "AES256-SHA"
),
"chrome": (
"TLS_AES_128_GCM_SHA256:"
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index 1afaac8..7a79eca 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -48,10 +48,11 @@ class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
files = []
append = files.append
while True:
- url = extr('id="file" href="', '"')
+ url = text.unescape(extr('id="file" href="', '"'))
if not url:
break
- append({"file": text.unescape(url)})
+ append({"file": url,
+ "_fallback": (self.root + url[url.find("/", 8):],)})
return files, {
"album_id" : self.album_id,
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index e536e22..31f5b32 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -82,8 +82,12 @@ class InstagramExtractor(Extractor):
if response.history:
- url = response.request.url
+ url = response.url
if "/accounts/login/" in url:
+ if self._username:
+ self.log.debug("Invalidating cached login session for "
+ "'%s'", self._username)
+ _login_impl.invalidate(self._username)
page = "login"
elif "/challenge/" in url:
page = "challenge"
@@ -161,55 +165,15 @@ class InstagramExtractor(Extractor):
return self._pagination_api(endpoint)
def login(self):
+ self._username = None
if not self._check_cookies(self.cookienames):
username, password = self._get_auth_info()
if username:
- self._update_cookies(self._login_impl(username, password))
+ self._username = username
+ self._update_cookies(_login_impl(self, username, password))
self.session.cookies.set(
"csrftoken", self.csrf_token, domain=self.cookiedomain)
- @cache(maxage=360*24*3600, keyarg=1)
- def _login_impl(self, username, password):
- self.log.info("Logging in as %s", username)
-
- url = self.root + "/accounts/login/"
- page = self.request(url).text
-
- headers = {
- "X-Web-Device-Id" : text.extract(page, '"device_id":"', '"')[0],
- "X-IG-App-ID" : "936619743392459",
- "X-ASBD-ID" : "437806",
- "X-IG-WWW-Claim" : "0",
- "X-Requested-With": "XMLHttpRequest",
- "Referer" : url,
- }
- url = self.root + "/data/shared_data/"
- data = self.request(url, headers=headers).json()
-
- headers["X-CSRFToken"] = data["config"]["csrf_token"]
- headers["X-Instagram-AJAX"] = data["rollout_hash"]
- headers["Origin"] = self.root
- data = {
- "username" : username,
- "enc_password" : "#PWD_INSTAGRAM_BROWSER:0:{}:{}".format(
- int(time.time()), password),
- "queryParams" : "{}",
- "optIntoOneTap" : "false",
- "stopDeletionNonce" : "",
- "trustedDeviceRecords": "{}",
- }
- url = self.root + "/accounts/login/ajax/"
- response = self.request(url, method="POST", headers=headers, data=data)
-
- if not response.json().get("authenticated"):
- raise exception.AuthenticationError()
-
- cget = self.session.cookies.get
- return {
- name: cget(name)
- for name in ("sessionid", "mid", "ig_did")
- }
-
def _parse_post_graphql(self, post):
typename = post["__typename"]
@@ -286,37 +250,51 @@ class InstagramExtractor(Extractor):
return data
def _parse_post_api(self, post):
-
- if "media" in post:
- media = post["media"]
- owner = media["user"]
+ if "items" in post:
+ items = post["items"]
+ reel_id = str(post["id"]).rpartition(":")[2]
data = {
- "post_id" : media["pk"],
- "post_shortcode": shortcode_from_id(media["pk"]),
+ "expires": text.parse_timestamp(post.get("expiring_at")),
+ "post_id": reel_id,
+ "post_shortcode": shortcode_from_id(reel_id),
+ }
+ else:
+ data = {
+ "post_id" : post["pk"],
+ "post_shortcode": post["code"],
+ "likes": post["like_count"],
}
- if "carousel_media" in media:
- post["items"] = media["carousel_media"]
+ caption = post["caption"]
+ data["description"] = caption["text"] if caption else ""
+
+ tags = self._find_tags(data["description"])
+ if tags:
+ data["tags"] = sorted(set(tags))
+
+ location = post.get("location")
+ if location:
+ slug = location["short_name"].replace(" ", "-").lower()
+ data["location_id"] = location["pk"]
+ data["location_slug"] = slug
+ data["location_url"] = "{}/explore/locations/{}/{}/".format(
+ self.root, location["pk"], slug)
+
+ if "carousel_media" in post:
+ items = post["carousel_media"]
data["sidecar_media_id"] = data["post_id"]
data["sidecar_shortcode"] = data["post_shortcode"]
else:
- post["items"] = (media,)
-
- else:
- reel_id = str(post["id"]).rpartition(":")[2]
- owner = post["user"]
- data = {
- "expires" : text.parse_timestamp(post.get("expiring_at")),
- "post_id" : reel_id,
- "post_shortcode": shortcode_from_id(reel_id),
- }
+ items = (post,)
+ owner = post["user"]
data["owner_id"] = owner["pk"]
data["username"] = owner.get("username")
data["fullname"] = owner.get("full_name")
- data["_files"] = files = []
+ data["post_url"] = "{}/p/{}/".format(self.root, data["post_shortcode"])
- for num, item in enumerate(post["items"], 1):
+ data["_files"] = files = []
+ for num, item in enumerate(items, 1):
image = item["image_versions2"]["candidates"][0]
@@ -333,7 +311,8 @@ class InstagramExtractor(Extractor):
media = {
"num" : num,
"date" : text.parse_timestamp(item.get("taken_at") or
- media.get("taken_at")),
+ media.get("taken_at") or
+ post.get("taken_at")),
"media_id" : item["pk"],
"shortcode" : (item.get("code") or
shortcode_from_id(item["pk"])),
@@ -342,6 +321,10 @@ class InstagramExtractor(Extractor):
"width" : media["width"],
"height" : media["height"],
}
+
+ if "expiring_at" in item:
+ media["expires"] = text.parse_timestamp(post["expiring_at"])
+
self._extract_tagged_users(item, media)
files.append(media)
@@ -385,31 +368,6 @@ class InstagramExtractor(Extractor):
"username" : user["username"],
"full_name": user["full_name"]})
- def _extract_shared_data(self, page):
- shared_data, pos = text.extract(
- page, "window._sharedData =", ";</script>")
- additional_data, pos = text.extract(
- page, "window.__additionalDataLoaded(", ");</script>", pos)
-
- data = json.loads(shared_data)
- if additional_data:
- next(iter(data["entry_data"].values()))[0] = \
- json.loads(additional_data.partition(",")[2])
- return data
-
- def _get_edge_data(self, user, key):
- cursor = self.config("cursor")
- if cursor or not key:
- return {
- "edges" : (),
- "page_info": {
- "end_cursor" : cursor,
- "has_next_page": True,
- "_virtual" : True,
- },
- }
- return user[key]
-
def _pagination_graphql(self, query_hash, variables):
cursor = self.config("cursor")
if cursor:
@@ -436,8 +394,7 @@ class InstagramExtractor(Extractor):
def _pagination_api(self, endpoint, params=None):
while True:
data = self._request_api(endpoint, params=params)
- for item in data["items"]:
- yield {"media": item}
+ yield from data["items"]
if not data["more_available"]:
return
@@ -446,7 +403,8 @@ class InstagramExtractor(Extractor):
def _pagination_api_post(self, endpoint, params, post=False):
while True:
data = self._request_api(endpoint, method="POST", data=params)
- yield from data["items"]
+ for item in data["items"]:
+ yield item["media"]
info = data["paging_info"]
if not info["more_available"]:
@@ -567,21 +525,7 @@ class InstagramTagExtractor(InstagramExtractor):
return {"tag": text.unquote(self.item)}
def posts(self):
- url = "{}/explore/tags/{}/".format(self.root, self.item)
- page = self._extract_shared_data(
- self.request(url).text)["entry_data"]["TagPage"][0]
-
- if "data" in page:
- return self._pagination_sections(page["data"]["recent"])
-
- hashtag = page["graphql"]["hashtag"]
- query_hash = "9b498c08113f1e09617a1703c22b2f32"
- variables = {"tag_name": hashtag["name"], "first": 50}
- edge = self._get_edge_data(hashtag, "edge_hashtag_to_media")
- return self._pagination_graphql(query_hash, variables, edge)
-
- def _pagination_sections(self, info):
- endpoint = "/v1/tags/instagram/sections/"
+ endpoint = "/v1/tags/{}/sections/".format(self.item)
data = {
"include_persistent": "0",
"max_id" : None,
@@ -591,29 +535,17 @@ class InstagramTagExtractor(InstagramExtractor):
}
while True:
+ info = self._request_api(endpoint, method="POST", data=data)
+
for section in info["sections"]:
- yield from section["layout_content"]["medias"]
+ for media in section["layout_content"]["medias"]:
+ yield media["media"]
if not info.get("more_available"):
return
data["max_id"] = info["next_max_id"]
data["page"] = info["next_page"]
- info = self._request_api(endpoint, method="POST", data=data)
-
- def _pagination_graphql(self, query_hash, variables, data):
- while True:
- for edge in data["edges"]:
- yield edge["node"]
-
- info = data["page_info"]
- if not info["has_next_page"]:
- return
-
- variables["after"] = self._cursor = info["end_cursor"]
- self.log.debug("Cursor: %s", self._cursor)
- data = self._request_graphql(
- query_hash, variables)["hashtag"]["edge_hashtag_to_media"]
class InstagramPostExtractor(InstagramExtractor):
@@ -812,6 +744,49 @@ class InstagramReelsExtractor(InstagramExtractor):
return self._pagination_api_post(endpoint, data)
+@cache(maxage=360*24*3600, keyarg=1)
+def _login_impl(extr, username, password):
+ extr.log.info("Logging in as %s", username)
+
+ url = extr.root + "/accounts/login/"
+ page = extr.request(url).text
+
+ headers = {
+ "X-Web-Device-Id" : text.extract(page, '"device_id":"', '"')[0],
+ "X-IG-App-ID" : "936619743392459",
+ "X-ASBD-ID" : "437806",
+ "X-IG-WWW-Claim" : "0",
+ "X-Requested-With": "XMLHttpRequest",
+ "Referer" : url,
+ }
+ url = extr.root + "/data/shared_data/"
+ data = extr.request(url, headers=headers).json()
+
+ headers["X-CSRFToken"] = data["config"]["csrf_token"]
+ headers["X-Instagram-AJAX"] = data["rollout_hash"]
+ headers["Origin"] = extr.root
+ data = {
+ "username" : username,
+ "enc_password" : "#PWD_INSTAGRAM_BROWSER:0:{}:{}".format(
+ int(time.time()), password),
+ "queryParams" : "{}",
+ "optIntoOneTap" : "false",
+ "stopDeletionNonce" : "",
+ "trustedDeviceRecords": "{}",
+ }
+ url = extr.root + "/accounts/login/ajax/"
+ response = extr.request(url, method="POST", headers=headers, data=data)
+
+ if not response.json().get("authenticated"):
+ raise exception.AuthenticationError()
+
+ cget = extr.session.cookies.get
+ return {
+ name: cget(name)
+ for name in ("sessionid", "mid", "ig_did")
+ }
+
+
def id_from_shortcode(shortcode):
return util.bdecode(shortcode, _ALPHABET)
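Moving `_login_impl()` from a method to a module-level function is what enables the session invalidation added above: the `cache` decorator keys entries on the `username` argument (`keyarg=1`), and the decorated function object exposes `invalidate()`. Sketch of the pattern:

```python
from gallery_dl.cache import cache

@cache(maxage=360*24*3600, keyarg=1)   # memoize per username for ~360 days
def _login_impl(extr, username, password):
    ...  # perform the login and return the session cookies

# when Instagram redirects to /accounts/login/, the cached cookies are
# stale; dropping the entry forces a fresh login on the next attempt
_login_impl.invalidate(username)
```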
diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py
new file mode 100644
index 0000000..dfe4b53
--- /dev/null
+++ b/gallery_dl/extractor/itaku.py
@@ -0,0 +1,183 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://itaku.ee/"""
+
+from .common import Extractor, Message
+from ..cache import memcache
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?itaku\.ee"
+
+
+class ItakuExtractor(Extractor):
+ """Base class for itaku extractors"""
+ category = "itaku"
+ root = "https://itaku.ee"
+ directory_fmt = ("{category}", "{owner_username}")
+ filename_fmt = ("{id}{title:? //}.{extension}")
+ archive_fmt = "{id}"
+ request_interval = (0.5, 1.5)
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.api = ItakuAPI(self)
+ self.item = match.group(1)
+ self.videos = self.config("videos", True)
+
+ def items(self):
+ for post in self.posts():
+
+ post["date"] = text.parse_datetime(
+ post["date_added"], "%Y-%m-%dT%H:%M:%S.%f")
+ for category, tags in post.pop("categorized_tags").items():
+ post["tags_" + category.lower()] = [t["name"] for t in tags]
+ post["tags"] = [t["name"] for t in post["tags"]]
+ post["sections"] = [s["title"] for s in post["sections"]]
+
+ if post["video"] and self.videos:
+ url = post["video"]["video"]
+ else:
+ url = post["image"]
+
+ yield Message.Directory, post
+ yield Message.Url, url, text.nameext_from_url(url, post)
+
+
+class ItakuGalleryExtractor(ItakuExtractor):
+ """Extractor for posts from an itaku user gallery"""
+ subcategory = "gallery"
+ pattern = BASE_PATTERN + r"/profile/([^/?#]+)/gallery"
+ test = ("https://itaku.ee/profile/piku/gallery", {
+ "pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_imgs"
+ r"/[^/?#]+\.(jpg|png|gif)",
+ "range": "1-10",
+ "count": 10,
+ })
+
+ def posts(self):
+ return self.api.galleries_images(self.item)
+
+
+class ItakuImageExtractor(ItakuExtractor):
+ subcategory = "image"
+ pattern = BASE_PATTERN + r"/images/(\d+)"
+ test = (
+ ("https://itaku.ee/images/100471", {
+ "pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_imgs"
+ r"/220504_oUNIAFT\.png",
+ "count": 1,
+ "keyword": {
+ "already_pinned": None,
+ "blacklisted": {
+ "blacklisted_tags": [],
+ "is_blacklisted": False
+ },
+ "can_reshare": True,
+ "date_added": "2022-05-05T19:21:17.674148Z",
+ "date_edited": "2022-05-25T14:37:46.220612Z",
+ "description": "sketch from drawpile",
+ "extension": "png",
+ "filename": "220504_oUNIAFT",
+ "hotness_score": 11507.4691939,
+ "id": 100471,
+ "image": "https://d1wmr8tlk3viaj.cloudfront.net/gallery_imgs"
+ "/220504_oUNIAFT.png",
+ "image_xl": "https://d1wmr8tlk3viaj.cloudfront.net"
+ "/gallery_imgs/220504_oUNIAFT/xl.jpg",
+ "liked_by_you": False,
+ "maturity_rating": "SFW",
+ "num_comments": 2,
+ "num_likes": 80,
+ "num_reshares": 2,
+ "obj_tags": 136446,
+ "owner": 16775,
+ "owner_avatar": "https://d1wmr8tlk3viaj.cloudfront.net"
+ "/profile_pics/av2022r_vKYVywc/sm.jpg",
+ "owner_displayname": "Piku",
+ "owner_username": "piku",
+ "reshared_by_you": False,
+ "sections": ["Miku"],
+ "tags": list,
+ "tags_character": ["hatsune_miku"],
+ "tags_copyright": ["vocaloid"],
+ "tags_general" : ["twintails", "green_hair", "flag", "gloves",
+ "green_eyes", "female", "racing_miku"],
+ "title": "Racing Miku 2022 Ver.",
+ "too_mature": False,
+ "uncompressed_filesize": "0.62",
+ "video": None,
+ "visibility": "PUBLIC",
+ },
+ }),
+ # video
+ ("https://itaku.ee/images/19465", {
+ "pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_vids"
+ r"/sleepy_af_OY5GHWw\.mp4",
+ }),
+ )
+
+ def posts(self):
+ return (self.api.image(self.item),)
+
+
+class ItakuAPI():
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.root = extractor.root + "/api"
+ self.headers = {
+ "Accept": "application/json, text/plain, */*",
+ "Referer": extractor.root + "/",
+ }
+
+ def galleries_images(self, username, section=None):
+ endpoint = "/galleries/images/"
+ params = {
+ "cursor" : None,
+ "owner" : self.user(username)["owner"],
+ "section" : section,
+ "date_range": "",
+ "maturity_rating": ("SFW", "Questionable", "NSFW", "Extreme"),
+ "ordering" : "-date_added",
+ "page" : "1",
+ "page_size" : "30",
+ "visibility": ("PUBLIC", "PROFILE_ONLY"),
+ }
+ return self._pagination(endpoint, params, self.image)
+
+ def image(self, image_id):
+ endpoint = "/galleries/images/" + str(image_id)
+ return self._call(endpoint)
+
+ @memcache()
+ def user(self, username):
+ return self._call("/user_profiles/{}/".format(username))
+
+ def _call(self, endpoint, params=None):
+ if not endpoint.startswith("http"):
+ endpoint = self.root + endpoint
+ response = self.extractor.request(
+ endpoint, params=params, headers=self.headers)
+ return response.json()
+
+ def _pagination(self, endpoint, params, extend):
+ data = self._call(endpoint, params)
+
+ while True:
+ if extend:
+ for result in data["results"]:
+ yield extend(result["id"])
+ else:
+ yield from data["results"]
+
+ url_next = data["links"].get("next")
+ if not url_next:
+ return
+
+ data = self._call(url_next)
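`ItakuAPI._pagination()` follows the cursor URL the API hands back in `links.next`, and the `extend` callback re-fetches each listed id through `image()` because the list endpoint returns abbreviated objects. A usage sketch, assuming an already-initialized extractor:

```python
api = ItakuAPI(extractor)

# each listing result is re-fetched by id via api.image(), so every post
# carries full metadata (tags, sections, video info)
for post in api.galleries_images("piku"):
    print(post["id"], post["title"])
```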
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index f3bd5d8..2aea44c 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -85,6 +85,8 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
yield Message.Directory, data
for data["num"], file in enumerate(files, 1):
url = file["file"]
+ if "_fallback" in file:
+ data["_fallback"] = file["_fallback"]
text.nameext_from_url(url, data)
data["name"], sep, data["id"] = data["filename"].rpartition("-")
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 832831f..122ea46 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -6,31 +6,31 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://nijie.info/"""
+"""Extractors for nijie instances"""
-from .common import Extractor, Message, AsynchronousMixin
+from .common import BaseExtractor, Message, AsynchronousMixin
from .. import text, exception
from ..cache import cache
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?nijie\.info"
-
-
-class NijieExtractor(AsynchronousMixin, Extractor):
+class NijieExtractor(AsynchronousMixin, BaseExtractor):
"""Base class for nijie extractors"""
- category = "nijie"
+ basecategory = "Nijie"
directory_fmt = ("{category}", "{user_id}")
filename_fmt = "{image_id}_p{num}.{extension}"
archive_fmt = "{image_id}_{num}"
- cookiedomain = "nijie.info"
- cookienames = ("nemail", "nlogin")
- root = "https://nijie.info"
- view_url = "https://nijie.info/view.php?id="
- popup_url = "https://nijie.info/view_popup.php?id="
def __init__(self, match):
- Extractor.__init__(self, match)
- self.user_id = text.parse_int(match.group(1))
+ self._init_category(match)
+ self.cookiedomain = "." + self.root.rpartition("/")[2]
+ self.cookienames = (self.category + "_tok",)
+
+ if self.category == "horne":
+ self._extract_data = self._extract_data_horne
+
+ BaseExtractor.__init__(self, match)
+
+ self.user_id = text.parse_int(match.group(match.lastindex))
self.user_name = None
self.session.headers["Referer"] = self.root + "/"
@@ -39,13 +39,21 @@ class NijieExtractor(AsynchronousMixin, Extractor):
for image_id in self.image_ids():
- response = self.request(self.view_url + image_id, fatal=False)
+ url = "{}/view.php?id={}".format(self.root, image_id)
+ response = self.request(url, fatal=False)
if response.status_code >= 400:
continue
page = response.text
data = self._extract_data(page)
data["image_id"] = text.parse_int(image_id)
+
+ if self.user_name:
+ data["user_id"] = self.user_id
+ data["user_name"] = self.user_name
+ else:
+ data["user_id"] = data["artist_id"]
+ data["user_name"] = data["artist_name"]
yield Message.Directory, data
for image in self._extract_images(page):
@@ -68,24 +76,41 @@ class NijieExtractor(AsynchronousMixin, Extractor):
"description": text.unescape(extr(
'"description": "', '"').replace("&amp;", "&")),
"date" : text.parse_datetime(extr(
- '"datePublished": "', '"') + "+0900",
- "%a %b %d %H:%M:%S %Y%z"),
- "artist_id" : text.parse_int(extr(
- '"sameAs": "https://nijie.info/members.php?id=', '"')),
+ '"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y", 9),
+ "artist_id" : text.parse_int(extr('/members.php?id=', '"')),
+ "artist_name": keywords[1],
+ "tags" : keywords[2:-1],
+ }
+ return data
+
+ @staticmethod
+ def _extract_data_horne(page):
+ """Extract image metadata from 'page'"""
+ extr = text.extract_from(page)
+ keywords = text.unescape(extr(
+ 'name="keywords" content="', '" />')).split(",")
+ data = {
+ "title" : keywords[0].strip(),
+ "description": text.unescape(extr(
+ 'property="og:description" content="', '"')),
+ "artist_id" : text.parse_int(extr('members.php?id=', '"')),
"artist_name": keywords[1],
"tags" : keywords[2:-1],
+ "date" : text.parse_datetime(extr(
+ "itemprop='datePublished' content=", "<").rpartition(">")[2],
+ "%Y-%m-%d %H:%M:%S", 9),
}
- data["user_id"] = data["artist_id"]
- data["user_name"] = data["artist_name"]
return data
@staticmethod
def _extract_images(page):
"""Extract image URLs from 'page'"""
- images = text.extract_iter(page, '<a href="./view_popup.php', '</a>')
+ images = text.extract_iter(page, "/view_popup.php", "</a>")
for num, image in enumerate(images):
- url = "https:" + text.extract(image, 'src="', '"')[0]
- url = url.replace("/__rs_l120x120/", "/")
+ src = text.extract(image, 'src="', '"')[0]
+ if not src:
+ continue
+ url = ("https:" + src).replace("/__rs_l120x120/", "/")
yield text.nameext_from_url(url, {
"num": num,
"url": url,
@@ -112,7 +137,7 @@ class NijieExtractor(AsynchronousMixin, Extractor):
data = {"email": username, "password": password, "save": "on"}
response = self.request(url, method="POST", data=data)
- if "//nijie.info/login.php" in response.text:
+ if "/login.php" in response.text:
raise exception.AuthenticationError()
return self.session.cookies
@@ -132,12 +157,27 @@ class NijieExtractor(AsynchronousMixin, Extractor):
params["p"] += 1
+BASE_PATTERN = NijieExtractor.update({
+ "nijie": {
+ "root": "https://nijie.info",
+ "pattern": r"(?:www\.)?nijie\.info",
+ },
+ "horne": {
+ "root": "https://horne.red",
+ "pattern": r"(?:www\.)?horne\.red",
+ },
+})
+
+
class NijieUserExtractor(NijieExtractor):
"""Extractor for nijie user profiles"""
subcategory = "user"
cookiedomain = None
pattern = BASE_PATTERN + r"/members\.php\?id=(\d+)"
- test = ("https://nijie.info/members.php?id=44",)
+ test = (
+ ("https://nijie.info/members.php?id=44"),
+ ("https://horne.red/members.php?id=58000"),
+ )
def items(self):
fmt = "{}/{{}}.php?id={}".format(self.root, self.user_id).format
@@ -172,6 +212,25 @@ class NijieIllustrationExtractor(NijieExtractor):
"user_name": "ED",
},
}),
+ ("https://horne.red/members_illust.php?id=58000", {
+ "pattern": r"https://pic\.nijie\.net/\d+/horne/\d+/\d+/\d+"
+ r"/illust/\d+_\d+_[0-9a-f]+_[0-9a-f]+\.png",
+ "range": "1-20",
+ "count": 20,
+ "keyword": {
+ "artist_id": 58000,
+ "artist_name": "のえるわ",
+ "date": "type:datetime",
+ "description": str,
+ "image_id": int,
+ "num": int,
+ "tags": list,
+ "title": str,
+ "url": str,
+ "user_id": 58000,
+ "user_name": "のえるわ",
+ },
+ }),
("https://nijie.info/members_illust.php?id=43", {
"exception": exception.NotFoundError,
}),
@@ -182,34 +241,47 @@ class NijieIllustrationExtractor(NijieExtractor):
class NijieDoujinExtractor(NijieExtractor):
- """Extractor for doujin entries of a nijie-user"""
+ """Extractor for doujin entries of a nijie user"""
subcategory = "doujin"
pattern = BASE_PATTERN + r"/members_dojin\.php\?id=(\d+)"
- test = ("https://nijie.info/members_dojin.php?id=6782", {
- "count": ">= 18",
- "keyword": {
- "user_id" : 6782,
- "user_name": "ジョニー@アビオン村",
- },
- })
+ test = (
+ ("https://nijie.info/members_dojin.php?id=6782", {
+ "count": ">= 18",
+ "keyword": {
+ "user_id" : 6782,
+ "user_name": "ジョニー@アビオン村",
+ },
+ }),
+ ("https://horne.red/members_dojin.php?id=58000"),
+ )
def image_ids(self):
return self._pagination("members_dojin")
class NijieFavoriteExtractor(NijieExtractor):
- """Extractor for all favorites/bookmarks of a nijie-user"""
+ """Extractor for all favorites/bookmarks of a nijie user"""
subcategory = "favorite"
directory_fmt = ("{category}", "bookmarks", "{user_id}")
archive_fmt = "f_{user_id}_{image_id}_{num}"
pattern = BASE_PATTERN + r"/user_like_illust_view\.php\?id=(\d+)"
- test = ("https://nijie.info/user_like_illust_view.php?id=44", {
- "count": ">= 16",
- "keyword": {
- "user_id" : 44,
- "user_name": "ED",
- },
- })
+ test = (
+ ("https://nijie.info/user_like_illust_view.php?id=44", {
+ "count": ">= 16",
+ "keyword": {
+ "user_id" : 44,
+ "user_name": "ED",
+ },
+ }),
+ ("https://horne.red/user_like_illust_view.php?id=58000", {
+ "range": "1-5",
+ "count": 5,
+ "keyword": {
+ "user_id" : 58000,
+ "user_name": "のえるわ",
+ },
+ }),
+ )
def image_ids(self):
return self._pagination("user_like_illust_view")
@@ -227,14 +299,17 @@ class NijieNuitaExtractor(NijieExtractor):
directory_fmt = ("{category}", "nuita", "{user_id}")
archive_fmt = "n_{user_id}_{image_id}_{num}"
pattern = BASE_PATTERN + r"/history_nuita\.php\?id=(\d+)"
- test = ("https://nijie.info/history_nuita.php?id=728995", {
- "range": "1-10",
- "count": 10,
- "keyword": {
- "user_id" : 728995,
- "user_name": "莚",
- },
- })
+ test = (
+ ("https://nijie.info/history_nuita.php?id=728995", {
+ "range": "1-10",
+ "count": 10,
+ "keyword": {
+ "user_id" : 728995,
+ "user_name": "莚",
+ },
+ }),
+ ("https://horne.red/history_nuita.php?id=58000"),
+ )
def image_ids(self):
return self._pagination("history_nuita")
@@ -252,7 +327,7 @@ class NijieNuitaExtractor(NijieExtractor):
class NijieImageExtractor(NijieExtractor):
- """Extractor for a work/image from nijie.info"""
+ """Extractor for a nijie work/image"""
subcategory = "image"
pattern = BASE_PATTERN + r"/view(?:_popup)?\.php\?id=(\d+)"
test = (
@@ -265,11 +340,26 @@ class NijieImageExtractor(NijieExtractor):
"count": 0,
}),
("https://nijie.info/view_popup.php?id=70720"),
+ ("https://horne.red/view.php?id=8716", {
+ "count": 4,
+ "keyword": {
+ "artist_id": 58000,
+ "artist_name": "のえるわ",
+ "date": "dt:2018-02-04 14:47:24",
+ "description": "ノエル「そんなことしなくても、"
+ "言ってくれたら咥えるのに・・・♡」",
+ "image_id": 8716,
+ "tags": ["男の娘", "フェラ", "オリキャラ", "うちのこ"],
+ "title": "ノエル「いまどきそんな、恵方巻ネタなんてやらなくても・・・」",
+ "user_id": 58000,
+ "user_name": "のえるわ",
+ },
+ }),
)
def __init__(self, match):
NijieExtractor.__init__(self, match)
- self.image_id = match.group(1)
+ self.image_id = match.group(match.lastindex)
def image_ids(self):
return (self.image_id,)
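Because `BaseExtractor.update()` builds the combined domain pattern with its own capture groups, subclass patterns can no longer rely on fixed group numbers, hence the switch from `match.group(1)` to `match.group(match.lastindex)`. Illustrative only (the exact group layout is internal to BaseExtractor):

```python
import re

# one alternative (and group) per instance domain, then the id group last
pattern = re.compile(
    r"(?:https?://)?(?:((?:www\.)?nijie\.info)|((?:www\.)?horne\.red))"
    r"/members\.php\?id=(\d+)")
m = pattern.match("https://horne.red/members.php?id=58000")
print(m.group(m.lastindex))   # "58000" regardless of which domain matched
```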
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
new file mode 100644
index 0000000..e1846cc
--- /dev/null
+++ b/gallery_dl/extractor/poipiku.py
@@ -0,0 +1,169 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://poipiku.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?poipiku\.com"
+
+
+class PoipikuExtractor(Extractor):
+ """Base class for poipiku extractors"""
+ category = "poipiku"
+ root = "https://poipiku.com"
+ directory_fmt = ("{category}", "{user_id} {user_name}")
+ filename_fmt = "{post_id}_{num}.{extension}"
+ archive_fmt = "{post_id}_{num}"
+ request_interval = (0.5, 1.5)
+
+ def items(self):
+ password = self.config("password", "")
+
+ for post_url in self.posts():
+ parts = post_url.split("/")
+ if post_url[0] == "/":
+ post_url = self.root + post_url
+ page = self.request(post_url).text
+ extr = text.extract_from(page)
+
+ post = {
+ "post_category": extr("<title>[", "]"),
+ "count" : extr("(", " "),
+ "post_id" : parts[-1].partition(".")[0],
+ "user_id" : parts[-2],
+ "user_name" : text.unescape(extr(
+ '<h2 class="UserInfoUserName">', '</').rpartition(">")[2]),
+ "description": text.unescape(extr(
+ 'class="IllustItemDesc" >', '<')),
+ }
+
+ yield Message.Directory, post
+ post["num"] = 0
+
+ while True:
+ thumb = extr('class="IllustItemThumbImg" src="', '"')
+ if not thumb:
+ break
+ elif thumb.startswith("/img/"):
+ continue
+ post["num"] += 1
+ url = text.ensure_http_scheme(thumb[:-8])
+ yield Message.Url, url, text.nameext_from_url(url, post)
+
+ if not extr('</i> show all', '<'):
+ continue
+
+ url = self.root + "/f/ShowAppendFileF.jsp"
+ headers = {
+ "Accept" : "application/json, text/javascript, */*; q=0.01",
+ "X-Requested-With": "XMLHttpRequest",
+ "Origin" : self.root,
+ "Referer": post_url,
+ }
+ data = {
+ "UID": post["user_id"],
+ "IID": post["post_id"],
+ "PAS": password,
+ "MD" : "0",
+ "TWF": "-1",
+ }
+ page = self.request(
+ url, method="POST", headers=headers, data=data).json()["html"]
+
+ for thumb in text.extract_iter(
+ page, 'class="IllustItemThumbImg" src="', '"'):
+ post["num"] += 1
+ url = text.ensure_http_scheme(thumb[:-8])
+ yield Message.Url, url, text.nameext_from_url(url, post)
+
+
+class PoipikuUserExtractor(PoipikuExtractor):
+ """Extractor for posts from a poipiku user"""
+ subcategory = "user"
+ pattern = (BASE_PATTERN + r"/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?"
+ r"(\d+)/?(?:$|[?&#])")
+ test = (
+ ("https://poipiku.com/25049/", {
+ "pattern": r"https://img\.poipiku\.com/user_img\d+/000025049"
+ r"/\d+_\w+\.(jpe?g|png)$",
+ "range": "1-10",
+ "count": 10,
+ }),
+ ("https://poipiku.com/IllustListPcV.jsp?PG=1&ID=25049&KWD=")
+ )
+
+ def __init__(self, match):
+ PoipikuExtractor.__init__(self, match)
+ self._page, self.user_id = match.groups()
+
+ def posts(self):
+ url = self.root + "/IllustListPcV.jsp"
+ params = {
+ "PG" : text.parse_int(self._page, 0),
+ "ID" : self.user_id,
+ "KWD": "",
+ }
+
+ while True:
+ page = self.request(url, params=params).text
+
+ cnt = 0
+ for path in text.extract_iter(
+ page, 'class="IllustInfo" href="', '"'):
+ yield path
+ cnt += 1
+
+ if cnt < 48:
+ return
+ params["PG"] += 1
+
+
+class PoipikuPostExtractor(PoipikuExtractor):
+ """Extractor for a poipiku post"""
+ subcategory = "post"
+ pattern = BASE_PATTERN + r"/(\d+)/(\d+)"
+ test = (
+ ("https://poipiku.com/25049/5864576.html", {
+ "pattern": r"https://img\.poipiku\.com/user_img03/000025049"
+ r"/005864576_EWN1Y65gQ\.png$",
+ "keyword": {
+ "count": "1",
+ "description": "",
+ "extension": "png",
+ "filename": "005864576_EWN1Y65gQ",
+ "num": 1,
+ "post_category": "DOODLE",
+ "post_id": "5864576",
+ "user_id": "25049",
+ "user_name": "ユキウサギ",
+ },
+ }),
+ ("https://poipiku.com/2166245/6411749.html", {
+ "pattern": r"https://img\.poipiku\.com/user_img01/002166245"
+ r"/006411749_\w+\.jpeg$",
+ "count": 4,
+ "keyword": {
+ "count": "4",
+ "description": "絵茶の産物ネタバレあるやつ",
+ "num": int,
+ "post_category": "SPOILER",
+ "post_id": "6411749",
+ "user_id": "2166245",
+ "user_name": "wadahito",
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ PoipikuExtractor.__init__(self, match)
+ self.user_id, self.post_id = match.groups()
+
+ def posts(self):
+ return ("/{}/{}.html".format(self.user_id, self.post_id),)
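Poipiku post pages embed only the first thumbnails; the remaining files are fetched with an XHR-style POST to `/f/ShowAppendFileF.jsp`, which returns an HTML fragment inside JSON. A minimal sketch of that request outside the extractor (public post and empty password assumed):

```python
import requests

response = requests.post(
    "https://poipiku.com/f/ShowAppendFileF.jsp",
    headers={
        "X-Requested-With": "XMLHttpRequest",
        "Referer": "https://poipiku.com/25049/5864576.html",
    },
    data={"UID": "25049", "IID": "5864576", "PAS": "", "MD": "0", "TWF": "-1"},
)
html = response.json()["html"]   # fragment holding the extra thumbnails
# full-size URLs are then derived by stripping an 8-character thumbnail
# suffix from each src, as thumb[:-8] does in the extractor above
```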
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index ca7a3c6..a477424 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -130,12 +130,13 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
def beau(url):
"""https://readcomiconline.li/Scripts/rguard.min.js"""
- if url.startswith("https"):
- return url
-
url = url.replace("_x236", "d")
url = url.replace("_x945", "g")
+ if url.startswith("https"):
+ return url
+
+ url, sep, rest = url.partition("?")
containsS0 = "=s0" in url
url = url[:-3 if containsS0 else -6]
url = url[4:22] + url[25:]
@@ -143,4 +144,4 @@ def beau(url):
url = binascii.a2b_base64(url).decode()
url = url[0:13] + url[17:]
url = url[0:-2] + ("=s0" if containsS0 else "=s1600")
- return "https://2.bp.blogspot.com/" + url
+ return "https://2.bp.blogspot.com/" + url + sep + rest
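The `beau()` fix does two things: the `_x236`/`_x945` substitutions now run before the early `https` return, and any query string is split off before the base64 juggling and re-attached afterwards, since the decoding only applies to the path. `str.partition` keeps this safe even when no `?` is present:

```python
url, sep, rest = "token=s0?w=600".partition("?")
# url -> "token=s0", sep -> "?", rest -> "w=600"
# without a query string, sep and rest are both "" and nothing is appended:
url, sep, rest = "token=s0".partition("?")
# final result: "https://2.bp.blogspot.com/" + decoded + sep + rest
```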
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 2af917d..2ecb4b6 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -135,10 +135,11 @@ class SkebPostExtractor(SkebExtractor):
"body": "re:はじめまして。私はYouTubeにてVTuberとして活動をしている湊ラ",
"client": {
"avatar_url": "https://pbs.twimg.com/profile_images"
- "/1471184042791895042/f0DcWFGl.jpg",
- "header_url": None,
+ "/1537488326697287680/yNUbLDgC.jpg",
+ "header_url": "https://pbs.twimg.com/profile_banners"
+ "/1375007870291300358/1655744756/1500x500",
"id": 1196514,
- "name": "湊ラギ",
+ "name": "湊ラギ♦️🎀Vtuber🎀次回6/23予定",
"screen_name": "minato_ragi",
},
"completed_at": "2022-02-27T14:03:45.442Z",
@@ -208,3 +209,30 @@ class SkebUserExtractor(SkebExtractor):
posts = itertools.chain(posts, self._pagination(url, params))
return posts
+
+
+class SkebFollowingExtractor(SkebExtractor):
+ """Extractor for all creators followed by a skeb user"""
+ subcategory = "following"
+ pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/following_creators"
+ test = ("https://skeb.jp/@user/following_creators",)
+
+ def items(self):
+ for user in self.users():
+ url = "{}/@{}".format(self.root, user["screen_name"])
+ user["_extractor"] = SkebUserExtractor
+ yield Message.Queue, url, user
+
+ def users(self):
+ url = "{}/api/users/{}/following_creators".format(
+ self.root, self.user_name)
+ headers = {"Referer": self.root, "Authorization": "Bearer null"}
+ params = {"sort": "date", "offset": 0, "limit": 90}
+
+ while True:
+ data = self.request(url, params=params, headers=headers).json()
+ yield from data
+
+ if len(data) < params["limit"]:
+ return
+ params["offset"] += params["limit"]
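The `following_creators` endpoint is paged by offset: request `limit` users at a time and stop on the first short page. When the total count is an exact multiple of `limit`, one extra request returning an empty list ends the loop. Generic sketch, with `fetch` as a hypothetical helper returning one JSON list per call:

```python
def paginate_offset(fetch, limit=90):
    params = {"sort": "date", "offset": 0, "limit": limit}
    while True:
        batch = fetch(params)
        yield from batch
        if len(batch) < limit:        # short (or empty) page: done
            return
        params["offset"] += limit
```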
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 2737d34..a0d6194 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -40,6 +40,7 @@ class TwitterExtractor(Extractor):
self.quoted = self.config("quoted", False)
self.videos = self.config("videos", True)
self.cards = self.config("cards", False)
+ self._user_id = None
self._user_cache = {}
self._init_sizes()
@@ -59,6 +60,10 @@ class TwitterExtractor(Extractor):
self.api = TwitterAPI(self)
metadata = self.metadata()
+ if self.config("expand"):
+ tweets = self._expand_tweets(self.tweets())
+ self.tweets = lambda : tweets
+
for tweet in self.tweets():
if "legacy" in tweet:
@@ -75,7 +80,8 @@ class TwitterExtractor(Extractor):
if "in_reply_to_user_id_str" in data and (
not self.replies or (
self.replies == "self" and
- data["in_reply_to_user_id_str"] != data["user_id_str"]
+ (self._user_id or data["in_reply_to_user_id_str"]) !=
+ data["user_id_str"]
)
):
self.log.debug("Skipping %s (reply)", data["id_str"])
@@ -338,6 +344,22 @@ class TwitterExtractor(Extractor):
user["_extractor"] = cls
yield Message.Queue, fmt(user), user
+ def _expand_tweets(self, tweets):
+ seen = set()
+ for tweet in tweets:
+
+ if "legacy" in tweet:
+ cid = tweet["legacy"]["conversation_id_str"]
+ else:
+ cid = tweet["conversation_id_str"]
+
+ if cid not in seen:
+ seen.add(cid)
+ try:
+ yield from self.api.tweet_detail(cid)
+ except Exception:
+ yield tweet
+
def metadata(self):
"""Return general metadata"""
return {}
@@ -418,12 +440,12 @@ class TwitterTimelineExtractor(TwitterExtractor):
self.user = "id:" + user_id
def tweets(self):
- tweets = (self.api.user_tweets(self.user) if self.retweets else
- self.api.user_media(self.user))
+ tweets = (self.api.user_tweets if self.retweets else
+ self.api.user_media)
# yield initial batch of (media) tweets
tweet = None
- for tweet in tweets:
+ for tweet in tweets(self.user):
yield tweet
if tweet is None:
@@ -442,12 +464,17 @@ class TwitterTimelineExtractor(TwitterExtractor):
if "legacy" in tweet:
tweet = tweet["legacy"]
+ # build search query
+ query = "from:{} max_id:{}".format(username, tweet["id_str"])
+ if self.retweets:
+ query += " include:retweets include:nativeretweets"
+ if not self.textonly:
+ query += (" (filter:images OR"
+ " filter:native_video OR"
+ " card_name:animated_gif)")
+
# yield search results starting from last tweet id
- yield from self.api.search_adaptive(
- "from:{} include:retweets include:nativeretweets max_id:{} "
- "filter:images OR card_name:animated_gif OR filter:native_video"
- .format(username, tweet["id_str"])
- )
+ yield from self.api.search_adaptive(query)
class TwitterTweetsExtractor(TwitterExtractor):
@@ -694,10 +721,10 @@ class TwitterTweetExtractor(TwitterExtractor):
"date" : "dt:2020-08-20 04:00:28",
},
}),
- # all Tweets from a conversation (#1319)
- ("https://twitter.com/BlankArts_/status/1323314488611872769", {
+ # all Tweets from a 'conversation' (#1319)
+ ("https://twitter.com/supernaturepics/status/604341487988576256", {
"options": (("conversations", True),),
- "count": ">= 50",
+ "count": 5,
}),
# retweet with missing media entities (#1555)
("https://twitter.com/morino_ya/status/1392763691599237121", {
@@ -845,8 +872,11 @@ class TwitterAPI():
cookies = extractor.session.cookies
cookiedomain = extractor.cookiedomain
- # CSRF
- csrf_token = cookies.get("ct0", domain=cookiedomain)
+ csrf = extractor.config("csrf")
+ if csrf is None or csrf == "cookies":
+ csrf_token = cookies.get("ct0", domain=cookiedomain)
+ else:
+ csrf_token = None
if not csrf_token:
csrf_token = util.generate_token()
cookies.set("ct0", csrf_token, domain=cookiedomain)
@@ -1000,19 +1030,23 @@ class TwitterAPI():
def _user_id_by_screen_name(self, screen_name):
if screen_name.startswith("id:"):
self._user = util.SENTINEL
- return screen_name[3:]
+ user_id = screen_name[3:]
- user = ()
- try:
- user = self._user = self.user_by_screen_name(screen_name)
- return user["rest_id"]
- except KeyError:
- if "unavailable_message" in user:
- raise exception.NotFoundError("{} ({})".format(
- user["unavailable_message"].get("text"),
- user.get("reason")), False)
- else:
- raise exception.NotFoundError("user")
+ else:
+ user = ()
+ try:
+ user = self._user = self.user_by_screen_name(screen_name)
+ user_id = user["rest_id"]
+ except KeyError:
+ if "unavailable_message" in user:
+ raise exception.NotFoundError("{} ({})".format(
+ user["unavailable_message"].get("text"),
+ user.get("reason")), False)
+ else:
+ raise exception.NotFoundError("user")
+
+ self.extractor._user_id = user_id
+ return user_id
@cache(maxage=3600)
def _guest_token(self):
@@ -1228,6 +1262,8 @@ class TwitterAPI():
tweets.append(entry)
elif esw("cursor-bottom-"):
cursor = entry["content"]
+ if "itemContent" in cursor:
+ cursor = cursor["itemContent"]
if not cursor.get("stopOnEmptyResponse", True):
# keep going even if there are no tweets
tweet = True
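Two details in this file are easy to miss. The `csrf` option only changes where the `ct0` token comes from (an existing cookie vs. always `util.generate_token()`), and the `expand` option works by rebinding `self.tweets` to a closure over the already-expanded generator, so the main loop in `items()` stays unchanged. Sketch of that rebinding idiom:

```python
# inside TwitterExtractor.items(), sketched:
if self.config("expand"):
    tweets = self._expand_tweets(self.tweets())
    self.tweets = lambda: tweets      # later calls yield the expanded stream

for tweet in self.tweets():
    ...  # unchanged processing; now sees whole conversations, each
         # fetched at most once thanks to the seen-set in _expand_tweets()
```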
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index ad1617c..c29d730 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -30,12 +30,16 @@ class UnsplashExtractor(Extractor):
def items(self):
fmt = self.config("format") or "raw"
+ metadata = self.metadata()
+
for photo in self.photos():
util.delete_items(
photo, ("current_user_collections", "related_collections"))
url = photo["urls"][fmt]
text.nameext_from_url(url, photo)
+ if metadata:
+ photo.update(metadata)
photo["extension"] = "jpg"
photo["date"] = text.parse_datetime(photo["created_at"])
if "tags" in photo:
@@ -44,6 +48,10 @@ class UnsplashExtractor(Extractor):
yield Message.Directory, photo
yield Message.Url, url, photo
+ @staticmethod
+ def metadata():
+ return None
+
def skip(self, num):
pages = num // self.per_page
self.page_start += pages
@@ -172,17 +180,27 @@ class UnsplashFavoriteExtractor(UnsplashExtractor):
class UnsplashCollectionExtractor(UnsplashExtractor):
"""Extractor for an unsplash collection"""
subcategory = "collection"
- pattern = BASE_PATTERN + r"/collections/([^/?#]+)"
+ pattern = BASE_PATTERN + r"/collections/([^/?#]+)(?:/([^/?#]+))?"
test = (
("https://unsplash.com/collections/3178572/winter", {
"pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+ "keyword": {"collection_id": "3178572",
+ "collection_title": "winter"},
"range": "1-30",
"count": 30,
}),
+ ("https://unsplash.com/collections/3178572/"),
("https://unsplash.com/collections/_8qJQ2bCMWE/2021.05"),
)
+ def __init__(self, match):
+ UnsplashExtractor.__init__(self, match)
+ self.title = match.group(2) or ""
+
+ def metadata(self):
+ return {"collection_id": self.item, "collection_title": self.title}
+
def photos(self):
url = "{}/napi/collections/{}/photos".format(self.root, self.item)
params = {"order_by": "latest"}
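The new collection fields ride on a small hook-method pattern: the base class defines a static `metadata()` returning `None`, and `items()` merges the result into each photo only when a subclass overrides it. Condensed:

```python
class UnsplashExtractor(Extractor):
    @staticmethod
    def metadata():
        return None    # default: nothing extra to merge

class UnsplashCollectionExtractor(UnsplashExtractor):
    def metadata(self):
        # self.title comes from the new optional URL group, e.g. "winter"
        return {"collection_id": self.item, "collection_title": self.title}
```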
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 23f6ea2..ab2153f 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -40,12 +40,12 @@ class VkExtractor(Extractor):
continue
try:
- photo["url"], photo["width"], photo["height"] = photo[size]
+ _, photo["width"], photo["height"] = photo[size]
except ValueError:
# photo without width/height entries (#2535)
- photo["url"] = photo[size + "src"]
photo["width"] = photo["height"] = 0
+ photo["url"] = photo[size + "src"]
photo["id"] = photo["id"].rpartition("_")[2]
photo.update(data)
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index a7068c8..68871c8 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -52,10 +52,6 @@ class WeiboExtractor(Extractor):
for status in self.statuses():
- status["date"] = text.parse_datetime(
- status["created_at"], "%a %b %d %H:%M:%S %z %Y")
- yield Message.Directory, status
-
if self.retweets and "retweeted_status" in status:
if original_retweets:
status = status["retweeted_status"]
@@ -68,6 +64,10 @@ class WeiboExtractor(Extractor):
else:
files = self._files_from_status(status)
+ status["date"] = text.parse_datetime(
+ status["created_at"], "%a %b %d %H:%M:%S %z %Y")
+ yield Message.Directory, status
+
for num, file in enumerate(files, 1):
if file["url"].startswith("http:"):
file["url"] = "https:" + file["url"][5:]
@@ -191,7 +191,9 @@ class WeiboUserExtractor(WeiboExtractor):
subcategory = "user"
pattern = USER_PATTERN + r"(?:$|#)"
test = (
- ("https://weibo.com/1758989602"),
+ ("https://weibo.com/1758989602", {
+ "pattern": r"^https://weibo\.com/u/1758989602\?tabtype=feed$",
+ }),
("https://weibo.com/u/1758989602"),
("https://weibo.com/p/1758989602"),
("https://m.weibo.cn/profile/2314621010"),
@@ -200,12 +202,13 @@ class WeiboUserExtractor(WeiboExtractor):
)
def items(self):
- base = " {}/u/{}?tabtype=".format(self.root, self._user_id())
+ base = "{}/u/{}?tabtype=".format(self.root, self._user_id())
return self._dispatch_extractors((
- (WeiboHomeExtractor , base + "home"),
- (WeiboFeedExtractor , base + "feed"),
- (WeiboVideosExtractor, base + "newVideo"),
- (WeiboAlbumExtractor , base + "album"),
+ (WeiboHomeExtractor , base + "home"),
+ (WeiboFeedExtractor , base + "feed"),
+ (WeiboVideosExtractor , base + "video"),
+ (WeiboNewvideoExtractor, base + "newVideo"),
+ (WeiboAlbumExtractor , base + "album"),
), ("feed",))
@@ -254,8 +257,27 @@ class WeiboFeedExtractor(WeiboExtractor):
class WeiboVideosExtractor(WeiboExtractor):
- """Extractor for weibo 'newVideo' listings"""
+ """Extractor for weibo 'video' listings"""
subcategory = "videos"
+ pattern = USER_PATTERN + r"\?tabtype=video"
+ test = ("https://weibo.com/1758989602?tabtype=video", {
+ "pattern": r"https://f\.(video\.weibocdn\.com|us\.sinaimg\.cn)"
+ r"/(../)?\w+\.mp4\?label=mp",
+ "range": "1-30",
+ "count": 30,
+ })
+
+ def statuses(self):
+ endpoint = "/profile/getprofilevideolist"
+ params = {"uid": self._user_id()}
+
+ for status in self._pagination(endpoint, params):
+ yield status["video_detail_vo"]
+
+
+class WeiboNewvideoExtractor(WeiboExtractor):
+ """Extractor for weibo 'newVideo' listings"""
+ subcategory = "newvideo"
pattern = USER_PATTERN + r"\?tabtype=newVideo"
test = ("https://weibo.com/1758989602?tabtype=newVideo", {
"pattern": r"https://f\.video\.weibocdn\.com/(../)?\w+\.mp4\?label=mp",
@@ -336,8 +358,8 @@ class WeiboStatusExtractor(WeiboExtractor):
}),
# type == gif
("https://weibo.com/1758989602/LvBhm5DiP", {
- "pattern": r"http://g\.us\.sinaimg.cn/o0/qNZcaAAglx07Wuf921CM01041"
- r"20005tc0E010\.mp4\?label=gif_mp4",
+ "pattern": r"https://g\.us\.sinaimg.cn/o0/qNZcaAAglx07Wuf921CM0104"
+ r"120005tc0E010\.mp4\?label=gif_mp4",
}),
("https://m.weibo.cn/status/4339748116375525"),
("https://m.weibo.cn/5746766133/4339748116375525"),
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index d1b3a8a..107c8ed 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -14,6 +14,7 @@ import string
import _string
import datetime
import operator
+import functools
from . import text, util
_CACHE = {}
@@ -231,12 +232,7 @@ def parse_field_name(field_name):
func = operator.itemgetter
try:
if ":" in key:
- start, _, stop = key.partition(":")
- stop, _, step = stop.partition(":")
- start = int(start) if start else None
- stop = int(stop) if stop else None
- step = int(step) if step else None
- key = slice(start, stop, step)
+ key = _slice(key)
except TypeError:
pass # key is an integer
@@ -245,6 +241,16 @@ def parse_field_name(field_name):
return first, funcs
+def _slice(indices):
+ start, _, stop = indices.partition(":")
+ stop, _, step = stop.partition(":")
+ return slice(
+ int(start) if start else None,
+ int(stop) if stop else None,
+ int(step) if step else None,
+ )
+
+
def parse_format_spec(format_spec, conversion):
fmt = build_format_func(format_spec)
if not conversion:
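Note: _slice() extracts the former inline parsing into a helper so the new "[...]" format specifier can reuse it. Each component is optional, mirroring Python's start:stop:step notation; assuming the definition above:

    v = "0123456789abcdefghij"
    print(v[_slice("1:10")])  # 123456789
    print(v[_slice("5:")])    # 56789abcdefghij
    print(v[_slice("::2")])   # 02468acegi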
@@ -257,7 +263,7 @@ def parse_format_spec(format_spec, conversion):
"u": str.upper,
"c": str.capitalize,
"C": string.capwords,
- "j": json.dumps,
+ "j": functools.partial(json.dumps, default=str),
"t": str.strip,
"T": util.datetime_to_timestamp_string,
"d": text.parse_timestamp,
@@ -282,6 +288,8 @@ def build_format_func(format_spec):
fmt = format_spec[0]
if fmt == "?":
return _parse_optional(format_spec)
+ if fmt == "[":
+ return _parse_slice(format_spec)
if fmt == "L":
return _parse_maxlen(format_spec)
if fmt == "J":
@@ -304,6 +312,16 @@ def _parse_optional(format_spec):
return optional
+def _parse_slice(format_spec):
+ indices, _, format_spec = format_spec.partition("]")
+ slice = _slice(indices[1:])
+ fmt = build_format_func(format_spec)
+
+ def apply_slice(obj):
+ return fmt(obj[slice])
+ return apply_slice
+
+
def _parse_maxlen(format_spec):
maxlen, replacement, format_spec = format_spec.split("/", 2)
maxlen = text.parse_int(maxlen[1:])
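Note: _parse_slice() consumes the spec up to the first "]", compiles the remainder into the next format function, and returns a closure that slices before formatting — this is what lets "[...]" chain with other specifiers. Self-contained sketch with build_format_func() reduced to plain format():

    def _slice(indices):
        start, _, stop = indices.partition(":")
        stop, _, step = stop.partition(":")
        return slice(int(start) if start else None,
                     int(stop) if stop else None,
                     int(step) if step else None)

    def _parse_slice(format_spec):
        indices, _, format_spec = format_spec.partition("]")
        sl = _slice(indices[1:])  # drop the leading "["
        def apply_slice(obj):
            return format(obj[sl], format_spec)
        return apply_slice

    print(_parse_slice("[1:4]")("abcdefg"))     # bcd
    print(_parse_slice("[::2]>10")("abcdefg"))  # "      aceg"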
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 95d6806..8ac7384 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.22.1"
+__version__ = "1.22.3"
diff --git a/test/test_cookies.py b/test/test_cookies.py
index 188b54c..335fa3d 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -89,7 +89,7 @@ class TestCookiedict(unittest.TestCase):
self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values()))
def test_domain(self):
- for category in ["exhentai", "idolcomplex", "nijie"]:
+ for category in ["exhentai", "idolcomplex", "nijie", "horne"]:
extr = _get_extractor(category)
cookies = extr.session.cookies
for key in self.cdict:
@@ -107,7 +107,8 @@ class TestCookieLogin(unittest.TestCase):
extr_cookies = {
"exhentai" : ("ipb_member_id", "ipb_pass_hash"),
"idolcomplex": ("login", "pass_hash"),
- "nijie" : ("nemail", "nlogin"),
+ "nijie" : ("nijie_tok",),
+ "horne" : ("horne_tok",),
}
for category, cookienames in extr_cookies.items():
cookies = {name: "value" for name in cookienames}
@@ -199,10 +200,13 @@ class TestCookieUtils(unittest.TestCase):
def _get_extractor(category):
- for extr in extractor.extractors():
- if extr.category == category and hasattr(extr, "_login_impl"):
- url = next(extr._get_tests())[0]
- return extr.from_url(url)
+ URLS = {
+ "exhentai" : "https://exhentai.org/g/1200119/d55c44d3d0/",
+ "idolcomplex": "https://idol.sankakucomplex.com/post/show/1",
+ "nijie" : "https://nijie.info/view.php?id=1",
+ "horne" : "https://horne.red/view.php?id=1",
+ }
+ return extractor.find(URLS[category])
if __name__ == "__main__":
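Note: extractor.find() resolves a URL to a matching extractor instance, so the helper no longer scans extractor.extractors() for classes that happen to ship test URLs. Assumed usage, based on the replacement above:

    from gallery_dl import extractor

    extr = extractor.find("https://nijie.info/view.php?id=1")
    print(extr.category)  # nijie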
diff --git a/test/test_formatter.py b/test/test_formatter.py
index efb6963..5b8ca0a 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -56,6 +56,7 @@ class TestFormatter(unittest.TestCase):
self._run_test("{t!d:%Y-%m-%d}", "2010-01-01")
self._run_test("{dt!T}", "1262304000")
self._run_test("{l!j}", '["a", "b", "c"]')
+ self._run_test("{dt!j}", '"2010-01-01 00:00:00"')
with self.assertRaises(KeyError):
self._run_test("{a!q}", "hello world")
@@ -134,6 +135,21 @@ class TestFormatter(unittest.TestCase):
self._run_test("{a[:50:2]}", v[:50:2])
self._run_test("{a[::]}" , v)
+ self._run_test("{a:[1:10]}" , v[1:10])
+ self._run_test("{a:[-10:-1]}", v[-10:-1])
+ self._run_test("{a:[5:]}" , v[5:])
+ self._run_test("{a:[50:]}", v[50:])
+ self._run_test("{a:[:5]}" , v[:5])
+ self._run_test("{a:[:50]}", v[:50])
+ self._run_test("{a:[:]}" , v)
+ self._run_test("{a:[1:10:2]}" , v[1:10:2])
+ self._run_test("{a:[-10:-1:2]}", v[-10:-1:2])
+ self._run_test("{a:[5::2]}" , v[5::2])
+ self._run_test("{a:[50::2]}", v[50::2])
+ self._run_test("{a:[:5:2]}" , v[:5:2])
+ self._run_test("{a:[:50:2]}", v[:50:2])
+ self._run_test("{a:[::]}" , v)
+
def test_maxlen(self):
v = self.kwdict["a"]
self._run_test("{a:L5/foo/}" , "foo")
@@ -176,6 +192,9 @@ class TestFormatter(unittest.TestCase):
# join-and-replace
self._run_test("{l:J-/Rb/E/}", "a-E-c")
+ # join and slice
+ self._run_test("{l:J-/[1:-1]}", "-b-")
+
# optional-and-maxlen
self._run_test("{d[a]:?</>/L1/too long/}", "<too long>")
self._run_test("{d[c]:?</>/L5/too long/}", "")
diff --git a/test/test_results.py b/test/test_results.py
index 6a186fd..d3debc6 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -312,6 +312,7 @@ def setup_test_config():
config.set(("extractor", "nijie") , "username", email)
config.set(("extractor", "seiga") , "username", email)
+ config.set(("extractor", "horne") , "username", email2)
config.set(("extractor", "pinterest") , "username", email2)
config.set(("extractor", "pinterest") , "username", None) # login broken