author    Unit 193 <unit193@unit193.net>  2023-06-19 01:14:28 -0400
committer Unit 193 <unit193@unit193.net>  2023-06-19 01:14:28 -0400
commit    9fb906aeb3816abb42f459d1b67e35024e6f2348 (patch)
tree      30b039301c783475c0f4d46b0e0c5ec9851b2567
parent    8950c0f2ef55ec2ed36b3fccc9fd85b64b877c3b (diff)

New upstream version 1.25.6  (upstream/1.25.6)
 CHANGELOG.md                         |  33
 PKG-INFO                             |   6
 README.rst                           |   4
 data/man/gallery-dl.1                |   2
 data/man/gallery-dl.conf.5           |  34
 gallery_dl.egg-info/PKG-INFO         |   6
 gallery_dl.egg-info/SOURCES.txt      |   1
 gallery_dl/extractor/__init__.py     |   1
 gallery_dl/extractor/blogger.py      |   1
 gallery_dl/extractor/bunkr.py        |  13
 gallery_dl/extractor/fanbox.py       |  12
 gallery_dl/extractor/fantia.py       | 142
 gallery_dl/extractor/furaffinity.py  |  15
 gallery_dl/extractor/imagehosts.py   |  31
 gallery_dl/extractor/instagram.py    |   2
 gallery_dl/extractor/jpgfish.py      |  23
 gallery_dl/extractor/jschan.py       |  94
 gallery_dl/extractor/kemonoparty.py  |  46
 gallery_dl/extractor/pixiv.py        |  99
 gallery_dl/extractor/pornhub.py      |  23
 gallery_dl/extractor/reddit.py       |   5
 gallery_dl/extractor/redgifs.py      |  73
 gallery_dl/extractor/senmanga.py     |  96
 gallery_dl/extractor/twitter.py      | 119
 gallery_dl/extractor/vipergirls.py   |  94
 gallery_dl/extractor/wallhaven.py    |  24
 gallery_dl/extractor/weibo.py        |   4
 gallery_dl/formatter.py              |   1
 gallery_dl/version.py                |   2
 test/test_formatter.py               |   9
 test/test_results.py                 |   3
 31 files changed, 733 insertions(+), 285 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 405c117..429c7ea 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,38 @@
# Changelog
+## 1.25.6 - 2023-06-17
+### Additions
+- [blogger] download files from `lh*.googleusercontent.com` ([#4070](https://github.com/mikf/gallery-dl/issues/4070))
+- [fantia] extract `plan` metadata ([#2477](https://github.com/mikf/gallery-dl/issues/2477))
+- [fantia] emit warning for non-visible content sections ([#4128](https://github.com/mikf/gallery-dl/issues/4128))
+- [furaffinity] extract `favorite_id` metadata ([#4133](https://github.com/mikf/gallery-dl/issues/4133))
+- [jschan] add generic extractors for jschan image boards ([#3447](https://github.com/mikf/gallery-dl/issues/3447))
+- [kemonoparty] support `.su` TLDs ([#4139](https://github.com/mikf/gallery-dl/issues/4139))
+- [pixiv:novel] add `novel-bookmark` extractor ([#4111](https://github.com/mikf/gallery-dl/issues/4111))
+- [pixiv:novel] add `full-series` option ([#4111](https://github.com/mikf/gallery-dl/issues/4111))
+- [postimage] add gallery support, update image extractor ([#3115](https://github.com/mikf/gallery-dl/issues/3115), [#4134](https://github.com/mikf/gallery-dl/issues/4134))
+- [redgifs] support galleries ([#4021](https://github.com/mikf/gallery-dl/issues/4021))
+- [twitter] extract `conversation_id` metadata ([#3839](https://github.com/mikf/gallery-dl/issues/3839))
+- [vipergirls] add login support ([#4166](https://github.com/mikf/gallery-dl/issues/4166))
+- [vipergirls] use API endpoints ([#4166](https://github.com/mikf/gallery-dl/issues/4166))
+- [formatter] implement `H` conversion ([#4164](https://github.com/mikf/gallery-dl/issues/4164))
+### Fixes
+- [acidimg] fix extraction ([#4136](https://github.com/mikf/gallery-dl/issues/4136))
+- [bunkr] update domain to bunkrr.su ([#4159](https://github.com/mikf/gallery-dl/issues/4159), [#4189](https://github.com/mikf/gallery-dl/issues/4189))
+- [bunkr] fix video downloads
+- [fanbox] prevent exception due to missing embeds ([#4088](https://github.com/mikf/gallery-dl/issues/4088))
+- [instagram] fix retrieving `/tagged` posts ([#4122](https://github.com/mikf/gallery-dl/issues/4122))
+- [jpgfish] update domain to `jpg.pet` ([#4138](https://github.com/mikf/gallery-dl/issues/4138))
+- [pixiv:novel] fix error with embeds extraction ([#4175](https://github.com/mikf/gallery-dl/issues/4175))
+- [pornhub] improve redirect handling ([#4188](https://github.com/mikf/gallery-dl/issues/4188))
+- [reddit] fix crash due to empty `crosspost_parent_lists` ([#4120](https://github.com/mikf/gallery-dl/issues/4120), [#4172](https://github.com/mikf/gallery-dl/issues/4172))
+- [redgifs] update `search` URL pattern ([#4115](https://github.com/mikf/gallery-dl/issues/4115), [#4185](https://github.com/mikf/gallery-dl/issues/4185))
+- [senmanga] fix and update ([#4160](https://github.com/mikf/gallery-dl/issues/4160))
+- [twitter] use GraphQL API search endpoint ([#3942](https://github.com/mikf/gallery-dl/issues/3942))
+- [wallhaven] improve HTTP error handling ([#4192](https://github.com/mikf/gallery-dl/issues/4192))
+- [weibo] prevent fatal exception due to missing video data ([#4150](https://github.com/mikf/gallery-dl/issues/4150))
+- [weibo] fix `.json` extension for some videos
+
## 1.25.5 - 2023-05-27
### Additions
- [8muses] add `parts` metadata field ([#3329](https://github.com/mikf/gallery-dl/issues/3329))
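
Note: a minimal sketch of the new `H` conversion (#4164), assuming it unescapes HTML entities and strips tags; formatter.parse() is the module's entry point for format strings.

    # Hypothetical usage of the new 'H' conversion; the output shown
    # assumes 'H' strips HTML tags and unescapes entities.
    from gallery_dl import formatter

    fmt = formatter.parse("{title!H}")
    print(fmt.format_map({"title": "<b>Foo &amp; Bar</b>"}))
    # assumed output: Foo & Bar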
diff --git a/PKG-INFO b/PKG-INFO
index cadb98c..68bf134 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.25.5
+Version: 1.25.6
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -109,9 +109,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index ba745a8..44cbfb3 100644
--- a/README.rst
+++ b/README.rst
@@ -72,9 +72,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 3d5e4e8..c86db6a 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-05-27" "1.25.5" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-06-17" "1.25.6" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index be234ce..e4df909 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-05-27" "1.25.5" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-06-17" "1.25.6" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -486,6 +486,8 @@ and optional for
.br
* \f[I]twitter\f[]
.br
+* \f[I]vipergirls\f[]
+.br
* \f[I]zerochan\f[]
These values can also be specified via the
@@ -2828,6 +2830,18 @@ by using a third-party tool like
Download images embedded in novels.
+.SS extractor.pixiv.novel.full-series
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+When downloading a novel being part of a series,
+download all novels of that series.
+
+
.SS extractor.pixiv.metadata
.IP "Type:" 6
\f[I]bool\f[]
@@ -3631,6 +3645,24 @@ If this value is \f[I]"original"\f[], metadata for these files
will be taken from the original Tweets, not the Retweets.
+.SS extractor.twitter.search-endpoint
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"auto"\f[]
+
+.IP "Description:" 4
+Selects the API endpoint used to retrieve search results.
+
+.br
+* \f[I]"rest"\f[]: Legacy REST endpoint - returns a \f[I]403 Forbidden\f[] error when not logged in
+.br
+* \f[I]"graphql"\f[]: New GraphQL endpoint
+.br
+* \f[I]"auto"\f[]: \f[I]"rest"\f[] when logged in, \f[I]"graphql"\f[] otherwise
+
+
.SS extractor.twitter.timeline.strategy
.IP "Type:" 6
\f[I]string\f[]
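
Note: both options documented above can also be set programmatically; a minimal sketch using gallery_dl.config, equivalent to placing them under "extractor.pixiv.novel" and "extractor.twitter" in gallery-dl.conf.

    # Sketch: enable the new 1.25.6 options from Python.
    from gallery_dl import config

    # download every novel of a series when one of them is requested
    config.set(("extractor", "pixiv", "novel"), "full-series", True)
    # force the new GraphQL search endpoint instead of "auto"
    config.set(("extractor", "twitter"), "search-endpoint", "graphql")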
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index c069128..547f3be 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.25.5
+Version: 1.25.6
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -109,9 +109,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index fde82b6..44fbd22 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -114,6 +114,7 @@ gallery_dl/extractor/issuu.py
gallery_dl/extractor/itaku.py
gallery_dl/extractor/itchio.py
gallery_dl/extractor/jpgfish.py
+gallery_dl/extractor/jschan.py
gallery_dl/extractor/kabeuchi.py
gallery_dl/extractor/keenspot.py
gallery_dl/extractor/kemonoparty.py
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 3e47c3e..a344fe4 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -76,6 +76,7 @@ modules = [
"itaku",
"itchio",
"jpgfish",
+ "jschan",
"kabeuchi",
"keenspot",
"kemonoparty",
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index eafc8af..3ceada8 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -44,6 +44,7 @@ class BloggerExtractor(Extractor):
findall_image = re.compile(
r'src="(https?://(?:'
r'blogger\.googleusercontent\.com/img|'
+ r'lh\d+\.googleusercontent\.com/|'
r'\d+\.bp\.blogspot\.com)/[^"]+)').findall
findall_video = re.compile(
r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 7c66fb0..5c8c530 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -6,19 +6,19 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://bunkr.la/"""
+"""Extractors for https://bunkrr.su/"""
from .lolisafe import LolisafeAlbumExtractor
from .. import text
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
- """Extractor for bunkr.la albums"""
+ """Extractor for bunkrr.su albums"""
category = "bunkr"
- root = "https://bunkr.la"
- pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:la|[sr]u|is|to)/a/([^/?#]+)"
+ root = "https://bunkrr.su"
+ pattern = r"(?:https?://)?(?:app\.)?bunkr+\.(?:la|[sr]u|is|to)/a/([^/?#]+)"
test = (
- ("https://bunkr.la/a/Lktg9Keq", {
+ ("https://bunkrr.su/a/Lktg9Keq", {
"pattern": r"https://cdn\.bunkr\.ru/test-テスト-\"&>-QjgneIQv\.png",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
"keyword": {
@@ -52,6 +52,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"num": int,
},
}),
+ ("https://bunkrr.su/a/Lktg9Keq"),
("https://bunkr.la/a/Lktg9Keq"),
("https://bunkr.su/a/Lktg9Keq"),
("https://bunkr.ru/a/Lktg9Keq"),
@@ -70,7 +71,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
cdn = None
files = []
append = files.append
- headers = {"Referer": self.root.replace("://", "://stream.", 1) + "/"}
+ headers = {"Referer": self.root + "/"}
pos = page.index('class="grid-images')
for url in text.extract_iter(page, '<a href="', '"', pos):
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index 4ca0852..373529f 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -214,9 +214,15 @@ class FanboxExtractor(Extractor):
# to a proper Fanbox URL
url = "https://www.pixiv.net/fanbox/"+content_id
# resolve redirect
- response = self.request(url, method="HEAD", allow_redirects=False)
- url = response.headers["Location"]
- final_post["_extractor"] = FanboxPostExtractor
+ try:
+ url = self.request(url, method="HEAD",
+ allow_redirects=False).headers["location"]
+ except Exception as exc:
+ url = None
+ self.log.warning("Unable to extract fanbox embed %s (%s: %s)",
+ content_id, exc.__class__.__name__, exc)
+ else:
+ final_post["_extractor"] = FanboxPostExtractor
elif provider == "twitter":
url = "https://twitter.com/_/status/"+content_id
elif provider == "google_forms":
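
Note: the new try/except resolves the fanbox redirect defensively instead of assuming a Location header is present. A generic sketch of that pattern, using requests directly rather than the extractor's session:

    # Resolve a redirect via HEAD without following it; return None
    # (and log) when the header is missing or the request fails.
    import requests

    def resolve_redirect(url):
        try:
            response = requests.head(url, allow_redirects=False)
            return response.headers["location"]
        except Exception as exc:
            print("unable to resolve %s (%s: %s)"
                  % (url, exc.__class__.__name__, exc))
            return None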
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index 13dfead..35c4cc4 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -24,6 +24,14 @@ class FantiaExtractor(Extractor):
"Accept" : "application/json, text/plain, */*",
"Referer": self.root,
}
+ _empty_plan = {
+ "id" : 0,
+ "price": 0,
+ "limit": 0,
+ "name" : "",
+ "description": "",
+ "thumb": self.root + "/images/fallback/plan/thumb_default.png",
+ }
if self._warning:
if not self._check_cookies(("_session_id",)):
@@ -31,15 +39,29 @@ class FantiaExtractor(Extractor):
FantiaExtractor._warning = False
for post_id in self.posts():
- full_response, post = self._get_post_data(post_id)
- yield Message.Directory, post
+ post = self._get_post_data(post_id)
post["num"] = 0
- for url, url_data in self._get_urls_from_post(full_response, post):
- post["num"] += 1
- fname = url_data["content_filename"] or url
- text.nameext_from_url(fname, url_data)
- url_data["file_url"] = url
- yield Message.Url, url, url_data
+
+ for content in self._get_post_contents(post):
+ post["content_category"] = content["category"]
+ post["content_title"] = content["title"]
+ post["content_filename"] = content.get("filename", "")
+ post["content_id"] = content["id"]
+ post["plan"] = content["plan"] or _empty_plan
+ yield Message.Directory, post
+
+ if content["visible_status"] != "visible":
+ self.log.warning(
+ "Unable to download '%s' files from "
+ "%s#post-content-id-%s", content["visible_status"],
+ post["post_url"], content["id"])
+
+ for url in self._get_content_urls(post, content):
+ text.nameext_from_url(
+ post["content_filename"] or url, post)
+ post["file_url"] = url
+ post["num"] += 1
+ yield Message.Url, url, post
def posts(self):
"""Return post IDs"""
@@ -71,7 +93,7 @@ class FantiaExtractor(Extractor):
"""Fetch and process post data"""
url = self.root+"/api/v1/posts/"+post_id
resp = self.request(url, headers=self.headers).json()["post"]
- post = {
+ return {
"post_id": resp["id"],
"post_url": self.root + "/posts/" + str(resp["id"]),
"post_title": resp["title"],
@@ -85,55 +107,65 @@ class FantiaExtractor(Extractor):
"fanclub_user_name": resp["fanclub"]["user"]["name"],
"fanclub_name": resp["fanclub"]["name"],
"fanclub_url": self.root+"/fanclubs/"+str(resp["fanclub"]["id"]),
- "tags": resp["tags"]
+ "tags": resp["tags"],
+ "_data": resp,
}
- return resp, post
- def _get_urls_from_post(self, resp, post):
+ def _get_post_contents(self, post):
+ contents = post["_data"]["post_contents"]
+
+ try:
+ url = post["_data"]["thumb"]["original"]
+ except Exception:
+ pass
+ else:
+ contents.insert(0, {
+ "id": "thumb",
+ "title": "thumb",
+ "category": "thumb",
+ "download_uri": url,
+ "visible_status": "visible",
+ "plan": None,
+ })
+
+ return contents
+
+ def _get_content_urls(self, post, content):
"""Extract individual URL data from the response"""
- if "thumb" in resp and resp["thumb"] and "original" in resp["thumb"]:
- post["content_filename"] = ""
- post["content_category"] = "thumb"
- post["file_id"] = "thumb"
- yield resp["thumb"]["original"], post
-
- for content in resp["post_contents"]:
- post["content_category"] = content["category"]
- post["content_title"] = content["title"]
- post["content_filename"] = content.get("filename", "")
- post["content_id"] = content["id"]
-
- if "comment" in content:
- post["content_comment"] = content["comment"]
-
- if "post_content_photos" in content:
- for photo in content["post_content_photos"]:
- post["file_id"] = photo["id"]
- yield photo["url"]["original"], post
-
- if "download_uri" in content:
- post["file_id"] = content["id"]
- yield self.root+"/"+content["download_uri"], post
-
- if content["category"] == "blog" and "comment" in content:
- comment_json = util.json_loads(content["comment"])
- ops = comment_json.get("ops", ())
-
- # collect blogpost text first
- blog_text = ""
- for op in ops:
- insert = op.get("insert")
- if isinstance(insert, str):
- blog_text += insert
- post["blogpost_text"] = blog_text
-
- # collect images
- for op in ops:
- insert = op.get("insert")
- if isinstance(insert, dict) and "fantiaImage" in insert:
- img = insert["fantiaImage"]
- post["file_id"] = img["id"]
- yield "https://fantia.jp" + img["original_url"], post
+ if "comment" in content:
+ post["content_comment"] = content["comment"]
+
+ if "post_content_photos" in content:
+ for photo in content["post_content_photos"]:
+ post["file_id"] = photo["id"]
+ yield photo["url"]["original"]
+
+ if "download_uri" in content:
+ post["file_id"] = content["id"]
+ url = content["download_uri"]
+ if url[0] == "/":
+ url = self.root + url
+ yield url
+
+ if content["category"] == "blog" and "comment" in content:
+ comment_json = util.json_loads(content["comment"])
+ ops = comment_json.get("ops") or ()
+
+ # collect blogpost text first
+ blog_text = ""
+ for op in ops:
+ insert = op.get("insert")
+ if isinstance(insert, str):
+ blog_text += insert
+ post["blogpost_text"] = blog_text
+
+ # collect images
+ for op in ops:
+ insert = op.get("insert")
+ if isinstance(insert, dict) and "fantiaImage" in insert:
+ img = insert["fantiaImage"]
+ post["file_id"] = img["id"]
+ yield self.root + img["original_url"]
class FantiaCreatorExtractor(FantiaExtractor):
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index cc43cec..9f5cbba 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -159,7 +159,13 @@ class FuraffinityExtractor(Extractor):
while path:
page = self.request(self.root + path).text
- yield from text.extract_iter(page, 'id="sid-', '"')
+ extr = text.extract_from(page)
+ while True:
+ post_id = extr('id="sid-', '"')
+ if not post_id:
+ break
+ self._favorite_id = text.parse_int(extr('data-fav-id="', '"'))
+ yield post_id
path = text.extr(page, 'right" href="', '"')
def _pagination_search(self, query):
@@ -241,6 +247,7 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
test = ("https://www.furaffinity.net/favorites/mirlinthloth/", {
"pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
r"/art/[^/]+/\d+/\d+.\w+\.\w+",
+ "keyword": {"favorite_id": int},
"range": "45-50",
"count": 6,
})
@@ -248,6 +255,12 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
def posts(self):
return self._pagination_favorites()
+ def _parse_post(self, post_id):
+ post = FuraffinityExtractor._parse_post(self, post_id)
+ if post:
+ post["favorite_id"] = self._favorite_id
+ return post
+
class FuraffinitySearchExtractor(FuraffinityExtractor):
"""Extractor for furaffinity search results"""
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index df4ff26..a6e848c 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -164,17 +164,17 @@ class AcidimgImageExtractor(ImagehostImageExtractor):
pattern = r"(?:https?://)?((?:www\.)?acidimg\.cc/img-([a-z0-9]+)\.html)"
test = ("https://acidimg.cc/img-5acb6b9de4640.html", {
"url": "f132a630006e8d84f52d59555191ed82b3b64c04",
- "keyword": "a8bb9ab8b2f6844071945d31f8c6e04724051f37",
+ "keyword": "135347ab4345002fc013863c0d9419ba32d98f78",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
})
params = "simple"
encoding = "utf-8"
def get_info(self, page):
- url, pos = text.extract(page, "<img class='centred' src='", "'")
+ url, pos = text.extract(page, '<img class="centred" src="', '"')
if not url:
raise exception.NotFoundError("image")
- filename, pos = text.extract(page, " alt='", "'", pos)
+ filename, pos = text.extract(page, ' alt="', '"', pos)
return url, (filename + splitext(url)[1]) if filename else url
@@ -295,19 +295,38 @@ class PostimgImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from postimages.org"""
category = "postimg"
pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)"
- r"/(?:image/)?([^/?#]+)/?)")
+ r"/(?!gallery/)(?:image/)?([^/?#]+)/?)")
test = ("https://postimg.cc/Wtn2b3hC", {
- "url": "0794cfda9b8951a8ac3aa692472484200254ab86",
+ "url": "72f3c8b1d6c6601a20ad58f35635494b4891a99e",
"keyword": "2d05808d04e4e83e33200db83521af06e3147a84",
"content": "cfaa8def53ed1a575e0c665c9d6d8cf2aac7a0ee",
})
def get_info(self, page):
- url , pos = text.extract(page, 'id="main-image" src="', '"')
+ pos = page.index(' id="download"')
+ url , pos = text.rextract(page, ' href="', '"', pos)
filename, pos = text.extract(page, 'class="imagename">', '<', pos)
return url, text.unescape(filename)
+class PostimgGalleryExtractor(ImagehostImageExtractor):
+ """Extractor for images galleries from postimages.org"""
+ category = "postimg"
+ subcategory = "gallery"
+ pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)"
+ r"/(?:gallery/)([^/?#]+)/?)")
+ test = ("https://postimg.cc/gallery/wxpDLgX", {
+ "pattern": PostimgImageExtractor.pattern,
+ "count": 22,
+ })
+
+ def items(self):
+ page = self.request(self.page_url).text
+ data = {"_extractor": PostimgImageExtractor}
+ for url in text.extract_iter(page, ' class="thumb"><a href="', '"'):
+ yield Message.Queue, url, data
+
+
class TurboimagehostImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from www.turboimagehost.com"""
category = "turboimagehost"
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 677cbdd..faeffa6 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -864,7 +864,7 @@ class InstagramRestAPI():
def user_tagged(self, user_id):
endpoint = "/v1/usertags/{}/feed/".format(user_id)
- params = {"count": 50}
+ params = {"count": 20}
return self._pagination(endpoint, params)
def _call(self, endpoint, **kwargs):
diff --git a/gallery_dl/extractor/jpgfish.py b/gallery_dl/extractor/jpgfish.py
index cdcf35c..b8d425a 100644
--- a/gallery_dl/extractor/jpgfish.py
+++ b/gallery_dl/extractor/jpgfish.py
@@ -4,18 +4,18 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://jpg.fishing/"""
+"""Extractors for https://jpg.pet/"""
from .common import Extractor, Message
from .. import text
-BASE_PATTERN = r"(?:https?://)?jpg\.(?:fishing|church)"
+BASE_PATTERN = r"(?:https?://)?jpg\.(?:pet|fish(?:ing)?|church)"
class JpgfishExtractor(Extractor):
"""Base class for jpgfish extractors"""
category = "jpgfish"
- root = "https://jpg.fishing"
+ root = "https://jpg.pet"
directory_fmt = ("{category}", "{user}", "{album}",)
archive_fmt = "{id}"
@@ -36,7 +36,7 @@ class JpgfishImageExtractor(JpgfishExtractor):
subcategory = "image"
pattern = BASE_PATTERN + r"/img/((?:[^/?#]+\.)?(\w+))"
test = (
- ("https://jpg.fishing/img/funnymeme.LecXGS", {
+ ("https://jpg.pet/img/funnymeme.LecXGS", {
"pattern": r"https://simp3\.jpg\.church/images/funnymeme\.jpg",
"content": "098e5e9b17ad634358426e0ffd1c93871474d13c",
"keyword": {
@@ -52,7 +52,9 @@ class JpgfishImageExtractor(JpgfishExtractor):
"pattern": r"https://simp2\.jpg\.church/hannahowo_00457\.jpg",
"keyword": {"album": "401-500"},
}),
- ("https://jpg.church/img/hannahowo-00424.au64iA"),
+ ("https://jpg.fishing/img/funnymeme.LecXGS"),
+ ("https://jpg.fish/img/funnymeme.LecXGS"),
+ ("https://jpg.church/img/funnymeme.LecXGS"),
)
def __init__(self, match):
@@ -81,13 +83,13 @@ class JpgfishAlbumExtractor(JpgfishExtractor):
subcategory = "album"
pattern = BASE_PATTERN + r"/a(?:lbum)?/([^/?#]+)(/sub)?"
test = (
- ("https://jpg.fishing/album/CDilP/?sort=date_desc&page=1", {
+ ("https://jpg.pet/album/CDilP/?sort=date_desc&page=1", {
"count": 2,
}),
- ("https://jpg.church/a/gunggingnsk.N9OOI", {
+ ("https://jpg.fishing/a/gunggingnsk.N9OOI", {
"count": 114,
}),
- ("https://jpg.church/a/101-200.aNJ6A/", {
+ ("https://jpg.fish/a/101-200.aNJ6A/", {
"count": 100,
}),
("https://jpg.church/a/hannahowo.aNTdH/sub", {
@@ -118,12 +120,15 @@ class JpgfishUserExtractor(JpgfishExtractor):
subcategory = "user"
pattern = BASE_PATTERN + r"/(?!img|a(?:lbum)?)([^/?#]+)(/albums)?"
test = (
- ("https://jpg.fishing/exearco", {
+ ("https://jpg.pet/exearco", {
"count": 3,
}),
("https://jpg.church/exearco/albums", {
"count": 1,
}),
+ ("https://jpg.fishing/exearco"),
+ ("https://jpg.fish/exearco"),
+ ("https://jpg.church/exearco"),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/jschan.py b/gallery_dl/extractor/jschan.py
new file mode 100644
index 0000000..fe758fa
--- /dev/null
+++ b/gallery_dl/extractor/jschan.py
@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for jschan Imageboards"""
+
+from .common import BaseExtractor, Message
+from .. import text
+import itertools
+
+
+class JschanExtractor(BaseExtractor):
+ basecategory = "jschan"
+
+
+BASE_PATTERN = JschanExtractor.update({
+ "94chan": {
+ "root": "https://94chan.org",
+ "pattern": r"94chan\.org"
+ }
+})
+
+
+class JschanThreadExtractor(JschanExtractor):
+ """Extractor for jschan threads"""
+ subcategory = "thread"
+ directory_fmt = ("{category}", "{board}",
+ "{threadId} {subject|nomarkup[:50]}")
+ filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
+ archive_fmt = "{board}_{postId}_{num}"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)\.html"
+ test = (
+ ("https://94chan.org/art/thread/25.html", {
+ "pattern": r"https://94chan.org/file/[0-9a-f]{64}(\.\w+)?",
+ "count": ">= 15"
+ })
+ )
+
+ def __init__(self, match):
+ JschanExtractor.__init__(self, match)
+ index = match.lastindex
+ self.board = match.group(index-1)
+ self.thread = match.group(index)
+
+ def items(self):
+ url = "{}/{}/thread/{}.json".format(
+ self.root, self.board, self.thread)
+ thread = self.request(url).json()
+ thread["threadId"] = thread["postId"]
+ posts = thread.pop("replies", ())
+
+ yield Message.Directory, thread
+ for post in itertools.chain((thread,), posts):
+ files = post.pop("files", ())
+ if files:
+ thread.update(post)
+ thread["count"] = len(files)
+ for num, file in enumerate(files):
+ url = self.root + "/file/" + file["filename"]
+ file.update(thread)
+ file["num"] = num
+ file["siteFilename"] = file["filename"]
+ text.nameext_from_url(file["originalFilename"], file)
+ yield Message.Url, url, file
+
+
+class JschanBoardExtractor(JschanExtractor):
+ """Extractor for jschan boards"""
+ subcategory = "board"
+ pattern = (BASE_PATTERN + r"/([^/?#]+)"
+ r"(?:/index\.html|/catalog\.html|/\d+\.html|/?$)")
+ test = (
+ ("https://94chan.org/art/", {
+ "pattern": JschanThreadExtractor.pattern,
+ "count": ">= 30"
+ }),
+ ("https://94chan.org/art/2.html"),
+ ("https://94chan.org/art/catalog.html"),
+ ("https://94chan.org/art/index.html"),
+ )
+
+ def __init__(self, match):
+ JschanExtractor.__init__(self, match)
+ self.board = match.group(match.lastindex)
+
+ def items(self):
+ url = "{}/{}/catalog.json".format(self.root, self.board)
+ for thread in self.request(url).json():
+ url = "{}/{}/thread/{}.html".format(
+ self.root, self.board, thread["postId"])
+ thread["_extractor"] = JschanThreadExtractor
+ yield Message.Queue, url, thread
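
Note: a quick smoke test for the new module; extractor.find() returns the first extractor whose pattern matches the given URL.

    from gallery_dl import extractor

    ex = extractor.find("https://94chan.org/art/thread/25.html")
    print(ex.__class__.__name__)  # JschanThreadExtractor
    print(ex.board, ex.thread)    # art 25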
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 915fbe6..5aeefeb 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -14,7 +14,7 @@ from ..cache import cache
import itertools
import re
-BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.party"
+BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.(party|su)"
USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
HASH_PATTERN = r"/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})"
@@ -29,10 +29,11 @@ class KemonopartyExtractor(Extractor):
cookiedomain = ".kemono.party"
def __init__(self, match):
- if match.group(1) == "coomer":
- self.category = "coomerparty"
- self.cookiedomain = ".coomer.party"
+ domain = match.group(1)
+ tld = match.group(2)
+ self.category = domain + "party"
self.root = text.root_from_url(match.group(0))
+ self.cookiedomain = ".{}.{}".format(domain, tld)
Extractor.__init__(self, match)
self.session.headers["Referer"] = self.root + "/"
@@ -40,7 +41,7 @@ class KemonopartyExtractor(Extractor):
self._prepare_ddosguard_cookies()
self._find_inline = re.compile(
- r'src="(?:https?://(?:kemono|coomer)\.party)?(/inline/[^"]+'
+ r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+'
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
find_hash = re.compile(HASH_PATTERN).match
generators = self._build_file_generators(self.config("files"))
@@ -224,11 +225,12 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
"options": (("max-posts", 25),),
"count": "< 100",
}),
+ ("https://kemono.su/subscribestar/user/alcorart"),
("https://kemono.party/subscribestar/user/alcorart"),
)
def __init__(self, match):
- _, service, user_id, offset = match.groups()
+ _, _, service, user_id, offset = match.groups()
self.subcategory = service
KemonopartyExtractor.__init__(self, match)
self.api_url = "{}/api/{}/user/{}".format(self.root, service, user_id)
@@ -329,13 +331,14 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
r"f51c10adc9dabd86e92bd52339f298b9\.txt",
"content": "da39a3ee5e6b4b0d3255bfef95601890afd80709", # empty
}),
+ ("https://kemono.su/subscribestar/user/alcorart/post/184330"),
("https://kemono.party/subscribestar/user/alcorart/post/184330"),
("https://www.kemono.party/subscribestar/user/alcorart/post/184330"),
("https://beta.kemono.party/subscribestar/user/alcorart/post/184330"),
)
def __init__(self, match):
- _, service, user_id, post_id = match.groups()
+ _, _, service, user_id, post_id = match.groups()
self.subcategory = service
KemonopartyExtractor.__init__(self, match)
self.api_url = "{}/api/{}/user/{}/post/{}".format(
@@ -361,9 +364,9 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
"count": 4,
"keyword": {"channel_name": "finish-work"},
}),
- (("https://kemono.party/discord"
+ (("https://kemono.su/discord"
"/server/256559665620451329/channel/462437519519383555#"), {
- "pattern": r"https://kemono\.party/data/("
+ "pattern": r"https://kemono\.su/data/("
r"e3/77/e377e3525164559484ace2e64425b0cec1db08.*\.png|"
r"51/45/51453640a5e0a4d23fbf57fb85390f9c5ec154.*\.gif)",
"keyword": {"hash": "re:e377e3525164559484ace2e64425b0cec1db08"
@@ -382,7 +385,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
- _, self.server, self.channel, self.channel_name = match.groups()
+ _, _, self.server, self.channel, self.channel_name = match.groups()
def items(self):
self._prepare_ddosguard_cookies()
@@ -457,14 +460,20 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
subcategory = "discord-server"
pattern = BASE_PATTERN + r"/discord/server/(\d+)$"
- test = ("https://kemono.party/discord/server/488668827274444803", {
- "pattern": KemonopartyDiscordExtractor.pattern,
- "count": 13,
- })
+ test = (
+ ("https://kemono.party/discord/server/488668827274444803", {
+ "pattern": KemonopartyDiscordExtractor.pattern,
+ "count": 13,
+ }),
+ ("https://kemono.su/discord/server/488668827274444803", {
+ "pattern": KemonopartyDiscordExtractor.pattern,
+ "count": 13,
+ }),
+ )
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
- self.server = match.group(2)
+ self.server = match.group(3)
def items(self):
url = "{}/api/discord/channels/lookup?q={}".format(
@@ -493,11 +502,16 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
"url": "ecfccf5f0d50b8d14caa7bbdcf071de5c1e5b90f",
"count": 3,
}),
+ ("https://kemono.su/favorites?type=post", {
+ "pattern": KemonopartyPostExtractor.pattern,
+ "url": "4be8e84cb384a907a8e7997baaf6287b451783b5",
+ "count": 3,
+ }),
)
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
- self.favorites = (text.parse_query(match.group(2)).get("type") or
+ self.favorites = (text.parse_query(match.group(3)).get("type") or
self.config("favorites") or
"artist")
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index cdaf595..861959e 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -15,6 +15,9 @@ from datetime import datetime, timedelta
import itertools
import hashlib
+BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
+USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)"
+
class PixivExtractor(Extractor):
"""Base class for pixiv extractors"""
@@ -150,7 +153,7 @@ class PixivExtractor(Extractor):
class PixivUserExtractor(PixivExtractor):
"""Extractor for a pixiv user profile"""
subcategory = "user"
- pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
+ pattern = (BASE_PATTERN + r"/(?:"
r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id="
r")(\d+)(?:$|[?#])")
test = (
@@ -168,18 +171,19 @@ class PixivUserExtractor(PixivExtractor):
def items(self):
base = "{}/users/{}/".format(self.root, self.user_id)
return self._dispatch_extractors((
- (PixivAvatarExtractor , base + "avatar"),
- (PixivBackgroundExtractor, base + "background"),
- (PixivArtworksExtractor , base + "artworks"),
- (PixivFavoriteExtractor , base + "bookmarks/artworks"),
- (PixivNovelUserExtractor , base + "novels"),
+ (PixivAvatarExtractor , base + "avatar"),
+ (PixivBackgroundExtractor , base + "background"),
+ (PixivArtworksExtractor , base + "artworks"),
+ (PixivFavoriteExtractor , base + "bookmarks/artworks"),
+ (PixivNovelBookmarkExtractor, base + "bookmarks/novels"),
+ (PixivNovelUserExtractor , base + "novels"),
), ("artworks",))
class PixivArtworksExtractor(PixivExtractor):
"""Extractor for artworks of a pixiv user"""
subcategory = "artworks"
- pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
+ pattern = (BASE_PATTERN + r"/(?:"
r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
r"(?:/([^/?#]+))?/?(?:$|[?#])"
r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
@@ -240,8 +244,7 @@ class PixivAvatarExtractor(PixivExtractor):
subcategory = "avatar"
filename_fmt = "avatar{date:?_//%Y-%m-%d}.{extension}"
archive_fmt = "avatar_{user[id]}_{date}"
- pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net"
- r"/(?:en/)?users/(\d+)/avatar")
+ pattern = USER_PATTERN + r"/avatar"
test = ("https://www.pixiv.net/en/users/173530/avatar", {
"content": "4e57544480cc2036ea9608103e8f024fa737fe66",
})
@@ -261,8 +264,7 @@ class PixivBackgroundExtractor(PixivExtractor):
subcategory = "background"
filename_fmt = "background{date:?_//%Y-%m-%d}.{extension}"
archive_fmt = "background_{user[id]}_{date}"
- pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net"
- r"/(?:en/)?users/(\d+)/background")
+ pattern = USER_PATTERN + "/background"
test = ("https://www.pixiv.net/en/users/194921/background", {
"pattern": r"https://i\.pximg\.net/background/img/2021/01/30/16/12/02"
r"/194921_af1f71e557a42f499213d4b9eaccc0f8\.jpg",
@@ -376,12 +378,12 @@ class PixivWorkExtractor(PixivExtractor):
class PixivFavoriteExtractor(PixivExtractor):
- """Extractor for all favorites/bookmarks of a pixiv-user"""
+ """Extractor for all favorites/bookmarks of a pixiv user"""
subcategory = "favorite"
directory_fmt = ("{category}", "bookmarks",
"{user_bookmark[id]} {user_bookmark[account]}")
archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}"
- pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:(?:en/)?"
+ pattern = (BASE_PATTERN + r"/(?:(?:en/)?"
r"users/(\d+)/(bookmarks/artworks|following)(?:/([^/?#]+))?"
r"|bookmark\.php)(?:\?([^#]*))?")
test = (
@@ -484,8 +486,7 @@ class PixivRankingExtractor(PixivExtractor):
archive_fmt = "r_{ranking[mode]}_{ranking[date]}_{id}{num}.{extension}"
directory_fmt = ("{category}", "rankings",
"{ranking[mode]}", "{ranking[date]}")
- pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
- r"/ranking\.php(?:\?([^#]*))?")
+ pattern = BASE_PATTERN + r"/ranking\.php(?:\?([^#]*))?"
test = (
("https://www.pixiv.net/ranking.php?mode=daily&date=20170818"),
("https://www.pixiv.net/ranking.php"),
@@ -550,8 +551,7 @@ class PixivSearchExtractor(PixivExtractor):
subcategory = "search"
archive_fmt = "s_{search[word]}_{id}{num}.{extension}"
directory_fmt = ("{category}", "search", "{search[word]}")
- pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
- r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?"
+ pattern = (BASE_PATTERN + r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?"
r"|search\.php)(?:\?([^#]+))?")
test = (
("https://www.pixiv.net/en/tags/Original", {
@@ -634,8 +634,7 @@ class PixivFollowExtractor(PixivExtractor):
subcategory = "follow"
archive_fmt = "F_{user_follow[id]}_{id}{num}.{extension}"
directory_fmt = ("{category}", "following")
- pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
- r"/bookmark_new_illust\.php")
+ pattern = BASE_PATTERN + r"/bookmark_new_illust\.php"
test = (
("https://www.pixiv.net/bookmark_new_illust.php"),
("https://touch.pixiv.net/bookmark_new_illust.php"),
@@ -697,8 +696,7 @@ class PixivSeriesExtractor(PixivExtractor):
directory_fmt = ("{category}", "{user[id]} {user[account]}",
"{series[id]} {series[title]}")
filename_fmt = "{num_series:>03}_{id}_p{num}.{extension}"
- pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net"
- r"/user/(\d+)/series/(\d+)")
+ pattern = BASE_PATTERN + r"/user/(\d+)/series/(\d+)"
test = ("https://www.pixiv.net/user/10509347/series/21859", {
"range": "1-10",
"count": 10,
@@ -755,8 +753,7 @@ class PixivNovelExtractor(PixivExtractor):
"""Extractor for pixiv novels"""
subcategory = "novel"
request_interval = 1.0
- pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
- r"/n(?:ovel/show\.php\?id=|/)(\d+)")
+ pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)"
test = (
("https://www.pixiv.net/novel/show.php?id=19612040", {
"count": 1,
@@ -799,6 +796,12 @@ class PixivNovelExtractor(PixivExtractor):
"options": (("embeds", True),),
"count": 3,
}),
+ # full series
+ ("https://www.pixiv.net/novel/show.php?id=19612040", {
+ "options": (("full-series", True),),
+ "count": 4,
+ }),
+ # short URL
("https://www.pixiv.net/n/19612040"),
)
@@ -862,7 +865,7 @@ class PixivNovelExtractor(PixivExtractor):
illusts = {}
for marker in text.extract_iter(content, "[", "]"):
- if marker.startswith("[jumpuri:"):
+ if marker.startswith("[jumpuri:If you would like to "):
desktop = True
elif marker.startswith("pixivimage:"):
illusts[marker[11:].partition("-")[0]] = None
@@ -895,14 +898,17 @@ class PixivNovelExtractor(PixivExtractor):
yield Message.Queue, url, novel
def novels(self):
- return (self.api.novel_detail(self.novel_id),)
+ novel = self.api.novel_detail(self.novel_id)
+ if self.config("full-series") and novel["series"]:
+ self.subcategory = PixivNovelSeriesExtractor.subcategory
+ return self.api.novel_series(novel["series"]["id"])
+ return (novel,)
class PixivNovelUserExtractor(PixivNovelExtractor):
"""Extractor for pixiv users' novels"""
subcategory = "novel-user"
- pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
- r"/(?:en/)?users/(\d+)/novels")
+ pattern = USER_PATTERN + r"/novels"
test = ("https://www.pixiv.net/en/users/77055466/novels", {
"pattern": "^text:",
"range": "1-5",
@@ -916,8 +922,7 @@ class PixivNovelUserExtractor(PixivNovelExtractor):
class PixivNovelSeriesExtractor(PixivNovelExtractor):
"""Extractor for pixiv novel series"""
subcategory = "novel-series"
- pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
- r"/novel/series/(\d+)")
+ pattern = BASE_PATTERN + r"/novel/series/(\d+)"
test = ("https://www.pixiv.net/novel/series/10278364", {
"count": 4,
"content": "b06abed001b3f6ccfb1579699e9a238b46d38ea2",
@@ -927,6 +932,37 @@ class PixivNovelSeriesExtractor(PixivNovelExtractor):
return self.api.novel_series(self.novel_id)
+class PixivNovelBookmarkExtractor(PixivNovelExtractor):
+ """Extractor for bookmarked pixiv novels"""
+ subcategory = "novel-bookmark"
+ pattern = (USER_PATTERN + r"/bookmarks/novels"
+ r"(?:/([^/?#]+))?(?:/?\?([^#]+))?")
+ test = (
+ ("https://www.pixiv.net/en/users/77055466/bookmarks/novels", {
+ "count": 1,
+ "content": "7194e8faa876b2b536f185ee271a2b6e46c69089",
+ }),
+ ("https://www.pixiv.net/en/users/11/bookmarks/novels/TAG?rest=hide"),
+ )
+
+ def __init__(self, match):
+ PixivNovelExtractor.__init__(self, match)
+ self.user_id, self.tag, self.query = match.groups()
+
+ def novels(self):
+ if self.tag:
+ tag = text.unquote(self.tag)
+ else:
+ tag = None
+
+ if text.parse_query(self.query).get("rest") == "hide":
+ restrict = "private"
+ else:
+ restrict = "public"
+
+ return self.api.user_bookmarks_novel(self.user_id, tag, restrict)
+
+
class PixivSketchExtractor(Extractor):
"""Extractor for user pages on sketch.pixiv.net"""
category = "pixiv"
@@ -1113,6 +1149,11 @@ class PixivAppAPI():
params = {"user_id": user_id, "tag": tag, "restrict": restrict}
return self._pagination("/v1/user/bookmarks/illust", params)
+ def user_bookmarks_novel(self, user_id, tag=None, restrict="public"):
+ """Return novels bookmarked by a user"""
+ params = {"user_id": user_id, "tag": tag, "restrict": restrict}
+ return self._pagination("/v1/user/bookmarks/novel", params, "novels")
+
def user_bookmark_tags_illust(self, user_id, restrict="public"):
"""Return bookmark tags defined by a user"""
params = {"user_id": user_id, "restrict": restrict}
diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py
index f8497c0..f19e33c 100644
--- a/gallery_dl/extractor/pornhub.py
+++ b/gallery_dl/extractor/pornhub.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,7 +11,6 @@
from .common import Extractor, Message
from .. import text, exception
-
BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?pornhub\.com"
@@ -146,10 +145,20 @@ class PornhubUserExtractor(PornhubExtractor):
data = {"_extractor": PornhubGalleryExtractor}
while True:
- page = self.request(
- url, method="POST", headers=headers, params=params).text
- if not page:
- return
- for gid in text.extract_iter(page, 'id="albumphoto', '"'):
+ response = self.request(
+ url, method="POST", headers=headers, params=params,
+ allow_redirects=False)
+
+ if 300 <= response.status_code < 400:
+ url = "{}{}/photos/{}/ajax".format(
+ self.root, response.headers["location"],
+ self.cat or "public")
+ continue
+
+ gid = None
+ for gid in text.extract_iter(response.text, 'id="albumphoto', '"'):
yield Message.Queue, self.root + "/album/" + gid, data
+ if gid is None:
+ return
+
params["page"] += 1
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 3f09e13..9a57dcf 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -56,7 +56,10 @@ class RedditExtractor(Extractor):
submission["num"] = 0
if "crosspost_parent_list" in submission:
- media = submission["crosspost_parent_list"][-1]
+ try:
+ media = submission["crosspost_parent_list"][-1]
+ except Exception:
+ media = submission
else:
media = submission
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index eaaef7d..bfd18b5 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020-2022 Mike Fährmann
+# Copyright 2020-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -16,7 +16,8 @@ from ..cache import memcache
class RedgifsExtractor(Extractor):
"""Base class for redgifs extractors"""
category = "redgifs"
- filename_fmt = "{category}_{id}.{extension}"
+ filename_fmt = \
+ "{category}_{gallery:?//[:11]}{num:?_/_/>02}{id}.{extension}"
archive_fmt = "{id}"
root = "https://www.redgifs.com"
@@ -34,16 +35,32 @@ class RedgifsExtractor(Extractor):
def items(self):
metadata = self.metadata()
+
for gif in self.gifs():
- url = self._process(gif)
- if not url:
- self.log.warning("Skipping '%s' (format not available)",
- gif["id"])
- continue
+
+ gallery = gif.get("gallery")
+ if gallery:
+ gifs = self.api.gallery(gallery)["gifs"]
+ enum = 1
+ cnt = len(gifs)
+ else:
+ gifs = (gif,)
+ enum = 0
+ cnt = 1
gif.update(metadata)
+ gif["count"] = cnt
yield Message.Directory, gif
- yield Message.Url, url, gif
+
+ for num, gif in enumerate(gifs, enum):
+ url = self._process(gif)
+ if not url:
+ self.log.warning(
+ "Skipping '%s' (format not available)", gif["id"])
+ continue
+ gif["num"] = num
+ gif["count"] = cnt
+ yield Message.Url, url, gif
def _process(self, gif):
gif["_fallback"] = formats = self._formats(gif)
@@ -145,21 +162,36 @@ class RedgifsSearchExtractor(RedgifsExtractor):
"""Extractor for redgifs search results"""
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
- pattern = r"(?:https?://)?(?:\w+\.)?redgifs\.com/browse/?\?([^#]+)"
+ pattern = (r"(?:https?://)?(?:\w+\.)?redgifs\.com"
+ r"/(?:gifs/([^/?#]+)|browse)(?:/?\?([^#]+))?")
test = (
+ ("https://www.redgifs.com/gifs/jav", {
+ "pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.(mp4|jpg)",
+ "range": "1-10",
+ "count": 10,
+ }),
("https://www.redgifs.com/browse?tags=JAV", {
"pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.(mp4|jpg)",
"range": "1-10",
"count": 10,
}),
- ("https://v3.redgifs.com/browse?tags=JAV"),
+ ("https://www.redgifs.com/gifs/jav?order=best&verified=1"),
("https://www.redgifs.com/browse?type=i&verified=y&order=top7"),
+ ("https://v3.redgifs.com/browse?tags=JAV"),
)
+ def __init__(self, match):
+ RedgifsExtractor.__init__(self, match)
+ self.search, self.query = match.groups()
+
def metadata(self):
- self.params = params = text.parse_query(self.key)
- search = params.get("tags") or params.get("order") or "trending"
- return {"search": search}
+ self.params = text.parse_query(self.query)
+ if self.search:
+ self.params["tags"] = text.unquote(self.search)
+
+ return {"search": (self.params.get("tags") or
+ self.params.get("order") or
+ "trending")}
def gifs(self):
return self.api.search(self.params)
@@ -178,6 +210,16 @@ class RedgifsImageExtractor(RedgifsExtractor):
r"/FoolishForkedAbyssiniancat\.mp4",
"content": "f6e03f1df9a2ff2a74092f53ee7580d2fb943533",
}),
+ # gallery (#4021)
+ ("https://www.redgifs.com/watch/desertedbaregraywolf", {
+ "pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.jpg",
+ "count": 4,
+ "keyword": {
+ "num": int,
+ "count": 4,
+ "gallery": "187ad979693-1922-fc66-0000-a96fb07b8a5d",
+ },
+ }),
("https://redgifs.com/ifr/FoolishForkedAbyssiniancat"),
("https://i.redgifs.com/i/FoolishForkedAbyssiniancat"),
("https://www.gifdeliverynetwork.com/foolishforkedabyssiniancat"),
@@ -207,6 +249,10 @@ class RedgifsAPI():
endpoint = "/v2/gifs/" + gif_id.lower()
return self._call(endpoint)["gif"]
+ def gallery(self, gallery_id):
+ endpoint = "/v2/gallery/" + gallery_id
+ return self._call(endpoint)
+
def user(self, user, order="best"):
endpoint = "/v2/users/{}/search".format(user.lower())
params = {"order": order}
@@ -228,7 +274,6 @@ class RedgifsAPI():
def search(self, params):
endpoint = "/v2/gifs/search"
params["search_text"] = params.pop("tags", None)
- params.pop("needSendGtm", None)
return self._pagination(endpoint, params)
def _call(self, endpoint, params=None):
diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py
index 34177b4..6d025f4 100644
--- a/gallery_dl/extractor/senmanga.py
+++ b/gallery_dl/extractor/senmanga.py
@@ -1,64 +1,88 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2019 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract manga-chapters from from https://raw.senmanga.com/"""
+"""Extractors for https://raw.senmanga.com/"""
-from .common import Extractor, Message
+from .common import ChapterExtractor
from .. import text
-class SenmangaChapterExtractor(Extractor):
- """Extractor for manga-chapters from raw.senmanga.com"""
+class SenmangaChapterExtractor(ChapterExtractor):
+ """Extractor for manga chapters from raw.senmanga.com"""
category = "senmanga"
- subcategory = "chapter"
- directory_fmt = ("{category}", "{manga}", "{chapter_string}")
- filename_fmt = "{manga}_{chapter_string}_{page:>03}.{extension}"
- archive_fmt = "{manga}_{chapter_string}_{page}"
- pattern = r"(?:https?://)?raw\.senmanga\.com/([^/]+/[^/]+)"
+ root = "https://raw.senmanga.com"
+ pattern = r"(?:https?://)?raw\.senmanga\.com(/[^/?#]+/[^/?#]+)"
test = (
- ("http://raw.senmanga.com/Bokura-wa-Minna-Kawaisou/37A/1", {
+ ("https://raw.senmanga.com/Bokura-wa-Minna-Kawaisou/37A/1", {
+ "pattern": r"https://raw\.senmanga\.com/viewer"
+ r"/Bokura-wa-Minna-Kawaisou/37A/[12]",
"url": "5f95140ff511d8497e2ec08fa7267c6bb231faec",
- "keyword": "705d941a150765edb33cd2707074bd703a93788c",
"content": "556a16d5ca3441d7a5807b6b5ac06ec458a3e4ba",
+ "keyword": {
+ "chapter": "37A",
+ "count": 2,
+ "extension": "",
+ "filename": "re:[12]",
+ "lang": "ja",
+ "language": "Japanese",
+ "manga": "Bokura wa Minna Kawaisou",
+ "page": int,
+ },
}),
("http://raw.senmanga.com/Love-Lab/2016-03/1", {
+ "pattern": r"https://raw\.senmanga\.com/viewer"
+ r"/Love-Lab/2016-03/\d",
"url": "8347b9f00c14b864dd3c19a1f5ae52adb2ef00de",
- "keyword": "8a8ab2529ba2edfc83a6b3a8bede1d6c580db7b4",
+ "keyword": {
+ "chapter": "2016-03",
+ "count": 9,
+ "extension": "",
+ "filename": r"re:\d",
+ "manga": "Renai Lab 恋愛ラボ",
+ },
+ }),
+ ("https://raw.senmanga.com/akabane-honeko-no-bodyguard/1", {
+ "pattern": r"https://i\d\.wp\.com/kumacdn.club/image-new-2/a"
+ r"/akabane-honeko-no-bodyguard/chapter-1"
+ r"/\d+-[0-9a-f]{13}\.jpg",
+ "keyword": {
+ "chapter": "1",
+ "count": 65,
+ "extension": "jpg",
+ "filename": r"re:\d+-\w+",
+ "manga": "Akabane Honeko no Bodyguard",
+ },
}),
)
- root = "https://raw.senmanga.com"
def __init__(self, match):
- Extractor.__init__(self, match)
- part = match.group(1)
- self.chapter_url = "{}/{}/".format(self.root, part)
- self.img_url = "{}/viewer/{}/".format(self.root, part)
- self.session.headers["Referer"] = self.chapter_url
+ ChapterExtractor.__init__(self, match)
+ self.session.headers["Referer"] = self.gallery_url
- def items(self):
- data = self.metadata()
- yield Message.Directory, data
- for data["page"] in range(1, data["count"]+1):
- data["extension"] = None
- yield Message.Url, self.img_url + str(data["page"]), data
+ # select "All pages" viewer
+ self.session.cookies.set(
+ "viewer", "1", domain="raw.senmanga.com")
- def metadata(self):
- """Collect metadata for extractor-job"""
- page = self.request(self.chapter_url).text
- self.session.cookies.clear()
- title, pos = text.extract(page, '<title>', '</title>')
- count, pos = text.extract(page, '</select> of ', '\n', pos)
+ def metadata(self, page):
+ title = text.extr(page, "<title>", "</title>")
manga, _, chapter = title.partition(" - Chapter ")
return {
- "manga": text.unescape(manga).replace("-", " "),
- "chapter_string": chapter.partition(" - Page ")[0],
- "count": text.parse_int(count),
- "lang": "jp",
- "language": "Japanese",
+ "manga" : text.unescape(manga).replace("-", " "),
+ "chapter" : chapter.partition(" - Page ")[0],
+ "chapter_minor": "",
+ "lang" : "ja",
+ "language" : "Japanese",
}
+
+ def images(self, page):
+ return [
+ (url, None)
+ for url in text.extract_iter(
+ page, '<img class="picture" src="', '"')
+ ]
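
Note: the new images() relies on text.extract_iter(), which yields every substring between the two delimiters; a sketch with made-up markup:

    from gallery_dl import text

    page = ('<img class="picture" src="https://example.org/1.jpg">'
            '<img class="picture" src="https://example.org/2.jpg">')

    print([
        (url, None)
        for url in text.extract_iter(page, '<img class="picture" src="', '"')
    ])
    # [('https://example.org/1.jpg', None), ('https://example.org/2.jpg', None)]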
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index c47021e..710bde3 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -295,6 +295,8 @@ class TwitterExtractor(Extractor):
tget("quoted_by_id_str")),
"reply_id" : text.parse_int(
tget("in_reply_to_status_id_str")),
+ "conversation_id": text.parse_int(
+ tget("conversation_id_str")),
"date" : date,
"author" : author,
"user" : self._user or author,
@@ -664,8 +666,8 @@ class TwitterSearchExtractor(TwitterExtractor):
subcategory = "search"
pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)"
test = ("https://twitter.com/search?q=nature", {
- "range": "1-40",
- "count": 40,
+ "range": "1-20",
+ "count": 20,
"archive": False,
})
@@ -1058,7 +1060,7 @@ class TwitterAPI():
def __init__(self, extractor):
self.extractor = extractor
- self.root = "https://api.twitter.com"
+ self.root = "https://twitter.com/i/api"
self._nsfw_warning = True
self._syndication = self.extractor.syndication
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
@@ -1077,6 +1079,10 @@ class TwitterAPI():
auth_token = cookies.get("auth_token", domain=cookiedomain)
+ search = extractor.config("search-endpoint")
+ if search == "graphql" or not auth_token and search in ("auto", None):
+ self.search_adaptive = self.search_timeline
+
self.headers = {
"Accept": "*/*",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
@@ -1087,7 +1093,6 @@ class TwitterAPI():
"x-twitter-client-language": "en",
"x-twitter-active-user": "yes",
"x-csrf-token": csrf_token,
- "Origin": "https://twitter.com",
"Referer": "https://twitter.com/",
}
self.params = {
@@ -1131,47 +1136,44 @@ class TwitterAPI():
"enrichments,superFollowMetadata,unmentionInfo,editControl,"
"collab_control,vibe",
}
- self.variables = {
- "withDownvotePerspective": False,
- "withReactionsMetadata": False,
- "withReactionsPerspective": False,
- }
self.features = {
- "blue_business_profile_image_shape_enabled": False,
- "responsive_web_twitter_blue_verified_badge_is_enabled": True,
+ "hidden_profile_likes_enabled": False,
"responsive_web_graphql_exclude_directive_enabled": True,
"verified_phone_label_enabled": False,
- "responsive_web_graphql_skip_user_profile_"
- "image_extensions_enabled": False,
+ "subscriptions_verification_info_verified_since_enabled": True,
+ "highlights_tweets_tab_ui_enabled": True,
+ "creator_subscriptions_tweet_preview_api_enabled": True,
+ "responsive_web_graphql_"
+ "skip_user_profile_image_extensions_enabled": False,
"responsive_web_graphql_timeline_navigation_enabled": True,
}
self.features_pagination = {
- "blue_business_profile_image_shape_enabled": False,
- "responsive_web_twitter_blue_verified_badge_is_enabled": True,
+ "rweb_lists_timeline_redesign_enabled": True,
"responsive_web_graphql_exclude_directive_enabled": True,
"verified_phone_label_enabled": False,
+ "creator_subscriptions_tweet_preview_api_enabled": True,
"responsive_web_graphql_timeline_navigation_enabled": True,
"responsive_web_graphql_skip_user_profile_"
"image_extensions_enabled": False,
"tweetypie_unmention_optimization_enabled": True,
- "vibe_api_enabled": True,
"responsive_web_edit_tweet_api_enabled": True,
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
"view_counts_everywhere_api_enabled": True,
"longform_notetweets_consumption_enabled": True,
"tweet_awards_web_tipping_enabled": False,
- "freedom_of_speech_not_reach_fetch_enabled": False,
+ "freedom_of_speech_not_reach_fetch_enabled": True,
"standardized_nudges_misinfo": True,
"tweet_with_visibility_results_prefer_gql_"
"limited_actions_policy_enabled": False,
"interactive_text_enabled": True,
"responsive_web_text_conversations_enabled": False,
- "longform_notetweets_richtext_consumption_enabled": False,
+ "longform_notetweets_rich_text_read_enabled": True,
+ "longform_notetweets_inline_media_enabled": False,
"responsive_web_enhance_cards_enabled": False,
}
def tweet_detail(self, tweet_id):
- endpoint = "/graphql/AV_lPTkN6Fc6LgerQpK8Zg/TweetDetail"
+ endpoint = "/graphql/JlLZj42Ltr2qwjasw-l5lQ/TweetDetail"
variables = {
"focalTweetId": tweet_id,
"referrer": "profile",
@@ -1179,9 +1181,7 @@ class TwitterAPI():
"includePromotedContent": True,
"withCommunity": True,
"withQuickPromoteEligibilityTweetFields": True,
- "withBirdwatchNotes": False,
- "withSuperFollowsUserFields": True,
- "withSuperFollowsTweetFields": True,
+ "withBirdwatchNotes": True,
"withVoice": True,
"withV2Timeline": True,
}
@@ -1189,7 +1189,7 @@ class TwitterAPI():
endpoint, variables, ("threaded_conversation_with_injections_v2",))
def user_tweets(self, screen_name):
- endpoint = "/graphql/BeHK76TOCY3P8nO-FWocjA/UserTweets"
+ endpoint = "/graphql/-AY51QoFpVf-w7TxjQ6lpw/UserTweets"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1201,7 +1201,7 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_tweets_and_replies(self, screen_name):
- endpoint = "/graphql/eZVlZu_1gwb6hMUDXBnZoQ/UserTweetsAndReplies"
+ endpoint = "/graphql/urrCZMyyIh1FkSFi2cdPUA/UserTweetsAndReplies"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1213,7 +1213,7 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_media(self, screen_name):
- endpoint = "/graphql/d_ONZLUHGCsErBCriRsLXg/UserMedia"
+ endpoint = "/graphql/lo965xQZdN2-eSM1Jc-W_A/UserMedia"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1246,7 +1246,7 @@ class TwitterAPI():
features=False)
def user_likes(self, screen_name):
- endpoint = "/graphql/fN4-E0MjFJ9Cn7IYConL7g/Likes"
+ endpoint = "/graphql/6JET1d0iHsIzW0Zjs3OOwQ/Likes"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1259,7 +1259,7 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_bookmarks(self):
- endpoint = "/graphql/RV1g3b8n_SGOHwkqKYSCFw/Bookmarks"
+ endpoint = "/graphql/YNtYqNuki6_oiVwx0uP8mQ/Bookmarks"
variables = {
"count": 100,
}
@@ -1270,7 +1270,7 @@ class TwitterAPI():
features=features)
def list_latest_tweets_timeline(self, list_id):
- endpoint = "/graphql/5DAiJG3bD77SiWEs4xViBw/ListLatestTweetsTimeline"
+ endpoint = "/graphql/ZBbXrl37E6za5ml-DIpmgg/ListLatestTweetsTimeline"
variables = {
"listId": list_id,
"count": 100,
@@ -1288,6 +1288,24 @@ class TwitterAPI():
params["spelling_corrections"] = "1"
return self._pagination_legacy(endpoint, params)
+ def search_timeline(self, query):
+ endpoint = "/graphql/7jT5GT59P8IFjgxwqnEdQw/SearchTimeline"
+ variables = {
+ "rawQuery": query,
+ "count": 20,
+ "product": "Latest",
+ "withDownvotePerspective": False,
+ "withReactionsMetadata": False,
+ "withReactionsPerspective": False,
+ }
+ features = self.features_pagination.copy()
+ features["blue_business_profile_image_shape_enabled"] = False
+ features["vibe_api_enabled"] = True
+ return self._pagination_tweets(
+ endpoint, variables,
+ ("search_by_raw_query", "search_timeline", "timeline"),
+ features=features)
+
def live_event_timeline(self, event_id):
endpoint = "/2/live_event/timeline/{}.json".format(event_id)
params = self.params.copy()
@@ -1305,11 +1323,10 @@ class TwitterAPI():
["twitter_objects"]["live_events"][event_id])
def list_by_rest_id(self, list_id):
- endpoint = "/graphql/D0EoyrDcct2MEqC-LnPzFg/ListByRestId"
+ endpoint = "/graphql/AmCdeFUvlrKAO96yHr-GCg/ListByRestId"
params = {
"variables": self._json_dumps({
"listId": list_id,
- "withSuperFollowsUserFields": True,
}),
"features": self._json_dumps(self.features),
}
@@ -1319,7 +1336,7 @@ class TwitterAPI():
raise exception.NotFoundError("list")
def list_members(self, list_id):
- endpoint = "/graphql/tzsIIbGUH9RyFCVmtO2W2w/ListMembers"
+ endpoint = "/graphql/a_ZQomd3MMk1crWkeiQBPg/ListMembers"
variables = {
"listId": list_id,
"count": 100,
@@ -1329,7 +1346,7 @@ class TwitterAPI():
endpoint, variables, ("list", "members_timeline", "timeline"))
def user_following(self, screen_name):
- endpoint = "/graphql/FaBzCqZXuQCb4PhB0RHqHw/Following"
+ endpoint = "/graphql/JPZiqKjET7_M1r5Tlr8pyA/Following"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1338,18 +1355,20 @@ class TwitterAPI():
return self._pagination_users(endpoint, variables)
def user_by_rest_id(self, rest_id):
- endpoint = "/graphql/S2BkcAyFMG--jef2N6Dgzw/UserByRestId"
+ endpoint = "/graphql/1YAM811Q8Ry4XyPpJclURQ/UserByRestId"
+ features = self.features.copy()
+ features["blue_business_profile_image_shape_enabled"] = True
params = {
"variables": self._json_dumps({
"userId": rest_id,
"withSafetyModeUserFields": True,
}),
- "features": self._json_dumps(self.features),
+ "features": self._json_dumps(features),
}
return self._call(endpoint, params)["data"]["user"]["result"]
def user_by_screen_name(self, screen_name):
- endpoint = "/graphql/k26ASEiniqy4eXMdknTSoQ/UserByScreenName"
+ endpoint = "/graphql/XA6F1nJELYg65hxOC2Ekmg/UserByScreenName"
params = {
"variables": self._json_dumps({
"screen_name": screen_name,
@@ -1380,7 +1399,9 @@ class TwitterAPI():
def _guest_token(self):
endpoint = "/1.1/guest/activate.json"
self.extractor.log.info("Requesting guest token")
- return str(self._call(endpoint, None, "POST", False)["guest_token"])
+ return str(self._call(
+ endpoint, None, "POST", False, "https://api.twitter.com",
+ )["guest_token"])
def _authenticate_guest(self):
guest_token = self._guest_token()
@@ -1389,8 +1410,8 @@ class TwitterAPI():
self.extractor.session.cookies.set(
"gt", guest_token, domain=self.extractor.cookiedomain)
- def _call(self, endpoint, params, method="GET", auth=True):
- url = self.root + endpoint
+ def _call(self, endpoint, params, method="GET", auth=True, root=None):
+ url = (root or self.root) + endpoint
while True:
if not self.headers["x-twitter-auth-type"] and auth:
@@ -1416,6 +1437,12 @@ class TwitterAPI():
self.extractor.wait(until=until, seconds=seconds)
continue
+ if response.status_code == 403 and \
+ not self.headers["x-twitter-auth-type"] and \
+ endpoint == "/2/search/adaptive.json":
+ raise exception.AuthorizationError(
+ "Login required to access search results")
+
# error
try:
data = response.json()
@@ -1524,7 +1551,6 @@ class TwitterAPI():
def _pagination_tweets(self, endpoint, variables,
path=None, stop_tweets=True, features=None):
extr = self.extractor
- variables.update(self.variables)
original_retweets = (extr.retweets == "original")
pinned_tweet = extr.pinned
@@ -1548,11 +1574,17 @@ class TwitterAPI():
instructions = instructions[key]
instructions = instructions["instructions"]
+ cursor = None
+ entries = None
for instr in instructions:
- if instr.get("type") == "TimelineAddEntries":
+ instr_type = instr.get("type")
+ if instr_type == "TimelineAddEntries":
entries = instr["entries"]
- break
- else:
+ elif instr_type == "TimelineReplaceEntry":
+ entry = instr["entry"]
+ if entry["entryId"].startswith("cursor-bottom-"):
+ cursor = entry["content"]["value"]
+ if entries is None:
raise KeyError()
except LookupError:
@@ -1581,7 +1613,7 @@ class TwitterAPI():
"Unable to retrieve Tweets from this timeline")
tweets = []
- tweet = cursor = None
+ tweet = None
if pinned_tweet:
pinned_tweet = False
@@ -1687,7 +1719,6 @@ class TwitterAPI():
variables["cursor"] = cursor
def _pagination_users(self, endpoint, variables, path=None):
- variables.update(self.variables)
params = {"variables": None,
"features" : self._json_dumps(self.features_pagination)}
diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py
index 1cebdf7..6dff01c 100644
--- a/gallery_dl/extractor/vipergirls.py
+++ b/gallery_dl/extractor/vipergirls.py
@@ -9,7 +9,10 @@
"""Extractors for https://vipergirls.to/"""
from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
+from ..cache import cache
+
+from xml.etree import ElementTree
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vipergirls\.to"
@@ -18,26 +21,50 @@ class VipergirlsExtractor(Extractor):
"""Base class for vipergirls extractors"""
category = "vipergirls"
root = "https://vipergirls.to"
+ request_interval = 0.5
+ request_interval_min = 0.2
+ cookiedomain = ".vipergirls.to"
+ cookienames = ("vg_userid", "vg_password")
def __init__(self, match):
Extractor.__init__(self, match)
self.session.headers["Referer"] = self.root
def items(self):
- for html in self.posts():
-
- pos = html.find('<a href="')
- if pos < 0:
- continue
+ self.login()
- title = text.extr(html, '<h2 class="title', '<')
- data = {
- "title": text.unescape(title.partition(">")[2].strip()),
- }
+ for post in self.posts():
+ data = post.attrib
+ data["thread_id"] = self.thread_id
yield Message.Directory, data
- for href in text.extract_iter(html, '<a href="', '"', pos):
- yield Message.Queue, href, data
+ for image in post:
+ yield Message.Queue, image.attrib["main_url"], data
+
+ def login(self):
+ if not self._check_cookies(self.cookienames):
+ username, password = self._get_auth_info()
+ if username:
+ self._update_cookies(self._login_impl(username, password))
+
+ @cache(maxage=90*24*3600, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = "{}/login.php?do=login".format(self.root)
+ data = {
+ "vb_login_username": username,
+ "vb_login_password": password,
+ "do" : "login",
+ "cookieuser" : "1",
+ }
+
+ response = self.request(url, method="POST", data=data)
+ if not response.cookies.get("vg_password"):
+ raise exception.AuthenticationError()
+
+ return {cookie.name: cookie.value
+ for cookie in response.cookies}
class VipergirlsThreadExtractor(VipergirlsExtractor):
@@ -47,11 +74,11 @@ class VipergirlsThreadExtractor(VipergirlsExtractor):
test = (
(("https://vipergirls.to/threads/4328304"
"-2011-05-28-Danica-Simply-Beautiful-x112-4500x3000"), {
- "url": "b22feaa35a358bb36086c2b9353aee28989e1d7a",
- "count": 227,
+ "url": "0d75cb42777f5bebc0d284d1d38cb90c750c61d9",
+ "count": 225,
}),
("https://vipergirls.to/threads/6858916-Karina/page4", {
- "count": 1294,
+ "count": 1279,
}),
("https://vipergirls.to/threads/4328304"),
)
@@ -61,25 +88,20 @@ class VipergirlsThreadExtractor(VipergirlsExtractor):
self.thread_id, self.page = match.groups()
def posts(self):
- url = "{}/threads/{}{}".format(
- self.root, self.thread_id, self.page or "")
-
- while True:
- page = self.request(url).text
- yield from text.extract_iter(
- page, '<div class="postbody">', '</blockquote>')
+ url = "{}/vr.php?t={}".format(self.root, self.thread_id)
+ root = ElementTree.fromstring(self.request(url).text)
+ posts = root.iter("post")
- url = text.extr(page, '<a rel="next" href="', '"')
- if not url:
- return
- url = "{}/{}".format(self.root, url)
+ if self.page:
+ util.advance(posts, (text.parse_int(self.page[5:]) - 1) * 15)
+ return posts
class VipergirlsPostExtractor(VipergirlsExtractor):
"""Extractor for vipergirls posts"""
subcategory = "post"
pattern = (BASE_PATTERN +
- r"/threads/(\d+)(?:-[^/?#]+)?\?(p=\d+[^#]*)#post(\d+)")
+ r"/threads/(\d+)(?:-[^/?#]+)?\?p=\d+[^#]*#post(\d+)")
test = (
(("https://vipergirls.to/threads/4328304-2011-05-28-Danica-Simply-"
"Beautiful-x112-4500x3000?p=116038081&viewfull=1#post116038081"), {
@@ -87,6 +109,10 @@ class VipergirlsPostExtractor(VipergirlsExtractor):
"range": "2-113",
"count": 112,
"keyword": {
+ "id": "116038081",
+ "imagecount": "113",
+ "number": "116038081",
+ "thread_id": "4328304",
"title": "FemJoy Danica - Simply Beautiful (x112) 3000x4500",
},
}),
@@ -94,15 +120,9 @@ class VipergirlsPostExtractor(VipergirlsExtractor):
def __init__(self, match):
VipergirlsExtractor.__init__(self, match)
- self.thread_id, self.query, self.post_id = match.groups()
+ self.thread_id, self.post_id = match.groups()
def posts(self):
- url = "{}/threads/{}?{}".format(self.root, self.thread_id, self.query)
- page = self.request(url).text
-
- try:
- pos = page.index('id="post_' + self.post_id + '"')
- return (text.extract(
- page, '<div class="postbody">', '</blockquote>', pos)[0],)
- except Exception:
- raise exception.NotFoundError("post")
+ url = "{}/vr.php?p={}".format(self.root, self.post_id)
+ root = ElementTree.fromstring(self.request(url).text)
+ return root.iter("post")
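
The vipergirls rewrite drops HTML scraping in favor of the site's vr.php XML endpoint, parsed with ElementTree: each post element's attributes become the Message.Directory metadata, and each image child supplies a main_url to enqueue. A rough sketch of the response shape the new code expects (element and attribute names come from the diff and the test keywords; the sample XML itself is invented for illustration):

    from xml.etree import ElementTree

    # hypothetical vr.php-style response
    sample = """<thread>
      <post id="116038081" number="116038081" imagecount="2"
            title="FemJoy Danica - Simply Beautiful">
        <image main_url="https://example.org/img1.jpg"/>
        <image main_url="https://example.org/img2.jpg"/>
      </post>
    </thread>"""

    root = ElementTree.fromstring(sample)
    for post in root.iter("post"):
        data = post.attrib                    # -> Message.Directory metadata
        for image in post:
            print(image.attrib["main_url"], data["id"])
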
diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py
index 06f1aab..a0fba3c 100644
--- a/gallery_dl/extractor/wallhaven.py
+++ b/gallery_dl/extractor/wallhaven.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2022 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,15 +9,16 @@
"""Extractors for https://wallhaven.cc/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
class WallhavenExtractor(Extractor):
"""Base class for wallhaven extractors"""
category = "wallhaven"
+ root = "https://wallhaven.cc"
filename_fmt = "{category}_{id}_{resolution}.{extension}"
archive_fmt = "{id}"
- root = "https://wallhaven.cc"
+ request_interval = 1.4
def __init__(self, match):
Extractor.__init__(self, match)
@@ -246,8 +247,21 @@ class WallhavenAPI():
def _call(self, endpoint, params=None):
url = "https://wallhaven.cc/api" + endpoint
- return self.extractor.request(
- url, headers=self.headers, params=params).json()
+
+ while True:
+ response = self.extractor.request(
+ url, params=params, headers=self.headers, fatal=None)
+
+ if response.status_code < 400:
+ return response.json()
+ if response.status_code == 429:
+ self.extractor.wait(seconds=60)
+ continue
+
+ self.extractor.log.debug("Server response: %s", response.text)
+ raise exception.StopExtraction(
+ "API request failed (%s: %s)",
+ response.status_code, response.reason)
def _pagination(self, endpoint, params=None, metadata=None):
if params is None:
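
WallhavenAPI._call now retries instead of failing outright: any status below 400 is returned as JSON, a 429 waits out the rate limit (60 seconds, per the diff) before retrying, and anything else aborts after logging the response body. The same pattern reduced to plain requests (a standalone sketch, not the extractor's actual request/wait plumbing, with StopExtraction replaced by RuntimeError):

    import time
    import requests

    def call_api(url, params=None, headers=None):
        while True:
            response = requests.get(url, params=params, headers=headers)
            if response.status_code < 400:
                return response.json()
            if response.status_code == 429:   # rate limited: wait and retry
                time.sleep(60)
                continue
            raise RuntimeError("API request failed ({}: {})".format(
                response.status_code, response.reason))
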
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 2cbfad6..805aa53 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -72,6 +72,8 @@ class WeiboExtractor(Extractor):
file["url"] = "https:" + file["url"][5:]
if "filename" not in file:
text.nameext_from_url(file["url"], file)
+ if file["extension"] == "json":
+ file["extension"] = "mp4"
file["status"] = status
file["num"] = num
yield Message.Url, file["url"], file
@@ -123,7 +125,7 @@ class WeiboExtractor(Extractor):
key=lambda m: m["meta"]["quality_index"])
except Exception:
return {"url": (info.get("stream_url_hd") or
- info["stream_url"])}
+ info.get("stream_url") or "")}
else:
return media["play_info"].copy()
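
The weibo change covers stream URLs whose path ends in .json, which nameext_from_url would otherwise record as the file extension; those files are video, so the extension is rewritten to mp4. Roughly (the sample URL is made up):

    from gallery_dl import text

    file = {}
    text.nameext_from_url("https://f.video.weibocdn.com/abc.json?label=mp4", file)
    if file["extension"] == "json":           # Weibo video URLs may end in .json
        file["extension"] = "mp4"
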
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index 2ff48c3..500eaa1 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -437,6 +437,7 @@ _CONVERSIONS = {
"T": util.datetime_to_timestamp_string,
"d": text.parse_timestamp,
"U": text.unescape,
+ "H": lambda s: text.unescape(text.remove_html(s)),
"g": text.slugify,
"S": util.to_string,
"s": str,
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 3e0290c..09b8612 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.25.5"
+__version__ = "1.25.6"
diff --git a/test/test_formatter.py b/test/test_formatter.py
index 1bda9d9..0992f4b 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -28,6 +28,7 @@ class TestFormatter(unittest.TestCase):
"l": ["a", "b", "c"],
"n": None,
"s": " \n\r\tSPACE ",
+ "h": "<p>foo </p> &amp; bar <p> </p>",
"u": "&#x27;&lt; / &gt;&#x27;",
"t": 1262304000,
"dt": datetime.datetime(2010, 1, 1),
@@ -47,6 +48,10 @@ class TestFormatter(unittest.TestCase):
self._run_test("{s!t}", "SPACE")
self._run_test("{a!U}", self.kwdict["a"])
self._run_test("{u!U}", "'< / >'")
+ self._run_test("{a!H}", self.kwdict["a"])
+ self._run_test("{h!H}", "foo & bar")
+ self._run_test("{u!H}", "'< / >'")
+ self._run_test("{n!H}", "")
self._run_test("{a!s}", self.kwdict["a"])
self._run_test("{a!r}", "'" + self.kwdict["a"] + "'")
self._run_test("{a!a}", "'" + self.kwdict["a"] + "'")
@@ -434,10 +439,10 @@ def noarg():
fmt4 = formatter.parse("\fM " + path + ":lengths")
self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name")
- self.assertEqual(fmt2.format_map(self.kwdict), "96")
+ self.assertEqual(fmt2.format_map(self.kwdict), "126")
self.assertEqual(fmt3.format_map(self.kwdict), "'Title' by Name")
- self.assertEqual(fmt4.format_map(self.kwdict), "96")
+ self.assertEqual(fmt4.format_map(self.kwdict), "126")
with self.assertRaises(TypeError):
self.assertEqual(fmt0.format_map(self.kwdict), "")
diff --git a/test/test_results.py b/test/test_results.py
index d28496b..03a17c4 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -324,7 +324,8 @@ def setup_test_config():
for category in ("danbooru", "atfbooru", "aibooru", "e621", "e926",
"instagram", "twitter", "subscribestar", "deviantart",
- "inkbunny", "tapas", "pillowfort", "mangadex"):
+ "inkbunny", "tapas", "pillowfort", "mangadex",
+ "vipergirls"):
config.set(("extractor", category), "username", None)
config.set(("extractor", "mastodon.social"), "access-token",