path: root/gallery_dl/extractor
author    Unit 193 <unit193@unit193.net>  2025-08-25 02:01:07 -0400
committer Unit 193 <unit193@unit193.net>  2025-08-25 02:01:07 -0400
commit    1df55d9de48105dace9cc16f1511dba3c9a6da6f (patch)
tree      6f6af90bd15a453d7fd1f5253cf01e1db801222f /gallery_dl/extractor
parent    3c1539bde1b47fff0ba81c9d92801fa700fedc3b (diff)
New upstream version 1.30.5 (tag: upstream/1.30.5)
Diffstat (limited to 'gallery_dl/extractor')
-rw-r--r--  gallery_dl/extractor/__init__.py      |   2
-rw-r--r--  gallery_dl/extractor/aryion.py        |   2
-rw-r--r--  gallery_dl/extractor/batoto.py        |  52
-rw-r--r--  gallery_dl/extractor/booru.py         |   3
-rw-r--r--  gallery_dl/extractor/civitai.py       |  11
-rw-r--r--  gallery_dl/extractor/common.py        |   3
-rw-r--r--  gallery_dl/extractor/gelbooru.py      |   5
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py  |  21
-rw-r--r--  gallery_dl/extractor/instagram.py     |  32
-rw-r--r--  gallery_dl/extractor/newgrounds.py    |   1
-rw-r--r--  gallery_dl/extractor/oauth.py         |  20
-rw-r--r--  gallery_dl/extractor/pixiv.py         |  18
-rw-r--r--  gallery_dl/extractor/shimmie2.py      |  61
-rw-r--r--  gallery_dl/extractor/sizebooru.py     | 162
-rw-r--r--  gallery_dl/extractor/tumblr.py        |   2
-rw-r--r--  gallery_dl/extractor/twitter.py       | 410
-rw-r--r--  gallery_dl/extractor/vichan.py        |   4
-rw-r--r--  gallery_dl/extractor/zerochan.py      |   4
-rw-r--r--  gallery_dl/extractor/zzup.py          |  63
19 files changed, 572 insertions, 304 deletions
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 70e79fe..aabaa93 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -170,6 +170,7 @@ modules = [
"sexcom",
"shimmie2",
"simplyhentai",
+ "sizebooru",
"skeb",
"slickpic",
"slideshare",
@@ -217,7 +218,6 @@ modules = [
"xvideos",
"yiffverse",
"zerochan",
- "zzup",
"booru",
"moebooru",
"foolfuuka",
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 8a7cb04..38b8ee4 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -95,7 +95,7 @@ class AryionExtractor(Extractor):
cnt += 1
yield post_id
- if cnt < 40:
+ if cnt < 40 and ">Next &gt;&gt;<" not in page:
return
params["p"] += 1
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index 50e0c5d..a7d1b78 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -8,6 +8,7 @@
from .common import Extractor, ChapterExtractor, MangaExtractor
from .. import text, util
+from ..cache import memcache
BASE_PATTERN = (r"(?:https?://)?("
r"(?:ba|d|f|h|j|m|w)to\.to|"
@@ -113,8 +114,7 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
minor = ""
return {
- "manga" : text.unescape(manga),
- "manga_id" : text.parse_int(manga_id),
+ **_manga_info(self, manga_id),
"chapter_url" : extr(self.chapter_id + "-ch_", '"'),
"title" : text.unescape(text.remove_html(extr(
"selected>", "</option")).partition(" : ")[2]),
@@ -151,17 +151,11 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
def chapters(self, page):
extr = text.extract_from(page)
-
if warning := extr(' class="alert alert-warning">', "</div>"):
self.log.warning("'%s'", text.remove_html(warning))
-
- data = {
- "manga_id": text.parse_int(self.manga_id),
- "manga" : text.unescape(extr(
- "<title>", "<").rpartition(" - ")[0]),
- }
-
extr('<div data-hk="0-0-0-0"', "")
+ data = _manga_info(self, self.manga_id, page)
+
results = []
while True:
href = extr('<a href="/title/', '"')
@@ -179,3 +173,41 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
url = f"{self.root}/title/{href}"
results.append((url, data.copy()))
return results
+
+
+@memcache(keyarg=1)
+def _manga_info(self, manga_id, page=None):
+ if page is None:
+ url = f"{self.root}/title/{manga_id}"
+ page = self.request(url).text
+
+ props = text.extract(page, 'props="', '"', page.find(' prefix="r20" '))[0]
+ data = util.json_loads(text.unescape(props))["data"][1]
+
+ return {
+ "manga" : data["name"][1],
+ "manga_id" : text.parse_int(manga_id),
+ "manga_slug" : data["slug"][1],
+ "manga_date" : text.parse_timestamp(
+ data["dateCreate"][1] // 1000),
+ "manga_date_updated": text.parse_timestamp(
+ data["dateUpdate"][1] / 1000),
+ "author" : json_list(data["authors"]),
+ "artist" : json_list(data["artists"]),
+ "genre" : json_list(data["genres"]),
+ "lang" : data["tranLang"][1],
+ "lang_orig" : data["origLang"][1],
+ "status" : data["originalStatus"][1],
+ "published" : data["originalPubFrom"][1],
+ "description": data["summary"][1]["code"][1],
+ "cover" : data["urlCoverOri"][1],
+ "uploader" : data["userId"][1],
+ "score" : data["stat_score_avg"][1],
+ }
+
+
+def json_list(value):
+ return [
+ item[1].replace("_", " ")
+ for item in util.json_loads(value[1].replace('\\"', '"'))
+ ]
diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py
index 3b97e9a..ae455bf 100644
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -52,7 +52,8 @@ class BooruExtractor(BaseExtractor):
if notes:
self._notes(post, html)
- text.nameext_from_url(url, post)
+ if "extension" not in post:
+ text.nameext_from_url(url, post)
post.update(data)
self._prepare(post)
diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py
index 00400ba..d5cf996 100644
--- a/gallery_dl/extractor/civitai.py
+++ b/gallery_dl/extractor/civitai.py
@@ -912,9 +912,16 @@ class CivitaiSearchAPI():
def __init__(self, extractor):
self.extractor = extractor
self.root = "https://search-new.civitai.com"
+
+ if auth := extractor.config("token"):
+ if " " not in auth:
+ auth = f"Bearer {auth}"
+ else:
+ auth = ("Bearer 8c46eb2508e21db1e9828a97968d"
+ "91ab1ca1caa5f70a00e88a2ba1e286603b61")
+
self.headers = {
- "Authorization": "Bearer 8c46eb2508e21db1e9828a97968d91ab1ca1caa5f"
- "70a00e88a2ba1e286603b61",
+ "Authorization": auth,
"Content-Type": "application/json",
"X-Meilisearch-Client": "Meilisearch instant-meilisearch (v0.13.5)"
" ; Meilisearch JavaScript (v0.34.0)",
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 1ee54de..719fc62 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -143,7 +143,7 @@ class Extractor():
return values
def request(self, url, method="GET", session=None, fatal=True,
- retries=None, retry_codes=None, interval=True,
+ retries=None, retry_codes=None, expected=(), interval=True,
encoding=None, notfound=None, **kwargs):
if session is None:
session = self.session
@@ -202,6 +202,7 @@ class Extractor():
self._dump_response(response)
if (
code < 400 or
+ code in expected or
code < 500 and (
not fatal and code != 429 or fatal is None) or
fatal is ...
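
Extractor.request() gains an "expected" parameter: status codes listed there
are accepted as successful responses instead of being retried or raised. The
zerochan changes below use it to read pages the site serves with HTTP 500:

    # inside an extractor method; accept HTTP 500 for this request only
    page = self.request(url, expected=(500,)).text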
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index b152885..f32059e 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -35,9 +35,8 @@ class GelbooruBase():
data = self.request_json(url, params=params)
except exception.HttpError as exc:
if exc.status == 401:
- raise exception.AuthorizationError(
- f"'api-key' and 'user-id' required "
- f"({exc.status}: {exc.response.reason})")
+ raise exception.AuthRequired(
+ "'api-key' & 'user-id'", "the API")
raise
if not key:
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index c12a7a2..33db4e4 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -16,17 +16,33 @@ import collections
class GelbooruV02Extractor(booru.BooruExtractor):
basecategory = "gelbooru_v02"
+ def __init__(self, match):
+ booru.BooruExtractor.__init__(self, match)
+ self.request_interval = self.config_instance("request-interval", 0.0)
+ self.root_api = self.config_instance("root-api") or self.root
+
def _init(self):
self.api_key = self.config("api-key")
self.user_id = self.config("user-id")
- self.root_api = self.config_instance("root-api") or self.root
if self.category == "rule34":
self._file_url = self._file_url_rule34
def _api_request(self, params):
+ params["api_key"] = self.api_key
+ params["user_id"] = self.user_id
+
url = self.root_api + "/index.php?page=dapi&s=post&q=index"
- return self.request_xml(url, params=params)
+ root = self.request_xml(url, params=params)
+
+ if root.tag == "error":
+ msg = root.text
+ if msg.lower().startswith("missing authentication"):
+ raise exception.AuthRequired(
+ "'api-key' & 'user-id'", "the API", msg)
+ raise exception.AbortExtraction(f"'{msg}'")
+
+ return root
def _pagination(self, params):
params["pid"] = self.page_start
@@ -148,6 +164,7 @@ BASE_PATTERN = GelbooruV02Extractor.update({
"rule34": {
"root": "https://rule34.xxx",
"root-api": "https://api.rule34.xxx",
+ "request-interval": 1.0,
"pattern": r"(?:www\.)?rule34\.xxx",
},
"safebooru": {
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index b5450d5..fa60f91 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -39,6 +39,7 @@ class InstagramExtractor(Extractor):
self.www_claim = "0"
self.csrf_token = util.generate_token()
self._find_tags = util.re(r"#\w+").findall
+ self._warn_video_ua = True
self._logged_in = True
self._cursor = None
self._user = None
@@ -166,6 +167,7 @@ class InstagramExtractor(Extractor):
else:
post_url = f"{self.root}/stories/highlights/{reel_id}/"
data = {
+ "user" : post.get("user"),
"expires": text.parse_timestamp(expires),
"post_id": reel_id,
"post_shortcode": shortcode_from_id(reel_id),
@@ -223,8 +225,7 @@ class InstagramExtractor(Extractor):
for num, item in enumerate(items, 1):
try:
- candidates = item["image_versions2"]["candidates"]
- image = candidates[0]
+ image = item["image_versions2"]["candidates"][0]
except Exception:
self.log.warning("Missing media in post %s",
data["post_shortcode"])
@@ -235,17 +236,22 @@ class InstagramExtractor(Extractor):
video_versions,
key=lambda x: (x["width"], x["height"], x["type"]),
)
+ manifest = item.get("video_dash_manifest")
media = video
+
+ if self._warn_video_ua:
+ self._warn_video_ua = False
+ pattern = text.re(
+ r"AppleWebKit/537\.36 \(KHTML, like Gecko\) "
+ r"Chrome/\d+\.\d+\.\d+\.\d+ Safari/537\.36$")
+ if not pattern.search(self.session.headers["User-Agent"]):
+ self.log.warning("Potentially lowered video quality "
+ "due to non-Chrome User-Agent")
else:
- video = None
+ video = manifest = None
media = image
- if len(candidates) <= 3 and not post.get("__gdl_gen"):
- self.log.warning(
- "%s: Image candidate list possibly incomplete "
- "(%s items). Consider refreshing your cookies.",
- data["post_shortcode"], len(candidates))
- elif image["width"] < item.get("original_width", 0) or \
+ if image["width"] < item.get("original_width", 0) or \
image["height"] < item.get("original_height", 0):
self.log.warning(
"%s: Available image resolutions lower than the "
@@ -268,9 +274,14 @@ class InstagramExtractor(Extractor):
"video_url" : video["url"] if video else None,
"width" : media["width"],
"height" : media["height"],
- "_ytdl_manifest_data": item.get("video_dash_manifest"),
}
+ if manifest is not None:
+ media["_ytdl_manifest_data"] = manifest
+ if "owner" in item:
+ media["owner2"] = item["owner"]
+ if "reshared_story_media_author" in item:
+ media["author"] = item["reshared_story_media_author"]
if "expiring_at" in item:
media["expires"] = text.parse_timestamp(post["expiring_at"])
@@ -711,7 +722,6 @@ class InstagramAvatarExtractor(InstagramExtractor):
"caption" : None,
"like_count": 0,
"image_versions2": {"candidates": (avatar,)},
- "__gdl_gen" : True,
},)
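
Instagram appears to serve lower-quality video to clients whose User-Agent
does not look like desktop Chrome, so the extractor now warns once when the
configured UA fails the Chrome pattern. The check, reduced to a standalone
sketch:

    import re

    UA_CHROME = re.compile(
        r"AppleWebKit/537\.36 \(KHTML, like Gecko\) "
        r"Chrome/\d+\.\d+\.\d+\.\d+ Safari/537\.36$")

    chrome_ua = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                 "AppleWebKit/537.36 (KHTML, like Gecko) "
                 "Chrome/124.0.0.0 Safari/537.36")

    assert UA_CHROME.search(chrome_ua)       # Chrome-like UA: no warning
    assert not UA_CHROME.search("curl/8.0")  # triggers the one-time warning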
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index c42453f..ffb4cad 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -206,6 +206,7 @@ class NewgroundsExtractor(Extractor):
data["tags"].sort()
data["user"] = self.user or data["artist"][0]
+ data["slug"] = post_url[post_url.rfind("/")+1:]
data["post_url"] = post_url
return data
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 2d9a061..ff192c2 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -60,9 +60,23 @@ class OAuthBase(Extractor):
pass
server.close()
- data = self.client.recv(1024).decode()
- path = data.split(" ", 2)[1]
- return text.parse_query(path.partition("?")[2])
+ data = None
+ try:
+ data = self.client.recv(1024).decode()
+ path = data.split(" ", 2)[1]
+ return text.parse_query(path.partition("?")[2])
+ except Exception as exc:
+ if data is None:
+ msg = "Failed to receive"
+ elif not data:
+ exc = ""
+ msg = "Received empty"
+ else:
+ self.log.warning("Response: %r", data)
+ msg = "Received invalid"
+ if exc:
+ exc = f" ({exc.__class__.__name__}: {exc})"
+ raise exception.AbortExtraction(f"{msg} OAuth response{exc}")
def send(self, msg):
"""Send 'msg' to the socket opened in 'recv()'"""
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index d34130d..a72042c 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -1251,9 +1251,9 @@ class PixivAppAPI():
"/v1/user/bookmark-tags/illust", params, "bookmark_tags")
@memcache(keyarg=1)
- def user_detail(self, user_id):
+ def user_detail(self, user_id, fatal=True):
params = {"user_id": user_id}
- return self._call("/v1/user/detail", params)
+ return self._call("/v1/user/detail", params, fatal=fatal)
def user_following(self, user_id, restrict="public"):
params = {"user_id": user_id, "restrict": restrict}
@@ -1261,7 +1261,7 @@ class PixivAppAPI():
def user_illusts(self, user_id):
params = {"user_id": user_id}
- return self._pagination("/v1/user/illusts", params, user_data="user")
+ return self._pagination("/v1/user/illusts", params, key_user="user")
def user_novels(self, user_id):
params = {"user_id": user_id}
@@ -1271,7 +1271,7 @@ class PixivAppAPI():
params = {"illust_id": illust_id}
return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"]
- def _call(self, endpoint, params=None, parse=None):
+ def _call(self, endpoint, params=None, parse=None, fatal=True):
url = "https://app-api.pixiv.net" + endpoint
while True:
@@ -1283,7 +1283,7 @@ class PixivAppAPI():
else:
data = response.json()
- if "error" not in data:
+ if "error" not in data or not fatal:
return data
self.log.debug(data)
@@ -1302,14 +1302,16 @@ class PixivAppAPI():
raise exception.AbortExtraction(f"API request failed: {msg}")
def _pagination(self, endpoint, params,
- key_items="illusts", key_data=None, user_data=None):
+ key_items="illusts", key_data=None, key_user=None):
data = self._call(endpoint, params)
if key_data is not None:
self.data = data.get(key_data)
- if user_data is not None:
- if not data[user_data].get("id"):
+ if key_user is not None and not data[key_user].get("id"):
+ user = self.user_detail(self.extractor.user_id, fatal=False)
+ if user.get("error"):
raise exception.NotFoundError("user")
+ return
while True:
yield from data[key_items]
diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py
index 9afa706..b988646 100644
--- a/gallery_dl/extractor/shimmie2.py
+++ b/gallery_dl/extractor/shimmie2.py
@@ -26,9 +26,6 @@ class Shimmie2Extractor(BaseExtractor):
if file_url := self.config_instance("file_url"):
self.file_url_fmt = file_url
- if self.category == "giantessbooru":
- self.posts = self._posts_giantessbooru
-
def items(self):
data = self.metadata()
@@ -67,11 +64,6 @@ class Shimmie2Extractor(BaseExtractor):
BASE_PATTERN = Shimmie2Extractor.update({
- "giantessbooru": {
- "root": "https://sizechangebooru.com",
- "pattern": r"(?:sizechange|giantess)booru\.com",
- "cookies": {"agreed": "true"},
- },
"cavemanon": {
"root": "https://booru.cavemanon.xyz",
"pattern": r"booru\.cavemanon\.xyz",
@@ -85,6 +77,11 @@ BASE_PATTERN = Shimmie2Extractor.update({
"root": "https://vidya.pics",
"pattern": r"vidya\.pics",
},
+ "nozrip": {
+ "root": "https://noz.rip/booru",
+ "base": "https://noz.rip",
+ "pattern": r"noz\.rip/booru",
+ },
}) + r"/(?:index\.php\?q=/?)?"
@@ -154,36 +151,6 @@ class Shimmie2TagExtractor(Shimmie2Extractor):
if not extr(f"/{pnum}'>{pnum}<", ">"):
return
- def _posts_giantessbooru(self):
- pnum = text.parse_int(self.groups[-1], 1)
- file_url_fmt = (self.root + "/index.php?q=/image/{}.jpg").format
-
- while True:
- url = f"{self.root}/index.php?q=/post/list/{self.tags}/{pnum}"
- extr = text.extract_from(self.request(url).text)
-
- while True:
- pid = extr("href='./index.php?q=/post/view/", "&")
- if not pid:
- break
-
- tags, dimensions, size = extr("title='", "'").split(" // ")
- width, _, height = dimensions.partition("x")
-
- yield {
- "file_url": file_url_fmt(pid),
- "id" : pid,
- "md5" : "",
- "tags" : tags,
- "width" : width,
- "height" : height,
- "size" : text.parse_bytes(size[:-1]),
- }
-
- pnum += 1
- if not extr(f"/{pnum}'>{pnum}<", ">"):
- return
-
class Shimmie2PostExtractor(Shimmie2Extractor):
"""Extractor for single shimmie2 posts"""
@@ -196,13 +163,14 @@ class Shimmie2PostExtractor(Shimmie2Extractor):
url = f"{self.root}/post/view/{post_id}"
page = self.request(url).text
extr = text.extract_from(page)
+ base = self.config_instance("base", self.root)
qt = self._quote_type(page)
post = {
"id" : post_id,
"tags" : extr(": ", "<").partition(" - ")[0].rstrip(")"),
"md5" : extr("/_thumbs/", "/"),
- "file_url": self.root + (
+ "file_url": base + (
extr(f"id={qt}main_image{qt} src={qt}", qt) or
extr("<source src="+qt, qt)).lstrip("."),
"width" : extr("data-width=", " ").strip("\"'"),
@@ -215,18 +183,3 @@ class Shimmie2PostExtractor(Shimmie2Extractor):
post["md5"] = text.extr(post["file_url"], "/_images/", "/")
return (post,)
-
- def _posts_giantessbooru(self):
- post_id = self.groups[-1]
- url = f"{self.root}/index.php?q=/post/view/{post_id}"
- extr = text.extract_from(self.request(url).text)
-
- return ({
- "id" : post_id,
- "tags" : extr(": ", "<").partition(" - ")[0].rstrip(")"),
- "md5" : "",
- "file_url": self.root + extr("id='main_image' src='.", "'"),
- "width" : extr("orig_width =", ";"),
- "height" : 0,
- "size" : 0,
- },)
diff --git a/gallery_dl/extractor/sizebooru.py b/gallery_dl/extractor/sizebooru.py
new file mode 100644
index 0000000..cad4b23
--- /dev/null
+++ b/gallery_dl/extractor/sizebooru.py
@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://sizebooru.com/"""
+
+from .booru import BooruExtractor
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?sizebooru\.com"
+
+
+class SizebooruExtractor(BooruExtractor):
+ """Base class for sizebooru extractors"""
+ category = "sizebooru"
+ root = "https://sizebooru.com"
+ filename_fmt = "{id}.{extension}"
+ archive_fmt = "{id}"
+ page_start = 1
+ request_interval = (0.5, 1.5)
+
+ def _init(self):
+ if self.config("metadata", False):
+ self._prepare = self._prepare_metadata
+
+ def _file_url(self, post):
+ post["file_url"] = url = f"{self.root}/Picture/{post['id']}"
+ return url
+
+ def _prepare(self, post):
+ post_id = post["id"]
+ post["id"] = text.parse_int(post_id)
+ post["filename"] = post_id
+ if not post["extension"]:
+ post["extension"] = "jpg"
+
+ def _prepare_metadata(self, post):
+ post_id = post["id"]
+ url = f"{self.root}/Details/{post_id}"
+ extr = text.extract_from(self.request(url).text)
+
+ post.update({
+ "id" : text.parse_int(post_id),
+ "date" : text.parse_datetime(
+ extr("<b>Posted Date:</b> ", "<"), "%m/%d/%Y"),
+ "date_approved": text.parse_datetime(
+ extr("<b>Approved Date:</b> ", "<"), "%m/%d/%Y"),
+ "approver" : text.remove_html(extr("<b>Approved By:</b>", "</")),
+ "uploader" : text.remove_html(extr("<b>Posted By:</b>", "</")),
+ "artist" : None
+ if (artist := extr("<b>Artist:</b> ", "</")) == "N/A" else # noqa: E131 E501
+ text.remove_html(artist), # noqa: E131
+ "views" : text.parse_int(extr("<b>Views:</b>", "<")),
+ "source" : text.extr(extr(
+ "<b>Source Link:</b>", "</"), ' href="', '"') or None,
+ "tags" : text.split_html(extr(
+ "<h6>Related Tags</h6>", "</ul>")),
+ "favorite" : text.split_html(extr(
+ "<h6>Favorited By</h6>", "</ul>")),
+ })
+
+ post["filename"], _, ext = extr('" alt="', '"').rpartition(".")
+ if not post["extension"]:
+ post["extension"] = ext.lower()
+
+ return post
+
+ def _pagination(self, url, callback=None):
+ params = {
+ "pageNo" : self.page_start,
+ "pageSize": self.per_page,
+ }
+
+ page = self.request(url, params=params).text
+ if callback is not None:
+ callback(page)
+
+ while True:
+ thumb = None
+ for thumb in text.extract_iter(
+ page, '<a href="/Details/', ';base64'):
+ yield {
+ "id" : thumb[:thumb.find('"')],
+ "extension": thumb[thumb.rfind("/")+1:],
+ }
+
+ if "disabled" in text.extr(page, 'area-label="Next"', ">") or \
+ thumb is None:
+ return
+ params["pageNo"] += 1
+ page = self.request(url, params=params).text
+
+
+class SizebooruPostExtractor(SizebooruExtractor):
+ """Extractor for sizebooru posts"""
+ subcategory = "post"
+ pattern = rf"{BASE_PATTERN}/Details/(\d+)"
+ example = "https://sizebooru.com/Details/12345"
+
+ def posts(self):
+ return ({"id": self.groups[0], "extension": None},)
+
+
+class SizebooruTagExtractor(SizebooruExtractor):
+ """Extractor for sizebooru tag searches"""
+ subcategory = "tag"
+ directory_fmt = ("{category}", "{search_tags}")
+ pattern = rf"{BASE_PATTERN}/Search/([^/?#]+)"
+ example = "https://sizebooru.com/Search/TAG"
+
+ def posts(self):
+ tag = self.groups[0]
+ self.kwdict["search_tags"] = text.unquote(tag)
+ return self._pagination(f"{self.root}/Search/{tag}")
+
+
+class SizebooruGalleryExtractor(SizebooruExtractor):
+ """Extractor for sizebooru galleries"""
+ subcategory = "gallery"
+ directory_fmt = ("{category}", "{gallery_name} ({gallery_id})")
+ pattern = rf"{BASE_PATTERN}/Galleries/List/(\d+)"
+ example = "https://sizebooru.com/Galleries/List/123"
+
+ def posts(self):
+ gid = self.groups[0]
+ self.kwdict["gallery_id"] = text.parse_int(gid)
+ return self._pagination(
+ f"{self.root}/Galleries/List/{gid}", self._extract_name)
+
+ def _extract_name(self, page):
+ self.kwdict["gallery_name"] = text.unescape(text.extr(
+ page, "<title>Gallery: ", " - Size Booru<"))
+
+
+class SizebooruUserExtractor(SizebooruExtractor):
+ """Extractor for a sizebooru user's uploads"""
+ subcategory = "user"
+ directory_fmt = ("{category}", "Uploads {user}")
+ pattern = rf"{BASE_PATTERN}/Profile/Uploads/([^/?#]+)"
+ example = "https://sizebooru.com/Profile/Uploads/USER"
+
+ def posts(self):
+ user = self.groups[0]
+ self.kwdict["user"] = text.unquote(user)
+ return self._pagination(f"{self.root}/Profile/Uploads/{user}",)
+
+
+class SizebooruFavoriteExtractor(SizebooruExtractor):
+ """Extractor for a sizebooru user's favorites"""
+ subcategory = "favorite"
+ directory_fmt = ("{category}", "Favorites {user}")
+ pattern = rf"{BASE_PATTERN}/Profile/Favorites/([^/?#]+)"
+ example = "https://sizebooru.com/Profile/Favorites/USER"
+
+ def posts(self):
+ user = self.groups[0]
+ self.kwdict["user"] = text.unquote(user)
+ return self._pagination(f"{self.root}/Profile/Favorites/{user}",)
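
The new module covers posts, tag searches, galleries, user uploads, and user
favorites. A quick way to check which extractor claims each URL, assuming
gallery_dl.extractor.find() resolves a URL to its extractor instance:

    from gallery_dl import extractor

    for url in (
        "https://sizebooru.com/Details/12345",
        "https://sizebooru.com/Search/TAG",
        "https://sizebooru.com/Galleries/List/123",
        "https://sizebooru.com/Profile/Uploads/USER",
        "https://sizebooru.com/Profile/Favorites/USER",
    ):
        print(f"{url} -> {extractor.find(url).__class__.__name__}")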
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 46507c4..6eea76c 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -382,7 +382,7 @@ class TumblrSearchExtractor(TumblrExtractor):
example = "https://www.tumblr.com/search/QUERY"
def posts(self):
- _, _, _, search, mode, post_type, query = self.groups
+ search, mode, post_type, query = self.groups
params = text.parse_query(query)
return self.api.search(text.unquote(search), params, mode, post_type)
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 4303524..c928507 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -16,6 +16,7 @@ import random
BASE_PATTERN = (r"(?:https?://)?(?:www\.|mobile\.)?"
r"(?:(?:[fv]x)?twitter|(?:fix(?:up|v))?x)\.com")
+USER_PATTERN = rf"{BASE_PATTERN}/([^/?#]+)"
class TwitterExtractor(Extractor):
@@ -47,8 +48,9 @@ class TwitterExtractor(Extractor):
self.cards_blacklist = self.config("cards-blacklist")
if not self.config("transform", True):
- self._transform_user = util.identity
- self._transform_tweet = util.identity
+ self._transform_community = \
+ self._transform_tweet = \
+ self._transform_user = util.identity
self._cursor = None
self._user = None
@@ -412,6 +414,11 @@ class TwitterExtractor(Extractor):
content = tget("full_text") or tget("text") or ""
entities = legacy["entities"]
+ if "author_community_relationship" in tweet:
+ tdata["community"] = self._transform_community(
+ tweet["author_community_relationship"]
+ ["community_results"]["result"])
+
if hashtags := entities.get("hashtags"):
tdata["hashtags"] = [t["text"] for t in hashtags]
@@ -453,6 +460,36 @@ class TwitterExtractor(Extractor):
return tdata
+ def _transform_community(self, com):
+ try:
+ cid = com.get("id_str") or com["rest_id"]
+ except KeyError:
+ return {}
+
+ try:
+ return self._user_cache[f"C#{cid}"]
+ except KeyError:
+ pass
+
+ self._user_cache[f"C#{cid}"] = cdata = {
+ "id": text.parse_int(cid),
+ "name": com["name"],
+ "description": com["description"],
+ "date": text.parse_timestamp(com["created_at"] // 1000),
+ "nsfw": com["is_nsfw"],
+ "role": com["role"],
+ "member_count": com["member_count"],
+ "rules": [rule["name"] for rule in com["rules"]],
+ "admin": (admin := com.get("admin_results")) and
+ admin["result"]["core"]["screen_name"], # noqa: E131
+ "creator": (creator := com.get("creator_results")) and
+ creator["result"]["core"]["screen_name"], # noqa: E131
+ "banner": (banner := com.get("custom_banner_media")) and
+ banner["media_info"]["original_img_url"], # noqa: E131
+ }
+
+ return cdata
+
def _transform_user(self, user):
try:
uid = user.get("rest_id") or user["id_str"]
@@ -465,35 +502,35 @@ class TwitterExtractor(Extractor):
except KeyError:
pass
- if "legacy" in user:
- user = user["legacy"]
+ core = user.get("core") or user
+ legacy = user.get("legacy") or user
+ lget = legacy.get
- uget = user.get
- if uget("withheld_scope"):
- self.log.warning("'%s'", uget("description"))
+ if lget("withheld_scope"):
+ self.log.warning("'%s'", lget("description"))
- entities = user["entities"]
+ entities = legacy["entities"]
self._user_cache[uid] = udata = {
"id" : text.parse_int(uid),
- "name" : user["screen_name"],
- "nick" : user["name"],
- "location" : uget("location"),
+ "name" : core["screen_name"],
+ "nick" : core["name"],
+ "location" : user["location"]["location"],
"date" : text.parse_datetime(
- uget("created_at"), "%a %b %d %H:%M:%S %z %Y"),
- "verified" : uget("verified", False),
- "protected" : uget("protected", False),
- "profile_banner" : uget("profile_banner_url", ""),
- "profile_image" : uget(
- "profile_image_url_https", "").replace("_normal.", "."),
- "favourites_count": uget("favourites_count"),
- "followers_count" : uget("followers_count"),
- "friends_count" : uget("friends_count"),
- "listed_count" : uget("listed_count"),
- "media_count" : uget("media_count"),
- "statuses_count" : uget("statuses_count"),
+ core["created_at"], "%a %b %d %H:%M:%S %z %Y"),
+ "verified" : user["verification"]["verified"],
+ "protected" : user["privacy"]["protected"],
+ "profile_banner" : lget("profile_banner_url", ""),
+ "profile_image" : user["avatar"]["image_url"].replace(
+ "_normal.", "."),
+ "favourites_count": lget("favourites_count"),
+ "followers_count" : lget("followers_count"),
+ "friends_count" : lget("friends_count"),
+ "listed_count" : lget("listed_count"),
+ "media_count" : lget("media_count"),
+ "statuses_count" : lget("statuses_count"),
}
- descr = user["description"]
+ descr = legacy["description"]
if urls := entities["description"].get("urls"):
for url in urls:
try:
@@ -604,34 +641,92 @@ class TwitterExtractor(Extractor):
return self.cookies_update(_login_impl(self, username, password))
+class TwitterHomeExtractor(TwitterExtractor):
+ """Extractor for Twitter home timelines"""
+ subcategory = "home"
+ pattern = (BASE_PATTERN +
+ r"/(?:home(?:/fo(?:llowing|r[-_ ]?you()))?|i/timeline)/?$")
+ example = "https://x.com/home"
+
+ def tweets(self):
+ if self.groups[0] is None:
+ return self.api.home_latest_timeline()
+ return self.api.home_timeline()
+
+
+class TwitterSearchExtractor(TwitterExtractor):
+ """Extractor for Twitter search results"""
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)"
+ example = "https://x.com/search?q=QUERY"
+
+ def metadata(self):
+ return {"search": text.unquote(self.user)}
+
+ def tweets(self):
+ query = text.unquote(self.user.replace("+", " "))
+
+ user = None
+ for item in query.split():
+ item = item.strip("()")
+ if item.startswith("from:"):
+ if user:
+ user = None
+ break
+ else:
+ user = item[5:]
+
+ if user is not None:
+ try:
+ self._assign_user(self.api.user_by_screen_name(user))
+ except KeyError:
+ pass
+
+ return self.api.search_timeline(query)
+
+
+class TwitterHashtagExtractor(TwitterExtractor):
+ """Extractor for Twitter hashtags"""
+ subcategory = "hashtag"
+ pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)"
+ example = "https://x.com/hashtag/NAME"
+
+ def items(self):
+ url = f"{self.root}/search?q=%23{self.user}"
+ data = {"_extractor": TwitterSearchExtractor}
+ yield Message.Queue, url, data
+
+
class TwitterUserExtractor(Dispatch, TwitterExtractor):
"""Extractor for a Twitter user"""
- pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
+ pattern = (BASE_PATTERN + r"/(?:"
+ r"([^/?#]+)/?(?:$|\?|#)"
r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
example = "https://x.com/USER"
def items(self):
user, user_id = self.groups
if user_id is not None:
- user = "id:" + user_id
+ user = f"id:{user_id}"
base = f"{self.root}/{user}/"
return self._dispatch_extractors((
- (TwitterInfoExtractor , base + "info"),
- (TwitterAvatarExtractor , base + "photo"),
- (TwitterBackgroundExtractor, base + "header_photo"),
- (TwitterTimelineExtractor , base + "timeline"),
- (TwitterTweetsExtractor , base + "tweets"),
- (TwitterMediaExtractor , base + "media"),
- (TwitterRepliesExtractor , base + "with_replies"),
- (TwitterLikesExtractor , base + "likes"),
+ (TwitterInfoExtractor , f"{base}info"),
+ (TwitterAvatarExtractor , f"{base}photo"),
+ (TwitterBackgroundExtractor, f"{base}header_photo"),
+ (TwitterTimelineExtractor , f"{base}timeline"),
+ (TwitterTweetsExtractor , f"{base}tweets"),
+ (TwitterMediaExtractor , f"{base}media"),
+ (TwitterRepliesExtractor , f"{base}with_replies"),
+ (TwitterHighlightsExtractor, f"{base}highlights"),
+ (TwitterLikesExtractor , f"{base}likes"),
), ("timeline",))
class TwitterTimelineExtractor(TwitterExtractor):
"""Extractor for a Twitter user timeline"""
subcategory = "timeline"
- pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/timeline(?!\w)"
+ pattern = rf"{USER_PATTERN}/timeline(?!\w)"
example = "https://x.com/USER/timeline"
def _init_cursor(self):
@@ -728,7 +823,7 @@ class TwitterTimelineExtractor(TwitterExtractor):
class TwitterTweetsExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's Tweets timeline"""
subcategory = "tweets"
- pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/tweets(?!\w)"
+ pattern = rf"{USER_PATTERN}/tweets(?!\w)"
example = "https://x.com/USER/tweets"
def tweets(self):
@@ -738,17 +833,27 @@ class TwitterTweetsExtractor(TwitterExtractor):
class TwitterRepliesExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's timeline including replies"""
subcategory = "replies"
- pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/with_replies(?!\w)"
+ pattern = rf"{USER_PATTERN}/with_replies(?!\w)"
example = "https://x.com/USER/with_replies"
def tweets(self):
return self.api.user_tweets_and_replies(self.user)
+class TwitterHighlightsExtractor(TwitterExtractor):
+ """Extractor for Tweets from a user's highlights timeline"""
+ subcategory = "highlights"
+ pattern = rf"{USER_PATTERN}/highlights(?!\w)"
+ example = "https://x.com/USER/highlights"
+
+ def tweets(self):
+ return self.api.user_highlights(self.user)
+
+
class TwitterMediaExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's Media timeline"""
subcategory = "media"
- pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/media(?!\w)"
+ pattern = rf"{USER_PATTERN}/media(?!\w)"
example = "https://x.com/USER/media"
def tweets(self):
@@ -758,7 +863,7 @@ class TwitterMediaExtractor(TwitterExtractor):
class TwitterLikesExtractor(TwitterExtractor):
"""Extractor for liked tweets"""
subcategory = "likes"
- pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/likes(?!\w)"
+ pattern = rf"{USER_PATTERN}/likes(?!\w)"
example = "https://x.com/USER/likes"
def metadata(self):
@@ -808,7 +913,7 @@ class TwitterListMembersExtractor(TwitterExtractor):
class TwitterFollowingExtractor(TwitterExtractor):
"""Extractor for followed users"""
subcategory = "following"
- pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/following(?!\w)"
+ pattern = rf"{USER_PATTERN}/following(?!\w)"
example = "https://x.com/USER/following"
def items(self):
@@ -819,7 +924,7 @@ class TwitterFollowingExtractor(TwitterExtractor):
class TwitterFollowersExtractor(TwitterExtractor):
"""Extractor for a user's followers"""
subcategory = "followers"
- pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/followers(?!\w)"
+ pattern = rf"{USER_PATTERN}/followers(?!\w)"
example = "https://x.com/USER/followers"
def items(self):
@@ -827,52 +932,12 @@ class TwitterFollowersExtractor(TwitterExtractor):
return self._users_result(TwitterAPI(self).user_followers(self.user))
-class TwitterSearchExtractor(TwitterExtractor):
- """Extractor for Twitter search results"""
- subcategory = "search"
- pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)"
- example = "https://x.com/search?q=QUERY"
-
- def metadata(self):
- return {"search": text.unquote(self.user)}
-
- def tweets(self):
- query = text.unquote(self.user.replace("+", " "))
-
- user = None
- for item in query.split():
- item = item.strip("()")
- if item.startswith("from:"):
- if user:
- user = None
- break
- else:
- user = item[5:]
-
- if user is not None:
- try:
- self._assign_user(self.api.user_by_screen_name(user))
- except KeyError:
- pass
-
- return self.api.search_timeline(query)
-
-
-class TwitterHashtagExtractor(TwitterExtractor):
- """Extractor for Twitter hashtags"""
- subcategory = "hashtag"
- pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)"
- example = "https://x.com/hashtag/NAME"
-
- def items(self):
- url = f"{self.root}/search?q=%23{self.user}"
- data = {"_extractor": TwitterSearchExtractor}
- yield Message.Queue, url, data
-
-
class TwitterCommunityExtractor(TwitterExtractor):
"""Extractor for a Twitter community"""
subcategory = "community"
+ directory_fmt = ("{category}", "Communities",
+ "{community[name]} ({community[id]})")
+ archive_fmt = "C_{community[id]}_{tweet_id}_{num}"
pattern = BASE_PATTERN + r"/i/communities/(\d+)"
example = "https://x.com/i/communities/12345"
@@ -885,6 +950,8 @@ class TwitterCommunityExtractor(TwitterExtractor):
class TwitterCommunitiesExtractor(TwitterExtractor):
"""Extractor for followed Twitter communities"""
subcategory = "communities"
+ directory_fmt = TwitterCommunityExtractor.directory_fmt
+ archive_fmt = TwitterCommunityExtractor.archive_fmt
pattern = BASE_PATTERN + r"/([^/?#]+)/communities/?$"
example = "https://x.com/i/communities"
@@ -1002,7 +1069,7 @@ class TwitterQuotesExtractor(TwitterExtractor):
class TwitterInfoExtractor(TwitterExtractor):
"""Extractor for a user's profile data"""
subcategory = "info"
- pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/info"
+ pattern = rf"{USER_PATTERN}/info"
example = "https://x.com/USER/info"
def items(self):
@@ -1021,13 +1088,13 @@ class TwitterAvatarExtractor(TwitterExtractor):
subcategory = "avatar"
filename_fmt = "avatar {date}.{extension}"
archive_fmt = "AV_{user[id]}_{date}"
- pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/photo"
+ pattern = rf"{USER_PATTERN}/photo"
example = "https://x.com/USER/photo"
def tweets(self):
self.api._user_id_by_screen_name(self.user)
user = self._user_obj
- url = user["legacy"]["profile_image_url_https"]
+ url = user["avatar"]["image_url"]
if url == ("https://abs.twimg.com/sticky"
"/default_profile_images/default_profile_normal.png"):
@@ -1043,7 +1110,7 @@ class TwitterBackgroundExtractor(TwitterExtractor):
subcategory = "background"
filename_fmt = "background {date}.{extension}"
archive_fmt = "BG_{user[id]}_{date}"
- pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/header_photo"
+ pattern = rf"{USER_PATTERN}/header_photo"
example = "https://x.com/USER/header_photo"
def tweets(self):
@@ -1169,9 +1236,10 @@ class TwitterAPI():
}
self.features = {
"hidden_profile_subscriptions_enabled": True,
+ "payments_enabled": False,
+ "rweb_xchat_enabled": False,
"profile_label_improvements_pcf_label_in_post_enabled": True,
"rweb_tipjar_consumption_enabled": True,
- "responsive_web_graphql_exclude_directive_enabled": True,
"verified_phone_label_enabled": False,
"highlights_tweets_tab_ui_enabled": True,
"responsive_web_twitter_article_notes_tab_enabled": True,
@@ -1179,26 +1247,26 @@ class TwitterAPI():
"creator_subscriptions_tweet_preview_api_enabled": True,
"responsive_web_graphql_"
"skip_user_profile_image_extensions_enabled": False,
- "responsive_web_graphql_"
- "timeline_navigation_enabled": True,
+ "responsive_web_graphql_timeline_navigation_enabled": True,
}
self.features_pagination = {
"rweb_video_screen_enabled": False,
+ "payments_enabled": False,
+ "rweb_xchat_enabled": False,
"profile_label_improvements_pcf_label_in_post_enabled": True,
"rweb_tipjar_consumption_enabled": True,
- "responsive_web_graphql_exclude_directive_enabled": True,
"verified_phone_label_enabled": False,
"creator_subscriptions_tweet_preview_api_enabled": True,
- "responsive_web_graphql_"
- "timeline_navigation_enabled": True,
- "responsive_web_graphql_"
- "skip_user_profile_image_extensions_enabled": False,
+ "responsive_web_graphql"
+ "_timeline_navigation_enabled": True,
+ "responsive_web_graphql"
+ "_skip_user_profile_image_extensions_enabled": False,
"premium_content_api_read_enabled": False,
"communities_web_enable_tweet_community_results_fetch": True,
"c9s_tweet_anatomy_moderator_badge_enabled": True,
"responsive_web_grok_analyze_button_fetch_trends_enabled": False,
"responsive_web_grok_analyze_post_followups_enabled": True,
- "responsive_web_jetfuel_frame": False,
+ "responsive_web_jetfuel_frame": True,
"responsive_web_grok_share_attachment_enabled": True,
"articles_preview_enabled": True,
"responsive_web_edit_tweet_api_enabled": True,
@@ -1212,22 +1280,27 @@ class TwitterAPI():
"creator_subscriptions_quote_tweet_preview_enabled": False,
"freedom_of_speech_not_reach_fetch_enabled": True,
"standardized_nudges_misinfo": True,
- "tweet_with_visibility_results_"
- "prefer_gql_limited_actions_policy_enabled": True,
+ "tweet_with_visibility_results"
+ "_prefer_gql_limited_actions_policy_enabled": True,
"longform_notetweets_rich_text_read_enabled": True,
"longform_notetweets_inline_media_enabled": True,
"responsive_web_grok_image_annotation_enabled": True,
+ "responsive_web_grok_imagine_annotation_enabled": True,
+ "responsive_web_grok"
+ "_community_note_auto_translation_is_enabled": False,
"responsive_web_enhance_cards_enabled": False,
}
def tweet_result_by_rest_id(self, tweet_id):
- endpoint = "/graphql/Vg2Akr5FzUmF0sTplA5k6g/TweetResultByRestId"
+ endpoint = "/graphql/qxWQxcMLiTPcavz9Qy5hwQ/TweetResultByRestId"
variables = {
"tweetId": tweet_id,
"withCommunity": False,
"includePromotedContent": False,
"withVoice": False,
}
+ features = self.features_pagination.copy()
+ del features["rweb_video_screen_enabled"]
field_toggles = {
"withArticleRichContentState": True,
"withArticlePlainText": False,
@@ -1236,7 +1309,7 @@ class TwitterAPI():
}
params = {
"variables" : self._json_dumps(variables),
- "features" : self._json_dumps(self.features_pagination),
+ "features" : self._json_dumps(features),
"fieldToggles": self._json_dumps(field_toggles),
}
tweet = self._call(endpoint, params)["data"]["tweetResult"]["result"]
@@ -1245,16 +1318,16 @@ class TwitterAPI():
if tweet.get("__typename") == "TweetUnavailable":
reason = tweet.get("reason")
- if reason == "NsfwLoggedOut":
- raise exception.AuthorizationError("NSFW Tweet")
+ if reason in ("NsfwViewerHasNoStatedAge", "NsfwLoggedOut"):
+ raise exception.AuthRequired(message="NSFW Tweet")
if reason == "Protected":
- raise exception.AuthorizationError("Protected Tweet")
+ raise exception.AuthRequired(message="Protected Tweet")
raise exception.AbortExtraction(f"Tweet unavailable ('{reason}')")
return tweet
def tweet_detail(self, tweet_id):
- endpoint = "/graphql/b9Yw90FMr_zUb8DvA8r2ug/TweetDetail"
+ endpoint = "/graphql/iFEr5AcP121Og4wx9Yqo3w/TweetDetail"
variables = {
"focalTweetId": tweet_id,
"referrer": "profile",
@@ -1278,7 +1351,7 @@ class TwitterAPI():
field_toggles=field_toggles)
def user_tweets(self, screen_name):
- endpoint = "/graphql/M3Hpkrb8pjWkEuGdLeXMOA/UserTweets"
+ endpoint = "/graphql/E8Wq-_jFSaU7hxVcuOPR9g/UserTweets"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1293,7 +1366,7 @@ class TwitterAPI():
endpoint, variables, field_toggles=field_toggles)
def user_tweets_and_replies(self, screen_name):
- endpoint = "/graphql/pz0IHaV_t7T4HJavqqqcIA/UserTweetsAndReplies"
+ endpoint = "/graphql/-O3QOHrVn1aOm_cF5wyTCQ/UserTweetsAndReplies"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1307,8 +1380,22 @@ class TwitterAPI():
return self._pagination_tweets(
endpoint, variables, field_toggles=field_toggles)
+ def user_highlights(self, screen_name):
+ endpoint = "/graphql/gmHw9geMTncZ7jeLLUUNOw/UserHighlightsTweets"
+ variables = {
+ "userId": self._user_id_by_screen_name(screen_name),
+ "count": 100,
+ "includePromotedContent": False,
+ "withVoice": True,
+ }
+ field_toggles = {
+ "withArticlePlainText": False,
+ }
+ return self._pagination_tweets(
+ endpoint, variables, field_toggles=field_toggles)
+
def user_media(self, screen_name):
- endpoint = "/graphql/8B9DqlaGvYyOvTCzzZWtNA/UserMedia"
+ endpoint = "/graphql/jCRhbOzdgOHp6u9H4g2tEg/UserMedia"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1324,7 +1411,7 @@ class TwitterAPI():
endpoint, variables, field_toggles=field_toggles)
def user_likes(self, screen_name):
- endpoint = "/graphql/uxjTlmrTI61zreSIV1urbw/Likes"
+ endpoint = "/graphql/TGEKkJG_meudeaFcqaxM-Q/Likes"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1340,7 +1427,7 @@ class TwitterAPI():
endpoint, variables, field_toggles=field_toggles)
def user_bookmarks(self):
- endpoint = "/graphql/ztCdjqsvvdL0dE8R5ME0hQ/Bookmarks"
+ endpoint = "/graphql/pLtjrO4ubNh996M_Cubwsg/Bookmarks"
variables = {
"count": 100,
"includePromotedContent": False,
@@ -1348,29 +1435,35 @@ class TwitterAPI():
return self._pagination_tweets(
endpoint, variables, ("bookmark_timeline_v2", "timeline"), False)
- def list_latest_tweets_timeline(self, list_id):
- endpoint = "/graphql/LSefrrxhpeX8HITbKfWz9g/ListLatestTweetsTimeline"
- variables = {
- "listId": list_id,
- "count": 100,
- }
- return self._pagination_tweets(
- endpoint, variables, ("list", "tweets_timeline", "timeline"))
-
def search_timeline(self, query, product="Latest"):
- endpoint = "/graphql/fL2MBiqXPk5pSrOS5ACLdA/SearchTimeline"
+ endpoint = "/graphql/4fpceYZ6-YQCx_JSl_Cn_A/SearchTimeline"
variables = {
"rawQuery": query,
"count": 100,
"querySource": "typed_query",
"product": product,
+ "withGrokTranslatedBio": False,
}
return self._pagination_tweets(
endpoint, variables,
("search_by_raw_query", "search_timeline", "timeline"))
+ def community_query(self, community_id):
+ endpoint = "/graphql/2W09l7nD7ZbxGQHXvfB22w/CommunityQuery"
+ params = {
+ "variables": self._json_dumps({
+ "communityId": community_id,
+ }),
+ "features": self._json_dumps({
+ "c9s_list_members_action_api_enabled": False,
+ "c9s_superc9s_indication_enabled": False,
+ }),
+ }
+ return (self._call(endpoint, params)
+ ["data"]["communityResults"]["result"])
+
def community_tweets_timeline(self, community_id):
- endpoint = "/graphql/awszcpgwaIeqqNfmzjxUow/CommunityTweetsTimeline"
+ endpoint = "/graphql/Nyt-88UX4-pPCImZNUl9RQ/CommunityTweetsTimeline"
variables = {
"communityId": community_id,
"count": 100,
@@ -1384,7 +1477,7 @@ class TwitterAPI():
"timeline"))
def community_media_timeline(self, community_id):
- endpoint = "/graphql/HfMuDHto2j3NKUeiLjKWHA/CommunityMediaTimeline"
+ endpoint = "/graphql/ZniZ7AAK_VVu1xtSx1V-gQ/CommunityMediaTimeline"
variables = {
"communityId": community_id,
"count": 100,
@@ -1396,7 +1489,7 @@ class TwitterAPI():
"timeline"))
def communities_main_page_timeline(self, screen_name):
- endpoint = ("/graphql/NbdrKPY_h_nlvZUg7oqH5Q"
+ endpoint = ("/graphql/p048a9n3hTPppQyK7FQTFw"
"/CommunitiesMainPageTimeline")
variables = {
"count": 100,
@@ -1406,6 +1499,27 @@ class TwitterAPI():
endpoint, variables,
("viewer", "communities_timeline", "timeline"))
+ def home_timeline(self):
+ endpoint = "/graphql/DXmgQYmIft1oLP6vMkJixw/HomeTimeline"
+ variables = {
+ "count": 100,
+ "includePromotedContent": False,
+ "latestControlAvailable": True,
+ "withCommunity": True,
+ }
+ return self._pagination_tweets(
+ endpoint, variables, ("home", "home_timeline_urt"))
+
+ def home_latest_timeline(self):
+ endpoint = "/graphql/SFxmNKWfN9ySJcXG_tjX8g/HomeLatestTimeline"
+ variables = {
+ "count": 100,
+ "includePromotedContent": False,
+ "latestControlAvailable": True,
+ }
+ return self._pagination_tweets(
+ endpoint, variables, ("home", "home_timeline_urt"))
+
def live_event_timeline(self, event_id):
endpoint = f"/2/live_event/timeline/{event_id}.json"
params = self.params.copy()
@@ -1422,8 +1536,17 @@ class TwitterAPI():
return (self._call(endpoint, params)
["twitter_objects"]["live_events"][event_id])
+ def list_latest_tweets_timeline(self, list_id):
+ endpoint = "/graphql/06JtmwM8k_1cthpFZITVVA/ListLatestTweetsTimeline"
+ variables = {
+ "listId": list_id,
+ "count": 100,
+ }
+ return self._pagination_tweets(
+ endpoint, variables, ("list", "tweets_timeline", "timeline"))
+
def list_members(self, list_id):
- endpoint = "/graphql/v97svwb-qcBmzv6QruDuNg/ListMembers"
+ endpoint = "/graphql/naea_MSad4pOb-D6_oVv_g/ListMembers"
variables = {
"listId": list_id,
"count": 100,
@@ -1432,35 +1555,38 @@ class TwitterAPI():
endpoint, variables, ("list", "members_timeline", "timeline"))
def user_followers(self, screen_name):
- endpoint = "/graphql/jqZ0_HJBA6mnu18iTZYm9w/Followers"
+ endpoint = "/graphql/i6PPdIMm1MO7CpAqjau7sw/Followers"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
"includePromotedContent": False,
+ "withGrokTranslatedBio": False,
}
return self._pagination_users(endpoint, variables)
def user_followers_verified(self, screen_name):
- endpoint = "/graphql/GHg0X_FjrJoISwwLPWi1LQ/BlueVerifiedFollowers"
+ endpoint = "/graphql/fxEl9kp1Tgolqkq8_Lo3sg/BlueVerifiedFollowers"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
"includePromotedContent": False,
+ "withGrokTranslatedBio": False,
}
return self._pagination_users(endpoint, variables)
def user_following(self, screen_name):
- endpoint = "/graphql/4QHbs4wmzgtU91f-t96_Eg/Following"
+ endpoint = "/graphql/SaWqzw0TFAWMx1nXWjXoaQ/Following"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
"includePromotedContent": False,
+ "withGrokTranslatedBio": False,
}
return self._pagination_users(endpoint, variables)
@memcache(keyarg=1)
def user_by_rest_id(self, rest_id):
- endpoint = "/graphql/5vdJ5sWkbSRDiiNZvwc2Yg/UserByRestId"
+ endpoint = "/graphql/8r5oa_2vD0WkhIAOkY4TTA/UserByRestId"
features = self.features
params = {
"variables": self._json_dumps({
@@ -1472,7 +1598,7 @@ class TwitterAPI():
@memcache(keyarg=1)
def user_by_screen_name(self, screen_name):
- endpoint = "/graphql/32pL5BWe9WKeSK1MoPvFQQ/UserByScreenName"
+ endpoint = "/graphql/ck5KkZ8t5cOmoLssopN99Q/UserByScreenName"
features = self.features.copy()
features["subscriptions_verification_info_"
"is_identity_verified_enabled"] = True
@@ -1481,6 +1607,7 @@ class TwitterAPI():
params = {
"variables": self._json_dumps({
"screen_name": screen_name,
+ "withGrokTranslatedBio": False,
}),
"features": self._json_dumps(features),
"fieldToggles": self._json_dumps({
@@ -1618,7 +1745,8 @@ class TwitterAPI():
return data
elif response.status_code in (403, 404) and \
not self.headers["x-twitter-auth-type"]:
- raise exception.AuthorizationError("Login required")
+ raise exception.AuthRequired(
+ "authenticated cookies", "timeline")
elif response.status_code == 429:
self._handle_ratelimit(response)
continue
@@ -1870,19 +1998,16 @@ class TwitterAPI():
continue
if "retweeted_status_result" in legacy:
- retweet = legacy["retweeted_status_result"]["result"]
- if "tweet" in retweet:
- retweet = retweet["tweet"]
- if original_retweets:
- try:
+ try:
+ retweet = legacy["retweeted_status_result"]["result"]
+ if "tweet" in retweet:
+ retweet = retweet["tweet"]
+ if original_retweets:
retweet["legacy"]["retweeted_status_id_str"] = \
retweet["rest_id"]
retweet["_retweet_id_str"] = tweet["rest_id"]
tweet = retweet
- except KeyError:
- continue
- else:
- try:
+ else:
legacy["retweeted_status_id_str"] = \
retweet["rest_id"]
tweet["author"] = \
@@ -1904,8 +2029,11 @@ class TwitterAPI():
rtlegacy["withheld_scope"]
legacy["full_text"] = rtlegacy["full_text"]
- except KeyError:
- pass
+ except Exception as exc:
+ extr.log.debug(
+ "%s: %s: %s",
+ tweet.get("rest_id"), exc.__class__.__name__, exc)
+ continue
yield tweet
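
Community tweets now carry a "community" metadata dict built by
_transform_community() and cached next to users. Its approximate shape, with
illustrative values (field names taken from the code above):

    community = {
        "id"          : 12345,
        "name"        : "Example Community",
        "description" : "What this community is about",
        "date"        : "2023-01-01 00:00:00",  # parsed from created_at (ms)
        "nsfw"        : False,
        "role"        : "Member",
        "member_count": 4200,
        "rules"       : ["Be nice", "Stay on topic"],
        "admin"       : "admin_screen_name",    # may be None
        "creator"     : "creator_screen_name",  # may be None
        "banner"      : "https://pbs.twimg.com/...",  # may be None
    }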
diff --git a/gallery_dl/extractor/vichan.py b/gallery_dl/extractor/vichan.py
index f99b5de..cbb44ee 100644
--- a/gallery_dl/extractor/vichan.py
+++ b/gallery_dl/extractor/vichan.py
@@ -26,6 +26,10 @@ BASE_PATTERN = VichanExtractor.update({
"root": None,
"pattern": r"smuglo(?:\.li|li\.net)",
},
+ "gurochan": {
+ "root": "https://boards.guro.cx",
+ "pattern": r"boards\.guro\.cx",
+ },
})
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index 3341594..fca8911 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -63,7 +63,7 @@ class ZerochanExtractor(BooruExtractor):
def _parse_entry_html(self, entry_id):
url = f"{self.root}/{entry_id}"
- page = self.request(url).text
+ page = self.request(url, expected=(500,)).text
try:
jsonld = self._extract_jsonld(page)
@@ -191,7 +191,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
metadata = self.config("metadata")
while True:
- page = self.request(url, params=params).text
+ page = self.request(url, params=params, expected=(500,)).text
thumbs = text.extr(page, '<ul id="thumbs', '</ul>')
extr = text.extract_from(thumbs)
diff --git a/gallery_dl/extractor/zzup.py b/gallery_dl/extractor/zzup.py
deleted file mode 100644
index 7393931..0000000
--- a/gallery_dl/extractor/zzup.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://zzup.com/"""
-
-from .common import GalleryExtractor
-from .. import text
-
-
-class ZzupGalleryExtractor(GalleryExtractor):
- category = "zzup"
- directory_fmt = ("{category}", "{title}")
- filename_fmt = "{num:>03}.{extension}"
- archive_fmt = "{slug}_{num}"
- root = "https://zzup.com"
- pattern = (r"(?:https?://)?(up\.|w+\.)?zzup\.com(/(?:viewalbum|content)"
- r"/[\w=]+/([^/?#]+)/[\w=]+)/(?:index|page-\d+)\.html")
- example = "https://zzup.com/content/xyz=/12345_TITLE/123=/index.html"
-
- def __init__(self, match):
- subdomain, path, self.slug = match.groups()
- if subdomain == "up.":
- self.root = "https://up.zzup.com"
- self.images = self.images_v2
- url = f"{self.root}{path}/index.html"
- GalleryExtractor.__init__(self, match, url)
-
- def metadata(self, page):
- return {
- "slug" : self.slug,
- "title": text.unescape(text.extr(
- page, "<title>", "</title>"))[:-11],
- }
-
- def images(self, page):
- path = text.extr(page, 'class="picbox"><a target="_blank" href="', '"')
- count = text.parse_int(text.extr(path, "-pics-", "-mirror"))
- page = self.request(self.root + path).text
- url = self.root + text.extr(page, '\n<a href="', '"')
- p1, _, p2 = url.partition("/image0")
- p2 = p2[4:]
- return [(f"{p1}/image{i:>05}{p2}", None) for i in range(1, count + 1)]
-
- def images_v2(self, page):
- base = f"{self.root}/showimage/"
- results = []
-
- while True:
- for path in text.extract_iter(
- page, ' class="picbox"><a target="_blank" href="', '"'):
- url = f"{base}{'/'.join(path.split('/')[2:-2])}/zzup.com.jpg"
- results.append((url, None))
-
- pos = page.find("glyphicon-arrow-right")
- if pos < 0:
- break
- path = text.rextr(page, ' href="', '"', pos)
- page = self.request(text.urljoin(self.page_url, path)).text
-
- return results