Diffstat (limited to 'gallery_dl/extractor')
-rw-r--r--  gallery_dl/extractor/__init__.py       |  14
-rw-r--r--  gallery_dl/extractor/architizer.py     |   2
-rw-r--r--  gallery_dl/extractor/bluesky.py        | 121
-rw-r--r--  gallery_dl/extractor/common.py         |   7
-rw-r--r--  gallery_dl/extractor/deviantart.py     |  54
-rw-r--r--  gallery_dl/extractor/everia.py         |   2
-rw-r--r--  gallery_dl/extractor/fanbox.py         |  22
-rw-r--r--  gallery_dl/extractor/fapello.py        |   3
-rw-r--r--  gallery_dl/extractor/gelbooru.py       |  14
-rw-r--r--  gallery_dl/extractor/instagram.py      |  44
-rw-r--r--  gallery_dl/extractor/itaku.py          |  24
-rw-r--r--  gallery_dl/extractor/kemonoparty.py    |  22
-rw-r--r--  gallery_dl/extractor/moebooru.py       |   1
-rw-r--r--  gallery_dl/extractor/naver.py          |  61
-rw-r--r--  gallery_dl/extractor/patreon.py        |   6
-rw-r--r--  gallery_dl/extractor/pictoa.py         |  78
-rw-r--r--  gallery_dl/extractor/pinterest.py      |   9
-rw-r--r--  gallery_dl/extractor/pixiv.py          |  38
-rw-r--r--  gallery_dl/extractor/postmill.py       |  12
-rw-r--r--  gallery_dl/extractor/reddit.py         |   5
-rw-r--r--  gallery_dl/extractor/scrolller.py      | 218
-rw-r--r--  gallery_dl/extractor/seiga.py          |   4
-rw-r--r--  gallery_dl/extractor/subscribestar.py  |  67
-rw-r--r--  gallery_dl/extractor/tiktok.py         |   5
-rw-r--r--  gallery_dl/extractor/twitter.py        | 176
-rw-r--r--  gallery_dl/extractor/urlshortener.py   |  20
-rw-r--r--  gallery_dl/extractor/weasyl.py         |   3
-rw-r--r--  gallery_dl/extractor/wikifeet.py       |  15
28 files changed, 738 insertions(+), 309 deletions(-)
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 87c3798..9a7ca53 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -7,7 +7,7 @@
# published by the Free Software Foundation.
import sys
-import re
+from ..util import re_compile
modules = [
"2ch",
@@ -130,6 +130,7 @@ modules = [
"philomena",
"photovogue",
"picarto",
+ "pictoa",
"piczel",
"pillowfort",
"pinterest",
@@ -234,7 +235,8 @@ def find(url):
def add(cls):
"""Add 'cls' to the list of available extractors"""
- cls.pattern = re.compile(cls.pattern)
+ if isinstance(cls.pattern, str):
+ cls.pattern = re_compile(cls.pattern)
_cache.append(cls)
return cls
@@ -242,9 +244,11 @@ def add(cls):
def add_module(module):
"""Add all extractors in 'module' to the list of available extractors"""
classes = _get_classes(module)
- for cls in classes:
- cls.pattern = re.compile(cls.pattern)
- _cache.extend(classes)
+ if classes:
+ if isinstance(classes[0].pattern, str):
+ for cls in classes:
+ cls.pattern = re_compile(cls.pattern)
+ _cache.extend(classes)
return classes
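
Net effect of these hunks: pattern compilation becomes lazy and idempotent, since add() and add_module() only compile when cls.pattern is still a string. A minimal standalone sketch of the idea, assuming util.re_compile is a caching wrapper around re.compile (its body is not part of this diff):

    import re

    def re_compile(pattern, _cache={}):
        # assumed behavior of util.re_compile: compile once, reuse afterwards
        try:
            return _cache[pattern]
        except KeyError:
            compiled = _cache[pattern] = re.compile(pattern)
            return compiled

    class FakeExtractor:  # hypothetical extractor class
        pattern = r"(?:https?://)?example\.org/(\d+)"

    def add(cls):
        # mirrors the patched add(): compile only if still a string,
        # so registering the same class twice is harmless
        if isinstance(cls.pattern, str):
            cls.pattern = re_compile(cls.pattern)
        return cls

    add(FakeExtractor)
    add(FakeExtractor)  # no-op the second time
    print(FakeExtractor.pattern.match("https://example.org/123").group(1))  # 123
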
diff --git a/gallery_dl/extractor/architizer.py b/gallery_dl/extractor/architizer.py
index 0268224..911753b 100644
--- a/gallery_dl/extractor/architizer.py
+++ b/gallery_dl/extractor/architizer.py
@@ -54,7 +54,7 @@ class ArchitizerProjectExtractor(GalleryExtractor):
return [
(url, None)
for url in text.extract_iter(
- page, "property='og:image:secure_url' content='", "?")
+ page, 'property="og:image:secure_url" content="', "?")
]
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index f8fef93..ec274b8 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -25,10 +25,6 @@ class BlueskyExtractor(Extractor):
archive_fmt = "{filename}"
root = "https://bsky.app"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.user = match.group(1)
-
def _init(self):
meta = self.config("metadata") or ()
if meta:
@@ -87,6 +83,22 @@ class BlueskyExtractor(Extractor):
def posts(self):
return ()
+ def _posts_records(self, actor, collection):
+ depth = self.config("depth", "0")
+
+ for record in self.api.list_records(actor, collection):
+ uri = None
+ try:
+ uri = record["value"]["subject"]["uri"]
+ if "/app.bsky.feed.post/" in uri:
+ yield from self.api.get_post_thread_uri(uri, depth)
+ except exception.StopExtraction:
+ pass # deleted post
+ except Exception as exc:
+ self.log.debug(record, exc_info=exc)
+ self.log.warning("Failed to extract %s (%s: %s)",
+ uri or "record", exc.__class__.__name__, exc)
+
def _pid(self, post):
return post["uri"].rpartition("/")[2]
@@ -203,7 +215,7 @@ class BlueskyUserExtractor(BlueskyExtractor):
pass
def items(self):
- base = "{}/profile/{}/".format(self.root, self.user)
+ base = "{}/profile/{}/".format(self.root, self.groups[0])
default = ("posts" if self.config("quoted", False) or
self.config("reposts", False) else "media")
return self._dispatch_extractors((
@@ -213,6 +225,7 @@ class BlueskyUserExtractor(BlueskyExtractor):
(BlueskyPostsExtractor , base + "posts"),
(BlueskyRepliesExtractor , base + "replies"),
(BlueskyMediaExtractor , base + "media"),
+ (BlueskyVideoExtractor , base + "video"),
(BlueskyLikesExtractor , base + "likes"),
), (default,))
@@ -223,7 +236,8 @@ class BlueskyPostsExtractor(BlueskyExtractor):
example = "https://bsky.app/profile/HANDLE/posts"
def posts(self):
- return self.api.get_author_feed(self.user, "posts_and_author_threads")
+ return self.api.get_author_feed(
+ self.groups[0], "posts_and_author_threads")
class BlueskyRepliesExtractor(BlueskyExtractor):
@@ -232,7 +246,8 @@ class BlueskyRepliesExtractor(BlueskyExtractor):
example = "https://bsky.app/profile/HANDLE/replies"
def posts(self):
- return self.api.get_author_feed(self.user, "posts_with_replies")
+ return self.api.get_author_feed(
+ self.groups[0], "posts_with_replies")
class BlueskyMediaExtractor(BlueskyExtractor):
@@ -241,7 +256,18 @@ class BlueskyMediaExtractor(BlueskyExtractor):
example = "https://bsky.app/profile/HANDLE/media"
def posts(self):
- return self.api.get_author_feed(self.user, "posts_with_media")
+ return self.api.get_author_feed(
+ self.groups[0], "posts_with_media")
+
+
+class BlueskyVideoExtractor(BlueskyExtractor):
+ subcategory = "video"
+ pattern = USER_PATTERN + r"/video"
+ example = "https://bsky.app/profile/HANDLE/video"
+
+ def posts(self):
+ return self.api.get_author_feed(
+ self.groups[0], "posts_with_video")
class BlueskyLikesExtractor(BlueskyExtractor):
@@ -250,7 +276,9 @@ class BlueskyLikesExtractor(BlueskyExtractor):
example = "https://bsky.app/profile/HANDLE/likes"
def posts(self):
- return self.api.get_actor_likes(self.user)
+ if self.config("endpoint") == "getActorLikes":
+ return self.api.get_actor_likes(self.groups[0])
+ return self._posts_records(self.groups[0], "app.bsky.feed.like")
class BlueskyFeedExtractor(BlueskyExtractor):
@@ -258,12 +286,9 @@ class BlueskyFeedExtractor(BlueskyExtractor):
pattern = USER_PATTERN + r"/feed/([^/?#]+)"
example = "https://bsky.app/profile/HANDLE/feed/NAME"
- def __init__(self, match):
- BlueskyExtractor.__init__(self, match)
- self.feed = match.group(2)
-
def posts(self):
- return self.api.get_feed(self.user, self.feed)
+ actor, feed = self.groups
+ return self.api.get_feed(actor, feed)
class BlueskyListExtractor(BlueskyExtractor):
@@ -271,12 +296,9 @@ class BlueskyListExtractor(BlueskyExtractor):
pattern = USER_PATTERN + r"/lists/([^/?#]+)"
example = "https://bsky.app/profile/HANDLE/lists/ID"
- def __init__(self, match):
- BlueskyExtractor.__init__(self, match)
- self.list = match.group(2)
-
def posts(self):
- return self.api.get_list_feed(self.user, self.list)
+ actor, list_id = self.groups
+ return self.api.get_list_feed(actor, list_id)
class BlueskyFollowingExtractor(BlueskyExtractor):
@@ -285,7 +307,7 @@ class BlueskyFollowingExtractor(BlueskyExtractor):
example = "https://bsky.app/profile/HANDLE/follows"
def items(self):
- for user in self.api.get_follows(self.user):
+ for user in self.api.get_follows(self.groups[0]):
url = "https://bsky.app/profile/" + user["did"]
user["_extractor"] = BlueskyUserExtractor
yield Message.Queue, url, user
@@ -296,12 +318,9 @@ class BlueskyPostExtractor(BlueskyExtractor):
pattern = USER_PATTERN + r"/post/([^/?#]+)"
example = "https://bsky.app/profile/HANDLE/post/ID"
- def __init__(self, match):
- BlueskyExtractor.__init__(self, match)
- self.post_id = match.group(2)
-
def posts(self):
- return self.api.get_post_thread(self.user, self.post_id)
+ actor, post_id = self.groups
+ return self.api.get_post_thread(actor, post_id)
class BlueskyInfoExtractor(BlueskyExtractor):
@@ -311,7 +330,7 @@ class BlueskyInfoExtractor(BlueskyExtractor):
def items(self):
self._metadata_user = True
- self.api._did_from_actor(self.user)
+ self.api._did_from_actor(self.groups[0])
return iter(((Message.Directory, self._user),))
@@ -322,7 +341,7 @@ class BlueskyAvatarExtractor(BlueskyExtractor):
example = "https://bsky.app/profile/HANDLE/avatar"
def posts(self):
- return self._make_post(self.user, "avatar")
+ return self._make_post(self.groups[0], "avatar")
class BlueskyBackgroundExtractor(BlueskyExtractor):
@@ -332,7 +351,7 @@ class BlueskyBackgroundExtractor(BlueskyExtractor):
example = "https://bsky.app/profile/HANDLE/banner"
def posts(self):
- return self._make_post(self.user, "banner")
+ return self._make_post(self.groups[0], "banner")
class BlueskySearchExtractor(BlueskyExtractor):
@@ -341,7 +360,7 @@ class BlueskySearchExtractor(BlueskyExtractor):
example = "https://bsky.app/search?q=QUERY"
def posts(self):
- query = text.unquote(self.user.replace("+", " "))
+ query = text.unquote(self.groups[0].replace("+", " "))
return self.api.search_posts(query)
@@ -351,13 +370,14 @@ class BlueskyHashtagExtractor(BlueskyExtractor):
example = "https://bsky.app/hashtag/NAME"
def posts(self):
- return self.api.search_posts("#"+self.user, self.groups[1])
+ hashtag, order = self.groups
+ return self.api.search_posts("#"+hashtag, order)
class BlueskyAPI():
"""Interface for the Bluesky API
- https://www.docs.bsky.app/docs/category/http-reference
+ https://docs.bsky.app/docs/category/http-reference
"""
def __init__(self, extractor):
@@ -378,7 +398,7 @@ class BlueskyAPI():
"actor": self._did_from_actor(actor),
"limit": "100",
}
- return self._pagination(endpoint, params)
+ return self._pagination(endpoint, params, check_empty=True)
def get_author_feed(self, actor, filter="posts_and_author_threads"):
endpoint = "app.bsky.feed.getAuthorFeed"
@@ -416,11 +436,16 @@ class BlueskyAPI():
return self._pagination(endpoint, params)
def get_post_thread(self, actor, post_id):
+ uri = "at://{}/app.bsky.feed.post/{}".format(
+ self._did_from_actor(actor), post_id)
+ depth = self.extractor.config("depth", "0")
+ return self.get_post_thread_uri(uri, depth)
+
+ def get_post_thread_uri(self, uri, depth="0"):
endpoint = "app.bsky.feed.getPostThread"
params = {
- "uri": "at://{}/app.bsky.feed.post/{}".format(
- self._did_from_actor(actor), post_id),
- "depth" : self.extractor.config("depth", "0"),
+ "uri" : uri,
+ "depth" : depth,
"parentHeight": "0",
}
@@ -443,6 +468,18 @@ class BlueskyAPI():
params = {"actor": did}
return self._call(endpoint, params)
+ def list_records(self, actor, collection):
+ endpoint = "com.atproto.repo.listRecords"
+ actor_did = self._did_from_actor(actor)
+ params = {
+ "repo" : actor_did,
+ "collection": collection,
+ "limit" : "100",
+ # "reverse" : "false",
+ }
+ return self._pagination(endpoint, params, "records",
+ self.service_endpoint(actor_did))
+
@memcache(keyarg=1)
def resolve_handle(self, handle):
endpoint = "com.atproto.identity.resolveHandle"
@@ -523,8 +560,10 @@ class BlueskyAPI():
_refresh_token_cache.update(self.username, data["refreshJwt"])
return "Bearer " + data["accessJwt"]
- def _call(self, endpoint, params):
- url = "{}/xrpc/{}".format(self.root, endpoint)
+ def _call(self, endpoint, params, root=None):
+ if root is None:
+ root = self.root
+ url = "{}/xrpc/{}".format(root, endpoint)
while True:
self.authenticate()
@@ -549,9 +588,13 @@ class BlueskyAPI():
self.extractor.log.debug("Server response: %s", response.text)
raise exception.StopExtraction(msg)
- def _pagination(self, endpoint, params, key="feed"):
+ def _pagination(self, endpoint, params,
+ key="feed", root=None, check_empty=False):
while True:
- data = self._call(endpoint, params)
+ data = self._call(endpoint, params, root)
+
+ if check_empty and not data[key]:
+ return
yield from data[key]
cursor = data.get("cursor")
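
The reworked _pagination() takes an optional per-call root (used above so listRecords can be sent to the account's own service endpoint) and a check_empty flag so endpoints that keep returning a cursor alongside an empty result list still terminate. A standalone sketch of the cursor loop, where call() is a placeholder for the authenticated BlueskyAPI._call():

    def paginate(call, endpoint, params, key="feed", check_empty=False):
        # cursor-based pagination as in BlueskyAPI._pagination (sketch)
        while True:
            data = call(endpoint, params)
            if check_empty and not data[key]:
                return
            yield from data[key]
            cursor = data.get("cursor")
            if not cursor:
                return
            params["cursor"] = cursor  # assumed continuation parameter
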
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 995505f..c430ec1 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -59,7 +59,7 @@ class Extractor():
@classmethod
def from_url(cls, url):
if isinstance(cls.pattern, str):
- cls.pattern = re.compile(cls.pattern)
+ cls.pattern = util.re_compile(cls.pattern)
match = cls.pattern.match(url)
return cls(match) if match else None
@@ -240,6 +240,11 @@ class Extractor():
raise exception.HttpError(msg, response)
+ def request_location(self, url, **kwargs):
+ kwargs.setdefault("method", "HEAD")
+ kwargs.setdefault("allow_redirects", False)
+ return self.request(url, **kwargs).headers.get("location", "")
+
_handle_429 = util.false
def wait(self, seconds=None, until=None, adjust=1.0,
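
The new request_location() centralizes the "HEAD request, no redirects, read the Location header" idiom that the fanbox, patreon, pinterest, pixiv, postmill, reddit, seiga, and tiktok hunks below switch to. A rough standalone equivalent using plain requests:

    import requests

    def request_location(url, **kwargs):
        # sketch of Extractor.request_location(): issue a HEAD request,
        # don't follow redirects, and return the Location header (or "")
        kwargs.setdefault("allow_redirects", False)
        response = requests.head(url, **kwargs)
        return response.headers.get("location", "")

    # e.g. resolving a pixiv.me short link (see the pixiv.py hunk below)
    # request_location("https://pixiv.me/USER")
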
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 378c7ec..ae475e2 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -867,6 +867,9 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
)["deviation"]["extended"]["deviationUuid"]
yield self.api.deviation(deviation_uuid)
+ def _unescape_json(self, json):
+ return json.replace('\\"', '"').replace("\\\\", "\\")
+
class DeviantartUserExtractor(DeviantartExtractor):
"""Extractor for an artist's user profile"""
@@ -1046,7 +1049,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
DeviantartExtractor.__init__(self, match)
self.user = None
- def deviations(self, stash_id=None):
+ def deviations(self, stash_id=None, stash_data=None):
if stash_id is None:
legacy_url, stash_id = self.groups
else:
@@ -1068,14 +1071,33 @@ class DeviantartStashExtractor(DeviantartExtractor):
deviation["_page"] = page
deviation["index"] = text.parse_int(text.extr(
page, '\\"deviationId\\":', ','))
+
+ deviation["stash_id"] = stash_id
+ if stash_data:
+ folder = stash_data["folder"]
+ deviation["stash_name"] = folder["name"]
+ deviation["stash_folder"] = folder["folderId"]
+ deviation["stash_parent"] = folder["parentId"] or 0
+ deviation["stash_description"] = \
+ folder["richDescription"]["excerpt"]
+ else:
+ deviation["stash_name"] = ""
+ deviation["stash_description"] = ""
+ deviation["stash_folder"] = 0
+ deviation["stash_parent"] = 0
+
yield deviation
return
+ stash_data = text.extr(page, ',\\"stash\\":', ',\\"@@')
+ if stash_data:
+ stash_data = util.json_loads(self._unescape_json(stash_data))
+
for sid in text.extract_iter(
page, 'href="https://www.deviantart.com/stash/', '"'):
if sid == stash_id or sid.endswith("#comments"):
continue
- yield from self.deviations(sid)
+ yield from self.deviations(sid, stash_data)
class DeviantartFavoriteExtractor(DeviantartExtractor):
@@ -1276,28 +1298,26 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
deviation = self.api.deviation(uuid)
deviation["_page"] = page
+ deviation["index_file"] = 0
+ deviation["num"] = deviation["count"] = 1
- _dev_info = text.extr(
- page, '\\"deviationExtended\\":', ',\\"deviation\\":', None)
- # Clean up escaped quotes
- _json_str = re.sub(
- r'(?<!\\)\\{1}"', '"', _dev_info).replace("\\'", "'")
- _extended_info = util.json_loads(_json_str)[self.deviation_id]
- additional_media = _extended_info.get("additionalMedia") or ()
+ additional_media = text.extr(page, ',\\"additionalMedia\\":', '}],\\"')
+ if not additional_media:
+ yield deviation
+ return
- if additional_media:
- self.filename_fmt = ("{category}_{index}_{index_file}_{title}_"
- "{num:>02}.{extension}")
- self.archive_fmt = ("g_{_username}_{index}{index_file:?_//}."
- "{extension}")
+ self.filename_fmt = ("{category}_{index}_{index_file}_{title}_"
+ "{num:>02}.{extension}")
+ self.archive_fmt = ("g_{_username}_{index}{index_file:?_//}."
+ "{extension}")
- deviation["index_file"] = 0
+ additional_media = util.json_loads(self._unescape_json(
+ additional_media) + "}]")
deviation["count"] = 1 + len(additional_media)
- deviation["num"] = 1
yield deviation
for index, post in enumerate(additional_media):
- uri = post["media"]["baseUri"].encode().decode("unicode-escape")
+ uri = self._eclipse_media(post["media"], "fullview")[0]
deviation["content"]["src"] = uri
deviation["num"] += 1
deviation["index_file"] = post["fileId"]
diff --git a/gallery_dl/extractor/everia.py b/gallery_dl/extractor/everia.py
index e41f6f6..3bf0a74 100644
--- a/gallery_dl/extractor/everia.py
+++ b/gallery_dl/extractor/everia.py
@@ -57,7 +57,7 @@ class EveriaPostExtractor(EveriaExtractor):
data = {
"title": text.unescape(
- text.extr(page, 'itemprop="headline">', "</h1>")),
+ text.extr(page, 'itemprop="headline">', "</h")),
"tags": list(text.extract_iter(page, 'rel="tag">', "</a>")),
"post_url": url,
"post_category": text.extr(
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index 9bbfb43..3b43134 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -173,15 +173,16 @@ class FanboxExtractor(Extractor):
return plans
def _get_comment_data(self, post_id):
- url = ("https://api.fanbox.cc/post.listComments"
+ url = ("https://api.fanbox.cc/post.getComments"
"?limit=10&postId=" + post_id)
comments = []
while url:
url = text.ensure_http_scheme(url)
body = self.request(url, headers=self.headers).json()["body"]
- comments.extend(body["items"])
- url = body["nextUrl"]
+ data = body["commentList"]
+ comments.extend(data["items"])
+ url = data["nextUrl"]
return comments
def _get_urls_from_post(self, content_body, post):
@@ -296,8 +297,7 @@ class FanboxExtractor(Extractor):
url = "https://www.pixiv.net/fanbox/"+content_id
# resolve redirect
try:
- url = self.request(url, method="HEAD",
- allow_redirects=False).headers["location"]
+ url = self.request_location(url)
except Exception as exc:
url = None
self.log.warning("Unable to extract fanbox embed %s (%s: %s)",
@@ -392,13 +392,7 @@ class FanboxRedirectExtractor(Extractor):
pattern = r"(?:https?://)?(?:www\.)?pixiv\.net/fanbox/creator/(\d+)"
example = "https://www.pixiv.net/fanbox/creator/12345"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.user_id = match.group(1)
-
def items(self):
- url = "https://www.pixiv.net/fanbox/creator/" + self.user_id
- data = {"_extractor": FanboxCreatorExtractor}
- response = self.request(
- url, method="HEAD", allow_redirects=False, notfound="user")
- yield Message.Queue, response.headers["Location"], data
+ url = "https://www.pixiv.net/fanbox/creator/" + self.groups[0]
+ location = self.request_location(url, notfound="user")
+ yield Message.Queue, location, {"_extractor": FanboxCreatorExtractor}
diff --git a/gallery_dl/extractor/fapello.py b/gallery_dl/extractor/fapello.py
index 838ae7b..cf18edc 100644
--- a/gallery_dl/extractor/fapello.py
+++ b/gallery_dl/extractor/fapello.py
@@ -72,10 +72,13 @@ class FapelloModelExtractor(Extractor):
if not page:
return
+ url = None
for url in text.extract_iter(page, '<a href="', '"'):
if url == "javascript:void(0);":
continue
yield Message.Queue, url, data
+ if url is None:
+ return
num += 1
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index eb07739..f24b696 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -107,22 +107,16 @@ class GelbooruBase():
return
params["pid"] += self.per_page
- @staticmethod
- def _file_url(post):
+ def _file_url(self, post):
url = post["file_url"]
if url.endswith((".webm", ".mp4")):
+ post["_fallback"] = (url,)
md5 = post["md5"]
+ root = text.root_from_url(post["preview_url"])
path = "/images/{}/{}/{}.webm".format(md5[0:2], md5[2:4], md5)
- post["_fallback"] = GelbooruBase._video_fallback(path)
- url = "https://img4.gelbooru.com" + path
+ url = root + path
return url
- @staticmethod
- def _video_fallback(path):
- yield "https://img3.gelbooru.com" + path
- yield "https://img2.gelbooru.com" + path
- yield "https://img1.gelbooru.com" + path
-
def _notes(self, post, page):
notes_data = text.extr(page, '<section id="notes"', '</section>')
if not notes_data:
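
Instead of cycling hardcoded img1..img4 hosts, _file_url() now derives the video host from the post's own preview_url and keeps the original file_url as the single fallback. A sketch of the URL construction, with urllib approximating text.root_from_url and made-up post values:

    from urllib.parse import urlsplit

    def video_url(post):
        url = post["file_url"]
        if url.endswith((".webm", ".mp4")):
            md5 = post["md5"]
            parts = urlsplit(post["preview_url"])
            root = "{}://{}".format(parts.scheme, parts.netloc)  # ~ text.root_from_url
            url = root + "/images/{}/{}/{}.webm".format(md5[0:2], md5[2:4], md5)
        return url

    post = {  # hypothetical post record for illustration
        "file_url"   : "https://img4.gelbooru.com/images/ab/cd/abcd1234.mp4",
        "preview_url": "https://img3.gelbooru.com/thumbnails/ab/cd/abcd1234.jpg",
        "md5"        : "abcd1234",
    }
    print(video_url(post))  # https://img3.gelbooru.com/images/ab/cd/abcd1234.webm
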
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 432a7ad..0f88cac 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -606,6 +606,20 @@ class InstagramHighlightsExtractor(InstagramExtractor):
return self.api.highlights_media(uid)
+class InstagramFollowersExtractor(InstagramExtractor):
+ """Extractor for an Instagram user's followers"""
+ subcategory = "followers"
+ pattern = USER_PATTERN + r"/followers"
+ example = "https://www.instagram.com/USER/followers/"
+
+ def items(self):
+ uid = self.api.user_id(self.item)
+ for user in self.api.user_followers(uid):
+ user["_extractor"] = InstagramUserExtractor
+ url = "{}/{}".format(self.root, user["username"])
+ yield Message.Queue, url, user
+
+
class InstagramFollowingExtractor(InstagramExtractor):
"""Extractor for an Instagram user's followed users"""
subcategory = "following"
@@ -693,11 +707,21 @@ class InstagramPostExtractor(InstagramExtractor):
"""Extractor for an Instagram post"""
subcategory = "post"
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
- r"/(?:[^/?#]+/)?(?:p|tv|reel)/([^/?#]+)")
+ r"/(?:share/()|[^/?#]+/)?(?:p|tv|reel)/([^/?#]+)")
example = "https://www.instagram.com/p/abcdefg/"
def posts(self):
- return self.api.media(self.item)
+ share, shortcode = self.groups
+ if share is not None:
+ url = text.ensure_http_scheme(self.url)
+ headers = {
+ "Sec-Fetch-Dest": "empty",
+ "Sec-Fetch-Mode": "navigate",
+ "Sec-Fetch-Site": "same-origin",
+ }
+ location = self.request_location(url, headers=headers)
+ shortcode = location.split("/")[-2]
+ return self.api.media(shortcode)
class InstagramRestAPI():
@@ -816,6 +840,11 @@ class InstagramRestAPI():
params = {"count": 30}
return self._pagination(endpoint, params)
+ def user_followers(self, user_id):
+ endpoint = "/v1/friendships/{}/followers/".format(user_id)
+ params = {"count": 12}
+ return self._pagination_following(endpoint, params)
+
def user_following(self, user_id):
endpoint = "/v1/friendships/{}/following/".format(user_id)
params = {"count": 12}
@@ -908,9 +937,10 @@ class InstagramRestAPI():
for item in data["items"]:
yield from item["media_items"]
- if "next_max_id" not in data:
+ next_max_id = data.get("next_max_id")
+ if not next_max_id:
return extr._update_cursor(None)
- params["max_id"] = extr._update_cursor(data["next_max_id"])
+ params["max_id"] = extr._update_cursor(next_max_id)
def _pagination_following(self, endpoint, params):
extr = self.extractor
@@ -921,10 +951,10 @@ class InstagramRestAPI():
yield from data["users"]
- if len(data["users"]) < params["count"]:
+ next_max_id = data.get("next_max_id")
+ if not next_max_id:
return extr._update_cursor(None)
- params["max_id"] = extr._update_cursor(
- params["max_id"] + params["count"])
+ params["max_id"] = extr._update_cursor(next_max_id)
class InstagramGraphqlAPI():
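
Share links (instagram.com/share/...) carry no shortcode in the URL itself, so the post extractor now resolves the redirect first and takes the shortcode from the Location path; the pagination hunks likewise switch from computed offsets to the server-supplied next_max_id cursor. A sketch of the shortcode extraction, with a hypothetical Location value:

    # Location header returned for a /share/ URL (hypothetical example)
    location = "https://www.instagram.com/reel/AbCdEfG/"
    shortcode = location.split("/")[-2]
    print(shortcode)  # AbCdEfG
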
diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py
index 2974b59..e602665 100644
--- a/gallery_dl/extractor/itaku.py
+++ b/gallery_dl/extractor/itaku.py
@@ -65,6 +65,15 @@ class ItakuGalleryExtractor(ItakuExtractor):
return self.api.galleries_images(*self.groups)
+class ItakuStarsExtractor(ItakuExtractor):
+ subcategory = "stars"
+ pattern = BASE_PATTERN + r"/profile/([^/?#]+)/stars(?:/(\d+))?"
+ example = "https://itaku.ee/profile/USER/stars"
+
+ def posts(self):
+ return self.api.galleries_images_starred(*self.groups)
+
+
class ItakuImageExtractor(ItakuExtractor):
subcategory = "image"
pattern = BASE_PATTERN + r"/images/(\d+)"
@@ -139,6 +148,21 @@ class ItakuAPI():
}
return self._pagination(endpoint, params, self.image)
+ def galleries_images_starred(self, username, section=None):
+ endpoint = "/galleries/images/user_starred_imgs/"
+ params = {
+ "cursor" : None,
+ "stars_of" : self.user(username)["owner"],
+ "sections" : section,
+ "date_range": "",
+ "ordering" : "-date_added",
+ "maturity_rating": ("SFW", "Questionable", "NSFW"),
+ "page" : "1",
+ "page_size" : "30",
+ "visibility": ("PUBLIC", "PROFILE_ONLY"),
+ }
+ return self._pagination(endpoint, params, self.image)
+
def image(self, image_id):
endpoint = "/galleries/images/{}/".format(image_id)
return self._call(endpoint)
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index de7d040..79070ee 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -153,7 +153,7 @@ class KemonopartyExtractor(Extractor):
file["type"] = "archive"
if archives:
try:
- data = self.api.posts_archives(file["hash"])
+ data = self.api.file(file["hash"])
data.update(file)
post_archives.append(data)
except Exception as exc:
@@ -319,12 +319,9 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
def posts(self):
_, _, service, creator_id, query = self.groups
params = text.parse_query(query)
- if params.get("tag"):
- return self.api.creator_tagged_posts(
- service, creator_id, params.get("tag"), params.get("o"))
- else:
- return self.api.creator_posts(
- service, creator_id, params.get("o"), params.get("q"))
+ return self.api.creator_posts_legacy(
+ service, creator_id,
+ params.get("o"), params.get("q"), params.get("tag"))
class KemonopartyPostsExtractor(KemonopartyExtractor):
@@ -524,18 +521,19 @@ class KemonoAPI():
params = {"q": query, "o": offset, "tag": tags}
return self._pagination(endpoint, params, 50, "posts")
- def posts_archives(self, file_hash):
- endpoint = "/posts/archives/" + file_hash
- return self._call(endpoint)["archive"]
+ def file(self, file_hash):
+ endpoint = "/file/" + file_hash
+ return self._call(endpoint)
def creator_posts(self, service, creator_id, offset=0, query=None):
endpoint = "/{}/user/{}".format(service, creator_id)
params = {"q": query, "o": offset}
return self._pagination(endpoint, params, 50)
- def creator_tagged_posts(self, service, creator_id, tags, offset=0):
+ def creator_posts_legacy(self, service, creator_id,
+ offset=0, query=None, tags=None):
endpoint = "/{}/user/{}/posts-legacy".format(service, creator_id)
- params = {"o": offset, "tag": tags}
+ params = {"o": offset, "tag": tags, "q": query}
return self._pagination(endpoint, params, 50, "results")
def creator_announcements(self, service, creator_id):
diff --git a/gallery_dl/extractor/moebooru.py b/gallery_dl/extractor/moebooru.py
index e97d273..9fd66e2 100644
--- a/gallery_dl/extractor/moebooru.py
+++ b/gallery_dl/extractor/moebooru.py
@@ -127,6 +127,7 @@ class MoebooruPoolExtractor(MoebooruExtractor):
if self.config("metadata"):
url = "{}/pool/show/{}.json".format(self.root, self.pool_id)
pool = self.request(url).json()
+ pool["name"] = pool["name"].replace("_", " ")
pool.pop("posts", None)
return {"pool": pool}
return {"pool": text.parse_int(self.pool_id)}
diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py
index d3150e6..2287325 100644
--- a/gallery_dl/extractor/naver.py
+++ b/gallery_dl/extractor/naver.py
@@ -9,7 +9,9 @@
"""Extractors for https://blog.naver.com/"""
from .common import GalleryExtractor, Extractor, Message
-from .. import text
+from .. import text, util
+import datetime
+import time
class NaverBase():
@@ -59,19 +61,66 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
"user" : extr("var nickName = '", "'"),
},
}
- data["post"]["date"] = text.parse_datetime(
+
+ data["post"]["date"] = self._parse_datetime(
extr('se_publishDate pcol2">', '<') or
- extr('_postAddDate">', '<'), "%Y. %m. %d. %H:%M")
+ extr('_postAddDate">', '<'))
+
return data
+ def _parse_datetime(self, date_string):
+ if "전" in date_string:
+ ts = time.gmtime()
+ return datetime.datetime(ts.tm_year, ts.tm_mon, ts.tm_mday)
+ return text.parse_datetime(date_string, "%Y. %m. %d. %H:%M")
+
def images(self, page):
- results = []
+ files = []
+ self._extract_images(files, page)
+ if self.config("videos", True):
+ self._extract_videos(files, page)
+ return files
+
+ def _extract_images(self, files, page):
for url in text.extract_iter(page, 'data-lazy-src="', '"'):
url = url.replace("://post", "://blog", 1).partition("?")[0]
if "\ufffd" in text.unquote(url):
url = text.unquote(url, encoding="EUC-KR")
- results.append((url, None))
- return results
+ files.append((url, None))
+
+ def _extract_videos(self, files, page):
+ for module in text.extract_iter(page, " data-module='", "'></"):
+ if '"v2_video"' not in module:
+ continue
+ media = util.json_loads(module)["data"]
+ try:
+ self._extract_media(files, media)
+ except Exception as exc:
+ self.log.warning("%s: Failed to extract video '%s' (%s: %s)",
+ self.post_id, media.get("vid"),
+ exc.__class__.__name__, exc)
+
+ def _extract_media(self, files, media):
+ url = ("https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/" +
+ media["vid"])
+ params = {
+ "key" : media["inkey"],
+ "sid" : "2",
+ # "pid": "00000000-0000-0000-0000-000000000000",
+ "nonce": int(time.time()),
+ "devt" : "html5_pc",
+ "prv" : "N",
+ "aup" : "N",
+ "stpb" : "N",
+ "cpl" : "ko_KR",
+ "providerEnv": "real",
+ "adt" : "glad",
+ "lc" : "ko_KR",
+ }
+ data = self.request(url, params=params).json()
+ video = max(data["videos"]["list"],
+ key=lambda v: v.get("size") or 0)
+ files.append((video["source"], video))
class NaverBlogExtractor(NaverBase, Extractor):
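
_parse_datetime() special-cases relative Korean timestamps ("전" means "ago"), which cannot be parsed with a fixed format string, so they collapse to today's UTC date. A standalone sketch, with strptime standing in for text.parse_datetime:

    import datetime
    import time

    def parse_date(date_string):
        # as in NaverPostExtractor._parse_datetime: "3시간 전" ("3 hours
        # ago") has no absolute date, so fall back to today's date (UTC)
        if "전" in date_string:
            ts = time.gmtime()
            return datetime.datetime(ts.tm_year, ts.tm_mon, ts.tm_mday)
        return datetime.datetime.strptime(date_string, "%Y. %m. %d. %H:%M")

    print(parse_date("2024. 5. 1. 12:30"))  # 2024-05-01 12:30:00
    print(parse_date("3시간 전"))            # today at 00:00
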
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index b8c6acb..2b6742e 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -109,11 +109,7 @@ class PatreonExtractor(Extractor):
def _attachments(self, post):
for attachment in post.get("attachments") or ():
- url = self.request(
- attachment["url"], method="HEAD",
- allow_redirects=False, fatal=False,
- ).headers.get("Location")
-
+ url = self.request_location(attachment["url"], fatal=False)
if url:
yield "attachment", url, attachment["name"]
diff --git a/gallery_dl/extractor/pictoa.py b/gallery_dl/extractor/pictoa.py
new file mode 100644
index 0000000..a8008cf
--- /dev/null
+++ b/gallery_dl/extractor/pictoa.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://pictoa.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:[\w]+\.)?pictoa\.com(?:\.de)?"
+
+
+class PictoaExtractor(Extractor):
+ """Base class for pictoa extractors"""
+ category = "pictoa"
+ root = "https://pictoa.com"
+ directory_fmt = ("{category}", "{album_id} {album_title}")
+ filename_fmt = "{id}.{extension}"
+ archive_fmt = "{id}"
+
+
+class PictoaImageExtractor(PictoaExtractor):
+ """Extractor for single images from pictoa.com"""
+ subcategory = "image"
+ pattern = BASE_PATTERN + r"/albums/(?:[\w-]+-)?(\d+)/(\d+)"
+ example = "https://www.pictoa.com/albums/NAME-12345/12345.html"
+
+ def items(self):
+ album_id, image_id = self.groups
+
+ url = "{}/albums/{}/{}.html".format(self.root, album_id, image_id)
+ page = self.request(url).text
+ album_title = text.extr(page, 'property="og:title" content="', '"')
+ image_url = text.extr(page, 'property="og:image" content="', '"')
+
+ data = {
+ "album_id" : album_id,
+ "album_title": album_title.rpartition(" #")[0],
+ "id" : image_id,
+ "url" : image_url,
+ }
+
+ text.nameext_from_url(image_url, data)
+ yield Message.Directory, data
+ yield Message.Url, image_url, data
+
+
+class PictoaAlbumExtractor(PictoaExtractor):
+ """Extractor for image albums from pictoa.com"""
+ subcategory = "album"
+ pattern = BASE_PATTERN + r"/albums/(?:[\w-]+-)?(\d+).html"
+ example = "https://www.pictoa.com/albums/NAME-12345.html"
+
+ def items(self):
+ album_id = self.groups[0]
+ url = "{}/albums/{}.html".format(self.root, album_id)
+ page = self.request(url).text
+
+ album_data = {
+ "album_id" : album_id,
+ "album_title": text.extr(page, "<h1>", "<"),
+ "tags" : text.split_html(text.extr(
+ page, '<ol class="related-categories', '</ol>'))[1:],
+ "_extractor" : PictoaImageExtractor,
+ }
+
+ while True:
+ container = text.extr(page, '<main>', '<span id="flag" >')
+ for url in text.extract_iter(
+ container, '<a rel="nofollow" href="', '"'):
+ yield Message.Queue, url, album_data
+
+ url = text.extr(page, '<link rel="next" href="', '"')
+ if not url:
+ break
+ page = self.request(url).text
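
The two URL patterns of the new pictoa extractors, matched standalone against the example URLs from the file above:

    import re

    BASE_PATTERN = r"(?:https?://)?(?:[\w]+\.)?pictoa\.com(?:\.de)?"
    album = re.compile(BASE_PATTERN + r"/albums/(?:[\w-]+-)?(\d+).html")
    image = re.compile(BASE_PATTERN + r"/albums/(?:[\w-]+-)?(\d+)/(\d+)")

    print(album.match("https://www.pictoa.com/albums/NAME-12345.html").group(1))
    # 12345
    print(image.match("https://www.pictoa.com/albums/NAME-12345/67890.html").groups())
    # ('12345', '67890')
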
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 1a299c1..ad8c681 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -380,15 +380,10 @@ class PinterestPinitExtractor(PinterestExtractor):
pattern = r"(?:https?://)?pin\.it/([^/?#]+)"
example = "https://pin.it/abcde"
- def __init__(self, match):
- PinterestExtractor.__init__(self, match)
- self.shortened_id = match.group(1)
-
def items(self):
url = "https://api.pinterest.com/url_shortener/{}/redirect/".format(
- self.shortened_id)
- response = self.request(url, method="HEAD", allow_redirects=False)
- location = response.headers.get("Location")
+ self.groups[0])
+ location = self.request_location(url)
if not location or not PinterestPinExtractor.pattern.match(location):
raise exception.NotFoundError("pin")
yield Message.Queue, location, {"_extractor": PinterestPinExtractor}
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index e8050b3..dfed1aa 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -69,7 +69,7 @@ class PixivExtractor(Extractor):
files = self._extract_files(work)
if self.meta_user:
- work.update(self.api.user_detail(work["user"]["id"]))
+ work.update(self.api.user_detail(str(work["user"]["id"])))
if self.meta_comments:
if work["total_comments"] and not work.get("_ajax"):
try:
@@ -516,16 +516,10 @@ class PixivMeExtractor(PixivExtractor):
pattern = r"(?:https?://)?pixiv\.me/([^/?#]+)"
example = "https://pixiv.me/USER"
- def __init__(self, match):
- PixivExtractor.__init__(self, match)
- self.account = match.group(1)
-
def items(self):
- url = "https://pixiv.me/" + self.account
- data = {"_extractor": PixivUserExtractor}
- response = self.request(
- url, method="HEAD", allow_redirects=False, notfound="user")
- yield Message.Queue, response.headers["Location"], data
+ url = "https://pixiv.me/" + self.groups[0]
+ location = self.request_location(url, notfound="user")
+ yield Message.Queue, location, {"_extractor": PixivUserExtractor}
class PixivWorkExtractor(PixivExtractor):
@@ -887,7 +881,7 @@ class PixivNovelExtractor(PixivExtractor):
novels = itertools.islice(novels, self.max_posts)
for novel in novels:
if self.meta_user:
- novel.update(self.api.user_detail(novel["user"]["id"]))
+ novel.update(self.api.user_detail(str(novel["user"]["id"])))
if self.meta_comments:
if novel["total_comments"]:
novel["comments"] = list(
@@ -940,15 +934,19 @@ class PixivNovelExtractor(PixivExtractor):
illusts[marker[11:].partition("-")[0]] = None
if desktop:
- novel_id = str(novel["id"])
- url = "{}/novel/show.php?id={}".format(
- self.root, novel_id)
- data = util.json_loads(text.extr(
- self.request(url, headers=headers).text,
- "id=\"meta-preload-data\" content='", "'"))
-
- for image in (data["novel"][novel_id]
- ["textEmbeddedImages"]).values():
+ try:
+ novel_id = str(novel["id"])
+ url = "{}/novel/show.php?id={}".format(
+ self.root, novel_id)
+ data = util.json_loads(text.extr(
+ self.request(url, headers=headers).text,
+ "id=\"meta-preload-data\" content='", "'"))
+ images = (data["novel"][novel_id]
+ ["textEmbeddedImages"]).values()
+ except Exception:
+ images = ()
+
+ for image in images:
url = image.pop("urls")["original"]
novel.update(image)
novel["date_url"] = self._date_from_url(url)
diff --git a/gallery_dl/extractor/postmill.py b/gallery_dl/extractor/postmill.py
index 8877175..6ea18e6 100644
--- a/gallery_dl/extractor/postmill.py
+++ b/gallery_dl/extractor/postmill.py
@@ -153,17 +153,13 @@ class PostmillPostExtractor(PostmillExtractor):
class PostmillShortURLExtractor(PostmillExtractor):
"""Extractor for short submission URLs"""
subcategory = "shorturl"
- pattern = BASE_PATTERN + r"/(\d+)$"
+ pattern = BASE_PATTERN + r"(/\d+)$"
example = "https://raddle.me/123"
- def __init__(self, match):
- PostmillExtractor.__init__(self, match)
- self.post_id = match.group(3)
-
def items(self):
- url = self.root + "/" + self.post_id
- response = self.request(url, method="HEAD", allow_redirects=False)
- full_url = text.urljoin(url, response.headers["Location"])
+ url = self.root + self.groups[2]
+ location = self.request_location(url)
+ full_url = text.urljoin(url, location)
yield Message.Queue, full_url, {"_extractor": PostmillPostExtractor}
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 7a9e3c5..76eadc4 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -357,10 +357,9 @@ class RedditRedirectExtractor(Extractor):
sub_type = "user"
url = "https://www.reddit.com/{}/{}/s/{}".format(
sub_type, subreddit, share_url)
+ location = self.request_location(url, notfound="submission")
data = {"_extractor": RedditSubmissionExtractor}
- response = self.request(url, method="HEAD", allow_redirects=False,
- notfound="submission")
- yield Message.Queue, response.headers["Location"], data
+ yield Message.Queue, location, data
class RedditAPI():
diff --git a/gallery_dl/extractor/scrolller.py b/gallery_dl/extractor/scrolller.py
index c818c98..f97fa14 100644
--- a/gallery_dl/extractor/scrolller.py
+++ b/gallery_dl/extractor/scrolller.py
@@ -20,8 +20,8 @@ class ScrolllerExtractor(Extractor):
category = "scrolller"
root = "https://scrolller.com"
directory_fmt = ("{category}", "{subredditTitle}")
- filename_fmt = "{id}{title:? //}.{extension}"
- archive_fmt = "{id}"
+ filename_fmt = "{id}{num:?_//>03}{title:? //}.{extension}"
+ archive_fmt = "{id}_{num}"
request_interval = (0.5, 1.5)
def _init(self):
@@ -31,23 +31,36 @@ class ScrolllerExtractor(Extractor):
self.login()
for post in self.posts():
-
- media_sources = post.get("mediaSources")
- if not media_sources:
- self.log.warning("%s: No media files", post.get("id"))
- continue
-
- src = max(media_sources, key=self._sort_key)
- post.update(src)
- url = src["url"]
- text.nameext_from_url(url, post)
+ files = self._extract_files(post)
+ post["count"] = len(files)
yield Message.Directory, post
- yield Message.Url, url, post
+ for file in files:
+ url = file["url"]
+ post.update(file)
+ yield Message.Url, url, text.nameext_from_url(url, post)
def posts(self):
return ()
+ def _extract_files(self, post):
+ album = post.pop("albumContent", None)
+ if not album:
+ sources = post.get("mediaSources")
+ if not sources:
+ self.log.warning("%s: No media files", post.get("id"))
+ return ()
+ src = max(sources, key=self._sort_key)
+ src["num"] = 0
+ return (src,)
+
+ files = []
+ for num, media in enumerate(album, 1):
+ src = max(media["mediaSources"], key=self._sort_key)
+ src["num"] = num
+ files.append(src)
+ return files
+
def login(self):
username, password = self._get_auth_info()
if username:
@@ -63,7 +76,7 @@ class ScrolllerExtractor(Extractor):
}
try:
- data = self._request_graphql("LoginQuery", variables)
+ data = self._request_graphql("LoginQuery", variables, False)
except exception.HttpError as exc:
if exc.status == 403:
raise exception.AuthenticationError()
@@ -71,10 +84,9 @@ class ScrolllerExtractor(Extractor):
return data["login"]["token"]
- def _request_graphql(self, opname, variables):
- url = "https://api.scrolller.com/api/v2/graphql"
+ def _request_graphql(self, opname, variables, admin=True):
headers = {
- "Content-Type" : "text/plain;charset=UTF-8",
+ "Content-Type" : None,
"Origin" : self.root,
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
@@ -85,14 +97,23 @@ class ScrolllerExtractor(Extractor):
"variables" : variables,
"authorization": self.auth_token,
}
+
+ if admin:
+ url = "https://api.scrolller.com/admin"
+ headers["Content-Type"] = "application/json"
+ else:
+ url = "https://api.scrolller.com/api/v2/graphql"
+ headers["Content-Type"] = "text/plain;charset=UTF-8"
+
return self.request(
url, method="POST", headers=headers, data=util.json_dumps(data),
).json()["data"]
- def _pagination(self, opname, variables):
- while True:
+ def _pagination(self, opname, variables, data=None):
+ if data is None:
data = self._request_graphql(opname, variables)
+ while True:
while "items" not in data:
data = data.popitem()[1]
yield from data["items"]
@@ -101,6 +122,8 @@ class ScrolllerExtractor(Extractor):
return
variables["iterator"] = data["iterator"]
+ data = self._request_graphql(opname, variables)
+
def _sort_key(self, src):
return src["width"], not src["isOptimized"]
@@ -114,6 +137,7 @@ class ScrolllerSubredditExtractor(ScrolllerExtractor):
def posts(self):
url, query = self.groups
filter = None
+ sort = "RANDOM"
if query:
params = text.parse_query(query)
@@ -121,12 +145,24 @@ class ScrolllerSubredditExtractor(ScrolllerExtractor):
filter = params["filter"].upper().rstrip("S")
variables = {
- "url" : url,
- "iterator" : None,
- "filter" : filter,
- "hostsDown": None,
+ "url" : url,
+ "filter": filter,
+ "sortBy": sort,
+ "limit" : 50,
}
- return self._pagination("SubredditQuery", variables)
+ subreddit = self._request_graphql(
+ "SubredditQuery", variables)["getSubreddit"]
+
+ variables = {
+ "subredditId": subreddit["id"],
+ "iterator": None,
+ "filter" : filter,
+ "sortBy" : sort,
+ "limit" : 50,
+ "isNsfw" : subreddit["isNsfw"],
+ }
+ return self._pagination(
+ "SubredditChildrenQuery", variables, subreddit["children"])
class ScrolllerFollowingExtractor(ScrolllerExtractor):
@@ -142,11 +178,14 @@ class ScrolllerFollowingExtractor(ScrolllerExtractor):
raise exception.AuthorizationError("Login required")
variables = {
- "iterator" : None,
- "hostsDown": None,
+ "iterator": None,
+ "filter" : None,
+ "limit" : 10,
+ "isNsfw" : False,
+ "sortBy" : "RANDOM",
}
- for subreddit in self._pagination("FollowingQuery", variables):
+ for subreddit in self._pagination("GetFollowingSubreddits", variables):
url = self.root + subreddit["url"]
subreddit["_extractor"] = ScrolllerSubredditExtractor
yield Message.Queue, url, subreddit
@@ -156,39 +195,62 @@ class ScrolllerPostExtractor(ScrolllerExtractor):
"""Extractor for media from a single scrolller post"""
subcategory = "post"
pattern = BASE_PATTERN + r"/(?!r/|following$)([^/?#]+)"
- example = "https://scrolller.com/title-slug-a1b2c3d4f5"
+ example = "https://scrolller.com/TITLE-SLUG-a1b2c3d4f5"
def posts(self):
- url = "{}/{}".format(self.root, self.groups[0])
- page = self.request(url).text
- data = util.json_loads(text.extr(
- page, '<script>window.scrolllerConfig="', '"</script>')
- .replace('\\"', '"'))
- return (data["item"],)
+ variables = {"url": "/" + self.groups[0]}
+ data = self._request_graphql("SubredditPostQuery", variables)
+ return (data["getPost"],)
QUERIES = {
+ "SubredditPostQuery": """\
+query SubredditPostQuery(
+ $url: String!
+) {
+ getPost(
+ data: { url: $url }
+ ) {
+ __typename id url title subredditId subredditTitle subredditUrl
+ redditPath isNsfw hasAudio fullLengthSource gfycatSource redgifsSource
+ ownerAvatar username displayName favoriteCount isPaid tags
+ commentsCount commentsRepliesCount isFavorite
+ albumContent { mediaSources { url width height isOptimized } }
+ mediaSources { url width height isOptimized }
+ blurredMediaSources { url width height isOptimized }
+ }
+}
+""",
+
"SubredditQuery": """\
query SubredditQuery(
$url: String!
- $filter: SubredditPostFilter
$iterator: String
+ $sortBy: GallerySortBy
+ $filter: GalleryFilter
+ $limit: Int!
) {
getSubreddit(
- url: $url
+ data: {
+ url: $url,
+ iterator: $iterator,
+ filter: $filter,
+ limit: $limit,
+ sortBy: $sortBy
+ }
) {
- children(
- limit: 50
- iterator: $iterator
- filter: $filter
- disabledHosts: null
- ) {
+ __typename id url title secondaryTitle description createdAt isNsfw
+ subscribers isComplete itemCount videoCount pictureCount albumCount
+ isPaid username tags isFollowing
+ banner { url width height isOptimized }
+ children {
iterator items {
- __typename id url title subredditId subredditTitle
- subredditUrl redditPath isNsfw albumUrl hasAudio
- fullLengthSource gfycatSource redgifsSource ownerAvatar
- username displayName isPaid tags isFavorite
+ __typename id url title subredditId subredditTitle subredditUrl
+ redditPath isNsfw hasAudio fullLengthSource gfycatSource
+ redgifsSource ownerAvatar username displayName favoriteCount
+ isPaid tags commentsCount commentsRepliesCount isFavorite
+ albumContent { mediaSources { url width height isOptimized } }
mediaSources { url width height isOptimized }
blurredMediaSources { url width height isOptimized }
}
@@ -197,19 +259,59 @@ query SubredditQuery(
}
""",
- "FollowingQuery": """\
-query FollowingQuery(
+ "SubredditChildrenQuery": """\
+query SubredditChildrenQuery(
+ $subredditId: Int!
$iterator: String
+ $filter: GalleryFilter
+ $sortBy: GallerySortBy
+ $limit: Int!
+ $isNsfw: Boolean
) {
- getFollowing(
- limit: 10
- iterator: $iterator
+ getSubredditChildren(
+ data: {
+ subredditId: $subredditId,
+ iterator: $iterator,
+ filter: $filter,
+ sortBy: $sortBy,
+ limit: $limit,
+ isNsfw: $isNsfw
+ },
+ ) {
+ iterator items {
+ __typename id url title subredditId subredditTitle subredditUrl
+ redditPath isNsfw hasAudio fullLengthSource gfycatSource
+ redgifsSource ownerAvatar username displayName favoriteCount isPaid
+ tags commentsCount commentsRepliesCount isFavorite
+ albumContent { mediaSources { url width height isOptimized } }
+ mediaSources { url width height isOptimized }
+ blurredMediaSources { url width height isOptimized }
+ }
+ }
+}
+""",
+
+ "GetFollowingSubreddits": """\
+query GetFollowingSubreddits(
+ $iterator: String,
+ $limit: Int!,
+ $filter: GalleryFilter,
+ $isNsfw: Boolean,
+ $sortBy: GallerySortBy
+) {
+ getFollowingSubreddits(
+ data: {
+ isNsfw: $isNsfw
+ limit: $limit
+ filter: $filter
+ iterator: $iterator
+ sortBy: $sortBy
+ }
) {
iterator items {
__typename id url title secondaryTitle description createdAt isNsfw
subscribers isComplete itemCount videoCount pictureCount albumCount
- isPaid username tags isFollowing
- banner { url width height isOptimized }
+ isFollowing
}
}
}
@@ -229,4 +331,14 @@ query LoginQuery(
}
""",
+ "ItemTypeQuery": """\
+query ItemTypeQuery(
+ $url: String!
+) {
+ getItemType(
+ url: $url
+ )
+}
+""",
+
}
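
All scrolller queries now go through one transport with two endpoints: "admin" queries are posted to /admin as JSON, while LoginQuery keeps the old /api/v2/graphql text endpoint. A rough requests-based equivalent of the patched _request_graphql():

    import json
    import requests

    def request_graphql(query, variables, token=None, admin=True):
        # sketch of ScrolllerExtractor._request_graphql after this patch
        if admin:
            url = "https://api.scrolller.com/admin"
            content_type = "application/json"
        else:
            url = "https://api.scrolller.com/api/v2/graphql"
            content_type = "text/plain;charset=UTF-8"
        headers = {
            "Content-Type": content_type,
            "Origin"      : "https://scrolller.com",
        }
        payload = {
            "query"        : query,
            "variables"    : variables,
            "authorization": token,
        }
        response = requests.post(url, headers=headers, data=json.dumps(payload))
        return response.json()["data"]
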
diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py
index 23ba340..ff8c505 100644
--- a/gallery_dl/extractor/seiga.py
+++ b/gallery_dl/extractor/seiga.py
@@ -43,9 +43,7 @@ class SeigaExtractor(Extractor):
def get_image_url(self, image_id):
"""Get url for an image with id 'image_id'"""
url = "{}/image/source/{}".format(self.root, image_id)
- response = self.request(
- url, method="HEAD", allow_redirects=False, notfound="image")
- location = response.headers["location"]
+ location = self.request_location(url, notfound="image")
if "nicovideo.jp/login" in location:
raise exception.StopExtraction(
"HTTP redirect to login page (%s)", location.partition("?")[0])
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 5d0ec46..1054a63 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -23,14 +23,15 @@ class SubscribestarExtractor(Extractor):
directory_fmt = ("{category}", "{author_name}")
filename_fmt = "{post_id}_{id}.{extension}"
archive_fmt = "{id}"
- cookies_domain = "www.subscribestar.com"
- cookies_names = ("auth_token",)
+ cookies_domain = ".subscribestar.com"
+ cookies_names = ("_personalization_id",)
+ _warning = True
def __init__(self, match):
tld, self.item = match.groups()
if tld == "adult":
self.root = "https://subscribestar.adult"
- self.cookies_domain = "subscribestar.adult"
+ self.cookies_domain = ".subscribestar.adult"
self.subcategory += "-adult"
Extractor.__init__(self, match)
@@ -78,34 +79,64 @@ class SubscribestarExtractor(Extractor):
username, password = self._get_auth_info()
if username:
- self.cookies_update(self._login_impl(username, password))
+ self.cookies_update(self._login_impl(
+ (username, self.cookies_domain), password))
+
+ if self._warning:
+ if not username or not self.cookies_check(self.cookies_names):
+ self.log.warning("no '_personalization_id' cookie set")
+ SubscribestarExtractor._warning = False
@cache(maxage=28*86400, keyarg=1)
def _login_impl(self, username, password):
+ username = username[0]
self.log.info("Logging in as %s", username)
- url = "https://www.subscribestar.com/session.json"
+ if self.root.endswith(".adult"):
+ self.cookies.set("18_plus_agreement_generic", "true",
+ domain=self.cookies_domain)
+
+ # load login page
+ url = self.root + "/login"
+ page = self.request(url).text
+
headers = {
- "Origin" : "https://www.subscribestar.com",
- "Referer" : "https://www.subscribestar.com/login",
+ "Accept": "*/*;q=0.5, text/javascript, application/javascript, "
+ "application/ecmascript, application/x-ecmascript",
+ "Referer": self.root + "/login",
+ "X-CSRF-Token": text.unescape(text.extr(
+ page, '<meta name="csrf-token" content="', '"')),
+ "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
"X-Requested-With": "XMLHttpRequest",
}
- data = {
- "utf8" : "✓",
- "email" : username,
- "password": password,
- }
- response = self.request(
- url, method="POST", headers=headers, data=data, fatal=False)
- if response.json().get("errors"):
- self.log.debug(response.json()["errors"])
- raise exception.AuthenticationError()
+ def check_errors(response):
+ errors = response.json().get("errors")
+ if errors:
+ self.log.debug(errors)
+ try:
+ msg = '"{}"'.format(errors.popitem()[1])
+ except Exception:
+ msg = None
+ raise exception.AuthenticationError(msg)
+ return response
+
+ # submit username / email
+ url = self.root + "/session.json"
+ data = {"email": username}
+ response = check_errors(self.request(
+ url, method="POST", headers=headers, data=data, fatal=False))
+
+ # submit password
+ url = self.root + "/session/password.json"
+ data = {"password": password}
+ response = check_errors(self.request(
+ url, method="POST", headers=headers, data=data, fatal=False))
+ # return cookies
return {
cookie.name: cookie.value
for cookie in response.cookies
- if cookie.name.startswith("auth")
}
def _media_from_post(self, html):
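
Login is now a two-step flow, email to /session.json followed by password to /session/password.json, authenticated by a CSRF token scraped from the login page, and all response cookies are kept instead of only auth*. A minimal sketch with requests:

    import re
    import requests

    def login(root, email, password):
        # sketch of the new two-step SubscribeStar login flow
        session = requests.Session()
        page = session.get(root + "/login").text
        token = re.search(
            r'<meta name="csrf-token" content="([^"]+)"', page).group(1)
        headers = {
            "X-CSRF-Token"    : token,
            "X-Requested-With": "XMLHttpRequest",
        }
        session.post(root + "/session.json",
                     headers=headers, data={"email": email})
        session.post(root + "/session/password.json",
                     headers=headers, data={"password": password})
        return session.cookies
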
diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py
index 4c1da7a..b9783c4 100644
--- a/gallery_dl/extractor/tiktok.py
+++ b/gallery_dl/extractor/tiktok.py
@@ -183,10 +183,7 @@ class TiktokVmpostExtractor(TiktokExtractor):
url = text.ensure_http_scheme(self.url)
headers = {"User-Agent": "facebookexternalhit/1.1"}
- response = self.request(url, headers=headers, method="HEAD",
- allow_redirects=False, notfound="post")
-
- url = response.headers.get("Location")
+ url = self.request_location(url, headers=headers, notfound="post")
if not url or len(url) <= 28:
# https://www.tiktok.com/?_r=1
raise exception.NotFoundError("post")
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 8d90bc5..e2fe000 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -798,6 +798,17 @@ class TwitterFollowingExtractor(TwitterExtractor):
return self._users_result(TwitterAPI(self).user_following(self.user))
+class TwitterFollowersExtractor(TwitterExtractor):
+ """Extractor for a user's followers"""
+ subcategory = "followers"
+ pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/followers(?!\w)"
+ example = "https://x.com/USER/followers"
+
+ def items(self):
+ self.login()
+ return self._users_result(TwitterAPI(self).user_followers(self.user))
+
+
class TwitterSearchExtractor(TwitterExtractor):
"""Extractor for Twitter search results"""
subcategory = "search"
@@ -1139,54 +1150,76 @@ class TwitterAPI():
"collab_control,vibe",
}
self.features = {
- "hidden_profile_likes_enabled": True,
"hidden_profile_subscriptions_enabled": True,
+ "profile_label_improvements_pcf_label_in_post_enabled": True,
+ "rweb_tipjar_consumption_enabled": True,
"responsive_web_graphql_exclude_directive_enabled": True,
"verified_phone_label_enabled": False,
"highlights_tweets_tab_ui_enabled": True,
"responsive_web_twitter_article_notes_tab_enabled": True,
+ "subscriptions_feature_can_gift_premium": True,
"creator_subscriptions_tweet_preview_api_enabled": True,
"responsive_web_graphql_"
"skip_user_profile_image_extensions_enabled": False,
- "responsive_web_graphql_timeline_navigation_enabled": True,
+ "responsive_web_graphql_"
+ "timeline_navigation_enabled": True,
}
self.features_pagination = {
+ "rweb_video_screen_enabled": False,
+ "profile_label_improvements_pcf_label_in_post_enabled": True,
+ "rweb_tipjar_consumption_enabled": True,
"responsive_web_graphql_exclude_directive_enabled": True,
"verified_phone_label_enabled": False,
"creator_subscriptions_tweet_preview_api_enabled": True,
- "responsive_web_graphql_timeline_navigation_enabled": True,
- "responsive_web_graphql_skip_user_profile_"
- "image_extensions_enabled": False,
+ "responsive_web_graphql_"
+ "timeline_navigation_enabled": True,
+ "responsive_web_graphql_"
+ "skip_user_profile_image_extensions_enabled": False,
+ "premium_content_api_read_enabled": False,
+ "communities_web_enable_tweet_community_results_fetch": True,
"c9s_tweet_anatomy_moderator_badge_enabled": True,
- "tweetypie_unmention_optimization_enabled": True,
+ "responsive_web_grok_analyze_button_fetch_trends_enabled": False,
+ "responsive_web_grok_analyze_post_followups_enabled": True,
+ "responsive_web_jetfuel_frame": False,
+ "responsive_web_grok_share_attachment_enabled": True,
+ "articles_preview_enabled": True,
"responsive_web_edit_tweet_api_enabled": True,
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
"view_counts_everywhere_api_enabled": True,
"longform_notetweets_consumption_enabled": True,
"responsive_web_twitter_article_tweet_consumption_enabled": True,
"tweet_awards_web_tipping_enabled": False,
+ "responsive_web_grok_show_grok_translated_post": False,
+ "responsive_web_grok_analysis_button_from_backend": True,
+ "creator_subscriptions_quote_tweet_preview_enabled": False,
"freedom_of_speech_not_reach_fetch_enabled": True,
"standardized_nudges_misinfo": True,
- "tweet_with_visibility_results_prefer_gql_"
- "limited_actions_policy_enabled": True,
- "rweb_video_timestamps_enabled": True,
+ "tweet_with_visibility_results_"
+ "prefer_gql_limited_actions_policy_enabled": True,
"longform_notetweets_rich_text_read_enabled": True,
"longform_notetweets_inline_media_enabled": True,
- "responsive_web_media_download_video_enabled": True,
+ "responsive_web_grok_image_annotation_enabled": True,
"responsive_web_enhance_cards_enabled": False,
}
def tweet_result_by_rest_id(self, tweet_id):
- endpoint = "/graphql/MWY3AO9_I3rcP_L2A4FR4A/TweetResultByRestId"
+ endpoint = "/graphql/Vg2Akr5FzUmF0sTplA5k6g/TweetResultByRestId"
variables = {
"tweetId": tweet_id,
"withCommunity": False,
"includePromotedContent": False,
"withVoice": False,
}
+ field_toggles = {
+ "withArticleRichContentState": True,
+ "withArticlePlainText": False,
+ "withGrokAnalyze": False,
+ "withDisallowedReplyControls": False,
+ }
params = {
- "variables": self._json_dumps(variables),
- "features" : self._json_dumps(self.features_pagination),
+ "variables" : self._json_dumps(variables),
+ "features" : self._json_dumps(self.features_pagination),
+ "fieldToggles": self._json_dumps(field_toggles),
}
tweet = self._call(endpoint, params)["data"]["tweetResult"]["result"]
if "tweet" in tweet:
@@ -1203,47 +1236,61 @@ class TwitterAPI():
return tweet
def tweet_detail(self, tweet_id):
- endpoint = "/graphql/B9_KmbkLhXt6jRwGjJrweg/TweetDetail"
+ endpoint = "/graphql/b9Yw90FMr_zUb8DvA8r2ug/TweetDetail"
variables = {
"focalTweetId": tweet_id,
"referrer": "profile",
"with_rux_injections": False,
+ # "rankingMode": "Relevance",
"includePromotedContent": False,
"withCommunity": True,
- "withQuickPromoteEligibilityTweetFields": True,
+ "withQuickPromoteEligibilityTweetFields": False,
"withBirdwatchNotes": True,
"withVoice": True,
- "withV2Timeline": True,
+ }
+ field_toggles = {
+ "withArticleRichContentState": True,
+ "withArticlePlainText": False,
+ "withGrokAnalyze": False,
+ "withDisallowedReplyControls": False,
}
return self._pagination_tweets(
- endpoint, variables, ("threaded_conversation_with_injections_v2",))
+ endpoint, variables,
+ ("threaded_conversation_with_injections_v2",),
+ field_toggles=field_toggles)
def user_tweets(self, screen_name):
- endpoint = "/graphql/5ICa5d9-AitXZrIA3H-4MQ/UserTweets"
+ endpoint = "/graphql/M3Hpkrb8pjWkEuGdLeXMOA/UserTweets"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
"includePromotedContent": False,
- "withQuickPromoteEligibilityTweetFields": True,
+ "withQuickPromoteEligibilityTweetFields": False,
"withVoice": True,
- "withV2Timeline": True,
}
- return self._pagination_tweets(endpoint, variables)
+ field_toggles = {
+ "withArticlePlainText": False,
+ }
+ return self._pagination_tweets(
+ endpoint, variables, field_toggles=field_toggles)
def user_tweets_and_replies(self, screen_name):
- endpoint = "/graphql/UtLStR_BnYUGD7Q453UXQg/UserTweetsAndReplies"
+ endpoint = "/graphql/pz0IHaV_t7T4HJavqqqcIA/UserTweetsAndReplies"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
"includePromotedContent": False,
"withCommunity": True,
"withVoice": True,
- "withV2Timeline": True,
}
- return self._pagination_tweets(endpoint, variables)
+ field_toggles = {
+ "withArticlePlainText": False,
+ }
+ return self._pagination_tweets(
+ endpoint, variables, field_toggles=field_toggles)
def user_media(self, screen_name):
- endpoint = "/graphql/tO4LMUYAZbR4T0SqQ85aAw/UserMedia"
+ endpoint = "/graphql/8B9DqlaGvYyOvTCzzZWtNA/UserMedia"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1251,12 +1298,15 @@ class TwitterAPI():
"withClientEventToken": False,
"withBirdwatchNotes": False,
"withVoice": True,
- "withV2Timeline": True,
}
- return self._pagination_tweets(endpoint, variables)
+ field_toggles = {
+ "withArticlePlainText": False,
+ }
+ return self._pagination_tweets(
+ endpoint, variables, field_toggles=field_toggles)
def user_likes(self, screen_name):
- endpoint = "/graphql/9s8V6sUI8fZLDiN-REkAxA/Likes"
+ endpoint = "/graphql/uxjTlmrTI61zreSIV1urbw/Likes"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1264,24 +1314,24 @@ class TwitterAPI():
"withClientEventToken": False,
"withBirdwatchNotes": False,
"withVoice": True,
- "withV2Timeline": True,
}
- return self._pagination_tweets(endpoint, variables)
+ field_toggles = {
+ "withArticlePlainText": False,
+ }
+ return self._pagination_tweets(
+ endpoint, variables, field_toggles=field_toggles)
def user_bookmarks(self):
- endpoint = "/graphql/cQxQgX8MJYjWwC0dxpyfYg/Bookmarks"
+ endpoint = "/graphql/ztCdjqsvvdL0dE8R5ME0hQ/Bookmarks"
variables = {
"count": 100,
"includePromotedContent": False,
}
- features = self.features_pagination.copy()
- features["graphql_timeline_v2_bookmark_timeline"] = True
return self._pagination_tweets(
- endpoint, variables, ("bookmark_timeline_v2", "timeline"), False,
- features=features)
+ endpoint, variables, ("bookmark_timeline_v2", "timeline"), False)
def list_latest_tweets_timeline(self, list_id):
- endpoint = "/graphql/HjsWc-nwwHKYwHenbHm-tw/ListLatestTweetsTimeline"
+ endpoint = "/graphql/LSefrrxhpeX8HITbKfWz9g/ListLatestTweetsTimeline"
variables = {
"listId": list_id,
"count": 100,
@@ -1289,21 +1339,20 @@ class TwitterAPI():
return self._pagination_tweets(
endpoint, variables, ("list", "tweets_timeline", "timeline"))
- def search_timeline(self, query):
- endpoint = "/graphql/fZK7JipRHWtiZsTodhsTfQ/SearchTimeline"
+ def search_timeline(self, query, product="Latest"):
+ endpoint = "/graphql/fL2MBiqXPk5pSrOS5ACLdA/SearchTimeline"
variables = {
"rawQuery": query,
"count": 100,
- "querySource": "",
- "product": "Latest",
+ "querySource": "typed_query",
+ "product": product,
}
-
return self._pagination_tweets(
endpoint, variables,
("search_by_raw_query", "search_timeline", "timeline"))
def community_tweets_timeline(self, community_id):
- endpoint = "/graphql/7B2AdxSuC-Er8qUr3Plm_w/CommunityTweetsTimeline"
+ endpoint = "/graphql/awszcpgwaIeqqNfmzjxUow/CommunityTweetsTimeline"
variables = {
"communityId": community_id,
"count": 100,
@@ -1317,7 +1366,7 @@ class TwitterAPI():
"timeline"))
def community_media_timeline(self, community_id):
- endpoint = "/graphql/qAGUldfcIoMv5KyAyVLYog/CommunityMediaTimeline"
+ endpoint = "/graphql/HfMuDHto2j3NKUeiLjKWHA/CommunityMediaTimeline"
variables = {
"communityId": community_id,
"count": 100,
@@ -1329,7 +1378,7 @@ class TwitterAPI():
"timeline"))
def communities_main_page_timeline(self, screen_name):
- endpoint = ("/graphql/GtOhw2mstITBepTRppL6Uw"
+ endpoint = ("/graphql/NbdrKPY_h_nlvZUg7oqH5Q"
"/CommunitiesMainPageTimeline")
variables = {
"count": 100,
@@ -1356,17 +1405,34 @@ class TwitterAPI():
["twitter_objects"]["live_events"][event_id])
def list_members(self, list_id):
- endpoint = "/graphql/BQp2IEYkgxuSxqbTAr1e1g/ListMembers"
+ endpoint = "/graphql/v97svwb-qcBmzv6QruDuNg/ListMembers"
variables = {
"listId": list_id,
"count": 100,
- "withSafetyModeUserFields": True,
}
return self._pagination_users(
endpoint, variables, ("list", "members_timeline", "timeline"))
+ def user_followers(self, screen_name):
+ endpoint = "/graphql/jqZ0_HJBA6mnu18iTZYm9w/Followers"
+ variables = {
+ "userId": self._user_id_by_screen_name(screen_name),
+ "count": 100,
+ "includePromotedContent": False,
+ }
+ return self._pagination_users(endpoint, variables)
+
+ def user_followers_verified(self, screen_name):
+ endpoint = "/graphql/GHg0X_FjrJoISwwLPWi1LQ/BlueVerifiedFollowers"
+ variables = {
+ "userId": self._user_id_by_screen_name(screen_name),
+ "count": 100,
+ "includePromotedContent": False,
+ }
+ return self._pagination_users(endpoint, variables)
+
def user_following(self, screen_name):
- endpoint = "/graphql/PAnE9toEjRfE-4tozRcsfw/Following"
+ endpoint = "/graphql/4QHbs4wmzgtU91f-t96_Eg/Following"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1376,12 +1442,11 @@ class TwitterAPI():
@memcache(keyarg=1)
def user_by_rest_id(self, rest_id):
- endpoint = "/graphql/tD8zKvQzwY3kdx5yz6YmOw/UserByRestId"
+ endpoint = "/graphql/5vdJ5sWkbSRDiiNZvwc2Yg/UserByRestId"
features = self.features
params = {
"variables": self._json_dumps({
"userId": rest_id,
- "withSafetyModeUserFields": True,
}),
"features": self._json_dumps(features),
}
@@ -1389,7 +1454,7 @@ class TwitterAPI():
@memcache(keyarg=1)
def user_by_screen_name(self, screen_name):
- endpoint = "/graphql/k5XapwcSikNsEsILW5FvgA/UserByScreenName"
+ endpoint = "/graphql/32pL5BWe9WKeSK1MoPvFQQ/UserByScreenName"
features = self.features.copy()
features["subscriptions_verification_info_"
"is_identity_verified_enabled"] = True
@@ -1398,9 +1463,11 @@ class TwitterAPI():
params = {
"variables": self._json_dumps({
"screen_name": screen_name,
- "withSafetyModeUserFields": True,
}),
"features": self._json_dumps(features),
+ "fieldToggles": self._json_dumps({
+ "withAuxiliaryUserLabels": True,
+ }),
}
return self._call(endpoint, params)["data"]["user"]["result"]
@@ -1620,7 +1687,8 @@ class TwitterAPI():
params["cursor"] = extr._update_cursor(cursor)
def _pagination_tweets(self, endpoint, variables,
- path=None, stop_tweets=True, features=None):
+ path=None, stop_tweets=True,
+ features=None, field_toggles=None):
extr = self.extractor
original_retweets = (extr.retweets == "original")
pinned_tweet = extr.pinned
@@ -1633,6 +1701,8 @@ class TwitterAPI():
features = self.features_pagination
if features:
params["features"] = self._json_dumps(features)
+ if field_toggles:
+ params["fieldToggles"] = self._json_dumps(field_toggles)
while True:
params["variables"] = self._json_dumps(variables)
@@ -1640,7 +1710,7 @@ class TwitterAPI():
try:
if path is None:
- instructions = (data["user"]["result"]["timeline_v2"]
+ instructions = (data["user"]["result"]["timeline"]
["timeline"]["instructions"])
else:
instructions = data
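The twitter.py hunks above thread a new optional field_toggles argument through _pagination_tweets, serialized alongside variables and features. A minimal sketch of the resulting query-parameter assembly, assuming _json_dumps is a compact json.dumps wrapper as it is used elsewhere in this file:

    import json

    def build_params(variables, features, field_toggles=None):
        # Each Twitter GraphQL query parameter is itself a JSON document
        # serialized into the URL query string; fieldToggles is optional.
        params = {
            "variables": json.dumps(variables, separators=(",", ":")),
            "features": json.dumps(features, separators=(",", ":")),
        }
        if field_toggles:
            params["fieldToggles"] = json.dumps(
                field_toggles, separators=(",", ":"))
        return params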
diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py
index 49a3deb..0478ef0 100644
--- a/gallery_dl/extractor/urlshortener.py
+++ b/gallery_dl/extractor/urlshortener.py
@@ -32,21 +32,13 @@ BASE_PATTERN = UrlshortenerExtractor.update({
class UrlshortenerLinkExtractor(UrlshortenerExtractor):
"""Extractor for general-purpose URL shorteners"""
subcategory = "link"
- pattern = BASE_PATTERN + r"/([^/?#]+)"
+ pattern = BASE_PATTERN + r"(/[^/?#]+)"
example = "https://bit.ly/abcde"
- def __init__(self, match):
- UrlshortenerExtractor.__init__(self, match)
- self.id = match.group(match.lastindex)
-
- def _init(self):
- self.headers = self.config_instance("headers")
-
def items(self):
- response = self.request(
- "{}/{}".format(self.root, self.id), headers=self.headers,
- method="HEAD", allow_redirects=False, notfound="URL")
- try:
- yield Message.Queue, response.headers["location"], {}
- except KeyError:
+ url = self.root + self.groups[-1]
+ location = self.request_location(
+ url, headers=self.config_instance("headers"), notfound="URL")
+ if not location:
raise exception.StopExtraction("Unable to resolve short URL")
+ yield Message.Queue, location, {}
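The urlshortener change swaps the hand-rolled HEAD request for the shared request_location() helper. A rough stand-alone equivalent of that resolution step, sketched with requests (the helper name resolve_short_url is hypothetical, not the project's actual implementation):

    import requests

    def resolve_short_url(url, headers=None):
        # Issue a HEAD request without following the redirect; the
        # shortener's target URL is carried in the Location header.
        response = requests.head(
            url, headers=headers, allow_redirects=False, timeout=30)
        return response.headers.get("location")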
diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py
index 13b0520..ed2a395 100644
--- a/gallery_dl/extractor/weasyl.py
+++ b/gallery_dl/extractor/weasyl.py
@@ -7,7 +7,7 @@
"""Extractors for https://www.weasyl.com/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, util
BASE_PATTERN = r"(?:https://)?(?:www\.)?weasyl.com/"
@@ -18,6 +18,7 @@ class WeasylExtractor(Extractor):
filename_fmt = "{submitid} {title}.{extension}"
archive_fmt = "{submitid}"
root = "https://www.weasyl.com"
+ useragent = util.USERAGENT
@staticmethod
def populate_submission(data):
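weasyl.py now sets a class-level useragent, pinning its requests to the library-wide default in util.USERAGENT. A hedged illustration of the pattern this relies on; the base class shown here is a simplified assumption, not gallery_dl's actual Extractor:

    class Extractor:
        useragent = "Mozilla/5.0"  # placeholder default

        def _prepare_session(self, session):
            # Subclasses override the class attribute to change the
            # User-Agent sent with every request.
            session.headers["User-Agent"] = self.useragent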
diff --git a/gallery_dl/extractor/wikifeet.py b/gallery_dl/extractor/wikifeet.py
index d3586c0..f7bfeb2 100644
--- a/gallery_dl/extractor/wikifeet.py
+++ b/gallery_dl/extractor/wikifeet.py
@@ -33,12 +33,12 @@ class WikifeetGalleryExtractor(GalleryExtractor):
return {
"celeb" : self.celeb,
"type" : self.type,
- "rating" : text.parse_float(extr('"ratingValue": "', '"')),
- "celebrity" : text.unescape(extr("times'>", "</h1>")),
- "shoesize" : text.remove_html(extr("Shoe Size:", "edit")),
- "birthplace": text.remove_html(extr("Birthplace:", "edit")),
- "birthday" : text.parse_datetime(text.remove_html(
- extr("Birth Date:", "edit")), "%Y-%m-%d"),
+ "birthplace": text.unescape(extr('"bplace":"', '"')),
+ "birthday" : text.parse_datetime(text.unescape(
+ extr('"bdate":"', '"'))[:10], "%Y-%m-%d"),
+ "shoesize" : text.unescape(extr('"ssize":', ',')),
+ "rating" : text.parse_float(extr('"score":', ',')),
+ "celebrity" : text.unescape(extr('"cname":"', '"')),
}
def images(self, page):
@@ -61,5 +61,6 @@ class WikifeetGalleryExtractor(GalleryExtractor):
for tag in data["tags"] if tag in tagmap
],
})
- for data in util.json_loads(text.extr(page, "['gdata'] = ", ";"))
+        for data in util.json_loads(
+            "[" + text.extr(page, '"gallery":[', '],') + "]")
]
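The wikifeet rewrite reads both the metadata and the image list from the JSON blob embedded in the page rather than scraping the surrounding HTML. A small self-contained illustration of the '"gallery":[' extraction; the sample page string is invented for demonstration:

    import json

    page = '{"cname":"Example","gallery":[{"pid":1},{"pid":2}],"score":4.5}'

    # Grab the raw array body between the two markers, then re-wrap it
    # in brackets so it parses as a JSON list, which is the combined
    # effect of text.extr() and the util.json_loads() call in the patch.
    body = page.split('"gallery":[', 1)[1].split('],', 1)[0]
    gallery = json.loads("[" + body + "]")
    print(gallery)  # [{'pid': 1}, {'pid': 2}]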