From 4a18b5837c1dd82f5964afcfc3fecc53cd97e79c Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Sun, 27 Apr 2025 20:34:08 -0400 Subject: New upstream version 1.29.5. --- gallery_dl/extractor/__init__.py | 14 ++- gallery_dl/extractor/architizer.py | 2 +- gallery_dl/extractor/bluesky.py | 121 +++++++++++++------ gallery_dl/extractor/common.py | 7 +- gallery_dl/extractor/deviantart.py | 54 ++++++--- gallery_dl/extractor/everia.py | 2 +- gallery_dl/extractor/fanbox.py | 22 ++-- gallery_dl/extractor/fapello.py | 3 + gallery_dl/extractor/gelbooru.py | 14 +-- gallery_dl/extractor/instagram.py | 44 +++++-- gallery_dl/extractor/itaku.py | 24 ++++ gallery_dl/extractor/kemonoparty.py | 22 ++-- gallery_dl/extractor/moebooru.py | 1 + gallery_dl/extractor/naver.py | 61 +++++++++- gallery_dl/extractor/patreon.py | 6 +- gallery_dl/extractor/pictoa.py | 78 ++++++++++++ gallery_dl/extractor/pinterest.py | 9 +- gallery_dl/extractor/pixiv.py | 38 +++--- gallery_dl/extractor/postmill.py | 12 +- gallery_dl/extractor/reddit.py | 5 +- gallery_dl/extractor/scrolller.py | 218 +++++++++++++++++++++++++--------- gallery_dl/extractor/seiga.py | 4 +- gallery_dl/extractor/subscribestar.py | 67 ++++++++--- gallery_dl/extractor/tiktok.py | 5 +- gallery_dl/extractor/twitter.py | 176 ++++++++++++++++++--------- gallery_dl/extractor/urlshortener.py | 20 +--- gallery_dl/extractor/weasyl.py | 3 +- gallery_dl/extractor/wikifeet.py | 15 +-- gallery_dl/postprocessor/ugoira.py | 25 ++-- gallery_dl/util.py | 20 +++- gallery_dl/version.py | 2 +- 31 files changed, 771 insertions(+), 323 deletions(-) create mode 100644 gallery_dl/extractor/pictoa.py (limited to 'gallery_dl') diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 87c3798..9a7ca53 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -7,7 +7,7 @@ # published by the Free Software Foundation. import sys -import re +from ..util import re_compile modules = [ "2ch", @@ -130,6 +130,7 @@ modules = [ "philomena", "photovogue", "picarto", + "pictoa", "piczel", "pillowfort", "pinterest", @@ -234,7 +235,8 @@ def find(url): def add(cls): """Add 'cls' to the list of available extractors""" - cls.pattern = re.compile(cls.pattern) + if isinstance(cls.pattern, str): + cls.pattern = re_compile(cls.pattern) _cache.append(cls) return cls @@ -242,9 +244,11 @@ def add(cls): def add_module(module): """Add all extractors in 'module' to the list of available extractors""" classes = _get_classes(module) - for cls in classes: - cls.pattern = re.compile(cls.pattern) - _cache.extend(classes) + if classes: + if isinstance(classes[0].pattern, str): + for cls in classes: + cls.pattern = re_compile(cls.pattern) + _cache.extend(classes) return classes diff --git a/gallery_dl/extractor/architizer.py b/gallery_dl/extractor/architizer.py index 0268224..911753b 100644 --- a/gallery_dl/extractor/architizer.py +++ b/gallery_dl/extractor/architizer.py @@ -54,7 +54,7 @@ class ArchitizerProjectExtractor(GalleryExtractor): return [ (url, None) for url in text.extract_iter( - page, "property='og:image:secure_url' content='", "?") + page, 'property="og:image:secure_url" content="', "?") ] diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py index f8fef93..ec274b8 100644 --- a/gallery_dl/extractor/bluesky.py +++ b/gallery_dl/extractor/bluesky.py @@ -25,10 +25,6 @@ class BlueskyExtractor(Extractor): archive_fmt = "{filename}" root = "https://bsky.app" - def __init__(self, match): - Extractor.__init__(self, match) - self.user = match.group(1) - def _init(self): meta = self.config("metadata") or () if meta: @@ -87,6 +83,22 @@ class BlueskyExtractor(Extractor): def posts(self): return () + def _posts_records(self, actor, collection): + depth = self.config("depth", "0") + + for record in self.api.list_records(actor, collection): + uri = None + try: + uri = record["value"]["subject"]["uri"] + if "/app.bsky.feed.post/" in uri: + yield from self.api.get_post_thread_uri(uri, depth) + except exception.StopExtraction: + pass # deleted post + except Exception as exc: + self.log.debug(record, exc_info=exc) + self.log.warning("Failed to extract %s (%s: %s)", + uri or "record", exc.__class__.__name__, exc) + def _pid(self, post): return post["uri"].rpartition("/")[2] @@ -203,7 +215,7 @@ class BlueskyUserExtractor(BlueskyExtractor): pass def items(self): - base = "{}/profile/{}/".format(self.root, self.user) + base = "{}/profile/{}/".format(self.root, self.groups[0]) default = ("posts" if self.config("quoted", False) or self.config("reposts", False) else "media") return self._dispatch_extractors(( @@ -213,6 +225,7 @@ class BlueskyUserExtractor(BlueskyExtractor): (BlueskyPostsExtractor , base + "posts"), (BlueskyRepliesExtractor , base + "replies"), (BlueskyMediaExtractor , base + "media"), + (BlueskyVideoExtractor , base + "video"), (BlueskyLikesExtractor , base + "likes"), ), (default,)) @@ -223,7 +236,8 @@ class BlueskyPostsExtractor(BlueskyExtractor): example = "https://bsky.app/profile/HANDLE/posts" def posts(self): - return self.api.get_author_feed(self.user, "posts_and_author_threads") + return self.api.get_author_feed( + self.groups[0], "posts_and_author_threads") class BlueskyRepliesExtractor(BlueskyExtractor): @@ -232,7 +246,8 @@ class BlueskyRepliesExtractor(BlueskyExtractor): example = "https://bsky.app/profile/HANDLE/replies" def posts(self): - return self.api.get_author_feed(self.user, "posts_with_replies") + return self.api.get_author_feed( + self.groups[0], "posts_with_replies") class BlueskyMediaExtractor(BlueskyExtractor): @@ -241,7 +256,18 @@ class BlueskyMediaExtractor(BlueskyExtractor): example = "https://bsky.app/profile/HANDLE/media" def posts(self): - return self.api.get_author_feed(self.user, "posts_with_media") + return self.api.get_author_feed( + self.groups[0], "posts_with_media") + + +class BlueskyVideoExtractor(BlueskyExtractor): + subcategory = "video" + pattern = USER_PATTERN + r"/video" + example = "https://bsky.app/profile/HANDLE/video" + + def posts(self): + return self.api.get_author_feed( + self.groups[0], "posts_with_video") class BlueskyLikesExtractor(BlueskyExtractor): @@ -250,7 +276,9 @@ class BlueskyLikesExtractor(BlueskyExtractor): example = "https://bsky.app/profile/HANDLE/likes" def posts(self): - return self.api.get_actor_likes(self.user) + if self.config("endpoint") == "getActorLikes": + return self.api.get_actor_likes(self.groups[0]) + return self._posts_records(self.groups[0], "app.bsky.feed.like") class BlueskyFeedExtractor(BlueskyExtractor): @@ -258,12 +286,9 @@ class BlueskyFeedExtractor(BlueskyExtractor): pattern = USER_PATTERN + r"/feed/([^/?#]+)" example = "https://bsky.app/profile/HANDLE/feed/NAME" - def __init__(self, match): - BlueskyExtractor.__init__(self, match) - self.feed = match.group(2) - def posts(self): - return self.api.get_feed(self.user, self.feed) + actor, feed = self.groups + return self.api.get_feed(actor, feed) class BlueskyListExtractor(BlueskyExtractor): @@ -271,12 +296,9 @@ class BlueskyListExtractor(BlueskyExtractor): pattern = USER_PATTERN + r"/lists/([^/?#]+)" example = "https://bsky.app/profile/HANDLE/lists/ID" - def __init__(self, match): - BlueskyExtractor.__init__(self, match) - self.list = match.group(2) - def posts(self): - return self.api.get_list_feed(self.user, self.list) + actor, list_id = self.groups + return self.api.get_list_feed(actor, list_id) class BlueskyFollowingExtractor(BlueskyExtractor): @@ -285,7 +307,7 @@ class BlueskyFollowingExtractor(BlueskyExtractor): example = "https://bsky.app/profile/HANDLE/follows" def items(self): - for user in self.api.get_follows(self.user): + for user in self.api.get_follows(self.groups[0]): url = "https://bsky.app/profile/" + user["did"] user["_extractor"] = BlueskyUserExtractor yield Message.Queue, url, user @@ -296,12 +318,9 @@ class BlueskyPostExtractor(BlueskyExtractor): pattern = USER_PATTERN + r"/post/([^/?#]+)" example = "https://bsky.app/profile/HANDLE/post/ID" - def __init__(self, match): - BlueskyExtractor.__init__(self, match) - self.post_id = match.group(2) - def posts(self): - return self.api.get_post_thread(self.user, self.post_id) + actor, post_id = self.groups + return self.api.get_post_thread(actor, post_id) class BlueskyInfoExtractor(BlueskyExtractor): @@ -311,7 +330,7 @@ class BlueskyInfoExtractor(BlueskyExtractor): def items(self): self._metadata_user = True - self.api._did_from_actor(self.user) + self.api._did_from_actor(self.groups[0]) return iter(((Message.Directory, self._user),)) @@ -322,7 +341,7 @@ class BlueskyAvatarExtractor(BlueskyExtractor): example = "https://bsky.app/profile/HANDLE/avatar" def posts(self): - return self._make_post(self.user, "avatar") + return self._make_post(self.groups[0], "avatar") class BlueskyBackgroundExtractor(BlueskyExtractor): @@ -332,7 +351,7 @@ class BlueskyBackgroundExtractor(BlueskyExtractor): example = "https://bsky.app/profile/HANDLE/banner" def posts(self): - return self._make_post(self.user, "banner") + return self._make_post(self.groups[0], "banner") class BlueskySearchExtractor(BlueskyExtractor): @@ -341,7 +360,7 @@ class BlueskySearchExtractor(BlueskyExtractor): example = "https://bsky.app/search?q=QUERY" def posts(self): - query = text.unquote(self.user.replace("+", " ")) + query = text.unquote(self.groups[0].replace("+", " ")) return self.api.search_posts(query) @@ -351,13 +370,14 @@ class BlueskyHashtagExtractor(BlueskyExtractor): example = "https://bsky.app/hashtag/NAME" def posts(self): - return self.api.search_posts("#"+self.user, self.groups[1]) + hashtag, order = self.groups + return self.api.search_posts("#"+hashtag, order) class BlueskyAPI(): """Interface for the Bluesky API - https://www.docs.bsky.app/docs/category/http-reference + https://docs.bsky.app/docs/category/http-reference """ def __init__(self, extractor): @@ -378,7 +398,7 @@ class BlueskyAPI(): "actor": self._did_from_actor(actor), "limit": "100", } - return self._pagination(endpoint, params) + return self._pagination(endpoint, params, check_empty=True) def get_author_feed(self, actor, filter="posts_and_author_threads"): endpoint = "app.bsky.feed.getAuthorFeed" @@ -416,11 +436,16 @@ class BlueskyAPI(): return self._pagination(endpoint, params) def get_post_thread(self, actor, post_id): + uri = "at://{}/app.bsky.feed.post/{}".format( + self._did_from_actor(actor), post_id) + depth = self.extractor.config("depth", "0") + return self.get_post_thread_uri(uri, depth) + + def get_post_thread_uri(self, uri, depth="0"): endpoint = "app.bsky.feed.getPostThread" params = { - "uri": "at://{}/app.bsky.feed.post/{}".format( - self._did_from_actor(actor), post_id), - "depth" : self.extractor.config("depth", "0"), + "uri" : uri, + "depth" : depth, "parentHeight": "0", } @@ -443,6 +468,18 @@ class BlueskyAPI(): params = {"actor": did} return self._call(endpoint, params) + def list_records(self, actor, collection): + endpoint = "com.atproto.repo.listRecords" + actor_did = self._did_from_actor(actor) + params = { + "repo" : actor_did, + "collection": collection, + "limit" : "100", + # "reverse" : "false", + } + return self._pagination(endpoint, params, "records", + self.service_endpoint(actor_did)) + @memcache(keyarg=1) def resolve_handle(self, handle): endpoint = "com.atproto.identity.resolveHandle" @@ -523,8 +560,10 @@ class BlueskyAPI(): _refresh_token_cache.update(self.username, data["refreshJwt"]) return "Bearer " + data["accessJwt"] - def _call(self, endpoint, params): - url = "{}/xrpc/{}".format(self.root, endpoint) + def _call(self, endpoint, params, root=None): + if root is None: + root = self.root + url = "{}/xrpc/{}".format(root, endpoint) while True: self.authenticate() @@ -549,9 +588,13 @@ class BlueskyAPI(): self.extractor.log.debug("Server response: %s", response.text) raise exception.StopExtraction(msg) - def _pagination(self, endpoint, params, key="feed"): + def _pagination(self, endpoint, params, + key="feed", root=None, check_empty=False): while True: - data = self._call(endpoint, params) + data = self._call(endpoint, params, root) + + if check_empty and not data[key]: + return yield from data[key] cursor = data.get("cursor") diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 995505f..c430ec1 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -59,7 +59,7 @@ class Extractor(): @classmethod def from_url(cls, url): if isinstance(cls.pattern, str): - cls.pattern = re.compile(cls.pattern) + cls.pattern = util.re_compile(cls.pattern) match = cls.pattern.match(url) return cls(match) if match else None @@ -240,6 +240,11 @@ class Extractor(): raise exception.HttpError(msg, response) + def request_location(self, url, **kwargs): + kwargs.setdefault("method", "HEAD") + kwargs.setdefault("allow_redirects", False) + return self.request(url, **kwargs).headers.get("location", "") + _handle_429 = util.false def wait(self, seconds=None, until=None, adjust=1.0, diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 378c7ec..ae475e2 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -867,6 +867,9 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\ )["deviation"]["extended"]["deviationUuid"] yield self.api.deviation(deviation_uuid) + def _unescape_json(self, json): + return json.replace('\\"', '"').replace("\\\\", "\\") + class DeviantartUserExtractor(DeviantartExtractor): """Extractor for an artist's user profile""" @@ -1046,7 +1049,7 @@ class DeviantartStashExtractor(DeviantartExtractor): DeviantartExtractor.__init__(self, match) self.user = None - def deviations(self, stash_id=None): + def deviations(self, stash_id=None, stash_data=None): if stash_id is None: legacy_url, stash_id = self.groups else: @@ -1068,14 +1071,33 @@ class DeviantartStashExtractor(DeviantartExtractor): deviation["_page"] = page deviation["index"] = text.parse_int(text.extr( page, '\\"deviationId\\":', ',')) + + deviation["stash_id"] = stash_id + if stash_data: + folder = stash_data["folder"] + deviation["stash_name"] = folder["name"] + deviation["stash_folder"] = folder["folderId"] + deviation["stash_parent"] = folder["parentId"] or 0 + deviation["stash_description"] = \ + folder["richDescription"]["excerpt"] + else: + deviation["stash_name"] = "" + deviation["stash_description"] = "" + deviation["stash_folder"] = 0 + deviation["stash_parent"] = 0 + yield deviation return + stash_data = text.extr(page, ',\\"stash\\":', ',\\"@@') + if stash_data: + stash_data = util.json_loads(self._unescape_json(stash_data)) + for sid in text.extract_iter( page, 'href="https://www.deviantart.com/stash/', '"'): if sid == stash_id or sid.endswith("#comments"): continue - yield from self.deviations(sid) + yield from self.deviations(sid, stash_data) class DeviantartFavoriteExtractor(DeviantartExtractor): @@ -1276,28 +1298,26 @@ class DeviantartDeviationExtractor(DeviantartExtractor): deviation = self.api.deviation(uuid) deviation["_page"] = page + deviation["index_file"] = 0 + deviation["num"] = deviation["count"] = 1 - _dev_info = text.extr( - page, '\\"deviationExtended\\":', ',\\"deviation\\":', None) - # Clean up escaped quotes - _json_str = re.sub( - r'(?02}.{extension}") - self.archive_fmt = ("g_{_username}_{index}{index_file:?_//}." - "{extension}") + self.filename_fmt = ("{category}_{index}_{index_file}_{title}_" + "{num:>02}.{extension}") + self.archive_fmt = ("g_{_username}_{index}{index_file:?_//}." + "{extension}") - deviation["index_file"] = 0 + additional_media = util.json_loads(self._unescape_json( + additional_media) + "}]") deviation["count"] = 1 + len(additional_media) - deviation["num"] = 1 yield deviation for index, post in enumerate(additional_media): - uri = post["media"]["baseUri"].encode().decode("unicode-escape") + uri = self._eclipse_media(post["media"], "fullview")[0] deviation["content"]["src"] = uri deviation["num"] += 1 deviation["index_file"] = post["fileId"] diff --git a/gallery_dl/extractor/everia.py b/gallery_dl/extractor/everia.py index e41f6f6..3bf0a74 100644 --- a/gallery_dl/extractor/everia.py +++ b/gallery_dl/extractor/everia.py @@ -57,7 +57,7 @@ class EveriaPostExtractor(EveriaExtractor): data = { "title": text.unescape( - text.extr(page, 'itemprop="headline">', "")), + text.extr(page, 'itemprop="headline">', "', "")), "post_url": url, "post_category": text.extr( diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py index 9bbfb43..3b43134 100644 --- a/gallery_dl/extractor/fanbox.py +++ b/gallery_dl/extractor/fanbox.py @@ -173,15 +173,16 @@ class FanboxExtractor(Extractor): return plans def _get_comment_data(self, post_id): - url = ("https://api.fanbox.cc/post.listComments" + url = ("https://api.fanbox.cc/post.getComments" "?limit=10&postId=" + post_id) comments = [] while url: url = text.ensure_http_scheme(url) body = self.request(url, headers=self.headers).json()["body"] - comments.extend(body["items"]) - url = body["nextUrl"] + data = body["commentList"] + comments.extend(data["items"]) + url = data["nextUrl"] return comments def _get_urls_from_post(self, content_body, post): @@ -296,8 +297,7 @@ class FanboxExtractor(Extractor): url = "https://www.pixiv.net/fanbox/"+content_id # resolve redirect try: - url = self.request(url, method="HEAD", - allow_redirects=False).headers["location"] + url = self.request_location(url) except Exception as exc: url = None self.log.warning("Unable to extract fanbox embed %s (%s: %s)", @@ -392,13 +392,7 @@ class FanboxRedirectExtractor(Extractor): pattern = r"(?:https?://)?(?:www\.)?pixiv\.net/fanbox/creator/(\d+)" example = "https://www.pixiv.net/fanbox/creator/12345" - def __init__(self, match): - Extractor.__init__(self, match) - self.user_id = match.group(1) - def items(self): - url = "https://www.pixiv.net/fanbox/creator/" + self.user_id - data = {"_extractor": FanboxCreatorExtractor} - response = self.request( - url, method="HEAD", allow_redirects=False, notfound="user") - yield Message.Queue, response.headers["Location"], data + url = "https://www.pixiv.net/fanbox/creator/" + self.groups[0] + location = self.request_location(url, notfound="user") + yield Message.Queue, location, {"_extractor": FanboxCreatorExtractor} diff --git a/gallery_dl/extractor/fapello.py b/gallery_dl/extractor/fapello.py index 838ae7b..cf18edc 100644 --- a/gallery_dl/extractor/fapello.py +++ b/gallery_dl/extractor/fapello.py @@ -72,10 +72,13 @@ class FapelloModelExtractor(Extractor): if not page: return + url = None for url in text.extract_iter(page, '') if not notes_data: diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 432a7ad..0f88cac 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -606,6 +606,20 @@ class InstagramHighlightsExtractor(InstagramExtractor): return self.api.highlights_media(uid) +class InstagramFollowersExtractor(InstagramExtractor): + """Extractor for an Instagram user's followers""" + subcategory = "followers" + pattern = USER_PATTERN + r"/followers" + example = "https://www.instagram.com/USER/followers/" + + def items(self): + uid = self.api.user_id(self.item) + for user in self.api.user_followers(uid): + user["_extractor"] = InstagramUserExtractor + url = "{}/{}".format(self.root, user["username"]) + yield Message.Queue, url, user + + class InstagramFollowingExtractor(InstagramExtractor): """Extractor for an Instagram user's followed users""" subcategory = "following" @@ -693,11 +707,21 @@ class InstagramPostExtractor(InstagramExtractor): """Extractor for an Instagram post""" subcategory = "post" pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" - r"/(?:[^/?#]+/)?(?:p|tv|reel)/([^/?#]+)") + r"/(?:share/()|[^/?#]+/)?(?:p|tv|reel)/([^/?#]+)") example = "https://www.instagram.com/p/abcdefg/" def posts(self): - return self.api.media(self.item) + share, shortcode = self.groups + if share is not None: + url = text.ensure_http_scheme(self.url) + headers = { + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "same-origin", + } + location = self.request_location(url, headers=headers) + shortcode = location.split("/")[-2] + return self.api.media(shortcode) class InstagramRestAPI(): @@ -816,6 +840,11 @@ class InstagramRestAPI(): params = {"count": 30} return self._pagination(endpoint, params) + def user_followers(self, user_id): + endpoint = "/v1/friendships/{}/followers/".format(user_id) + params = {"count": 12} + return self._pagination_following(endpoint, params) + def user_following(self, user_id): endpoint = "/v1/friendships/{}/following/".format(user_id) params = {"count": 12} @@ -908,9 +937,10 @@ class InstagramRestAPI(): for item in data["items"]: yield from item["media_items"] - if "next_max_id" not in data: + next_max_id = data.get("next_max_id") + if not next_max_id: return extr._update_cursor(None) - params["max_id"] = extr._update_cursor(data["next_max_id"]) + params["max_id"] = extr._update_cursor(next_max_id) def _pagination_following(self, endpoint, params): extr = self.extractor @@ -921,10 +951,10 @@ class InstagramRestAPI(): yield from data["users"] - if len(data["users"]) < params["count"]: + next_max_id = data.get("next_max_id") + if not next_max_id: return extr._update_cursor(None) - params["max_id"] = extr._update_cursor( - params["max_id"] + params["count"]) + params["max_id"] = extr._update_cursor(next_max_id) class InstagramGraphqlAPI(): diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py index 2974b59..e602665 100644 --- a/gallery_dl/extractor/itaku.py +++ b/gallery_dl/extractor/itaku.py @@ -65,6 +65,15 @@ class ItakuGalleryExtractor(ItakuExtractor): return self.api.galleries_images(*self.groups) +class ItakuStarsExtractor(ItakuExtractor): + subcategory = "stars" + pattern = BASE_PATTERN + r"/profile/([^/?#]+)/stars(?:/(\d+))?" + example = "https://itaku.ee/profile/USER/stars" + + def posts(self): + return self.api.galleries_images_starred(*self.groups) + + class ItakuImageExtractor(ItakuExtractor): subcategory = "image" pattern = BASE_PATTERN + r"/images/(\d+)" @@ -139,6 +148,21 @@ class ItakuAPI(): } return self._pagination(endpoint, params, self.image) + def galleries_images_starred(self, username, section=None): + endpoint = "/galleries/images/user_starred_imgs/" + params = { + "cursor" : None, + "stars_of" : self.user(username)["owner"], + "sections" : section, + "date_range": "", + "ordering" : "-date_added", + "maturity_rating": ("SFW", "Questionable", "NSFW"), + "page" : "1", + "page_size" : "30", + "visibility": ("PUBLIC", "PROFILE_ONLY"), + } + return self._pagination(endpoint, params, self.image) + def image(self, image_id): endpoint = "/galleries/images/{}/".format(image_id) return self._call(endpoint) diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index de7d040..79070ee 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -153,7 +153,7 @@ class KemonopartyExtractor(Extractor): file["type"] = "archive" if archives: try: - data = self.api.posts_archives(file["hash"]) + data = self.api.file(file["hash"]) data.update(file) post_archives.append(data) except Exception as exc: @@ -319,12 +319,9 @@ class KemonopartyUserExtractor(KemonopartyExtractor): def posts(self): _, _, service, creator_id, query = self.groups params = text.parse_query(query) - if params.get("tag"): - return self.api.creator_tagged_posts( - service, creator_id, params.get("tag"), params.get("o")) - else: - return self.api.creator_posts( - service, creator_id, params.get("o"), params.get("q")) + return self.api.creator_posts_legacy( + service, creator_id, + params.get("o"), params.get("q"), params.get("tag")) class KemonopartyPostsExtractor(KemonopartyExtractor): @@ -524,18 +521,19 @@ class KemonoAPI(): params = {"q": query, "o": offset, "tag": tags} return self._pagination(endpoint, params, 50, "posts") - def posts_archives(self, file_hash): - endpoint = "/posts/archives/" + file_hash - return self._call(endpoint)["archive"] + def file(self, file_hash): + endpoint = "/file/" + file_hash + return self._call(endpoint) def creator_posts(self, service, creator_id, offset=0, query=None): endpoint = "/{}/user/{}".format(service, creator_id) params = {"q": query, "o": offset} return self._pagination(endpoint, params, 50) - def creator_tagged_posts(self, service, creator_id, tags, offset=0): + def creator_posts_legacy(self, service, creator_id, + offset=0, query=None, tags=None): endpoint = "/{}/user/{}/posts-legacy".format(service, creator_id) - params = {"o": offset, "tag": tags} + params = {"o": offset, "tag": tags, "q": query} return self._pagination(endpoint, params, 50, "results") def creator_announcements(self, service, creator_id): diff --git a/gallery_dl/extractor/moebooru.py b/gallery_dl/extractor/moebooru.py index e97d273..9fd66e2 100644 --- a/gallery_dl/extractor/moebooru.py +++ b/gallery_dl/extractor/moebooru.py @@ -127,6 +127,7 @@ class MoebooruPoolExtractor(MoebooruExtractor): if self.config("metadata"): url = "{}/pool/show/{}.json".format(self.root, self.pool_id) pool = self.request(url).json() + pool["name"] = pool["name"].replace("_", " ") pool.pop("posts", None) return {"pool": pool} return {"pool": text.parse_int(self.pool_id)} diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py index d3150e6..2287325 100644 --- a/gallery_dl/extractor/naver.py +++ b/gallery_dl/extractor/naver.py @@ -9,7 +9,9 @@ """Extractors for https://blog.naver.com/""" from .common import GalleryExtractor, Extractor, Message -from .. import text +from .. import text, util +import datetime +import time class NaverBase(): @@ -59,19 +61,66 @@ class NaverPostExtractor(NaverBase, GalleryExtractor): "user" : extr("var nickName = '", "'"), }, } - data["post"]["date"] = text.parse_datetime( + + data["post"]["date"] = self._parse_datetime( extr('se_publishDate pcol2">', '<') or - extr('_postAddDate">', '<'), "%Y. %m. %d. %H:%M") + extr('_postAddDate">', '<')) + return data + def _parse_datetime(self, date_string): + if "전" in date_string: + ts = time.gmtime() + return datetime.datetime(ts.tm_year, ts.tm_mon, ts.tm_mday) + return text.parse_datetime(date_string, "%Y. %m. %d. %H:%M") + def images(self, page): - results = [] + files = [] + self._extract_images(files, page) + if self.config("videos", True): + self._extract_videos(files, page) + return files + + def _extract_images(self, files, page): for url in text.extract_iter(page, 'data-lazy-src="', '"'): url = url.replace("://post", "://blog", 1).partition("?")[0] if "\ufffd" in text.unquote(url): url = text.unquote(url, encoding="EUC-KR") - results.append((url, None)) - return results + files.append((url, None)) + + def _extract_videos(self, files, page): + for module in text.extract_iter(page, " data-module='", "'>", "<"), + "tags" : text.split_html(text.extr( + page, '