From 4a18b5837c1dd82f5964afcfc3fecc53cd97e79c Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Sun, 27 Apr 2025 20:34:08 -0400 Subject: New upstream version 1.29.5. --- CHANGELOG.md | 61 ++++++---- PKG-INFO | 6 +- README.rst | 4 +- data/man/gallery-dl.1 | 2 +- data/man/gallery-dl.conf.5 | 47 +++++++- docs/gallery-dl.conf | 8 ++ gallery_dl.egg-info/PKG-INFO | 6 +- gallery_dl.egg-info/SOURCES.txt | 1 + gallery_dl/extractor/__init__.py | 14 ++- gallery_dl/extractor/architizer.py | 2 +- gallery_dl/extractor/bluesky.py | 121 +++++++++++++------ gallery_dl/extractor/common.py | 7 +- gallery_dl/extractor/deviantart.py | 54 ++++++--- gallery_dl/extractor/everia.py | 2 +- gallery_dl/extractor/fanbox.py | 22 ++-- gallery_dl/extractor/fapello.py | 3 + gallery_dl/extractor/gelbooru.py | 14 +-- gallery_dl/extractor/instagram.py | 44 +++++-- gallery_dl/extractor/itaku.py | 24 ++++ gallery_dl/extractor/kemonoparty.py | 22 ++-- gallery_dl/extractor/moebooru.py | 1 + gallery_dl/extractor/naver.py | 61 +++++++++- gallery_dl/extractor/patreon.py | 6 +- gallery_dl/extractor/pictoa.py | 78 ++++++++++++ gallery_dl/extractor/pinterest.py | 9 +- gallery_dl/extractor/pixiv.py | 38 +++--- gallery_dl/extractor/postmill.py | 12 +- gallery_dl/extractor/reddit.py | 5 +- gallery_dl/extractor/scrolller.py | 218 +++++++++++++++++++++++++--------- gallery_dl/extractor/seiga.py | 4 +- gallery_dl/extractor/subscribestar.py | 67 ++++++++--- gallery_dl/extractor/tiktok.py | 5 +- gallery_dl/extractor/twitter.py | 176 ++++++++++++++++++--------- gallery_dl/extractor/urlshortener.py | 20 +--- gallery_dl/extractor/weasyl.py | 3 +- gallery_dl/extractor/wikifeet.py | 15 +-- gallery_dl/postprocessor/ugoira.py | 25 ++-- gallery_dl/util.py | 20 +++- gallery_dl/version.py | 2 +- 39 files changed, 867 insertions(+), 362 deletions(-) create mode 100644 gallery_dl/extractor/pictoa.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d779ffa..182d685 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,32 +1,41 @@ -## 
1.29.4 - 2025-04-13 +## 1.29.5 - 2025-04-26 ### Extractors #### Additions -- [chevereto] support `imagepond.net` ([#7278](https://github.com/mikf/gallery-dl/issues/7278)) -- [webtoons] add `artist` extractor ([#7274](https://github.com/mikf/gallery-dl/issues/7274)) +- [bluesky] add `video` extractor ([#4438](https://github.com/mikf/gallery-dl/issues/4438)) +- [instagram] add `followers` extractor ([#7374](https://github.com/mikf/gallery-dl/issues/7374)) +- [itaku] add `stars` extractor ([#7411](https://github.com/mikf/gallery-dl/issues/7411)) +- [pictoa] add support ([#6683](https://github.com/mikf/gallery-dl/issues/6683) [#7409](https://github.com/mikf/gallery-dl/issues/7409)) +- [twitter] add `followers` extractor ([#6331](https://github.com/mikf/gallery-dl/issues/6331)) #### Fixes -- [deviantart] fix `KeyError: 'has_subfolders'` ([#7272](https://github.com/mikf/gallery-dl/issues/7272) [#7337](https://github.com/mikf/gallery-dl/issues/7337)) -- [discord] fix `parent` keyword inconsistency ([#7341](https://github.com/mikf/gallery-dl/issues/7341) [#7353](https://github.com/mikf/gallery-dl/issues/7353)) -- [E621:pool] fix `AttributeError` ([#7265](https://github.com/mikf/gallery-dl/issues/7265) [#7344](https://github.com/mikf/gallery-dl/issues/7344)) -- [everia] fix/improve image extraction ([#7270](https://github.com/mikf/gallery-dl/issues/7270)) -- [gelbooru] fix video URLs ([#7345](https://github.com/mikf/gallery-dl/issues/7345)) -- [hentai2read] fix `AttributeError` exception for chapters without artist ([#7355](https://github.com/mikf/gallery-dl/issues/7355)) -- [issuu] fix extractors ([#7317](https://github.com/mikf/gallery-dl/issues/7317)) -- [kemonoparty] fix file paths with backslashes ([#7321](https://github.com/mikf/gallery-dl/issues/7321)) -- [readcomiconline] fix `issue` extractor ([#7269](https://github.com/mikf/gallery-dl/issues/7269) [#7330](https://github.com/mikf/gallery-dl/issues/7330)) -- [rule34xyz] update to API v2 
([#7289](https://github.com/mikf/gallery-dl/issues/7289)) -- [zerochan] fix `KeyError: 'author'` ([#7282](https://github.com/mikf/gallery-dl/issues/7282)) +- [architizer] fix `project` extractor ([#7421](https://github.com/mikf/gallery-dl/issues/7421)) +- [bluesky:likes] fix infinite loop ([#7194](https://github.com/mikf/gallery-dl/issues/7194) [#7287](https://github.com/mikf/gallery-dl/issues/7287)) +- [deviantart] fix `401 Unauthorized` errors for multi-image posts ([#6653](https://github.com/mikf/gallery-dl/issues/6653)) +- [everia] fix `title` extraction ([#7379](https://github.com/mikf/gallery-dl/issues/7379)) +- [fanbox] fix `comments` extraction +- [fapello] stop pagination on empty results ([#7385](https://github.com/mikf/gallery-dl/issues/7385)) +- [kemonoparty] fix `archives` option ([#7416](https://github.com/mikf/gallery-dl/issues/7416) [#7419](https://github.com/mikf/gallery-dl/issues/7419)) +- [pixiv] fix `user_details` requests not being cached ([#7414](https://github.com/mikf/gallery-dl/issues/7414)) +- [pixiv:novel] handle exceptions during `embeds` extraction ([#7422](https://github.com/mikf/gallery-dl/issues/7422)) +- [subscribestar] fix username & password login +- [wikifeet] support site redesign ([#7286](https://github.com/mikf/gallery-dl/issues/7286) [#7396](https://github.com/mikf/gallery-dl/issues/7396)) #### Improvements -- [instagram] use Chrome `User-Agent` by default ([#6379](https://github.com/mikf/gallery-dl/issues/6379)) -- [pixiv] support `phixiv.net` URLs ([#7352](https://github.com/mikf/gallery-dl/issues/7352)) -- [tumblr] support URLs without subdomain ([#7358](https://github.com/mikf/gallery-dl/issues/7358)) -- [webtoons] download JPEG files in higher quality -- [webtoons] use a default 0.5-1.5s delay between requests ([#7329](https://github.com/mikf/gallery-dl/issues/7329)) -- [zzup] support `w.zzup.com` URLs ([#7327](https://github.com/mikf/gallery-dl/issues/7327)) -### Downloaders -- [ytdl] fix `KeyError: 'extractor'`
exception when `ytdl` reports an error ([#7301](https://github.com/mikf/gallery-dl/issues/7301)) +- [bluesky:likes] use `repo.listRecords` endpoint ([#7194](https://github.com/mikf/gallery-dl/issues/7194) [#7287](https://github.com/mikf/gallery-dl/issues/7287)) +- [gelbooru] don't hardcode image server domains ([#7392](https://github.com/mikf/gallery-dl/issues/7392)) +- [instagram] support `/share/` URLs ([#7241](https://github.com/mikf/gallery-dl/issues/7241)) +- [kemonoparty] use `/posts-legacy` endpoint ([#6780](https://github.com/mikf/gallery-dl/issues/6780) [#6931](https://github.com/mikf/gallery-dl/issues/6931) [#7404](https://github.com/mikf/gallery-dl/issues/7404)) +- [naver] support videos ([#4682](https://github.com/mikf/gallery-dl/issues/4682) [#7395](https://github.com/mikf/gallery-dl/issues/7395)) +- [scrolller] support album posts ([#7339](https://github.com/mikf/gallery-dl/issues/7339)) +- [subscribestar] add warning for missing login cookie +- [twitter] update API endpoint query hashes ([#7382](https://github.com/mikf/gallery-dl/issues/7382) [#7386](https://github.com/mikf/gallery-dl/issues/7386)) +- [weasyl] use `gallery-dl` User-Agent header ([#7412](https://github.com/mikf/gallery-dl/issues/7412)) +#### Metadata +- [deviantart:stash] extract more metadata ([#7397](https://github.com/mikf/gallery-dl/issues/7397)) +- [moebooru:pool] replace underscores in pool names ([#4646](https://github.com/mikf/gallery-dl/issues/4646)) +- [naver] fix recent `date` bug ([#4682](https://github.com/mikf/gallery-dl/issues/4682)) ### Post Processors -- [metadata] add `metadata-path` option ([#6582](https://github.com/mikf/gallery-dl/issues/6582)) -- [metadata] fix handling of empty directory paths ([#7296](https://github.com/mikf/gallery-dl/issues/7296)) -- [ugoira] preserve `extension` when using `"mode": "archive"` ([#7304](https://github.com/mikf/gallery-dl/issues/7304)) +- [ugoira] restore `keep-files` functionality 
([#7304](https://github.com/mikf/gallery-dl/issues/7304)) +- [ugoira] support `"keep-files": true` + custom extension ([#7304](https://github.com/mikf/gallery-dl/issues/7304)) +- [ugoira] use `_ugoira_frame_index` to detect `.zip` files ### Miscellaneous -- [formatter] add `i` and `f` conversions ([#6582](https://github.com/mikf/gallery-dl/issues/6582)) +- [util] auto-update Chrome version +- use internal version of `re.compile()` for extractor patterns diff --git a/PKG-INFO b/PKG-INFO index 3d113ec..7d3e9ca 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: gallery_dl -Version: 1.29.4 +Version: 1.29.5 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -133,9 +133,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/README.rst b/README.rst index 1d8a195..f99c289 100644 --- a/README.rst +++ b/README.rst @@ -77,9 +77,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 7eb34af..7a6c97d 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2025-04-13" "1.29.4" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2025-04-26" "1.29.5" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index dc11605..d329d9c 100644 --- a/data/man/gallery-dl.conf.5 +++ 
b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2025-04-13" "1.29.4" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2025-04-26" "1.29.5" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -810,13 +810,13 @@ or a \f[I]list\f[] with IP and explicit port number as elements. .IP "Default:" 9 .br -* \f[I]"gallery-dl/VERSION"\f[]: \f[I][Danbooru]\f[], \f[I]mangadex\f[] +* \f[I]"gallery-dl/VERSION"\f[]: \f[I][Danbooru]\f[], \f[I]mangadex\f[], \f[I]weasyl\f[] .br * \f[I]"gallery-dl/VERSION (by mikf)"\f[]: \f[I][E621]\f[] .br * \f[I]"Patreon/72.2.28 (Android; Android 14; Scale/2.10)"\f[]: \f[I]patreon\f[] .br -* \f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36"\f[]: \f[I]instagram\f[] +* \f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/LATEST.0.0.0 Safari/537.36"\f[]: \f[I]instagram\f[] .br * \f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:LATEST) Gecko/20100101 Firefox/LATEST"\f[]: otherwise @@ -1857,11 +1857,39 @@ Possible values are \f[I]"posts"\f[], \f[I]"replies"\f[], \f[I]"media"\f[], +\f[I]"video"\f[], \f[I]"likes"\f[], It is possible to use \f[I]"all"\f[] instead of listing all values separately. +.SS extractor.bluesky.likes.endpoint +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"listRecords"\f[] + +.IP "Description:" 4 +API endpoint to use for retrieving liked posts. + +\f[I]"listRecords"\f[] +Use the results from +.br +\f[I]com.atproto.repo.listRecords\f[] +Requires no login and allows accessing likes of all users, +.br +but uses one request to +\f[I]getPostThread\f[] +per post, +\f[I]"getActorLikes"\f[] +Use the results from +.br +\f[I]app.bsky.feed.getActorLikes\f[] +Requires login and only allows accessing your own likes. +.br + + .SS extractor.bluesky.metadata .IP "Type:" 6 .br @@ -1890,7 +1918,7 @@ Extract additional metadata.
(See \f[I]app.bsky.actor.getProfile\f[]). -.SS extractor.bluesky.post.depth +.SS extractor.bluesky.likes.depth .IP "Type:" 6 \f[I]integer\f[] @@ -4007,6 +4035,17 @@ Extract extended \f[I]pool\f[] metadata. Note: Not supported by all \f[I]moebooru\f[] instances. +.SS extractor.naver.videos +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Download videos. + + .SS extractor.newgrounds.flash .IP "Type:" 6 \f[I]bool\f[] diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index b85a3e7..b8b46d6 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -153,6 +153,10 @@ "reposts" : false, "videos" : true, + "likes": { + "depth" : 0, + "endpoint": "listRecords" + }, "post": { "depth": 0 } @@ -416,6 +420,10 @@ "username": "", "password": "" }, + "naver": + { + "videos": true + }, "newgrounds": { "username": "", diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 3d113ec..7d3e9ca 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: gallery_dl -Version: 1.29.4 +Version: 1.29.5 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -133,9 +133,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 2f4a87c..a6afdd7 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -177,6 +177,7 @@ gallery_dl/extractor/pexels.py gallery_dl/extractor/philomena.py gallery_dl/extractor/photovogue.py gallery_dl/extractor/picarto.py 
+gallery_dl/extractor/pictoa.py gallery_dl/extractor/piczel.py gallery_dl/extractor/pillowfort.py gallery_dl/extractor/pinterest.py diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 87c3798..9a7ca53 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -7,7 +7,7 @@ # published by the Free Software Foundation. import sys -import re +from ..util import re_compile modules = [ "2ch", @@ -130,6 +130,7 @@ modules = [ "philomena", "photovogue", "picarto", + "pictoa", "piczel", "pillowfort", "pinterest", @@ -234,7 +235,8 @@ def find(url): def add(cls): """Add 'cls' to the list of available extractors""" - cls.pattern = re.compile(cls.pattern) + if isinstance(cls.pattern, str): + cls.pattern = re_compile(cls.pattern) _cache.append(cls) return cls @@ -242,9 +244,11 @@ def add(cls): def add_module(module): """Add all extractors in 'module' to the list of available extractors""" classes = _get_classes(module) - for cls in classes: - cls.pattern = re.compile(cls.pattern) - _cache.extend(classes) + if classes: + if isinstance(classes[0].pattern, str): + for cls in classes: + cls.pattern = re_compile(cls.pattern) + _cache.extend(classes) return classes diff --git a/gallery_dl/extractor/architizer.py b/gallery_dl/extractor/architizer.py index 0268224..911753b 100644 --- a/gallery_dl/extractor/architizer.py +++ b/gallery_dl/extractor/architizer.py @@ -54,7 +54,7 @@ class ArchitizerProjectExtractor(GalleryExtractor): return [ (url, None) for url in text.extract_iter( - page, "property='og:image:secure_url' content='", "?") + page, 'property="og:image:secure_url" content="', "?") ] diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py index f8fef93..ec274b8 100644 --- a/gallery_dl/extractor/bluesky.py +++ b/gallery_dl/extractor/bluesky.py @@ -25,10 +25,6 @@ class BlueskyExtractor(Extractor): archive_fmt = "{filename}" root = "https://bsky.app" - def __init__(self, match): - 
Extractor.__init__(self, match) - self.user = match.group(1) - def _init(self): meta = self.config("metadata") or () if meta: @@ -87,6 +83,22 @@ class BlueskyExtractor(Extractor): def posts(self): return () + def _posts_records(self, actor, collection): + depth = self.config("depth", "0") + + for record in self.api.list_records(actor, collection): + uri = None + try: + uri = record["value"]["subject"]["uri"] + if "/app.bsky.feed.post/" in uri: + yield from self.api.get_post_thread_uri(uri, depth) + except exception.StopExtraction: + pass # deleted post + except Exception as exc: + self.log.debug(record, exc_info=exc) + self.log.warning("Failed to extract %s (%s: %s)", + uri or "record", exc.__class__.__name__, exc) + def _pid(self, post): return post["uri"].rpartition("/")[2] @@ -203,7 +215,7 @@ class BlueskyUserExtractor(BlueskyExtractor): pass def items(self): - base = "{}/profile/{}/".format(self.root, self.user) + base = "{}/profile/{}/".format(self.root, self.groups[0]) default = ("posts" if self.config("quoted", False) or self.config("reposts", False) else "media") return self._dispatch_extractors(( @@ -213,6 +225,7 @@ class BlueskyUserExtractor(BlueskyExtractor): (BlueskyPostsExtractor , base + "posts"), (BlueskyRepliesExtractor , base + "replies"), (BlueskyMediaExtractor , base + "media"), + (BlueskyVideoExtractor , base + "video"), (BlueskyLikesExtractor , base + "likes"), ), (default,)) @@ -223,7 +236,8 @@ class BlueskyPostsExtractor(BlueskyExtractor): example = "https://bsky.app/profile/HANDLE/posts" def posts(self): - return self.api.get_author_feed(self.user, "posts_and_author_threads") + return self.api.get_author_feed( + self.groups[0], "posts_and_author_threads") class BlueskyRepliesExtractor(BlueskyExtractor): @@ -232,7 +246,8 @@ class BlueskyRepliesExtractor(BlueskyExtractor): example = "https://bsky.app/profile/HANDLE/replies" def posts(self): - return self.api.get_author_feed(self.user, "posts_with_replies") + return self.api.get_author_feed( + 
self.groups[0], "posts_with_replies") class BlueskyMediaExtractor(BlueskyExtractor): @@ -241,7 +256,18 @@ class BlueskyMediaExtractor(BlueskyExtractor): example = "https://bsky.app/profile/HANDLE/media" def posts(self): - return self.api.get_author_feed(self.user, "posts_with_media") + return self.api.get_author_feed( + self.groups[0], "posts_with_media") + + +class BlueskyVideoExtractor(BlueskyExtractor): + subcategory = "video" + pattern = USER_PATTERN + r"/video" + example = "https://bsky.app/profile/HANDLE/video" + + def posts(self): + return self.api.get_author_feed( + self.groups[0], "posts_with_video") class BlueskyLikesExtractor(BlueskyExtractor): @@ -250,7 +276,9 @@ class BlueskyLikesExtractor(BlueskyExtractor): example = "https://bsky.app/profile/HANDLE/likes" def posts(self): - return self.api.get_actor_likes(self.user) + if self.config("endpoint") == "getActorLikes": + return self.api.get_actor_likes(self.groups[0]) + return self._posts_records(self.groups[0], "app.bsky.feed.like") class BlueskyFeedExtractor(BlueskyExtractor): @@ -258,12 +286,9 @@ class BlueskyFeedExtractor(BlueskyExtractor): pattern = USER_PATTERN + r"/feed/([^/?#]+)" example = "https://bsky.app/profile/HANDLE/feed/NAME" - def __init__(self, match): - BlueskyExtractor.__init__(self, match) - self.feed = match.group(2) - def posts(self): - return self.api.get_feed(self.user, self.feed) + actor, feed = self.groups + return self.api.get_feed(actor, feed) class BlueskyListExtractor(BlueskyExtractor): @@ -271,12 +296,9 @@ class BlueskyListExtractor(BlueskyExtractor): pattern = USER_PATTERN + r"/lists/([^/?#]+)" example = "https://bsky.app/profile/HANDLE/lists/ID" - def __init__(self, match): - BlueskyExtractor.__init__(self, match) - self.list = match.group(2) - def posts(self): - return self.api.get_list_feed(self.user, self.list) + actor, list_id = self.groups + return self.api.get_list_feed(actor, list_id) class BlueskyFollowingExtractor(BlueskyExtractor): @@ -285,7 +307,7 @@ class 
BlueskyFollowingExtractor(BlueskyExtractor): example = "https://bsky.app/profile/HANDLE/follows" def items(self): - for user in self.api.get_follows(self.user): + for user in self.api.get_follows(self.groups[0]): url = "https://bsky.app/profile/" + user["did"] user["_extractor"] = BlueskyUserExtractor yield Message.Queue, url, user @@ -296,12 +318,9 @@ class BlueskyPostExtractor(BlueskyExtractor): pattern = USER_PATTERN + r"/post/([^/?#]+)" example = "https://bsky.app/profile/HANDLE/post/ID" - def __init__(self, match): - BlueskyExtractor.__init__(self, match) - self.post_id = match.group(2) - def posts(self): - return self.api.get_post_thread(self.user, self.post_id) + actor, post_id = self.groups + return self.api.get_post_thread(actor, post_id) class BlueskyInfoExtractor(BlueskyExtractor): @@ -311,7 +330,7 @@ class BlueskyInfoExtractor(BlueskyExtractor): def items(self): self._metadata_user = True - self.api._did_from_actor(self.user) + self.api._did_from_actor(self.groups[0]) return iter(((Message.Directory, self._user),)) @@ -322,7 +341,7 @@ class BlueskyAvatarExtractor(BlueskyExtractor): example = "https://bsky.app/profile/HANDLE/avatar" def posts(self): - return self._make_post(self.user, "avatar") + return self._make_post(self.groups[0], "avatar") class BlueskyBackgroundExtractor(BlueskyExtractor): @@ -332,7 +351,7 @@ class BlueskyBackgroundExtractor(BlueskyExtractor): example = "https://bsky.app/profile/HANDLE/banner" def posts(self): - return self._make_post(self.user, "banner") + return self._make_post(self.groups[0], "banner") class BlueskySearchExtractor(BlueskyExtractor): @@ -341,7 +360,7 @@ class BlueskySearchExtractor(BlueskyExtractor): example = "https://bsky.app/search?q=QUERY" def posts(self): - query = text.unquote(self.user.replace("+", " ")) + query = text.unquote(self.groups[0].replace("+", " ")) return self.api.search_posts(query) @@ -351,13 +370,14 @@ class BlueskyHashtagExtractor(BlueskyExtractor): example = "https://bsky.app/hashtag/NAME" 
def posts(self): - return self.api.search_posts("#"+self.user, self.groups[1]) + hashtag, order = self.groups + return self.api.search_posts("#"+hashtag, order) class BlueskyAPI(): """Interface for the Bluesky API - https://www.docs.bsky.app/docs/category/http-reference + https://docs.bsky.app/docs/category/http-reference """ def __init__(self, extractor): @@ -378,7 +398,7 @@ class BlueskyAPI(): "actor": self._did_from_actor(actor), "limit": "100", } - return self._pagination(endpoint, params) + return self._pagination(endpoint, params, check_empty=True) def get_author_feed(self, actor, filter="posts_and_author_threads"): endpoint = "app.bsky.feed.getAuthorFeed" @@ -416,11 +436,16 @@ class BlueskyAPI(): return self._pagination(endpoint, params) def get_post_thread(self, actor, post_id): + uri = "at://{}/app.bsky.feed.post/{}".format( + self._did_from_actor(actor), post_id) + depth = self.extractor.config("depth", "0") + return self.get_post_thread_uri(uri, depth) + + def get_post_thread_uri(self, uri, depth="0"): endpoint = "app.bsky.feed.getPostThread" params = { - "uri": "at://{}/app.bsky.feed.post/{}".format( - self._did_from_actor(actor), post_id), - "depth" : self.extractor.config("depth", "0"), + "uri" : uri, + "depth" : depth, "parentHeight": "0", } @@ -443,6 +468,18 @@ class BlueskyAPI(): params = {"actor": did} return self._call(endpoint, params) + def list_records(self, actor, collection): + endpoint = "com.atproto.repo.listRecords" + actor_did = self._did_from_actor(actor) + params = { + "repo" : actor_did, + "collection": collection, + "limit" : "100", + # "reverse" : "false", + } + return self._pagination(endpoint, params, "records", + self.service_endpoint(actor_did)) + @memcache(keyarg=1) def resolve_handle(self, handle): endpoint = "com.atproto.identity.resolveHandle" @@ -523,8 +560,10 @@ class BlueskyAPI(): _refresh_token_cache.update(self.username, data["refreshJwt"]) return "Bearer " + data["accessJwt"] - def _call(self, endpoint, params): - url 
= "{}/xrpc/{}".format(self.root, endpoint) + def _call(self, endpoint, params, root=None): + if root is None: + root = self.root + url = "{}/xrpc/{}".format(root, endpoint) while True: self.authenticate() @@ -549,9 +588,13 @@ class BlueskyAPI(): self.extractor.log.debug("Server response: %s", response.text) raise exception.StopExtraction(msg) - def _pagination(self, endpoint, params, key="feed"): + def _pagination(self, endpoint, params, + key="feed", root=None, check_empty=False): while True: - data = self._call(endpoint, params) + data = self._call(endpoint, params, root) + + if check_empty and not data[key]: + return yield from data[key] cursor = data.get("cursor") diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 995505f..c430ec1 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -59,7 +59,7 @@ class Extractor(): @classmethod def from_url(cls, url): if isinstance(cls.pattern, str): - cls.pattern = re.compile(cls.pattern) + cls.pattern = util.re_compile(cls.pattern) match = cls.pattern.match(url) return cls(match) if match else None @@ -240,6 +240,11 @@ class Extractor(): raise exception.HttpError(msg, response) + def request_location(self, url, **kwargs): + kwargs.setdefault("method", "HEAD") + kwargs.setdefault("allow_redirects", False) + return self.request(url, **kwargs).headers.get("location", "") + _handle_429 = util.false def wait(self, seconds=None, until=None, adjust=1.0, diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 378c7ec..ae475e2 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -867,6 +867,9 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\ )["deviation"]["extended"]["deviationUuid"] yield self.api.deviation(deviation_uuid) + def _unescape_json(self, json): + return json.replace('\\"', '"').replace("\\\\", "\\") + class DeviantartUserExtractor(DeviantartExtractor): """Extractor for an artist's 
user profile""" @@ -1046,7 +1049,7 @@ class DeviantartStashExtractor(DeviantartExtractor): DeviantartExtractor.__init__(self, match) self.user = None - def deviations(self, stash_id=None): + def deviations(self, stash_id=None, stash_data=None): if stash_id is None: legacy_url, stash_id = self.groups else: @@ -1068,14 +1071,33 @@ class DeviantartStashExtractor(DeviantartExtractor): deviation["_page"] = page deviation["index"] = text.parse_int(text.extr( page, '\\"deviationId\\":', ',')) + + deviation["stash_id"] = stash_id + if stash_data: + folder = stash_data["folder"] + deviation["stash_name"] = folder["name"] + deviation["stash_folder"] = folder["folderId"] + deviation["stash_parent"] = folder["parentId"] or 0 + deviation["stash_description"] = \ + folder["richDescription"]["excerpt"] + else: + deviation["stash_name"] = "" + deviation["stash_description"] = "" + deviation["stash_folder"] = 0 + deviation["stash_parent"] = 0 + yield deviation return + stash_data = text.extr(page, ',\\"stash\\":', ',\\"@@') + if stash_data: + stash_data = util.json_loads(self._unescape_json(stash_data)) + for sid in text.extract_iter( page, 'href="https://www.deviantart.com/stash/', '"'): if sid == stash_id or sid.endswith("#comments"): continue - yield from self.deviations(sid) + yield from self.deviations(sid, stash_data) class DeviantartFavoriteExtractor(DeviantartExtractor): @@ -1276,28 +1298,26 @@ class DeviantartDeviationExtractor(DeviantartExtractor): deviation = self.api.deviation(uuid) deviation["_page"] = page + deviation["index_file"] = 0 + deviation["num"] = deviation["count"] = 1 - _dev_info = text.extr( - page, '\\"deviationExtended\\":', ',\\"deviation\\":', None) - # Clean up escaped quotes - _json_str = re.sub( - r'(?02}.{extension}") - self.archive_fmt = ("g_{_username}_{index}{index_file:?_//}." 
- "{extension}") + self.filename_fmt = ("{category}_{index}_{index_file}_{title}_" + "{num:>02}.{extension}") + self.archive_fmt = ("g_{_username}_{index}{index_file:?_//}." + "{extension}") - deviation["index_file"] = 0 + additional_media = util.json_loads(self._unescape_json( + additional_media) + "}]") deviation["count"] = 1 + len(additional_media) - deviation["num"] = 1 yield deviation for index, post in enumerate(additional_media): - uri = post["media"]["baseUri"].encode().decode("unicode-escape") + uri = self._eclipse_media(post["media"], "fullview")[0] deviation["content"]["src"] = uri deviation["num"] += 1 deviation["index_file"] = post["fileId"] diff --git a/gallery_dl/extractor/everia.py b/gallery_dl/extractor/everia.py index e41f6f6..3bf0a74 100644 --- a/gallery_dl/extractor/everia.py +++ b/gallery_dl/extractor/everia.py @@ -57,7 +57,7 @@ class EveriaPostExtractor(EveriaExtractor): data = { "title": text.unescape( - text.extr(page, 'itemprop="headline">', "")), + text.extr(page, 'itemprop="headline">', "', "")), "post_url": url, "post_category": text.extr( diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py index 9bbfb43..3b43134 100644 --- a/gallery_dl/extractor/fanbox.py +++ b/gallery_dl/extractor/fanbox.py @@ -173,15 +173,16 @@ class FanboxExtractor(Extractor): return plans def _get_comment_data(self, post_id): - url = ("https://api.fanbox.cc/post.listComments" + url = ("https://api.fanbox.cc/post.getComments" "?limit=10&postId=" + post_id) comments = [] while url: url = text.ensure_http_scheme(url) body = self.request(url, headers=self.headers).json()["body"] - comments.extend(body["items"]) - url = body["nextUrl"] + data = body["commentList"] + comments.extend(data["items"]) + url = data["nextUrl"] return comments def _get_urls_from_post(self, content_body, post): @@ -296,8 +297,7 @@ class FanboxExtractor(Extractor): url = "https://www.pixiv.net/fanbox/"+content_id # resolve redirect try: - url = self.request(url, 
method="HEAD", - allow_redirects=False).headers["location"] + url = self.request_location(url) except Exception as exc: url = None self.log.warning("Unable to extract fanbox embed %s (%s: %s)", @@ -392,13 +392,7 @@ class FanboxRedirectExtractor(Extractor): pattern = r"(?:https?://)?(?:www\.)?pixiv\.net/fanbox/creator/(\d+)" example = "https://www.pixiv.net/fanbox/creator/12345" - def __init__(self, match): - Extractor.__init__(self, match) - self.user_id = match.group(1) - def items(self): - url = "https://www.pixiv.net/fanbox/creator/" + self.user_id - data = {"_extractor": FanboxCreatorExtractor} - response = self.request( - url, method="HEAD", allow_redirects=False, notfound="user") - yield Message.Queue, response.headers["Location"], data + url = "https://www.pixiv.net/fanbox/creator/" + self.groups[0] + location = self.request_location(url, notfound="user") + yield Message.Queue, location, {"_extractor": FanboxCreatorExtractor} diff --git a/gallery_dl/extractor/fapello.py b/gallery_dl/extractor/fapello.py index 838ae7b..cf18edc 100644 --- a/gallery_dl/extractor/fapello.py +++ b/gallery_dl/extractor/fapello.py @@ -72,10 +72,13 @@ class FapelloModelExtractor(Extractor): if not page: return + url = None for url in text.extract_iter(page, '') if not notes_data: diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 432a7ad..0f88cac 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -606,6 +606,20 @@ class InstagramHighlightsExtractor(InstagramExtractor): return self.api.highlights_media(uid) +class InstagramFollowersExtractor(InstagramExtractor): + """Extractor for an Instagram user's followers""" + subcategory = "followers" + pattern = USER_PATTERN + r"/followers" + example = "https://www.instagram.com/USER/followers/" + + def items(self): + uid = self.api.user_id(self.item) + for user in self.api.user_followers(uid): + user["_extractor"] = InstagramUserExtractor + url = 
"{}/{}".format(self.root, user["username"]) + yield Message.Queue, url, user + + class InstagramFollowingExtractor(InstagramExtractor): """Extractor for an Instagram user's followed users""" subcategory = "following" @@ -693,11 +707,21 @@ class InstagramPostExtractor(InstagramExtractor): """Extractor for an Instagram post""" subcategory = "post" pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" - r"/(?:[^/?#]+/)?(?:p|tv|reel)/([^/?#]+)") + r"/(?:share/()|[^/?#]+/)?(?:p|tv|reel)/([^/?#]+)") example = "https://www.instagram.com/p/abcdefg/" def posts(self): - return self.api.media(self.item) + share, shortcode = self.groups + if share is not None: + url = text.ensure_http_scheme(self.url) + headers = { + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "same-origin", + } + location = self.request_location(url, headers=headers) + shortcode = location.split("/")[-2] + return self.api.media(shortcode) class InstagramRestAPI(): @@ -816,6 +840,11 @@ class InstagramRestAPI(): params = {"count": 30} return self._pagination(endpoint, params) + def user_followers(self, user_id): + endpoint = "/v1/friendships/{}/followers/".format(user_id) + params = {"count": 12} + return self._pagination_following(endpoint, params) + def user_following(self, user_id): endpoint = "/v1/friendships/{}/following/".format(user_id) params = {"count": 12} @@ -908,9 +937,10 @@ class InstagramRestAPI(): for item in data["items"]: yield from item["media_items"] - if "next_max_id" not in data: + next_max_id = data.get("next_max_id") + if not next_max_id: return extr._update_cursor(None) - params["max_id"] = extr._update_cursor(data["next_max_id"]) + params["max_id"] = extr._update_cursor(next_max_id) def _pagination_following(self, endpoint, params): extr = self.extractor @@ -921,10 +951,10 @@ class InstagramRestAPI(): yield from data["users"] - if len(data["users"]) < params["count"]: + next_max_id = data.get("next_max_id") + if not next_max_id: return 
extr._update_cursor(None) - params["max_id"] = extr._update_cursor( - params["max_id"] + params["count"]) + params["max_id"] = extr._update_cursor(next_max_id) class InstagramGraphqlAPI(): diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py index 2974b59..e602665 100644 --- a/gallery_dl/extractor/itaku.py +++ b/gallery_dl/extractor/itaku.py @@ -65,6 +65,15 @@ class ItakuGalleryExtractor(ItakuExtractor): return self.api.galleries_images(*self.groups) +class ItakuStarsExtractor(ItakuExtractor): + subcategory = "stars" + pattern = BASE_PATTERN + r"/profile/([^/?#]+)/stars(?:/(\d+))?" + example = "https://itaku.ee/profile/USER/stars" + + def posts(self): + return self.api.galleries_images_starred(*self.groups) + + class ItakuImageExtractor(ItakuExtractor): subcategory = "image" pattern = BASE_PATTERN + r"/images/(\d+)" @@ -139,6 +148,21 @@ class ItakuAPI(): } return self._pagination(endpoint, params, self.image) + def galleries_images_starred(self, username, section=None): + endpoint = "/galleries/images/user_starred_imgs/" + params = { + "cursor" : None, + "stars_of" : self.user(username)["owner"], + "sections" : section, + "date_range": "", + "ordering" : "-date_added", + "maturity_rating": ("SFW", "Questionable", "NSFW"), + "page" : "1", + "page_size" : "30", + "visibility": ("PUBLIC", "PROFILE_ONLY"), + } + return self._pagination(endpoint, params, self.image) + def image(self, image_id): endpoint = "/galleries/images/{}/".format(image_id) return self._call(endpoint) diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index de7d040..79070ee 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -153,7 +153,7 @@ class KemonopartyExtractor(Extractor): file["type"] = "archive" if archives: try: - data = self.api.posts_archives(file["hash"]) + data = self.api.file(file["hash"]) data.update(file) post_archives.append(data) except Exception as exc: @@ -319,12 +319,9 @@ class 
KemonopartyUserExtractor(KemonopartyExtractor): def posts(self): _, _, service, creator_id, query = self.groups params = text.parse_query(query) - if params.get("tag"): - return self.api.creator_tagged_posts( - service, creator_id, params.get("tag"), params.get("o")) - else: - return self.api.creator_posts( - service, creator_id, params.get("o"), params.get("q")) + return self.api.creator_posts_legacy( + service, creator_id, + params.get("o"), params.get("q"), params.get("tag")) class KemonopartyPostsExtractor(KemonopartyExtractor): @@ -524,18 +521,19 @@ class KemonoAPI(): params = {"q": query, "o": offset, "tag": tags} return self._pagination(endpoint, params, 50, "posts") - def posts_archives(self, file_hash): - endpoint = "/posts/archives/" + file_hash - return self._call(endpoint)["archive"] + def file(self, file_hash): + endpoint = "/file/" + file_hash + return self._call(endpoint) def creator_posts(self, service, creator_id, offset=0, query=None): endpoint = "/{}/user/{}".format(service, creator_id) params = {"q": query, "o": offset} return self._pagination(endpoint, params, 50) - def creator_tagged_posts(self, service, creator_id, tags, offset=0): + def creator_posts_legacy(self, service, creator_id, + offset=0, query=None, tags=None): endpoint = "/{}/user/{}/posts-legacy".format(service, creator_id) - params = {"o": offset, "tag": tags} + params = {"o": offset, "tag": tags, "q": query} return self._pagination(endpoint, params, 50, "results") def creator_announcements(self, service, creator_id): diff --git a/gallery_dl/extractor/moebooru.py b/gallery_dl/extractor/moebooru.py index e97d273..9fd66e2 100644 --- a/gallery_dl/extractor/moebooru.py +++ b/gallery_dl/extractor/moebooru.py @@ -127,6 +127,7 @@ class MoebooruPoolExtractor(MoebooruExtractor): if self.config("metadata"): url = "{}/pool/show/{}.json".format(self.root, self.pool_id) pool = self.request(url).json() + pool["name"] = pool["name"].replace("_", " ") pool.pop("posts", None) return {"pool": pool} 
return {"pool": text.parse_int(self.pool_id)} diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py index d3150e6..2287325 100644 --- a/gallery_dl/extractor/naver.py +++ b/gallery_dl/extractor/naver.py @@ -9,7 +9,9 @@ """Extractors for https://blog.naver.com/""" from .common import GalleryExtractor, Extractor, Message -from .. import text +from .. import text, util +import datetime +import time class NaverBase(): @@ -59,19 +61,66 @@ class NaverPostExtractor(NaverBase, GalleryExtractor): "user" : extr("var nickName = '", "'"), }, } - data["post"]["date"] = text.parse_datetime( + + data["post"]["date"] = self._parse_datetime( extr('se_publishDate pcol2">', '<') or - extr('_postAddDate">', '<'), "%Y. %m. %d. %H:%M") + extr('_postAddDate">', '<')) + return data + def _parse_datetime(self, date_string): + if "전" in date_string: + ts = time.gmtime() + return datetime.datetime(ts.tm_year, ts.tm_mon, ts.tm_mday) + return text.parse_datetime(date_string, "%Y. %m. %d. %H:%M") + def images(self, page): - results = [] + files = [] + self._extract_images(files, page) + if self.config("videos", True): + self._extract_videos(files, page) + return files + + def _extract_images(self, files, page): for url in text.extract_iter(page, 'data-lazy-src="', '"'): url = url.replace("://post", "://blog", 1).partition("?")[0] if "\ufffd" in text.unquote(url): url = text.unquote(url, encoding="EUC-KR") - results.append((url, None)) - return results + files.append((url, None)) + + def _extract_videos(self, files, page): + for module in text.extract_iter(page, " data-module='", "'>", "<"), + "tags" : text.split_html(text.extr( + page, '