From 9fb906aeb3816abb42f459d1b67e35024e6f2348 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Mon, 19 Jun 2023 01:14:28 -0400 Subject: New upstream version 1.25.6. --- CHANGELOG.md | 33 +++++++++ PKG-INFO | 6 +- README.rst | 4 +- data/man/gallery-dl.1 | 2 +- data/man/gallery-dl.conf.5 | 34 ++++++++- gallery_dl.egg-info/PKG-INFO | 6 +- gallery_dl.egg-info/SOURCES.txt | 1 + gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/blogger.py | 1 + gallery_dl/extractor/bunkr.py | 13 ++-- gallery_dl/extractor/fanbox.py | 12 ++- gallery_dl/extractor/fantia.py | 142 ++++++++++++++++++++++-------------- gallery_dl/extractor/furaffinity.py | 15 +++- gallery_dl/extractor/imagehosts.py | 31 ++++++-- gallery_dl/extractor/instagram.py | 2 +- gallery_dl/extractor/jpgfish.py | 23 +++--- gallery_dl/extractor/jschan.py | 94 ++++++++++++++++++++++++ gallery_dl/extractor/kemonoparty.py | 46 ++++++++---- gallery_dl/extractor/pixiv.py | 99 +++++++++++++++++-------- gallery_dl/extractor/pornhub.py | 23 ++++-- gallery_dl/extractor/reddit.py | 5 +- gallery_dl/extractor/redgifs.py | 73 ++++++++++++++---- gallery_dl/extractor/senmanga.py | 96 +++++++++++++++--------- gallery_dl/extractor/twitter.py | 119 +++++++++++++++++++----------- gallery_dl/extractor/vipergirls.py | 94 ++++++++++++++---------- gallery_dl/extractor/wallhaven.py | 24 ++++-- gallery_dl/extractor/weibo.py | 4 +- gallery_dl/formatter.py | 1 + gallery_dl/version.py | 2 +- test/test_formatter.py | 9 ++- test/test_results.py | 3 +- 31 files changed, 733 insertions(+), 285 deletions(-) create mode 100644 gallery_dl/extractor/jschan.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 405c117..429c7ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,38 @@ # Changelog +## 1.25.6 - 2023-06-17 +### Additions +- [blogger] download files from `lh*.googleusercontent.com` ([#4070](https://github.com/mikf/gallery-dl/issues/4070)) +- [fantia] extract `plan` metadata ([#2477](https://github.com/mikf/gallery-dl/issues/2477)) +- [fantia] emit warning for non-visible content sections ([#4128](https://github.com/mikf/gallery-dl/issues/4128)) +- [furaffinity] extract `favorite_id` metadata ([#4133](https://github.com/mikf/gallery-dl/issues/4133)) +- [jschan] add generic extractors for jschan image boards ([#3447](https://github.com/mikf/gallery-dl/issues/3447)) +- [kemonoparty] support `.su` TLDs ([#4139](https://github.com/mikf/gallery-dl/issues/4139)) +- [pixiv:novel] add `novel-bookmark` extractor ([#4111](https://github.com/mikf/gallery-dl/issues/4111)) +- [pixiv:novel] add `full-series` option ([#4111](https://github.com/mikf/gallery-dl/issues/4111)) +- [postimage] add gallery support, update image extractor ([#3115](https://github.com/mikf/gallery-dl/issues/3115), [#4134](https://github.com/mikf/gallery-dl/issues/4134)) +- [redgifs] support galleries ([#4021](https://github.com/mikf/gallery-dl/issues/4021)) +- [twitter] extract `conversation_id` metadata ([#3839](https://github.com/mikf/gallery-dl/issues/3839)) +- [vipergirls] add login support ([#4166](https://github.com/mikf/gallery-dl/issues/4166)) +- [vipergirls] use API endpoints ([#4166](https://github.com/mikf/gallery-dl/issues/4166)) +- [formatter] implement `H` conversion ([#4164](https://github.com/mikf/gallery-dl/issues/4164)) +### Fixes +- [acidimg] fix extraction ([#4136](https://github.com/mikf/gallery-dl/issues/4136)) +- [bunkr] update domain to bunkrr.su ([#4159](https://github.com/mikf/gallery-dl/issues/4159), [#4189](https://github.com/mikf/gallery-dl/issues/4189)) +- [bunkr] fix video downloads +- [fanbox] prevent exception due to missing embeds ([#4088](https://github.com/mikf/gallery-dl/issues/4088)) +- [instagram] fix retrieving `/tagged` posts ([#4122](https://github.com/mikf/gallery-dl/issues/4122)) +- [jpgfish] update domain to `jpg.pet` ([#4138](https://github.com/mikf/gallery-dl/issues/4138)) +- [pixiv:novel] fix error with embeds extraction ([#4175](https://github.com/mikf/gallery-dl/issues/4175)) +- [pornhub] improve redirect handling ([#4188](https://github.com/mikf/gallery-dl/issues/4188)) +- [reddit] fix crash due to empty `crosspost_parent_lists` ([#4120](https://github.com/mikf/gallery-dl/issues/4120), [#4172](https://github.com/mikf/gallery-dl/issues/4172)) +- [redgifs] update `search` URL pattern ([#4115](https://github.com/mikf/gallery-dl/issues/4115), [#4185](https://github.com/mikf/gallery-dl/issues/4185)) +- [senmanga] fix and update ([#4160](https://github.com/mikf/gallery-dl/issues/4160)) +- [twitter] use GraphQL API search endpoint ([#3942](https://github.com/mikf/gallery-dl/issues/3942)) +- [wallhaven] improve HTTP error handling ([#4192](https://github.com/mikf/gallery-dl/issues/4192)) +- [weibo] prevent fatal exception due to missing video data ([#4150](https://github.com/mikf/gallery-dl/issues/4150)) +- [weibo] fix `.json` extension for some videos + ## 1.25.5 - 2023-05-27 ### Additions - [8muses] add `parts` metadata field ([#3329](https://github.com/mikf/gallery-dl/issues/3329)) diff --git a/PKG-INFO b/PKG-INFO index cadb98c..68bf134 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.25.5 +Version: 1.25.6 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -109,9 +109,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/README.rst b/README.rst index ba745a8..44cbfb3 100644 --- a/README.rst +++ b/README.rst @@ -72,9 +72,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 3d5e4e8..c86db6a 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2023-05-27" "1.25.5" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2023-06-17" "1.25.6" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index be234ce..e4df909 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2023-05-27" "1.25.5" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2023-06-17" "1.25.6" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -486,6 +486,8 @@ and optional for .br * \f[I]twitter\f[] .br +* \f[I]vipergirls\f[] +.br * \f[I]zerochan\f[] These values can also be specified via the @@ -2828,6 +2830,18 @@ by using a third-party tool like Download images embedded in novels. +.SS extractor.pixiv.novel.full-series +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +When downloading a novel being part of a series, +download all novels of that series. + + .SS extractor.pixiv.metadata .IP "Type:" 6 \f[I]bool\f[] @@ -3631,6 +3645,24 @@ If this value is \f[I]"original"\f[], metadata for these files will be taken from the original Tweets, not the Retweets. +.SS extractor.twitter.search-endpoint +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"auto"\f[] + +.IP "Description:" 4 +Selects the API endpoint used to retrieve search results. + +.br +* \f[I]"rest"\f[]: Legacy REST endpoint - returns a \f[I]403 Forbidden\f[] error when not logged in +.br +* \f[I]"graphql"\f[]: New GraphQL endpoint +.br +* \f[I]"auto"\f[]: \f[I]"rest"\f[] when logged in, \f[I]"graphql"\f[] otherwise + + .SS extractor.twitter.timeline.strategy .IP "Type:" 6 \f[I]string\f[] diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index c069128..547f3be 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.25.5 +Version: 1.25.6 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -109,9 +109,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index fde82b6..44fbd22 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -114,6 +114,7 @@ gallery_dl/extractor/issuu.py gallery_dl/extractor/itaku.py gallery_dl/extractor/itchio.py gallery_dl/extractor/jpgfish.py +gallery_dl/extractor/jschan.py gallery_dl/extractor/kabeuchi.py gallery_dl/extractor/keenspot.py gallery_dl/extractor/kemonoparty.py diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 3e47c3e..a344fe4 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -76,6 +76,7 @@ modules = [ "itaku", "itchio", "jpgfish", + "jschan", "kabeuchi", "keenspot", "kemonoparty", diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py index eafc8af..3ceada8 100644 --- a/gallery_dl/extractor/blogger.py +++ b/gallery_dl/extractor/blogger.py @@ -44,6 +44,7 @@ class BloggerExtractor(Extractor): findall_image = re.compile( r'src="(https?://(?:' r'blogger\.googleusercontent\.com/img|' + r'lh\d+\.googleusercontent\.com/|' r'\d+\.bp\.blogspot\.com)/[^"]+)').findall findall_video = re.compile( r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py index 7c66fb0..5c8c530 100644 --- a/gallery_dl/extractor/bunkr.py +++ b/gallery_dl/extractor/bunkr.py @@ -6,19 +6,19 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://bunkr.la/""" +"""Extractors for https://bunkrr.su/""" from .lolisafe import LolisafeAlbumExtractor from .. import text class BunkrAlbumExtractor(LolisafeAlbumExtractor): - """Extractor for bunkr.la albums""" + """Extractor for bunkrr.su albums""" category = "bunkr" - root = "https://bunkr.la" - pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:la|[sr]u|is|to)/a/([^/?#]+)" + root = "https://bunkrr.su" + pattern = r"(?:https?://)?(?:app\.)?bunkr+\.(?:la|[sr]u|is|to)/a/([^/?#]+)" test = ( - ("https://bunkr.la/a/Lktg9Keq", { + ("https://bunkrr.su/a/Lktg9Keq", { "pattern": r"https://cdn\.bunkr\.ru/test-テスト-\"&>-QjgneIQv\.png", "content": "0c8768055e4e20e7c7259608b67799171b691140", "keyword": { @@ -52,6 +52,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): "num": int, }, }), + ("https://bunkrr.su/a/Lktg9Keq"), ("https://bunkr.la/a/Lktg9Keq"), ("https://bunkr.su/a/Lktg9Keq"), ("https://bunkr.ru/a/Lktg9Keq"), @@ -70,7 +71,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): cdn = None files = [] append = files.append - headers = {"Referer": self.root.replace("://", "://stream.", 1) + "/"} + headers = {"Referer": self.root + "/"} pos = page.index('class="grid-images') for url in text.extract_iter(page, '', '<', pos) return url, text.unescape(filename) +class PostimgGalleryExtractor(ImagehostImageExtractor): + """Extractor for images galleries from postimages.org""" + category = "postimg" + subcategory = "gallery" + pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)" + r"/(?:gallery/)([^/?#]+)/?)") + test = ("https://postimg.cc/gallery/wxpDLgX", { + "pattern": PostimgImageExtractor.pattern, + "count": 22, + }) + + def items(self): + page = self.request(self.page_url).text + data = {"_extractor": PostimgImageExtractor} + for url in text.extract_iter(page, ' class="thumb">= 15" + }) + ) + + def __init__(self, match): + JschanExtractor.__init__(self, match) + index = match.lastindex + self.board = match.group(index-1) + self.thread = match.group(index) + + def items(self): + url = "{}/{}/thread/{}.json".format( + self.root, self.board, self.thread) + thread = self.request(url).json() + thread["threadId"] = thread["postId"] + posts = thread.pop("replies", ()) + + yield Message.Directory, thread + for post in itertools.chain((thread,), posts): + files = post.pop("files", ()) + if files: + thread.update(post) + thread["count"] = len(files) + for num, file in enumerate(files): + url = self.root + "/file/" + file["filename"] + file.update(thread) + file["num"] = num + file["siteFilename"] = file["filename"] + text.nameext_from_url(file["originalFilename"], file) + yield Message.Url, url, file + + +class JschanBoardExtractor(JschanExtractor): + """Extractor for jschan boards""" + subcategory = "board" + pattern = (BASE_PATTERN + r"/([^/?#]+)" + r"(?:/index\.html|/catalog\.html|/\d+\.html|/?$)") + test = ( + ("https://94chan.org/art/", { + "pattern": JschanThreadExtractor.pattern, + "count": ">= 30" + }), + ("https://94chan.org/art/2.html"), + ("https://94chan.org/art/catalog.html"), + ("https://94chan.org/art/index.html"), + ) + + def __init__(self, match): + JschanExtractor.__init__(self, match) + self.board = match.group(match.lastindex) + + def items(self): + url = "{}/{}/catalog.json".format(self.root, self.board) + for thread in self.request(url).json(): + url = "{}/{}/thread/{}.html".format( + self.root, self.board, thread["postId"]) + thread["_extractor"] = JschanThreadExtractor + yield Message.Queue, url, thread diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 915fbe6..5aeefeb 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -14,7 +14,7 @@ from ..cache import cache import itertools import re -BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.party" +BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.(party|su)" USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)" HASH_PATTERN = r"/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})" @@ -29,10 +29,11 @@ class KemonopartyExtractor(Extractor): cookiedomain = ".kemono.party" def __init__(self, match): - if match.group(1) == "coomer": - self.category = "coomerparty" - self.cookiedomain = ".coomer.party" + domain = match.group(1) + tld = match.group(2) + self.category = domain + "party" self.root = text.root_from_url(match.group(0)) + self.cookiedomain = ".{}.{}".format(domain, tld) Extractor.__init__(self, match) self.session.headers["Referer"] = self.root + "/" @@ -40,7 +41,7 @@ class KemonopartyExtractor(Extractor): self._prepare_ddosguard_cookies() self._find_inline = re.compile( - r'src="(?:https?://(?:kemono|coomer)\.party)?(/inline/[^"]+' + r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+' r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall find_hash = re.compile(HASH_PATTERN).match generators = self._build_file_generators(self.config("files")) @@ -224,11 +225,12 @@ class KemonopartyUserExtractor(KemonopartyExtractor): "options": (("max-posts", 25),), "count": "< 100", }), + ("https://kemono.su/subscribestar/user/alcorart"), ("https://kemono.party/subscribestar/user/alcorart"), ) def __init__(self, match): - _, service, user_id, offset = match.groups() + _, _, service, user_id, offset = match.groups() self.subcategory = service KemonopartyExtractor.__init__(self, match) self.api_url = "{}/api/{}/user/{}".format(self.root, service, user_id) @@ -329,13 +331,14 @@ class KemonopartyPostExtractor(KemonopartyExtractor): r"f51c10adc9dabd86e92bd52339f298b9\.txt", "content": "da39a3ee5e6b4b0d3255bfef95601890afd80709", # empty }), + ("https://kemono.su/subscribestar/user/alcorart/post/184330"), ("https://kemono.party/subscribestar/user/alcorart/post/184330"), ("https://www.kemono.party/subscribestar/user/alcorart/post/184330"), ("https://beta.kemono.party/subscribestar/user/alcorart/post/184330"), ) def __init__(self, match): - _, service, user_id, post_id = match.groups() + _, _, service, user_id, post_id = match.groups() self.subcategory = service KemonopartyExtractor.__init__(self, match) self.api_url = "{}/api/{}/user/{}/post/{}".format( @@ -361,9 +364,9 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor): "count": 4, "keyword": {"channel_name": "finish-work"}, }), - (("https://kemono.party/discord" + (("https://kemono.su/discord" "/server/256559665620451329/channel/462437519519383555#"), { - "pattern": r"https://kemono\.party/data/(" + "pattern": r"https://kemono\.su/data/(" r"e3/77/e377e3525164559484ace2e64425b0cec1db08.*\.png|" r"51/45/51453640a5e0a4d23fbf57fb85390f9c5ec154.*\.gif)", "keyword": {"hash": "re:e377e3525164559484ace2e64425b0cec1db08" @@ -382,7 +385,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor): def __init__(self, match): KemonopartyExtractor.__init__(self, match) - _, self.server, self.channel, self.channel_name = match.groups() + _, _, self.server, self.channel, self.channel_name = match.groups() def items(self): self._prepare_ddosguard_cookies() @@ -457,14 +460,20 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor): class KemonopartyDiscordServerExtractor(KemonopartyExtractor): subcategory = "discord-server" pattern = BASE_PATTERN + r"/discord/server/(\d+)$" - test = ("https://kemono.party/discord/server/488668827274444803", { - "pattern": KemonopartyDiscordExtractor.pattern, - "count": 13, - }) + test = ( + ("https://kemono.party/discord/server/488668827274444803", { + "pattern": KemonopartyDiscordExtractor.pattern, + "count": 13, + }), + ("https://kemono.su/discord/server/488668827274444803", { + "pattern": KemonopartyDiscordExtractor.pattern, + "count": 13, + }), + ) def __init__(self, match): KemonopartyExtractor.__init__(self, match) - self.server = match.group(2) + self.server = match.group(3) def items(self): url = "{}/api/discord/channels/lookup?q={}".format( @@ -493,11 +502,16 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor): "url": "ecfccf5f0d50b8d14caa7bbdcf071de5c1e5b90f", "count": 3, }), + ("https://kemono.su/favorites?type=post", { + "pattern": KemonopartyPostExtractor.pattern, + "url": "4be8e84cb384a907a8e7997baaf6287b451783b5", + "count": 3, + }), ) def __init__(self, match): KemonopartyExtractor.__init__(self, match) - self.favorites = (text.parse_query(match.group(2)).get("type") or + self.favorites = (text.parse_query(match.group(3)).get("type") or self.config("favorites") or "artist") diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index cdaf595..861959e 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -15,6 +15,9 @@ from datetime import datetime, timedelta import itertools import hashlib +BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" +USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)" + class PixivExtractor(Extractor): """Base class for pixiv extractors""" @@ -150,7 +153,7 @@ class PixivExtractor(Extractor): class PixivUserExtractor(PixivExtractor): """Extractor for a pixiv user profile""" subcategory = "user" - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:" + pattern = (BASE_PATTERN + r"/(?:" r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id=" r")(\d+)(?:$|[?#])") test = ( @@ -168,18 +171,19 @@ class PixivUserExtractor(PixivExtractor): def items(self): base = "{}/users/{}/".format(self.root, self.user_id) return self._dispatch_extractors(( - (PixivAvatarExtractor , base + "avatar"), - (PixivBackgroundExtractor, base + "background"), - (PixivArtworksExtractor , base + "artworks"), - (PixivFavoriteExtractor , base + "bookmarks/artworks"), - (PixivNovelUserExtractor , base + "novels"), + (PixivAvatarExtractor , base + "avatar"), + (PixivBackgroundExtractor , base + "background"), + (PixivArtworksExtractor , base + "artworks"), + (PixivFavoriteExtractor , base + "bookmarks/artworks"), + (PixivNovelBookmarkExtractor, base + "bookmarks/novels"), + (PixivNovelUserExtractor , base + "novels"), ), ("artworks",)) class PixivArtworksExtractor(PixivExtractor): """Extractor for artworks of a pixiv user""" subcategory = "artworks" - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:" + pattern = (BASE_PATTERN + r"/(?:" r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)" r"(?:/([^/?#]+))?/?(?:$|[?#])" r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)") @@ -240,8 +244,7 @@ class PixivAvatarExtractor(PixivExtractor): subcategory = "avatar" filename_fmt = "avatar{date:?_//%Y-%m-%d}.{extension}" archive_fmt = "avatar_{user[id]}_{date}" - pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net" - r"/(?:en/)?users/(\d+)/avatar") + pattern = USER_PATTERN + r"/avatar" test = ("https://www.pixiv.net/en/users/173530/avatar", { "content": "4e57544480cc2036ea9608103e8f024fa737fe66", }) @@ -261,8 +264,7 @@ class PixivBackgroundExtractor(PixivExtractor): subcategory = "background" filename_fmt = "background{date:?_//%Y-%m-%d}.{extension}" archive_fmt = "background_{user[id]}_{date}" - pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net" - r"/(?:en/)?users/(\d+)/background") + pattern = USER_PATTERN + "/background" test = ("https://www.pixiv.net/en/users/194921/background", { "pattern": r"https://i\.pximg\.net/background/img/2021/01/30/16/12/02" r"/194921_af1f71e557a42f499213d4b9eaccc0f8\.jpg", @@ -376,12 +378,12 @@ class PixivWorkExtractor(PixivExtractor): class PixivFavoriteExtractor(PixivExtractor): - """Extractor for all favorites/bookmarks of a pixiv-user""" + """Extractor for all favorites/bookmarks of a pixiv user""" subcategory = "favorite" directory_fmt = ("{category}", "bookmarks", "{user_bookmark[id]} {user_bookmark[account]}") archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}" - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:(?:en/)?" + pattern = (BASE_PATTERN + r"/(?:(?:en/)?" r"users/(\d+)/(bookmarks/artworks|following)(?:/([^/?#]+))?" r"|bookmark\.php)(?:\?([^#]*))?") test = ( @@ -484,8 +486,7 @@ class PixivRankingExtractor(PixivExtractor): archive_fmt = "r_{ranking[mode]}_{ranking[date]}_{id}{num}.{extension}" directory_fmt = ("{category}", "rankings", "{ranking[mode]}", "{ranking[date]}") - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" - r"/ranking\.php(?:\?([^#]*))?") + pattern = BASE_PATTERN + r"/ranking\.php(?:\?([^#]*))?" test = ( ("https://www.pixiv.net/ranking.php?mode=daily&date=20170818"), ("https://www.pixiv.net/ranking.php"), @@ -550,8 +551,7 @@ class PixivSearchExtractor(PixivExtractor): subcategory = "search" archive_fmt = "s_{search[word]}_{id}{num}.{extension}" directory_fmt = ("{category}", "search", "{search[word]}") - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" - r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?" + pattern = (BASE_PATTERN + r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?" r"|search\.php)(?:\?([^#]+))?") test = ( ("https://www.pixiv.net/en/tags/Original", { @@ -634,8 +634,7 @@ class PixivFollowExtractor(PixivExtractor): subcategory = "follow" archive_fmt = "F_{user_follow[id]}_{id}{num}.{extension}" directory_fmt = ("{category}", "following") - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" - r"/bookmark_new_illust\.php") + pattern = BASE_PATTERN + r"/bookmark_new_illust\.php" test = ( ("https://www.pixiv.net/bookmark_new_illust.php"), ("https://touch.pixiv.net/bookmark_new_illust.php"), @@ -697,8 +696,7 @@ class PixivSeriesExtractor(PixivExtractor): directory_fmt = ("{category}", "{user[id]} {user[account]}", "{series[id]} {series[title]}") filename_fmt = "{num_series:>03}_{id}_p{num}.{extension}" - pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net" - r"/user/(\d+)/series/(\d+)") + pattern = BASE_PATTERN + r"/user/(\d+)/series/(\d+)" test = ("https://www.pixiv.net/user/10509347/series/21859", { "range": "1-10", "count": 10, @@ -755,8 +753,7 @@ class PixivNovelExtractor(PixivExtractor): """Extractor for pixiv novels""" subcategory = "novel" request_interval = 1.0 - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" - r"/n(?:ovel/show\.php\?id=|/)(\d+)") + pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)" test = ( ("https://www.pixiv.net/novel/show.php?id=19612040", { "count": 1, @@ -799,6 +796,12 @@ class PixivNovelExtractor(PixivExtractor): "options": (("embeds", True),), "count": 3, }), + # full series + ("https://www.pixiv.net/novel/show.php?id=19612040", { + "options": (("full-series", True),), + "count": 4, + }), + # short URL ("https://www.pixiv.net/n/19612040"), ) @@ -862,7 +865,7 @@ class PixivNovelExtractor(PixivExtractor): illusts = {} for marker in text.extract_iter(content, "[", "]"): - if marker.startswith("[jumpuri:"): + if marker.startswith("[jumpuri:If you would like to "): desktop = True elif marker.startswith("pixivimage:"): illusts[marker[11:].partition("-")[0]] = None @@ -895,14 +898,17 @@ class PixivNovelExtractor(PixivExtractor): yield Message.Queue, url, novel def novels(self): - return (self.api.novel_detail(self.novel_id),) + novel = self.api.novel_detail(self.novel_id) + if self.config("full-series") and novel["series"]: + self.subcategory = PixivNovelSeriesExtractor.subcategory + return self.api.novel_series(novel["series"]["id"]) + return (novel,) class PixivNovelUserExtractor(PixivNovelExtractor): """Extractor for pixiv users' novels""" subcategory = "novel-user" - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" - r"/(?:en/)?users/(\d+)/novels") + pattern = USER_PATTERN + r"/novels" test = ("https://www.pixiv.net/en/users/77055466/novels", { "pattern": "^text:", "range": "1-5", @@ -916,8 +922,7 @@ class PixivNovelUserExtractor(PixivNovelExtractor): class PixivNovelSeriesExtractor(PixivNovelExtractor): """Extractor for pixiv novel series""" subcategory = "novel-series" - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" - r"/novel/series/(\d+)") + pattern = BASE_PATTERN + r"/novel/series/(\d+)" test = ("https://www.pixiv.net/novel/series/10278364", { "count": 4, "content": "b06abed001b3f6ccfb1579699e9a238b46d38ea2", @@ -927,6 +932,37 @@ class PixivNovelSeriesExtractor(PixivNovelExtractor): return self.api.novel_series(self.novel_id) +class PixivNovelBookmarkExtractor(PixivNovelExtractor): + """Extractor for bookmarked pixiv novels""" + subcategory = "novel-bookmark" + pattern = (USER_PATTERN + r"/bookmarks/novels" + r"(?:/([^/?#]+))?(?:/?\?([^#]+))?") + test = ( + ("https://www.pixiv.net/en/users/77055466/bookmarks/novels", { + "count": 1, + "content": "7194e8faa876b2b536f185ee271a2b6e46c69089", + }), + ("https://www.pixiv.net/en/users/11/bookmarks/novels/TAG?rest=hide"), + ) + + def __init__(self, match): + PixivNovelExtractor.__init__(self, match) + self.user_id, self.tag, self.query = match.groups() + + def novels(self): + if self.tag: + tag = text.unquote(self.tag) + else: + tag = None + + if text.parse_query(self.query).get("rest") == "hide": + restrict = "private" + else: + restrict = "public" + + return self.api.user_bookmarks_novel(self.user_id, tag, restrict) + + class PixivSketchExtractor(Extractor): """Extractor for user pages on sketch.pixiv.net""" category = "pixiv" @@ -1113,6 +1149,11 @@ class PixivAppAPI(): params = {"user_id": user_id, "tag": tag, "restrict": restrict} return self._pagination("/v1/user/bookmarks/illust", params) + def user_bookmarks_novel(self, user_id, tag=None, restrict="public"): + """Return novels bookmarked by a user""" + params = {"user_id": user_id, "tag": tag, "restrict": restrict} + return self._pagination("/v1/user/bookmarks/novel", params, "novels") + def user_bookmark_tags_illust(self, user_id, restrict="public"): """Return bookmark tags defined by a user""" params = {"user_id": user_id, "restrict": restrict} diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py index f8497c0..f19e33c 100644 --- a/gallery_dl/extractor/pornhub.py +++ b/gallery_dl/extractor/pornhub.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2021 Mike Fährmann +# Copyright 2019-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,7 +11,6 @@ from .common import Extractor, Message from .. import text, exception - BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?pornhub\.com" @@ -146,10 +145,20 @@ class PornhubUserExtractor(PornhubExtractor): data = {"_extractor": PornhubGalleryExtractor} while True: - page = self.request( - url, method="POST", headers=headers, params=params).text - if not page: - return - for gid in text.extract_iter(page, 'id="albumphoto', '"'): + response = self.request( + url, method="POST", headers=headers, params=params, + allow_redirects=False) + + if 300 <= response.status_code < 400: + url = "{}{}/photos/{}/ajax".format( + self.root, response.headers["location"], + self.cat or "public") + continue + + gid = None + for gid in text.extract_iter(response.text, 'id="albumphoto', '"'): yield Message.Queue, self.root + "/album/" + gid, data + if gid is None: + return + params["page"] += 1 diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index 3f09e13..9a57dcf 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -56,7 +56,10 @@ class RedditExtractor(Extractor): submission["num"] = 0 if "crosspost_parent_list" in submission: - media = submission["crosspost_parent_list"][-1] + try: + media = submission["crosspost_parent_list"][-1] + except Exception: + media = submission else: media = submission diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py index eaaef7d..bfd18b5 100644 --- a/gallery_dl/extractor/redgifs.py +++ b/gallery_dl/extractor/redgifs.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020-2022 Mike Fährmann +# Copyright 2020-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -16,7 +16,8 @@ from ..cache import memcache class RedgifsExtractor(Extractor): """Base class for redgifs extractors""" category = "redgifs" - filename_fmt = "{category}_{id}.{extension}" + filename_fmt = \ + "{category}_{gallery:?//[:11]}{num:?_/_/>02}{id}.{extension}" archive_fmt = "{id}" root = "https://www.redgifs.com" @@ -34,16 +35,32 @@ class RedgifsExtractor(Extractor): def items(self): metadata = self.metadata() + for gif in self.gifs(): - url = self._process(gif) - if not url: - self.log.warning("Skipping '%s' (format not available)", - gif["id"]) - continue + + gallery = gif.get("gallery") + if gallery: + gifs = self.api.gallery(gallery)["gifs"] + enum = 1 + cnt = len(gifs) + else: + gifs = (gif,) + enum = 0 + cnt = 1 gif.update(metadata) + gif["count"] = cnt yield Message.Directory, gif - yield Message.Url, url, gif + + for num, gif in enumerate(gifs, enum): + url = self._process(gif) + if not url: + self.log.warning( + "Skipping '%s' (format not available)", gif["id"]) + continue + gif["num"] = num + gif["count"] = cnt + yield Message.Url, url, gif def _process(self, gif): gif["_fallback"] = formats = self._formats(gif) @@ -145,21 +162,36 @@ class RedgifsSearchExtractor(RedgifsExtractor): """Extractor for redgifs search results""" subcategory = "search" directory_fmt = ("{category}", "Search", "{search}") - pattern = r"(?:https?://)?(?:\w+\.)?redgifs\.com/browse/?\?([^#]+)" + pattern = (r"(?:https?://)?(?:\w+\.)?redgifs\.com" + r"/(?:gifs/([^/?#]+)|browse)(?:/?\?([^#]+))?") test = ( + ("https://www.redgifs.com/gifs/jav", { + "pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.(mp4|jpg)", + "range": "1-10", + "count": 10, + }), ("https://www.redgifs.com/browse?tags=JAV", { "pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.(mp4|jpg)", "range": "1-10", "count": 10, }), - ("https://v3.redgifs.com/browse?tags=JAV"), + ("https://www.redgifs.com/gifs/jav?order=best&verified=1"), ("https://www.redgifs.com/browse?type=i&verified=y&order=top7"), + ("https://v3.redgifs.com/browse?tags=JAV"), ) + def __init__(self, match): + RedgifsExtractor.__init__(self, match) + self.search, self.query = match.groups() + def metadata(self): - self.params = params = text.parse_query(self.key) - search = params.get("tags") or params.get("order") or "trending" - return {"search": search} + self.params = text.parse_query(self.query) + if self.search: + self.params["tags"] = text.unquote(self.search) + + return {"search": (self.params.get("tags") or + self.params.get("order") or + "trending")} def gifs(self): return self.api.search(self.params) @@ -178,6 +210,16 @@ class RedgifsImageExtractor(RedgifsExtractor): r"/FoolishForkedAbyssiniancat\.mp4", "content": "f6e03f1df9a2ff2a74092f53ee7580d2fb943533", }), + # gallery (#4021) + ("https://www.redgifs.com/watch/desertedbaregraywolf", { + "pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.jpg", + "count": 4, + "keyword": { + "num": int, + "count": 4, + "gallery": "187ad979693-1922-fc66-0000-a96fb07b8a5d", + }, + }), ("https://redgifs.com/ifr/FoolishForkedAbyssiniancat"), ("https://i.redgifs.com/i/FoolishForkedAbyssiniancat"), ("https://www.gifdeliverynetwork.com/foolishforkedabyssiniancat"), @@ -207,6 +249,10 @@ class RedgifsAPI(): endpoint = "/v2/gifs/" + gif_id.lower() return self._call(endpoint)["gif"] + def gallery(self, gallery_id): + endpoint = "/v2/gallery/" + gallery_id + return self._call(endpoint) + def user(self, user, order="best"): endpoint = "/v2/users/{}/search".format(user.lower()) params = {"order": order} @@ -228,7 +274,6 @@ class RedgifsAPI(): def search(self, params): endpoint = "/v2/gifs/search" params["search_text"] = params.pop("tags", None) - params.pop("needSendGtm", None) return self._pagination(endpoint, params) def _call(self, endpoint, params=None): diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py index 34177b4..6d025f4 100644 --- a/gallery_dl/extractor/senmanga.py +++ b/gallery_dl/extractor/senmanga.py @@ -1,64 +1,88 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2019 Mike Fährmann +# Copyright 2016-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract manga-chapters from from https://raw.senmanga.com/""" +"""Extractors for https://raw.senmanga.com/""" -from .common import Extractor, Message +from .common import ChapterExtractor from .. import text -class SenmangaChapterExtractor(Extractor): - """Extractor for manga-chapters from raw.senmanga.com""" +class SenmangaChapterExtractor(ChapterExtractor): + """Extractor for manga chapters from raw.senmanga.com""" category = "senmanga" - subcategory = "chapter" - directory_fmt = ("{category}", "{manga}", "{chapter_string}") - filename_fmt = "{manga}_{chapter_string}_{page:>03}.{extension}" - archive_fmt = "{manga}_{chapter_string}_{page}" - pattern = r"(?:https?://)?raw\.senmanga\.com/([^/]+/[^/]+)" + root = "https://raw.senmanga.com" + pattern = r"(?:https?://)?raw\.senmanga\.com(/[^/?#]+/[^/?#]+)" test = ( - ("http://raw.senmanga.com/Bokura-wa-Minna-Kawaisou/37A/1", { + ("https://raw.senmanga.com/Bokura-wa-Minna-Kawaisou/37A/1", { + "pattern": r"https://raw\.senmanga\.com/viewer" + r"/Bokura-wa-Minna-Kawaisou/37A/[12]", "url": "5f95140ff511d8497e2ec08fa7267c6bb231faec", - "keyword": "705d941a150765edb33cd2707074bd703a93788c", "content": "556a16d5ca3441d7a5807b6b5ac06ec458a3e4ba", + "keyword": { + "chapter": "37A", + "count": 2, + "extension": "", + "filename": "re:[12]", + "lang": "ja", + "language": "Japanese", + "manga": "Bokura wa Minna Kawaisou", + "page": int, + }, }), ("http://raw.senmanga.com/Love-Lab/2016-03/1", { + "pattern": r"https://raw\.senmanga\.com/viewer" + r"/Love-Lab/2016-03/\d", "url": "8347b9f00c14b864dd3c19a1f5ae52adb2ef00de", - "keyword": "8a8ab2529ba2edfc83a6b3a8bede1d6c580db7b4", + "keyword": { + "chapter": "2016-03", + "count": 9, + "extension": "", + "filename": r"re:\d", + "manga": "Renai Lab 恋愛ラボ", + }, + }), + ("https://raw.senmanga.com/akabane-honeko-no-bodyguard/1", { + "pattern": r"https://i\d\.wp\.com/kumacdn.club/image-new-2/a" + r"/akabane-honeko-no-bodyguard/chapter-1" + r"/\d+-[0-9a-f]{13}\.jpg", + "keyword": { + "chapter": "1", + "count": 65, + "extension": "jpg", + "filename": r"re:\d+-\w+", + "manga": "Akabane Honeko no Bodyguard", + }, }), ) - root = "https://raw.senmanga.com" def __init__(self, match): - Extractor.__init__(self, match) - part = match.group(1) - self.chapter_url = "{}/{}/".format(self.root, part) - self.img_url = "{}/viewer/{}/".format(self.root, part) - self.session.headers["Referer"] = self.chapter_url + ChapterExtractor.__init__(self, match) + self.session.headers["Referer"] = self.gallery_url - def items(self): - data = self.metadata() - yield Message.Directory, data - for data["page"] in range(1, data["count"]+1): - data["extension"] = None - yield Message.Url, self.img_url + str(data["page"]), data + # select "All pages" viewer + self.session.cookies.set( + "viewer", "1", domain="raw.senmanga.com") - def metadata(self): - """Collect metadata for extractor-job""" - page = self.request(self.chapter_url).text - self.session.cookies.clear() - title, pos = text.extract(page, '', '') - count, pos = text.extract(page, ' of ', '\n', pos) + def metadata(self, page): + title = text.extr(page, "", "") manga, _, chapter = title.partition(" - Chapter ") return { - "manga": text.unescape(manga).replace("-", " "), - "chapter_string": chapter.partition(" - Page ")[0], - "count": text.parse_int(count), - "lang": "jp", - "language": "Japanese", + "manga" : text.unescape(manga).replace("-", " "), + "chapter" : chapter.partition(" - Page ")[0], + "chapter_minor": "", + "lang" : "ja", + "language" : "Japanese", } + + def images(self, page): + return [ + (url, None) + for url in text.extract_iter( + page, '")[2].strip()), - } + for post in self.posts(): + data = post.attrib + data["thread_id"] = self.thread_id yield Message.Directory, data - for href in text.extract_iter(html, '', '') + url = "{}/vr.php?t={}".format(self.root, self.thread_id) + root = ElementTree.fromstring(self.request(url).text) + posts = root.iter("post") - url = text.extr(page, '