From e052f3b9e1d9703a5a466daeaf37bacf476c2daf Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Sun, 22 Oct 2023 01:00:14 -0400 Subject: New upstream version 1.26.1. --- CHANGELOG.md | 40 ++++++++++ PKG-INFO | 6 +- README.rst | 4 +- data/completion/_gallery-dl | 4 +- data/completion/gallery-dl.fish | 4 +- data/man/gallery-dl.1 | 6 +- data/man/gallery-dl.conf.5 | 47 +++++++++++- docs/gallery-dl.conf | 2 +- gallery_dl.egg-info/PKG-INFO | 6 +- gallery_dl.egg-info/SOURCES.txt | 2 +- gallery_dl/cookies.py | 27 +++++-- gallery_dl/extractor/4chanarchives.py | 1 + gallery_dl/extractor/__init__.py | 2 +- gallery_dl/extractor/bunkr.py | 44 +++++++++-- gallery_dl/extractor/chevereto.py | 113 ++++++++++++++++++++++++++++ gallery_dl/extractor/deviantart.py | 64 ++++++++++++---- gallery_dl/extractor/fantia.py | 9 ++- gallery_dl/extractor/hentaifoundry.py | 22 ++++-- gallery_dl/extractor/imgbb.py | 39 +++++++--- gallery_dl/extractor/jpgfish.py | 105 -------------------------- gallery_dl/extractor/kemonoparty.py | 134 +++++++++++++++++++++++----------- gallery_dl/extractor/moebooru.py | 5 ++ gallery_dl/extractor/newgrounds.py | 56 ++++++++++++-- gallery_dl/extractor/patreon.py | 2 +- gallery_dl/extractor/reddit.py | 3 +- gallery_dl/extractor/redgifs.py | 15 +++- gallery_dl/extractor/sankaku.py | 2 +- gallery_dl/extractor/twitter.py | 2 + gallery_dl/extractor/warosu.py | 34 ++++----- gallery_dl/option.py | 5 +- gallery_dl/postprocessor/exec.py | 17 +++-- gallery_dl/version.py | 2 +- test/test_extractor.py | 2 +- test/test_postprocessor.py | 8 +- test/test_results.py | 3 + 35 files changed, 582 insertions(+), 255 deletions(-) create mode 100644 gallery_dl/extractor/chevereto.py delete mode 100644 gallery_dl/extractor/jpgfish.py diff --git a/CHANGELOG.md b/CHANGELOG.md index a4ce4ba..34607f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,45 @@ # Changelog +## 1.26.1 - 2023-10-21 +### Extractors +#### Additions +- [bunkr] add extractor for media URLs ([#4684](https://github.com/mikf/gallery-dl/issues/4684)) +- [chevereto] add generic extractors for `chevereto` sites ([#4664](https://github.com/mikf/gallery-dl/issues/4664)) + - `deltaporno.com` ([#1381](https://github.com/mikf/gallery-dl/issues/1381)) + - `img.kiwi` + - `jpgfish` + - `pixl.li` ([#3179](https://github.com/mikf/gallery-dl/issues/3179), [#4357](https://github.com/mikf/gallery-dl/issues/4357)) +- [deviantart] implement `"group": "skip"` ([#4630](https://github.com/mikf/gallery-dl/issues/4630)) +- [fantia] add `content_count` and `content_num` metadata fields ([#4627](https://github.com/mikf/gallery-dl/issues/4627)) +- [imgbb] add `displayname` and `user_id` metadata ([#4626](https://github.com/mikf/gallery-dl/issues/4626)) +- [kemonoparty] support post revisions; add `revisions` option ([#4498](https://github.com/mikf/gallery-dl/issues/4498), [#4597](https://github.com/mikf/gallery-dl/issues/4597)) +- [kemonoparty] support searches ([#3385](https://github.com/mikf/gallery-dl/issues/3385), [#4057](https://github.com/mikf/gallery-dl/issues/4057)) +- [kemonoparty] support discord URLs with channel IDs ([#4662](https://github.com/mikf/gallery-dl/issues/4662)) +- [moebooru] add `metadata` option ([#4646](https://github.com/mikf/gallery-dl/issues/4646)) +- [newgrounds] support multi-image posts ([#4642](https://github.com/mikf/gallery-dl/issues/4642)) +- [sankaku] support `/posts/` URLs ([#4688](https://github.com/mikf/gallery-dl/issues/4688)) +- [twitter] add `sensitive` metadata field ([#4619](https://github.com/mikf/gallery-dl/issues/4619)) +#### Fixes +- [4chanarchives] disable Referer headers by default ([#4686](https://github.com/mikf/gallery-dl/issues/4686)) +- [bunkr] fix `/d/` file URLs ([#4685](https://github.com/mikf/gallery-dl/issues/4685)) +- [deviantart] expand nested comment replies ([#4653](https://github.com/mikf/gallery-dl/issues/4653)) +- [deviantart] disable `jwt` ([#4652](https://github.com/mikf/gallery-dl/issues/4652)) +- [hentaifoundry] fix `.swf` file downloads ([#4641](https://github.com/mikf/gallery-dl/issues/4641)) +- [imgbb] fix `user` metadata extraction ([#4626](https://github.com/mikf/gallery-dl/issues/4626)) +- [imgbb] update pagination end condition ([#4626](https://github.com/mikf/gallery-dl/issues/4626)) +- [kemonoparty] update API endpoints ([#4676](https://github.com/mikf/gallery-dl/issues/4676), [#4677](https://github.com/mikf/gallery-dl/issues/4677)) +- [patreon] update `campaign_id` path ([#4639](https://github.com/mikf/gallery-dl/issues/4639)) +- [reddit] fix wrong previews ([#4649](https://github.com/mikf/gallery-dl/issues/4649)) +- [redgifs] fix `niches` extraction ([#4666](https://github.com/mikf/gallery-dl/issues/4666), [#4667](https://github.com/mikf/gallery-dl/issues/4667)) +- [twitter] fix crash due to missing `source` ([#4620](https://github.com/mikf/gallery-dl/issues/4620)) +- [warosu] fix extraction ([#4634](https://github.com/mikf/gallery-dl/issues/4634)) +### Post Processors +#### Additions +- support `{_filename}`, `{_directory}`, and `{_path}` replacement fields for `--exec` ([#4633](https://github.com/mikf/gallery-dl/issues/4633)) +### Miscellaneous +#### Improvements +- avoid temporary copies with `--cookies-from-browser` by opening cookie databases in read-only mode + ## 1.26.0 - 2023-10-03 - ### Extractors #### Additions diff --git a/PKG-INFO b/PKG-INFO index a2dedf5..0aed72d 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.26.0 +Version: 1.26.1 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -109,9 +109,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/README.rst b/README.rst index 14cfb09..207b68e 100644 --- a/README.rst +++ b/README.rst @@ -72,9 +72,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl index 94d2f06..c1425bb 100644 --- a/data/completion/_gallery-dl +++ b/data/completion/_gallery-dl @@ -73,8 +73,8 @@ _arguments -C -S \ --write-infojson'[==SUPPRESS==]' \ --write-tags'[Write image tags to separate text files]' \ --mtime-from-date'[Set file modification times according to "date" metadata]' \ ---exec'[Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"]':'' \ ---exec-after'[Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"]':'' \ +--exec'[Execute CMD for each downloaded file. Supported replacement fields are {} or {_path}, {_directory}, {_filename}. Example: --exec "convert {} {}.png && rm {}"]':'' \ +--exec-after'[Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {_directory} && convert * ../doc.pdf"]':'' \ {-P,--postprocessor}'[Activate the specified post processor]':'' \ {-O,--postprocessor-option}'[Additional "=" post processor options]':'' && rc=0 diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish index 00e7b24..593ab89 100644 --- a/data/completion/gallery-dl.fish +++ b/data/completion/gallery-dl.fish @@ -67,7 +67,7 @@ complete -c gallery-dl -l 'write-info-json' -d 'Write gallery metadata to a info complete -c gallery-dl -l 'write-infojson' -d '==SUPPRESS==' complete -c gallery-dl -l 'write-tags' -d 'Write image tags to separate text files' complete -c gallery-dl -l 'mtime-from-date' -d 'Set file modification times according to "date" metadata' -complete -c gallery-dl -x -l 'exec' -d 'Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"' -complete -c gallery-dl -x -l 'exec-after' -d 'Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"' +complete -c gallery-dl -x -l 'exec' -d 'Execute CMD for each downloaded file. Supported replacement fields are {} or {_path}, {_directory}, {_filename}. Example: --exec "convert {} {}.png && rm {}"' +complete -c gallery-dl -x -l 'exec-after' -d 'Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {_directory} && convert * ../doc.pdf"' complete -c gallery-dl -x -s 'P' -l 'postprocessor' -d 'Activate the specified post processor' complete -c gallery-dl -x -s 'O' -l 'postprocessor-option' -d 'Additional "=" post processor options' diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index c2eedb7..27f13af 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2023-10-03" "1.26.0" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2023-10-21" "1.26.1" "gallery-dl Manual" .\" disable hyphenation .nh @@ -216,10 +216,10 @@ Write image tags to separate text files Set file modification times according to 'date' metadata .TP .B "\-\-exec" \f[I]CMD\f[] -Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}" +Execute CMD for each downloaded file. Supported replacement fields are {} or {_path}, {_directory}, {_filename}. Example: --exec "convert {} {}.png && rm {}" .TP .B "\-\-exec\-after" \f[I]CMD\f[] -Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf" +Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {_directory} && convert * ../doc.pdf" .TP .B "\-P, \-\-postprocessor" \f[I]NAME\f[] Activate the specified post processor diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 19a5812..9083d24 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2023-10-03" "1.26.0" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2023-10-21" "1.26.1" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -1529,7 +1529,10 @@ Use with caution. .SS extractor.deviantart.group .IP "Type:" 6 -\f[I]bool\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]true\f[] @@ -1538,6 +1541,14 @@ Use with caution. Check whether the profile name in a given URL belongs to a group or a regular user. +When disabled, assume every given profile name +belongs to a regular user. + +Special values: + +.br +* \f[I]"skip"\f[]: Skip groups + .SS extractor.deviantart.include .IP "Type:" 6 @@ -1589,13 +1600,15 @@ literature and status updates. \f[I]bool\f[] .IP "Default:" 9 -\f[I]true\f[] +\f[I]false\f[] .IP "Description:" 4 Update \f[I]JSON Web Tokens\f[] (the \f[I]token\f[] URL parameter) of otherwise non-downloadable, low-resolution images to be able to download them in full resolution. +Note: No longer functional as of 2023-10-11 + .SS extractor.deviantart.mature .IP "Type:" 6 @@ -2415,7 +2428,20 @@ Limit the number of posts to download. \f[I]false\f[] .IP "Description:" 4 -Extract \f[I]username\f[] metadata +Extract \f[I]username\f[] metadata. + + +.SS extractor.kemonoparty.revisions +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Extract post revisions. + +Note: This requires 1 additional HTTP request per post. .SS extractor.khinsider.format @@ -2625,6 +2651,19 @@ Fetch media from renoted notes. Fetch media from replies to other notes. +.SS extractor.[moebooru].pool.metadata +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Extract extended \f[I]pool\f[] metadata. + +Note: Not supported by all \f[I]moebooru\f[] instances. + + .SS extractor.newgrounds.flash .IP "Type:" 6 \f[I]bool\f[] diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 2eac0a1..9f12652 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -85,7 +85,7 @@ "group": true, "include": "gallery", "journals": "html", - "jwt": true, + "jwt": false, "mature": true, "metadata": false, "original": true, diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 022a2d6..95861dc 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.26.0 +Version: 1.26.1 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -109,9 +109,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index e319eef..fb6cb4b 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -60,6 +60,7 @@ gallery_dl/extractor/blogger.py gallery_dl/extractor/booru.py gallery_dl/extractor/bunkr.py gallery_dl/extractor/catbox.py +gallery_dl/extractor/chevereto.py gallery_dl/extractor/comicvine.py gallery_dl/extractor/common.py gallery_dl/extractor/cyberdrop.py @@ -111,7 +112,6 @@ gallery_dl/extractor/instagram.py gallery_dl/extractor/issuu.py gallery_dl/extractor/itaku.py gallery_dl/extractor/itchio.py -gallery_dl/extractor/jpgfish.py gallery_dl/extractor/jschan.py gallery_dl/extractor/kabeuchi.py gallery_dl/extractor/keenspot.py diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py index c5c5667..416cc9a 100644 --- a/gallery_dl/cookies.py +++ b/gallery_dl/cookies.py @@ -47,7 +47,7 @@ def load_cookies(cookiejar, browser_specification): def load_cookies_firefox(cookiejar, profile=None, container=None, domain=None): path, container_id = _firefox_cookies_database(profile, container) - with DatabaseCopy(path) as db: + with DatabaseConnection(path) as db: sql = ("SELECT name, value, host, path, isSecure, expiry " "FROM moz_cookies") @@ -100,7 +100,7 @@ def load_cookies_chrome(cookiejar, browser_name, profile=None, path = _chrome_cookies_database(profile, config) _log_debug("Extracting cookies from %s", path) - with DatabaseCopy(path) as db: + with DatabaseConnection(path) as db: db.text_factory = bytes decryptor = get_cookie_decryptor( config["directory"], config["keyring"], keyring) @@ -814,7 +814,7 @@ class DataParser: self.skip_to(len(self._data), description) -class DatabaseCopy(): +class DatabaseConnection(): def __init__(self, path): self.path = path @@ -822,13 +822,27 @@ class DatabaseCopy(): self.directory = None def __enter__(self): + try: + # https://www.sqlite.org/uri.html#the_uri_path + path = self.path.replace("?", "%3f").replace("#", "%23") + if util.WINDOWS: + path = "/" + os.path.abspath(path) + + uri = "file:{}?mode=ro&immutable=1".format(path) + self.database = sqlite3.connect( + uri, uri=True, isolation_level=None, check_same_thread=False) + return self.database + except Exception as exc: + _log_debug("Falling back to temporary database copy (%s: %s)", + exc.__class__.__name__, exc) + try: self.directory = tempfile.TemporaryDirectory(prefix="gallery-dl-") path_copy = os.path.join(self.directory.name, "copy.sqlite") shutil.copyfile(self.path, path_copy) - self.database = db = sqlite3.connect( + self.database = sqlite3.connect( path_copy, isolation_level=None, check_same_thread=False) - return db + return self.database except BaseException: if self.directory: self.directory.cleanup() @@ -836,7 +850,8 @@ class DatabaseCopy(): def __exit__(self, exc, value, tb): self.database.close() - self.directory.cleanup() + if self.directory: + self.directory.cleanup() def Popen_communicate(*args): diff --git a/gallery_dl/extractor/4chanarchives.py b/gallery_dl/extractor/4chanarchives.py index f018d3e..27ac7c5 100644 --- a/gallery_dl/extractor/4chanarchives.py +++ b/gallery_dl/extractor/4chanarchives.py @@ -20,6 +20,7 @@ class _4chanarchivesThreadExtractor(Extractor): directory_fmt = ("{category}", "{board}", "{thread} - {title}") filename_fmt = "{no}-{filename}.{extension}" archive_fmt = "{board}_{thread}_{no}" + referer = False pattern = r"(?:https?://)?4chanarchives\.com/board/([^/?#]+)/thread/(\d+)" example = "https://4chanarchives.com/board/a/thread/12345/" diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 3abe74b..1c1473a 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -28,6 +28,7 @@ modules = [ "blogger", "bunkr", "catbox", + "chevereto", "comicvine", "cyberdrop", "danbooru", @@ -73,7 +74,6 @@ modules = [ "issuu", "itaku", "itchio", - "jpgfish", "jschan", "kabeuchi", "keenspot", diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py index 5509f5a..26123b8 100644 --- a/gallery_dl/extractor/bunkr.py +++ b/gallery_dl/extractor/bunkr.py @@ -12,6 +12,8 @@ from .lolisafe import LolisafeAlbumExtractor from .. import text from urllib.parse import urlsplit, urlunsplit +BASE_PATTERN = r"(?:https?://)?(?:app\.)?bunkr+\.(?:la|[sr]u|is|to)" + MEDIA_DOMAIN_OVERRIDES = { "cdn9.bunkr.ru" : "c9.bunkr.ru", "cdn12.bunkr.ru": "media-files12.bunkr.la", @@ -28,7 +30,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): """Extractor for bunkrr.su albums""" category = "bunkr" root = "https://bunkrr.su" - pattern = r"(?:https?://)?(?:app\.)?bunkr+\.(?:la|[sr]u|is|to)/a/([^/?#]+)" + pattern = BASE_PATTERN + r"/a/([^/?#]+)" example = "https://bunkrr.su/a/ID" def fetch_album(self, album_id): @@ -53,11 +55,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): for url in urls: if url.startswith("/"): try: - page = self.request(self.root + text.unescape(url)).text - if url[1] == "v": - url = text.extr(page, '<') + + +BASE_PATTERN = CheveretoExtractor.update({ + "jpgfish": { + "root": "https://jpg2.su", + "pattern": r"jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)", + }, + "pixl": { + "root": "https://pixl.li", + "pattern": r"pixl\.(?:li|is)", + }, + "imgkiwi": { + "root": "https://img.kiwi", + "pattern": r"img\.kiwi", + }, + "deltaporno": { + "root": "https://gallery.deltaporno.com", + "pattern": r"gallery\.deltaporno\.com", + }, +}) + + +class CheveretoImageExtractor(CheveretoExtractor): + """Extractor for chevereto Images""" + subcategory = "image" + pattern = BASE_PATTERN + r"(/im(?:g|age)/[^/?#]+)" + example = "https://jpg2.su/img/TITLE.ID" + + def items(self): + url = self.root + self.path + extr = text.extract_from(self.request(url).text) + + image = { + "id" : self.path.rpartition(".")[2], + "url" : extr('"), ">", "<"), + "user" : extr('username: "', '"'), + } + + text.nameext_from_url(image["url"], image) + yield Message.Directory, image + yield Message.Url, image["url"], image + + +class CheveretoAlbumExtractor(CheveretoExtractor): + """Extractor for chevereto Albums""" + subcategory = "album" + pattern = BASE_PATTERN + r"(/a(?:lbum)?/[^/?#]+(?:/sub)?)" + example = "https://jpg2.su/album/TITLE.ID" + + def items(self): + url = self.root + self.path + data = {"_extractor": CheveretoImageExtractor} + + if self.path.endswith("/sub"): + albums = self._pagination(url) + else: + albums = (url,) + + for album in albums: + for image in self._pagination(album): + yield Message.Queue, image, data + + +class CheveretoUserExtractor(CheveretoExtractor): + """Extractor for chevereto Users""" + subcategory = "user" + pattern = BASE_PATTERN + r"(/(?!img|image|a(?:lbum)?)[^/?#]+(?:/albums)?)" + example = "https://jpg2.su/USER" + + def items(self): + url = self.root + self.path + + if self.path.endswith("/albums"): + data = {"_extractor": CheveretoAlbumExtractor} + else: + data = {"_extractor": CheveretoImageExtractor} + + for url in self._pagination(url): + yield Message.Queue, url, data diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 9421096..2c37ef1 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -42,7 +42,7 @@ class DeviantartExtractor(Extractor): self.offset = 0 def _init(self): - self.jwt = self.config("jwt", True) + self.jwt = self.config("jwt", False) self.flat = self.config("flat", True) self.extra = self.config("extra", False) self.quality = self.config("quality", "100") @@ -91,14 +91,20 @@ class DeviantartExtractor(Extractor): return True def items(self): - if self.user and self.config("group", True): - profile = self.api.user_profile(self.user) - self.group = not profile - if self.group: - self.subcategory = "group-" + self.subcategory - self.user = self.user.lower() - else: - self.user = profile["user"]["username"] + if self.user: + group = self.config("group", True) + if group: + profile = self.api.user_profile(self.user) + if profile: + self.user = profile["user"]["username"] + self.group = False + elif group == "skip": + self.log.info("Skipping group '%s'", self.user) + raise exception.StopExtraction() + else: + self.subcategory = "group-" + self.subcategory + self.user = self.user.lower() + self.group = True for deviation in self.deviations(): if isinstance(deviation, tuple): @@ -228,7 +234,7 @@ class DeviantartExtractor(Extractor): if self.comments: deviation["comments"] = ( - self.api.comments(deviation["deviationid"], target="deviation") + self._extract_comments(deviation["deviationid"], "deviation") if deviation["stats"]["comments"] else () ) @@ -395,6 +401,28 @@ class DeviantartExtractor(Extractor): binascii.b2a_base64(payload).rstrip(b"=\n").decode()) ) + def _extract_comments(self, target_id, target_type="deviation"): + results = None + comment_ids = [None] + + while comment_ids: + comments = self.api.comments( + target_id, target_type, comment_ids.pop()) + + if results: + results.extend(comments) + else: + results = comments + + # parent comments, i.e. nodes with at least one child + parents = {c["parentid"] for c in comments} + # comments with more than one reply + replies = {c["commentid"] for c in comments if c["replies"]} + # add comment UUIDs with replies that are not parent to any node + comment_ids.extend(replies - parents) + + return results + def _limited_request(self, url, **kwargs): """Limits HTTP requests to one every 2 seconds""" kwargs["fatal"] = None @@ -698,7 +726,7 @@ class DeviantartStatusExtractor(DeviantartExtractor): deviation["stats"] = {"comments": comments_count} if self.comments: deviation["comments"] = ( - self.api.comments(deviation["statusid"], target="status") + self._extract_comments(deviation["statusid"], "status") if comments_count else () ) @@ -1072,11 +1100,17 @@ class DeviantartOAuthAPI(): "mature_content": self.mature} return self._pagination_list(endpoint, params) - def comments(self, id, target, offset=0): + def comments(self, target_id, target_type="deviation", + comment_id=None, offset=0): """Fetch comments posted on a target""" - endpoint = "/comments/{}/{}".format(target, id) - params = {"maxdepth": "5", "offset": offset, "limit": 50, - "mature_content": self.mature} + endpoint = "/comments/{}/{}".format(target_type, target_id) + params = { + "commentid" : comment_id, + "maxdepth" : "5", + "offset" : offset, + "limit" : 50, + "mature_content": self.mature, + } return self._pagination_list(endpoint, params=params, key="thread") def deviation(self, deviation_id, public=None): diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py index f1d51e2..4a67695 100644 --- a/gallery_dl/extractor/fantia.py +++ b/gallery_dl/extractor/fantia.py @@ -42,7 +42,11 @@ class FantiaExtractor(Extractor): post = self._get_post_data(post_id) post["num"] = 0 - for content in self._get_post_contents(post): + contents = self._get_post_contents(post) + post["content_count"] = len(contents) + post["content_num"] = 0 + + for content in contents: files = self._process_content(post, content) yield Message.Directory, post @@ -59,6 +63,8 @@ class FantiaExtractor(Extractor): post["content_filename"] or file["file_url"], post) yield Message.Url, file["file_url"], post + post["content_num"] += 1 + def posts(self): """Return post IDs""" @@ -131,6 +137,7 @@ class FantiaExtractor(Extractor): post["content_filename"] = content.get("filename") or "" post["content_id"] = content["id"] post["content_comment"] = content.get("comment") or "" + post["content_num"] += 1 post["plan"] = content["plan"] or self._empty_plan files = [] diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index 4c02000..8ba23c2 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -72,13 +72,11 @@ class HentaifoundryExtractor(Extractor): extr = text.extract_from(page, page.index('id="picBox"')) data = { + "index" : text.parse_int(path.rsplit("/", 2)[1]), "title" : text.unescape(extr('class="imageTitle">', '<')), "artist" : text.unescape(extr('/profile">', '<')), - "width" : text.parse_int(extr('width="', '"')), - "height" : text.parse_int(extr('height="', '"')), - "index" : text.parse_int(path.rsplit("/", 2)[1]), - "src" : text.urljoin(self.root, text.unescape(extr( - 'src="', '"'))), + "_body" : extr( + '
Description
', '') .replace("\r\n", "\n"), "", "")), @@ -92,6 +90,20 @@ class HentaifoundryExtractor(Extractor): ">Tags ", "")), } + body = data["_body"] + if "', '<', pos) + album = text.extr(page, '"og:title" content="', '"') + user = self._extract_user(page) return { - "album_id" : self.album_id, - "album_name": text.unescape(album), - "user" : user.lower() if user else "", + "album_id" : self.album_id, + "album_name" : text.unescape(album), + "user" : user.get("username") or "", + "user_id" : user.get("id") or "", + "displayname": user.get("name") or "", } def images(self, page): @@ -158,7 +167,12 @@ class ImgbbUserExtractor(ImgbbExtractor): self.page_url = "https://{}.imgbb.com/".format(self.user) def metadata(self, page): - return {"user": self.user} + user = self._extract_user(page) + return { + "user" : user.get("username") or self.user, + "user_id" : user.get("id") or "", + "displayname": user.get("name") or "", + } def images(self, page): user = text.extr(page, '.obj.resource={"id":"', '"') @@ -181,15 +195,20 @@ class ImgbbImageExtractor(ImgbbExtractor): def items(self): url = "https://ibb.co/" + self.image_id - extr = text.extract_from(self.request(url).text) + page = self.request(url).text + extr = text.extract_from(page) + user = self._extract_user(page) image = { "id" : self.image_id, - "title" : text.unescape(extr('"og:title" content="', '"')), + "title" : text.unescape(extr( + '"og:title" content="', ' hosted at ImgBB"')), "url" : extr('"og:image" content="', '"'), "width" : text.parse_int(extr('"og:image:width" content="', '"')), "height": text.parse_int(extr('"og:image:height" content="', '"')), - "user" : extr('rel="author">', '<').lower(), + "user" : user.get("username") or "", + "user_id" : user.get("id") or "", + "displayname": user.get("name") or "", } image["extension"] = text.ext_from_url(image["url"]) diff --git a/gallery_dl/extractor/jpgfish.py b/gallery_dl/extractor/jpgfish.py deleted file mode 100644 index 8862a7b..0000000 --- a/gallery_dl/extractor/jpgfish.py +++ /dev/null @@ -1,105 +0,0 @@ -# -*- coding: utf-8 -*- - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://jpg1.su/""" - -from .common import Extractor, Message -from .. import text - -BASE_PATTERN = r"(?:https?://)?jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)" - - -class JpgfishExtractor(Extractor): - """Base class for jpgfish extractors""" - category = "jpgfish" - root = "https://jpg1.su" - directory_fmt = ("{category}", "{user}", "{album}",) - archive_fmt = "{id}" - - def _pagination(self, url): - while url: - page = self.request(url).text - - for item in text.extract_iter( - page, '
<')[0] - - -class JpgfishImageExtractor(JpgfishExtractor): - """Extractor for jpgfish Images""" - subcategory = "image" - pattern = BASE_PATTERN + r"/img/((?:[^/?#]+\.)?(\w+))" - example = "https://jpg1.su/img/TITLE.ID" - - def __init__(self, match): - JpgfishExtractor.__init__(self, match) - self.path, self.image_id = match.groups() - - def items(self): - url = "{}/img/{}".format(self.root, self.path) - extr = text.extract_from(self.request(url).text) - - image = { - "id" : self.image_id, - "url" : extr('"), ">", "<")[0] or "", - "user" : extr('username: "', '"'), - } - - text.nameext_from_url(image["url"], image) - yield Message.Directory, image - yield Message.Url, image["url"], image - - -class JpgfishAlbumExtractor(JpgfishExtractor): - """Extractor for jpgfish Albums""" - subcategory = "album" - pattern = BASE_PATTERN + r"/a(?:lbum)?/([^/?#]+)(/sub)?" - example = "https://jpg1.su/album/TITLE.ID" - - def __init__(self, match): - JpgfishExtractor.__init__(self, match) - self.album, self.sub_albums = match.groups() - - def items(self): - url = "{}/a/{}".format(self.root, self.album) - data = {"_extractor": JpgfishImageExtractor} - - if self.sub_albums: - albums = self._pagination(url + "/sub") - else: - albums = (url,) - - for album in albums: - for image in self._pagination(album): - yield Message.Queue, image, data - - -class JpgfishUserExtractor(JpgfishExtractor): - """Extractor for jpgfish Users""" - subcategory = "user" - pattern = BASE_PATTERN + r"/(?!img|a(?:lbum)?)([^/?#]+)(/albums)?" - example = "https://jpg1.su/USER" - - def __init__(self, match): - JpgfishExtractor.__init__(self, match) - self.user, self.albums = match.groups() - - def items(self): - url = "{}/{}".format(self.root, self.user) - - if self.albums: - url += "/albums" - data = {"_extractor": JpgfishAlbumExtractor} - else: - data = {"_extractor": JpgfishImageExtractor} - - for url in self._pagination(url): - yield Message.Queue, url, data diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 894c671..1596cfb 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -10,7 +10,7 @@ from .common import Extractor, Message from .. import text, exception -from ..cache import cache +from ..cache import cache, memcache import itertools import re @@ -70,8 +70,7 @@ class KemonopartyExtractor(Extractor): self.root, post["service"], post["user"], post["id"]) post["_http_headers"] = headers post["date"] = text.parse_datetime( - post["published"] or post["added"], - "%a, %d %b %Y %H:%M:%S %Z") + post["published"] or post["added"], "%Y-%m-%dT%H:%M:%S") if username: post["username"] = username if comments: @@ -197,14 +196,25 @@ class KemonopartyExtractor(Extractor): dms = [] for dm in text.extract_iter(page, ""): + footer = text.extr(dm, "") dms.append({ - "body": text.unescape(text.extract( + "body": text.unescape(text.extr( dm, "
", "
1 else posts + if not self.revision: + post = self.request(self.api_url).json() + if self.config("revisions"): + post["revision_id"] = 0 + try: + revs = self._post_revisions(self.api_url) + except exception.HttpError: + pass + else: + return itertools.chain((post,), revs) + return (post,) + + revs = self._post_revisions(self.api_url) + if not self.revision_id: + return revs + + for rev in revs: + if str(rev["revision_id"]) == self.revision_id: + return (rev,) + + raise exception.NotFoundError("revision") class KemonopartyDiscordExtractor(KemonopartyExtractor): @@ -270,11 +314,29 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor): def __init__(self, match): KemonopartyExtractor.__init__(self, match) - _, _, self.server, self.channel, self.channel_name = match.groups() + _, _, self.server, self.channel_id, self.channel = match.groups() + self.channel_name = "" def items(self): self._prepare_ddosguard_cookies() + if self.channel_id: + self.channel_name = self.channel + else: + if self.channel.isdecimal() and len(self.channel) >= 16: + key = "id" + else: + key = "name" + + for channel in self._discord_channels(self.server): + if channel[key] == self.channel: + break + else: + raise exception.NotFoundError("channel") + + self.channel_id = channel["id"] + self.channel_name = channel["name"] + find_inline = re.compile( r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)" r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall @@ -299,7 +361,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor): post["channel_name"] = self.channel_name post["date"] = text.parse_datetime( - post["published"], "%a, %d %b %Y %H:%M:%S %Z") + post["published"], "%Y-%m-%dT%H:%M:%S.%f") post["count"] = len(files) yield Message.Directory, post @@ -319,27 +381,17 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor): yield Message.Url, url, post def posts(self): - if self.channel is None: - url = "{}/api/discord/channels/lookup?q={}".format( - self.root, self.server) - for channel in self.request(url).json(): - if channel["name"] == self.channel_name: - self.channel = channel["id"] - break - else: - raise exception.NotFoundError("channel") - - url = "{}/api/discord/channel/{}".format(self.root, self.channel) - params = {"skip": 0} + url = "{}/api/v1/discord/channel/{}".format( + self.root, self.channel_id) + params = {"o": 0} while True: posts = self.request(url, params=params).json() yield from posts - cnt = len(posts) - if cnt < 25: + if len(posts) < 150: break - params["skip"] += cnt + params["o"] += 150 class KemonopartyDiscordServerExtractor(KemonopartyExtractor): @@ -352,11 +404,7 @@ class KemonopartyDiscordServerExtractor(KemonopartyExtractor): self.server = match.group(3) def items(self): - url = "{}/api/discord/channels/lookup?q={}".format( - self.root, self.server) - channels = self.request(url).json() - - for channel in channels: + for channel in self._discord_channels(self.server): url = "{}/discord/server/{}/channel/{}#{}".format( self.root, self.server, channel["id"], channel["name"]) channel["_extractor"] = KemonopartyDiscordExtractor diff --git a/gallery_dl/extractor/moebooru.py b/gallery_dl/extractor/moebooru.py index 145dd51..e97d273 100644 --- a/gallery_dl/extractor/moebooru.py +++ b/gallery_dl/extractor/moebooru.py @@ -124,6 +124,11 @@ class MoebooruPoolExtractor(MoebooruExtractor): self.pool_id = match.group(match.lastindex) def metadata(self): + if self.config("metadata"): + url = "{}/pool/show/{}.json".format(self.root, self.pool_id) + pool = self.request(url).json() + pool.pop("posts", None) + return {"pool": pool} return {"pool": text.parse_int(self.pool_id)} def posts(self): diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 1bcc915..a6971e8 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -54,14 +54,31 @@ class NewgroundsExtractor(Extractor): if metadata: post.update(metadata) yield Message.Directory, post + post["num"] = 0 yield Message.Url, url, text.nameext_from_url(url, post) - for num, url in enumerate(text.extract_iter( - post["_comment"], 'data-smartload-src="', '"'), 1): - post["num"] = num - post["_index"] = "{}_{:>02}".format(post["index"], num) + if "_multi" in post: + for data in post["_multi"]: + post["num"] += 1 + post["_index"] = "{}_{:>02}".format( + post["index"], post["num"]) + post.update(data) + url = data["image"] + + text.nameext_from_url(url, post) + yield Message.Url, url, post + + if "_fallback" in post: + del post["_fallback"] + + for url in text.extract_iter( + post["_comment"], 'data-smartload-src="', '"'): + post["num"] += 1 + post["_index"] = "{}_{:>02}".format( + post["index"], post["num"]) url = text.ensure_http_scheme(url) - yield Message.Url, url, text.nameext_from_url(url, post) + text.nameext_from_url(url, post) + yield Message.Url, url, post else: self.log.warning( "Unable to get download URL for '%s'", post_url) @@ -153,8 +170,7 @@ class NewgroundsExtractor(Extractor): data["post_url"] = post_url return data - @staticmethod - def _extract_image_data(extr, url): + def _extract_image_data(self, extr, url): full = text.extract_from(util.json_loads(extr( '"full_image_text":', '});'))) data = { @@ -172,8 +188,34 @@ class NewgroundsExtractor(Extractor): index = data["url"].rpartition("/")[2].partition("_")[0] data["index"] = text.parse_int(index) data["_index"] = index + + image_data = extr("let imageData =", "\n];") + if image_data: + data["_multi"] = self._extract_images_multi(image_data) + else: + art_images = extr('
", "<"), + "sensitive" : tget("possibly_sensitive"), "favorite_count": tget("favorite_count"), "quote_count" : tget("quote_count"), "reply_count" : tget("reply_count"), @@ -451,6 +452,7 @@ class TwitterExtractor(Extractor): "id_str": id_str, "lang": None, "user": user, + "source": "><", "entities": {}, "extended_entities": { "media": [ diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py index 6f152ed..8e6b842 100644 --- a/gallery_dl/extractor/warosu.py +++ b/gallery_dl/extractor/warosu.py @@ -47,7 +47,7 @@ class WarosuThreadExtractor(Extractor): def metadata(self, page): boardname = text.extr(page, "", "") - title = text.extr(page, 'filetitle" itemprop="name">', '<') + title = text.unescape(text.extr(page, "class=filetitle>", "<")) return { "board" : self.board, "board_name": boardname.rpartition(" - ")[2], @@ -57,39 +57,37 @@ class WarosuThreadExtractor(Extractor): def posts(self, page): """Build a list of all post objects""" - page = text.extr(page, '
', '') - needle = '
' + page = text.extr(page, "
") + needle = "
" return [self.parse(post) for post in page.split(needle)] def parse(self, post): """Build post object by extracting data from an HTML post""" data = self._extract_post(post) - if "File:" in post: + if " File:" in post: self._extract_image(post, data) part = data["image"].rpartition("/")[2] data["tim"], _, data["extension"] = part.partition(".") data["ext"] = "." + data["extension"] return data - @staticmethod - def _extract_post(post): + def _extract_post(self, post): extr = text.extract_from(post) return { - "no" : extr('id="p', '"'), - "name": extr('', ""), - "time": extr(''), - "now" : extr("", "<"), + "no" : extr("id=p", ">"), + "name": extr("class=postername>", "<").strip(), + "time": extr("class=posttime title=", "000>"), + "now" : extr("", "<").strip(), "com" : text.unescape(text.remove_html(extr( - '

', '

' - ).strip())), + "
", "
").strip())), } - @staticmethod - def _extract_image(post, data): + def _extract_image(self, post, data): extr = text.extract_from(post) - data["fsize"] = extr("File: ", ", ") + data["fsize"] = extr(" File: ", ", ") data["w"] = extr("", "x") data["h"] = extr("", ", ") - data["filename"] = text.unquote(extr("", "<").rpartition(".")[0]) - extr("
", "") - data["image"] = "https:" + extr('", "") + data["image"] = self.root + extr("") diff --git a/gallery_dl/option.py b/gallery_dl/option.py index 08e6e70..1982b71 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -510,6 +510,8 @@ def build_parser(): dest="postprocessors", metavar="CMD", action=AppendCommandAction, const={"name": "exec"}, help=("Execute CMD for each downloaded file. " + "Supported replacement fields are " + "{} or {_path}, {_directory}, {_filename}. " "Example: --exec \"convert {} {}.png && rm {}\""), ) postprocessor.add_argument( @@ -518,7 +520,8 @@ def build_parser(): action=AppendCommandAction, const={ "name": "exec", "event": "finalize"}, help=("Execute CMD after all files were downloaded successfully. " - "Example: --exec-after \"cd {} && convert * ../doc.pdf\""), + "Example: --exec-after \"cd {_directory} " + "&& convert * ../doc.pdf\""), ) postprocessor.add_argument( "-P", "--postprocessor", diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py index afa828c..e7ed2f6 100644 --- a/gallery_dl/postprocessor/exec.py +++ b/gallery_dl/postprocessor/exec.py @@ -12,6 +12,7 @@ from .common import PostProcessor from .. import util, formatter import subprocess import os +import re if util.WINDOWS: @@ -32,6 +33,7 @@ class ExecPP(PostProcessor): args = options["command"] if isinstance(args, str): self.args = args + self._sub = re.compile(r"\{(_directory|_filename|_path|)\}").sub execute = self.exec_string else: self.args = [formatter.parse(arg) for arg in args] @@ -69,11 +71,8 @@ class ExecPP(PostProcessor): if archive and archive.check(pathfmt.kwdict): return - if pathfmt.realpath: - args = self.args.replace("{}", quote(pathfmt.realpath)) - else: - args = self.args.replace("{}", quote(pathfmt.realdirectory)) - + self.pathfmt = pathfmt + args = self._sub(self._replace, self.args) self._exec(args, True) if archive: @@ -90,5 +89,13 @@ class ExecPP(PostProcessor): self.log.debug("Running '%s'", args) subprocess.Popen(args, shell=shell) + def _replace(self, match): + name = match.group(1) + if name == "_directory": + return quote(self.pathfmt.realdirectory) + if name == "_filename": + return quote(self.pathfmt.filename) + return quote(self.pathfmt.realpath) + __postprocessor__ = ExecPP diff --git a/gallery_dl/version.py b/gallery_dl/version.py index d06d9d6..593cffa 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.26.0" +__version__ = "1.26.1" diff --git a/test/test_extractor.py b/test/test_extractor.py index 9387f5b..29ccf97 100644 --- a/test/test_extractor.py +++ b/test/test_extractor.py @@ -238,7 +238,7 @@ class TestExtractorWait(unittest.TestCase): until = datetime.fromtimestamp(until) o = self._isotime_to_seconds(output) u = self._isotime_to_seconds(until.time().isoformat()[:8]) - self.assertLess(o-u, 1.0) + self.assertLessEqual(o-u, 1.0) @staticmethod def _isotime_to_seconds(isotime): diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py index c00144e..b64df88 100644 --- a/test/test_postprocessor.py +++ b/test/test_postprocessor.py @@ -168,7 +168,7 @@ class ExecTest(BasePostprocessorTest): def test_command_string(self): self._create({ - "command": "echo {} && rm {};", + "command": "echo {} {_path} {_directory} {_filename} && rm {};", }) with patch("subprocess.Popen") as p: @@ -178,7 +178,11 @@ class ExecTest(BasePostprocessorTest): self._trigger(("after",)) p.assert_called_once_with( - "echo {0} && rm {0};".format(self.pathfmt.realpath), shell=True) + "echo {0} {0} {1} {2} && rm {0};".format( + self.pathfmt.realpath, + self.pathfmt.realdirectory, + self.pathfmt.filename), + shell=True) i.wait.assert_called_once_with() def test_command_list(self): diff --git a/test/test_results.py b/test/test_results.py index 4fb22c7..f275bbf 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -201,6 +201,9 @@ class TestExtractorResults(unittest.TestCase): self.assertEqual(str(value), test[3:], msg=key) elif test.startswith("type:"): self.assertEqual(type(value).__name__, test[5:], msg=key) + elif test.startswith("len:"): + self.assertIsInstance(value, (list, tuple), msg=key) + self.assertEqual(len(value), int(test[4:]), msg=key) else: self.assertEqual(value, test, msg=key) else: -- cgit v1.2.3