| author    | 2023-07-03 00:57:09 -0400 |
|-----------|---------------------------|
| committer | 2023-07-03 00:57:09 -0400 |
| commit    | 3a93cde3966897be5924ee1de7b4e044d02c7b5d (patch) |
| tree      | 201f838ff418278c49b54b9e967afd8d67b621a2 |
| parent    | 9b5645cc3e880ed2219bfd3842d6947ca989ad99 (diff) |
| parent    | ef30b1fa552fd4ceebdd14bbcc16f30f430883f8 (diff) |
Update upstream source from tag 'upstream/1.25.7'
Update to upstream version '1.25.7'
with Debian dir 4760b04a98c58821dc7db0f3d75c73f5f395ad73
 CHANGELOG.md                          |  16
 PKG-INFO                              |   6
 README.rst                            |   4
 data/man/gallery-dl.1                 |   2
 data/man/gallery-dl.conf.5            |  87
 docs/gallery-dl.conf                  |   6
 gallery_dl.egg-info/PKG-INFO          |   6
 gallery_dl/extractor/flickr.py        |  27
 gallery_dl/extractor/furaffinity.py   |  10
 gallery_dl/extractor/gelbooru_v01.py  |  41
 gallery_dl/extractor/hentaifox.py     |  12
 gallery_dl/extractor/mangapark.py     | 551
 gallery_dl/extractor/piczel.py        |   4
 gallery_dl/extractor/poipiku.py       |   9
 gallery_dl/extractor/sankaku.py       |  13
 gallery_dl/extractor/senmanga.py      |   8
 gallery_dl/extractor/twitter.py       |   9
 gallery_dl/extractor/wallhaven.py     |   2
 gallery_dl/version.py                 |   2
 setup.cfg                             |   1
 test/test_postprocessor.py            |  70

21 files changed, 709 insertions(+), 177 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 429c7ea..b71b404 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,21 @@
 # Changelog
 
+## 1.25.7 - 2023-07-02
+### Additions
+- [flickr] add 'exif' option
+- [flickr] add 'metadata' option ([#4227](https://github.com/mikf/gallery-dl/issues/4227))
+- [mangapark] add 'source' option ([#3969](https://github.com/mikf/gallery-dl/issues/3969))
+- [twitter] extend 'conversations' option ([#4211](https://github.com/mikf/gallery-dl/issues/4211))
+### Fixes
+- [furaffinity] improve 'description' HTML ([#4224](https://github.com/mikf/gallery-dl/issues/4224))
+- [gelbooru_v01] fix '--range' ([#4167](https://github.com/mikf/gallery-dl/issues/4167))
+- [hentaifox] fix titles containing '@' ([#4201](https://github.com/mikf/gallery-dl/issues/4201))
+- [mangapark] update to v5 ([#3969](https://github.com/mikf/gallery-dl/issues/3969))
+- [piczel] update API server address ([#4244](https://github.com/mikf/gallery-dl/issues/4244))
+- [poipiku] improve error detection ([#4206](https://github.com/mikf/gallery-dl/issues/4206))
+- [sankaku] improve warnings for unavailable posts
+- [senmanga] ensure download URLs have a scheme ([#4235](https://github.com/mikf/gallery-dl/issues/4235))
+
 ## 1.25.6 - 2023-06-17
 ### Additions
 - [blogger] download files from `lh*.googleusercontent.com` ([#4070](https://github.com/mikf/gallery-dl/issues/4070))
diff --git a/PKG-INFO b/PKG-INFO
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery_dl
-Version: 1.25.6
+Version: 1.25.7
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -109,9 +109,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.7/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.7/gallery-dl.bin>`__
 
 
 Nightly Builds
diff --git a/README.rst b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -72,9 +72,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.7/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.7/gallery-dl.bin>`__
 
 
 Nightly Builds
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index c86db6a..16a4bba 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-06-17" "1.25.6" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-07-02" "1.25.7" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
Manual" +.TH "GALLERY-DL.CONF" "5" "2023-07-02" "1.25.7" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -1806,6 +1806,47 @@ The \f[I]access_token\f[] and \f[I]access_token_secret\f[] values you get from \f[I]linking your Flickr account to gallery-dl\f[]. +.SS extractor.flickr.exif +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Fetch exif and camera metadata for each photo. + +Note: This requires 1 additional API call per photo. + + +.SS extractor.flickr.metadata +.IP "Type:" 6 +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Example:" 4 +.br +* license,last_update,machine_tags +.br +* ["license", "last_update", "machine_tags"] + +.IP "Description:" 4 +Extract additional metadata +(license, date_taken, original_format, last_update, geo, machine_tags, o_dims) + +It is possible to specify a custom list of metadata includes. +See \f[I]the extras parameter\f[] +in \f[I]Flickr API docs\f[] +for possible field names. + + .SS extractor.flickr.videos .IP "Type:" 6 \f[I]bool\f[] @@ -2422,6 +2463,31 @@ to filter chapters by. List of acceptable content ratings for returned chapters. +.SS extractor.mangapark.source +.IP "Type:" 6 +.br +* \f[I]string\f[] +.br +* \f[I]integer\f[] + +.IP "Example:" 4 +.br +* "koala:en" +.br +* 15150116 + +.IP "Description:" 4 +Select chapter source and language for a manga. + +The general syntax is \f[I]"<source name>:<ISO 639-1 language code>"\f[]. +.br +Both are optional, meaning \f[I]"koala"\f[], \f[I]"koala:"\f[], \f[I]":en"\f[], +.br +or even just \f[I]":"\f[] are possible as well. + +Specifying the numeric \f[I]ID\f[] of a source is also supported. + + .SS extractor.[mastodon].access-token .IP "Type:" 6 \f[I]string\f[] @@ -2803,7 +2869,12 @@ A (comma-separated) list of subcategories to include when processing a user profile. Possible values are -\f[I]"artworks"\f[], \f[I]"avatar"\f[], \f[I]"background"\f[], \f[I]"favorite"\f[]. +\f[I]"artworks"\f[], +\f[I]"avatar"\f[], +\f[I]"background"\f[], +\f[I]"favorite"\f[], +\f[I]"novel-user"\f[], +\f[I]"novel-bookmark"\f[]. It is possible to use \f[I]"all"\f[] instead of listing all values separately. @@ -3467,7 +3538,10 @@ Possible values are .SS extractor.twitter.conversations .IP "Type:" 6 -\f[I]bool\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 \f[I]false\f[] @@ -3476,8 +3550,11 @@ Possible values are For input URLs pointing to a single Tweet, e.g. https://twitter.com/i/web/status/<TweetID>, fetch media from all Tweets and replies in this \f[I]conversation -<https://help.twitter.com/en/using-twitter/twitter-conversations>\f[] -or thread. +<https://help.twitter.com/en/using-twitter/twitter-conversations>\f[]. + +If this option is equal to \f[I]"accessible"\f[], +only download from conversation Tweets +if the given initial Tweet is accessible. 
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 6a3c84f..902d0a2 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -108,8 +108,10 @@
     },
     "flickr":
     {
-        "videos": true,
-        "size-max": null
+        "exif": false,
+        "metadata": false,
+        "size-max": null,
+        "videos": true
     },
     "furaffinity":
     {
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 547f3be..d008254 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery-dl
-Version: 1.25.6
+Version: 1.25.7
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -109,9 +109,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.7/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.7/gallery-dl.bin>`__
 
 
 Nightly Builds
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index e85d68a..d44ff3c 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -106,6 +106,8 @@ class FlickrImageExtractor(FlickrExtractor):
 
     def items(self):
         photo = self.api.photos_getInfo(self.item_id)
+        if self.api.exif:
+            photo.update(self.api.photos_getExif(self.item_id))
 
         if photo["media"] == "video" and self.api.videos:
             self.api._extract_video(photo)
@@ -323,6 +325,7 @@ class FlickrAPI(oauth.OAuth1API):
 
     def __init__(self, extractor):
        oauth.OAuth1API.__init__(self, extractor)
+        self.exif = extractor.config("exif", False)
         self.videos = extractor.config("videos", True)
         self.maxsize = extractor.config("size-max")
         if isinstance(self.maxsize, str):
@@ -367,6 +370,11 @@ class FlickrAPI(oauth.OAuth1API):
         params = {"user_id": user_id}
         return self._pagination("people.getPhotos", params)
 
+    def photos_getExif(self, photo_id):
+        """Retrieves a list of EXIF/TIFF/GPS tags for a given photo."""
+        params = {"photo_id": photo_id}
+        return self._call("photos.getExif", params)["photo"]
+
     def photos_getInfo(self, photo_id):
         """Get information about a photo."""
         params = {"photo_id": photo_id}
@@ -451,9 +459,19 @@ class FlickrAPI(oauth.OAuth1API):
         return data
 
     def _pagination(self, method, params, key="photos"):
-        params["extras"] = ("description,date_upload,tags,views,media,"
-                            "path_alias,owner_name,")
-        params["extras"] += ",".join("url_" + fmt[0] for fmt in self.formats)
+        extras = ("description,date_upload,tags,views,media,"
+                  "path_alias,owner_name,")
+        includes = self.extractor.config("metadata")
+        if includes:
+            if isinstance(includes, (list, tuple)):
+                includes = ",".join(includes)
+            elif not isinstance(includes, str):
+                includes = ("license,date_taken,original_format,last_update,"
+                            "geo,machine_tags,o_dims")
+            extras = extras + includes + ","
+        extras += ",".join("url_" + fmt[0] for fmt in self.formats)
+
+        params["extras"] = extras
         params["page"] = 1
 
         while True:
@@ -478,6 +496,9 @@
             photo["views"] = text.parse_int(photo["views"])
             photo["date"] = text.parse_timestamp(photo["dateupload"])
             photo["tags"] = photo["tags"].split()
+
+            if self.exif:
+                photo.update(self.photos_getExif(photo["id"]))
             photo["id"] = text.parse_int(photo["id"])
 
             if "owner" in photo:
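The `_pagination()` change above defines how each accepted `metadata` value is folded into Flickr's `extras` request parameter: a list is comma-joined and a string is passed through unchanged, so the first two settings below request the same fields, while any other truthy value such as `true` falls back to the built-in include set (license, date_taken, original_format, last_update, geo, machine_tags, o_dims). Illustrative fragments for a "flickr" config block:

    "metadata": ["license", "last_update", "machine_tags"]
    "metadata": "license,last_update,machine_tags"
    "metadata": true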
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 9f5cbba..ec9cd94 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2020-2022 Mike Fährmann
+# Copyright 2020-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -98,7 +98,9 @@ class FuraffinityExtractor(Extractor):
                 'class="tags-row">', '</section>'))
             data["title"] = text.unescape(extr("<h2><p>", "</p></h2>"))
             data["artist"] = extr("<strong>", "<")
-            data["_description"] = extr('class="section-body">', '</div>')
+            data["_description"] = extr(
+                'class="submission-description user-submitted-links">',
+                ' </div>')
             data["views"] = pi(rh(extr('class="views">', '</span>')))
             data["favorites"] = pi(rh(extr('class="favorites">', '</span>')))
             data["comments"] = pi(rh(extr('class="comments">', '</span>')))
@@ -125,7 +127,9 @@ class FuraffinityExtractor(Extractor):
             data["tags"] = text.split_html(extr(
                 'id="keywords">', '</div>'))[::2]
             data["rating"] = extr('<img alt="', ' ')
-            data["_description"] = extr("</table>", "</table>")
+            data["_description"] = extr(
+                '<td valign="top" align="left" width="70%" class="alt1" '
+                'style="padding:8px">', ' </td>')
 
         data["artist_url"] = data["artist"].replace("_", "").lower()
         data["user"] = self.user or data["artist_url"]
diff --git a/gallery_dl/extractor/gelbooru_v01.py b/gallery_dl/extractor/gelbooru_v01.py
index 9c19664..c4f32a4 100644
--- a/gallery_dl/extractor/gelbooru_v01.py
+++ b/gallery_dl/extractor/gelbooru_v01.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -19,29 +19,32 @@ class GelbooruV01Extractor(booru.BooruExtractor):
 
     def _parse_post(self, post_id):
         url = "{}/index.php?page=post&s=view&id={}".format(
            self.root, post_id)
-        page = self.request(url).text
-
-        post = text.extract_all(page, (
-            ("created_at", 'Posted: ', ' <'),
-            ("uploader"  , 'By: ', ' <'),
-            ("width"     , 'Size: ', 'x'),
-            ("height"    , '', ' <'),
-            ("source"    , 'Source: <a href="', '"'),
-            ("rating"    , 'Rating: ', '<'),
-            ("score"     , 'Score: ', ' <'),
-            ("file_url"  , '<img alt="img" src="', '"'),
-            ("tags"      , 'id="tags" name="tags" cols="40" rows="5">', '<'),
-        ))[0]
-
-        post["id"] = post_id
+        extr = text.extract_from(self.request(url).text)
+
+        post = {
+            "id"        : post_id,
+            "created_at": extr('Posted: ', ' <'),
+            "uploader"  : extr('By: ', ' <'),
+            "width"     : extr('Size: ', 'x'),
+            "height"    : extr('', ' <'),
+            "source"    : extr('Source: <a href="', '"'),
+            "rating"    : (extr('Rating: ', '<') or "?")[0].lower(),
+            "score"     : extr('Score: ', ' <'),
+            "file_url"  : extr('<img alt="img" src="', '"'),
+            "tags"      : text.unescape(extr(
+                'id="tags" name="tags" cols="40" rows="5">', '<')),
+        }
+
         post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
-        post["rating"] = (post["rating"] or "?")[0].lower()
-        post["tags"] = text.unescape(post["tags"])
         post["date"] = text.parse_datetime(
             post["created_at"], "%Y-%m-%d %H:%M:%S")
 
         return post
 
+    def skip(self, num):
+        self.page_start += num
+        return num
+
     def _pagination(self, url, begin, end):
         pid = self.page_start
 
@@ -182,7 +185,7 @@ class GelbooruV01PostExtractor(GelbooruV01Extractor):
             "md5": "2aaa0438d58fc7baa75a53b4a9621bb89a9d3fdb",
             "rating": "s",
             "score": str,
-            "source": None,
+            "source": "",
             "tags": "blush dress green_eyes green_hair hatsune_miku "
                     "long_hair twintails vocaloid",
             "uploader": "Honochi31",
diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py
index ed8576f..a1e681d 100644
--- a/gallery_dl/extractor/hentaifox.py
+++ b/gallery_dl/extractor/hentaifox.py
@@ -45,6 +45,15 @@ class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
                 "type": "doujinshi",
             },
         }),
+        # email-protected title (#4201)
+        ("https://hentaifox.com/gallery/35261/", {
+            "keyword": {
+                "gallery_id": 35261,
+                "title": "ManageM@ster!",
+                "artist": ["haritama hiroki"],
+                "group": ["studio n.ball"],
+            },
+        }),
     )
 
     def __init__(self, match):
@@ -65,13 +74,14 @@ class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
 
         return {
             "gallery_id": text.parse_int(self.gallery_id),
-            "title"     : text.unescape(extr("<h1>", "</h1>")),
             "parody"    : split(extr(">Parodies:" , "</ul>")),
             "characters": split(extr(">Characters:", "</ul>")),
             "tags"      : split(extr(">Tags:"      , "</ul>")),
             "artist"    : split(extr(">Artists:"   , "</ul>")),
             "group"     : split(extr(">Groups:"    , "</ul>")),
             "type"      : text.remove_html(extr(">Category:", "<span")),
+            "title"     : text.unescape(extr(
+                'id="gallery_title" value="', '"')),
             "language"  : "English",
             "lang"      : "en",
         }
diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index 168fbe8..a0d1e80 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -8,155 +8,464 @@
 
 """Extractors for https://mangapark.net/"""
 
-from .common import ChapterExtractor, MangaExtractor
+from .common import ChapterExtractor, Extractor, Message
 from .. import text, util, exception
 import re
 
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangapark\.(?:net|com|org|io|me)"
+
 
 class MangaparkBase():
     """Base class for mangapark extractors"""
     category = "mangapark"
-    root_fmt = "https://v2.mangapark.{}"
-    browser = "firefox"
-
-    @staticmethod
-    def parse_chapter_path(path, data):
-        """Get volume/chapter information from url-path of a chapter"""
-        data["volume"], data["chapter_minor"] = 0, ""
-        for part in path.split("/")[1:]:
-            key, value = part[0], part[1:]
-            if key == "c":
-                chapter, dot, minor = value.partition(".")
-                data["chapter"] = text.parse_int(chapter)
-                data["chapter_minor"] = dot + minor
-            elif key == "i":
-                data["chapter_id"] = text.parse_int(value)
-            elif key == "v":
-                data["volume"] = text.parse_int(value)
-            elif key == "s":
-                data["stream"] = text.parse_int(value)
-            elif key == "e":
-                data["chapter_minor"] = "v" + value
-
-    @staticmethod
-    def parse_chapter_title(title, data):
-        match = re.search(r"(?i)(?:vol(?:ume)?[ .]*(\d+) )?"
-                          r"ch(?:apter)?[ .]*(\d+)(\.\w+)?", title)
-        if match:
-            vol, ch, data["chapter_minor"] = match.groups()
-            data["volume"] = text.parse_int(vol)
-            data["chapter"] = text.parse_int(ch)
+    _match_title = None
+
+    def _parse_chapter_title(self, title):
+        if not self._match_title:
+            MangaparkBase._match_title = re.compile(
+                r"(?i)"
+                r"(?:vol(?:\.|ume)?\s*(\d+)\s*)?"
+                r"ch(?:\.|apter)?\s*(\d+)([^\s:]*)"
+                r"(?:\s*:\s*(.*))?"
+            ).match
+        match = self._match_title(title)
+        return match.groups() if match else (0, 0, "", "")
 
 
 class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
     """Extractor for manga-chapters from mangapark.net"""
-    pattern = (r"(?:https?://)?(?:www\.|v2\.)?mangapark\.(me|net|com)"
-               r"/manga/([^?#]+/i\d+)")
+    pattern = BASE_PATTERN + r"/title/[^/?#]+/(\d+)"
     test = (
-        ("https://mangapark.net/manga/gosu/i811653/c055/1", {
-            "count": 50,
-            "keyword": "db1ed9af4f972756a25dbfa5af69a8f155b043ff",
-        }),
-        (("https://mangapark.net/manga"
-          "/ad-astra-per-aspera-hata-kenjirou/i662051/c001.2/1"), {
-            "count": 40,
-            "keyword": "2bb3a8f426383ea13f17ff5582f3070d096d30ac",
+        ("https://mangapark.net/title/114972-aria/6710214-en-ch.60.2", {
+            "count": 70,
+            "pattern": r"https://[\w-]+\.mpcdn\.org/comic/2002/e67"
+                       r"/61e29278a583b9227964076e/\d+_\d+_\d+_\d+\.jpeg"
+                       r"\?acc=[^&#]+&exp=\d+",
+            "keyword": {
+                "artist": [],
+                "author": ["Amano Kozue"],
+                "chapter": 60,
+                "chapter_id": 6710214,
+                "chapter_minor": ".2",
+                "count": 70,
+                "date": "dt:2022-01-15 09:25:03",
+                "extension": "jpeg",
+                "filename": str,
+                "genre": ["adventure", "comedy", "drama", "sci_fi",
+                          "shounen", "slice_of_life"],
+                "lang": "en",
+                "language": "English",
+                "manga": "Aria",
+                "manga_id": 114972,
+                "page": int,
+                "source": "Koala",
+                "title": "Special Navigation - Aquaria Ii",
+                "volume": 12,
+            },
         }),
-        (("https://mangapark.net/manga"
-          "/gekkan-shoujo-nozaki-kun/i2067426/v7/c70/1"), {
-            "count": 15,
-            "keyword": "edc14993c4752cee3a76e09b2f024d40d854bfd1",
-        }),
-        ("https://mangapark.me/manga/gosu/i811615/c55/1"),
-        ("https://mangapark.com/manga/gosu/i811615/c55/1"),
+        ("https://mangapark.com/title/114972-aria/6710214-en-ch.60.2"),
+        ("https://mangapark.org/title/114972-aria/6710214-en-ch.60.2"),
+        ("https://mangapark.io/title/114972-aria/6710214-en-ch.60.2"),
+        ("https://mangapark.me/title/114972-aria/6710214-en-ch.60.2"),
     )
 
     def __init__(self, match):
-        tld, self.path = match.groups()
-        self.root = self.root_fmt.format(tld)
-        url = "{}/manga/{}?zoom=2".format(self.root, self.path)
+        self.root = text.root_from_url(match.group(0))
+        url = "{}/title/_/{}".format(self.root, match.group(1))
         ChapterExtractor.__init__(self, match, url)
 
     def metadata(self, page):
-        data = text.extract_all(page, (
-            ("manga_id"  , "var _manga_id = '", "'"),
-            ("chapter_id", "var _book_id = '", "'"),
-            ("stream"    , "var _stream = '", "'"),
-            ("path"      , "var _book_link = '", "'"),
-            ("manga"     , "<h2>", "</h2>"),
-            ("title"     , "</a>", "<"),
-        ), values={"lang": "en", "language": "English"})[0]
-
-        if not data["path"]:
-            raise exception.NotFoundError("chapter")
-
-        self.parse_chapter_path(data["path"], data)
-        if "chapter" not in data:
-            self.parse_chapter_title(data["title"], data)
-
-        data["manga"], _, data["type"] = data["manga"].rpartition(" ")
-        data["manga"] = text.unescape(data["manga"])
-        data["title"] = data["title"].partition(": ")[2]
-        for key in ("manga_id", "chapter_id", "stream"):
-            data[key] = text.parse_int(data[key])
-
-        return data
+        data = util.json_loads(text.extr(
+            page, 'id="__NEXT_DATA__" type="application/json">', '<'))
+        chapter = (data["props"]["pageProps"]["dehydratedState"]
+                   ["queries"][0]["state"]["data"]["data"])
+        manga = chapter["comicNode"]["data"]
+        source = chapter["sourceNode"]["data"]
+
+        self._urls = chapter["imageSet"]["httpLis"]
+        self._params = chapter["imageSet"]["wordLis"]
+        vol, ch, minor, title = self._parse_chapter_title(chapter["dname"])
+
+        return {
+            "manga"     : manga["name"],
+            "manga_id"  : manga["id"],
+            "artist"    : source["artists"],
+            "author"    : source["authors"],
+            "genre"     : source["genres"],
+            "volume"    : text.parse_int(vol),
+            "chapter"   : text.parse_int(ch),
+            "chapter_minor": minor,
+            "chapter_id": chapter["id"],
+            "title"     : chapter["title"] or title or "",
+            "lang"      : chapter["lang"],
+            "language"  : util.code_to_language(chapter["lang"]),
+            "source"    : source["srcTitle"],
+            "source_id" : source["id"],
+            "date"      : text.parse_timestamp(chapter["dateCreate"] // 1000),
+        }
 
     def images(self, page):
-        data = util.json_loads(text.extr(page, "var _load_pages =", ";"))
         return [
-            (text.urljoin(self.root, item["u"]), {
-                "width": text.parse_int(item["w"]),
-                "height": text.parse_int(item["h"]),
-            })
-            for item in data
+            (url + "?" + params, None)
+            for url, params in zip(self._urls, self._params)
         ]
 
 
-class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
+class MangaparkMangaExtractor(MangaparkBase, Extractor):
     """Extractor for manga from mangapark.net"""
-    chapterclass = MangaparkChapterExtractor
-    pattern = (r"(?:https?://)?(?:www\.|v2\.)?mangapark\.(me|net|com)"
-               r"(/manga/[^/?#]+)/?$")
+    subcategory = "manga"
+    pattern = BASE_PATTERN + r"/title/(\d+)(?:-[^/?#]*)?/?$"
     test = (
-        ("https://mangapark.net/manga/aria", {
-            "url": "51c6d82aed5c3c78e0d3f980b09a998e6a2a83ee",
-            "keyword": "cabc60cf2efa82749d27ac92c495945961e4b73c",
+        ("https://mangapark.net/title/114972-aria", {
+            "count": 141,
+            "pattern": MangaparkChapterExtractor.pattern,
+            "keyword": {
+                "chapter": int,
+                "chapter_id": int,
+                "chapter_minor": str,
+                "date": "type:datetime",
+                "lang": "en",
+                "language": "English",
+                "manga_id": 114972,
+                "source": "re:Horse|Koala",
+                "source_id": int,
+                "title": str,
+                "volume": int,
+            },
+        }),
+        # 'source' option
+        ("https://mangapark.net/title/114972-aria", {
+            "options": (("source", "koala"),),
+            "count": 70,
+            "pattern": MangaparkChapterExtractor.pattern,
+            "keyword": {
+                "source": "Koala",
+                "source_id": 15150116,
+            },
         }),
-        ("https://mangapark.me/manga/aria"),
-        ("https://mangapark.com/manga/aria"),
+        ("https://mangapark.com/title/114972-"),
+        ("https://mangapark.com/title/114972"),
+        ("https://mangapark.com/title/114972-aria"),
+        ("https://mangapark.org/title/114972-aria"),
+        ("https://mangapark.io/title/114972-aria"),
+        ("https://mangapark.me/title/114972-aria"),
     )
 
     def __init__(self, match):
-        self.root = self.root_fmt.format(match.group(1))
-        MangaExtractor.__init__(self, match, self.root + match.group(2))
-
-    def chapters(self, page):
-        results = []
-        data = {"lang": "en", "language": "English"}
-        data["manga"] = text.unescape(
-            text.extr(page, '<title>', ' Manga - '))
-
-        for stream in page.split('<div id="stream_')[1:]:
-            data["stream"] = text.parse_int(text.extr(stream, '', '"'))
-
-            for chapter in text.extract_iter(stream, '<li ', '</li>'):
-                path  , pos = text.extract(chapter, 'href="', '"')
-                title1, pos = text.extract(chapter, '>', '<', pos)
-                title2, pos = text.extract(chapter, '>: </span>', '<', pos)
-                count , pos = text.extract(chapter, ' of ', ' ', pos)
-
-                self.parse_chapter_path(path[8:], data)
-                if "chapter" not in data:
-                    self.parse_chapter_title(title1, data)
-
-                if title2:
-                    data["title"] = title2.strip()
-                else:
-                    data["title"] = title1.partition(":")[2].strip()
-
-                data["count"] = text.parse_int(count)
-                results.append((self.root + path, data.copy()))
-                data.pop("chapter", None)
-
-        return results
+        self.root = text.root_from_url(match.group(0))
+        self.manga_id = int(match.group(1))
+        Extractor.__init__(self, match)
+
+    def items(self):
+        for chapter in self.chapters():
+            chapter = chapter["data"]
+            url = self.root + chapter["urlPath"]
+
+            vol, ch, minor, title = self._parse_chapter_title(
+                chapter["dname"])
+            data = {
+                "manga_id"  : self.manga_id,
+                "volume"    : text.parse_int(vol),
+                "chapter"   : text.parse_int(ch),
+                "chapter_minor": minor,
+                "chapter_id": chapter["id"],
+                "title"     : chapter["title"] or title or "",
+                "lang"      : chapter["lang"],
+                "language"  : util.code_to_language(chapter["lang"]),
+                "source"    : chapter["srcTitle"],
+                "source_id" : chapter["sourceId"],
+                "date"      : text.parse_timestamp(
+                    chapter["dateCreate"] // 1000),
+                "_extractor": MangaparkChapterExtractor,
+            }
+            yield Message.Queue, url, data
+
+    def chapters(self):
+        source = self.config("source")
+        if not source:
+            return self.chapters_all()
+
+        source_id = self._select_source(source)
+        self.log.debug("Requesting chapters for source_id %s", source_id)
+        return self.chapters_source(source_id)
+
+    def chapters_all(self):
+        pnum = 0
+        variables = {
+            "select": {
+                "comicId": self.manga_id,
+                "range"  : None,
+                "isAsc"  : not self.config("chapter-reverse"),
+            }
+        }
+
+        while True:
+            data = self._request_graphql(
+                "get_content_comicChapterRangeList", variables)
+
+            for item in data["items"]:
+                yield from item["chapterNodes"]
+
+            if not pnum:
+                pager = data["pager"]
+            pnum += 1
+
+            try:
+                variables["select"]["range"] = pager[pnum]
+            except IndexError:
+                return
+
+    def chapters_source(self, source_id):
+        variables = {
+            "sourceId": source_id,
+        }
+        chapters = self._request_graphql(
+            "get_content_source_chapterList", variables)
+
+        if self.config("chapter-reverse"):
+            chapters.reverse()
+        return chapters
+
+    def _select_source(self, source):
+        if isinstance(source, int):
+            return source
+
+        group, _, lang = source.partition(":")
+        group = group.lower()
+
+        variables = {
+            "comicId"    : self.manga_id,
+            "dbStatuss"  : ["normal"],
+            "haveChapter": True,
+        }
+        for item in self._request_graphql(
+                "get_content_comic_sources", variables):
+            data = item["data"]
+            if (not group or data["srcTitle"].lower() == group) and (
+                    not lang or data["lang"] == lang):
+                return data["id"]
+
+        raise exception.StopExtraction(
+            "'%s' does not match any available source", source)
+
+    def _request_graphql(self, opname, variables):
+        url = self.root + "/apo/"
+        data = {
+            "query"        : QUERIES[opname],
+            "variables"    : util.json_dumps(variables),
+            "operationName": opname,
+        }
+        return self.request(
+            url, method="POST", json=data).json()["data"][opname]
+
+
+QUERIES = {
+    "get_content_comicChapterRangeList": """
+  query get_content_comicChapterRangeList($select: Content_ComicChapterRangeList_Select) {
+    get_content_comicChapterRangeList(
+      select: $select
+    ) {
+      reqRange{x y}
+      missing
+      pager {x y}
+      items{
+        serial
+        chapterNodes {
+          id
+          data {
+            id
+            sourceId
+
+            dbStatus
+            isNormal
+            isHidden
+            isDeleted
+            isFinal
+
+            dateCreate
+            datePublic
+            dateModify
+            lang
+            volume
+            serial
+            dname
+            title
+            urlPath
+
+            srcTitle srcColor
+
+            count_images
+
+            stat_count_post_child
+            stat_count_post_reply
+            stat_count_views_login
+            stat_count_views_guest
+
+            userId
+            userNode {
+              id
+              data {
+                id
+                name
+                uniq
+                avatarUrl
+                urlPath
+
+                verified
+                deleted
+                banned
+
+                dateCreate
+                dateOnline
+
+                stat_count_chapters_normal
+                stat_count_chapters_others
+
+                is_adm is_mod is_vip is_upr
+              }
+            }
+
+            disqusId
+          }
+
+          sser_read
+        }
+      }
+    }
+  }
+""",
+
+    "get_content_source_chapterList": """
+  query get_content_source_chapterList($sourceId: Int!) {
+    get_content_source_chapterList(
+      sourceId: $sourceId
+    ) {
+      id
+      data {
+        id
+        sourceId
+
+        dbStatus
+        isNormal
+        isHidden
+        isDeleted
+        isFinal
+
+        dateCreate
+        datePublic
+        dateModify
+        lang
+        volume
+        serial
+        dname
+        title
+        urlPath
+
+        srcTitle srcColor
+
+        count_images
+
+        stat_count_post_child
+        stat_count_post_reply
+        stat_count_views_login
+        stat_count_views_guest
+
+        userId
+        userNode {
+          id
+          data {
+            id
+            name
+            uniq
+            avatarUrl
+            urlPath
+
+            verified
+            deleted
+            banned
+
+            dateCreate
+            dateOnline
+
+            stat_count_chapters_normal
+            stat_count_chapters_others
+
+            is_adm is_mod is_vip is_upr
+          }
+        }
+
+        disqusId
+      }
+    }
+  }
+""",
+
+    "get_content_comic_sources": """
+  query get_content_comic_sources($comicId: Int!, $dbStatuss: [String] = [], $userId: Int, $haveChapter: Boolean, $sortFor: String) {
+    get_content_comic_sources(
+      comicId: $comicId
+      dbStatuss: $dbStatuss
+      userId: $userId
+      haveChapter: $haveChapter
+      sortFor: $sortFor
+    ) {
+      id
+      data{
+        id
+
+        dbStatus
+        isNormal
+        isHidden
+        isDeleted
+
+        lang name altNames authors artists
+
+        release
+        genres summary{code} extraInfo{code}
+
+        urlCover600
+        urlCover300
+        urlCoverOri
+
+        srcTitle srcColor
+
+        chapterCount
+        chapterNode_last {
+          id
+          data {
+            dateCreate datePublic dateModify
+            volume serial
+            dname title
+            urlPath
+            userNode {
+              id data {uniq name}
+            }
+          }
+        }
+      }
+    }
+  }
+""",
+}
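The rewritten manga extractor resolves chapter lists through Mangapark's GraphQL endpoint at `/apo/`: `_select_source()` matches a `"name:lang"` string against each source's `srcTitle` and `lang`, or accepts a numeric source ID directly, and both chapter paths honor the pre-existing `chapter-reverse` option. A config sketch using the numeric source ID from the test case above (illustrative values):

    {
        "extractor": {
            "mangapark": {
                "source": 15150116,
                "chapter-reverse": true
            }
        }
    }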
diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py
index b03d6f8..56c2978 100644
--- a/gallery_dl/extractor/piczel.py
+++ b/gallery_dl/extractor/piczel.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2018-2022 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -19,7 +19,7 @@ class PiczelExtractor(Extractor):
     filename_fmt = "{category}_{id}_{title}_{num:>02}.{extension}"
     archive_fmt = "{id}_{num}"
     root = "https://piczel.tv"
-    api_root = "https://tombstone.piczel.tv"
+    api_root = root
 
     def items(self):
         for post in self.posts():
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index 14c25c4..e3bb512 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -76,11 +76,12 @@ class PoipikuExtractor(Extractor):
             "MD" : "0",
             "TWF": "-1",
         }
-        page = self.request(
-            url, method="POST", headers=headers, data=data).json()["html"]
+        resp = self.request(
+            url, method="POST", headers=headers, data=data).json()
 
-        if page.startswith(("You need to", "Password is incorrect")):
-            self.log.warning("'%s'", page)
+        page = resp["html"]
+        if (resp.get("result_num") or 0) < 0:
+            self.log.warning("'%s'", page.replace("<br/>", " "))
 
         for thumb in text.extract_iter(
                 page, 'class="IllustItemThumbImg" src="', '"'):
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index f36051b..09e5421 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -46,10 +46,15 @@ class SankakuExtractor(BooruExtractor):
 
     def _file_url(self, post):
         url = post["file_url"]
-        if not url and self._warning:
-            self.log.warning(
-                "Login required to download 'contentious_content' posts")
-            SankakuExtractor._warning = False
+        if not url:
+            if post["status"] != "active":
+                self.log.warning(
+                    "Unable to download post %s (%s)",
+                    post["id"], post["status"])
+            elif self._warning:
+                self.log.warning(
+                    "Login required to download 'contentious_content' posts")
+                SankakuExtractor._warning = False
         elif url[8] == "v":
             url = "https://s.sankakucomplex.com" + url[url.index("/", 8):]
         return url
diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py
index 6d025f4..92c9d2c 100644
--- a/gallery_dl/extractor/senmanga.py
+++ b/gallery_dl/extractor/senmanga.py
@@ -58,6 +58,12 @@ class SenmangaChapterExtractor(ChapterExtractor):
                 "manga": "Akabane Honeko no Bodyguard",
             },
         }),
+        # no http scheme ()
+        ("https://raw.senmanga.com/amama-cinderella/3", {
+            "pattern": r"^https://kumacdn.club/image-new-2/a/amama-cinderella"
+                       r"/chapter-3/.+\.jpg",
+            "count": 30,
+        }),
     )
 
     def __init__(self, match):
@@ -82,7 +88,7 @@
 
     def images(self, page):
         return [
-            (url, None)
+            (text.ensure_http_scheme(url), None)
             for url in text.extract_iter(
                 page, '<img class="picture" src="', '"')
         ]
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 710bde3..10db974 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -919,7 +919,9 @@ Your reaction.""",
         self.tweet_id = match.group(2)
 
     def tweets(self):
-        if self.config("conversations", False):
+        conversations = self.config("conversations")
+        if conversations:
+            self._accessible = (conversations == "accessible")
             return self._tweets_conversation(self.tweet_id)
         else:
             return self._tweets_single(self.tweet_id)
@@ -950,6 +952,11 @@ Your reaction.""",
                     tweet.get("_retweet_id_str") == tweet_id:
                 self._assign_user(tweet["core"]["user_results"]["result"])
                 break
+        else:
+            # initial Tweet not accessible
+            if self._accessible:
+                return ()
+            return buffer
 
         return itertools.chain(buffer, tweets)
diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py
index a0fba3c..0ba0d91 100644
--- a/gallery_dl/extractor/wallhaven.py
+++ b/gallery_dl/extractor/wallhaven.py
@@ -260,7 +260,7 @@ class WallhavenAPI():
 
         self.extractor.log.debug("Server response: %s", response.text)
         raise exception.StopExtraction(
-            "API request failed (%s: %s)",
+            "API request failed (%s %s)",
             response.status_code, response.reason)
 
     def _pagination(self, endpoint, params=None, metadata=None):
-__version__ = "1.25.6" +__version__ = "1.25.7" @@ -4,6 +4,7 @@ ignore = E203,E226,W504 per-file-ignores = setup.py: E501 gallery_dl/extractor/500px.py: E501 + gallery_dl/extractor/mangapark.py: E501 [egg_info] tag_build = diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py index ac89b55..554a51e 100644 --- a/test/test_postprocessor.py +++ b/test/test_postprocessor.py @@ -164,6 +164,76 @@ class ClassifyTest(BasePostprocessorTest): mkdirs.assert_called_once_with(path, exist_ok=True) +class ExecTest(BasePostprocessorTest): + + def test_command_string(self): + self._create({ + "command": "echo {} && rm {};", + }) + + with patch("subprocess.Popen") as p: + i = Mock() + i.wait.return_value = 0 + p.return_value = i + self._trigger(("after",)) + + p.assert_called_once_with( + "echo {0} && rm {0};".format(self.pathfmt.realpath), shell=True) + i.wait.assert_called_once_with() + + def test_command_list(self): + self._create({ + "command": ["~/script.sh", "{category}", + "\fE _directory.upper()"], + }) + + with patch("subprocess.Popen") as p: + i = Mock() + i.wait.return_value = 0 + p.return_value = i + self._trigger(("after",)) + + p.assert_called_once_with( + [ + os.path.expanduser("~/script.sh"), + self.pathfmt.kwdict["category"], + self.pathfmt.realdirectory.upper(), + ], + shell=False, + ) + + def test_command_returncode(self): + self._create({ + "command": "echo {}", + }) + + with patch("subprocess.Popen") as p: + i = Mock() + i.wait.return_value = 123 + p.return_value = i + + with self.assertLogs() as log: + self._trigger(("after",)) + + msg = ("WARNING:postprocessor.exec:'echo {}' returned with " + "non-zero exit status (123)".format(self.pathfmt.realpath)) + self.assertEqual(log.output[0], msg) + + def test_async(self): + self._create({ + "async" : True, + "command": "echo {}", + }) + + with patch("subprocess.Popen") as p: + i = Mock() + p.return_value = i + self._trigger(("after",)) + + self.assertTrue(p.called) + self.assertFalse(i.wait.called) + + class MetadataTest(BasePostprocessorTest): def test_metadata_default(self): |
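The new ExecTest cases pin down the two command forms of the `exec` post processor: a shell string, with each `{}` replaced by the file path and run with `shell=True`, and an argument list run without a shell, where `~` is expanded and format-string replacement fields are applied per argument. A non-zero exit status logs a warning, and `"async": true` skips waiting for the process. A configuration sketch based on what the tests assert (placement under a `postprocessors` list follows the usual gallery-dl convention; values are illustrative):

    {
        "extractor": {
            "postprocessors": [
                {
                    "name"   : "exec",
                    "command": "echo {} && rm {};",
                    "async"  : false
                }
            ]
        }
    }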
