Diffstat:

-rw-r--r--  CHANGELOG.md                          |  16
-rw-r--r--  PKG-INFO                              |   6
-rw-r--r--  README.rst                            |   4
-rw-r--r--  data/man/gallery-dl.1                 |   2
-rw-r--r--  data/man/gallery-dl.conf.5            |  87
-rw-r--r--  docs/gallery-dl.conf                  |   6
-rw-r--r--  gallery_dl.egg-info/PKG-INFO          |   6
-rw-r--r--  gallery_dl/extractor/flickr.py        |  27
-rw-r--r--  gallery_dl/extractor/furaffinity.py   |  10
-rw-r--r--  gallery_dl/extractor/gelbooru_v01.py  |  41
-rw-r--r--  gallery_dl/extractor/hentaifox.py     |  12
-rw-r--r--  gallery_dl/extractor/mangapark.py     | 551
-rw-r--r--  gallery_dl/extractor/piczel.py        |   4
-rw-r--r--  gallery_dl/extractor/poipiku.py       |   9
-rw-r--r--  gallery_dl/extractor/sankaku.py       |  13
-rw-r--r--  gallery_dl/extractor/senmanga.py      |   8
-rw-r--r--  gallery_dl/extractor/twitter.py       |   9
-rw-r--r--  gallery_dl/extractor/wallhaven.py     |   2
-rw-r--r--  gallery_dl/version.py                 |   2
-rw-r--r--  setup.cfg                             |   1
-rw-r--r--  test/test_postprocessor.py            |  70

21 files changed, 709 insertions(+), 177 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 429c7ea..b71b404 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,21 @@
# Changelog
+## 1.25.7 - 2023-07-02
+### Additions
+- [flickr] add 'exif' option
+- [flickr] add 'metadata' option ([#4227](https://github.com/mikf/gallery-dl/issues/4227))
+- [mangapark] add 'source' option ([#3969](https://github.com/mikf/gallery-dl/issues/3969))
+- [twitter] extend 'conversations' option ([#4211](https://github.com/mikf/gallery-dl/issues/4211))
+### Fixes
+- [furaffinity] improve 'description' HTML ([#4224](https://github.com/mikf/gallery-dl/issues/4224))
+- [gelbooru_v01] fix '--range' ([#4167](https://github.com/mikf/gallery-dl/issues/4167))
+- [hentaifox] fix titles containing '@' ([#4201](https://github.com/mikf/gallery-dl/issues/4201))
+- [mangapark] update to v5 ([#3969](https://github.com/mikf/gallery-dl/issues/3969))
+- [piczel] update API server address ([#4244](https://github.com/mikf/gallery-dl/issues/4244))
+- [poipiku] improve error detection ([#4206](https://github.com/mikf/gallery-dl/issues/4206))
+- [sankaku] improve warnings for unavailable posts
+- [senmanga] ensure download URLs have a scheme ([#4235](https://github.com/mikf/gallery-dl/issues/4235))
+
## 1.25.6 - 2023-06-17
### Additions
- [blogger] download files from `lh*.googleusercontent.com` ([#4070](https://github.com/mikf/gallery-dl/issues/4070))
diff --git a/PKG-INFO b/PKG-INFO
index 68bf134..ff9ab3f 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.25.6
+Version: 1.25.7
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -109,9 +109,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.7/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.7/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 44cbfb3..86dd58d 100644
--- a/README.rst
+++ b/README.rst
@@ -72,9 +72,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.7/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.7/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index c86db6a..16a4bba 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-06-17" "1.25.6" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-07-02" "1.25.7" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index e4df909..2cba623 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-06-17" "1.25.6" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-07-02" "1.25.7" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1806,6 +1806,47 @@ The \f[I]access_token\f[] and \f[I]access_token_secret\f[] values you get
from \f[I]linking your Flickr account to gallery-dl\f[].
+.SS extractor.flickr.exif
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Fetch exif and camera metadata for each photo.
+
+Note: This requires 1 additional API call per photo.
+
+
+.SS extractor.flickr.metadata
+.IP "Type:" 6
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Example:" 4
+.br
+* license,last_update,machine_tags
+.br
+* ["license", "last_update", "machine_tags"]
+
+.IP "Description:" 4
+Extract additional metadata
+(license, date_taken, original_format, last_update, geo, machine_tags, o_dims).
+
+It is possible to specify a custom list of metadata includes.
+See \f[I]the extras parameter\f[]
+in \f[I]Flickr API docs\f[]
+for possible field names.
+
+
.SS extractor.flickr.videos
.IP "Type:" 6
\f[I]bool\f[]
@@ -2422,6 +2463,31 @@ to filter chapters by.
List of acceptable content ratings for returned chapters.
+.SS extractor.mangapark.source
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]integer\f[]
+
+.IP "Example:" 4
+.br
+* "koala:en"
+.br
+* 15150116
+
+.IP "Description:" 4
+Select chapter source and language for a manga.
+
+The general syntax is \f[I]"<source name>:<ISO 639-1 language code>"\f[].
+.br
+Both are optional, meaning \f[I]"koala"\f[], \f[I]"koala:"\f[], \f[I]":en"\f[],
+.br
+or even just \f[I]":"\f[] are possible as well.
+
+Specifying the numeric \f[I]ID\f[] of a source is also supported.
+
+
.SS extractor.[mastodon].access-token
.IP "Type:" 6
\f[I]string\f[]
@@ -2803,7 +2869,12 @@ A (comma-separated) list of subcategories to include
when processing a user profile.
Possible values are
-\f[I]"artworks"\f[], \f[I]"avatar"\f[], \f[I]"background"\f[], \f[I]"favorite"\f[].
+\f[I]"artworks"\f[],
+\f[I]"avatar"\f[],
+\f[I]"background"\f[],
+\f[I]"favorite"\f[],
+\f[I]"novel-user"\f[],
+\f[I]"novel-bookmark"\f[].
It is possible to use \f[I]"all"\f[] instead of listing all values separately.
@@ -3467,7 +3538,10 @@ Possible values are
.SS extractor.twitter.conversations
.IP "Type:" 6
-\f[I]bool\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]false\f[]
@@ -3476,8 +3550,11 @@ Possible values are
For input URLs pointing to a single Tweet,
e.g. https://twitter.com/i/web/status/<TweetID>,
fetch media from all Tweets and replies in this \f[I]conversation
-<https://help.twitter.com/en/using-twitter/twitter-conversations>\f[]
-or thread.
+<https://help.twitter.com/en/using-twitter/twitter-conversations>\f[].
+
+If this option is equal to \f[I]"accessible"\f[],
+only download from conversation Tweets
+if the given initial Tweet is accessible.
.SS extractor.twitter.csrf
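
Taken together, the options documented above can be set in one config file, in the same JSON shape as docs/gallery-dl.conf below. A minimal sketch (option names and accepted values as documented above; the concrete choices are illustrative):

    {
        "extractor": {
            "flickr": {
                "exif": true,
                "metadata": ["license", "last_update", "machine_tags"]
            },
            "mangapark": {
                "source": "koala:en"
            },
            "twitter": {
                "conversations": "accessible"
            }
        }
    }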
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 6a3c84f..902d0a2 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -108,8 +108,10 @@
},
"flickr":
{
- "videos": true,
- "size-max": null
+ "exif": false,
+ "metadata": false,
+ "size-max": null,
+ "videos": true
},
"furaffinity":
{
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 547f3be..d008254 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.25.6
+Version: 1.25.7
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -109,9 +109,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.7/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.6/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.7/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index e85d68a..d44ff3c 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -106,6 +106,8 @@ class FlickrImageExtractor(FlickrExtractor):
def items(self):
photo = self.api.photos_getInfo(self.item_id)
+ if self.api.exif:
+ photo.update(self.api.photos_getExif(self.item_id))
if photo["media"] == "video" and self.api.videos:
self.api._extract_video(photo)
@@ -323,6 +325,7 @@ class FlickrAPI(oauth.OAuth1API):
def __init__(self, extractor):
oauth.OAuth1API.__init__(self, extractor)
+ self.exif = extractor.config("exif", False)
self.videos = extractor.config("videos", True)
self.maxsize = extractor.config("size-max")
if isinstance(self.maxsize, str):
@@ -367,6 +370,11 @@ class FlickrAPI(oauth.OAuth1API):
params = {"user_id": user_id}
return self._pagination("people.getPhotos", params)
+ def photos_getExif(self, photo_id):
+ """Retrieves a list of EXIF/TIFF/GPS tags for a given photo."""
+ params = {"photo_id": photo_id}
+ return self._call("photos.getExif", params)["photo"]
+
def photos_getInfo(self, photo_id):
"""Get information about a photo."""
params = {"photo_id": photo_id}
@@ -451,9 +459,19 @@ class FlickrAPI(oauth.OAuth1API):
return data
def _pagination(self, method, params, key="photos"):
- params["extras"] = ("description,date_upload,tags,views,media,"
- "path_alias,owner_name,")
- params["extras"] += ",".join("url_" + fmt[0] for fmt in self.formats)
+ extras = ("description,date_upload,tags,views,media,"
+ "path_alias,owner_name,")
+ includes = self.extractor.config("metadata")
+ if includes:
+ if isinstance(includes, (list, tuple)):
+ includes = ",".join(includes)
+ elif not isinstance(includes, str):
+ includes = ("license,date_taken,original_format,last_update,"
+ "geo,machine_tags,o_dims")
+ extras = extras + includes + ","
+ extras += ",".join("url_" + fmt[0] for fmt in self.formats)
+
+ params["extras"] = extras
params["page"] = 1
while True:
@@ -478,6 +496,9 @@ class FlickrAPI(oauth.OAuth1API):
photo["views"] = text.parse_int(photo["views"])
photo["date"] = text.parse_timestamp(photo["dateupload"])
photo["tags"] = photo["tags"].split()
+
+ if self.exif:
+ photo.update(self.photos_getExif(photo["id"]))
photo["id"] = text.parse_int(photo["id"])
if "owner" in photo:
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 9f5cbba..ec9cd94 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020-2022 Mike Fährmann
+# Copyright 2020-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -98,7 +98,9 @@ class FuraffinityExtractor(Extractor):
'class="tags-row">', '</section>'))
data["title"] = text.unescape(extr("<h2><p>", "</p></h2>"))
data["artist"] = extr("<strong>", "<")
- data["_description"] = extr('class="section-body">', '</div>')
+ data["_description"] = extr(
+ 'class="submission-description user-submitted-links">',
+ ' </div>')
data["views"] = pi(rh(extr('class="views">', '</span>')))
data["favorites"] = pi(rh(extr('class="favorites">', '</span>')))
data["comments"] = pi(rh(extr('class="comments">', '</span>')))
@@ -125,7 +127,9 @@ class FuraffinityExtractor(Extractor):
data["tags"] = text.split_html(extr(
'id="keywords">', '</div>'))[::2]
data["rating"] = extr('<img alt="', ' ')
- data["_description"] = extr("</table>", "</table>")
+ data["_description"] = extr(
+ '<td valign="top" align="left" width="70%" class="alt1" '
+ 'style="padding:8px">', ' </td>')
data["artist_url"] = data["artist"].replace("_", "").lower()
data["user"] = self.user or data["artist_url"]
diff --git a/gallery_dl/extractor/gelbooru_v01.py b/gallery_dl/extractor/gelbooru_v01.py
index 9c19664..c4f32a4 100644
--- a/gallery_dl/extractor/gelbooru_v01.py
+++ b/gallery_dl/extractor/gelbooru_v01.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -19,29 +19,32 @@ class GelbooruV01Extractor(booru.BooruExtractor):
def _parse_post(self, post_id):
url = "{}/index.php?page=post&s=view&id={}".format(
self.root, post_id)
- page = self.request(url).text
-
- post = text.extract_all(page, (
- ("created_at", 'Posted: ', ' <'),
- ("uploader" , 'By: ', ' <'),
- ("width" , 'Size: ', 'x'),
- ("height" , '', ' <'),
- ("source" , 'Source: <a href="', '"'),
- ("rating" , 'Rating: ', '<'),
- ("score" , 'Score: ', ' <'),
- ("file_url" , '<img alt="img" src="', '"'),
- ("tags" , 'id="tags" name="tags" cols="40" rows="5">', '<'),
- ))[0]
-
- post["id"] = post_id
+ extr = text.extract_from(self.request(url).text)
+
+ post = {
+ "id" : post_id,
+ "created_at": extr('Posted: ', ' <'),
+ "uploader" : extr('By: ', ' <'),
+ "width" : extr('Size: ', 'x'),
+ "height" : extr('', ' <'),
+ "source" : extr('Source: <a href="', '"'),
+ "rating" : (extr('Rating: ', '<') or "?")[0].lower(),
+ "score" : extr('Score: ', ' <'),
+ "file_url" : extr('<img alt="img" src="', '"'),
+ "tags" : text.unescape(extr(
+ 'id="tags" name="tags" cols="40" rows="5">', '<')),
+ }
+
post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
- post["rating"] = (post["rating"] or "?")[0].lower()
- post["tags"] = text.unescape(post["tags"])
post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%d %H:%M:%S")
return post
+ def skip(self, num):
+ self.page_start += num
+ return num
+
def _pagination(self, url, begin, end):
pid = self.page_start
@@ -182,7 +185,7 @@ class GelbooruV01PostExtractor(GelbooruV01Extractor):
"md5": "2aaa0438d58fc7baa75a53b4a9621bb89a9d3fdb",
"rating": "s",
"score": str,
- "source": None,
+ "source": "",
"tags": "blush dress green_eyes green_hair hatsune_miku "
"long_hair twintails vocaloid",
"uploader": "Honochi31",
diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py
index ed8576f..a1e681d 100644
--- a/gallery_dl/extractor/hentaifox.py
+++ b/gallery_dl/extractor/hentaifox.py
@@ -45,6 +45,15 @@ class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
"type": "doujinshi",
},
}),
+ # email-protected title (#4201)
+ ("https://hentaifox.com/gallery/35261/", {
+ "keyword": {
+ "gallery_id": 35261,
+ "title": "ManageM@ster!",
+ "artist": ["haritama hiroki"],
+ "group": ["studio n.ball"],
+ },
+ }),
)
def __init__(self, match):
@@ -65,13 +74,14 @@ class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
return {
"gallery_id": text.parse_int(self.gallery_id),
- "title" : text.unescape(extr("<h1>", "</h1>")),
"parody" : split(extr(">Parodies:" , "</ul>")),
"characters": split(extr(">Characters:", "</ul>")),
"tags" : split(extr(">Tags:" , "</ul>")),
"artist" : split(extr(">Artists:" , "</ul>")),
"group" : split(extr(">Groups:" , "</ul>")),
"type" : text.remove_html(extr(">Category:", "<span")),
+ "title" : text.unescape(extr(
+ 'id="gallery_title" value="', '"')),
"language" : "English",
"lang" : "en",
}
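
The title fix above reads the gallery_title hidden input instead of the <h1> heading: for titles containing '@', the visible heading gets rewritten (the "email-protected title" case in the test, presumably an e-mail obfuscation script), while the input's value attribute keeps the raw text. Because extract_from consumes the page sequentially, the "title" key also had to move below the tag lists to match the input's later position in the page. A small usage sketch (the input markup shape is an assumption):

    from gallery_dl import text

    page = '<input type="hidden" id="gallery_title" value="ManageM@ster!">'
    extr = text.extract_from(page)
    # the visible <h1> may already be rewritten by the obfuscator,
    # but the hidden input keeps the raw title:
    title = text.unescape(extr('id="gallery_title" value="', '"'))
    print(title)   # ManageM@ster!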
diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index 168fbe8..a0d1e80 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -8,155 +8,464 @@
"""Extractors for https://mangapark.net/"""
-from .common import ChapterExtractor, MangaExtractor
+from .common import ChapterExtractor, Extractor, Message
from .. import text, util, exception
import re
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangapark\.(?:net|com|org|io|me)"
+
class MangaparkBase():
"""Base class for mangapark extractors"""
category = "mangapark"
- root_fmt = "https://v2.mangapark.{}"
- browser = "firefox"
-
- @staticmethod
- def parse_chapter_path(path, data):
- """Get volume/chapter information from url-path of a chapter"""
- data["volume"], data["chapter_minor"] = 0, ""
- for part in path.split("/")[1:]:
- key, value = part[0], part[1:]
- if key == "c":
- chapter, dot, minor = value.partition(".")
- data["chapter"] = text.parse_int(chapter)
- data["chapter_minor"] = dot + minor
- elif key == "i":
- data["chapter_id"] = text.parse_int(value)
- elif key == "v":
- data["volume"] = text.parse_int(value)
- elif key == "s":
- data["stream"] = text.parse_int(value)
- elif key == "e":
- data["chapter_minor"] = "v" + value
-
- @staticmethod
- def parse_chapter_title(title, data):
- match = re.search(r"(?i)(?:vol(?:ume)?[ .]*(\d+) )?"
- r"ch(?:apter)?[ .]*(\d+)(\.\w+)?", title)
- if match:
- vol, ch, data["chapter_minor"] = match.groups()
- data["volume"] = text.parse_int(vol)
- data["chapter"] = text.parse_int(ch)
+ _match_title = None
+
+ def _parse_chapter_title(self, title):
+ if not self._match_title:
+ MangaparkBase._match_title = re.compile(
+ r"(?i)"
+ r"(?:vol(?:\.|ume)?\s*(\d+)\s*)?"
+ r"ch(?:\.|apter)?\s*(\d+)([^\s:]*)"
+ r"(?:\s*:\s*(.*))?"
+ ).match
+ match = self._match_title(title)
+ return match.groups() if match else (0, 0, "", "")
class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
"""Extractor for manga-chapters from mangapark.net"""
- pattern = (r"(?:https?://)?(?:www\.|v2\.)?mangapark\.(me|net|com)"
- r"/manga/([^?#]+/i\d+)")
+ pattern = BASE_PATTERN + r"/title/[^/?#]+/(\d+)"
test = (
- ("https://mangapark.net/manga/gosu/i811653/c055/1", {
- "count": 50,
- "keyword": "db1ed9af4f972756a25dbfa5af69a8f155b043ff",
- }),
- (("https://mangapark.net/manga"
- "/ad-astra-per-aspera-hata-kenjirou/i662051/c001.2/1"), {
- "count": 40,
- "keyword": "2bb3a8f426383ea13f17ff5582f3070d096d30ac",
+ ("https://mangapark.net/title/114972-aria/6710214-en-ch.60.2", {
+ "count": 70,
+ "pattern": r"https://[\w-]+\.mpcdn\.org/comic/2002/e67"
+ r"/61e29278a583b9227964076e/\d+_\d+_\d+_\d+\.jpeg"
+ r"\?acc=[^&#]+&exp=\d+",
+ "keyword": {
+ "artist": [],
+ "author": ["Amano Kozue"],
+ "chapter": 60,
+ "chapter_id": 6710214,
+ "chapter_minor": ".2",
+ "count": 70,
+ "date": "dt:2022-01-15 09:25:03",
+ "extension": "jpeg",
+ "filename": str,
+ "genre": ["adventure", "comedy", "drama", "sci_fi",
+ "shounen", "slice_of_life"],
+ "lang": "en",
+ "language": "English",
+ "manga": "Aria",
+ "manga_id": 114972,
+ "page": int,
+ "source": "Koala",
+ "title": "Special Navigation - Aquaria Ii",
+ "volume": 12,
+ },
}),
- (("https://mangapark.net/manga"
- "/gekkan-shoujo-nozaki-kun/i2067426/v7/c70/1"), {
- "count": 15,
- "keyword": "edc14993c4752cee3a76e09b2f024d40d854bfd1",
- }),
- ("https://mangapark.me/manga/gosu/i811615/c55/1"),
- ("https://mangapark.com/manga/gosu/i811615/c55/1"),
+ ("https://mangapark.com/title/114972-aria/6710214-en-ch.60.2"),
+ ("https://mangapark.org/title/114972-aria/6710214-en-ch.60.2"),
+ ("https://mangapark.io/title/114972-aria/6710214-en-ch.60.2"),
+ ("https://mangapark.me/title/114972-aria/6710214-en-ch.60.2"),
)
def __init__(self, match):
- tld, self.path = match.groups()
- self.root = self.root_fmt.format(tld)
- url = "{}/manga/{}?zoom=2".format(self.root, self.path)
+ self.root = text.root_from_url(match.group(0))
+ url = "{}/title/_/{}".format(self.root, match.group(1))
ChapterExtractor.__init__(self, match, url)
def metadata(self, page):
- data = text.extract_all(page, (
- ("manga_id" , "var _manga_id = '", "'"),
- ("chapter_id", "var _book_id = '", "'"),
- ("stream" , "var _stream = '", "'"),
- ("path" , "var _book_link = '", "'"),
- ("manga" , "<h2>", "</h2>"),
- ("title" , "</a>", "<"),
- ), values={"lang": "en", "language": "English"})[0]
-
- if not data["path"]:
- raise exception.NotFoundError("chapter")
-
- self.parse_chapter_path(data["path"], data)
- if "chapter" not in data:
- self.parse_chapter_title(data["title"], data)
-
- data["manga"], _, data["type"] = data["manga"].rpartition(" ")
- data["manga"] = text.unescape(data["manga"])
- data["title"] = data["title"].partition(": ")[2]
- for key in ("manga_id", "chapter_id", "stream"):
- data[key] = text.parse_int(data[key])
-
- return data
+ data = util.json_loads(text.extr(
+ page, 'id="__NEXT_DATA__" type="application/json">', '<'))
+ chapter = (data["props"]["pageProps"]["dehydratedState"]
+ ["queries"][0]["state"]["data"]["data"])
+ manga = chapter["comicNode"]["data"]
+ source = chapter["sourceNode"]["data"]
+
+ self._urls = chapter["imageSet"]["httpLis"]
+ self._params = chapter["imageSet"]["wordLis"]
+ vol, ch, minor, title = self._parse_chapter_title(chapter["dname"])
+
+ return {
+ "manga" : manga["name"],
+ "manga_id" : manga["id"],
+ "artist" : source["artists"],
+ "author" : source["authors"],
+ "genre" : source["genres"],
+ "volume" : text.parse_int(vol),
+ "chapter" : text.parse_int(ch),
+ "chapter_minor": minor,
+ "chapter_id": chapter["id"],
+ "title" : chapter["title"] or title or "",
+ "lang" : chapter["lang"],
+ "language" : util.code_to_language(chapter["lang"]),
+ "source" : source["srcTitle"],
+ "source_id" : source["id"],
+ "date" : text.parse_timestamp(chapter["dateCreate"] // 1000),
+ }
def images(self, page):
- data = util.json_loads(text.extr(page, "var _load_pages =", ";"))
return [
- (text.urljoin(self.root, item["u"]), {
- "width": text.parse_int(item["w"]),
- "height": text.parse_int(item["h"]),
- })
- for item in data
+ (url + "?" + params, None)
+ for url, params in zip(self._urls, self._params)
]
-class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
+class MangaparkMangaExtractor(MangaparkBase, Extractor):
"""Extractor for manga from mangapark.net"""
- chapterclass = MangaparkChapterExtractor
- pattern = (r"(?:https?://)?(?:www\.|v2\.)?mangapark\.(me|net|com)"
- r"(/manga/[^/?#]+)/?$")
+ subcategory = "manga"
+ pattern = BASE_PATTERN + r"/title/(\d+)(?:-[^/?#]*)?/?$"
test = (
- ("https://mangapark.net/manga/aria", {
- "url": "51c6d82aed5c3c78e0d3f980b09a998e6a2a83ee",
- "keyword": "cabc60cf2efa82749d27ac92c495945961e4b73c",
+ ("https://mangapark.net/title/114972-aria", {
+ "count": 141,
+ "pattern": MangaparkChapterExtractor.pattern,
+ "keyword": {
+ "chapter": int,
+ "chapter_id": int,
+ "chapter_minor": str,
+ "date": "type:datetime",
+ "lang": "en",
+ "language": "English",
+ "manga_id": 114972,
+ "source": "re:Horse|Koala",
+ "source_id": int,
+ "title": str,
+ "volume": int,
+ },
+ }),
+ # 'source' option
+ ("https://mangapark.net/title/114972-aria", {
+ "options": (("source", "koala"),),
+ "count": 70,
+ "pattern": MangaparkChapterExtractor.pattern,
+ "keyword": {
+ "source": "Koala",
+ "source_id": 15150116,
+ },
}),
- ("https://mangapark.me/manga/aria"),
- ("https://mangapark.com/manga/aria"),
+ ("https://mangapark.com/title/114972-"),
+ ("https://mangapark.com/title/114972"),
+ ("https://mangapark.com/title/114972-aria"),
+ ("https://mangapark.org/title/114972-aria"),
+ ("https://mangapark.io/title/114972-aria"),
+ ("https://mangapark.me/title/114972-aria"),
)
def __init__(self, match):
- self.root = self.root_fmt.format(match.group(1))
- MangaExtractor.__init__(self, match, self.root + match.group(2))
-
- def chapters(self, page):
- results = []
- data = {"lang": "en", "language": "English"}
- data["manga"] = text.unescape(
- text.extr(page, '<title>', ' Manga - '))
-
- for stream in page.split('<div id="stream_')[1:]:
- data["stream"] = text.parse_int(text.extr(stream, '', '"'))
-
- for chapter in text.extract_iter(stream, '<li ', '</li>'):
- path , pos = text.extract(chapter, 'href="', '"')
- title1, pos = text.extract(chapter, '>', '<', pos)
- title2, pos = text.extract(chapter, '>: </span>', '<', pos)
- count , pos = text.extract(chapter, ' of ', ' ', pos)
-
- self.parse_chapter_path(path[8:], data)
- if "chapter" not in data:
- self.parse_chapter_title(title1, data)
-
- if title2:
- data["title"] = title2.strip()
- else:
- data["title"] = title1.partition(":")[2].strip()
-
- data["count"] = text.parse_int(count)
- results.append((self.root + path, data.copy()))
- data.pop("chapter", None)
-
- return results
+ self.root = text.root_from_url(match.group(0))
+ self.manga_id = int(match.group(1))
+ Extractor.__init__(self, match)
+
+ def items(self):
+ for chapter in self.chapters():
+ chapter = chapter["data"]
+ url = self.root + chapter["urlPath"]
+
+ vol, ch, minor, title = self._parse_chapter_title(chapter["dname"])
+ data = {
+ "manga_id" : self.manga_id,
+ "volume" : text.parse_int(vol),
+ "chapter" : text.parse_int(ch),
+ "chapter_minor": minor,
+ "chapter_id": chapter["id"],
+ "title" : chapter["title"] or title or "",
+ "lang" : chapter["lang"],
+ "language" : util.code_to_language(chapter["lang"]),
+ "source" : chapter["srcTitle"],
+ "source_id" : chapter["sourceId"],
+ "date" : text.parse_timestamp(
+ chapter["dateCreate"] // 1000),
+ "_extractor": MangaparkChapterExtractor,
+ }
+ yield Message.Queue, url, data
+
+ def chapters(self):
+ source = self.config("source")
+ if not source:
+ return self.chapters_all()
+
+ source_id = self._select_source(source)
+ self.log.debug("Requesting chapters for source_id %s", source_id)
+ return self.chapters_source(source_id)
+
+ def chapters_all(self):
+ pnum = 0
+ variables = {
+ "select": {
+ "comicId": self.manga_id,
+ "range" : None,
+ "isAsc" : not self.config("chapter-reverse"),
+ }
+ }
+
+ while True:
+ data = self._request_graphql(
+ "get_content_comicChapterRangeList", variables)
+
+ for item in data["items"]:
+ yield from item["chapterNodes"]
+
+ if not pnum:
+ pager = data["pager"]
+ pnum += 1
+
+ try:
+ variables["select"]["range"] = pager[pnum]
+ except IndexError:
+ return
+
+ def chapters_source(self, source_id):
+ variables = {
+ "sourceId": source_id,
+ }
+ chapters = self._request_graphql(
+ "get_content_source_chapterList", variables)
+
+ if self.config("chapter-reverse"):
+ chapters.reverse()
+ return chapters
+
+ def _select_source(self, source):
+ if isinstance(source, int):
+ return source
+
+ group, _, lang = source.partition(":")
+ group = group.lower()
+
+ variables = {
+ "comicId" : self.manga_id,
+ "dbStatuss" : ["normal"],
+ "haveChapter": True,
+ }
+ for item in self._request_graphql(
+ "get_content_comic_sources", variables):
+ data = item["data"]
+ if (not group or data["srcTitle"].lower() == group) and (
+ not lang or data["lang"] == lang):
+ return data["id"]
+
+ raise exception.StopExtraction(
+ "'%s' does not match any available source", source)
+
+ def _request_graphql(self, opname, variables):
+ url = self.root + "/apo/"
+ data = {
+ "query" : QUERIES[opname],
+ "variables" : util.json_dumps(variables),
+ "operationName": opname,
+ }
+ return self.request(
+ url, method="POST", json=data).json()["data"][opname]
+
+
+QUERIES = {
+ "get_content_comicChapterRangeList": """
+ query get_content_comicChapterRangeList($select: Content_ComicChapterRangeList_Select) {
+ get_content_comicChapterRangeList(
+ select: $select
+ ) {
+ reqRange{x y}
+ missing
+ pager {x y}
+ items{
+ serial
+ chapterNodes {
+
+ id
+ data {
+
+
+ id
+ sourceId
+
+ dbStatus
+ isNormal
+ isHidden
+ isDeleted
+ isFinal
+
+ dateCreate
+ datePublic
+ dateModify
+ lang
+ volume
+ serial
+ dname
+ title
+ urlPath
+
+ srcTitle srcColor
+
+ count_images
+
+ stat_count_post_child
+ stat_count_post_reply
+ stat_count_views_login
+ stat_count_views_guest
+
+ userId
+ userNode {
+
+ id
+ data {
+
+id
+name
+uniq
+avatarUrl
+urlPath
+
+verified
+deleted
+banned
+
+dateCreate
+dateOnline
+
+stat_count_chapters_normal
+stat_count_chapters_others
+
+is_adm is_mod is_vip is_upr
+
+ }
+
+ }
+
+ disqusId
+
+
+ }
+
+ sser_read
+ }
+ }
+
+ }
+ }
+""",
+
+ "get_content_source_chapterList": """
+ query get_content_source_chapterList($sourceId: Int!) {
+ get_content_source_chapterList(
+ sourceId: $sourceId
+ ) {
+
+ id
+ data {
+
+
+ id
+ sourceId
+
+ dbStatus
+ isNormal
+ isHidden
+ isDeleted
+ isFinal
+
+ dateCreate
+ datePublic
+ dateModify
+ lang
+ volume
+ serial
+ dname
+ title
+ urlPath
+
+ srcTitle srcColor
+
+ count_images
+
+ stat_count_post_child
+ stat_count_post_reply
+ stat_count_views_login
+ stat_count_views_guest
+
+ userId
+ userNode {
+
+ id
+ data {
+
+id
+name
+uniq
+avatarUrl
+urlPath
+
+verified
+deleted
+banned
+
+dateCreate
+dateOnline
+
+stat_count_chapters_normal
+stat_count_chapters_others
+
+is_adm is_mod is_vip is_upr
+
+ }
+
+ }
+
+ disqusId
+
+
+ }
+
+ }
+ }
+""",
+
+ "get_content_comic_sources": """
+ query get_content_comic_sources($comicId: Int!, $dbStatuss: [String] = [], $userId: Int, $haveChapter: Boolean, $sortFor: String) {
+ get_content_comic_sources(
+ comicId: $comicId
+ dbStatuss: $dbStatuss
+ userId: $userId
+ haveChapter: $haveChapter
+ sortFor: $sortFor
+ ) {
+
+id
+data{
+
+ id
+
+ dbStatus
+ isNormal
+ isHidden
+ isDeleted
+
+ lang name altNames authors artists
+
+ release
+ genres summary{code} extraInfo{code}
+
+ urlCover600
+ urlCover300
+ urlCoverOri
+
+ srcTitle srcColor
+
+ chapterCount
+ chapterNode_last {
+ id
+ data {
+ dateCreate datePublic dateModify
+ volume serial
+ dname title
+ urlPath
+ userNode {
+ id data {uniq name}
+ }
+ }
+ }
+}
+
+ }
+ }
+""",
+}
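
Both chapter listings above go through MangaPark's GraphQL endpoint. A sketch of the same call outside the extractor (endpoint path and payload shape taken from _request_graphql above; standalone requests usage is an assumption):

    import json
    import requests

    def request_graphql(opname, variables, root="https://mangapark.net"):
        # POST one of the QUERIES strings with its variables,
        # then unwrap the operation's payload from the response
        data = {
            "query": QUERIES[opname],
            "variables": json.dumps(variables),
            "operationName": opname,
        }
        resp = requests.post(root + "/apo/", json=data)
        return resp.json()["data"][opname]

    chapters = request_graphql(
        "get_content_source_chapterList", {"sourceId": 15150116})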
diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py
index b03d6f8..56c2978 100644
--- a/gallery_dl/extractor/piczel.py
+++ b/gallery_dl/extractor/piczel.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2022 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -19,7 +19,7 @@ class PiczelExtractor(Extractor):
filename_fmt = "{category}_{id}_{title}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
root = "https://piczel.tv"
- api_root = "https://tombstone.piczel.tv"
+ api_root = root
def items(self):
for post in self.posts():
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index 14c25c4..e3bb512 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -76,11 +76,12 @@ class PoipikuExtractor(Extractor):
"MD" : "0",
"TWF": "-1",
}
- page = self.request(
- url, method="POST", headers=headers, data=data).json()["html"]
+ resp = self.request(
+ url, method="POST", headers=headers, data=data).json()
- if page.startswith(("You need to", "Password is incorrect")):
- self.log.warning("'%s'", page)
+ page = resp["html"]
+ if (resp.get("result_num") or 0) < 0:
+ self.log.warning("'%s'", page.replace("<br/>", " "))
for thumb in text.extract_iter(
page, 'class="IllustItemThumbImg" src="', '"'):
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index f36051b..09e5421 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -46,10 +46,15 @@ class SankakuExtractor(BooruExtractor):
def _file_url(self, post):
url = post["file_url"]
- if not url and self._warning:
- self.log.warning(
- "Login required to download 'contentious_content' posts")
- SankakuExtractor._warning = False
+ if not url:
+ if post["status"] != "active":
+ self.log.warning(
+ "Unable to download post %s (%s)",
+ post["id"], post["status"])
+ elif self._warning:
+ self.log.warning(
+ "Login required to download 'contentious_content' posts")
+ SankakuExtractor._warning = False
elif url[8] == "v":
url = "https://s.sankakucomplex.com" + url[url.index("/", 8):]
return url
diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py
index 6d025f4..92c9d2c 100644
--- a/gallery_dl/extractor/senmanga.py
+++ b/gallery_dl/extractor/senmanga.py
@@ -58,6 +58,12 @@ class SenmangaChapterExtractor(ChapterExtractor):
"manga": "Akabane Honeko no Bodyguard",
},
}),
+        # no http scheme (#4235)
+ ("https://raw.senmanga.com/amama-cinderella/3", {
+ "pattern": r"^https://kumacdn.club/image-new-2/a/amama-cinderella"
+ r"/chapter-3/.+\.jpg",
+ "count": 30,
+ }),
)
def __init__(self, match):
@@ -82,7 +88,7 @@ class SenmangaChapterExtractor(ChapterExtractor):
def images(self, page):
return [
- (url, None)
+ (text.ensure_http_scheme(url), None)
for url in text.extract_iter(
page, '<img class="picture" src="', '"')
]
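
text.ensure_http_scheme leaves complete URLs alone and prepends "https://" to scheme-less ones, which is what the new test relies on. A stand-in with that behavior (simplified; not necessarily gallery-dl's exact implementation):

    def ensure_http_scheme(url, scheme="https://"):
        # prepend a scheme to scheme-relative or bare URLs
        if url and not url.startswith(("https://", "http://")):
            return scheme + url.lstrip("/:")
        return url

    ensure_http_scheme("//kumacdn.club/a.jpg")       # 'https://kumacdn.club/a.jpg'
    ensure_http_scheme("https://example.org/a.jpg")  # unchanged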
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 710bde3..10db974 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -919,7 +919,9 @@ Your reaction.""",
self.tweet_id = match.group(2)
def tweets(self):
- if self.config("conversations", False):
+ conversations = self.config("conversations")
+ if conversations:
+ self._accessible = (conversations == "accessible")
return self._tweets_conversation(self.tweet_id)
else:
return self._tweets_single(self.tweet_id)
@@ -950,6 +952,11 @@ Your reaction.""",
tweet.get("_retweet_id_str") == tweet_id:
self._assign_user(tweet["core"]["user_results"]["result"])
break
+ else:
+ # initial Tweet not accessible
+ if self._accessible:
+ return ()
+ return buffer
return itertools.chain(buffer, tweets)
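
The new else branch above belongs to the for loop that scans the buffered Tweets for the requested ID: it runs only when the loop finishes without hitting break, i.e. when the initial Tweet never turned up. A minimal illustration of Python's for-else pattern (names illustrative):

    def find(items, wanted):
        for item in items:
            if item == wanted:
                break
        else:
            # loop exhausted without break: 'wanted' was not found
            return None
        return item

    find([1, 2, 3], 2)   # 2
    find([1, 2, 3], 9)   # None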
diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py
index a0fba3c..0ba0d91 100644
--- a/gallery_dl/extractor/wallhaven.py
+++ b/gallery_dl/extractor/wallhaven.py
@@ -260,7 +260,7 @@ class WallhavenAPI():
self.extractor.log.debug("Server response: %s", response.text)
raise exception.StopExtraction(
- "API request failed (%s: %s)",
+ "API request failed (%s %s)",
response.status_code, response.reason)
def _pagination(self, endpoint, params=None, metadata=None):
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 09b8612..9438d73 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.25.6"
+__version__ = "1.25.7"
diff --git a/setup.cfg b/setup.cfg
index f3565af..46af2d5 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -4,6 +4,7 @@ ignore = E203,E226,W504
per-file-ignores =
setup.py: E501
gallery_dl/extractor/500px.py: E501
+ gallery_dl/extractor/mangapark.py: E501
[egg_info]
tag_build =
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index ac89b55..554a51e 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -164,6 +164,76 @@ class ClassifyTest(BasePostprocessorTest):
mkdirs.assert_called_once_with(path, exist_ok=True)
+class ExecTest(BasePostprocessorTest):
+
+ def test_command_string(self):
+ self._create({
+ "command": "echo {} && rm {};",
+ })
+
+ with patch("subprocess.Popen") as p:
+ i = Mock()
+ i.wait.return_value = 0
+ p.return_value = i
+ self._trigger(("after",))
+
+ p.assert_called_once_with(
+ "echo {0} && rm {0};".format(self.pathfmt.realpath), shell=True)
+ i.wait.assert_called_once_with()
+
+ def test_command_list(self):
+ self._create({
+ "command": ["~/script.sh", "{category}",
+ "\fE _directory.upper()"],
+ })
+
+ with patch("subprocess.Popen") as p:
+ i = Mock()
+ i.wait.return_value = 0
+ p.return_value = i
+ self._trigger(("after",))
+
+ p.assert_called_once_with(
+ [
+ os.path.expanduser("~/script.sh"),
+ self.pathfmt.kwdict["category"],
+ self.pathfmt.realdirectory.upper(),
+ ],
+ shell=False,
+ )
+
+ def test_command_returncode(self):
+ self._create({
+ "command": "echo {}",
+ })
+
+ with patch("subprocess.Popen") as p:
+ i = Mock()
+ i.wait.return_value = 123
+ p.return_value = i
+
+ with self.assertLogs() as log:
+ self._trigger(("after",))
+
+ msg = ("WARNING:postprocessor.exec:'echo {}' returned with "
+ "non-zero exit status (123)".format(self.pathfmt.realpath))
+ self.assertEqual(log.output[0], msg)
+
+ def test_async(self):
+ self._create({
+ "async" : True,
+ "command": "echo {}",
+ })
+
+ with patch("subprocess.Popen") as p:
+ i = Mock()
+ p.return_value = i
+ self._trigger(("after",))
+
+ self.assertTrue(p.called)
+ self.assertFalse(i.wait.called)
+
+
class MetadataTest(BasePostprocessorTest):
def test_metadata_default(self):
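
All four ExecTest cases above follow one pattern: patch subprocess.Popen, hand back a Mock whose wait() reports the desired exit status, then assert on the recorded call. Condensed to its core (standard unittest.mock only; command and values illustrative):

    import subprocess
    from unittest.mock import Mock, patch

    with patch("subprocess.Popen") as p:
        proc = Mock()
        proc.wait.return_value = 0
        p.return_value = proc

        # code under test would run here; no real process is spawned
        status = subprocess.Popen("echo ok", shell=True).wait()

    p.assert_called_once_with("echo ok", shell=True)
    assert status == 0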