author    Unit 193 <unit193@unit193.net>  2022-11-01 21:32:54 -0400
committer Unit 193 <unit193@unit193.net>  2022-11-01 21:32:54 -0400
commit    e59d46ecda74190381b1d2725b0bd9df5c0be8d8 (patch)
tree      cff6d69d4f68ef011a496ff2311173ebef70bf3f
parent    78e2d1672e4301497f786cd03637de9ddbc717ac (diff)

New upstream version 1.23.5 (tag: upstream/1.23.5)
-rw-r--r--  CHANGELOG.md                         |  32
-rw-r--r--  PKG-INFO                             |   6
-rw-r--r--  README.rst                           |   4
-rw-r--r--  data/man/gallery-dl.1                |   2
-rw-r--r--  data/man/gallery-dl.conf.5           |  36
-rw-r--r--  docs/gallery-dl.conf                 |   1
-rw-r--r--  gallery_dl.egg-info/PKG-INFO         |   6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt      |   3
-rw-r--r--  gallery_dl/extractor/8kun.py         | 100
-rw-r--r--  gallery_dl/extractor/__init__.py     |   3
-rw-r--r--  gallery_dl/extractor/bcy.py          |  30
-rw-r--r--  gallery_dl/extractor/danbooru.py     |  18
-rw-r--r--  gallery_dl/extractor/gelbooru.py     |  14
-rw-r--r--  gallery_dl/extractor/hentai2read.py  |  82
-rw-r--r--  gallery_dl/extractor/hentaihere.py   |  71
-rw-r--r--  gallery_dl/extractor/instagram.py    |  97
-rw-r--r--  gallery_dl/extractor/kemonoparty.py  |  12
-rw-r--r--  gallery_dl/extractor/manganelo.py    |  12
-rw-r--r--  gallery_dl/extractor/mangasee.py     |  94
-rw-r--r--  gallery_dl/extractor/mastodon.py     |  35
-rw-r--r--  gallery_dl/extractor/moebooru.py     |  61
-rw-r--r--  gallery_dl/extractor/pixiv.py        |   9
-rw-r--r--  gallery_dl/extractor/reactor.py      |   6
-rw-r--r--  gallery_dl/extractor/redgifs.py      |  32
-rw-r--r--  gallery_dl/extractor/skeb.py         |   7
-rw-r--r--  gallery_dl/extractor/tumblr.py       |   6
-rw-r--r--  gallery_dl/extractor/vichan.py       | 163
-rw-r--r--  gallery_dl/extractor/wikieat.py      |  95
-rw-r--r--  gallery_dl/util.py                   |   6
-rw-r--r--  gallery_dl/version.py                |   2
-rw-r--r--  test/test_results.py                 |   2
31 files changed, 694 insertions, 353 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5901e37..21341ef 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,37 @@
# Changelog
+## 1.23.5 - 2022-10-30
+### Fixes
+- [instagram] fix AttributeError on user stories extraction ([#3123](https://github.com/mikf/gallery-dl/issues/3123))
+
+## 1.23.4 - 2022-10-29
+### Additions
+- [aibooru] add support for aibooru.online ([#3075](https://github.com/mikf/gallery-dl/issues/3075))
+- [instagram] add 'avatar' extractor ([#929](https://github.com/mikf/gallery-dl/issues/929), [#1097](https://github.com/mikf/gallery-dl/issues/1097), [#2992](https://github.com/mikf/gallery-dl/issues/2992))
+- [instagram] support 'instagram.com/s/' highlight URLs ([#3076](https://github.com/mikf/gallery-dl/issues/3076))
+- [instagram] extract 'coauthors' metadata ([#3107](https://github.com/mikf/gallery-dl/issues/3107))
+- [mangasee] add support for 'mangalife' ([#3086](https://github.com/mikf/gallery-dl/issues/3086))
+- [mastodon] add 'bookmark' extractor ([#3109](https://github.com/mikf/gallery-dl/issues/3109))
+- [mastodon] support cross-instance user references and '/web/' URLs ([#3109](https://github.com/mikf/gallery-dl/issues/3109))
+- [moebooru] implement 'notes' extraction ([#3094](https://github.com/mikf/gallery-dl/issues/3094))
+- [pixiv] extend 'metadata' option ([#3057](https://github.com/mikf/gallery-dl/issues/3057))
+- [reactor] match 'best', 'new', 'all' URLs ([#3073](https://github.com/mikf/gallery-dl/issues/3073))
+- [smugloli] add 'smugloli' extractors ([#3060](https://github.com/mikf/gallery-dl/issues/3060))
+- [tumblr] add 'fallback-delay' and 'fallback-retries' options ([#2957](https://github.com/mikf/gallery-dl/issues/2957))
+- [vichan] add generic extractors for vichan imageboards
+### Fixes
+- [bcy] fix extraction ([#3103](https://github.com/mikf/gallery-dl/issues/3103))
+- [gelbooru] support alternate parameter order in post URLs ([#2821](https://github.com/mikf/gallery-dl/issues/2821))
+- [hentai2read] support minor versions in chapter URLs ([#3089](https://github.com/mikf/gallery-dl/issues/3089))
+- [hentaihere] support minor versions in chapter URLs
+- [kemonoparty] fix 'dms' extraction ([#3106](https://github.com/mikf/gallery-dl/issues/3106))
+- [kemonoparty] update pagination offset
+- [manganelo] update domain to 'chapmanganato.com' ([#3097](https://github.com/mikf/gallery-dl/issues/3097))
+- [pixiv] use 'exact_match_for_tags' as default search mode ([#3092](https://github.com/mikf/gallery-dl/issues/3092))
+- [redgifs] fix 'token' extraction ([#3080](https://github.com/mikf/gallery-dl/issues/3080), [#3081](https://github.com/mikf/gallery-dl/issues/3081))
+- [skeb] fix extraction ([#3112](https://github.com/mikf/gallery-dl/issues/3112))
+- improve compatibility of DownloadArchive ([#3078](https://github.com/mikf/gallery-dl/issues/3078))
+
## 1.23.3 - 2022-10-15
### Additions
- [2chen] Add `2chen.moe` extractor ([#2707](https://github.com/mikf/gallery-dl/issues/2707))
diff --git a/PKG-INFO b/PKG-INFO
index 2ecb797..f229a02 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.23.3
+Version: 1.23.5
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -99,8 +99,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.23.3/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.23.3/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.23.5/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.23.5/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/README.rst b/README.rst
index 1457efc..c385526 100644
--- a/README.rst
+++ b/README.rst
@@ -66,8 +66,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.23.3/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.23.3/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.23.5/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.23.5/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index cca3dee..0b27854 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2022-10-15" "1.23.3" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2022-10-30" "1.23.5" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 1c484b6..8944195 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2022-10-15" "1.23.3" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2022-10-30" "1.23.5" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1746,8 +1746,13 @@ A (comma-separated) list of subcategories to include
when processing a user profile.
Possible values are
-\f[I]"posts"\f[], \f[I]"reels"\f[], \f[I]"channel"\f[], \f[I]"tagged"\f[],
-\f[I]"stories"\f[], \f[I]"highlights"\f[].
+\f[I]"posts"\f[],
+\f[I]"reels"\f[],
+\f[I]"channel"\f[]
+\f[I]"tagged"\f[],
+\f[I]"stories"\f[],
+\f[I]"highlights"\f[],
+\f[I]"avatar"\f[].
You can use \f[I]"all"\f[] instead of listing all values separately.
@@ -2250,7 +2255,7 @@ Possible values are
It is possible to use \f[I]"all"\f[] instead of listing all values separately.
-.SS extractor.pixiv.artworks.metadata
+.SS extractor.pixiv.metadata
.IP "Type:" 6
\f[I]bool\f[]
@@ -2727,6 +2732,29 @@ Possible types are \f[I]text\f[], \f[I]quote\f[], \f[I]link\f[], \f[I]answer\f[]
You can use \f[I]"all"\f[] instead of listing all types separately.
+.SS extractor.tumblr.fallback-delay
+.IP "Type:" 6
+\f[I]float\f[]
+
+.IP "Default:" 9
+\f[I]120.0\f[]
+
+.IP "Description:" 4
+Number of seconds to wait between retries
+for fetching full-resolution images.
+
+
+.SS extractor.tumblr.fallback-retries
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]2\f[]
+
+.IP "Description:" 4
+Number of retries for fetching full-resolution images.
+
+
.SS extractor.twibooru.api-key
.IP "Type:" 6
\f[I]string\f[]
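A minimal gallery-dl.conf snippet exercising the two new tumblr options (the values shown are the documented defaults):

```json
{
    "extractor": {
        "tumblr": {
            "fallback-retries": 2,
            "fallback-delay": 120.0
        }
    }
}
```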
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index e507eb0..1fcbb3b 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -230,6 +230,7 @@
{
"refresh-token": null,
"include": "artworks",
+ "metadata": false,
"tags": "japanese",
"ugoira": true
},
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index c1bfabf..d00e803 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.23.3
+Version: 1.23.5
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -99,8 +99,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.23.3/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.23.3/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.23.5/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.23.5/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index b768d5b..3fa2176 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -47,7 +47,6 @@ gallery_dl/extractor/420chan.py
gallery_dl/extractor/4chan.py
gallery_dl/extractor/500px.py
gallery_dl/extractor/8chan.py
-gallery_dl/extractor/8kun.py
gallery_dl/extractor/8muses.py
gallery_dl/extractor/__init__.py
gallery_dl/extractor/adultempire.py
@@ -189,6 +188,7 @@ gallery_dl/extractor/twibooru.py
gallery_dl/extractor/twitter.py
gallery_dl/extractor/unsplash.py
gallery_dl/extractor/vanillarock.py
+gallery_dl/extractor/vichan.py
gallery_dl/extractor/vk.py
gallery_dl/extractor/vsco.py
gallery_dl/extractor/wallhaven.py
@@ -198,7 +198,6 @@ gallery_dl/extractor/weasyl.py
gallery_dl/extractor/webtoons.py
gallery_dl/extractor/weibo.py
gallery_dl/extractor/wikiart.py
-gallery_dl/extractor/wikieat.py
gallery_dl/extractor/xhamster.py
gallery_dl/extractor/xvideos.py
gallery_dl/extractor/ytdl.py
diff --git a/gallery_dl/extractor/8kun.py b/gallery_dl/extractor/8kun.py
deleted file mode 100644
index 5d260b9..0000000
--- a/gallery_dl/extractor/8kun.py
+++ /dev/null
@@ -1,100 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2020-2022 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://8kun.top/"""
-
-from .common import Extractor, Message
-from .. import text
-
-
-class _8kunThreadExtractor(Extractor):
- """Extractor for 8kun threads"""
- category = "8kun"
- subcategory = "thread"
- directory_fmt = ("{category}", "{board}", "{thread} {title}")
- filename_fmt = "{time}{num:?-//} {filename}.{extension}"
- archive_fmt = "{board}_{thread}_{tim}"
- pattern = r"(?:https?://)?8kun\.top/([^/]+)/res/(\d+)"
- test = (
- ("https://8kun.top/test/res/65248.html", {
- "pattern": r"https://media\.8kun\.top/file_store/\w{64}\.\w+",
- "count": ">= 8",
- }),
- # old-style file URLs (#1101)
- # ("https://8kun.top/d/res/13258.html", {
- # "pattern": r"https://media\.8kun\.top/d/src/\d+(-\d)?\.\w+",
- # "range": "1-20",
- # }),
- )
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.board, self.thread = match.groups()
-
- def items(self):
- url = "https://8kun.top/{}/res/{}.json".format(self.board, self.thread)
- posts = self.request(url).json()["posts"]
- title = posts[0].get("sub") or text.remove_html(posts[0]["com"])
- process = self._process
-
- data = {
- "board" : self.board,
- "thread": self.thread,
- "title" : text.unescape(title)[:50],
- "num" : 0,
- }
-
- yield Message.Directory, data
- for post in posts:
- if "filename" in post:
- yield process(post, data)
- if "extra_files" in post:
- for post["num"], filedata in enumerate(
- post["extra_files"], 1):
- yield process(post, filedata)
-
- @staticmethod
- def _process(post, data):
- post.update(data)
- post["extension"] = post["ext"][1:]
- tim = post["tim"]
- url = ("https://media.8kun.top/" +
- ("file_store/" if len(tim) > 16 else post["board"] + "/src/") +
- tim + post["ext"])
- return Message.Url, url, post
-
-
-class _8kunBoardExtractor(Extractor):
- """Extractor for 8kun boards"""
- category = "8kun"
- subcategory = "board"
- pattern = r"(?:https?://)?8kun\.top/([^/?#]+)/(?:index|\d+)\.html"
- test = (
- ("https://8kun.top/v/index.html", {
- "pattern": _8kunThreadExtractor.pattern,
- "count": ">= 100",
- }),
- ("https://8kun.top/v/2.html"),
- ("https://8kun.top/v/index.html?PageSpeed=noscript"),
- )
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.board = match.group(1)
-
- def items(self):
- url = "https://8kun.top/{}/threads.json".format(self.board)
- threads = self.request(url).json()
-
- for page in threads:
- for thread in page["threads"]:
- url = "https://8kun.top/{}/res/{}.html".format(
- self.board, thread["no"])
- thread["page"] = page["page"]
- thread["_extractor"] = _8kunThreadExtractor
- yield Message.Queue, url, thread
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 851f660..9e0340a 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -17,7 +17,6 @@ modules = [
"4chan",
"500px",
"8chan",
- "8kun",
"8muses",
"adultempire",
"architizer",
@@ -143,6 +142,7 @@ modules = [
"twitter",
"unsplash",
"vanillarock",
+ "vichan",
"vk",
"vsco",
"wallhaven",
@@ -152,7 +152,6 @@ modules = [
"webtoons",
"weibo",
"wikiart",
- "wikieat",
"xhamster",
"xvideos",
"zerochan",
diff --git a/gallery_dl/extractor/bcy.py b/gallery_dl/extractor/bcy.py
index 47e51b3..7982881 100644
--- a/gallery_dl/extractor/bcy.py
+++ b/gallery_dl/extractor/bcy.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020-2021 Mike Fährmann
+# Copyright 2020-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -25,9 +25,12 @@ class BcyExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.item_id = match.group(1)
+ self.session.headers["Referer"] = self.root + "/"
def items(self):
- sub = re.compile(r"^https?://p\d+-bcy\.byteimg\.com/img/banciyuan").sub
+ sub = re.compile(r"^https?://p\d+-bcy"
+ r"(?:-sign\.bcyimg\.com|\.byteimg\.com/img)"
+ r"/banciyuan").sub
iroot = "https://img-bcy-qn.pstatp.com"
noop = self.config("noop")
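For illustration, the extended substitution now catches both the old byteimg host and the new bcyimg host and rewrites either to the pstatp mirror; a standalone sketch with a made-up image path:

```python
import re

sub = re.compile(r"^https?://p\d+-bcy"
                 r"(?:-sign\.bcyimg\.com|\.byteimg\.com/img)"
                 r"/banciyuan").sub
iroot = "https://img-bcy-qn.pstatp.com"

# hypothetical watermarked image URL on the new CDN host
url = "https://p3-bcy-sign.bcyimg.com/banciyuan/0abc123~tplv-abc.image"
print(sub(iroot, url))
# https://img-bcy-qn.pstatp.com/0abc123~tplv-abc.image
```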
@@ -64,19 +67,18 @@ class BcyExtractor(Extractor):
url = image["path"].partition("~")[0]
text.nameext_from_url(url, data)
+ # full-resolution image without watermark
if data["extension"]:
if not url.startswith(iroot):
url = sub(iroot, url)
data["filter"] = ""
yield Message.Url, url, data
+ # watermarked image & low quality noop filter
else:
- if not multi:
- if len(post["multi"]) < len(post["image_list"]):
- multi = self._data_from_post(post["item_id"])
- multi = multi["post_data"]["multi"]
- else:
- multi = post["multi"]
+ if multi is None:
+ multi = self._data_from_post(
+ post["item_id"])["post_data"]["multi"]
image = multi[data["num"] - 1]
if image["origin"]:
@@ -111,8 +113,8 @@ class BcyUserExtractor(BcyExtractor):
"count": ">= 20",
}),
("https://bcy.net/u/109282764041", {
- "pattern": r"https://p\d-bcy.byteimg.com/img/banciyuan/[0-9a-f]+"
- r"~tplv-banciyuan-logo-v3:.+\.image",
+ "pattern": r"https://p\d-bcy-sign\.bcyimg\.com/banciyuan/[0-9a-f]+"
+ r"~tplv-bcyx-yuan-logo-v1:.+\.image",
"range": "1-25",
"count": 25,
}),
@@ -171,13 +173,13 @@ class BcyPostExtractor(BcyExtractor):
}),
# only watermarked images available
("https://bcy.net/item/detail/6950136331708144648", {
- "pattern": r"https://p\d-bcy.byteimg.com/img/banciyuan/[0-9a-f]+"
- r"~tplv-banciyuan-logo-v3:.+\.image",
- "count": 8,
+ "pattern": r"https://p\d-bcy-sign\.bcyimg\.com/banciyuan/[0-9a-f]+"
+ r"~tplv-bcyx-yuan-logo-v1:.+\.image",
+ "count": 10,
"keyword": {"filter": "watermark"},
}),
# deleted
- ("https://bcy.net/item/detail/6780546160802143236", {
+ ("https://bcy.net/item/detail/6780546160802143237", {
"exception": exception.NotFoundError,
"count": 0,
}),
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index c455ce1..906afda 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -102,6 +102,9 @@ class DanbooruExtractor(BaseExtractor):
resp = self.request(template.format(self.root, post["id"]))
post.update(resp.json())
+ if url[0] == "/":
+ url = self.root + url
+
post.update(data)
yield Message.Directory, post
yield Message.Url, url, post
@@ -170,6 +173,10 @@ INSTANCES = {
"pattern": r"booru\.allthefallen\.moe",
"page-limit": 5000,
},
+ "aibooru": {
+ "root": None,
+ "pattern": r"(?:safe.)?aibooru\.online",
+ }
}
BASE_PATTERN = DanbooruExtractor.update(INSTANCES)
@@ -202,10 +209,16 @@ class DanbooruTagExtractor(DanbooruExtractor):
("https://booru.allthefallen.moe/posts?tags=yume_shokunin", {
"count": 12,
}),
+ ("https://aibooru.online/posts?tags=center_frills&z=1", {
+ "pattern": r"https://aibooru\.online/data/original"
+ r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w+",
+ "count": ">= 3",
+ }),
("https://hijiribe.donmai.us/posts?tags=bonocho"),
("https://sonohara.donmai.us/posts?tags=bonocho"),
("https://safebooru.donmai.us/posts?tags=bonocho"),
("https://e926.net/posts?tags=anry"),
+ ("https://safe.aibooru.online/posts?tags=center_frills"),
)
def __init__(self, match):
@@ -238,6 +251,7 @@ class DanbooruPoolExtractor(DanbooruExtractor):
"url": "902549ffcdb00fe033c3f63e12bc3cb95c5fd8d5",
"count": 6,
}),
+ ("https://aibooru.online/pools/1"),
("https://danbooru.donmai.us/pool/show/7659"),
("https://e621.net/pool/show/73"),
)
@@ -300,6 +314,9 @@ class DanbooruPostExtractor(DanbooruExtractor):
("https://booru.allthefallen.moe/posts/22", {
"content": "21dda68e1d7e0a554078e62923f537d8e895cac8",
}),
+ ("https://aibooru.online/posts/1", {
+ "content": "54d548743cd67799a62c77cbae97cfa0fec1b7e9",
+ }),
("https://danbooru.donmai.us/post/show/294929"),
("https://e621.net/post/show/535"),
)
@@ -334,6 +351,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
"count": ">= 70",
}),
("https://booru.allthefallen.moe/explore/posts/popular"),
+ ("https://aibooru.online/explore/posts/popular"),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 92f7ac2..a2cf0c0 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -142,13 +142,23 @@ class GelbooruPoolExtractor(GelbooruBase,
class GelbooruPostExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02PostExtractor):
"""Extractor for single images from gelbooru.com"""
- pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
- r"\?page=post&s=view&id=(?P<post>\d+)")
+ pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?"
+ r"(?=(?:[^#]+&)?page=post(?:&|#|$))"
+ r"(?=(?:[^#]+&)?s=view(?:&|#|$))"
+ r"(?:[^#]+&)?id=(\d+)")
test = (
("https://gelbooru.com/index.php?page=post&s=view&id=313638", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
"count": 1,
}),
+
+ ("https://gelbooru.com/index.php?page=post&s=view&id=313638"),
+ ("https://gelbooru.com/index.php?s=view&page=post&id=313638"),
+ ("https://gelbooru.com/index.php?page=post&id=313638&s=view"),
+ ("https://gelbooru.com/index.php?s=view&id=313638&page=post"),
+ ("https://gelbooru.com/index.php?id=313638&page=post&s=view"),
+ ("https://gelbooru.com/index.php?id=313638&s=view&page=post"),
+
("https://gelbooru.com/index.php?page=post&s=view&id=6018318", {
"options": (("tags", True),),
"content": "977caf22f27c72a5d07ea4d4d9719acdab810991",
diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py
index 53be67b..dc4e31d 100644
--- a/gallery_dl/extractor/hentai2read.py
+++ b/gallery_dl/extractor/hentai2read.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2019 Mike Fährmann
+# Copyright 2016-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract hentai-manga from https://hentai2read.com/"""
+"""Extractors for https://hentai2read.com/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
@@ -23,11 +23,32 @@ class Hentai2readBase():
class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
"""Extractor for a single manga chapter from hentai2read.com"""
archive_fmt = "{chapter_id}_{page}"
- pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?#]+/(\d+))"
- test = ("https://hentai2read.com/amazon_elixir/1/", {
- "url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
- "keyword": "ff84b8f751f0e4ee37717efc4332ff1db71951d9",
- })
+ pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?#]+/([^/?#]+))"
+ test = (
+ ("https://hentai2read.com/amazon_elixir/1/", {
+ "url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
+ "keyword": "85645b02d34aa11b3deb6dadd7536863476e1bad",
+ }),
+ ("https://hentai2read.com/popuni_kei_joshi_panic/2.5/", {
+ "pattern": r"https://hentaicdn\.com/hentai"
+ r"/13088/2\.5y/ccdn00\d+\.jpg",
+ "count": 36,
+ "keyword": {
+ "author": "Kurisu",
+ "chapter": 2,
+ "chapter_id": 75152,
+ "chapter_minor": ".5",
+ "count": 36,
+ "lang": "en",
+ "language": "English",
+ "manga": "Popuni Kei Joshi Panic!",
+ "manga_id": 13088,
+ "page": int,
+ "title": "Popuni Kei Joshi Panic! 2.5",
+ "type": "Original",
+ },
+ }),
+ )
def __init__(self, match):
self.chapter = match.group(2)
@@ -37,12 +58,14 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
title, pos = text.extract(page, "<title>", "</title>")
manga_id, pos = text.extract(page, 'data-mid="', '"', pos)
chapter_id, pos = text.extract(page, 'data-cid="', '"', pos)
+ chapter, sep, minor = self.chapter.partition(".")
match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.+))? - "
- r"(\d+): (.+) . Page 1 ", title)
+ r"([^:]+): (.+) . Page 1 ", title)
return {
"manga": match.group(1),
"manga_id": text.parse_int(manga_id),
- "chapter": text.parse_int(self.chapter),
+ "chapter": text.parse_int(chapter),
+ "chapter_minor": sep + minor,
"chapter_id": text.parse_int(chapter_id),
"type": match.group(2),
"author": match.group(3),
@@ -51,8 +74,7 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
"language": "English",
}
- @staticmethod
- def images(page):
+ def images(self, page):
images = text.extract(page, "'images' : ", ",\n")[0]
return [
("https://hentaicdn.com/hentai" + part, None)
@@ -67,18 +89,35 @@ class Hentai2readMangaExtractor(Hentai2readBase, MangaExtractor):
test = (
("https://hentai2read.com/amazon_elixir/", {
"url": "273073752d418ec887d7f7211e42b832e8c403ba",
- "keyword": "13c1ce7e15cbb941f01c843b0e89adc993d939ac",
+ "keyword": "5c1b712258e78e120907121d3987c71f834d13e1",
}),
("https://hentai2read.com/oshikage_riot/", {
"url": "6595f920a3088a15c2819c502862d45f8eb6bea6",
- "keyword": "675c7b7a4fa52cf569c283553bd16b4200a5cd36",
+ "keyword": "a2e9724acb221040d4b29bf9aa8cb75b2240d8af",
+ }),
+ ("https://hentai2read.com/popuni_kei_joshi_panic/", {
+ "pattern": Hentai2readChapterExtractor.pattern,
+ "range": "2-3",
+ "keyword": {
+ "chapter": int,
+ "chapter_id": int,
+ "chapter_minor": ".5",
+ "lang": "en",
+ "language": "English",
+ "manga": "Popuni Kei Joshi Panic!",
+ "manga_id": 13088,
+ "title": str,
+ "type": "Original",
+ },
}),
)
def chapters(self, page):
results = []
+
+ pos = page.find('itemscope itemtype="http://schema.org/Book') + 1
manga, pos = text.extract(
- page, '<span itemprop="name">', '</span>')
+ page, '<span itemprop="name">', '</span>', pos)
mtype, pos = text.extract(
page, '<small class="text-danger">[', ']</small>', pos)
manga_id = text.parse_int(text.extract(
@@ -90,12 +129,19 @@ class Hentai2readMangaExtractor(Hentai2readBase, MangaExtractor):
return results
_ , pos = text.extract(page, ' href="', '"', pos)
url, pos = text.extract(page, ' href="', '"', pos)
- chapter, pos = text.extract(page, '>', '<', pos)
+ chapter, pos = text.extract(page, '>', '<', pos)
chapter, _, title = text.unescape(chapter).strip().partition(" - ")
+ chapter, sep, minor = chapter.partition(".")
+
results.append((url, {
- "manga_id": manga_id, "manga": manga, "type": mtype,
- "chapter_id": text.parse_int(chapter_id),
+ "manga": manga,
+ "manga_id": manga_id,
"chapter": text.parse_int(chapter),
- "title": title, "lang": "en", "language": "English",
+ "chapter_minor": sep + minor,
+ "chapter_id": text.parse_int(chapter_id),
+ "type": mtype,
+ "title": title,
+ "lang": "en",
+ "language": "English",
}))
diff --git a/gallery_dl/extractor/hentaihere.py b/gallery_dl/extractor/hentaihere.py
index 8083a9b..c3e6d76 100644
--- a/gallery_dl/extractor/hentaihere.py
+++ b/gallery_dl/extractor/hentaihere.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2019 Mike Fährmann
+# Copyright 2016-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract hentai-manga from https://hentaihere.com/"""
+"""Extractors for https://hentaihere.com/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
@@ -23,11 +23,28 @@ class HentaihereBase():
class HentaihereChapterExtractor(HentaihereBase, ChapterExtractor):
"""Extractor for a single manga chapter from hentaihere.com"""
archive_fmt = "{chapter_id}_{page}"
- pattern = r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/(\d+)"
- test = ("https://hentaihere.com/m/S13812/1/1/", {
- "url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
- "keyword": "cbcee0c0eb178c4b87f06a834085784f8dddad24",
- })
+ pattern = r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/([^/?#]+)"
+ test = (
+ ("https://hentaihere.com/m/S13812/1/1/", {
+ "url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
+ "keyword": "0207d20eea3a15d2a8d1496755bdfa49de7cfa9d",
+ }),
+ ("https://hentaihere.com/m/S23048/1.5/1/", {
+ "author": "Shinozuka Yuuji",
+ "chapter": 1,
+ "chapter_id": 80186,
+ "chapter_minor": ".5",
+ "count": 32,
+ "lang": "en",
+ "language": "English",
+ "manga": "High School Slut's Love Consultation",
+ "manga_id": 23048,
+ "page": int,
+ "title": "High School Slut's Love Consultation + "
+ "Girlfriend [Full Color]",
+ "type": "Original",
+ }),
+ )
def __init__(self, match):
self.manga_id, self.chapter = match.groups()
@@ -37,12 +54,14 @@ class HentaihereChapterExtractor(HentaihereBase, ChapterExtractor):
def metadata(self, page):
title = text.extract(page, "<title>", "</title>")[0]
chapter_id = text.extract(page, 'report/C', '"')[0]
+ chapter, sep, minor = self.chapter.partition(".")
pattern = r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by (.+) at "
match = re.match(pattern, title)
return {
"manga": match.group(1),
"manga_id": text.parse_int(self.manga_id),
- "chapter": text.parse_int(self.chapter),
+ "chapter": text.parse_int(chapter),
+ "chapter_minor": sep + minor,
"chapter_id": text.parse_int(chapter_id),
"type": match.group(2),
"title": match.group(3),
@@ -67,22 +86,34 @@ class HentaihereMangaExtractor(HentaihereBase, MangaExtractor):
test = (
("https://hentaihere.com/m/S13812", {
"url": "d1ba6e28bb2162e844f8559c2b2725ba0a093559",
- "keyword": "13c1ce7e15cbb941f01c843b0e89adc993d939ac",
+ "keyword": "5c1b712258e78e120907121d3987c71f834d13e1",
}),
("https://hentaihere.com/m/S7608", {
"url": "6c5239758dc93f6b1b4175922836c10391b174f7",
- "keyword": "675c7b7a4fa52cf569c283553bd16b4200a5cd36",
+ "keyword": {
+ "chapter": int,
+ "chapter_id": int,
+ "chapter_minor": "",
+ "lang": "en",
+ "language": "English",
+ "manga": "Oshikake Riot",
+ "manga_id": 7608,
+ "title": r"re:Oshikake Riot( \d+)?",
+ "type": "Original",
+ },
}),
)
def chapters(self, page):
results = []
- manga_id = text.parse_int(
- self.manga_url.rstrip("/").rpartition("/")[2][1:])
+
+ pos = page.find('itemscope itemtype="http://schema.org/Book') + 1
manga, pos = text.extract(
- page, '<span itemprop="name">', '</span>')
+ page, '<span itemprop="name">', '</span>', pos)
mtype, pos = text.extract(
page, '<span class="mngType text-danger">[', ']</span>', pos)
+ manga_id = text.parse_int(
+ self.manga_url.rstrip("/").rpartition("/")[2][1:])
while True:
marker, pos = text.extract(
@@ -90,12 +121,20 @@ class HentaihereMangaExtractor(HentaihereBase, MangaExtractor):
if marker is None:
return results
url, pos = text.extract(page, '<a href="', '"', pos)
+
chapter, pos = text.extract(page, 'title="Tagged: -">\n', '<', pos)
chapter_id, pos = text.extract(page, '/C', '"', pos)
chapter, _, title = text.unescape(chapter).strip().partition(" - ")
+ chapter, sep, minor = chapter.partition(".")
+
results.append((url, {
- "manga_id": manga_id, "manga": manga, "type": mtype,
- "chapter_id": text.parse_int(chapter_id),
+ "manga_id": manga_id,
+ "manga": manga,
"chapter": text.parse_int(chapter),
- "title": title, "lang": "en", "language": "English",
+ "chapter_minor": sep + minor,
+ "chapter_id": text.parse_int(chapter_id),
+ "type": mtype,
+ "title": title,
+ "lang": "en",
+ "language": "English",
}))
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 4775613..a4ea71a 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -12,6 +12,7 @@
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache, memcache
+import binascii
import json
import time
import re
@@ -171,6 +172,15 @@ class InstagramExtractor(Extractor):
data["location_url"] = "{}/explore/locations/{}/{}/".format(
self.root, location["pk"], slug)
+ coauthors = post.get("coauthor_producers")
+ if coauthors:
+ data["coauthors"] = [
+ {"id" : user["pk"],
+ "username" : user["username"],
+ "full_name": user["full_name"]}
+ for user in coauthors
+ ]
+
if "carousel_media" in post:
items = post["carousel_media"]
data["sidecar_media_id"] = data["post_id"]
@@ -265,6 +275,14 @@ class InstagramExtractor(Extractor):
data["location_url"] = "{}/explore/locations/{}/{}/".format(
self.root, location["id"], location["slug"])
+ coauthors = post.get("coauthor_producers")
+ if coauthors:
+ data["coauthors"] = [
+ {"id" : user["id"],
+ "username": user["username"]}
+ for user in coauthors
+ ]
+
data["_files"] = files = []
if "edge_sidecar_to_children" in post:
for num, edge in enumerate(
@@ -361,6 +379,7 @@ class InstagramUserExtractor(InstagramExtractor):
base = "{}/{}/".format(self.root, self.item)
stories = "{}/stories/{}/".format(self.root, self.item)
return self._dispatch_extractors((
+ (InstagramAvatarExtractor , base + "avatar/"),
(InstagramStoriesExtractor , stories),
(InstagramHighlightsExtractor, base + "highlights/"),
(InstagramPostsExtractor , base + "posts/"),
@@ -418,7 +437,7 @@ class InstagramTaggedExtractor(InstagramExtractor):
return {"tagged_owner_id": self.user_id}
self.user_id = self.api.user_id(self.item)
- user = self.api.user(self.item)
+ user = self.api.user_by_name(self.item)
return {
"tagged_owner_id" : user["id"],
@@ -483,25 +502,32 @@ class InstagramStoriesExtractor(InstagramExtractor):
"""Extractor for Instagram stories"""
subcategory = "stories"
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
- r"/stories/(?:highlights/(\d+)|([^/?#]+)(?:/(\d+))?)")
+ r"/s(?:tories/(?:highlights/(\d+)|([^/?#]+)(?:/(\d+))?)"
+ r"|/(aGlnaGxpZ2h0[^?#]+)(?:\?story_media_id=(\d+))?)")
test = (
("https://www.instagram.com/stories/instagram/"),
("https://www.instagram.com/stories/highlights/18042509488170095/"),
("https://instagram.com/stories/geekmig/2724343156064789461"),
+ ("https://www.instagram.com/s/aGlnaGxpZ2h0OjE4MDQyNTA5NDg4MTcwMDk1"),
+ ("https://www.instagram.com/s/aGlnaGxpZ2h0OjE4MDQyNTA5NDg4MTcwMDk1"
+ "?story_media_id=2724343156064789461"),
)
def __init__(self, match):
- self.highlight_id, self.user, self.media_id = match.groups()
- if self.highlight_id:
+ h1, self.user, m1, h2, m2 = match.groups()
+
+ if self.user:
+ self.highlight_id = None
+ else:
self.subcategory = InstagramHighlightsExtractor.subcategory
+ self.highlight_id = ("highlight:" + h1 if h1 else
+ binascii.a2b_base64(h2).decode())
+
+ self.media_id = m1 or m2
InstagramExtractor.__init__(self, match)
def posts(self):
- if self.highlight_id:
- reel_id = "highlight:" + self.highlight_id
- else:
- reel_id = self.api.user_id(self.user)
-
+ reel_id = self.highlight_id or self.api.user_id(self.user)
reels = self.api.reels_media(reel_id)
if self.media_id and reels:
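The new `/s/` shortcodes are just the classic reel ID in base64, which is why `binascii.a2b_base64` is enough; decoding the ID from the test URL above shows the round trip:

```python
import binascii

shortcode = "aGlnaGxpZ2h0OjE4MDQyNTA5NDg4MTcwMDk1"
print(binascii.a2b_base64(shortcode).decode())
# highlight:18042509488170095
```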
@@ -544,6 +570,48 @@ class InstagramTagExtractor(InstagramExtractor):
return self.api.tags_media(self.item)
+class InstagramAvatarExtractor(InstagramExtractor):
+ """Extractor for an Instagram user's avatar"""
+ subcategory = "avatar"
+ pattern = USER_PATTERN + r"/avatar"
+ test = ("https://www.instagram.com/instagram/avatar", {
+ "pattern": r"https://instagram\.[\w.-]+\.fbcdn\.net/v/t51\.2885-19"
+ r"/281440578_1088265838702675_6233856337905829714_n\.jpg",
+ })
+
+ def posts(self):
+ if self._logged_in:
+ user_id = self.api.user_id(self.item)
+ user = self.api.user_by_id(user_id)
+ avatar = (user.get("hd_profile_pic_url_info") or
+ user["hd_profile_pic_versions"][-1])
+ else:
+ user = self.item
+ if user.startswith("id:"):
+ user = self.api.user_by_id(user[3:])
+ else:
+ user = self.api.user_by_name(user)
+ user["pk"] = user["id"]
+ url = user.get("profile_pic_url_hd") or user["profile_pic_url"]
+ avatar = {"url": url, "width": 0, "height": 0}
+
+ pk = user.get("profile_pic_id")
+ if pk:
+ pk = pk.partition("_")[0]
+ code = shortcode_from_id(pk)
+ else:
+ pk = code = "avatar:" + str(user["pk"])
+
+ return ({
+ "pk" : pk,
+ "code" : code,
+ "user" : user,
+ "caption" : None,
+ "like_count": 0,
+ "image_versions2": {"candidates": (avatar,)},
+ },)
+
+
class InstagramPostExtractor(InstagramExtractor):
"""Extractor for an Instagram post"""
subcategory = "post"
@@ -693,15 +761,19 @@ class InstagramRestAPI():
return self._pagination_sections(endpoint, data)
@memcache(keyarg=1)
- def user(self, screen_name):
+ def user_by_name(self, screen_name):
endpoint = "/v1/users/web_profile_info/"
params = {"username": screen_name}
return self._call(endpoint, params=params)["data"]["user"]
+ def user_by_id(self, user_id):
+ endpoint = "/v1/users/{}/info/".format(user_id)
+ return self._call(endpoint)["user"]
+
def user_id(self, screen_name):
if screen_name.startswith("id:"):
return screen_name[3:]
- user = self.user(screen_name)
+ user = self.user_by_name(screen_name)
if user is None:
raise exception.AuthorizationError(
"Login required to access this profile")
@@ -812,7 +884,8 @@ class InstagramGraphqlAPI():
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
api = InstagramRestAPI(extractor)
- self.user = api.user
+ self.user_by_name = api.user_by_name
+ self.user_by_id = api.user_by_id
self.user_id = api.user_id
@staticmethod
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 750b741..21ff114 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -190,7 +190,7 @@ class KemonopartyExtractor(Extractor):
for dm in text.extract_iter(page, "<article", "</article>"):
dms.append({
"body": text.unescape(text.extract(
- dm, '<pre>', '</pre></section>',
+ dm, "<pre>", "</pre></",
)[0].strip()),
"date": text.extract(dm, 'datetime="', '"')[0],
})
@@ -230,9 +230,10 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
posts = self.request(url, params=params).json()
yield from posts
- if len(posts) < 25:
+ cnt = len(posts)
+ if cnt < 25:
return
- params["o"] += 25
+ params["o"] += cnt
class KemonopartyPostExtractor(KemonopartyExtractor):
@@ -420,9 +421,10 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
posts = self.request(url, params=params).json()
yield from posts
- if len(posts) < 25:
+ cnt = len(posts)
+ if cnt < 25:
break
- params["skip"] += 25
+ params["skip"] += cnt
class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
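Both pagination loops now advance by the number of posts actually returned instead of a hard-coded 25, which stays correct even if the API's page size changes; the shape of the fix, reduced to a sketch:

```python
def paginate(fetch):
    """fetch(offset) -> list of posts; stand-in for the API calls above."""
    offset = 0
    while True:
        posts = fetch(offset)
        yield from posts
        cnt = len(posts)
        if cnt < 25:        # short page: nothing left to fetch
            return
        offset += cnt       # advance by what was actually received
```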
diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py
index 3444a7a..a12a801 100644
--- a/gallery_dl/extractor/manganelo.py
+++ b/gallery_dl/extractor/manganelo.py
@@ -11,21 +11,22 @@ from .. import text
import re
BASE_PATTERN = \
- r"(?:https?://)?((?:(?:read)?manganato|(?:www\.)?manganelo)\.com)"
+ r"(?:https?://)?((?:(?:chap|read)?manganato|(?:www\.)?manganelo)\.com)"
class ManganeloChapterExtractor(ChapterExtractor):
"""Extractor for manga-chapters from manganelo.com"""
category = "manganelo"
- root = "https://readmanganato.com"
+ root = "https://chapmanganato.com"
pattern = BASE_PATTERN + r"(/(?:manga-\w+|chapter/\w+)/chapter[-_][^/?#]+)"
test = (
- ("https://readmanganato.com/manga-gn983696/chapter-23", {
+ ("https://chapmanganato.com/manga-gn983696/chapter-23", {
"pattern": r"https://v\d+\.mkklcdnv6tempv5\.com/img/tab_17/03/23"
r"/39/gn983696/vol_3_chapter_23_24_yen/\d+-[no]\.jpg",
"keyword": "2c5cd59342f149375df9bcb50aa416b4d04a43cf",
"count": 25,
}),
+ ("https://readmanganato.com/manga-gn983696/chapter-23"),
("https://manganelo.com/chapter/gamers/chapter_15"),
("https://manganelo.com/chapter/gq921227/chapter_23"),
)
@@ -73,14 +74,15 @@ class ManganeloChapterExtractor(ChapterExtractor):
class ManganeloMangaExtractor(MangaExtractor):
"""Extractor for manga from manganelo.com"""
category = "manganelo"
- root = "https://readmanganato.com"
+ root = "https://chapmanganato.com"
chapterclass = ManganeloChapterExtractor
pattern = BASE_PATTERN + r"(/(?:manga[-/]|read_)\w+)/?$"
test = (
- ("https://readmanganato.com/manga-gn983696", {
+ ("https://chapmanganato.com/manga-gn983696", {
"pattern": ManganeloChapterExtractor.pattern,
"count": ">= 25",
}),
+ ("https://readmanganato.com/manga-gn983696"),
("https://manganelo.com/manga/read_otome_no_teikoku"),
("https://manganelo.com/manga/ol921234/"),
)
diff --git a/gallery_dl/extractor/mangasee.py b/gallery_dl/extractor/mangasee.py
index 2bd11ef..5fa5631 100644
--- a/gallery_dl/extractor/mangasee.py
+++ b/gallery_dl/extractor/mangasee.py
@@ -35,33 +35,59 @@ class MangaseeBase():
class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
- pattern = r"(?:https?://)?mangasee123\.com(/read-online/[^/?#]+\.html)"
- test = (("https://mangasee123.com/read-online"
- "/Tokyo-Innocent-chapter-4.5-page-1.html"), {
- "pattern": r"https://[^/]+/manga/Tokyo-Innocent/0004\.5-00\d\.png",
- "count": 8,
- "keyword": {
- "chapter": 4,
- "chapter_minor": ".5",
- "chapter_string": "100045",
+ pattern = (r"(?:https?://)?(mangasee123|manga4life)\.com"
+ r"(/read-online/[^/?#]+\.html)")
+ test = (
+ (("https://mangasee123.com/read-online"
+ "/Tokyo-Innocent-chapter-4.5-page-1.html"), {
+ "pattern": r"https://[^/]+/manga/Tokyo-Innocent/0004\.5-00\d\.png",
"count": 8,
- "date": "dt:2020-01-20 21:52:53",
- "extension": "png",
- "filename": r"re:0004\.5-00\d",
- "index": "1",
- "lang": "en",
- "language": "English",
- "manga": "Tokyo Innocent",
- "page": int,
- "title": "",
- },
- })
+ "keyword": {
+ "chapter": 4,
+ "chapter_minor": ".5",
+ "chapter_string": "100045",
+ "count": 8,
+ "date": "dt:2020-01-20 21:52:53",
+ "extension": "png",
+ "filename": r"re:0004\.5-00\d",
+ "index": "1",
+ "lang": "en",
+ "language": "English",
+ "manga": "Tokyo Innocent",
+ "page": int,
+ "title": "",
+ },
+ }),
+ (("https://manga4life.com/read-online"
+ "/One-Piece-chapter-1063-page-1.html"), {
+ "pattern": r"https://[^/]+/manga/One-Piece/1063-0\d\d\.png",
+ "count": 13,
+ "keyword": {
+ "chapter": 1063,
+ "chapter_minor": "",
+ "chapter_string": "110630",
+ "count": 13,
+ "date": "dt:2022-10-16 17:32:54",
+ "extension": "png",
+ "filename": r"re:1063-0\d\d",
+ "index": "1",
+ "lang": "en",
+ "language": "English",
+ "manga": "One Piece",
+ "page": int,
+ "title": "",
+ },
+ }),
+ )
def __init__(self, match):
- ChapterExtractor.__init__(self, match)
+ if match.group(1) == "manga4life":
+ self.category = "mangalife"
+ self.root = "https://manga4life.com"
+ ChapterExtractor.__init__(self, match, self.root + match.group(2))
self.session.headers["Referer"] = self.gallery_url
- domain = "mangasee123.com"
+ domain = self.root.rpartition("/")[2]
cookies = self.session.cookies
if not cookies.get("PHPSESSID", domain=domain):
cookies.set("PHPSESSID", util.generate_token(13), domain=domain)
@@ -96,12 +122,24 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
class MangaseeMangaExtractor(MangaseeBase, MangaExtractor):
chapterclass = MangaseeChapterExtractor
- pattern = r"(?:https?://)?mangasee123\.com(/manga/[^/?#]+)"
- test = (("https://mangasee123.com/manga"
- "/Nakamura-Koedo-To-Daizu-Keisuke-Wa-Umaku-Ikanai"), {
- "pattern": MangaseeChapterExtractor.pattern,
- "count": ">= 17",
- })
+ pattern = r"(?:https?://)?(mangasee123|manga4life)\.com(/manga/[^/?#]+)"
+ test = (
+ (("https://mangasee123.com/manga"
+ "/Nakamura-Koedo-To-Daizu-Keisuke-Wa-Umaku-Ikanai"), {
+ "pattern": MangaseeChapterExtractor.pattern,
+ "count": ">= 17",
+ }),
+ ("https://manga4life.com/manga/Ano-Musume-Ni-Kiss-To-Shirayuri-O", {
+ "pattern": MangaseeChapterExtractor.pattern,
+ "count": ">= 50",
+ }),
+ )
+
+ def __init__(self, match):
+ if match.group(1) == "manga4life":
+ self.category = "mangalife"
+ self.root = "https://manga4life.com"
+ MangaExtractor.__init__(self, match, self.root + match.group(2))
def chapters(self, page):
slug, pos = text.extract(page, 'vm.IndexName = "', '"')
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 9ce5772..0d2cded 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -92,7 +92,7 @@ INSTANCES = {
}
}
-BASE_PATTERN = MastodonExtractor.update(INSTANCES)
+BASE_PATTERN = MastodonExtractor.update(INSTANCES) + "(?:/web)?"
class MastodonUserExtractor(MastodonExtractor):
@@ -111,9 +111,16 @@ class MastodonUserExtractor(MastodonExtractor):
"count": 60,
}),
("https://baraag.net/@pumpkinnsfw"),
+ ("https://mastodon.social/@yoru_nine@pawoo.net", {
+ "pattern": r"https://mastodon\.social/media_proxy/\d+/original",
+ "range": "1-10",
+ "count": 10,
+ }),
("https://mastodon.social/@id:10843"),
("https://mastodon.social/users/id:10843"),
("https://mastodon.social/users/jk"),
+ ("https://mastodon.social/users/yoru_nine@pawoo.net"),
+ ("https://mastodon.social/web/@jk"),
)
def statuses(self):
@@ -126,6 +133,20 @@ class MastodonUserExtractor(MastodonExtractor):
)
+class MastodonBookmarkExtractor(MastodonExtractor):
+ """Extractor for mastodon bookmarks"""
+ subcategory = "bookmark"
+ pattern = BASE_PATTERN + r"/bookmarks"
+ test = (
+ ("https://mastodon.social/bookmarks"),
+ ("https://pawoo.net/bookmarks"),
+ ("https://baraag.net/bookmarks"),
+ )
+
+ def statuses(self):
+ return MastodonAPI(self).account_bookmarks()
+
+
class MastodonFollowingExtractor(MastodonExtractor):
"""Extractor for followed mastodon users"""
subcategory = "following"
@@ -197,13 +218,21 @@ class MastodonAPI():
if username.startswith("id:"):
return username[3:]
- handle = "@{}@{}".format(username, self.extractor.instance)
+ if "@" in username:
+ handle = "@" + username
+ else:
+ handle = "@{}@{}".format(username, self.extractor.instance)
+
for account in self.account_search(handle, 1):
- if account["username"] == username:
+ if account["acct"] == username:
self.extractor._check_move(account)
return account["id"]
raise exception.NotFoundError("account")
+ def account_bookmarks(self):
+ endpoint = "/v1/bookmarks"
+ return self._pagination(endpoint, None)
+
def account_following(self, account_id):
endpoint = "/v1/accounts/{}/following".format(account_id)
return self._pagination(endpoint, None)
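The cross-instance support boils down to the handle construction above plus comparing against `acct`, which includes the domain for remote accounts; in isolation:

```python
def make_handle(username, instance):
    # local names get the home instance appended;
    # "user@other.instance" references are searched as-is
    if "@" in username:
        return "@" + username
    return "@{}@{}".format(username, instance)

print(make_handle("jk", "mastodon.social"))
# @jk@mastodon.social
print(make_handle("yoru_nine@pawoo.net", "mastodon.social"))
# @yoru_nine@pawoo.net
```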
diff --git a/gallery_dl/extractor/moebooru.py b/gallery_dl/extractor/moebooru.py
index 27ec929..4d63c3e 100644
--- a/gallery_dl/extractor/moebooru.py
+++ b/gallery_dl/extractor/moebooru.py
@@ -26,9 +26,10 @@ class MoebooruExtractor(BooruExtractor):
def _prepare(post):
post["date"] = text.parse_timestamp(post["created_at"])
- def _extended_tags(self, post):
- url = "{}/post/show/{}".format(self.root, post["id"])
- page = self.request(url).text
+ def _extended_tags(self, post, page=None):
+ if not page:
+ url = "{}/post/show/{}".format(self.root, post["id"])
+ page = self.request(url).text
html = text.extract(page, '<ul id="tag-', '</ul>')[0]
if html:
tags = collections.defaultdict(list)
@@ -37,6 +38,29 @@ class MoebooruExtractor(BooruExtractor):
tags[tag_type].append(text.unquote(tag_name))
for key, value in tags.items():
post["tags_" + key] = " ".join(value)
+ return page
+
+ def _notes(self, post, page=None):
+ if not page:
+ url = "{}/post/show/{}".format(self.root, post["id"])
+ page = self.request(url).text
+ notes = []
+ notes_container = text.extract(page, 'id="note-container"', "<img ")[0]
+ if not notes_container:
+ return
+
+ for note in notes_container.split('class="note-box"')[1:]:
+ extr = text.extract_from(note)
+ notes.append({
+ "width" : int(extr("width: ", "p")),
+ "height": int(extr("height: ", "p")),
+ "y" : int(extr("top: ", "p")),
+ "x" : int(extr("left: ", "p")),
+ "id" : int(extr('id="note-body-', '"')),
+ "body" : text.remove_html(extr('>', "</div>")),
+ })
+
+ post["notes"] = notes
def _pagination(self, url, params):
params["page"] = self.page_start
@@ -96,6 +120,37 @@ class MoebooruPostExtractor(MoebooruExtractor):
"tags_general": str,
},
}),
+ ("https://yande.re/post/show/993156", {
+ "content": "fed722bd90f48de41ec163692befc701056e2b1e",
+ "options": (("notes", True),),
+ "keyword": {
+ "notes": [
+ {
+ "id": 7096,
+ "x" : 90,
+ "y" : 626,
+ "width" : 283,
+ "height": 529,
+ "body" : "Please keep this as a secret for me!!",
+ },
+ {
+ "id": 7095,
+ "x" : 900,
+ "y" : 438,
+ "width" : 314,
+ "height": 588,
+ "body" : "The facts that I love playing games",
+ },
+ ],
+ },
+ }),
+ ("https://lolibooru.moe/post/show/281305/", {
+ "content": "a331430223ffc5b23c31649102e7d49f52489b57",
+ "options": (("notes", True),),
+ "keyword": {
+ "notes": list,
+ },
+ }),
("https://konachan.net/post/show/205189"),
("https://www.sakugabooru.com/post/show/125570"),
("https://lolibooru.moe/post/show/287835"),
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 6b2e1c3..e3a96bd 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -45,6 +45,7 @@ class PixivExtractor(Extractor):
work["tags"] = [tag["name"] for tag in work["tags"]]
ratings = {0: "General", 1: "R-18", 2: "R-18G"}
+ userdata = self.config("metadata")
metadata = self.metadata()
works = self.works()
@@ -60,6 +61,8 @@ class PixivExtractor(Extractor):
del work["image_urls"]
del work["meta_pages"]
+ if userdata:
+ work.update(self.api.user_detail(work["user"]["id"]))
if transform_tags:
transform_tags(work)
work["num"] = 0
@@ -198,7 +201,7 @@ class PixivArtworksExtractor(PixivExtractor):
def metadata(self):
if self.config("metadata"):
- return self.api.user_detail(self.user_id)
+ self.api.user_detail(self.user_id)
return {}
def works(self):
@@ -557,7 +560,7 @@ class PixivSearchExtractor(PixivExtractor):
sort = "date_d"
self.sort = sort_map[sort]
- target = query.get("s_mode", "s_tag")
+ target = query.get("s_mode", "s_tag_full")
target_map = {
"s_tag": "partial_match_for_tags",
"s_tag_full": "exact_match_for_tags",
@@ -565,7 +568,7 @@ class PixivSearchExtractor(PixivExtractor):
}
if target not in target_map:
self.log.warning("invalid search target '%s'", target)
- target = "s_tag"
+ target = "s_tag_full"
self.target = target_map[target]
self.date_start = query.get("scd")
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index db8d700..448dc1b 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -170,7 +170,7 @@ class ReactorTagExtractor(ReactorExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "{search_tags}_{post_id}_{num}"
- pattern = BASE_PATTERN + r"/tag/([^/?#]+)"
+ pattern = BASE_PATTERN + r"/tag/([^/?#]+)(?:/[^/?#]+)?"
test = (
("http://reactor.cc/tag/gif"),
("http://anime.reactor.cc/tag/Anime+Art"),
@@ -180,6 +180,10 @@ class ReactorTagExtractor(ReactorExtractor):
("http://joyreactor.com/tag/Cirno", {
"url": "aa59090590b26f4654881301fe8fe748a51625a8",
}),
+ # 'best' rating (#3073)
+ ("http://joyreactor.com/tag/Dark+Souls+2/best", {
+ "count": 4,
+ }),
("http://pornreactor.cc/tag/RiceGnat", {
"range": "1-25",
"count": ">= 25",
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 1111c3a..53e5e79 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -10,7 +10,7 @@
from .common import Extractor, Message
from .. import text
-from ..cache import cache
+from ..cache import memcache
class RedgifsExtractor(Extractor):
@@ -133,10 +133,11 @@ class RedgifsAPI():
def __init__(self, extractor):
self.extractor = extractor
self.headers = {
- "Referer" : extractor.root + "/",
- "authorization": "Bearer " + self._fetch_bearer_token(extractor),
- "content-type" : "application/json",
- "Origin" : extractor.root,
+ "Referer" : extractor.root + "/",
+ "authorization" : None,
+ "content-type" : "application/json",
+ "x-customheader": extractor.root + "/",
+ "Origin" : extractor.root,
}
def gif(self, gif_id):
@@ -156,6 +157,7 @@ class RedgifsAPI():
def _call(self, endpoint, params=None):
url = self.API_ROOT + endpoint
+ self.headers["authorization"] = self._auth()
return self.extractor.request(
url, params=params, headers=self.headers).json()
@@ -170,16 +172,10 @@ class RedgifsAPI():
return
params["page"] += 1
- @cache(maxage=3600)
- def _fetch_bearer_token(self, extr):
- extr.log.debug("Retrieving Bearer token")
-
- page = extr.request(extr.root + "/").text
- index = text.extract(page, "/assets/js/index", ".js")[0]
-
- url = extr.root + "/assets/js/index" + index + ".js"
- page = extr.request(url, encoding="utf-8").text
- token = "ey" + text.extract(page, '="ey', '"')[0]
-
- extr.log.debug("Token: '%s'", token)
- return token
+ @memcache(maxage=600)
+ def _auth(self):
+ # https://github.com/Redgifs/api/wiki/Temporary-tokens
+ url = self.API_ROOT + "/v2/auth/temporary"
+ self.headers["authorization"] = None
+ return "Bearer " + self.extractor.request(
+ url, headers=self.headers).json()["token"]
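The scraped-from-JavaScript bearer token is replaced by the documented temporary-token endpoint (see the linked wiki page). Standalone, and assuming `API_ROOT` is `https://api.redgifs.com` as elsewhere in this module, the flow is roughly:

```python
import requests

API_ROOT = "https://api.redgifs.com"  # assumed value of RedgifsAPI.API_ROOT

# fetch an anonymous temporary token ...
token = requests.get(API_ROOT + "/v2/auth/temporary").json()["token"]

# ... and send it as a regular Bearer token on subsequent API calls
resp = requests.get(API_ROOT + "/v2/gifs/search",
                    params={"search_text": "example"},
                    headers={"authorization": "Bearer " + token})
print(resp.status_code)
```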
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 822b1f2..3724c85 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -77,9 +77,6 @@ class SkebExtractor(Extractor):
"body" : resp["body"],
"source_body" : resp["source_body"],
"translated_body" : resp["translated"],
- "completed_at" : resp["completed_at"],
- "date" : text.parse_datetime(
- resp["completed_at"], "%Y-%m-%dT%H:%M:%S.%fZ"),
"nsfw" : resp["nsfw"],
"anonymous" : resp["anonymous"],
"tags" : resp["tag_list"],
@@ -160,7 +157,6 @@ class SkebPostExtractor(SkebExtractor):
"name": str,
"screen_name": "minato_ragi",
},
- "completed_at": "2022-02-27T14:03:45.442Z",
"content_category": "preview",
"creator": {
"avatar_url": "https://pbs.twimg.com/profile_images"
@@ -171,7 +167,6 @@ class SkebPostExtractor(SkebExtractor):
"name": "イチノセ奏",
"screen_name": "kanade_cocotte",
},
- "date": "dt:2022-02-27 14:03:45",
"file_id": int,
"file_url": str,
"genre": "art",
@@ -212,7 +207,7 @@ class SkebUserExtractor(SkebExtractor):
"pattern": r"https://skeb\.imgix\.net/uploads/origins/[\w-]+"
r"\?bg=%23fff&auto=format&txtfont=bold&txtshad=70"
r"&txtclr=BFFFFFFF&txtalign=middle%2Ccenter&txtsize=150"
- r"&txt=SAMPLE&w=800&s=\w+",
+ r"&txt=SAMPLE&fm=webp&w=800&s=\w+",
"range": "1-5",
})
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 324a3c6..5451f6e 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -49,6 +49,8 @@ class TumblrExtractor(Extractor):
self.reblogs = self.config("reblogs", True)
self.external = self.config("external", False)
self.original = self.config("original", True)
+ self.fallback_delay = self.config("fallback-delay", 120.0)
+ self.fallback_retries = self.config("fallback-retries", 2)
if len(self.types) == 1:
self.api.posts_type = next(iter(self.types))
@@ -250,8 +252,8 @@ class TumblrExtractor(Extractor):
return updated, (resized == updated)
def _original_image_fallback(self, url, post_id):
- for _ in range(3):
- self.sleep(120, "image token")
+ for _ in range(self.fallback_retries):
+ self.sleep(self.fallback_delay, "image token")
yield self._update_image_token(url)[0]
self.log.warning("Unable to fetch higher-resolution "
"version of %s (%s)", url, post_id)
diff --git a/gallery_dl/extractor/vichan.py b/gallery_dl/extractor/vichan.py
new file mode 100644
index 0000000..2fafb56
--- /dev/null
+++ b/gallery_dl/extractor/vichan.py
@@ -0,0 +1,163 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for vichan imageboards"""
+
+from .common import BaseExtractor, Message
+from .. import text
+
+
+class VichanExtractor(BaseExtractor):
+ """Base class for vichan extractors"""
+ basecategory = "vichan"
+
+
+BASE_PATTERN = VichanExtractor.update({
+ "8kun": {
+ "root": "https://8kun.top",
+ "pattern": r"8kun\.top",
+ },
+ "wikieat": {
+ "root": "https://wikieat.club",
+ "pattern": r"wikieat\.club",
+ },
+ "smugloli": {
+ "root": None,
+ "pattern": r"smuglo(?:\.li|li\.net)",
+ },
+})
+
+
+class VichanThreadExtractor(VichanExtractor):
+ """Extractor for vichan threads"""
+ subcategory = "thread"
+ directory_fmt = ("{category}", "{board}", "{thread} {title}")
+ filename_fmt = "{time}{num:?-//} {filename}.{extension}"
+ archive_fmt = "{board}_{thread}_{tim}"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
+ test = (
+ ("https://8kun.top/test/res/65248.html", {
+ "pattern": r"https://media\.128ducks\.com/file_store/\w{64}\.\w+",
+ "count": ">= 8",
+ }),
+ # old-style file URLs (#1101)
+ # ("https://8kun.top/d/res/13258.html", {
+ # "pattern": r"https://media\.128ducks\.com/d/src/\d+(-\d)?\.\w+",
+ # "range": "1-20",
+ # }),
+
+ ("https://wikieat.club/cel/res/25321.html", {
+ "pattern": r"https://wikieat\.club/cel/src/\d+(-\d)?\.\w+",
+ "count": ">= 200",
+ }),
+
+ ("https://smuglo.li/a/res/1154380.html", {
+ "pattern": r"https://smug.+/a/src/\d+(-\d)?\.\w+",
+ "count": ">= 18",
+ "keyword": {
+ "board": "a",
+ "thread": "1154380",
+ "title": "Mob Psycho 100 Season 3",
+ },
+ }),
+ ("https://smugloli.net/a/res/1145409.html"),
+ )
+
+ def __init__(self, match):
+ VichanExtractor.__init__(self, match)
+ index = match.lastindex
+ self.board = match.group(index-1)
+ self.thread = match.group(index)
+
+ def items(self):
+ url = "{}/{}/res/{}.json".format(self.root, self.board, self.thread)
+ posts = self.request(url).json()["posts"]
+ title = posts[0].get("sub") or text.remove_html(posts[0]["com"])
+ process = (self._process_8kun if self.category == "8kun" else
+ self._process)
+ data = {
+ "board" : self.board,
+ "thread": self.thread,
+ "title" : text.unescape(title)[:50],
+ "num" : 0,
+ }
+
+ yield Message.Directory, data
+ for post in posts:
+ if "filename" in post:
+ yield process(post, data)
+ if "extra_files" in post:
+ for post["num"], filedata in enumerate(
+ post["extra_files"], 1):
+ yield process(post, filedata)
+
+ def _process(self, post, data):
+ post.update(data)
+ post["extension"] = post["ext"][1:]
+ post["url"] = "{}/{}/src/{}{}".format(
+ self.root, post["board"], post["tim"], post["ext"])
+ return Message.Url, post["url"], post
+
+ @staticmethod
+ def _process_8kun(post, data):
+ post.update(data)
+ post["extension"] = post["ext"][1:]
+
+ tim = post["tim"]
+ if len(tim) > 16:
+ post["url"] = "https://media.128ducks.com/file_store/{}{}".format(
+ tim, post["ext"])
+ else:
+ post["url"] = "https://media.128ducks.com/{}/src/{}{}".format(
+ post["board"], tim, post["ext"])
+
+ return Message.Url, post["url"], post
+
+
+class VichanBoardExtractor(VichanExtractor):
+ """Extractor for vichan boards"""
+ subcategory = "board"
+ pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
+ test = (
+ ("https://8kun.top/v/index.html", {
+ "pattern": VichanThreadExtractor.pattern,
+ "count": ">= 100",
+ }),
+ ("https://8kun.top/v/2.html"),
+ ("https://8kun.top/v/index.html?PageSpeed=noscript"),
+
+ ("https://wikieat.club/cel/index.html", {
+ "pattern": VichanThreadExtractor.pattern,
+ "count": ">= 100",
+ }),
+ ("https://wikieat.club/cel/catalog.html"),
+ ("https://wikieat.club/cel/2.html"),
+
+ ("https://smuglo.li/a", {
+ "pattern": VichanThreadExtractor.pattern,
+ "count": ">= 100",
+ }),
+ ("https://smuglo.li/a/1.html"),
+ ("https://smugloli.net/cute/catalog.html"),
+ )
+
+ def __init__(self, match):
+ VichanExtractor.__init__(self, match)
+ self.board = match.group(match.lastindex)
+
+ def items(self):
+ url = "{}/{}/threads.json".format(self.root, self.board)
+ threads = self.request(url).json()
+
+ for page in threads:
+ for thread in page["threads"]:
+ url = "{}/{}/res/{}.html".format(
+ self.root, self.board, thread["no"])
+ thread["page"] = page["page"]
+ thread["_extractor"] = VichanThreadExtractor
+ yield Message.Queue, url, thread
diff --git a/gallery_dl/extractor/wikieat.py b/gallery_dl/extractor/wikieat.py
deleted file mode 100644
index c7b1958..0000000
--- a/gallery_dl/extractor/wikieat.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2021 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://wikieat.club/"""
-
-from .common import Extractor, Message
-from .. import text
-
-
-class WikieatThreadExtractor(Extractor):
- """Extractor for Wikieat threads"""
- category = "wikieat"
- subcategory = "thread"
- directory_fmt = ("{category}", "{board}", "{thread} {title}")
- filename_fmt = "{time}{num:?-//} {filename}.{extension}"
- archive_fmt = "{board}_{thread}_{tim}"
- pattern = r"(?:https?://)?wikieat\.club/([^/]+)/res/(\d+)"
- test = ("https://wikieat.club/cel/res/25321.html", {
- "pattern": r"https://wikieat\.club/cel/src/\d+(-\d)?\.\w+",
- "count": ">= 200",
- })
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.board, self.thread = match.groups()
-
- def items(self):
- url = "https://wikieat.club/{}/res/{}.json".format(
- self.board, self.thread)
- posts = self.request(url).json()["posts"]
- title = posts[0].get("sub") or text.remove_html(posts[0]["com"])
- process = self._process
-
- data = {
- "board" : self.board,
- "thread": self.thread,
- "title" : text.unescape(title)[:50],
- "num" : 0,
- }
-
- yield Message.Directory, data
- for post in posts:
- if "filename" in post:
- yield process(post, data)
- if "extra_files" in post:
- for post["num"], filedata in enumerate(
- post["extra_files"], 1):
- yield process(post, filedata)
-
- @staticmethod
- def _process(post, data):
- post.update(data)
- post["extension"] = post["ext"][1:]
- tim = post["tim"]
- url = ("https://wikieat.club/" +
- post["board"] + "/src/" +
- tim + post["ext"])
- return Message.Url, url, post
-
-
-class WikieatBoardExtractor(Extractor):
- """Extractor for Wikieat boards"""
- category = "wikieat"
- subcategory = "board"
- pattern = (r"(?:https?://)?wikieat\.club"
- r"/([^/?#]+)/(?:index|catalog|\d+)\.html")
- test = (
- ("https://wikieat.club/cel/index.html", {
- "pattern": WikieatThreadExtractor.pattern,
- "count": ">= 100",
- }),
- ("https://wikieat.club/cel/catalog.html"),
- ("https://wikieat.club/cel/2.html")
- )
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.board = match.group(1)
-
- def items(self):
- url = "https://wikieat.club/{}/threads.json".format(self.board)
- threads = self.request(url).json()
-
- for page in threads:
- for thread in page["threads"]:
- url = "https://wikieat.club/{}/res/{}.html".format(
- self.board, thread["no"])
- thread["page"] = page["page"]
- thread["_extractor"] = WikieatThreadExtractor
- yield Message.Queue, url, thread
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 1650b0a..98b6d59 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -790,11 +790,11 @@ class DownloadArchive():
try:
self.cursor.execute("CREATE TABLE IF NOT EXISTS archive "
- "(entry PRIMARY KEY) WITHOUT ROWID")
+ "(entry TEXT PRIMARY KEY) WITHOUT ROWID")
except sqlite3.OperationalError:
# fallback for missing WITHOUT ROWID support (#553)
self.cursor.execute("CREATE TABLE IF NOT EXISTS archive "
- "(entry PRIMARY KEY)")
+ "(entry TEXT PRIMARY KEY)")
def check(self, kwdict):
"""Return True if the item described by 'kwdict' exists in archive"""
@@ -807,4 +807,4 @@ class DownloadArchive():
"""Add item described by 'kwdict' to archive"""
key = kwdict.get(self._cache_key) or self.keygen(kwdict)
self.cursor.execute(
- "INSERT OR IGNORE INTO archive VALUES (?)", (key,))
+ "INSERT OR IGNORE INTO archive (entry) VALUES (?)", (key,))
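Declaring the column as `TEXT` gives it an explicit type affinity, which plays better with third-party sqlite tooling (#3078); the resulting schema as a self-contained sketch:

```python
import sqlite3

con = sqlite3.connect(":memory:")
try:
    con.execute("CREATE TABLE IF NOT EXISTS archive "
                "(entry TEXT PRIMARY KEY) WITHOUT ROWID")
except sqlite3.OperationalError:
    # fallback for sqlite builds without WITHOUT ROWID support (#553)
    con.execute("CREATE TABLE IF NOT EXISTS archive "
                "(entry TEXT PRIMARY KEY)")

con.execute("INSERT OR IGNORE INTO archive (entry) VALUES (?)",
            ("danbooru1234567",))  # hypothetical archive key
print(con.execute("SELECT entry FROM archive").fetchall())
```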
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index f758857..85a03de 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.23.3"
+__version__ = "1.23.5"
diff --git a/test/test_results.py b/test/test_results.py
index e594933..a42de09 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -324,7 +324,7 @@ def setup_test_config():
for category in ("danbooru", "instagram", "twitter", "subscribestar",
"e621", "atfbooru", "inkbunny", "tapas", "pillowfort",
- "mangadex"):
+ "mangadex", "aibooru"):
config.set(("extractor", category), "username", None)
config.set(("extractor", "mastodon.social"), "access-token",