summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net> 2023-01-11 04:09:13 -0500
committer: Unit 193 <unit193@unit193.net> 2023-01-11 04:09:13 -0500
commit: fe385c3ff784ba3d19454a35446502c0ec295893 (patch)
tree: 897982793ef2a0c0f349044bf4cf803ccd483e6e /gallery_dl
parent: ebdfcd3cd3f76534a590ba08933ff7ea54813316 (diff)
New upstream version 1.24.3. (ref: upstream/1.24.3)
Diffstat (limited to 'gallery_dl')
-rw-r--r-- gallery_dl/cookies.py | 3
-rw-r--r-- gallery_dl/extractor/__init__.py | 2
-rw-r--r-- gallery_dl/extractor/behance.py | 5
-rw-r--r-- gallery_dl/extractor/bunkr.py | 6
-rw-r--r-- gallery_dl/extractor/common.py | 1
-rw-r--r-- gallery_dl/extractor/danbooru.py | 4
-rw-r--r-- gallery_dl/extractor/deviantart.py | 57
-rw-r--r-- gallery_dl/extractor/fanbox.py | 20
-rw-r--r-- gallery_dl/extractor/fanleaks.py | 127
-rw-r--r-- gallery_dl/extractor/gelbooru.py | 4
-rw-r--r-- gallery_dl/extractor/imagefap.py | 191
-rw-r--r-- gallery_dl/extractor/kemonoparty.py | 12
-rw-r--r-- gallery_dl/extractor/lynxchan.py | 16
-rw-r--r-- gallery_dl/extractor/myhentaigallery.py | 5
-rw-r--r-- gallery_dl/extractor/nitter.py | 4
-rw-r--r-- gallery_dl/extractor/pinterest.py | 96
-rw-r--r-- gallery_dl/extractor/pixiv.py | 25
-rw-r--r-- gallery_dl/extractor/poipiku.py | 22
-rw-r--r-- gallery_dl/extractor/tcbscans.py | 106
-rw-r--r-- gallery_dl/extractor/telegraph.py | 25
-rw-r--r-- gallery_dl/extractor/twitter.py | 74
-rw-r--r-- gallery_dl/extractor/vk.py | 9
-rw-r--r-- gallery_dl/extractor/zerochan.py | 12
-rw-r--r-- gallery_dl/formatter.py | 13
-rw-r--r-- gallery_dl/option.py | 19
-rw-r--r-- gallery_dl/util.py | 89
-rw-r--r-- gallery_dl/version.py | 2
-rw-r--r-- gallery_dl/ytdl.py | 30
28 files changed, 764 insertions, 215 deletions
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index ee00bf7..f18cc47 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -980,6 +980,7 @@ def _is_path(value):
def _parse_browser_specification(
browser, profile=None, keyring=None, container=None):
+ browser = browser.lower()
if browser not in SUPPORTED_BROWSERS:
raise ValueError("unsupported browser '{}'".format(browser))
if keyring and keyring not in SUPPORTED_KEYRINGS:
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 444075c..f26f6a9 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -38,6 +38,7 @@ modules = [
"exhentai",
"fallenangels",
"fanbox",
+ "fanleaks",
"fantia",
"fapello",
"fapachi",
@@ -135,6 +136,7 @@ modules = [
"speakerdeck",
"subscribestar",
"tapas",
+ "tcbscans",
"telegraph",
"toyhouse",
"tsumino",
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index cf332ac..6da6175 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2022 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://www.behance.net/"""
+"""Extractors for https://www.behance.net/"""
from .common import Extractor, Message
from .. import text
@@ -17,6 +17,7 @@ class BehanceExtractor(Extractor):
"""Base class for behance extractors"""
category = "behance"
root = "https://www.behance.net"
+ request_interval = (2.0, 4.0)
def items(self):
for gallery in self.galleries():
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 882c2b3..8283fbc 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -56,8 +56,12 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
files = album["files"]
except Exception as exc:
self.log.debug("%s: %s", exc.__class__.__name__, exc)
+ self.log.debug("Falling back to lolisafe API")
self.root = root.replace("://", "://app.", 1)
files, data = LolisafeAlbumExtractor.fetch_album(self, album_id)
+ # fix file URLs (bunkr..ru -> bunkr.ru) (#3481)
+ for file in files:
+ file["file"] = file["file"].replace("bunkr..", "bunkr.", 1)
else:
for file in files:
file["file"] = file["cdn"] + "/" + file["name"]
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 4352aa7..ad766da 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -327,6 +327,7 @@ class Extractor():
except Exception as exc:
self.log.warning("cookies: %s", exc)
else:
+ self.log.debug("Loading cookies from '%s'", cookies)
self._cookiefile = cookiefile
elif isinstance(cookies, (list, tuple)):
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index ef17176..4c93604 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -101,8 +101,8 @@ class DanbooruExtractor(BaseExtractor):
if self.extended_metadata:
template = (
- "{}/posts/{}.json"
- "?only=artist_commentary,children,notes,parent"
+ "{}/posts/{}.json?only=artist_commentary,children,notes,"
+ "parent,uploader"
)
resp = self.request(template.format(self.root, post["id"]))
post.update(resp.json())
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index aa78cfb..aeb2d0a 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -987,13 +987,9 @@ class DeviantartScrapsExtractor(DeviantartExtractor):
_warning = True
def deviations(self):
- eclipse_api = DeviantartEclipseAPI(self)
- if self._warning:
- DeviantartScrapsExtractor._warning = False
- if not self._check_cookies(self.cookienames):
- self.log.warning(
- "No session cookies set: Unable to fetch mature scraps.")
+ self.login()
+ eclipse_api = DeviantartEclipseAPI(self)
for obj in eclipse_api.gallery_scraps(self.user, self.offset):
deviation = obj["deviation"]
deviation_uuid = eclipse_api.deviation_extended_fetch(
@@ -1004,6 +1000,17 @@ class DeviantartScrapsExtractor(DeviantartExtractor):
yield self.api.deviation(deviation_uuid)
+    def login(self):
+        """Obtain session cookies by logging in, unless they are already set
+
+        Without a username, a warning is logged while the '_warning' flag
+        is set; the flag is then cleared on the class.
+        """
+        if self._check_cookies(self.cookienames):
+            # session cookies are already present; nothing to do
+            return
+
+        username, password = self._get_auth_info()
+        if username:
+            cookies = _login_impl(self, username, password)
+            self._update_cookies(cookies)
+            return
+
+        if self._warning:
+            self.log.warning(
+                "No session cookies set: Unable to fetch mature scraps.")
+            DeviantartScrapsExtractor._warning = False
+
class DeviantartFollowingExtractor(DeviantartExtractor):
"""Extractor for user's watched users"""
@@ -1513,13 +1520,47 @@ class DeviantartEclipseAPI():
return token
-@cache(maxage=100*365*24*3600, keyarg=0)
+@cache(maxage=100*365*86400, keyarg=0)
def _refresh_token_cache(token):
if token and token[0] == "#":
return None
return token
+@cache(maxage=28*86400, keyarg=1)
+def _login_impl(extr, username, password):
+    """Sign in to DeviantArt as 'username' and return the session cookies
+
+    The result is a dict mapping cookie names to cookie values, taken
+    from 'extr.session.cookies' after the sign-in request.
+    Raises AuthenticationError if the sign-in response has no redirect
+    history.
+    NOTE(review): results are cached by 'username' (keyarg=1),
+    presumably for 28 days - confirm against the 'cache' decorator.
+    """
+    extr.log.info("Logging in as %s", username)
+
+    # gather the hidden <input> fields of the login form
+    login_page = extr.request("https://www.deviantart.com/users/login").text
+    hidden_inputs = text.extract_iter(
+        login_page, '<input type="hidden" name="', '"/>')
+    data = {
+        name: value
+        for name, _, value in (
+            field.partition('" value="') for field in hidden_inputs)
+    }
+
+    challenge = data.get("challenge")
+    if challenge not in (None, "", "0"):
+        extr.log.warning("Login requires solving a CAPTCHA")
+        extr.log.debug(challenge)
+
+    data.update(username=username, password=password, remember="on")
+
+    extr.sleep(2.0, "login")
+    response = extr.request(
+        "https://www.deviantart.com/_sisu/do/signin",
+        method="POST", data=data)
+
+    # a sign-in response without any redirect is treated as a failure
+    if not response.history:
+        raise exception.AuthenticationError()
+
+    cookies = {}
+    for cookie in extr.session.cookies:
+        cookies[cookie.name] = cookie.value
+    return cookies
+
+
###############################################################################
# Journal Formats #############################################################
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index f692a90..41431dc 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -69,14 +69,28 @@ class FanboxExtractor(Extractor):
if post["type"] == "article":
post["articleBody"] = content_body.copy()
if "blocks" in content_body:
- content = []
+ content = [] # text content
+ images = [] # image IDs in 'body' order
+
append = content.append
+ append_img = images.append
for block in content_body["blocks"]:
if "text" in block:
append(block["text"])
if "links" in block:
for link in block["links"]:
append(link["url"])
+ if "imageId" in block:
+ append_img(block["imageId"])
+
+ if images and "imageMap" in content_body:
+ # reorder 'imageMap' (#2718)
+ image_map = content_body["imageMap"]
+ content_body["imageMap"] = {
+ image_id: image_map[image_id]
+ for image_id in images
+ }
+
post["content"] = "\n".join(content)
post["date"] = text.parse_datetime(post["publishedDatetime"])
@@ -294,6 +308,10 @@ class FanboxPostExtractor(FanboxExtractor):
r"Thank you for your continued support of FANBOX.$",
},
}),
+ # imageMap file order (#2718)
+ ("https://mochirong.fanbox.cc/posts/3746116", {
+ "url": "c92ddd06f2efc4a5fe30ec67e21544f79a5c4062",
+ }),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/fanleaks.py b/gallery_dl/extractor/fanleaks.py
new file mode 100644
index 0000000..466bb8c
--- /dev/null
+++ b/gallery_dl/extractor/fanleaks.py
@@ -0,0 +1,127 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://fanleaks.club/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+
+class FanleaksExtractor(Extractor):
+    """Shared base for all fanleaks.club extractors"""
+    category = "fanleaks"
+    root = "https://fanleaks.club"
+    directory_fmt = ("{category}", "{model}")
+    filename_fmt = "{model_id}_{id}.{extension}"
+    archive_fmt = "{model_id}_{id}"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        # first URL group: the model name as it appears in the URL
+        self.model_id = match.group(1)
+
+    def extract_post(self, url):
+        """Fetch the post page at 'url' and yield its messages
+
+        Yields one Directory message followed by one Url message.
+        'self.id' is not assigned by this class; both subclasses set it
+        before calling this method.
+        """
+        page = self.request(url, notfound="post").text
+        extr = text.extract_from(page)
+
+        # NOTE(review): 'extr' presumably advances through the page with
+        # every call, so the original call order is kept - confirm
+        model = text.unescape(extr('text-lg">', "</a>"))
+        media_type = extr('type="', '"')[:5] or "photo"
+        file_url = extr('src="', '"')
+
+        post = {
+            "model_id": self.model_id,
+            "model": model,
+            "id": text.parse_int(self.id),
+            "type": media_type,
+        }
+        yield Message.Directory, post
+        yield Message.Url, file_url, text.nameext_from_url(file_url, post)
+
+class FanleaksPostExtractor(FanleaksExtractor):
+    """Extractor for individual posts on fanleaks.club"""
+    subcategory = "post"
+    pattern = r"(?:https?://)?(?:www\.)?fanleaks\.club/([^/?#]+)/(\d+)"
+    test = (
+        ("https://fanleaks.club/selti/880", {
+            "pattern": (r"https://fanleaks\.club//models"
+                        r"/selti/images/selti_0880\.jpg"),
+            "keyword": {
+                "model_id": "selti",
+                "model": "Selti",
+                "id": 880,
+                "type": "photo",
+            },
+        }),
+        ("https://fanleaks.club/daisy-keech/1038", {
+            "pattern": (r"https://fanleaks\.club//models"
+                        r"/daisy-keech/videos/daisy-keech_1038\.mp4"),
+            "keyword": {
+                "model_id": "daisy-keech",
+                "model": "Daisy Keech",
+                "id": 1038,
+                "type": "video",
+            },
+        }),
+        ("https://fanleaks.club/hannahowo/000", {
+            "exception": exception.NotFoundError,
+        }),
+    )
+
+    def __init__(self, match):
+        FanleaksExtractor.__init__(self, match)
+        # second URL group: the post number, kept as a string
+        self.id = match.group(2)
+
+    def items(self):
+        """Return the messages of the single post this URL points to"""
+        post_url = "{}/{}/{}".format(self.root, self.model_id, self.id)
+        return self.extract_post(post_url)
+
+
+class FanleaksModelExtractor(FanleaksExtractor):
+    """Extractor for every post of a fanleaks model"""
+    subcategory = "model"
+    pattern = (r"(?:https?://)?(?:www\.)?fanleaks\.club"
+               r"/(?!latest/?$)([^/?#]+)/?$")
+    test = (
+        ("https://fanleaks.club/hannahowo", {
+            "pattern": (r"https://fanleaks\.club//models"
+                        r"/hannahowo/(images|videos)/hannahowo_\d+\.\w+"),
+            "range": "1-100",
+            "count": 100,
+        }),
+        ("https://fanleaks.club/belle-delphine", {
+            "pattern": (r"https://fanleaks\.club//models"
+                        r"/belle-delphine/(images|videos)"
+                        r"/belle-delphine_\d+\.\w+"),
+            "range": "1-100",
+            "count": 100,
+        }),
+        ("https://fanleaks.club/daisy-keech"),
+    )
+
+    def items(self):
+        """Yield the messages for all posts listed for this model
+
+        Listing pages are requested one after another, starting at
+        page 1, until a request returns an empty body.
+        """
+        overview = self.request(
+            self.root + "/" + self.model_id, notfound="model").text
+        model = text.unescape(text.extr(overview, 'mt-4">', "</h1>"))
+        # one dict is reused (and updated) for every directly listed item
+        data = {
+            "model_id": self.model_id,
+            "model": model,
+            "type": "photo",
+        }
+        # URL prefix found in the overview page; the page number is
+        # appended to it for each listing request
+        page_url = text.extr(overview, "url: '", "'")
+
+        page_num = 0
+        while True:
+            page_num += 1
+            listing = self.request("{}{}".format(page_url, page_num)).text
+            if not listing:
+                return
+
+            for entry in text.extract_iter(listing, '<a href="/', "</a>"):
+                post_id = text.extr(entry, "/", '"')
+                # extract_post() reads the current ID from 'self.id'
+                self.id = post_id
+
+                if "/icon-play.svg" not in entry:
+                    data["id"] = text.parse_int(post_id)
+                    thumb = text.extr(entry, 'src="', '"')
+                    file_url = thumb.replace("/thumbs/", "/", 1)
+                    yield Message.Directory, data
+                    yield (Message.Url, file_url,
+                           text.nameext_from_url(file_url, data))
+                else:
+                    # entries with the play icon (presumably videos)
+                    # are resolved through their own post page
+                    post_url = "{}/{}/{}".format(
+                        self.root, self.model_id, post_id)
+                    yield from self.extract_post(post_url)
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index d8109e1..8d73949 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2022 Mike Fährmann
+# Copyright 2014-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -195,7 +195,7 @@ class GelbooruPostExtractor(GelbooruBase,
# notes
("https://gelbooru.com/index.php?page=post&s=view&id=5997331", {
"options": (("notes", True),),
- "keywords": {
+ "keyword": {
"notes": [
{
"body": "Look over this way when you talk~",
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index 56bd048..1efbbf0 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,25 +9,37 @@
"""Extractors for https://www.imagefap.com/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
import json
-
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com"
class ImagefapExtractor(Extractor):
"""Base class for imagefap extractors"""
category = "imagefap"
+ root = "https://www.imagefap.com"
directory_fmt = ("{category}", "{gallery_id} {title}")
filename_fmt = "{category}_{gallery_id}_{filename}.{extension}"
archive_fmt = "{gallery_id}_{image_id}"
- root = "https://www.imagefap.com"
+ request_interval = (2.0, 4.0)
def __init__(self, match):
Extractor.__init__(self, match)
self.session.headers["Referer"] = self.root
+    def request(self, url, **kwargs):
+        """Send a request and detect redirects to human verification
+
+        Raises StopExtraction with the page's message when the response
+        was redirected to a '/human-verification' URL and a message
+        could be extracted; otherwise only logs a warning about the
+        redirect. In all non-raising cases the response is returned.
+        """
+        response = Extractor.request(self, url, **kwargs)
+
+        redirected = (
+            response.history and
+            response.url.endswith("/human-verification"))
+        if not redirected:
+            return response
+
+        notice = text.extr(response.text, '<div class="mt-4', '<')
+        if notice:
+            # drop the rest of the opening tag, then normalize whitespace
+            _, _, notice = notice.partition(">")
+            raise exception.StopExtraction("'%s'", " ".join(notice.split()))
+
+        self.log.warning("HTTP redirect to %s", response.url)
+        return response
+
class ImagefapGalleryExtractor(ImagefapExtractor):
"""Extractor for image galleries from imagefap.com"""
@@ -41,12 +53,20 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
"keyword": "2ba96e84c2952c4750e9fa94a3f2b1f965cec2f3",
"content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab",
}),
- ("https://www.imagefap.com/gallery/5486966", {
+ ("https://www.imagefap.com/gallery/7876223", {
"pattern": r"https://cdnh?\.imagefap\.com"
r"/images/full/\d+/\d+/\d+\.jpg",
- "keyword": "8d2e562df7a0bc9e8eecb9d1bb68d32b4086bf98",
- "archive": False,
- "count": 62,
+ "keyword": {
+ "count": 44,
+ "gallery_id": 7876223,
+ "image_id": int,
+ "num": int,
+ "tags": ["big ass", "panties", "horny",
+ "pussy", "exposed", "outdoor"],
+ "title": "Kelsi Monroe in lingerie",
+ "uploader": "BdRachel",
+ },
+ "count": 44,
}),
("https://www.imagefap.com/gallery.php?gid=7102714"),
("https://beta.imagefap.com/gallery.php?gid=7102714"),
@@ -118,12 +138,20 @@ class ImagefapImageExtractor(ImagefapExtractor):
subcategory = "image"
pattern = BASE_PATTERN + r"/photo/(\d+)"
test = (
- ("https://www.imagefap.com/photo/1369341772/", {
+ ("https://www.imagefap.com/photo/1962981893", {
"pattern": r"https://cdnh?\.imagefap\.com"
- r"/images/full/\d+/\d+/\d+\.jpg",
- "keyword": "8894e45f7262020d8d66ce59917315def1fc475b",
+ r"/images/full/65/196/1962981893\.jpg",
+ "keyword": {
+ "date": "21/08/2014",
+ "gallery_id": 7876223,
+ "height": 1600,
+ "image_id": 1962981893,
+ "title": "Kelsi Monroe in lingerie",
+ "uploader": "BdRachel",
+ "width": 1066,
+ },
}),
- ("https://beta.imagefap.com/photo/1369341772/"),
+ ("https://beta.imagefap.com/photo/1962981893"),
)
def __init__(self, match):
@@ -159,61 +187,70 @@ class ImagefapImageExtractor(ImagefapExtractor):
})
-class ImagefapUserExtractor(ImagefapExtractor):
- """Extractor for all galleries from a user at imagefap.com"""
- subcategory = "user"
- categorytransfer = True
- pattern = (BASE_PATTERN +
- r"/(?:profile(?:\.php\?user=|/)([^/?#]+)"
- r"|usergallery\.php\?userid=(\d+))")
+class ImagefapFolderExtractor(ImagefapExtractor):
+ """Extractor for imagefap user folders"""
+ subcategory = "folder"
+ pattern = (BASE_PATTERN + r"/(?:organizer/|"
+ r"(?:usergallery\.php\?user(id)?=([^&#]+)&"
+ r"|profile/([^/?#]+)/galleries\?)folderid=)(\d+|-1)")
test = (
- ("https://www.imagefap.com/profile/LucyRae/galleries", {
- "url": "822cb6cbb6f474ca2d0f58d1d6d253bc2338937a",
+ ("https://www.imagefap.com/organizer/409758", {
+ "pattern": r"https://www\.imagefap\.com/gallery/7876223",
+ "url": "37822523e6e4a56feb9dea35653760c86b44ff89",
+ "count": 1,
}),
- ("https://www.imagefap.com/usergallery.php?userid=1862791", {
- "url": "822cb6cbb6f474ca2d0f58d1d6d253bc2338937a",
+ (("https://www.imagefap.com/usergallery.php"
+ "?userid=1981976&folderid=409758"), {
+ "url": "37822523e6e4a56feb9dea35653760c86b44ff89",
+ }),
+ (("https://www.imagefap.com/usergallery.php"
+ "?user=BdRachel&folderid=409758"), {
+ "url": "37822523e6e4a56feb9dea35653760c86b44ff89",
+ }),
+ ("https://www.imagefap.com/profile/BdRachel/galleries?folderid=-1", {
+ "pattern": ImagefapGalleryExtractor.pattern,
+ "range": "1-40",
+ }),
+ (("https://www.imagefap.com/usergallery.php"
+ "?userid=1981976&folderid=-1"), {
+ "pattern": ImagefapGalleryExtractor.pattern,
+ "range": "1-40",
+ }),
+ (("https://www.imagefap.com/usergallery.php"
+ "?user=BdRachel&folderid=-1"), {
+ "pattern": ImagefapGalleryExtractor.pattern,
+ "range": "1-40",
}),
- ("https://www.imagefap.com/profile.php?user=LucyRae"),
- ("https://beta.imagefap.com/profile.php?user=LucyRae"),
)
def __init__(self, match):
ImagefapExtractor.__init__(self, match)
- self.user, self.user_id = match.groups()
+ self._id, user, profile, self.folder_id = match.groups()
+ self.user = user or profile
def items(self):
- for folder_id in self.folders():
- for gallery_id, name in self.galleries(folder_id):
- url = "{}/gallery/{}".format(self.root, gallery_id)
- data = {
- "gallery_id": text.parse_int(gallery_id),
- "title" : text.unescape(name),
- "_extractor": ImagefapGalleryExtractor,
- }
- yield Message.Queue, url, data
-
- def folders(self):
- """Return a list of folder_ids of a specific user"""
- if self.user:
- url = "{}/profile/{}/galleries".format(self.root, self.user)
- else:
- url = "{}/usergallery.php?userid={}".format(
- self.root, self.user_id)
-
- response = self.request(url)
- self.user = response.url.split("/")[-2]
- folders = text.extr(response.text, ' id="tgl_all" value="', '"')
- return folders.rstrip("|").split("|")
+ for gallery_id, name in self.galleries(self.folder_id):
+ url = "{}/gallery/{}".format(self.root, gallery_id)
+ data = {
+ "gallery_id": gallery_id,
+ "title" : text.unescape(name),
+ "_extractor": ImagefapGalleryExtractor,
+ }
+ yield Message.Queue, url, data
def galleries(self, folder_id):
- """Yield gallery_ids of a folder"""
+ """Yield gallery IDs and titles of a folder"""
if folder_id == "-1":
- url = "{}/profile/{}/galleries?folderid=-1".format(
- self.root, self.user)
+ if self._id:
+ url = "{}/usergallery.php?userid={}&folderid=-1".format(
+ self.root, self.user)
+ else:
+ url = "{}/profile/{}/galleries?folderid=-1".format(
+ self.root, self.user)
else:
url = "{}/organizer/{}/".format(self.root, folder_id)
- params = {"page": 0}
+ params = {"page": 0}
while True:
extr = text.extract_from(self.request(url, params=params).text)
cnt = 0
@@ -228,3 +265,53 @@ class ImagefapUserExtractor(ImagefapExtractor):
if cnt < 25:
break
params["page"] += 1
+
+
+class ImagefapUserExtractor(ImagefapExtractor):
+    """Extractor for the folders of an imagefap user profile"""
+    subcategory = "user"
+    pattern = (BASE_PATTERN +
+               r"/(?:profile(?:\.php\?user=|/)([^/?#]+)(?:/galleries)?"
+               r"|usergallery\.php\?userid=(\d+))(?:$|#)")
+    test = (
+        ("https://www.imagefap.com/profile/BdRachel", {
+            "pattern": ImagefapFolderExtractor.pattern,
+            "count": ">= 18",
+        }),
+        ("https://www.imagefap.com/usergallery.php?userid=1862791", {
+            "pattern": r"https://www\.imagefap\.com"
+                       r"/profile/LucyRae/galleries\?folderid=-1",
+            "count": 1,
+        }),
+        ("https://www.imagefap.com/profile/BdRachel/galleries"),
+        ("https://www.imagefap.com/profile.php?user=BdRachel"),
+        ("https://beta.imagefap.com/profile.php?user=BdRachel"),
+    )
+
+    def __init__(self, match):
+        ImagefapExtractor.__init__(self, match)
+        # the pattern is an alternation, so only one of the two groups
+        # (user name or numeric user ID) is set for a given URL
+        self.user, self.user_id = match.groups()
+
+    def items(self):
+        """Yield one Queue message per folder, for ImagefapFolderExtractor"""
+        # a single dict is reused for all messages
+        data = {"_extractor": ImagefapFolderExtractor}
+
+        # folders() must run first: it also updates 'self.user'
+        for folder_id in self.folders():
+            if folder_id != "-1":
+                url = "{}/organizer/{}/".format(self.root, folder_id)
+            else:
+                url = "{}/profile/{}/galleries?folderid=-1".format(
+                    self.root, self.user)
+            yield Message.Queue, url, data
+
+    def folders(self):
+        """Return the folder IDs of a user as a list of strings
+
+        Side effect: 'self.user' is replaced by the second-to-last
+        path segment of the final response URL.
+        """
+        url = (
+            "{}/profile/{}/galleries".format(self.root, self.user)
+            if self.user else
+            "{}/usergallery.php?userid={}".format(self.root, self.user_id)
+        )
+
+        response = self.request(url)
+        self.user = response.url.split("/")[-2]
+
+        # '|'-separated ID list taken from the 'tgl_all' element
+        folder_ids = text.extr(response.text, ' id="tgl_all" value="', '"')
+        return folder_ids.rstrip("|").split("|")
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 8a61728..541e427 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -67,6 +67,7 @@ class KemonopartyExtractor(Extractor):
headers["Referer"] = "{}/{}/user/{}/post/{}".format(
self.root, post["service"], post["user"], post["id"])
post["_http_headers"] = headers
+ post["_http_validate"] = _validate
post["date"] = text.parse_datetime(
post["published"] or post["added"],
"%a, %d %b %Y %H:%M:%S %Z")
@@ -197,6 +198,11 @@ class KemonopartyExtractor(Extractor):
return dms
+def _validate(response):
+    """Return False if 'response' is a "not found" placeholder file
+
+    A response is rejected only when its Content-Length header is "9"
+    and its body is exactly b"not found"; everything else is accepted.
+    """
+    # Test the header first and combine with 'or': 'response.content'
+    # reads the complete body, so it must only be evaluated for 9-byte
+    # responses and not for every regular file download.
+    # '.get()' keeps responses without a Content-Length header from
+    # raising KeyError; they are accepted without reading the body.
+    return (response.headers.get("content-length") != "9" or
+            response.content != b"not found")
+
+
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
subcategory = "user"
@@ -309,6 +315,12 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
"pattern": r"https://coomer\.party/data/7d/3f/7d3fd9804583dc224968"
r"c0591163ec91794552b04f00a6c2f42a15b68231d5a8\.jpg",
}),
+ # invalid file (#3510)
+ ("https://kemono.party/patreon/user/19623797/post/29035449", {
+ "pattern": r"907ba78b4545338d3539683e63ecb51c"
+ r"f51c10adc9dabd86e92bd52339f298b9\.txt",
+ "content": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
+ }),
("https://kemono.party/subscribestar/user/alcorart/post/184330"),
("https://www.kemono.party/subscribestar/user/alcorart/post/184330"),
("https://beta.kemono.party/subscribestar/user/alcorart/post/184330"),
diff --git a/gallery_dl/extractor/lynxchan.py b/gallery_dl/extractor/lynxchan.py
index bbcf9c0..85e8bb1 100644
--- a/gallery_dl/extractor/lynxchan.py
+++ b/gallery_dl/extractor/lynxchan.py
@@ -17,9 +17,13 @@ class LynxchanExtractor(BaseExtractor):
BASE_PATTERN = LynxchanExtractor.update({
+ "bbw-chan": {
+ "root": "https://bbw-chan.nl",
+ "pattern": r"bbw-chan\.nl",
+ },
"kohlchan": {
"root": "https://kohlchan.net",
- "pattern": r"kohlchan\.net"
+ "pattern": r"kohlchan\.net",
},
"endchan": {
"root": None,
@@ -37,6 +41,11 @@ class LynxchanThreadExtractor(LynxchanExtractor):
archive_fmt = "{boardUri}_{postId}_{num}"
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
test = (
+ ("https://bbw-chan.nl/bbwdraw/res/499.html", {
+ "pattern": r"https://bbw-chan\.nl/\.media/[0-9a-f]{64}(\.\w+)?$",
+ "count": ">= 352",
+ }),
+ ("https://bbw-chan.nl/bbwdraw/res/489.html"),
("https://kohlchan.net/a/res/4594.html", {
"pattern": r"https://kohlchan\.net/\.media/[0-9a-f]{64}(\.\w+)?$",
"count": ">= 80",
@@ -78,6 +87,11 @@ class LynxchanBoardExtractor(LynxchanExtractor):
subcategory = "board"
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
test = (
+ ("https://bbw-chan.nl/bbwdraw/", {
+ "pattern": LynxchanThreadExtractor.pattern,
+ "count": ">= 148",
+ }),
+ ("https://bbw-chan.nl/bbwdraw/2.html"),
("https://kohlchan.net/a/", {
"pattern": LynxchanThreadExtractor.pattern,
"count": ">= 100",
diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py
index 3dbd5fc..5dc4cb6 100644
--- a/gallery_dl/extractor/myhentaigallery.py
+++ b/gallery_dl/extractor/myhentaigallery.py
@@ -44,7 +44,10 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor):
extr = text.extract_from(page)
split = text.split_html
- title = extr('<div class="comic-description">\n<h1>', '</h1>')
+ title = extr('<div class="comic-description">\n', '</h1>').lstrip()
+ if title.startswith("<h1>"):
+ title = title[len("<h1>"):]
+
if not title:
raise exception.NotFoundError("gallery")
diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py
index dfe78ae..f9c6abf 100644
--- a/gallery_dl/extractor/nitter.py
+++ b/gallery_dl/extractor/nitter.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -451,7 +451,7 @@ class NitterTweetExtractor(NitterExtractor):
}),
# age-restricted (#2354)
("https://nitter.unixfox.eu/mightbecurse/status/1492954264909479936", {
- "keywords": {"date": "dt:2022-02-13 20:10:09"},
+ "keyword": {"date": "dt:2022-02-13 20:10:00"},
"count": 1,
}),
)
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index f786be6..63b16ce 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -26,6 +26,13 @@ class PinterestExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
+
+ domain = self.config("domain")
+ if not domain or domain == "auto" :
+ self.root = text.root_from_url(match.group(0))
+ else:
+ self.root = text.ensure_http_scheme(domain)
+
self.api = PinterestAPI(self)
def items(self):
@@ -142,7 +149,7 @@ class PinterestBoardExtractor(PinterestExtractor):
directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}")
archive_fmt = "{board[id]}_{id}"
pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#&]+)"
- "/(?!_saved|_created)([^/?#&]+)/?$")
+ "/(?!_saved|_created|pins/)([^/?#&]+)/?$")
test = (
("https://www.pinterest.com/g1952849/test-/", {
"pattern": r"https://i\.pinimg\.com/originals/",
@@ -151,7 +158,7 @@ class PinterestBoardExtractor(PinterestExtractor):
# board with sections (#835)
("https://www.pinterest.com/g1952849/stuff/", {
"options": (("sections", True),),
- "count": 5,
+ "count": 4,
}),
# secret board (#1055)
("https://www.pinterest.de/g1952849/secret/", {
@@ -194,11 +201,11 @@ class PinterestUserExtractor(PinterestExtractor):
subcategory = "user"
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)(?:/_saved)?/?$"
test = (
- ("https://www.pinterest.de/g1952849/", {
+ ("https://www.pinterest.com/g1952849/", {
"pattern": PinterestBoardExtractor.pattern,
"count": ">= 2",
}),
- ("https://www.pinterest.de/g1952849/_saved/"),
+ ("https://www.pinterest.com/g1952849/_saved/"),
)
def __init__(self, match):
@@ -213,15 +220,38 @@ class PinterestUserExtractor(PinterestExtractor):
yield Message.Queue, self.root + url, board
+class PinterestAllpinsExtractor(PinterestExtractor):
+    """Extractor for the 'All Pins' feed of a user"""
+    subcategory = "allpins"
+    directory_fmt = ("{category}", "{user}")
+    pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/pins/?$"
+    test = ("https://www.pinterest.com/g1952849/pins/", {
+        "pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}"
+                   r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w{3}",
+        "count": 7,
+    })
+
+    def __init__(self, match):
+        PinterestExtractor.__init__(self, match)
+        # first URL group: the user name, passed through text.unquote()
+        username = match.group(1)
+        self.user = text.unquote(username)
+
+    def metadata(self):
+        """Return the metadata dict, which holds only the user name"""
+        return {"user": self.user}
+
+    def pins(self):
+        """Return the result of the API's user_pins() for this user"""
+        pins = self.api.user_pins(self.user)
+        return pins
+
+
class PinterestCreatedExtractor(PinterestExtractor):
"""Extractor for a user's created pins"""
subcategory = "created"
directory_fmt = ("{category}", "{user}")
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/_created/?$"
- test = ("https://www.pinterest.com/amazon/_created", {
+ test = ("https://www.pinterest.de/digitalmomblog/_created/", {
"pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}"
r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.jpg",
"count": 10,
+ "range": "1-10",
})
def __init__(self, match):
@@ -272,7 +302,7 @@ class PinterestSearchExtractor(PinterestExtractor):
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
pattern = BASE_PATTERN + r"/search/pins/?\?q=([^&#]+)"
- test = ("https://www.pinterest.de/search/pins/?q=nature", {
+ test = ("https://www.pinterest.com/search/pins/?q=nature", {
"range": "1-50",
"count": ">= 50",
})
@@ -357,26 +387,23 @@ class PinterestAPI():
- https://github.com/seregazhuk/php-pinterest-bot
"""
- BASE_URL = "https://www.pinterest.com"
- HEADERS = {
- "Accept" : "application/json, text/javascript, "
- "*/*, q=0.01",
- "Accept-Language" : "en-US,en;q=0.5",
- "Referer" : BASE_URL + "/",
- "X-Requested-With" : "XMLHttpRequest",
- "X-APP-VERSION" : "31461e0",
- "X-CSRFToken" : None,
- "X-Pinterest-AppState": "active",
- "Origin" : BASE_URL,
- }
-
def __init__(self, extractor):
- self.extractor = extractor
-
csrf_token = util.generate_token()
- self.headers = self.HEADERS.copy()
- self.headers["X-CSRFToken"] = csrf_token
+
+ self.extractor = extractor
+ self.root = extractor.root
self.cookies = {"csrftoken": csrf_token}
+ self.headers = {
+ "Accept" : "application/json, text/javascript, "
+ "*/*, q=0.01",
+ "Accept-Language" : "en-US,en;q=0.5",
+ "Referer" : self.root + "/",
+ "X-Requested-With" : "XMLHttpRequest",
+ "X-APP-VERSION" : "0c4af40",
+ "X-CSRFToken" : csrf_token,
+ "X-Pinterest-AppState": "active",
+ "Origin" : self.root,
+ }
def pin(self, pin_id):
"""Query information about a pin"""
@@ -437,6 +464,16 @@ class PinterestAPI():
options = {"board_id": board_id, "add_vase": True}
return self._pagination("BoardRelatedPixieFeed", options)
+ def user_pins(self, user):
+ """Yield all pins from 'user'"""
+ options = {
+ "is_own_profile_pins": False,
+ "username" : user,
+ "field_set_key" : "grid_item",
+ "pin_filter" : None,
+ }
+ return self._pagination("UserPins", options)
+
def user_activity_pins(self, user):
"""Yield pins created by 'user'"""
options = {
@@ -462,7 +499,7 @@ class PinterestAPI():
def _login_impl(self, username, password):
self.extractor.log.info("Logging in as %s", username)
- url = self.BASE_URL + "/resource/UserSessionResource/create/"
+ url = self.root + "/resource/UserSessionResource/create/"
options = {
"username_or_email": username,
"password" : password,
@@ -485,7 +522,7 @@ class PinterestAPI():
}
def _call(self, resource, options):
- url = "{}/resource/{}Resource/get/".format(self.BASE_URL, resource)
+ url = "{}/resource/{}Resource/get/".format(self.root, resource)
params = {"data": json.dumps({"options": options}), "source_url": ""}
response = self.extractor.request(
@@ -497,10 +534,11 @@ class PinterestAPI():
except ValueError:
data = {}
- if response.status_code < 400 and not response.history:
+ if response.history:
+ self.root = text.root_from_url(response.url)
+ if response.status_code < 400:
return data
-
- if response.status_code == 404 or response.history:
+ if response.status_code == 404:
resource = self.extractor.subcategory.rpartition("-")[2]
raise exception.NotFoundError(resource)
self.extractor.log.debug("Server response: %s", response.text)
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 134361d..a17518f 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2022 Mike Fährmann
+# Copyright 2014-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -45,7 +45,8 @@ class PixivExtractor(Extractor):
work["tags"] = [tag["name"] for tag in work["tags"]]
ratings = {0: "General", 1: "R-18", 2: "R-18G"}
- userdata = self.config("metadata")
+ meta_user = self.config("metadata")
+ meta_bookmark = self.config("metadata-bookmark")
metadata = self.metadata()
works = self.works()
@@ -61,8 +62,12 @@ class PixivExtractor(Extractor):
del work["image_urls"]
del work["meta_pages"]
- if userdata:
+ if meta_user:
work.update(self.api.user_detail(work["user"]["id"]))
+ if meta_bookmark and work["is_bookmarked"]:
+ detail = self.api.illust_bookmark_detail(work["id"])
+ work["tags_bookmark"] = [tag["name"] for tag in detail["tags"]
+ if tag["is_registered"]]
if transform_tags:
transform_tags(work)
work["num"] = 0
@@ -398,6 +403,8 @@ class PixivFavoriteExtractor(PixivExtractor):
# own bookmarks
("https://www.pixiv.net/bookmark.php", {
"url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
+ "keyword": {"tags_bookmark": ["47", "hitman"]},
+ "options": (("metadata-bookmark", True),),
}),
# own bookmarks with tag (#596)
("https://www.pixiv.net/bookmark.php?tag=foobar", {
@@ -880,6 +887,11 @@ class PixivAppAPI():
params = {"illust_id": illust_id}
return self._call("/v1/illust/detail", params)["illust"]
+ def illust_bookmark_detail(self, illust_id):
+ params = {"illust_id": illust_id}
+ return self._call(
+ "/v2/illust/bookmark/detail", params)["bookmark_detail"]
+
def illust_follow(self, restrict="all"):
params = {"restrict": restrict}
return self._pagination("/v2/illust/follow", params)
@@ -900,9 +912,16 @@ class PixivAppAPI():
return self._pagination("/v1/search/illust", params)
def user_bookmarks_illust(self, user_id, tag=None, restrict="public"):
+ """Return illusts bookmarked by a user"""
params = {"user_id": user_id, "tag": tag, "restrict": restrict}
return self._pagination("/v1/user/bookmarks/illust", params)
+ def user_bookmark_tags_illust(self, user_id, restrict="public"):
+ """Return bookmark tags defined by a user"""
+ params = {"user_id": user_id, "restrict": restrict}
+ return self._pagination(
+ "/v1/user/bookmark-tags/illust", params, "bookmark_tags")
+
@memcache(keyarg=1)
def user_detail(self, user_id):
params = {"user_id": user_id}
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index 4283081..c35ee74 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -59,7 +59,7 @@ class PoipikuExtractor(Extractor):
"//img.", "//img-org.", 1)
yield Message.Url, url, text.nameext_from_url(url, post)
- if not extr('> show all', '<'):
+ if not extr(' show all(+', '<'):
continue
url = self.root + "/f/ShowAppendFileF.jsp"
@@ -79,6 +79,9 @@ class PoipikuExtractor(Extractor):
page = self.request(
url, method="POST", headers=headers, data=data).json()["html"]
+ if page.startswith("You need to"):
+ self.log.warning("'%s'", page)
+
for thumb in text.extract_iter(
page, 'class="IllustItemThumbImg" src="', '"'):
post["num"] += 1
@@ -162,6 +165,21 @@ class PoipikuPostExtractor(PoipikuExtractor):
"user_name": "wadahito",
},
}),
+ # different warning button style
+ ("https://poipiku.com/3572553/5776587.html", {
+ "pattern": r"https://img-org\.poipiku.com/user_img\d+/003572553"
+ r"/005776587_(\d+_)?\w+\.jpeg$",
+ "count": 3,
+ "keyword": {
+ "count": "3",
+ "description": "ORANGE OASISボスネタバレ",
+ "num": int,
+ "post_category": "SPOILER",
+ "post_id": "5776587",
+ "user_id": "3572553",
+ "user_name": "nagakun",
+ },
+ }),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/tcbscans.py b/gallery_dl/extractor/tcbscans.py
new file mode 100644
index 0000000..cac5a54
--- /dev/null
+++ b/gallery_dl/extractor/tcbscans.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://onepiecechapters.com/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+
+
+class TcbscansChapterExtractor(ChapterExtractor):
+ category = "tcbscans"
+ pattern = (r"(?:https?://)?onepiecechapters\.com"
+ r"(/chapters/\d+/[^/?#]+)")
+ root = "https://onepiecechapters.com"
+ test = (
+ (("https://onepiecechapters.com"
+ "/chapters/4708/chainsaw-man-chapter-108"), {
+ "pattern": (r"https://cdn\.[^/]+"
+ r"/(file|attachments/[^/]+)/[^/]+/[^.]+\.\w+"),
+ "count" : 17,
+ "keyword": {
+ "manga": "Chainsaw Man",
+ "chapter": 108,
+ "chapter_minor": "",
+ "lang": "en",
+ "language": "English",
+ },
+ }),
+ ("https://onepiecechapters.com/chapters/4716/one-piece-chapter-1065", {
+ "pattern": (r"https://cdn\.[^/]+"
+ r"/(file|attachments/[^/]+)/[^/]+/[^.]+\.\w+"),
+ "count" : 18,
+ "keyword": {
+ "manga": "One Piece",
+ "chapter": 1065,
+ "chapter_minor": "",
+ "lang": "en",
+ "language": "English",
+ },
+ }),
+ (("https://onepiecechapters.com/"
+ "chapters/44/ace-novel-manga-adaptation-chapter-1")),
+ )
+
+ def images(self, page):
+ return [
+ (url, None)
+ for url in text.extract_iter(
+ page, '<img class="fixed-ratio-content" src="', '"')
+ ]
+
+ def metadata(self, page):
+ manga, _, chapter = text.extr(
+ page, 'font-bold mt-8">', "</h1>").rpartition(" - Chapter ")
+ chapter, sep, minor = chapter.partition(".")
+ return {
+ "manga": text.unescape(manga),
+ "chapter": text.parse_int(chapter),
+ "chapter_minor": sep + minor,
+ "lang": "en", "language": "English",
+ }
+
+
+class TcbscansMangaExtractor(MangaExtractor):
+ category = "tcbscans"
+ chapterclass = TcbscansChapterExtractor
+ pattern = (r"(?:https?://)?onepiecechapters\.com"
+ r"(/mangas/\d+/[^/?#]+)")
+ root = "https://onepiecechapters.com"
+ test = (
+ ("https://onepiecechapters.com/mangas/13/chainsaw-man", {
+ "pattern": TcbscansChapterExtractor.pattern,
+ "range" : "1-50",
+ "count" : 50,
+ }),
+ ("https://onepiecechapters.com/mangas/4/jujutsu-kaisen", {
+ "pattern": TcbscansChapterExtractor.pattern,
+ "range" : "1-50",
+ "count" : 50,
+ }),
+ ("https://onepiecechapters.com/mangas/15/hunter-x-hunter"),
+ )
+
+ def chapters(self, page):
+ data = {
+ "manga": text.unescape(text.extr(
+ page, 'class="my-3 font-bold text-3xl">', "</h1>")),
+ "lang": "en", "language": "English",
+ }
+
+ results = []
+ page = text.extr(page, 'class="col-span-2"', 'class="order-1')
+ for chapter in text.extract_iter(page, "<a", "</a>"):
+ url = text.extr(chapter, 'href="', '"')
+ data["title"] = text.unescape(text.extr(
+ chapter, 'text-gray-500">', "</div>"))
+ chapter = text.extr(
+ chapter, 'font-bold">', "</div>").rpartition(" Chapter ")[2]
+ chapter, sep, minor = chapter.partition(".")
+ data["chapter"] = text.parse_int(chapter)
+ data["chapter_minor"] = sep + minor
+ results.append((self.root + url, data.copy()))
+ return results
diff --git a/gallery_dl/extractor/telegraph.py b/gallery_dl/extractor/telegraph.py
index 8e9bf2c..5996268 100644
--- a/gallery_dl/extractor/telegraph.py
+++ b/gallery_dl/extractor/telegraph.py
@@ -12,7 +12,6 @@ from .. import text
class TelegraphGalleryExtractor(GalleryExtractor):
"""Extractor for articles from telegra.ph"""
-
category = "telegraph"
root = "https://telegra.ph"
directory_fmt = ("{category}", "{slug}")
@@ -52,6 +51,23 @@ class TelegraphGalleryExtractor(GalleryExtractor):
"url": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg",
},
}),
+ ("https://telegra.ph/Vsyo-o-druzyah-moej-sestricy-05-27", {
+ "url": "c1f3048e5d94bee53af30a8c27f70b0d3b15438e",
+ "pattern": r"^https://pith1\.ru/uploads"
+ r"/posts/2019-12/\d+_\d+\.jpg$",
+ "keyword": {
+ "author": "Shotacon - заходи сюда",
+ "caption": "",
+ "count": 19,
+ "date": "dt:2022-05-27 16:17:27",
+ "description": "",
+ "num_formatted": r"re:^\d{2}$",
+ "post_url": "https://telegra.ph"
+ "/Vsyo-o-druzyah-moej-sestricy-05-27",
+ "slug": "Vsyo-o-druzyah-moej-sestricy-05-27",
+ "title": "Всё о друзьях моей сестрицы",
+ },
+ }),
)
def metadata(self, page):
@@ -79,11 +95,12 @@ class TelegraphGalleryExtractor(GalleryExtractor):
result = []
for figure in figures:
- src, pos = text.extract(figure, 'src="', '"')
- if src.startswith("/embed/"):
+ url, pos = text.extract(figure, 'src="', '"')
+ if url.startswith("/embed/"):
continue
+ elif url.startswith("/"):
+ url = self.root + url
caption, pos = text.extract(figure, "<figcaption>", "<", pos)
- url = self.root + src
num += 1
result.append((url, {
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 22aa78e..c2d8247 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -39,6 +39,7 @@ class TwitterExtractor(Extractor):
self.videos = self.config("videos", True)
self.cards = self.config("cards", False)
self.cards_blacklist = self.config("cards-blacklist")
+ self.syndication = self.config("syndication")
self._user = self._user_obj = None
self._user_cache = {}
self._init_sizes()
@@ -75,11 +76,6 @@ class TwitterExtractor(Extractor):
else:
data = tweet
- if seen_tweets is not None:
- if data["id_str"] in seen_tweets:
- continue
- seen_tweets.add(data["id_str"])
-
if not self.retweets and "retweeted_status_id_str" in data:
self.log.debug("Skipping %s (retweet)", data["id_str"])
continue
@@ -97,6 +93,13 @@ class TwitterExtractor(Extractor):
self.log.debug("Skipping %s (reply)", data["id_str"])
continue
+ if seen_tweets is not None:
+ if data["id_str"] in seen_tweets:
+ self.log.debug(
+ "Skipping %s (previously seen)", data["id_str"])
+ continue
+ seen_tweets.add(data["id_str"])
+
files = []
if "extended_entities" in data:
self._extract_media(
@@ -220,14 +223,16 @@ class TwitterExtractor(Extractor):
def _extract_twitpic(self, tweet, files):
for url in tweet["entities"].get("urls", ()):
url = url["expanded_url"]
- if "//twitpic.com/" in url and "/photos/" not in url:
- response = self.request(url, fatal=False)
- if response.status_code >= 400:
- continue
- url = text.extr(
- response.text, 'name="twitter:image" value="', '"')
- if url:
- files.append({"url": url})
+ if "//twitpic.com/" not in url or "/photos/" in url:
+ continue
+ if url.startswith("http:"):
+ url = "https" + url[4:]
+ response = self.request(url, fatal=False)
+ if response.status_code >= 400:
+ continue
+ url = text.extr(response.text, 'name="twitter:image" value="', '"')
+ if url:
+ files.append({"url": url})
def _transform_tweet(self, tweet):
if "author" in tweet:
@@ -299,6 +304,9 @@ class TwitterExtractor(Extractor):
if "legacy" in user:
user = user["legacy"]
+ elif "statuses_count" not in user and self.syndication == "extended":
+ # try to fetch extended user data
+ user = self.api.user_by_screen_name(user["screen_name"])["legacy"]
uget = user.get
entities = user["entities"]
@@ -361,18 +369,22 @@ class TwitterExtractor(Extractor):
def _expand_tweets(self, tweets):
seen = set()
for tweet in tweets:
-
- if "legacy" in tweet:
- cid = tweet["legacy"]["conversation_id_str"]
- else:
- cid = tweet["conversation_id_str"]
-
- if cid not in seen:
- seen.add(cid)
- try:
- yield from self.api.tweet_detail(cid)
- except Exception:
- yield tweet
+ obj = tweet["legacy"] if "legacy" in tweet else tweet
+ cid = obj.get("conversation_id_str")
+ if not cid:
+ tid = obj["id_str"]
+ self.log.warning(
+ "Unable to expand %s (no 'conversation_id')", tid)
+ continue
+ if cid in seen:
+ self.log.debug(
+ "Skipping expansion of %s (previously seen)", cid)
+ continue
+ seen.add(cid)
+ try:
+ yield from self.api.tweet_detail(cid)
+ except Exception:
+ yield tweet
def _make_tweet(self, user, id_str, url, timestamp):
return {
@@ -772,7 +784,7 @@ class TwitterTweetExtractor(TwitterExtractor):
# age-restricted (#2354)
("https://twitter.com/mightbecursed/status/1492954264909479936", {
"options": (("syndication", True),),
- "keywords": {"date": "dt:2022-02-13 20:10:09"},
+ "keyword": {"date": "dt:2022-02-13 20:10:09"},
"count": 1,
}),
# media alt texts / descriptions (#2617)
@@ -991,7 +1003,7 @@ class TwitterAPI():
}
self._nsfw_warning = True
- self._syndication = extractor.config("syndication")
+ self._syndication = self.extractor.syndication
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
cookies = extractor.session.cookies
@@ -1516,6 +1528,12 @@ class TwitterAPI():
else:
retweet_id = None
+ # assume 'conversation_id' is the same as 'id' when the tweet
+ # is not a reply
+ if "conversation_id_str" not in tweet and \
+ "in_reply_to_status_id_str" not in tweet:
+ tweet["conversation_id_str"] = tweet["id_str"]
+
tweet["created_at"] = text.parse_datetime(
tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime(
"%a %b %d %H:%M:%S +0000 %Y")
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 9b6831b..5692452 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -110,7 +110,7 @@ class VkPhotosExtractor(VkExtractor):
"pattern": r"https://sun\d+-\d+\.userapi\.com/s/v1/if1"
r"/[\w-]+\.jpg\?size=\d+x\d+&quality=96&type=album",
"count": ">= 35",
- "keywords": {
+ "keyword": {
"id": r"re:\d+",
"user": {
"id": "398982326",
@@ -122,12 +122,11 @@ class VkPhotosExtractor(VkExtractor):
}),
("https://vk.com/cosplayinrussia", {
"range": "15-25",
- "keywords": {
+ "keyword": {
"id": r"re:\d+",
"user": {
"id" : "-165740836",
- "info": "Предложка открыта, кидайте ваши косплейчики. При "
- "правильном оформлении они будут опубликованы",
+ "info": str,
"name": "cosplayinrussia",
"nick": "Косплей | Cosplay 18+",
},
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index 74da615..03fd909 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -111,13 +111,15 @@ class ZerochanTagExtractor(ZerochanExtractor):
test = ("https://www.zerochan.net/Perth+%28Kantai+Collection%29", {
"pattern": r"https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)",
"count": "> 24",
- "keywords": {
+ "keyword": {
"extension": r"re:jpg|png",
- "file_url": "",
- "filename": r"re:Perth.\(Kantai.Collection\).full.\d+",
+ "file_url": r"re:https://static\.zerochan\.net"
+ r"/.+\.full\.\d+\.(jpg|png)",
+ "filename": r"re:(Perth\.\(Kantai\.Collection\)"
+ r"|Kantai\.Collection)\.full\.\d+",
"height": r"re:^\d+$",
"id": r"re:^\d+$",
- "name": "Perth (Kantai Collection)",
+ "name": r"re:(Perth \(Kantai Collection\)|Kantai Collection)",
"search_tags": "Perth (Kantai Collection)",
"size": r"re:^\d+k$",
"width": r"re:^\d+$",
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index 8a45330..58bf48d 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -60,14 +60,21 @@ class StringFormatter():
- "u": calls str.upper
- "c": calls str.capitalize
- "C": calls string.capwords
- - "j". calls json.dumps
+ - "g": calls text.slugify()
+ - "j": calls json.dumps
- "t": calls str.strip
+ - "T": calls util.datetime_to_timestamp_string()
- "d": calls text.parse_timestamp
- - "U": calls urllib.parse.unescape
+ - "s": calls str()
- "S": calls util.to_string()
- - "T": calls util.to_timestamü()
+ - "U": calls urllib.parse.unescape
+ - "r": calls repr()
+ - "a": calls ascii()
- Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"
+ # Go to _CONVERSIONS and _SPECIFIERS below to see all of them, read:
+ # https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md
+
Extra Format Specifiers:
- "?<before>/<after>/":
Adds <before> and <after> to the actual value if it evaluates to True.
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 91e9169..32cac79 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -325,7 +325,7 @@ def build_parser():
configuration.add_argument(
"--ignore-config",
dest="load_config", action="store_false",
- help="Do not read the default configuration files",
+ help="Do not read default configuration files",
)
authentication = parser.add_argument_group("Authentication Options")
@@ -349,7 +349,7 @@ def build_parser():
selection.add_argument(
"--download-archive",
dest="archive", metavar="FILE", action=ConfigAction,
- help=("Record all downloaded files in the archive file and "
+ help=("Record all downloaded or skipped files in FILE and "
"skip downloading any file already in it"),
)
selection.add_argument(
@@ -367,19 +367,20 @@ def build_parser():
selection.add_argument(
"--range",
dest="image-range", metavar="RANGE", action=ConfigAction,
- help=("Index-range(s) specifying which images to download. "
- "For example '5-10' or '1,3-5,10-'"),
+ help=("Index range(s) specifying which files to download. "
+ "These can be either a constant value, range, or slice "
+ "(e.g. '5', '8-20', or '1:24:3')"),
)
selection.add_argument(
"--chapter-range",
dest="chapter-range", metavar="RANGE", action=ConfigAction,
- help=("Like '--range', but applies to manga-chapters "
+ help=("Like '--range', but applies to manga chapters "
"and other delegated URLs"),
)
selection.add_argument(
"--filter",
dest="image-filter", metavar="EXPR", action=ConfigAction,
- help=("Python expression controlling which images to download. "
+ help=("Python expression controlling which files to download. "
"Files for which the expression evaluates to False are ignored. "
"Available keys are the filename-specific ones listed by '-K'. "
"Example: --filter \"image_width >= 1000 and "
@@ -388,7 +389,7 @@ def build_parser():
selection.add_argument(
"--chapter-filter",
dest="chapter-filter", metavar="EXPR", action=ConfigAction,
- help=("Like '--filter', but applies to manga-chapters "
+ help=("Like '--filter', but applies to manga chapters "
"and other delegated URLs"),
)
@@ -472,7 +473,7 @@ def build_parser():
dest="postprocessors", metavar="CMD",
action=AppendCommandAction, const={"name": "exec"},
help=("Execute CMD for each downloaded file. "
- "Example: --exec 'convert {} {}.png && rm {}'"),
+ "Example: --exec \"convert {} {}.png && rm {}\""),
)
postprocessor.add_argument(
"--exec-after",
@@ -480,7 +481,7 @@ def build_parser():
action=AppendCommandAction, const={
"name": "exec", "event": "finalize"},
help=("Execute CMD after all files were downloaded successfully. "
- "Example: --exec-after 'cd {} && convert * ../doc.pdf'"),
+ "Example: --exec-after \"cd {} && convert * ../doc.pdf\""),
)
postprocessor.add_argument(
"-P", "--postprocessor",
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 23d5bc8..543fb10 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -714,74 +714,71 @@ def chain_predicates(predicates, url, kwdict):
class RangePredicate():
- """Predicate; True if the current index is in the given range"""
+ """Predicate; True if the current index is in the given range(s)"""
+
def __init__(self, rangespec):
- self.ranges = self.optimize_range(self.parse_range(rangespec))
+ self.ranges = ranges = self._parse(rangespec)
self.index = 0
- if self.ranges:
- self.lower, self.upper = self.ranges[0][0], self.ranges[-1][1]
+ if ranges:
+ # technically wrong, but good enough for now
+ # and evaluating min/max for a large range is slow
+ self.lower = min(r.start for r in ranges)
+ self.upper = max(r.stop for r in ranges) - 1
else:
- self.lower, self.upper = 0, 0
+ self.lower = self.upper = 0
- def __call__(self, url, _):
- self.index += 1
+ def __call__(self, _url, _kwdict):
+ self.index = index = self.index + 1
- if self.index > self.upper:
+ if index > self.upper:
raise exception.StopExtraction()
- for lower, upper in self.ranges:
- if lower <= self.index <= upper:
+ for range in self.ranges:
+ if index in range:
return True
return False
@staticmethod
- def parse_range(rangespec):
+ def _parse(rangespec):
"""Parse an integer range string and return the resulting ranges
Examples:
- parse_range("-2,4,6-8,10-") -> [(1,2), (4,4), (6,8), (10,INTMAX)]
- parse_range(" - 3 , 4- 4, 2-6") -> [(1,3), (4,4), (2,6)]
+ _parse("-2,4,6-8,10-") -> [(1,3), (4,5), (6,9), (10,INTMAX)]
+ _parse(" - 3 , 4- 4, 2-6") -> [(1,4), (4,5), (2,7)]
+ _parse("1:2,4:8:2") -> [(1,2), (4,8,2)]
"""
ranges = []
+ append = ranges.append
- for group in rangespec.split(","):
+ if isinstance(rangespec, str):
+ rangespec = rangespec.split(",")
+
+ for group in rangespec:
if not group:
continue
- first, sep, last = group.partition("-")
- if not sep:
- beg = end = int(first)
- else:
- beg = int(first) if first.strip() else 1
- end = int(last) if last.strip() else sys.maxsize
- ranges.append((beg, end) if beg <= end else (end, beg))
- return ranges
+ elif ":" in group:
+ start, _, stop = group.partition(":")
+ stop, _, step = stop.partition(":")
+ append(range(
+ int(start) if start.strip() else 1,
+ int(stop) if stop.strip() else sys.maxsize,
+ int(step) if step.strip() else 1,
+ ))
+
+ elif "-" in group:
+ start, _, stop = group.partition("-")
+ append(range(
+ int(start) if start.strip() else 1,
+ int(stop) + 1 if stop.strip() else sys.maxsize,
+ ))
- @staticmethod
- def optimize_range(ranges):
- """Simplify/Combine a parsed list of ranges
-
- Examples:
- optimize_range([(2,4), (4,6), (5,8)]) -> [(2,8)]
- optimize_range([(1,1), (2,2), (3,6), (8,9))]) -> [(1,6), (8,9)]
- """
- if len(ranges) <= 1:
- return ranges
-
- ranges.sort()
- riter = iter(ranges)
- result = []
+ else:
+ start = int(group)
+ append(range(start, start+1))
- beg, end = next(riter)
- for lower, upper in riter:
- if lower > end+1:
- result.append((beg, end))
- beg, end = lower, upper
- elif upper > end:
- end = upper
- result.append((beg, end))
- return result
+ return ranges
class UniquePredicate():
@@ -802,6 +799,8 @@ class FilterPredicate():
"""Predicate; True if evaluating the given expression returns True"""
def __init__(self, expr, target="image"):
+ if not isinstance(expr, str):
+ expr = "(" + ") and (".join(expr) + ")"
name = "<{} filter>".format(target)
self.expr = compile_expression(expr, name)
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d832185..5e3b507 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.24.2"
+__version__ = "1.24.3"
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index db313c3..7b71349 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -199,13 +199,27 @@ def parse_command_line(module, argv):
action += args
yield action
- if getattr(opts, "parse_metadata", None) is None:
- opts.parse_metadata = []
- if opts.metafromtitle is not None:
- opts.parse_metadata.append("title:%s" % opts.metafromtitle)
- opts.metafromtitle = None
- opts.parse_metadata = list(itertools.chain.from_iterable(map(
- metadataparser_actions, opts.parse_metadata)))
+ parse_metadata = getattr(opts, "parse_metadata", None)
+ if isinstance(parse_metadata, dict):
+ if opts.metafromtitle is not None:
+ if "pre_process" not in parse_metadata:
+ parse_metadata["pre_process"] = []
+ parse_metadata["pre_process"].append(
+ "title:%s" % opts.metafromtitle)
+ opts.parse_metadata = {
+ k: list(itertools.chain.from_iterable(map(
+ metadataparser_actions, v)))
+ for k, v in parse_metadata.items()
+ }
+ else:
+ if parse_metadata is None:
+ parse_metadata = []
+ if opts.metafromtitle is not None:
+ parse_metadata.append("title:%s" % opts.metafromtitle)
+ opts.parse_metadata = list(itertools.chain.from_iterable(map(
+ metadataparser_actions, parse_metadata)))
+
+ opts.metafromtitle = None
else:
opts.parse_metadata = ()