about summary refs log tree commit diff stats
path: root/gallery_dl/extractor/chevereto.py
diff options
context:
space:
mode:
author Libravatar Unit 193 <unit193@unit193.net> 2025-12-20 05:49:04 -0500
committer Libravatar Unit 193 <unit193@unit193.net> 2025-12-20 05:49:04 -0500
commit a24ec1647aeac35a63b744ea856011ad6e06be3b (patch)
tree ae94416de786aeddd05d99559098f7f16bb103a6 /gallery_dl/extractor/chevereto.py
parent 33f8a8a37a9cba738ef25fb99955f0730da9eb48 (diff)
New upstream version 1.31.1. upstream/1.31.1
Diffstat (limited to 'gallery_dl/extractor/chevereto.py')
-rw-r--r-- gallery_dl/extractor/chevereto.py 73
1 files changed, 49 insertions, 24 deletions
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index 1552899..9a766d0 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -17,14 +17,17 @@ class CheveretoExtractor(BaseExtractor):
basecategory = "chevereto"
directory_fmt = ("{category}", "{user}", "{album}")
archive_fmt = "{id}"
+ parent = True
def _init(self):
self.path = self.groups[-1]
- def _pagination(self, url):
- while True:
- page = self.request(url).text
+ def _pagination(self, url, callback=None):
+ page = self.request(url).text
+ if callback is not None:
+ callback(page)
+ while True:
for item in text.extract_iter(
page, '<div class="list-item-image ', 'image-container'):
yield text.urljoin(self.root, text.extr(
@@ -35,12 +38,13 @@ class CheveretoExtractor(BaseExtractor):
return
if url[0] == "/":
url = self.root + url
+ page = self.request(url).text
BASE_PATTERN = CheveretoExtractor.update({
"jpgfish": {
- "root": "https://jpg6.su",
- "pattern": r"(?:www\.)?jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)",
+ "root": "https://jpg7.cr",
+ "pattern": r"(?:www\.)?jpe?g\d?\.(?:cr|su|pet|fish(?:ing)?|church)",
},
"imagepond": {
"root": "https://imagepond.net",
@@ -56,8 +60,8 @@ BASE_PATTERN = CheveretoExtractor.update({
class CheveretoImageExtractor(CheveretoExtractor):
"""Extractor for chevereto images"""
subcategory = "image"
- pattern = BASE_PATTERN + r"(/im(?:g|age)/[^/?#]+)"
- example = "https://jpg2.su/img/TITLE.ID"
+ pattern = rf"{BASE_PATTERN}(/im(?:g|age)/[^/?#]+)"
+ example = "https://jpg7.cr/img/TITLE.ID"
def items(self):
url = self.root + self.path
@@ -74,25 +78,27 @@ class CheveretoImageExtractor(CheveretoExtractor):
url, b"seltilovessimpcity@simpcityhatesscrapers",
fromhex=True)
+ album_url, _, album_name = extr("Added to <a", "</a>").rpartition(">")
file = {
"id" : self.path.rpartition("/")[2].rpartition(".")[2],
"url" : url,
- "album": text.remove_html(extr(
- "Added to <a", "</a>").rpartition(">")[2]),
- "date" : text.parse_datetime(extr(
- '<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
+ "album": text.remove_html(album_name),
+ "date" : self.parse_datetime_iso(extr('<span title="', '"')),
"user" : extr('username: "', '"'),
}
+ file["album_slug"], _, file["album_id"] = text.rextr(
+ album_url, "/", '"').rpartition(".")
+
text.nameext_from_url(file["url"], file)
- yield Message.Directory, file
+ yield Message.Directory, "", file
yield Message.Url, file["url"], file
class CheveretoVideoExtractor(CheveretoExtractor):
"""Extractor for chevereto videos"""
subcategory = "video"
- pattern = BASE_PATTERN + r"(/video/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/video/[^/?#]+)"
example = "https://imagepond.net/video/TITLE.ID"
def items(self):
@@ -114,13 +120,17 @@ class CheveretoVideoExtractor(CheveretoExtractor):
'property="video:height" content="', '"')),
"duration" : extr(
'class="far fa-clock"></i>', "—"),
- "album": text.remove_html(extr(
- "Added to <a", "</a>").rpartition(">")[2]),
- "date" : text.parse_datetime(extr(
- '<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
+ "album" : extr(
+ "Added to <a", "</a>"),
+ "date" : self.parse_datetime_iso(extr('<span title="', '"')),
"user" : extr('username: "', '"'),
}
+ album_url, _, album_name = file["album"].rpartition(">")
+ file["album"] = text.remove_html(album_name)
+ file["album_slug"], _, file["album_id"] = text.rextr(
+ album_url, "/", '"').rpartition(".")
+
try:
min, _, sec = file["duration"].partition(":")
file["duration"] = int(min) * 60 + int(sec)
@@ -128,15 +138,15 @@ class CheveretoVideoExtractor(CheveretoExtractor):
pass
text.nameext_from_url(file["url"], file)
- yield Message.Directory, file
+ yield Message.Directory, "", file
yield Message.Url, file["url"], file
class CheveretoAlbumExtractor(CheveretoExtractor):
"""Extractor for chevereto albums"""
subcategory = "album"
- pattern = BASE_PATTERN + r"(/a(?:lbum)?/[^/?#]+(?:/sub)?)"
- example = "https://jpg2.su/album/TITLE.ID"
+ pattern = rf"{BASE_PATTERN}(/a(?:lbum)?/[^/?#]+(?:/sub)?)"
+ example = "https://jpg7.cr/album/TITLE.ID"
def items(self):
url = self.root + self.path
@@ -148,16 +158,31 @@ class CheveretoAlbumExtractor(CheveretoExtractor):
else:
albums = (url,)
+ kwdict = self.kwdict
for album in albums:
- for item_url in self._pagination(album):
+ for kwdict["num"], item_url in enumerate(self._pagination(
+ album, self._extract_metadata_album), 1):
data = data_video if "/video/" in item_url else data_image
yield Message.Queue, item_url, data
+ def _extract_metadata_album(self, page):
+ url, pos = text.extract(
+ page, 'property="og:url" content="', '"')
+ title, pos = text.extract(
+ page, 'property="og:title" content="', '"', pos)
+
+ kwdict = self.kwdict
+ kwdict["album_slug"], _, kwdict["album_id"] = \
+ url[url.rfind("/")+1:].rpartition(".")
+ kwdict["album"] = text.unescape(title)
+ kwdict["count"] = text.parse_int(text.extract(
+ page, 'data-text="image-count">', "<", pos)[0])
+
class CheveretoCategoryExtractor(CheveretoExtractor):
"""Extractor for chevereto galleries"""
subcategory = "category"
- pattern = BASE_PATTERN + r"(/category/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/category/[^/?#]+)"
example = "https://imglike.com/category/TITLE"
def items(self):
@@ -169,8 +194,8 @@ class CheveretoCategoryExtractor(CheveretoExtractor):
class CheveretoUserExtractor(CheveretoExtractor):
"""Extractor for chevereto users"""
subcategory = "user"
- pattern = BASE_PATTERN + r"(/[^/?#]+(?:/albums)?)"
- example = "https://jpg2.su/USER"
+ pattern = rf"{BASE_PATTERN}(/[^/?#]+(?:/albums)?)"
+ example = "https://jpg7.cr/USER"
def items(self):
url = self.root + self.path