aboutsummaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/motherless.py
diff options
context:
space:
mode:
authorLibravatarUnit 193 <unit193@unit193.net>2025-07-31 01:22:01 -0400
committerLibravatarUnit 193 <unit193@unit193.net>2025-07-31 01:22:01 -0400
commita6e995c093de8aae2e91a0787281bb34c0b871eb (patch)
tree2d79821b05300d34d8871eb6c9662b359a2de85d /gallery_dl/extractor/motherless.py
parent7672a750cb74bf31e21d76aad2776367fd476155 (diff)
New upstream version 1.30.2.upstream/1.30.2
Diffstat (limited to 'gallery_dl/extractor/motherless.py')
-rw-r--r--gallery_dl/extractor/motherless.py140
1 files changed, 101 insertions, 39 deletions
diff --git a/gallery_dl/extractor/motherless.py b/gallery_dl/extractor/motherless.py
index ce83ded..c81a4d1 100644
--- a/gallery_dl/extractor/motherless.py
+++ b/gallery_dl/extractor/motherless.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2024 Mike Fährmann
+# Copyright 2024-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -24,7 +24,7 @@ class MotherlessExtractor(Extractor):
archive_fmt = "{id}"
def _extract_media(self, path):
- url = self.root + "/" + path
+ url = f"{self.root}/{path}"
page = self.request(url).text
extr = text.extract_from(page)
@@ -48,10 +48,59 @@ class MotherlessExtractor(Extractor):
"uploader": text.unescape(extr('class="username">', "<").strip()),
}
- if path and path[0] == "G":
+ if not path:
+ pass
+ elif path[0] == "G":
data["gallery_id"] = path[1:]
data["gallery_title"] = self._extract_gallery_title(
page, data["gallery_id"])
+ elif path[0] == "g":
+ data["group_id"] = path[2:]
+ data["group_title"] = self._extract_group_title(
+ page, data["group_id"])
+
+ return data
+
+ def _pagination(self, page):
+ while True:
+ for thumb in text.extract_iter(
+ page, 'class="thumb-container', "</div>"):
+ yield thumb
+
+ url = text.extr(page, '<link rel="next" href="', '"')
+ if not url:
+ return
+ page = self.request(text.unescape(url)).text
+
+ def _extract_data(self, page, category):
+ extr = text.extract_from(page)
+
+ gid = self.groups[-1]
+ if category == "gallery":
+ title = self._extract_gallery_title(page, gid)
+ else:
+ title = self._extract_group_title(page, gid)
+
+ return {
+ f"{category}_id": gid,
+ f"{category}_title": title,
+ "uploader": text.remove_html(extr(
+ f'class="{category}-member-username">', "</")),
+ "count": text.parse_int(
+ extr('<span class="active">', ")")
+ .rpartition("(")[2].replace(",", "")),
+ }
+
+ def _parse_thumb_data(self, thumb):
+ extr = text.extract_from(thumb)
+
+ data = {
+ "id" : extr('data-codename="', '"'),
+ "type" : extr('data-mediatype="', '"'),
+ "thumbnail": extr('class="static" src="', '"'),
+ "title" : extr(' alt="', '"'),
+ }
+ data["url"] = data["thumbnail"].replace("thumb", data["type"])
return data
@@ -72,13 +121,23 @@ class MotherlessExtractor(Extractor):
if title:
return text.unescape(title.strip())
- pos = page.find(' href="/G' + gallery_id + '"')
+ pos = page.find(f' href="/G{gallery_id}"')
if pos >= 0:
return text.unescape(text.extract(
page, ' title="', '"', pos)[0])
return ""
+ @memcache(keyarg=2)
+ def _extract_group_title(self, page, group_id):
+ title = text.extr(
+ text.extr(page, '<h1 class="group-bio-name">', "</h1>"),
+ ">", "<")
+ if title:
+ return text.unescape(title.strip())
+
+ return ""
+
class MotherlessMediaExtractor(MotherlessExtractor):
"""Extractor for a single image/video from motherless.com"""
@@ -109,59 +168,62 @@ class MotherlessGalleryExtractor(MotherlessExtractor):
if not type:
data = {"_extractor": MotherlessGalleryExtractor}
- yield Message.Queue, self.root + "/GI" + gid, data
- yield Message.Queue, self.root + "/GV" + gid, data
+ yield Message.Queue, f"{self.root}/GI{gid}", data
+ yield Message.Queue, f"{self.root}/GV{gid}", data
return
- url = "{}/G{}{}".format(self.root, type, gid)
+ url = f"{self.root}/G{type}{gid}"
page = self.request(url).text
- data = self._extract_gallery_data(page)
+ data = self._extract_data(page, "gallery")
for num, thumb in enumerate(self._pagination(page), 1):
file = self._parse_thumb_data(thumb)
+ thumbnail = file["thumbnail"]
if file["type"] == "video":
file = self._extract_media(file["id"])
file.update(data)
file["num"] = num
+ file["thumbnail"] = thumbnail
url = file["url"]
yield Message.Directory, file
yield Message.Url, url, text.nameext_from_url(url, file)
- def _pagination(self, page):
- while True:
- for thumb in text.extract_iter(
- page, 'class="thumb-container', "</div>"):
- yield thumb
- url = text.extr(page, '<link rel="next" href="', '"')
- if not url:
- return
- page = self.request(text.unescape(url)).text
+class MotherlessGroupExtractor(MotherlessExtractor):
+ subcategory = "group"
+ directory_fmt = ("{category}", "{uploader}",
+ "{group_id} {group_title}")
+ archive_fmt = "{group_id}_{id}"
+ pattern = BASE_PATTERN + "/g([iv]?)/?([a-z0-9_]+)/?$"
+ example = "https://motherless.com/g/abc123"
- def _extract_gallery_data(self, page):
- extr = text.extract_from(page)
- return {
- "gallery_id": self.groups[-1],
- "gallery_title": text.unescape(extr(
- "<title>", "<").rpartition(" | ")[0]),
- "uploader": text.remove_html(extr(
- 'class="gallery-member-username">', "</")),
- "count": text.parse_int(
- extr('<span class="active">', ")")
- .rpartition("(")[2].replace(",", "")),
- }
+ def items(self):
+ type, gid = self.groups
- def _parse_thumb_data(self, thumb):
- extr = text.extract_from(thumb)
+ if not type:
+ data = {"_extractor": MotherlessGroupExtractor}
+ yield Message.Queue, f"{self.root}/gi/{gid}", data
+ yield Message.Queue, f"{self.root}/gv/{gid}", data
+ return
- data = {
- "id" : extr('data-codename="', '"'),
- "type" : extr('data-mediatype="', '"'),
- "thumbnail": extr('class="static" src="', '"'),
- "title" : extr(' alt="', '"'),
- }
- data["url"] = data["thumbnail"].replace("thumb", data["type"])
+ url = f"{self.root}/g{type}/{gid}"
+ page = self.request(url).text
+ data = self._extract_data(page, "group")
- return data
+ for num, thumb in enumerate(self._pagination(page), 1):
+ file = self._parse_thumb_data(thumb)
+ thumbnail = file["thumbnail"]
+
+ file = self._extract_media(file["id"])
+
+ uploader = file.get("uploader")
+ file.update(data)
+ file["num"] = num
+ file["thumbnail"] = thumbnail
+ file["uploader"] = uploader
+ file["group"] = file["group_id"]
+ url = file["url"]
+ yield Message.Directory, file
+ yield Message.Url, url, text.nameext_from_url(url, file)