diff options
Diffstat (limited to 'gallery_dl/extractor/motherless.py')
| -rw-r--r-- | gallery_dl/extractor/motherless.py | 43 |
1 files changed, 21 insertions, 22 deletions
diff --git a/gallery_dl/extractor/motherless.py b/gallery_dl/extractor/motherless.py index 48137ce..c20f138 100644 --- a/gallery_dl/extractor/motherless.py +++ b/gallery_dl/extractor/motherless.py @@ -9,9 +9,8 @@ """Extractors for https://motherless.com/""" from .common import Extractor, Message -from .. import text, util, exception +from .. import text, dt, exception from ..cache import memcache -from datetime import timedelta BASE_PATTERN = r"(?:https?://)?motherless\.com" @@ -42,6 +41,8 @@ class MotherlessExtractor(Extractor): path, _, media_id = path.rpartition("/") data = { "id" : media_id, + "title": text.unescape( + (t := extr("<title>", "<")) and t[:t.rfind(" | ")]), "type" : extr("__mediatype = '", "'"), "group": extr("__group = '", "'"), "url" : extr("__fileurl = '", "'"), @@ -50,7 +51,6 @@ class MotherlessExtractor(Extractor): for tag in text.extract_iter( extr('class="media-meta-tags">', "</div>"), ">#", "<") ], - "title": text.unescape(extr("<h1>", "<")), "views": text.parse_int(extr( 'class="count">', " ").replace(",", "")), "favorites": text.parse_int(extr( @@ -115,14 +115,14 @@ class MotherlessExtractor(Extractor): return data - def _parse_datetime(self, dt): - if " ago" not in dt: - return text.parse_datetime(dt, "%d %b %Y") + def _parse_datetime(self, dt_string): + if " ago" not in dt_string: + return dt.parse(dt_string, "%d %b %Y") - value = text.parse_int(dt[:-5]) - delta = timedelta(0, value*3600) if dt[-5] == "h" else timedelta(value) - return (util.datetime_utcnow() - delta).replace( - hour=0, minute=0, second=0) + value = text.parse_int(dt_string[:-5]) + delta = (dt.timedelta(0, value*3600) if dt_string[-5] == "h" else + dt.timedelta(value)) + return (dt.now() - delta).replace(hour=0, minute=0, second=0) @memcache(keyarg=2) def _extract_gallery_title(self, page, gallery_id): @@ -132,10 +132,9 @@ class MotherlessExtractor(Extractor): if title: return text.unescape(title.strip()) - pos = page.find(f' href="/G{gallery_id}"') - if pos >= 0: - return text.unescape(text.extract( - page, ' title="', '"', pos)[0]) + if f' href="/G{gallery_id}"' in page: + return text.unescape( + (t := text.extr(page, "<title>", "<")) and t[:t.rfind(" | ")]) return "" @@ -153,15 +152,15 @@ class MotherlessExtractor(Extractor): class MotherlessMediaExtractor(MotherlessExtractor): """Extractor for a single image/video from motherless.com""" subcategory = "media" - pattern = (BASE_PATTERN + - r"/((?:g/[^/?#]+/|G[IV]?[A-Z0-9]+/)?" - r"(?!G)[A-Z0-9]+)") + pattern = (rf"{BASE_PATTERN}/(" + rf"(?:g/[^/?#]+/|G[IV]?[A-Z0-9]+/)?" + rf"(?!G)[A-Z0-9]+)") example = "https://motherless.com/ABC123" def items(self): file = self._extract_media(self.groups[0]) url = file["url"] - yield Message.Directory, file + yield Message.Directory, "", file yield Message.Url, url, text.nameext_from_url(url, file) @@ -171,7 +170,7 @@ class MotherlessGalleryExtractor(MotherlessExtractor): directory_fmt = ("{category}", "{uploader}", "{gallery_id} {gallery_title}") archive_fmt = "{gallery_id}_{id}" - pattern = BASE_PATTERN + "/G([IVG])?([A-Z0-9]+)/?$" + pattern = rf"{BASE_PATTERN}/G([IVG])?([A-Z0-9]+)/?$" example = "https://motherless.com/GABC123" def items(self): @@ -198,7 +197,7 @@ class MotherlessGalleryExtractor(MotherlessExtractor): file["num"] = num file["thumbnail"] = thumbnail url = file["url"] - yield Message.Directory, file + yield Message.Directory, "", file yield Message.Url, url, text.nameext_from_url(url, file) @@ -207,7 +206,7 @@ class MotherlessGroupExtractor(MotherlessExtractor): directory_fmt = ("{category}", "{uploader}", "{group_id} {group_title}") archive_fmt = "{group_id}_{id}" - pattern = BASE_PATTERN + "/g([iv]?)/?([a-z0-9_]+)/?$" + pattern = rf"{BASE_PATTERN}/g([iv]?)/?([a-z0-9_]+)/?$" example = "https://motherless.com/g/abc123" def items(self): @@ -236,5 +235,5 @@ class MotherlessGroupExtractor(MotherlessExtractor): file["uploader"] = uploader file["group"] = file["group_id"] url = file["url"] - yield Message.Directory, file + yield Message.Directory, "", file yield Message.Url, url, text.nameext_from_url(url, file) |
