diff options
| author | 2025-07-31 01:22:01 -0400 | |
|---|---|---|
| committer | 2025-07-31 01:22:01 -0400 | |
| commit | a6e995c093de8aae2e91a0787281bb34c0b871eb (patch) | |
| tree | 2d79821b05300d34d8871eb6c9662b359a2de85d /gallery_dl/extractor/girlswithmuscle.py | |
| parent | 7672a750cb74bf31e21d76aad2776367fd476155 (diff) | |
New upstream version 1.30.2. upstream/1.30.2
Diffstat (limited to 'gallery_dl/extractor/girlswithmuscle.py')
| -rw-r--r-- | gallery_dl/extractor/girlswithmuscle.py | 177 |
1 files changed, 177 insertions, 0 deletions
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.girlswithmuscle.com/"""

from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache

BASE_PATTERN = r"(?:https?://)?(?:www\.)?girlswithmuscle\.com"


class GirlswithmuscleExtractor(Extractor):
    """Base class for girlswithmuscle extractors"""
    category = "girlswithmuscle"
    root = "https://www.girlswithmuscle.com"
    directory_fmt = ("{category}", "{model}")
    filename_fmt = "{model}_{id}.{extension}"
    archive_fmt = "{type}_{model}_{id}"

    def login(self):
        """Log in with the configured credentials, if any.

        Does nothing when no username is configured; otherwise the cookies
        returned by _login_impl() are merged into this extractor's cookies.
        """
        username, password = self._get_auth_info()
        if username:
            self.cookies_update(self._login_impl(username, password))

    # Wrapped by the project's cache decorator; 14*86400 is 14 days in
    # seconds and keyarg=1 presumably selects 'username' as the cache key
    # -- confirm against the cache module.
    @cache(maxage=14*86400, keyarg=1)
    def _login_impl(self, username, password):
        """Submit the login form and return the resulting cookies.

        Returns a dict mapping cookie names to values, taken from the first
        response in the POST request's redirect history.

        Raises exception.AuthenticationError when the POST is not
        redirected, or when the resulting page still shows a
        wrong-credentials message or a "Log in" link.
        """
        self.log.info("Logging in as %s", username)

        url = self.root + "/login/"
        page = self.request(url).text
        # The login form carries a CSRF token that must be echoed back
        csrf_token = text.extr(page, 'name="csrfmiddlewaretoken" value="', '"')

        headers = {
            "Origin" : self.root,
            "Referer": url,
        }
        data = {
            "csrfmiddlewaretoken": csrf_token,
            "username": username,
            "password": password,
            "next": "/",
        }
        response = self.request(
            url, method="POST", headers=headers, data=data)

        # No redirect after the POST is treated as a failed login
        if not response.history:
            raise exception.AuthenticationError()

        page = response.text
        if ">Wrong username or password" in page:
            raise exception.AuthenticationError()
        if ">Log in<" in page:
            raise exception.AuthenticationError("Account data is missing")

        return {c.name: c.value for c in response.history[0].cookies}


class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
    """Extractor for individual posts on girlswithmuscle.com"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/(\d+)"
    example = "https://www.girlswithmuscle.com/12345/"

    def items(self):
        """Yield the directory and URL messages for a single post.

        Raises exception.NotFoundError when the post page is empty.
        """
        self.login()

        url = f"{self.root}/{self.groups[0]}/"
        page = self.request(url).text
        if not page:
            raise exception.NotFoundError("post")

        metadata = self.metadata(page)

        # Prefer the main image; otherwise fall back to a <source> tag and
        # label the post as a video.
        # NOTE(review): when neither marker is present, 'url' is an empty
        # string and is still yielded below -- confirm this is intended.
        if url := text.extr(page, 'class="main-image" src="', '"'):
            metadata["type"] = "picture"
        else:
            url = text.extr(page, '<source src="', '"')
            metadata["type"] = "video"

        text.nameext_from_url(url, metadata)
        yield Message.Directory, metadata
        yield Message.Url, url, metadata

    def metadata(self, page):
        """Build the metadata dict for a post from its HTML 'page'.

        The result holds the keys "id", "model", "model_list", "tags",
        "date", "is_favorite", "source_filename", "uploader", "score"
        and "comments".
        """
        source = text.remove_html(text.extr(
            page, '<div id="info-source" style="display: none">', "</div>"))
        image_info = text.extr(
            page, '<div class="image-info">', "</div>")
        uploader = text.remove_html(text.extr(
            image_info, '<span class="username-html">', "</a>"))

        tags = text.extr(page, 'id="tags-text">', "</div>")
        score = text.parse_int(text.remove_html(text.extr(
            page, "Score: <b>", "</span")))
        model = self._extract_model(page)

        return {
            "id": self.groups[0],
            "model": model,
            "model_list": self._parse_model_list(model),
            # Keep every second text fragment, starting with the second;
            # presumably the others are labels/separators -- TODO confirm
            "tags": text.split_html(tags)[1::2],
            # Only the first 19 characters are parsed, which is the length
            # of a "%Y-%m-%d %H:%M:%S" timestamp
            "date": text.parse_datetime(
                text.extr(page, 'class="hover-time" title="', '"')[:19],
                "%Y-%m-%d %H:%M:%S"),
            "is_favorite": self._parse_is_favorite(page),
            "source_filename": source,
            "uploader": uploader,
            "score": score,
            "comments": self._extract_comments(page),
        }

    def _extract_model(self, page):
        """Return the page title as model name.

        Titles starting with "Picture #" are reported as "unknown".
        """
        model = text.extr(page, "<title>", "</title>")
        return "unknown" if model.startswith("Picture #") else model

    def _parse_model_list(self, model):
        """Split a comma-separated 'model' string into a list of names.

        Returns an empty list for the "unknown" placeholder.
        """
        if model == "unknown":
            return []
        return [name.strip() for name in model.split(",")]

    def _parse_is_favorite(self, page):
        """Return the favorite state shown on 'page'.

        True when the "Unfavorite" button text is found, False when the
        "Favorite" button text is found (this check runs last and wins if
        both match), None when neither is found.
        """
        fav_button = text.extr(
            page, 'id="favorite-button">', "</span>")
        unfav_button = text.extr(
            page, 'class="actionbutton unfavorite-button">', "</span>")

        is_favorite = None
        if unfav_button == "Unfavorite":
            is_favorite = True
        if fav_button == "Favorite":
            is_favorite = False

        return is_favorite

    def _extract_comments(self, page):
        """Return the whitespace-stripped body of every comment on 'page'."""
        comments = text.extract_iter(
            page, '<div class="comment-body-inner">', "</div>")
        return [comment.strip() for comment in comments]


class GirlswithmuscleSearchExtractor(GirlswithmuscleExtractor):
    """Extractor for search results on girlswithmuscle.com"""
    subcategory = "search"
    pattern = BASE_PATTERN + r"/images/(.*)"
    example = "https://www.girlswithmuscle.com/images/?name=MODEL"

    def pages(self):
        """Yield the HTML of every result page, starting with the requested one.

        Raises exception.AuthorizationError when the initial request was
        redirected.
        """
        query = self.groups[0]
        url = f"{self.root}/images/{query}"
        response = self.request(url)
        if response.history:
            msg = f'Request was redirected to "{response.url}", try logging in'
            raise exception.AuthorizationError(msg)
        page = response.text

        match = util.re(r"Page (\d+) of (\d+)").search(page)
        if match is None:
            # No "Page X of Y" marker: there is nothing to paginate over,
            # so treat this response as the only result page instead of
            # failing with an AttributeError on 'match.groups()'.
            yield page
            return

        current, total = match.groups()
        current, total = text.parse_int(current), text.parse_int(total)

        yield page
        for i in range(current + 1, total + 1):
            url = f"{self.root}/images/{i}/{query}"
            yield self.request(url).text

    def items(self):
        """Queue every post found on the result pages for the post extractor."""
        self.login()
        for page in self.pages():
            data = {
                "_extractor"  : GirlswithmusclePostExtractor,
                "gallery_name": text.unescape(text.extr(page, "<title>", "<")),
            }
            for imgid in text.extract_iter(page, 'id="imgid-', '"'):
                url = f"{self.root}/{imgid}/"
                yield Message.Queue, url, data
