From 7672a750cb74bf31e21d76aad2776367fd476155 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Mon, 26 May 2025 06:45:53 -0400 Subject: New upstream version 1.29.7. --- gallery_dl/cookies.py | 51 ++++++++---- gallery_dl/downloader/http.py | 5 +- gallery_dl/downloader/ytdl.py | 19 +++-- gallery_dl/extractor/arcalive.py | 5 +- gallery_dl/extractor/aryion.py | 2 +- gallery_dl/extractor/bluesky.py | 6 +- gallery_dl/extractor/civitai.py | 106 +++++++++++++++++------- gallery_dl/extractor/fanbox.py | 10 ++- gallery_dl/extractor/flickr.py | 112 +++++++++++++++++-------- gallery_dl/extractor/idolcomplex.py | 46 ++++++----- gallery_dl/extractor/instagram.py | 20 ++++- gallery_dl/extractor/mangadex.py | 151 +++++++++++++++++++++++++--------- gallery_dl/extractor/mastodon.py | 8 +- gallery_dl/extractor/motherless.py | 42 +++++----- gallery_dl/extractor/pinterest.py | 3 + gallery_dl/extractor/pixeldrain.py | 70 ++++++++++++++++ gallery_dl/extractor/pixiv.py | 28 ++++++- gallery_dl/extractor/sankaku.py | 21 +++++ gallery_dl/extractor/subscribestar.py | 24 +++++- gallery_dl/extractor/vipergirls.py | 29 ++++--- gallery_dl/transaction_id.py | 6 +- gallery_dl/version.py | 2 +- 22 files changed, 570 insertions(+), 196 deletions(-) (limited to 'gallery_dl') diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py index 71b0b6b..f03ad58 100644 --- a/gallery_dl/cookies.py +++ b/gallery_dl/cookies.py @@ -26,7 +26,9 @@ from . import aes, text, util SUPPORTED_BROWSERS_CHROMIUM = { "brave", "chrome", "chromium", "edge", "opera", "thorium", "vivaldi"} -SUPPORTED_BROWSERS = SUPPORTED_BROWSERS_CHROMIUM | {"firefox", "safari"} +SUPPORTED_BROWSERS_FIREFOX = {"firefox", "zen"} +SUPPORTED_BROWSERS = \ + SUPPORTED_BROWSERS_CHROMIUM | SUPPORTED_BROWSERS_FIREFOX | {"safari"} logger = logging.getLogger("cookies") @@ -34,8 +36,8 @@ logger = logging.getLogger("cookies") def load_cookies(browser_specification): browser_name, profile, keyring, container, domain = \ _parse_browser_specification(*browser_specification) - if browser_name == "firefox": - return load_cookies_firefox(profile, container, domain) + if browser_name in SUPPORTED_BROWSERS_FIREFOX: + return load_cookies_firefox(browser_name, profile, container, domain) elif browser_name == "safari": return load_cookies_safari(profile, domain) elif browser_name in SUPPORTED_BROWSERS_CHROMIUM: @@ -44,8 +46,10 @@ def load_cookies(browser_specification): raise ValueError("unknown browser '{}'".format(browser_name)) -def load_cookies_firefox(profile=None, container=None, domain=None): - path, container_id = _firefox_cookies_database(profile, container) +def load_cookies_firefox(browser_name, profile=None, + container=None, domain=None): + path, container_id = _firefox_cookies_database(browser_name, + profile, container) sql = ("SELECT name, value, host, path, isSecure, expiry " "FROM moz_cookies") @@ -83,7 +87,8 @@ def load_cookies_firefox(profile=None, container=None, domain=None): sql, parameters) ] - _log_info("Extracted %s cookies from Firefox", len(cookies)) + _log_info("Extracted %s cookies from %s", + len(cookies), browser_name.capitalize()) return cookies @@ -196,13 +201,14 @@ def load_cookies_chromium(browser_name, profile=None, # -------------------------------------------------------------------- # firefox -def _firefox_cookies_database(profile=None, container=None): +def _firefox_cookies_database(browser_name, profile=None, container=None): if not profile: - search_root = _firefox_browser_directory() + search_root = _firefox_browser_directory(browser_name) elif _is_path(profile): search_root = profile else: - search_root = os.path.join(_firefox_browser_directory(), profile) + search_root = os.path.join( + _firefox_browser_directory(browser_name), profile) path = _find_most_recently_used_file(search_root, "cookies.sqlite") if path is None: @@ -245,14 +251,27 @@ def _firefox_cookies_database(profile=None, container=None): return path, container_id -def _firefox_browser_directory(): +def _firefox_browser_directory(browser_name): + join = os.path.join + if sys.platform in ("win32", "cygwin"): - return os.path.expandvars( - r"%APPDATA%\Mozilla\Firefox\Profiles") - if sys.platform == "darwin": - return os.path.expanduser( - "~/Library/Application Support/Firefox/Profiles") - return os.path.expanduser("~/.mozilla/firefox") + appdata = os.path.expandvars("%APPDATA%") + return { + "firefox": join(appdata, R"Mozilla\Firefox\Profiles"), + "zen" : join(appdata, R"zen\Profiles") + }[browser_name] + elif sys.platform == "darwin": + appdata = os.path.expanduser("~/Library/Application Support") + return { + "firefox": join(appdata, R"Firefox/Profiles"), + "zen" : join(appdata, R"zen/Profiles") + }[browser_name] + else: + home = os.path.expanduser("~") + return { + "firefox": join(home, R".mozilla/firefox"), + "zen" : join(home, R".zen") + }[browser_name] # -------------------------------------------------------------------- diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index faea9e5..c58e2fb 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -322,7 +322,10 @@ class HttpDownloader(DownloaderBase): self.downloading = False if self.mtime: - kwdict.setdefault("_mtime", response.headers.get("Last-Modified")) + if "_http_lastmodified" in kwdict: + kwdict["_mtime"] = kwdict["_http_lastmodified"] + else: + kwdict["_mtime"] = response.headers.get("Last-Modified") else: kwdict["_mtime"] = None diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index 7a20dc2..1fc2f82 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -130,18 +130,27 @@ class YoutubeDLDownloader(DownloaderBase): if pathfmt.exists(): pathfmt.temppath = "" return True - if self.part and self.partdir: - pathfmt.temppath = os.path.join( - self.partdir, pathfmt.filename) - - self._set_outtmpl(ytdl_instance, pathfmt.temppath.replace("%", "%%")) self.out.start(pathfmt.path) + if self.part: + pathfmt.kwdict["extension"] = pathfmt.prefix + "part" + filename = pathfmt.build_filename(pathfmt.kwdict) + pathfmt.kwdict["extension"] = info_dict["ext"] + if self.partdir: + path = os.path.join(self.partdir, filename) + else: + path = pathfmt.realdirectory + filename + else: + path = pathfmt.realpath + + self._set_outtmpl(ytdl_instance, path.replace("%", "%%")) try: ytdl_instance.process_info(info_dict) except Exception as exc: self.log.debug("", exc_info=exc) return False + + pathfmt.temppath = info_dict["filepath"] return True def _download_playlist(self, ytdl_instance, pathfmt, info_dict): diff --git a/gallery_dl/extractor/arcalive.py b/gallery_dl/extractor/arcalive.py index 8c44256..3c39a1a 100644 --- a/gallery_dl/extractor/arcalive.py +++ b/gallery_dl/extractor/arcalive.py @@ -17,6 +17,7 @@ class ArcaliveExtractor(Extractor): """Base class for Arca.live extractors""" category = "arcalive" root = "https://arca.live" + useragent = "net.umanle.arca.android.playstore/0.9.75" request_interval = (0.5, 1.5) def _init(self): @@ -149,9 +150,7 @@ class ArcaliveAPI(): self.log = extractor.log self.root = extractor.root + "/api/app" - headers = extractor.session.headers - headers["User-Agent"] = "net.umanle.arca.android.playstore/0.9.75" - headers["X-Device-Token"] = util.generate_token(64) + extractor.session.headers["X-Device-Token"] = util.generate_token(64) def board(self, board_slug, params): endpoint = "/list/channel/" + board_slug diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py index 17b780e..ca88187 100644 --- a/gallery_dl/extractor/aryion.py +++ b/gallery_dl/extractor/aryion.py @@ -169,7 +169,7 @@ class AryionExtractor(Extractor): "

", "

"), "", "")), "filename" : fname, "extension": ext, - "_mtime" : lmod, + "_http_lastmodified": lmod, } diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py index ec274b8..6f4abd5 100644 --- a/gallery_dl/extractor/bluesky.py +++ b/gallery_dl/extractor/bluesky.py @@ -49,7 +49,11 @@ class BlueskyExtractor(Extractor): self.log.debug("Skipping %s (repost)", self._pid(post)) continue embed = post.get("embed") - post.update(post.pop("record")) + try: + post.update(post.pop("record")) + except Exception: + self.log.debug("Skipping %s (no 'record')", self._pid(post)) + continue while True: self._prepare(post) diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py index de8f86c..56fe851 100644 --- a/gallery_dl/extractor/civitai.py +++ b/gallery_dl/extractor/civitai.py @@ -45,6 +45,20 @@ class CivitaiExtractor(Extractor): self._image_quality = "original=true" self._image_ext = "png" + quality_video = self.config("quality-videos") + if quality_video: + if not isinstance(quality_video, str): + quality_video = ",".join(quality_video) + if quality_video[0] == "+": + quality_video = (self._image_quality + "," + + quality_video.lstrip("+,")) + self._video_quality = quality_video + elif quality_video is not None and quality: + self._video_quality = self._image_quality + else: + self._video_quality = "quality=100" + self._video_ext = "webm" + metadata = self.config("metadata") if metadata: if isinstance(metadata, str): @@ -82,9 +96,8 @@ class CivitaiExtractor(Extractor): "user": post.pop("user"), } if self._meta_version: - data["version"] = version = self.api.model_version( - post["modelVersionId"]).copy() - data["model"] = version.pop("model") + data["model"], data["version"] = \ + self._extract_meta_version(post) yield Message.Directory, data for file in self._image_results(images): @@ -95,26 +108,22 @@ class CivitaiExtractor(Extractor): images = self.images() if images: for image in images: - url = self._url(image) + if self._meta_generation: - image["generation"] = self.api.image_generationdata( - image["id"]) + image["generation"] = \ + self._extract_meta_generation(image) if self._meta_version: - if "modelVersionId" in image: - version_id = image["modelVersionId"] - else: - post = image["post"] = self.api.post( - image["postId"]) - post.pop("user", None) - version_id = post["modelVersionId"] - image["version"] = version = self.api.model_version( - version_id).copy() - image["model"] = version.pop("model") - + image["model"], image["version"] = \ + self._extract_meta_version(image, False) image["date"] = text.parse_datetime( image["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ") + + url = self._url(image) text.nameext_from_url(url, image) - image["extension"] = self._image_ext + if not image["extension"]: + image["extension"] = ( + self._video_ext if image.get("type") == "video" else + self._image_ext) yield Message.Directory, image yield Message.Url, url, image return @@ -130,20 +139,23 @@ class CivitaiExtractor(Extractor): def _url(self, image): url = image["url"] + video = image.get("type") == "video" + quality = self._video_quality if video else self._image_quality + if "/" in url: parts = url.rsplit("/", 3) image["uuid"] = parts[1] - parts[2] = self._image_quality + parts[2] = quality return "/".join(parts) - image["uuid"] = url + image["uuid"] = url name = image.get("name") if not name: mime = image.get("mimeType") or self._image_ext name = "{}.{}".format(image.get("id"), mime.rpartition("/")[2]) return ( "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/{}/{}/{}".format( - url, self._image_quality, name) + url, quality, name) ) def _image_results(self, images): @@ -154,11 +166,13 @@ class CivitaiExtractor(Extractor): "url" : self._url(file), }) if not data["extension"]: - data["extension"] = self._image_ext + data["extension"] = ( + self._video_ext if file.get("type") == "video" else + self._image_ext) if "id" not in file and data["filename"].isdecimal(): file["id"] = text.parse_int(data["filename"]) if self._meta_generation: - file["generation"] = self.api.image_generationdata(file["id"]) + file["generation"] = self._extract_meta_generation(file) yield data def _parse_query(self, value): @@ -166,6 +180,38 @@ class CivitaiExtractor(Extractor): value, {"tags", "reactions", "baseModels", "tools", "techniques", "types", "fileFormats"}) + def _extract_meta_generation(self, image): + try: + return self.api.image_generationdata(image["id"]) + except Exception as exc: + return self.log.debug("", exc_info=exc) + + def _extract_meta_version(self, item, is_post=True): + try: + version_id = self._extract_version_id(item, is_post) + if version_id: + version = self.api.model_version(version_id).copy() + return version.pop("model", None), version + except Exception as exc: + self.log.debug("", exc_info=exc) + return None, None + + def _extract_version_id(self, item, is_post=True): + version_id = item.get("modelVersionId") + if version_id: + return version_id + + version_ids = item.get("modelVersionIds") + if version_ids: + return version_ids[0] + + if is_post: + return None + + item["post"] = post = self.api.post(item["postId"]) + post.pop("user", None) + return self._extract_version_id(post) + class CivitaiModelExtractor(CivitaiExtractor): subcategory = "model" @@ -235,16 +281,20 @@ class CivitaiModelExtractor(CivitaiExtractor): files = [] for num, file in enumerate(version["files"], 1): + name, sep, ext = file["name"].rpartition(".") + if not sep: + name = ext + ext = "bin" file["uuid"] = "model-{}-{}-{}".format( model["id"], version["id"], file["id"]) files.append({ "num" : num, "file" : file, - "filename" : file["name"], - "extension": "bin", - "url" : file.get("downloadUrl") or - "{}/api/download/models/{}".format( - self.root, version["id"]), + "filename" : name, + "extension": ext, + "url" : (file.get("downloadUrl") or + "{}/api/download/models/{}".format( + self.root, version["id"])), "_http_headers" : { "Authorization": self.api.headers.get("Authorization")}, "_http_validate": self._validate_file_model, diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py index 3b43134..8981c29 100644 --- a/gallery_dl/extractor/fanbox.py +++ b/gallery_dl/extractor/fanbox.py @@ -26,12 +26,18 @@ class FanboxExtractor(Extractor): directory_fmt = ("{category}", "{creatorId}") filename_fmt = "{id}_{num}.{extension}" archive_fmt = "{id}_{num}" + browser = "firefox" _warning = True def _init(self): self.headers = { - "Accept": "application/json, text/plain, */*", - "Origin": self.root, + "Accept" : "application/json, text/plain, */*", + "Origin" : "https://www.fanbox.cc", + "Referer": "https://www.fanbox.cc/", + "Cookie" : None, + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "same-site", } self.embeds = self.config("embeds", True) diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py index e85a375..eb68c3e 100644 --- a/gallery_dl/extractor/flickr.py +++ b/gallery_dl/extractor/flickr.py @@ -23,13 +23,10 @@ class FlickrExtractor(Extractor): request_interval = (1.0, 2.0) request_interval_min = 0.5 - def __init__(self, match): - Extractor.__init__(self, match) - self.item_id = match.group(1) - def _init(self): self.api = FlickrAPI(self) self.user = None + self.item_id = self.groups[0] def items(self): data = self.metadata() @@ -51,6 +48,8 @@ class FlickrExtractor(Extractor): def metadata(self): """Return general metadata""" self.user = self.api.urls_lookupUser(self.item_id) + if self.config("profile", False): + self.user.update(self.api.people_getInfo(self.user["nsid"])) return {"user": self.user} def photos(self): @@ -75,23 +74,26 @@ class FlickrImageExtractor(FlickrExtractor): r"|flic\.kr/p/([A-Za-z1-9]+))") example = "https://www.flickr.com/photos/USER/12345" - def __init__(self, match): - FlickrExtractor.__init__(self, match) - if not self.item_id: + def items(self): + item_id, enc_id = self.groups + if enc_id is not None: alphabet = ("123456789abcdefghijkmnopqrstu" "vwxyzABCDEFGHJKLMNPQRSTUVWXYZ") - self.item_id = util.bdecode(match.group(2), alphabet) + item_id = util.bdecode(enc_id, alphabet) - def items(self): - photo = self.api.photos_getInfo(self.item_id) + photo = self.api.photos_getInfo(item_id) - self.api._extract_metadata(photo) + self.api._extract_metadata(photo, False) if photo["media"] == "video" and self.api.videos: self.api._extract_video(photo) else: self.api._extract_photo(photo) - photo["user"] = photo["owner"] + if self.config("profile", False): + photo["user"] = self.api.people_getInfo(photo["owner"]["nsid"]) + else: + photo["user"] = photo["owner"] + photo["title"] = photo["title"]["_content"] photo["comments"] = text.parse_int(photo["comments"]["_content"]) photo["description"] = photo["description"]["_content"] @@ -120,11 +122,8 @@ class FlickrAlbumExtractor(FlickrExtractor): pattern = BASE_PATTERN + r"/photos/([^/?#]+)/(?:album|set)s(?:/(\d+))?" example = "https://www.flickr.com/photos/USER/albums/12345" - def __init__(self, match): - FlickrExtractor.__init__(self, match) - self.album_id = match.group(2) - def items(self): + self.album_id = self.groups[1] if self.album_id: return FlickrExtractor.items(self) return self._album_items() @@ -163,12 +162,9 @@ class FlickrGalleryExtractor(FlickrExtractor): pattern = BASE_PATTERN + r"/photos/([^/?#]+)/galleries/(\d+)" example = "https://www.flickr.com/photos/USER/galleries/12345/" - def __init__(self, match): - FlickrExtractor.__init__(self, match) - self.gallery_id = match.group(2) - def metadata(self): data = FlickrExtractor.metadata(self) + self.gallery_id = self.groups[1] data["gallery"] = self.api.galleries_getInfo(self.gallery_id) return data @@ -223,13 +219,10 @@ class FlickrSearchExtractor(FlickrExtractor): pattern = BASE_PATTERN + r"/search/?\?([^#]+)" example = "https://flickr.com/search/?text=QUERY" - def __init__(self, match): - FlickrExtractor.__init__(self, match) - self.search = text.parse_query(match.group(1)) + def metadata(self): + self.search = text.parse_query(self.groups[0]) if "text" not in self.search: self.search["text"] = "" - - def metadata(self): return {"search": self.search} def photos(self): @@ -275,13 +268,27 @@ class FlickrAPI(oauth.OAuth1API): "appletv" : 1, "iphone_wifi": 0, } + LICENSES = { + "0": "All Rights Reserved", + "1": "Attribution-NonCommercial-ShareAlike License", + "2": "Attribution-NonCommercial License", + "3": "Attribution-NonCommercial-NoDerivs License", + "4": "Attribution License", + "5": "Attribution-ShareAlike License", + "6": "Attribution-NoDerivs License", + "7": "No known copyright restrictions", + "8": "United States Government Work", + "9": "Public Domain Dedication (CC0)", + "10": "Public Domain Mark", + } def __init__(self, extractor): oauth.OAuth1API.__init__(self, extractor) - self.exif = extractor.config("exif", False) self.videos = extractor.config("videos", True) - self.contexts = extractor.config("contexts", False) + self.meta_exif = extractor.config("exif", False) + self.meta_info = extractor.config("info", False) + self.meta_contexts = extractor.config("contexts", False) self.maxsize = extractor.config("size-max") if isinstance(self.maxsize, str): @@ -321,6 +328,26 @@ class FlickrAPI(oauth.OAuth1API): params = {"group_id": group_id} return self._pagination("groups.pools.getPhotos", params) + def people_getInfo(self, user_id): + """Get information about a user.""" + params = {"user_id": user_id} + user = self._call("people.getInfo", params) + + try: + user = user["person"] + for key in ("description", "username", "realname", "location", + "profileurl", "photosurl", "mobileurl"): + if isinstance(user.get(key), dict): + user[key] = user[key]["_content"] + photos = user["photos"] + for key in ("count", "firstdate", "firstdatetaken"): + if isinstance(photos.get(key), dict): + photos[key] = photos[key]["_content"] + except Exception: + pass + + return user + def people_getPhotos(self, user_id): """Return photos from the given user's photostream.""" params = {"user_id": user_id} @@ -469,14 +496,15 @@ class FlickrAPI(oauth.OAuth1API): self._extract_metadata(photo) photo["id"] = text.parse_int(photo["id"]) - if "owner" in photo: + if "owner" not in photo: + photo["owner"] = self.extractor.user + elif not self.meta_info: photo["owner"] = { "nsid" : photo["owner"], "username" : photo["ownername"], "path_alias": photo["pathalias"], } - else: - photo["owner"] = self.extractor.user + del photo["pathalias"] del photo["ownername"] @@ -522,8 +550,23 @@ class FlickrAPI(oauth.OAuth1API): photo["width"] = photo["height"] = 0 return photo - def _extract_metadata(self, photo): - if self.exif: + def _extract_metadata(self, photo, info=True): + if info and self.meta_info: + try: + photo.update(self.photos_getInfo(photo["id"])) + photo["title"] = photo["title"]["_content"] + photo["comments"] = text.parse_int( + photo["comments"]["_content"]) + photo["description"] = photo["description"]["_content"] + photo["tags"] = [t["raw"] for t in photo["tags"]["tag"]] + photo["views"] = text.parse_int(photo["views"]) + photo["id"] = text.parse_int(photo["id"]) + except Exception as exc: + self.log.warning( + "Unable to retrieve 'info' data for %s (%s: %s)", + photo["id"], exc.__class__.__name__, exc) + + if self.meta_exif: try: photo.update(self.photos_getExif(photo["id"])) except Exception as exc: @@ -531,7 +574,7 @@ class FlickrAPI(oauth.OAuth1API): "Unable to retrieve 'exif' data for %s (%s: %s)", photo["id"], exc.__class__.__name__, exc) - if self.contexts: + if self.meta_contexts: try: photo.update(self.photos_getAllContexts(photo["id"])) except Exception as exc: @@ -539,6 +582,9 @@ class FlickrAPI(oauth.OAuth1API): "Unable to retrieve 'contexts' data for %s (%s: %s)", photo["id"], exc.__class__.__name__, exc) + if "license" in photo: + photo["license_name"] = self.LICENSES.get(photo["license"]) + @staticmethod def _clean_info(info): info["title"] = info["title"]["_content"] diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py index dfd9a31..8f4a10c 100644 --- a/gallery_dl/extractor/idolcomplex.py +++ b/gallery_dl/extractor/idolcomplex.py @@ -90,9 +90,11 @@ class IdolcomplexExtractor(SankakuExtractor): "user[password]": password, "commit" : "Login", } + self.sleep(10, "login") response = self.request(url, method="POST", headers=headers, data=data) - if not response.history or response.url.endswith("/user/home"): + if not response.history or response.url.endswith( + ("/users/login", "/user/home")): raise exception.AuthenticationError() return {c.name: c.value for c in response.history[0].cookies} @@ -187,32 +189,39 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor): return {"search_tags": " ".join(tags)} def post_ids(self): - params = {"tags": self.tags} + url = self.root + "/en/posts" + params = {"auto_page": "t"} if self.next: params["next"] = self.next else: params["page"] = self.start_page + params["tags"] = self.tags while True: - page = self.request(self.root, params=params, retries=10).text - pos = ((page.find('id="more-popular-posts-link"') + 1) or - (page.find('") + for tag_type, tag_name in self._tags_findall(tag_sidebar): + tags[tag_type].append(text.unescape(text.unquote(tag_name))) + for type, values in tags.items(): + post["tags_" + type] = values + post["tag_string_" + type] = " ".join(values) + def _notes(self, post, page): if post.get("has_notes"): post["notes"] = self.api.notes(post["id"]) diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py index 1054a63..a83f2da 100644 --- a/gallery_dl/extractor/subscribestar.py +++ b/gallery_dl/extractor/subscribestar.py @@ -40,8 +40,14 @@ class SubscribestarExtractor(Extractor): for post_html in self.posts(): media = self._media_from_post(post_html) data = self._data_from_post(post_html) - data["title"] = text.unescape(text.extr( - data["content"], "

", "

")) + + content = data["content"] + if "" in content: + data["content"] = content = text.extr( + content, "", "") + data["title"] = text.unescape( + text.rextract(content, "

", "

")[0] or "") + yield Message.Directory, data for num, item in enumerate(media, 1): item.update(data) @@ -189,7 +195,12 @@ class SubscribestarExtractor(Extractor): "author_nick": text.unescape(extr('>', '<')), "date" : self._parse_datetime(extr( 'class="post-date">', '")[2]), - "content" : extr('', '').strip(), + "content" : extr( + '
', + '
', + '
'), '?tag=', '"')), } def _parse_datetime(self, dt): @@ -243,7 +254,12 @@ class SubscribestarPostExtractor(SubscribestarExtractor): "post_id" : text.parse_int(extr('data-id="', '"')), "date" : self._parse_datetime(extr( '
', '<')), - "content" : extr('', '').strip(), + "content" : extr( + '
', + '
', + '
'), '?tag=', '"')), "author_name": text.unescape(extr( 'class="star_link" href="/', '"')), "author_id" : text.parse_int(extr('data-user-id="', '"')), diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py index af3f32d..1dd3482 100644 --- a/gallery_dl/extractor/vipergirls.py +++ b/gallery_dl/extractor/vipergirls.py @@ -43,31 +43,40 @@ class VipergirlsExtractor(Extractor): def items(self): self.login() - posts = self.posts() + root = self.posts() + forum_title = root[1].attrib["title"] + thread_title = root[2].attrib["title"] like = self.config("like") if like: - user_hash = posts[0].get("hash") + user_hash = root[0].get("hash") if len(user_hash) < 16: self.log.warning("Login required to like posts") like = False - posts = posts.iter("post") + posts = root.iter("post") if self.page: util.advance(posts, (text.parse_int(self.page[5:]) - 1) * 15) for post in posts: + images = list(post) + data = post.attrib + data["forum_title"] = forum_title data["thread_id"] = self.thread_id + data["thread_title"] = thread_title + data["post_id"] = data.pop("id") + data["post_num"] = data.pop("number") + data["post_title"] = data.pop("title") + data["count"] = len(images) + del data["imagecount"] yield Message.Directory, data - - image = None - for image in post: - yield Message.Queue, image.attrib["main_url"], data - - if image is not None and like: - self.like(post, user_hash) + if images: + for data["num"], image in enumerate(images, 1): + yield Message.Queue, image.attrib["main_url"], data + if like: + self.like(post, user_hash) def login(self): if self.cookies_check(self.cookies_names): diff --git a/gallery_dl/transaction_id.py b/gallery_dl/transaction_id.py index 25f1775..89e3d5b 100644 --- a/gallery_dl/transaction_id.py +++ b/gallery_dl/transaction_id.py @@ -129,7 +129,9 @@ class ClientTransaction(): keyword="obfiowerehiring", rndnum=3): bytes_key = self.key_bytes - now = int(time.time()) - 1682924400 + nowf = time.time() + nowi = int(nowf) + now = nowi - 1682924400 bytes_time = ( (now ) & 0xFF, # noqa: E202 (now >> 8) & 0xFF, # noqa: E222 @@ -141,7 +143,7 @@ class ClientTransaction(): method, path, now, keyword, self.animation_key) bytes_hash = hashlib.sha256(payload.encode()).digest()[:16] - num = random.randrange(256) + num = (random.randrange(16) << 4) + int((nowf - nowi) * 16.0) result = bytes( byte ^ num for byte in itertools.chain( diff --git a/gallery_dl/version.py b/gallery_dl/version.py index d40dacd..e543a31 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.29.6" +__version__ = "1.29.7" __variant__ = None -- cgit v1.2.3