From 1a457ed68769880ab7760e0746f0cbbd9ca00487 Mon Sep 17 00:00:00 2001
From: Unit 193
Date: Sat, 28 Sep 2024 20:01:25 -0400
Subject: New upstream version 1.27.5.

---
 gallery_dl/__init__.py             |  10 +-
 gallery_dl/cookies.py              |  24 +-
 gallery_dl/downloader/ytdl.py      |  10 +-
 gallery_dl/extractor/8chan.py      |  16 +-
 gallery_dl/extractor/__init__.py   |   3 +
 gallery_dl/extractor/ao3.py        | 302 +++++++++++++++++++++++
 gallery_dl/extractor/bluesky.py    |  78 +++---
 gallery_dl/extractor/chevereto.py  |   2 +-
 gallery_dl/extractor/civitai.py    | 490 +++++++++++++++++++++++++++++++++++++
 gallery_dl/extractor/cohost.py     | 223 +++++++++++++++++
 gallery_dl/extractor/common.py     |  80 +++---
 gallery_dl/extractor/deviantart.py |  44 +++-
 gallery_dl/extractor/flickr.py     |   6 +-
 gallery_dl/extractor/inkbunny.py   |  13 +-
 gallery_dl/extractor/newgrounds.py |   4 +-
 gallery_dl/extractor/pixiv.py      |   5 +-
 gallery_dl/extractor/skeb.py       |   6 +-
 gallery_dl/extractor/weasyl.py     |  28 ++-
 gallery_dl/extractor/wikimedia.py  |  21 +-
 gallery_dl/extractor/zzup.py       |  30 ++-
 gallery_dl/formatter.py            |   2 +
 gallery_dl/job.py                  |   7 +-
 gallery_dl/option.py               |  11 +-
 gallery_dl/postprocessor/ugoira.py | 128 +++++++---
 gallery_dl/text.py                 |  25 +-
 gallery_dl/util.py                 |  34 ++-
 gallery_dl/version.py              |   2 +-
 27 files changed, 1438 insertions(+), 166 deletions(-)
 create mode 100644 gallery_dl/extractor/ao3.py
 create mode 100644 gallery_dl/extractor/civitai.py
 create mode 100644 gallery_dl/extractor/cohost.py

(limited to 'gallery_dl')

diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 663fe99..7a9e0be 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -202,12 +202,18 @@ def main():
             extractor.modules.append("")
             sys.stdout.write("\n".join(extractor.modules))

-        elif args.list_extractors:
+        elif args.list_extractors is not None:
             write = sys.stdout.write
             fmt = ("{}{}\nCategory: {} - Subcategory: {}"
                    "\nExample : {}\n\n").format

-            for extr in extractor.extractors():
+            extractors = extractor.extractors()
+            if args.list_extractors:
+                fltr = util.build_extractor_filter(
+                    args.list_extractors, negate=False)
+                extractors = filter(fltr, extractors)
+
+            for extr in extractors:
                 write(fmt(
                     extr.__name__,
                     "\n" + extr.__doc__ if extr.__doc__ else "",
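The hunk above makes `--list-extractors` accept optional category names: a bare `--list-extractors` still prints every extractor, while any given names are turned into a predicate via `util.build_extractor_filter()`. A minimal standalone sketch of that filtering step, reusing only the names visible in the hunk (the `requested` categories are an arbitrary example, and an installed `gallery_dl` package is assumed):

    from gallery_dl import extractor, util

    # Same construction as the new branch in main(): build a predicate
    # from category names and keep only the matching extractor classes.
    requested = ["twitter", "danbooru"]   # stand-in for args.list_extractors
    fltr = util.build_extractor_filter(requested, negate=False)
    for extr in filter(fltr, extractor.extractors()):
        print(extr.__name__, extr.category, extr.subcategory)

Wrapping the class list in `filter()` is what lets the printing loop in the hunk stay unchanged whether or not a category filter was requested.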
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index deb7c7b..0ffd29a 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -50,21 +50,27 @@ def load_cookies_firefox(cookiejar, profile=None, container=None, domain=None):

     sql = ("SELECT name, value, host, path, isSecure, expiry "
            "FROM moz_cookies")
-    parameters = ()
+    conditions = []
+    parameters = []

     if container_id is False:
-        sql += " WHERE NOT INSTR(originAttributes,'userContextId=')"
+        conditions.append("NOT INSTR(originAttributes,'userContextId=')")
     elif container_id:
-        sql += " WHERE originAttributes LIKE ? OR originAttributes LIKE ?"
+        conditions.append(
+            "originAttributes LIKE ? OR originAttributes LIKE ?")
         uid = "%userContextId={}".format(container_id)
-        parameters = (uid, uid + "&%")
-    elif domain:
+        parameters += (uid, uid + "&%")
+
+    if domain:
         if domain[0] == ".":
-            sql += " WHERE host == ? OR host LIKE ?"
-            parameters = (domain[1:], "%" + domain)
+            conditions.append("host == ? OR host LIKE ?")
+            parameters += (domain[1:], "%" + domain)
         else:
-            sql += " WHERE host == ? OR host == ?"
-            parameters = (domain, "." + domain)
+            conditions.append("host == ? OR host == ?")
+            parameters += (domain, "." + domain)
+
+    if conditions:
+        sql = "{} WHERE ( {} )".format(sql, " ) AND ( ".join(conditions))

     set_cookie = cookiejar.set_cookie
     for name, value, domain, path, secure, expires in db.execute(
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index b3bec21..950a72f 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -45,7 +45,7 @@ class YoutubeDLDownloader(DownloaderBase):
         except (ImportError, SyntaxError) as exc:
             self.log.error("Cannot import module '%s'",
                            getattr(exc, "name", ""))
-            self.log.debug("", exc_info=True)
+            self.log.debug("", exc_info=exc)
             self.download = lambda u, p: False
             return False
         self.ytdl_instance = ytdl_instance = ytdl.construct_YoutubeDL(
@@ -64,8 +64,8 @@ class YoutubeDLDownloader(DownloaderBase):
         if not info_dict:
             try:
                 info_dict = ytdl_instance.extract_info(url[5:], download=False)
-            except Exception:
-                pass
+            except Exception as exc:
+                self.log.debug("", exc_info=exc)

         if not info_dict:
             return False
@@ -120,8 +120,8 @@ class YoutubeDLDownloader(DownloaderBase):
             self.out.start(pathfmt.path)
         try:
             ytdl_instance.process_info(info_dict)
-        except Exception:
-            self.log.debug("Traceback", exc_info=True)
+        except Exception as exc:
+            self.log.debug("", exc_info=exc)
             return False
         return True
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
index a5e8b27..afa3a69 100644
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -9,9 +9,9 @@
 """Extractors for https://8chan.moe/"""

 from .common import Extractor, Message
-from .. import text
+from .. import text, util
 from ..cache import memcache
-from datetime import datetime, timedelta
+from datetime import timedelta
 import itertools

 BASE_PATTERN = r"(?:https?://)?8chan\.(moe|se|cc)"
@@ -27,21 +27,23 @@ class _8chanExtractor(Extractor):
         Extractor.__init__(self, match)

     def _init(self):
-        self.cookies.set(
-            "TOS20240718", "1", domain=self.root.rpartition("/")[2])
+        now = util.datetime_utcnow()
+        domain = self.root.rpartition("/")[2]
+        self.cookies.set("TOS20240928", "1", domain=domain)
+        self.cookies.set(now.strftime("TOS%Y%m%d"), "1", domain=domain)

     @memcache()
     def cookies_prepare(self):
         # fetch captcha cookies
         # (necessary to download without getting interrupted)
-        now = datetime.utcnow()
+        now = util.datetime_utcnow()
         url = self.root + "/captcha.js"
         params = {"d": now.strftime("%a %b %d %Y %H:%M:%S GMT+0000 (UTC)")}
         self.request(url, params=params).content

         # adjust cookies
         # - remove 'expires' timestamp
-        # - move 'captchaexpiration' value forward by 1 month)
+        # - move 'captchaexpiration' value forward by 1 month
         domain = self.root.rpartition("/")[2]
         for cookie in self.cookies:
             if cookie.domain.endswith(domain):
@@ -79,7 +81,7 @@ class _8chanThreadExtractor(_8chanExtractor):
             self.cookies = self.cookies_prepare()
         except Exception as exc:
             self.log.debug("Failed to fetch captcha cookies: %s: %s",
-                           exc.__class__.__name__, exc, exc_info=True)
+                           exc.__class__.__name__, exc, exc_info=exc)

         # download files
         posts = thread.pop("posts", ())
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index e103cb1..826771c 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -23,6 +23,7 @@ modules = [
     "8muses",
     "adultempire",
     "agnph",
+    "ao3",
     "architizer",
     "artstation",
     "aryion",
@@ -35,6 +36,8 @@ modules = [
     "catbox",
     "chevereto",
     "cien",
+    "civitai",
+    "cohost",
     "comicvine",
     "cyberdrop",
     "danbooru",
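Every name added to `modules` above maps to a file of the same name in `gallery_dl/extractor/`, which is how the new `ao3`, `civitai`, and `cohost` extractors become discoverable. A rough sketch of how the registered `ao3` module would then be reached from a URL (the work ID is made up, and `extractor.find()` is assumed from the package's public helpers rather than shown in this patch):

    from gallery_dl import extractor

    # With "ao3" present in extractor.modules, an Archive of Our Own URL
    # should resolve to one of the Ao3* extractor classes added below.
    extr = extractor.find("https://archiveofourown.org/works/12345")
    print(type(extr).__name__ if extr else "no extractor found")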
diff --git a/gallery_dl/extractor/ao3.py b/gallery_dl/extractor/ao3.py
new file mode 100644
index 0000000..1f570e8
--- /dev/null
+++ b/gallery_dl/extractor/ao3.py
@@ -0,0 +1,302 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://archiveofourown.org/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+from ..cache import cache
+
+BASE_PATTERN = (r"(?:https?://)?(?:www\.)?"
+                r"a(?:rchiveofourown|o3)\.(?:org|com|net)")
+
+
+class Ao3Extractor(Extractor):
+    """Base class for ao3 extractors"""
+    category = "ao3"
+    root = "https://archiveofourown.org"
+    categorytransfer = True
+    cookies_domain = ".archiveofourown.org"
+    cookies_names = ("remember_user_token",)
+    request_interval = (0.5, 1.5)
+
+    def items(self):
+        self.login()
+
+        base = self.root + "/works/"
+        data = {"_extractor": Ao3WorkExtractor}
+
+        for work_id in self.works():
+            yield Message.Queue, base + work_id, data
+
+    def works(self):
+        return self._pagination(self.groups[0])
+
+    def login(self):
+        if self.cookies_check(self.cookies_names):
+            return
+
+        username, password = self._get_auth_info()
+        if username:
+            return self.cookies_update(self._login_impl(username, password))
+
+    @cache(maxage=90*86400, keyarg=1)
+    def _login_impl(self, username, password):
+        self.log.info("Logging in as %s", username)
+
+        url = self.root + "/users/login"
+        page = self.request(url).text
+
+        pos = page.find('id="loginform"')
+        token = text.extract(
+            page, ' name="authenticity_token" value="', '"', pos)[0]
+        if not token:
+            self.log.error("Unable to extract 'authenticity_token'")
+
+        data = {
+            "authenticity_token": text.unescape(token),
+            "user[login]"       : username,
+            "user[password]"    : password,
+            "user[remember_me]" : "1",
+            "commit"            : "Log In",
+        }
+
+        response = self.request(url, method="POST", data=data)
+        if not response.history:
+            raise exception.AuthenticationError()
+
+        remember = response.history[0].cookies.get("remember_user_token")
+        if not remember:
+            raise exception.AuthenticationError()
+
+        return {
+            "remember_user_token": remember,
+            "user_credentials"   : "1",
+        }
+
+    def _pagination(self, path, needle='