| field | value |
|---|---|
| author | 2023-05-28 01:35:33 -0400 |
| committer | 2023-05-28 01:35:33 -0400 |
| commit | 645863b3144f85f1830afe23c8d412f2703d5b81 (patch) |
| tree | 4be5d8a509ba8fac7a9c565d2d0764287b73973b |
| parent | 1d742a229479aa2c3cb6db253c90434414a6fea3 (diff) |
| parent | 8950c0f2ef55ec2ed36b3fccc9fd85b64b877c3b (diff) |
Update upstream source from tag 'upstream/1.25.5'
Update to upstream version '1.25.5'
with Debian dir fb939ee149571d6a29a37441553f417ce143d419
37 files changed, 1257 insertions, 177 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 85c732d..405c117 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,38 @@
 # Changelog
 
+## 1.25.5 - 2023-05-27
+### Additions
+- [8muses] add `parts` metadata field ([#3329](https://github.com/mikf/gallery-dl/issues/3329))
+- [danbooru] add `date` metadata field ([#4047](https://github.com/mikf/gallery-dl/issues/4047))
+- [e621] add `date` metadata field ([#4047](https://github.com/mikf/gallery-dl/issues/4047))
+- [gofile] add basic password support ([#4056](https://github.com/mikf/gallery-dl/issues/4056))
+- [imagechest] implement API support ([#4065](https://github.com/mikf/gallery-dl/issues/4065))
+- [instagram] add `order-files` option ([#3993](https://github.com/mikf/gallery-dl/issues/3993), [#4017](https://github.com/mikf/gallery-dl/issues/4017))
+- [instagram] add `order-posts` option ([#3993](https://github.com/mikf/gallery-dl/issues/3993), [#4017](https://github.com/mikf/gallery-dl/issues/4017))
+- [instagram] add `metadata` option ([#3107](https://github.com/mikf/gallery-dl/issues/3107))
+- [jpgfish] add `jpg.fishing` extractors ([#2657](https://github.com/mikf/gallery-dl/issues/2657), [#2719](https://github.com/mikf/gallery-dl/issues/2719))
+- [lensdump] add `lensdump.com` extractors ([#2078](https://github.com/mikf/gallery-dl/issues/2078), [#4104](https://github.com/mikf/gallery-dl/issues/4104))
+- [mangaread] add `mangaread.org` extractors ([#2425](https://github.com/mikf/gallery-dl/issues/2425), [#2781](https://github.com/mikf/gallery-dl/issues/2781))
+- [misskey] add `favorite` extractor ([#3950](https://github.com/mikf/gallery-dl/issues/3950))
+- [pixiv] add `novel` support ([#1241](https://github.com/mikf/gallery-dl/issues/1241), [#4044](https://github.com/mikf/gallery-dl/issues/4044))
+- [reddit] support cross-posted media ([#887](https://github.com/mikf/gallery-dl/issues/887), [#3586](https://github.com/mikf/gallery-dl/issues/3586), [#3976](https://github.com/mikf/gallery-dl/issues/3976))
+- [postprocessor:exec] support tilde expansion for `command`
+- [formatter] support slicing strings as bytes ([#4087](https://github.com/mikf/gallery-dl/issues/4087))
+### Fixes
+- [8muses] fix value of `album[url]` ([#3329](https://github.com/mikf/gallery-dl/issues/3329))
+- [danbooru] refactor pagination logic ([#4002](https://github.com/mikf/gallery-dl/issues/4002))
+- [fanbox] skip invalid posts ([#4088](https://github.com/mikf/gallery-dl/issues/4088))
+- [gofile] automatically fetch `website-token`
+- [kemonoparty] fix kemono and coomer logins sharing the same cache ([#4098](https://github.com/mikf/gallery-dl/issues/4098))
+- [newgrounds] add default delay between requests ([#4046](https://github.com/mikf/gallery-dl/issues/4046))
+- [nsfwalbum] detect placeholder images
+- [poipiku] extract full `descriptions` ([#4066](https://github.com/mikf/gallery-dl/issues/4066))
+- [tcbscans] update domain to `tcbscans.com` ([#4080](https://github.com/mikf/gallery-dl/issues/4080))
+- [twitter] extract TwitPic URLs in text ([#3792](https://github.com/mikf/gallery-dl/issues/3792), [#3796](https://github.com/mikf/gallery-dl/issues/3796))
+- [weibo] require numeric IDs to have length >= 10 ([#4059](https://github.com/mikf/gallery-dl/issues/4059))
+- [ytdl] fix crash due to removed `no_color` attribute
+- [cookies] improve logging behavior ([#4050](https://github.com/mikf/gallery-dl/issues/4050))
+
 ## 1.25.4 - 2023-05-07
 ### Additions
 - [4chanarchives] add `thread` and `board` extractors ([#4012](https://github.com/mikf/gallery-dl/issues/4012))
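The new gofile password support listed above is a configuration option rather than a URL change. As a rough sketch only, the following `gallery-dl.conf` fragment combines it with the other gofile settings this release touches; the option name `password` comes from the `gofile.py` change further down in this diff (`self.config("password")`), the value shown is a placeholder, and the extractor hashes it with SHA-256 before sending it to the API.

```json
{
    "extractor": {
        "gofile": {
            "api-token": null,
            "website-token": null,
            "password": "my-folder-password"
        }
    }
}
```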
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery_dl
-Version: 1.25.4
+Version: 1.25.5
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -109,9 +109,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.bin>`__
 
 Nightly Builds
@@ -72,9 +72,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.bin>`__
 
 Nightly Builds
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index f018c63..3d5e4e8 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-05-07" "1.25.4" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-05-27" "1.25.5" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 8008451..be234ce 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-05-07" "1.25.4" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-05-27" "1.25.5" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
 .\" disable justification (adjust text to left margin only)
@@ -1976,16 +1976,14 @@ If not set, a temporary guest token will be used.
 .IP "Type:" 6
 \f[I]string\f[]
 
-.IP "Default:" 9
-\f[I]"12345"\f[]
-
 .IP "Description:" 4
 API token value used during API requests.
-A not up-to-date value will result in \f[I]401 Unauthorized\f[] errors.
+An invalid or not up-to-date value
+will result in \f[I]401 Unauthorized\f[] errors.
 
-Setting this value to \f[I]null\f[] will do an extra HTTP request to fetch
-the current value used by gofile.
+Keeping this option unset will use an extra HTTP request
+to attempt to fetch the current value used by gofile.
 
 
 .SS extractor.gofile.recursive
@@ -2041,6 +2039,21 @@ Available formats are \f[I]"webp"\f[] and \f[I]"avif"\f[].
 but is most likely going to fail with \f[I]403 Forbidden\f[] errors.
 
 
+.SS extractor.imagechest.access-token
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Description:" 4
+Your personal Image Chest access token.
+
+These tokens allow using the API instead of having to scrape HTML pages,
+providing more detailed metadata.
+(\f[I]date\f[], \f[I]description\f[], etc)
+
+See https://imgchest.com/docs/api/1.0/general/authorization
+for instructions on how to generate such a token.
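Tying the new `extractor.imagechest.access-token` man-page section above to the matching entry added to `docs/gallery-dl.conf` later in this diff, a minimal configuration sketch for the Image Chest API support could look as follows; the token string is a placeholder, and a real one would come from the imgchest.com authorization page referenced above.

```json
{
    "extractor": {
        "imagechest": {
            "access-token": "your-imgchest-api-token"
        }
    }
}
```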
+
+
 .SS extractor.imgur.client-id
 .IP "Type:" 6
 \f[I]string\f[]
@@ -2132,6 +2145,66 @@ Possible values are
 It is possible to use \f[I]"all"\f[] instead of listing all values separately.
 
 
+.SS extractor.instagram.metadata
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Provide extended \f[I]user\f[] metadata even when referring to a user by ID,
+e.g. \f[I]instagram.com/id:12345678\f[].
+
+Note: This metadata is always available when referring to a user by name,
+e.g. \f[I]instagram.com/USERNAME\f[].
+
+
+.SS extractor.instagram.order-files
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"asc"\f[]
+
+.IP "Description:" 4
+Controls the order in which files of each post are returned.
+
+.br
+* \f[I]"asc"\f[]: Same order as displayed in a post
+.br
+* \f[I]"desc"\f[]: Reverse order as displayed in a post
+.br
+* \f[I]"reverse"\f[]: Same as \f[I]"desc"\f[]
+
+Note: This option does *not* affect \f[I]{num}\f[].
+To enumerate files in reverse order, use \f[I]count - num + 1\f[].
+
+
+.SS extractor.instagram.order-posts
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"asc"\f[]
+
+.IP "Description:" 4
+Controls the order in which posts are returned.
+
+.br
+* \f[I]"asc"\f[]: Same order as displayed
+.br
+* \f[I]"desc"\f[]: Reverse order as displayed
+.br
+* \f[I]"id"\f[] or \f[I]"id_asc"\f[]: Ascending order by ID
+.br
+* \f[I]"id_desc"\f[]: Descending order by ID
+.br
+* \f[I]"reverse"\f[]: Same as \f[I]"desc"\f[]
+
+Note: This option only affects \f[I]highlights\f[].
+
+
 .SS extractor.instagram.previews
 .IP "Type:" 6
 \f[I]bool\f[]
@@ -2397,6 +2470,14 @@ Fetch media from replies to other posts.
 Also emit metadata for text-only posts without media content.
 
 
+.SS extractor.[misskey].access-token
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Description:" 4
+Your access token, necessary to fetch favorited notes.
+
+
 .SS extractor.[misskey].renotes
 .IP "Type:" 6
 \f[I]bool\f[]
@@ -2736,6 +2817,17 @@ by using a third-party tool like \f[I]gppt\f[].
 
 
+.SS extractor.pixiv.embeds
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download images embedded in novels.
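As a combined, non-authoritative example of the options documented above: a config fragment that requests extended Instagram user metadata, reverses the file and post order, supplies a Misskey token for the new favorite extractor, and downloads images embedded in pixiv novels might look like this. All option names are taken from the man-page sections above; the token value is a placeholder and the non-default values are purely illustrative.

```json
{
    "extractor": {
        "instagram": {
            "metadata": true,
            "order-files": "desc",
            "order-posts": "id_desc"
        },
        "misskey": {
            "access-token": "your-misskey-access-token"
        },
        "pixiv": {
            "embeds": true
        }
    }
}
```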
+ + .SS extractor.pixiv.metadata .IP "Type:" 6 \f[I]bool\f[] diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 92451fd..6a3c84f 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -129,7 +129,7 @@ }, "gofile": { "api-token": null, - "website-token": "12345" + "website-token": null }, "hentaifoundry": { @@ -146,6 +146,9 @@ "password": null, "sleep-request": 5.0 }, + "imagechest": { + "access-token": null + }, "imgbb": { "username": null, @@ -166,6 +169,9 @@ "api": "rest", "cookies": null, "include": "posts", + "order-files": "asc", + "order-posts": "asc", + "previews": false, "sleep-request": [6.0, 12.0], "videos": true }, @@ -190,6 +196,7 @@ "password": null }, "misskey": { + "access-token": null, "renotes": false, "replies": true }, @@ -239,6 +246,7 @@ { "refresh-token": null, "include": "artworks", + "embeds": false, "metadata": false, "metadata-bookmark": false, "tags": "japanese", diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 3554f49..c069128 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.25.4 +Version: 1.25.5 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -109,9 +109,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.exe>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.exe>`__ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__) -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.bin>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.5/gallery-dl.bin>`__ Nightly Builds diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 8be6871..fde82b6 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -113,11 +113,13 @@ gallery_dl/extractor/instagram.py gallery_dl/extractor/issuu.py gallery_dl/extractor/itaku.py gallery_dl/extractor/itchio.py +gallery_dl/extractor/jpgfish.py gallery_dl/extractor/kabeuchi.py gallery_dl/extractor/keenspot.py gallery_dl/extractor/kemonoparty.py gallery_dl/extractor/khinsider.py gallery_dl/extractor/komikcast.py +gallery_dl/extractor/lensdump.py gallery_dl/extractor/lexica.py gallery_dl/extractor/lightroom.py gallery_dl/extractor/lineblog.py @@ -131,6 +133,7 @@ gallery_dl/extractor/mangahere.py gallery_dl/extractor/mangakakalot.py gallery_dl/extractor/manganelo.py gallery_dl/extractor/mangapark.py +gallery_dl/extractor/mangaread.py gallery_dl/extractor/mangasee.py gallery_dl/extractor/mangoxo.py gallery_dl/extractor/mastodon.py diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py index 32ba323..c5c5667 100644 --- a/gallery_dl/cookies.py +++ b/gallery_dl/cookies.py @@ -75,7 +75,7 @@ def load_cookies_firefox(cookiejar, profile=None, container=None, domain=None): domain, bool(domain), domain.startswith("."), path, bool(path), secure, expires, False, None, None, {}, )) - logger.info("Extracted %s cookies from Firefox", len(cookiejar)) + _log_info("Extracted %s cookies from Firefox", len(cookiejar)) def load_cookies_safari(cookiejar, profile=None, domain=None): 
@@ -98,7 +98,7 @@ def load_cookies_chrome(cookiejar, browser_name, profile=None, keyring=None, domain=None): config = _get_chromium_based_browser_settings(browser_name) path = _chrome_cookies_database(profile, config) - logger.debug("Extracting cookies from %s", path) + _log_debug("Extracting cookies from %s", path) with DatabaseCopy(path) as db: db.text_factory = bytes @@ -155,11 +155,11 @@ def load_cookies_chrome(cookiejar, browser_name, profile=None, else: failed_message = "" - logger.info("Extracted %s cookies from %s%s", - len(cookiejar), browser_name.capitalize(), failed_message) + _log_info("Extracted %s cookies from %s%s", + len(cookiejar), browser_name.capitalize(), failed_message) counts = decryptor.cookie_counts counts["unencrypted"] = unencrypted_cookies - logger.debug("cookie version breakdown: %s", counts) + _log_debug("Cookie version breakdown: %s", counts) # -------------------------------------------------------------------- @@ -177,11 +177,11 @@ def _firefox_cookies_database(profile=None, container=None): if path is None: raise FileNotFoundError("Unable to find Firefox cookies database in " "{}".format(search_root)) - logger.debug("Extracting cookies from %s", path) + _log_debug("Extracting cookies from %s", path) if container == "none": container_id = False - logger.debug("Only loading cookies not belonging to any container") + _log_debug("Only loading cookies not belonging to any container") elif container: containers_path = os.path.join( @@ -191,8 +191,8 @@ def _firefox_cookies_database(profile=None, container=None): with open(containers_path) as file: identities = util.json_loads(file.read())["identities"] except OSError: - logger.error("Unable to read Firefox container database at %s", - containers_path) + _log_error("Unable to read Firefox container database at '%s'", + containers_path) raise except KeyError: identities = () @@ -203,10 +203,10 @@ def _firefox_cookies_database(profile=None, container=None): container_id = context["userContextId"] break else: - raise ValueError("Unable to find Firefox container {}".format( + raise ValueError("Unable to find Firefox container '{}'".format( container)) - logger.debug("Only loading cookies from container '%s' (ID %s)", - container, container_id) + _log_debug("Only loading cookies from container '%s' (ID %s)", + container, container_id) else: container_id = None @@ -229,7 +229,7 @@ def _safari_cookies_database(): path = os.path.expanduser("~/Library/Cookies/Cookies.binarycookies") return open(path, "rb") except FileNotFoundError: - logger.debug("Trying secondary cookie location") + _log_debug("Trying secondary cookie location") path = os.path.expanduser("~/Library/Containers/com.apple.Safari/Data" "/Library/Cookies/Cookies.binarycookies") return open(path, "rb") @@ -250,7 +250,7 @@ def _safari_parse_cookies_page(data, cookiejar, domain=None): number_of_cookies = p.read_uint() record_offsets = [p.read_uint() for _ in range(number_of_cookies)] if number_of_cookies == 0: - logger.debug("a cookies page of size %s has no cookies", len(data)) + _log_debug("Cookies page of size %s has no cookies", len(data)) return p.skip_to(record_offsets[0], "unknown page header field") @@ -299,8 +299,7 @@ def _safari_parse_cookies_record(data, cookiejar, host=None): p.skip_to(value_offset) value = p.read_cstring() except UnicodeDecodeError: - logger.warning("failed to parse Safari cookie " - "because UTF-8 decoding failed") + _log_warning("Failed to parse Safari cookie") return record_size p.skip_to(record_size, "space at the end of the 
record") @@ -328,7 +327,7 @@ def _chrome_cookies_database(profile, config): elif config["profiles"]: search_root = os.path.join(config["directory"], profile) else: - logger.warning("%s does not support profiles", config["browser"]) + _log_warning("%s does not support profiles", config["browser"]) search_root = config["directory"] path = _find_most_recently_used_file(search_root, "Cookies") @@ -479,7 +478,7 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): elif version == b"v11": self._cookie_counts["v11"] += 1 if self._v11_key is None: - logger.warning("cannot decrypt v11 cookies: no key found") + _log_warning("Unable to decrypt v11 cookies: no key found") return None return _decrypt_aes_cbc(ciphertext, self._v11_key) @@ -513,7 +512,7 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor): if version == b"v10": self._cookie_counts["v10"] += 1 if self._v10_key is None: - logger.warning("cannot decrypt v10 cookies: no key found") + _log_warning("Unable to decrypt v10 cookies: no key found") return None return _decrypt_aes_cbc(ciphertext, self._v10_key) @@ -543,7 +542,7 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): if version == b"v10": self._cookie_counts["v10"] += 1 if self._v10_key is None: - logger.warning("cannot decrypt v10 cookies: no key found") + _log_warning("Unable to decrypt v10 cookies: no key found") return None # https://chromium.googlesource.com/chromium/src/+/refs/heads @@ -581,7 +580,7 @@ def _choose_linux_keyring(): SelectBackend """ desktop_environment = _get_linux_desktop_environment(os.environ) - logger.debug("Detected desktop environment: %s", desktop_environment) + _log_debug("Detected desktop environment: %s", desktop_environment) if desktop_environment == DE_KDE: return KEYRING_KWALLET if desktop_environment == DE_OTHER: @@ -609,23 +608,23 @@ def _get_kwallet_network_wallet(): ) if proc.returncode != 0: - logger.warning("failed to read NetworkWallet") + _log_warning("Failed to read NetworkWallet") return default_wallet else: network_wallet = stdout.decode().strip() - logger.debug("NetworkWallet = '%s'", network_wallet) + _log_debug("NetworkWallet = '%s'", network_wallet) return network_wallet except Exception as exc: - logger.warning("exception while obtaining NetworkWallet (%s: %s)", - exc.__class__.__name__, exc) + _log_warning("Error while obtaining NetworkWallet (%s: %s)", + exc.__class__.__name__, exc) return default_wallet def _get_kwallet_password(browser_keyring_name): - logger.debug("using kwallet-query to obtain password from kwallet") + _log_debug("Using kwallet-query to obtain password from kwallet") if shutil.which("kwallet-query") is None: - logger.error( + _log_error( "kwallet-query command not found. KWallet and kwallet-query " "must be installed to read from KWallet. kwallet-query should be " "included in the kwallet package for your distribution") @@ -642,14 +641,14 @@ def _get_kwallet_password(browser_keyring_name): ) if proc.returncode != 0: - logger.error("kwallet-query failed with return code {}. " - "Please consult the kwallet-query man page " - "for details".format(proc.returncode)) + _log_error("kwallet-query failed with return code {}. " + "Please consult the kwallet-query man page " + "for details".format(proc.returncode)) return b"" if stdout.lower().startswith(b"failed to read"): - logger.debug("Failed to read password from kwallet. " - "Using empty string instead") + _log_debug("Failed to read password from kwallet. 
" + "Using empty string instead") # This sometimes occurs in KDE because chrome does not check # hasEntry and instead just tries to read the value (which # kwallet returns "") whereas kwallet-query checks hasEntry. @@ -660,13 +659,12 @@ def _get_kwallet_password(browser_keyring_name): # random password and store it, but that doesn't matter here. return b"" else: - logger.debug("password found") if stdout[-1:] == b"\n": stdout = stdout[:-1] return stdout except Exception as exc: - logger.warning("exception running kwallet-query (%s: %s)", - exc.__class__.__name__, exc) + _log_warning("Error when running kwallet-query (%s: %s)", + exc.__class__.__name__, exc) return b"" @@ -674,7 +672,7 @@ def _get_gnome_keyring_password(browser_keyring_name): try: import secretstorage except ImportError: - logger.error("secretstorage not available") + _log_error("'secretstorage' Python package not available") return b"" # Gnome keyring does not seem to organise keys in the same way as KWallet, @@ -689,7 +687,7 @@ def _get_gnome_keyring_password(browser_keyring_name): if item.get_label() == label: return item.get_secret() else: - logger.error("failed to read from keyring") + _log_error("Failed to read from GNOME keyring") return b"" @@ -703,7 +701,7 @@ def _get_linux_keyring_password(browser_keyring_name, keyring): if not keyring: keyring = _choose_linux_keyring() - logger.debug("Chosen keyring: %s", keyring) + _log_debug("Chosen keyring: %s", keyring) if keyring == KEYRING_KWALLET: return _get_kwallet_password(browser_keyring_name) @@ -717,8 +715,8 @@ def _get_linux_keyring_password(browser_keyring_name, keyring): def _get_mac_keyring_password(browser_keyring_name): - logger.debug("using find-generic-password to obtain " - "password from OSX keychain") + _log_debug("Using find-generic-password to obtain " + "password from OSX keychain") try: proc, stdout = Popen_communicate( "security", "find-generic-password", @@ -731,28 +729,28 @@ def _get_mac_keyring_password(browser_keyring_name): stdout = stdout[:-1] return stdout except Exception as exc: - logger.warning("exception running find-generic-password (%s: %s)", - exc.__class__.__name__, exc) + _log_warning("Error when using find-generic-password (%s: %s)", + exc.__class__.__name__, exc) return None def _get_windows_v10_key(browser_root): path = _find_most_recently_used_file(browser_root, "Local State") if path is None: - logger.error("could not find local state file") + _log_error("Unable to find Local State file") return None - logger.debug("Found local state file at '%s'", path) + _log_debug("Found Local State file at '%s'", path) with open(path, encoding="utf-8") as file: data = util.json_loads(file.read()) try: base64_key = data["os_crypt"]["encrypted_key"] except KeyError: - logger.error("no encrypted key in Local State") + _log_error("Unable to find encrypted key in Local State") return None encrypted_key = binascii.a2b_base64(base64_key) prefix = b"DPAPI" if not encrypted_key.startswith(prefix): - logger.error("invalid key") + _log_error("Invalid Local State key") return None return _decrypt_windows_dpapi(encrypted_key[len(prefix):]) @@ -804,10 +802,10 @@ class DataParser: def skip(self, num_bytes, description="unknown"): if num_bytes > 0: - logger.debug("skipping {} bytes ({}): {!r}".format( + _log_debug("Skipping {} bytes ({}): {!r}".format( num_bytes, description, self.read_bytes(num_bytes))) elif num_bytes < 0: - raise ParserError("invalid skip of {} bytes".format(num_bytes)) + raise ParserError("Invalid skip of {} bytes".format(num_bytes)) def 
skip_to(self, offset, description="unknown"): self.skip(offset - self.cursor, description) @@ -929,31 +927,25 @@ def pbkdf2_sha1(password, salt, iterations, key_length): def _decrypt_aes_cbc(ciphertext, key, initialization_vector=b" " * 16): - plaintext = aes.unpad_pkcs7( - aes.aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)) try: - return plaintext.decode() + return aes.unpad_pkcs7(aes.aes_cbc_decrypt_bytes( + ciphertext, key, initialization_vector)).decode() except UnicodeDecodeError: - logger.warning("failed to decrypt cookie (AES-CBC) because UTF-8 " - "decoding failed. Possibly the key is wrong?") - return None + _log_warning("Failed to decrypt cookie (AES-CBC Unicode)") + except ValueError: + _log_warning("Failed to decrypt cookie (AES-CBC)") + return None def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag): try: - plaintext = aes.aes_gcm_decrypt_and_verify_bytes( - ciphertext, key, authentication_tag, nonce) - except ValueError: - logger.warning("failed to decrypt cookie (AES-GCM) because MAC check " - "failed. Possibly the key is wrong?") - return None - - try: - return plaintext.decode() + return aes.aes_gcm_decrypt_and_verify_bytes( + ciphertext, key, authentication_tag, nonce).decode() except UnicodeDecodeError: - logger.warning("failed to decrypt cookie (AES-GCM) because UTF-8 " - "decoding failed. Possibly the key is wrong?") - return None + _log_warning("Failed to decrypt cookie (AES-GCM Unicode)") + except ValueError: + _log_warning("Failed to decrypt cookie (AES-GCM MAC)") + return None def _decrypt_windows_dpapi(ciphertext): @@ -981,7 +973,7 @@ def _decrypt_windows_dpapi(ciphertext): ctypes.byref(blob_out) # pDataOut ) if not ret: - logger.warning("failed to decrypt with DPAPI") + _log_warning("Failed to decrypt cookie (DPAPI)") return None result = ctypes.string_at(blob_out.pbData, blob_out.cbData) @@ -1009,9 +1001,26 @@ def _parse_browser_specification( browser, profile=None, keyring=None, container=None, domain=None): browser = browser.lower() if browser not in SUPPORTED_BROWSERS: - raise ValueError("unsupported browser '{}'".format(browser)) + raise ValueError("Unsupported browser '{}'".format(browser)) if keyring and keyring not in SUPPORTED_KEYRINGS: - raise ValueError("unsupported keyring '{}'".format(keyring)) + raise ValueError("Unsupported keyring '{}'".format(keyring)) if profile and _is_path(profile): profile = os.path.expanduser(profile) return browser, profile, keyring, container, domain + + +_log_cache = set() +_log_debug = logger.debug +_log_info = logger.info + + +def _log_warning(msg, *args): + if msg not in _log_cache: + _log_cache.add(msg) + logger.warning(msg, *args) + + +def _log_error(msg, *args): + if msg not in _log_cache: + _log_cache.add(msg) + logger.error(msg, *args) diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py index 26ac8b2..584c6d2 100644 --- a/gallery_dl/extractor/8muses.py +++ b/gallery_dl/extractor/8muses.py @@ -35,8 +35,10 @@ class _8musesAlbumExtractor(Extractor): "id" : 10467, "title" : "Liar", "path" : "Fakku Comics/mogg/Liar", + "parts" : ["Fakku Comics", "mogg", "Liar"], "private": False, - "url" : str, + "url" : "https://comics.8muses.com/comics" + "/album/Fakku-Comics/mogg/Liar", "parent" : 10464, "views" : int, "likes" : int, @@ -118,9 +120,10 @@ class _8musesAlbumExtractor(Extractor): return { "id" : album["id"], "path" : album["path"], + "parts" : album["path"].split("/"), "title" : album["name"], "private": album["isPrivate"], - "url" : self.root + album["permalink"], + 
"url" : self.root + "/comics/album/" + album["permalink"], "parent" : text.parse_int(album["parentId"]), "views" : text.parse_int(album["numberViews"]), "likes" : text.parse_int(album["numberLikes"]), diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 5475fea..3e47c3e 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -75,11 +75,13 @@ modules = [ "issuu", "itaku", "itchio", + "jpgfish", "kabeuchi", "keenspot", "kemonoparty", "khinsider", "komikcast", + "lensdump", "lexica", "lightroom", "lineblog", @@ -92,6 +94,7 @@ modules = [ "mangakakalot", "manganelo", "mangapark", + "mangaread", "mangasee", "mangoxo", "misskey", diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 09737ef..50d1026 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -555,7 +555,13 @@ class GalleryExtractor(Extractor): def items(self): self.login() - page = self.request(self.gallery_url, notfound=self.subcategory).text + + if self.gallery_url: + page = self.request( + self.gallery_url, notfound=self.subcategory).text + else: + page = None + data = self.metadata(page) imgs = self.images(page) diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 326b53b..5cfbf5c 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -70,6 +70,8 @@ class DanbooruExtractor(BaseExtractor): continue text.nameext_from_url(url, post) + post["date"] = text.parse_datetime( + post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") if post["extension"] == "zip": if self.ugoira: @@ -92,42 +94,47 @@ class DanbooruExtractor(BaseExtractor): def posts(self): return () - def _pagination(self, endpoint, params, pages=False): + def _pagination(self, endpoint, params, prefix=None): url = self.root + endpoint params["limit"] = self.per_page params["page"] = self.page_start + first = True while True: posts = self.request(url, params=params).json() - if "posts" in posts: + if isinstance(posts, dict): posts = posts["posts"] - if self.includes and posts: - if not pages and "only" not in params: - params["page"] = "b{}".format(posts[0]["id"] + 1) - params["only"] = self.includes - data = { - meta["id"]: meta - for meta in self.request(url, params=params).json() - } - for post in posts: - post.update(data[post["id"]]) - params["only"] = None - - yield from posts + if posts: + if self.includes: + params_meta = { + "only" : self.includes, + "limit": len(posts), + "tags" : "id:" + ",".join(str(p["id"]) for p in posts), + } + data = { + meta["id"]: meta + for meta in self.request( + url, params=params_meta).json() + } + for post in posts: + post.update(data[post["id"]]) + + if prefix == "a" and not first: + posts.reverse() + + yield from posts if len(posts) < self.threshold: return - if pages: + if prefix: + params["page"] = "{}{}".format(prefix, posts[-1]["id"]) + elif params["page"]: params["page"] += 1 else: - for post in reversed(posts): - if "id" in post: - params["page"] = "b{}".format(post["id"]) - break - else: - return + params["page"] = 2 + first = False def _ugoira_frames(self, post): data = self.request("{}/posts/{}.json?only=media_metadata".format( @@ -153,7 +160,7 @@ BASE_PATTERN = DanbooruExtractor.update({ "aibooru": { "root": None, "pattern": r"(?:safe.)?aibooru\.online", - } + }, }) @@ -181,7 +188,7 @@ class DanbooruTagExtractor(DanbooruExtractor): "count": 12, }), ("https://aibooru.online/posts?tags=center_frills&z=1", { - "pattern": 
r"https://aibooru\.online/data/original" + "pattern": r"https://cdn\.aibooru\.online/original" r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w+", "count": ">= 3", }), @@ -200,7 +207,21 @@ class DanbooruTagExtractor(DanbooruExtractor): return {"search_tags": self.tags} def posts(self): - return self._pagination("/posts.json", {"tags": self.tags}) + prefix = "b" + for tag in self.tags.split(): + if tag.startswith("order:"): + if tag == "order:id" or tag == "order:id_asc": + prefix = "a" + elif tag == "order:id_desc": + prefix = "b" + else: + prefix = None + elif tag.startswith( + ("id:", "md5", "ordfav:", "ordfavgroup:", "ordpool:")): + prefix = None + break + + return self._pagination("/posts.json", {"tags": self.tags}, prefix) class DanbooruPoolExtractor(DanbooruExtractor): @@ -234,7 +255,7 @@ class DanbooruPoolExtractor(DanbooruExtractor): def posts(self): params = {"tags": "pool:" + self.pool_id} - return self._pagination("/posts.json", params) + return self._pagination("/posts.json", params, "b") class DanbooruPostExtractor(DanbooruExtractor): @@ -245,6 +266,7 @@ class DanbooruPostExtractor(DanbooruExtractor): test = ( ("https://danbooru.donmai.us/posts/294929", { "content": "5e255713cbf0a8e0801dc423563c34d896bb9229", + "keyword": {"date": "dt:2008-08-12 04:46:05"}, }), ("https://danbooru.donmai.us/posts/3613024", { "pattern": r"https?://.+\.zip$", @@ -307,7 +329,4 @@ class DanbooruPopularExtractor(DanbooruExtractor): return {"date": date, "scale": scale} def posts(self): - if self.page_start is None: - self.page_start = 1 - return self._pagination( - "/explore/posts/popular.json", self.params, True) + return self._pagination("/explore/posts/popular.json", self.params) diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py index 8f2994e..d4f6cd4 100644 --- a/gallery_dl/extractor/e621.py +++ b/gallery_dl/extractor/e621.py @@ -57,6 +57,8 @@ class E621Extractor(danbooru.DanbooruExtractor): post["filename"] = file["md5"] post["extension"] = file["ext"] + post["date"] = text.parse_datetime( + post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") post.update(data) yield Message.Directory, post @@ -140,6 +142,7 @@ class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor): ("https://e621.net/posts/535", { "url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529", "content": "66f46e96a893fba8e694c4e049b23c2acc9af462", + "keyword": {"date": "dt:2007-02-17 19:02:32"}, }), ("https://e621.net/posts/3181052", { "options": (("metadata", "notes,pools"),), @@ -216,9 +219,7 @@ class E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor): ) def posts(self): - if self.page_start is None: - self.page_start = 1 - return self._pagination("/popular.json", self.params, True) + return self._pagination("/popular.json", self.params) class E621FavoriteExtractor(E621Extractor): @@ -249,6 +250,4 @@ class E621FavoriteExtractor(E621Extractor): return {"user_id": self.query.get("user_id", "")} def posts(self): - if self.page_start is None: - self.page_start = 1 - return self._pagination("/favorites.json", self.query, True) + return self._pagination("/favorites.json", self.query) diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py index 57c4333..4ca0852 100644 --- a/gallery_dl/extractor/fanbox.py +++ b/gallery_dl/extractor/fanbox.py @@ -52,8 +52,11 @@ class FanboxExtractor(Extractor): url = text.ensure_http_scheme(url) body = self.request(url, headers=headers).json()["body"] for item in body["items"]: - yield self._get_post_data(item["id"]) - + try: + yield 
self._get_post_data(item["id"]) + except Exception as exc: + self.log.warning("Skipping post %s (%s: %s)", + item["id"], exc.__class__.__name__, exc) url = body["nextUrl"] def _get_post_data(self, post_id): diff --git a/gallery_dl/extractor/gofile.py b/gallery_dl/extractor/gofile.py index b53ebbe..044dddb 100644 --- a/gallery_dl/extractor/gofile.py +++ b/gallery_dl/extractor/gofile.py @@ -6,7 +6,8 @@ from .common import Extractor, Message from .. import text, exception -from ..cache import memcache +from ..cache import cache, memcache +import hashlib class GofileFolderExtractor(Extractor): @@ -66,6 +67,7 @@ class GofileFolderExtractor(Extractor): def items(self): recursive = self.config("recursive") + password = self.config("password") token = self.config("api-token") if not token: @@ -73,12 +75,10 @@ class GofileFolderExtractor(Extractor): self.session.cookies.set("accountToken", token, domain=".gofile.io") self.api_token = token - token = self.config("website-token", "12345") - if not token: - token = self._get_website_token() - self.website_token = token + self.website_token = (self.config("website-token") or + self._get_website_token()) - folder = self._get_content(self.content_id) + folder = self._get_content(self.content_id, password) yield Message.Directory, folder num = 0 @@ -109,17 +109,20 @@ class GofileFolderExtractor(Extractor): self.log.debug("Creating temporary account") return self._api_request("createAccount")["token"] - @memcache() + @cache(maxage=86400) def _get_website_token(self): self.log.debug("Fetching website token") - page = self.request(self.root + "/contents/files.html").text - return text.extract(page, "websiteToken:", ",")[0].strip("\" ") + page = self.request(self.root + "/dist/js/alljs.js").text + return text.extr(page, 'fetchData.websiteToken = "', '"') - def _get_content(self, content_id): + def _get_content(self, content_id, password=None): + if password is not None: + password = hashlib.sha256(password.encode()).hexdigest() return self._api_request("getContent", { "contentId" : content_id, "token" : self.api_token, "websiteToken": self.website_token, + "password" : password, }) def _api_request(self, endpoint, params=None): diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py index 086b95d..9229617 100644 --- a/gallery_dl/extractor/imagechest.py +++ b/gallery_dl/extractor/imagechest.py @@ -37,6 +37,9 @@ class ImagechestGalleryExtractor(GalleryExtractor): "url": "f5674e8ba79d336193c9f698708d9dcc10e78cc7", "count": 52, }), + ("https://imgchest.com/p/xxxxxxxxxxx", { + "exception": exception.NotFoundError, + }), ) def __init__(self, match): @@ -44,6 +47,12 @@ class ImagechestGalleryExtractor(GalleryExtractor): url = self.root + "/p/" + self.gallery_id GalleryExtractor.__init__(self, match, url) + self.access_token = self.config("access-token") + if self.access_token: + self.gallery_url = None + self.metadata = self._metadata_api + self.images = self._images_api + def metadata(self, page): if "Sorry, but the page you requested could not be found." 
in page: raise exception.NotFoundError("gallery") @@ -71,3 +80,69 @@ class ImagechestGalleryExtractor(GalleryExtractor): (url, None) for url in text.extract_iter(page, 'data-url="', '"') ] + + def _metadata_api(self, page): + api = ImagechestAPI(self, self.access_token) + post = api.post(self.gallery_id) + + post["date"] = text.parse_datetime( + post["created"], "%Y-%m-%dT%H:%M:%S.%fZ") + for img in post["images"]: + img["date"] = text.parse_datetime( + img["created"], "%Y-%m-%dT%H:%M:%S.%fZ") + + post["gallery_id"] = self.gallery_id + post.pop("image_count", None) + self._image_list = post.pop("images") + + return post + + def _images_api(self, page): + return [ + (img["link"], img) + for img in self._image_list + ] + + +class ImagechestAPI(): + """Interface for the Image Chest API + + https://imgchest.com/docs/api/1.0/general/overview + """ + root = "https://api.imgchest.com" + + def __init__(self, extractor, access_token): + self.extractor = extractor + self.headers = {"Authorization": "Bearer " + access_token} + + def file(self, file_id): + endpoint = "/v1/file/" + file_id + return self._call(endpoint) + + def post(self, post_id): + endpoint = "/v1/post/" + post_id + return self._call(endpoint) + + def user(self, username): + endpoint = "/v1/user/" + username + return self._call(endpoint) + + def _call(self, endpoint): + url = self.root + endpoint + + while True: + response = self.extractor.request( + url, headers=self.headers, fatal=None, allow_redirects=False) + + if response.status_code < 300: + return response.json()["data"] + + elif response.status_code < 400: + raise exception.AuthenticationError("Invalid API access token") + + elif response.status_code == 429: + self.extractor.wait(seconds=600) + + else: + self.extractor.log.debug(response.text) + raise exception.StopExtraction("API request failed") diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 4c1be0f..677cbdd 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -55,6 +55,9 @@ class InstagramExtractor(Extractor): previews = self.config("previews", False) video_headers = {"User-Agent": "Mozilla/5.0"} + order = self.config("order-files") + reverse = order[0] in ("r", "d") if order else False + for post in self.posts(): if "__typename" in post: @@ -71,6 +74,8 @@ class InstagramExtractor(Extractor): if "date" in post: del post["date"] + if reverse: + files.reverse() for file in files: file.update(post) @@ -756,10 +761,20 @@ class InstagramRestAPI(): endpoint = "/v1/guides/guide/{}/".format(guide_id) return self._pagination_guides(endpoint) - def highlights_media(self, user_id): - chunk_size = 5 + def highlights_media(self, user_id, chunk_size=5): reel_ids = [hl["id"] for hl in self.highlights_tray(user_id)] + order = self.extractor.config("order-posts") + if order: + if order in ("desc", "reverse"): + reel_ids.reverse() + elif order in ("id", "id_asc"): + reel_ids.sort(key=lambda r: int(r[10:])) + elif order == "id_desc": + reel_ids.sort(key=lambda r: int(r[10:]), reverse=True) + elif order != "asc": + self.extractor.log.warning("Unknown posts order '%s'", order) + for offset in range(0, len(reel_ids), chunk_size): yield from self.reels_media( reel_ids[offset : offset+chunk_size]) @@ -799,13 +814,17 @@ class InstagramRestAPI(): params = {"username": screen_name} return self._call(endpoint, params=params)["data"]["user"] + @memcache(keyarg=1) def user_by_id(self, user_id): endpoint = "/v1/users/{}/info/".format(user_id) return 
self._call(endpoint)["user"] def user_id(self, screen_name, check_private=True): if screen_name.startswith("id:"): + if self.extractor.config("metadata"): + self.extractor._user = self.user_by_id(screen_name[3:]) return screen_name[3:] + user = self.user_by_name(screen_name) if user is None: raise exception.AuthorizationError( diff --git a/gallery_dl/extractor/jpgfish.py b/gallery_dl/extractor/jpgfish.py new file mode 100644 index 0000000..cdcf35c --- /dev/null +++ b/gallery_dl/extractor/jpgfish.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://jpg.fishing/""" + +from .common import Extractor, Message +from .. import text + +BASE_PATTERN = r"(?:https?://)?jpg\.(?:fishing|church)" + + +class JpgfishExtractor(Extractor): + """Base class for jpgfish extractors""" + category = "jpgfish" + root = "https://jpg.fishing" + directory_fmt = ("{category}", "{user}", "{album}",) + archive_fmt = "{id}" + + def _pagination(self, url): + while url: + page = self.request(url).text + + for item in text.extract_iter( + page, '<div class="list-item-image ', 'image-container'): + yield text.extract(item, '<a href="', '"')[0] + + url = text.extract( + page, '<a data-pagination="next" href="', '" ><')[0] + + +class JpgfishImageExtractor(JpgfishExtractor): + """Extractor for jpgfish Images""" + subcategory = "image" + pattern = BASE_PATTERN + r"/img/((?:[^/?#]+\.)?(\w+))" + test = ( + ("https://jpg.fishing/img/funnymeme.LecXGS", { + "pattern": r"https://simp3\.jpg\.church/images/funnymeme\.jpg", + "content": "098e5e9b17ad634358426e0ffd1c93871474d13c", + "keyword": { + "album": "", + "extension": "jpg", + "filename": "funnymeme", + "id": "LecXGS", + "url": "https://simp3.jpg.church/images/funnymeme.jpg", + "user": "exearco", + }, + }), + ("https://jpg.church/img/auCruA", { + "pattern": r"https://simp2\.jpg\.church/hannahowo_00457\.jpg", + "keyword": {"album": "401-500"}, + }), + ("https://jpg.church/img/hannahowo-00424.au64iA"), + ) + + def __init__(self, match): + JpgfishExtractor.__init__(self, match) + self.path, self.image_id = match.groups() + + def items(self): + url = "{}/img/{}".format(self.root, self.path) + extr = text.extract_from(self.request(url).text) + + image = { + "id" : self.image_id, + "url" : extr('<meta property="og:image" content="', '"'), + "album": text.extract(extr( + "Added to <a", "/a>"), ">", "<")[0] or "", + "user" : extr('username: "', '"'), + } + + text.nameext_from_url(image["url"], image) + yield Message.Directory, image + yield Message.Url, image["url"], image + + +class JpgfishAlbumExtractor(JpgfishExtractor): + """Extractor for jpgfish Albums""" + subcategory = "album" + pattern = BASE_PATTERN + r"/a(?:lbum)?/([^/?#]+)(/sub)?" 
+ test = ( + ("https://jpg.fishing/album/CDilP/?sort=date_desc&page=1", { + "count": 2, + }), + ("https://jpg.church/a/gunggingnsk.N9OOI", { + "count": 114, + }), + ("https://jpg.church/a/101-200.aNJ6A/", { + "count": 100, + }), + ("https://jpg.church/a/hannahowo.aNTdH/sub", { + "count": 606, + }), + ) + + def __init__(self, match): + JpgfishExtractor.__init__(self, match) + self.album, self.sub_albums = match.groups() + + def items(self): + url = "{}/a/{}".format(self.root, self.album) + data = {"_extractor": JpgfishImageExtractor} + + if self.sub_albums: + albums = self._pagination(url + "/sub") + else: + albums = (url,) + + for album in albums: + for image in self._pagination(album): + yield Message.Queue, image, data + + +class JpgfishUserExtractor(JpgfishExtractor): + """Extractor for jpgfish Users""" + subcategory = "user" + pattern = BASE_PATTERN + r"/(?!img|a(?:lbum)?)([^/?#]+)(/albums)?" + test = ( + ("https://jpg.fishing/exearco", { + "count": 3, + }), + ("https://jpg.church/exearco/albums", { + "count": 1, + }), + ) + + def __init__(self, match): + JpgfishExtractor.__init__(self, match) + self.user, self.albums = match.groups() + + def items(self): + url = "{}/{}".format(self.root, self.user) + + if self.albums: + url += "/albums" + data = {"_extractor": JpgfishAlbumExtractor} + else: + data = {"_extractor": JpgfishImageExtractor} + + for url in self._pagination(url): + yield Message.Queue, url, data diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 33e8370..915fbe6 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -125,10 +125,12 @@ class KemonopartyExtractor(Extractor): def login(self): username, password = self._get_auth_info() if username: - self._update_cookies(self._login_impl(username, password)) + self._update_cookies(self._login_impl( + (username, self.cookiedomain), password)) @cache(maxage=28*24*3600, keyarg=1) def _login_impl(self, username, password): + username = username[0] self.log.info("Logging in as %s", username) url = self.root + "/account/login" diff --git a/gallery_dl/extractor/lensdump.py b/gallery_dl/extractor/lensdump.py new file mode 100644 index 0000000..8990621 --- /dev/null +++ b/gallery_dl/extractor/lensdump.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://lensdump.com/""" + +from .common import GalleryExtractor, Extractor, Message +from .. 
import text, util + +BASE_PATTERN = r"(?:https?://)?lensdump\.com" + + +class LensdumpBase(): + """Base class for lensdump extractors""" + category = "lensdump" + root = "https://lensdump.com" + + def nodes(self, page=None): + if page is None: + page = self.request(self.url).text + + # go through all pages starting from the oldest + page_url = text.urljoin(self.root, text.extr( + text.extr(page, ' id="list-most-oldest-link"', '>'), + 'href="', '"')) + while page_url is not None: + if page_url == self.url: + current_page = page + else: + current_page = self.request(page_url).text + + for node in text.extract_iter( + current_page, ' class="list-item ', '>'): + yield node + + # find url of next page + page_url = text.extr( + text.extr(current_page, ' data-pagination="next"', '>'), + 'href="', '"') + if page_url is not None and len(page_url) > 0: + page_url = text.urljoin(self.root, page_url) + else: + page_url = None + + +class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor): + subcategory = "album" + pattern = BASE_PATTERN + r"/(?:((?!\w+/albums|a/|i/)\w+)|a/(\w+))" + test = ( + ("https://lensdump.com/a/1IhJr", { + "url": "7428cc906e7b291c778d446a11c602b81ba72840", + "keyword": { + "extension": "png", + "name": str, + "num": int, + "title": str, + "url": str, + "width": int, + }, + }), + ) + + def __init__(self, match): + GalleryExtractor.__init__(self, match, match.string) + self.gallery_id = match.group(1) or match.group(2) + + def metadata(self, page): + return { + "gallery_id": self.gallery_id, + "title": text.unescape(text.extr( + page, 'property="og:title" content="', '"').strip()) + } + + def images(self, page): + for node in self.nodes(page): + # get urls and filenames of images in current page + json_data = util.json_loads(text.unquote( + text.extr(node, 'data-object="', '"'))) + image_id = json_data.get('name') + image_url = json_data.get('url') + image_title = json_data.get('title') + if image_title is not None: + image_title = text.unescape(image_title) + yield (image_url, { + 'id': image_id, + 'url': image_url, + 'title': image_title, + 'name': json_data.get('filename'), + 'filename': image_id, + 'extension': json_data.get('extension'), + 'height': text.parse_int(json_data.get('height')), + 'width': text.parse_int(json_data.get('width')), + }) + + +class LensdumpAlbumsExtractor(LensdumpBase, Extractor): + """Extractor for album list from lensdump.com""" + subcategory = "albums" + pattern = BASE_PATTERN + r"/\w+/albums" + test = ("https://lensdump.com/vstar925/albums",) + + def items(self): + for node in self.nodes(): + album_url = text.urljoin(self.root, text.extr( + node, 'data-url-short="', '"')) + yield Message.Queue, album_url, { + "_extractor": LensdumpAlbumExtractor} + + +class LensdumpImageExtractor(LensdumpBase, Extractor): + """Extractor for individual images on lensdump.com""" + subcategory = "image" + filename_fmt = "{category}_{id}{title:?_//}.{extension}" + directory_fmt = ("{category}",) + archive_fmt = "{id}" + pattern = BASE_PATTERN + r"/i/(\w+)" + test = ( + ("https://lensdump.com/i/tyoAyM", { + "pattern": r"https://i\d\.lensdump\.com/i/tyoAyM\.webp", + "url": "ae9933f5f3bd9497bfc34e3e70a0fbef6c562d38", + "content": "1aa749ed2c0cf679ec8e1df60068edaf3875de46", + "keyword": { + "date": "dt:2022-08-01 08:24:28", + "extension": "webp", + "filename": "tyoAyM", + "height": 400, + "id": "tyoAyM", + "title": "MYOBI clovis bookcaseset", + "url": "https://i2.lensdump.com/i/tyoAyM.webp", + "width": 620, + }, + }), + ) + + def __init__(self, match): + 
Extractor.__init__(self, match) + self.key = match.group(1) + + def items(self): + url = "{}/i/{}".format(self.root, self.key) + extr = text.extract_from(self.request(url).text) + + data = { + "id" : self.key, + "title" : text.unescape(extr( + 'property="og:title" content="', '"')), + "url" : extr( + 'property="og:image" content="', '"'), + "width" : text.parse_int(extr( + 'property="image:width" content="', '"')), + "height": text.parse_int(extr( + 'property="image:height" content="', '"')), + "date" : text.parse_datetime(extr( + '<span title="', '"'), "%Y-%m-%d %H:%M:%S"), + } + + text.nameext_from_url(data["url"], data) + yield Message.Directory, data + yield Message.Url, data["url"], data diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index 12b8f39..e111fee 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -193,7 +193,10 @@ class MangadexFeedExtractor(MangadexExtractor): class MangadexAPI(): - """Interface for the MangaDex API v5""" + """Interface for the MangaDex API v5 + + https://api.mangadex.org/docs/ + """ def __init__(self, extr): self.extractor = extr diff --git a/gallery_dl/extractor/mangaread.py b/gallery_dl/extractor/mangaread.py new file mode 100644 index 0000000..49d4d7d --- /dev/null +++ b/gallery_dl/extractor/mangaread.py @@ -0,0 +1,191 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://mangaread.org/""" + +from .common import ChapterExtractor, MangaExtractor +from .. import text, exception +import re + + +class MangareadBase(): + """Base class for Mangaread extractors""" + category = "mangaread" + root = "https://www.mangaread.org" + + @staticmethod + def parse_chapter_string(chapter_string, data): + match = re.match( + r"(?:(.+)\s*-\s*)?[Cc]hapter\s*(\d+)(\.\d+)?(?:\s*-\s*(.+))?", + text.unescape(chapter_string).strip()) + manga, chapter, minor, title = match.groups() + manga = manga.strip() if manga else "" + data["manga"] = data.pop("manga", manga) + data["chapter"] = text.parse_int(chapter) + data["chapter_minor"] = minor or "" + data["title"] = title or "" + data["lang"] = "en" + data["language"] = "English" + + +class MangareadChapterExtractor(MangareadBase, ChapterExtractor): + """Extractor for manga-chapters from mangaread.org""" + pattern = (r"(?:https?://)?(?:www\.)?mangaread\.org" + r"(/manga/[^/?#]+/[^/?#]+)") + test = ( + ("https://www.mangaread.org/manga/one-piece/chapter-1053-3/", { + "pattern": (r"https://www\.mangaread\.org/wp-content/uploads" + r"/WP-manga/data/manga_[^/]+/[^/]+/[^.]+\.\w+"), + "count": 11, + "keyword": { + "manga" : "One Piece", + "title" : "", + "chapter" : 1053, + "chapter_minor": ".3", + "tags" : ["Oda Eiichiro"], + "lang" : "en", + "language": "English", + } + }), + ("https://www.mangaread.org/manga/one-piece/chapter-1000000/", { + "exception": exception.NotFoundError, + }), + (("https://www.mangaread.org" + "/manga/kanan-sama-wa-akumade-choroi/chapter-10/"), { + "pattern": (r"https://www\.mangaread\.org/wp-content/uploads" + r"/WP-manga/data/manga_[^/]+/[^/]+/[^.]+\.\w+"), + "count": 9, + "keyword": { + "manga" : "Kanan-sama wa Akumade Choroi", + "title" : "", + "chapter" : 10, + "chapter_minor": "", + "tags" : list, + "lang" : "en", + "language": "English", + } + }), + # 'Chapter146.5' + # ^^ no whitespace + 
("https://www.mangaread.org/manga/above-all-gods/chapter146-5/", { + "pattern": (r"https://www\.mangaread\.org/wp-content/uploads" + r"/WP-manga/data/manga_[^/]+/[^/]+/[^.]+\.\w+"), + "count": 6, + "keyword": { + "manga" : "Above All Gods", + "title" : "", + "chapter" : 146, + "chapter_minor": ".5", + "tags" : list, + "lang" : "en", + "language": "English", + } + }), + ) + + def metadata(self, page): + data = {"tags": list(text.extract_iter(page, "class>", "<"))} + info = text.extr(page, '<h1 id="chapter-heading">', "</h1>") + if not info: + raise exception.NotFoundError("chapter") + self.parse_chapter_string(info, data) + return data + + def images(self, page): + page = text.extr( + page, '<div class="reading-content">', '<div class="entry-header') + return [ + (url.strip(), None) + for url in text.extract_iter(page, 'data-src="', '"') + ] + + +class MangareadMangaExtractor(MangareadBase, MangaExtractor): + """Extractor for manga from mangaread.org""" + chapterclass = MangareadChapterExtractor + pattern = r"(?:https?://)?(?:www\.)?mangaread\.org(/manga/[^/?#]+)/?$" + test = ( + ("https://www.mangaread.org/manga/kanan-sama-wa-akumade-choroi", { + "pattern": (r"https://www\.mangaread\.org/manga" + r"/kanan-sama-wa-akumade-choroi" + r"/chapter-\d+(-.+)?/"), + "count" : ">= 13", + "keyword": { + "manga" : "Kanan-sama wa Akumade Choroi", + "author" : ["nonco"], + "artist" : ["nonco"], + "type" : "Manga", + "genres" : ["Comedy", "Romance", "Shounen", "Supernatural"], + "rating" : float, + "release": 2022, + "status" : "OnGoing", + "lang" : "en", + "language" : "English", + "manga_alt" : list, + "description": str, + } + }), + ("https://www.mangaread.org/manga/one-piece", { + "pattern": (r"https://www\.mangaread\.org/manga" + r"/one-piece/chapter-\d+(-.+)?/"), + "count" : ">= 1066", + "keyword": { + "manga" : "One Piece", + "author" : ["Oda Eiichiro"], + "artist" : ["Oda Eiichiro"], + "type" : "Manga", + "genres" : list, + "rating" : float, + "release": 1997, + "status" : "OnGoing", + "lang" : "en", + "language" : "English", + "manga_alt" : ["One Piece"], + "description": str, + } + }), + ("https://www.mangaread.org/manga/doesnotexist", { + "exception": exception.NotFoundError, + }), + ) + + def chapters(self, page): + if 'class="error404' in page: + raise exception.NotFoundError("manga") + data = self.metadata(page) + result = [] + for chapter in text.extract_iter( + page, '<li class="wp-manga-chapter', "</li>"): + url , pos = text.extract(chapter, '<a href="', '"') + info, _ = text.extract(chapter, ">", "</a>", pos) + self.parse_chapter_string(info, data) + result.append((url, data.copy())) + return result + + def metadata(self, page): + extr = text.extract_from(text.extr( + page, 'class="summary_content">', 'class="manga-action"')) + return { + "manga" : text.extr(page, "<h1>", "</h1>").strip(), + "description": text.unescape(text.remove_html(text.extract( + page, ">", "</div>", page.index("summary__content"))[0])), + "rating" : text.parse_float( + extr('total_votes">', "</span>").strip()), + "manga_alt" : text.remove_html( + extr("Alternative </h5>\n</div>", "</div>")).split("; "), + "author" : list(text.extract_iter( + extr('class="author-content">', "</div>"), '"tag">', "</a>")), + "artist" : list(text.extract_iter( + extr('class="artist-content">', "</div>"), '"tag">', "</a>")), + "genres" : list(text.extract_iter( + extr('class="genres-content">', "</div>"), '"tag">', "</a>")), + "type" : text.remove_html( + extr("Type </h5>\n</div>", "</div>")), + "release" : 
text.parse_int(text.remove_html( + extr("Release </h5>\n</div>", "</div>"))), + "status" : text.remove_html( + extr("Status </h5>\n</div>", "</div>")), + } diff --git a/gallery_dl/extractor/misskey.py b/gallery_dl/extractor/misskey.py index 03e9104..37efac0 100644 --- a/gallery_dl/extractor/misskey.py +++ b/gallery_dl/extractor/misskey.py @@ -7,7 +7,7 @@ """Extractors for Misskey instances""" from .common import BaseExtractor, Message -from .. import text +from .. import text, exception class MisskeyExtractor(BaseExtractor): @@ -27,6 +27,8 @@ class MisskeyExtractor(BaseExtractor): def items(self): for note in self.notes(): + if "note" in note: + note = note["note"] files = note.pop("files") or [] renote = note.get("renote") if renote: @@ -68,7 +70,7 @@ BASE_PATTERN = MisskeyExtractor.update({ }, "lesbian.energy": { "root": "https://lesbian.energy", - "pattern": r"lesbian\.energy" + "pattern": r"lesbian\.energy", }, "sushi.ski": { "root": "https://sushi.ski", @@ -152,6 +154,21 @@ class MisskeyNoteExtractor(MisskeyExtractor): return (self.api.notes_show(self.item),) +class MisskeyFavoriteExtractor(MisskeyExtractor): + """Extractor for favorited notes""" + subcategory = "favorite" + pattern = BASE_PATTERN + r"/(?:my|api/i)/favorites" + test = ( + ("https://misskey.io/my/favorites"), + ("https://misskey.io/api/i/favorites"), + ("https://lesbian.energy/my/favorites"), + ("https://sushi.ski/my/favorites"), + ) + + def notes(self): + return self.api.i_favorites() + + class MisskeyAPI(): """Interface for Misskey API @@ -164,6 +181,7 @@ class MisskeyAPI(): self.root = extractor.root self.extractor = extractor self.headers = {"Content-Type": "application/json"} + self.access_token = extractor.config("access-token") def user_id_by_username(self, username): endpoint = "/users/show" @@ -187,6 +205,13 @@ class MisskeyAPI(): data = {"noteId": note_id} return self._call(endpoint, data) + def i_favorites(self): + endpoint = "/i/favorites" + if not self.access_token: + raise exception.AuthenticationError() + data = {"i": self.access_token} + return self._pagination(endpoint, data) + def _call(self, endpoint, data): url = self.root + "/api" + endpoint return self.extractor.request( diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 2b759ec..5d100a4 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -23,6 +23,7 @@ class NewgroundsExtractor(Extractor): root = "https://www.newgrounds.com" cookiedomain = ".newgrounds.com" cookienames = ("NG_GG_username", "vmk1du5I8m") + request_interval = 1.0 def __init__(self, match): Extractor.__init__(self, match) diff --git a/gallery_dl/extractor/nsfwalbum.py b/gallery_dl/extractor/nsfwalbum.py index be736d1..6433fbd 100644 --- a/gallery_dl/extractor/nsfwalbum.py +++ b/gallery_dl/extractor/nsfwalbum.py @@ -75,7 +75,8 @@ class NsfwalbumAlbumExtractor(GalleryExtractor): @staticmethod def _validate_response(response): - return not response.request.url.endswith("/no_image.jpg") + return not response.request.url.endswith( + ("/no_image.jpg", "/placeholder.png")) @staticmethod def _annihilate(value, base=6): diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index b704031..cdaf595 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -172,6 +172,7 @@ class PixivUserExtractor(PixivExtractor): (PixivBackgroundExtractor, base + "background"), (PixivArtworksExtractor , base + "artworks"), (PixivFavoriteExtractor , base + "bookmarks/artworks"), + 
(PixivNovelUserExtractor , base + "novels"), ), ("artworks",)) @@ -750,6 +751,182 @@ class PixivSeriesExtractor(PixivExtractor): params["p"] += 1 +class PixivNovelExtractor(PixivExtractor): + """Extractor for pixiv novels""" + subcategory = "novel" + request_interval = 1.0 + pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" + r"/n(?:ovel/show\.php\?id=|/)(\d+)") + test = ( + ("https://www.pixiv.net/novel/show.php?id=19612040", { + "count": 1, + "content": "8c818474153cbd2f221ee08766e1d634c821d8b4", + "keyword": { + "caption": r"re:「無能な名無し」と呼ばれ虐げられて育った鈴\(すず\)は、", + "comment_access_control": 0, + "create_date": "2023-04-02T15:18:58+09:00", + "date": "dt:2023-04-02 06:18:58", + "id": 19612040, + "is_bookmarked": False, + "is_muted": False, + "is_mypixiv_only": False, + "is_original": True, + "is_x_restricted": False, + "novel_ai_type": 1, + "page_count": 1, + "rating": "General", + "restrict": 0, + "series": { + "id": 10278364, + "title": "龍の贄嫁〜無能な名無しと虐げられていましたが、" + "どうやら異母妹に霊力を搾取されていたようです〜", + }, + "tags": ["和風ファンタジー", "溺愛", "神様", "ヤンデレ", "執着", + "異能", "ざまぁ", "学園", "神嫁"], + "text_length": 5974, + "title": "異母妹から「無能な名無し」と虐げられていた私、" + "どうやら異母妹に霊力を搾取されていたようです(1)", + "user": { + "account": "yukinaga_chifuyu", + "id": 77055466, + }, + "visible": True, + "x_restrict": 0, + }, + }), + # embeds + ("https://www.pixiv.net/novel/show.php?id=16422450", { + "options": (("embeds", True),), + "count": 3, + }), + ("https://www.pixiv.net/n/19612040"), + ) + + def __init__(self, match): + PixivExtractor.__init__(self, match) + self.novel_id = match.group(1) + + def items(self): + tags = self.config("tags", "japanese") + if tags == "original": + transform_tags = None + elif tags == "translated": + def transform_tags(work): + work["tags"] = list(dict.fromkeys( + tag["translated_name"] or tag["name"] + for tag in work["tags"])) + else: + def transform_tags(work): + work["tags"] = [tag["name"] for tag in work["tags"]] + + ratings = {0: "General", 1: "R-18", 2: "R-18G"} + meta_user = self.config("metadata") + meta_bookmark = self.config("metadata-bookmark") + embeds = self.config("embeds") + + if embeds: + headers = { + "User-Agent" : "Mozilla/5.0", + "App-OS" : None, + "App-OS-Version": None, + "App-Version" : None, + "Referer" : self.root + "/", + "Authorization" : None, + } + + novels = self.novels() + if self.max_posts: + novels = itertools.islice(novels, self.max_posts) + for novel in novels: + if meta_user: + novel.update(self.api.user_detail(novel["user"]["id"])) + if meta_bookmark and novel["is_bookmarked"]: + detail = self.api.novel_bookmark_detail(novel["id"]) + novel["tags_bookmark"] = [tag["name"] for tag in detail["tags"] + if tag["is_registered"]] + if transform_tags: + transform_tags(novel) + novel["num"] = 0 + novel["date"] = text.parse_datetime(novel["create_date"]) + novel["rating"] = ratings.get(novel["x_restrict"]) + novel["suffix"] = "" + + yield Message.Directory, novel + + novel["extension"] = "txt" + content = self.api.novel_text(novel["id"])["novel_text"] + yield Message.Url, "text:" + content, novel + + if embeds: + desktop = False + illusts = {} + + for marker in text.extract_iter(content, "[", "]"): + if marker.startswith("[jumpuri:"): + desktop = True + elif marker.startswith("pixivimage:"): + illusts[marker[11:].partition("-")[0]] = None + + if desktop: + novel_id = str(novel["id"]) + url = "{}/novel/show.php?id={}".format( + self.root, novel_id) + data = util.json_loads(text.extr( + self.request(url, headers=headers).text, + "id=\"meta-preload-data\" content='", "'")) + + for 
image in (data["novel"][novel_id] + ["textEmbeddedImages"]).values(): + url = image.pop("urls")["original"] + novel.update(image) + novel["date_url"] = self._date_from_url(url) + novel["num"] += 1 + novel["suffix"] = "_p{:02}".format(novel["num"]) + text.nameext_from_url(url, novel) + yield Message.Url, url, novel + + if illusts: + novel["_extractor"] = PixivWorkExtractor + novel["date_url"] = None + for illust_id in illusts: + novel["num"] += 1 + novel["suffix"] = "_p{:02}".format(novel["num"]) + url = "{}/artworks/{}".format(self.root, illust_id) + yield Message.Queue, url, novel + + def novels(self): + return (self.api.novel_detail(self.novel_id),) + + +class PixivNovelUserExtractor(PixivNovelExtractor): + """Extractor for pixiv users' novels""" + subcategory = "novel-user" + pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" + r"/(?:en/)?users/(\d+)/novels") + test = ("https://www.pixiv.net/en/users/77055466/novels", { + "pattern": "^text:", + "range": "1-5", + "count": 5, + }) + + def novels(self): + return self.api.user_novels(self.novel_id) + + +class PixivNovelSeriesExtractor(PixivNovelExtractor): + """Extractor for pixiv novel series""" + subcategory = "novel-series" + pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" + r"/novel/series/(\d+)") + test = ("https://www.pixiv.net/novel/series/10278364", { + "count": 4, + "content": "b06abed001b3f6ccfb1579699e9a238b46d38ea2", + }) + + def novels(self): + return self.api.novel_series(self.novel_id) + + class PixivSketchExtractor(Extractor): """Extractor for user pages on sketch.pixiv.net""" category = "pixiv" @@ -907,6 +1084,23 @@ class PixivAppAPI(): params = {"illust_id": illust_id} return self._pagination("/v2/illust/related", params) + def novel_bookmark_detail(self, novel_id): + params = {"novel_id": novel_id} + return self._call( + "/v2/novel/bookmark/detail", params)["bookmark_detail"] + + def novel_detail(self, novel_id): + params = {"novel_id": novel_id} + return self._call("/v2/novel/detail", params)["novel"] + + def novel_series(self, series_id): + params = {"series_id": series_id} + return self._pagination("/v1/novel/series", params, "novels") + + def novel_text(self, novel_id): + params = {"novel_id": novel_id} + return self._call("/v1/novel/text", params) + def search_illust(self, word, sort=None, target=None, duration=None, date_start=None, date_end=None): params = {"word": word, "search_target": target, @@ -938,6 +1132,10 @@ class PixivAppAPI(): params = {"user_id": user_id} return self._pagination("/v1/user/illusts", params) + def user_novels(self, user_id): + params = {"user_id": user_id} + return self._pagination("/v1/user/novels", params, "novels") + def ugoira_metadata(self, illust_id): params = {"illust_id": illust_id} return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"] diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py index 49da9ce..14c25c4 100644 --- a/gallery_dl/extractor/poipiku.py +++ b/gallery_dl/extractor/poipiku.py @@ -41,7 +41,7 @@ class PoipikuExtractor(Extractor): "user_name" : text.unescape(extr( '<h2 class="UserInfoUserName">', '</').rpartition(">")[2]), "description": text.unescape(extr( - 'class="IllustItemDesc" >', '<')), + 'class="IllustItemDesc" >', '</h1>')), "_http_headers": {"Referer": post_url}, } @@ -172,7 +172,9 @@ class PoipikuPostExtractor(PoipikuExtractor): "count": 3, "keyword": { "count": "3", - "description": "ORANGE OASISボスネタバレ", + "description": "ORANGE OASISボスネタバレ<br />曲も大好き<br />" + "2枚目以降はほとんど見えなかった1枚目背景" + "のヒエログリフ小ネタです𓀀", 
"num": int, "post_category": "SPOILER", "post_id": "5776587", diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index cefe8d3..3f09e13 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -55,21 +55,26 @@ class RedditExtractor(Extractor): visited.add(submission["id"]) submission["num"] = 0 - url = submission["url"] + if "crosspost_parent_list" in submission: + media = submission["crosspost_parent_list"][-1] + else: + media = submission + + url = media["url"] if url and url.startswith("https://i.redd.it/"): text.nameext_from_url(url, submission) yield Message.Url, url, submission - elif "gallery_data" in submission: + elif "gallery_data" in media: for submission["num"], url in enumerate( - self._extract_gallery(submission), 1): + self._extract_gallery(media), 1): text.nameext_from_url(url, submission) yield Message.Url, url, submission - elif submission["is_video"]: + elif media["is_video"]: if videos: text.nameext_from_url(url, submission) - url = "ytdl:" + self._extract_video(submission) + url = "ytdl:" + self._extract_video(media) yield Message.Url, url, submission elif not submission["is_self"]: @@ -280,14 +285,19 @@ class RedditSubmissionExtractor(RedditExtractor): ("https://www.reddit.com/r/kpopfap/comments/qjj04q/", { "count": 0, }), - ("https://old.reddit.com/r/lavaporn/comments/2a00np/"), - ("https://np.reddit.com/r/lavaporn/comments/2a00np/"), - ("https://m.reddit.com/r/lavaporn/comments/2a00np/"), - ("https://redd.it/2a00np/"), + # user page submission (#2301) ("https://www.reddit.com/user/TheSpiritTree/comments/srilyf/", { "pattern": r"https://i.redd.it/8fpgv17yqlh81.jpg", "count": 1, }), + # cross-posted video (#887, #3586, #3976) + ("https://www.reddit.com/r/kittengifs/comments/12m0b8d", { + "pattern": r"ytdl:https://v\.redd\.it/cvabpjacrvta1", + }), + ("https://old.reddit.com/r/lavaporn/comments/2a00np/"), + ("https://np.reddit.com/r/lavaporn/comments/2a00np/"), + ("https://m.reddit.com/r/lavaporn/comments/2a00np/"), + ("https://redd.it/2a00np/"), ) def __init__(self, match): diff --git a/gallery_dl/extractor/tcbscans.py b/gallery_dl/extractor/tcbscans.py index cac5a54..b5a730a 100644 --- a/gallery_dl/extractor/tcbscans.py +++ b/gallery_dl/extractor/tcbscans.py @@ -4,19 +4,20 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://onepiecechapters.com/""" +"""Extractors for https://tcbscans.com/""" from .common import ChapterExtractor, MangaExtractor from .. 
import text +BASE_PATTERN = r"(?:https?://)?(?:tcbscans|onepiecechapters)\.com" + class TcbscansChapterExtractor(ChapterExtractor): category = "tcbscans" - pattern = (r"(?:https?://)?onepiecechapters\.com" - r"(/chapters/\d+/[^/?#]+)") - root = "https://onepiecechapters.com" + root = "https://tcbscans.com" + pattern = BASE_PATTERN + r"(/chapters/\d+/[^/?#]+)" test = ( - (("https://onepiecechapters.com" + (("https://tcbscans.com" "/chapters/4708/chainsaw-man-chapter-108"), { "pattern": (r"https://cdn\.[^/]+" r"/(file|attachments/[^/]+)/[^/]+/[^.]+\.\w+"), @@ -66,12 +67,11 @@ class TcbscansChapterExtractor(ChapterExtractor): class TcbscansMangaExtractor(MangaExtractor): category = "tcbscans" + root = "https://tcbscans.com" chapterclass = TcbscansChapterExtractor - pattern = (r"(?:https?://)?onepiecechapters\.com" - r"(/mangas/\d+/[^/?#]+)") - root = "https://onepiecechapters.com" + pattern = BASE_PATTERN + r"(/mangas/\d+/[^/?#]+)" test = ( - ("https://onepiecechapters.com/mangas/13/chainsaw-man", { + ("https://tcbscans.com/mangas/13/chainsaw-man", { "pattern": TcbscansChapterExtractor.pattern, "range" : "1-50", "count" : 50, diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 5e68f13..c47021e 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -13,6 +13,7 @@ from .. import text, util, exception from ..cache import cache import itertools import json +import re BASE_PATTERN = r"(?:https?://)?(?:www\.|mobile\.)?(?:[fv]x)?twitter\.com" @@ -75,6 +76,10 @@ class TwitterExtractor(Extractor): else: seen_tweets = None + if self.twitpic: + self._find_twitpic = re.compile( + r"https?(://twitpic\.com/(?!photos/)\w+)").findall + for tweet in self.tweets(): if "legacy" in tweet: @@ -231,12 +236,24 @@ class TwitterExtractor(Extractor): files.append({"url": url}) def _extract_twitpic(self, tweet, files): - for url in tweet["entities"].get("urls", ()): + urls = {} + + # collect URLs from entities + for url in tweet["entities"].get("urls") or (): url = url["expanded_url"] if "//twitpic.com/" not in url or "/photos/" in url: continue if url.startswith("http:"): url = "https" + url[4:] + urls[url] = None + + # collect URLs from text + for url in self._find_twitpic( + tweet.get("full_text") or tweet.get("text") or ""): + urls["https" + url] = None + + # extract actual URLs + for url in urls: response = self.request(url, fatal=False) if response.status_code >= 400: continue @@ -781,7 +798,13 @@ class TwitterTweetExtractor(TwitterExtractor): ("https://twitter.com/i/web/status/112900228289540096", { "options": (("twitpic", True), ("cards", False)), "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg", - "count": 3, + "count": 2, # 1 duplicate + }), + # TwitPic URL not in 'urls' (#3792) + ("https://twitter.com/shimoigusaP/status/8138669971", { + "options": (("twitpic", True),), + "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.png", + "count": 1, }), # Twitter card (#1005) ("https://twitter.com/billboard/status/1306599586602135555", { diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index 388ee03..2cbfad6 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -132,7 +132,7 @@ class WeiboExtractor(Extractor): return self.request(url).json() def _user_id(self): - if self.user.isdecimal(): + if len(self.user) >= 10 and self.user.isdecimal(): return self.user[-10:] else: url = "{}/ajax/profile/info?{}={}".format( diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py index 
fc36fa2..2ff48c3 100644 --- a/gallery_dl/formatter.py +++ b/gallery_dl/formatter.py @@ -9,6 +9,7 @@ """String formatters""" import os +import sys import time import string import _string @@ -255,7 +256,11 @@ def parse_field_name(field_name): func = operator.itemgetter try: if ":" in key: - key = _slice(key) + if key[0] == "b": + func = _bytesgetter + key = _slice(key[1:]) + else: + key = _slice(key) else: key = key.strip("\"'") except TypeError: @@ -276,6 +281,14 @@ def _slice(indices): ) +def _bytesgetter(slice, encoding=sys.getfilesystemencoding()): + + def apply_slice_bytes(obj): + return obj.encode(encoding)[slice].decode(encoding, "ignore") + + return apply_slice_bytes + + def _build_format_func(format_spec, default): if format_spec: return _FORMAT_SPECIFIERS.get( @@ -295,11 +308,20 @@ def _parse_optional(format_spec, default): def _parse_slice(format_spec, default): indices, _, format_spec = format_spec.partition("]") - slice = _slice(indices[1:]) fmt = _build_format_func(format_spec, default) - def apply_slice(obj): - return fmt(obj[slice]) + if indices[1] == "b": + slice_bytes = _bytesgetter(_slice(indices[2:])) + + def apply_slice(obj): + return fmt(slice_bytes(obj)) + + else: + slice = _slice(indices[1:]) + + def apply_slice(obj): + return fmt(obj[slice]) + return apply_slice diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py index e81c6cf..39188f1 100644 --- a/gallery_dl/postprocessor/exec.py +++ b/gallery_dl/postprocessor/exec.py @@ -11,6 +11,7 @@ from .common import PostProcessor from .. import util, formatter import subprocess +import os if util.WINDOWS: @@ -60,6 +61,7 @@ class ExecPP(PostProcessor): kwdict["_path"] = pathfmt.realpath args = [arg.format_map(kwdict) for arg in self.args] + args[0] = os.path.expanduser(args[0]) self._exec(args, False) if archive: diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 4f9e49a..3e0290c 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. 
-__version__ = "1.25.4" +__version__ = "1.25.5" diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py index eb09b9b..0a0bf86 100644 --- a/gallery_dl/ytdl.py +++ b/gallery_dl/ytdl.py @@ -399,7 +399,7 @@ def parse_command_line(module, argv): "playlist_items": opts.playlist_items, "xattr_set_filesize": opts.xattr_set_filesize, "match_filter": match_filter, - "no_color": opts.no_color, + "no_color": getattr(opts, "no_color", None), "ffmpeg_location": opts.ffmpeg_location, "hls_prefer_native": opts.hls_prefer_native, "hls_use_mpegts": opts.hls_use_mpegts, diff --git a/test/test_formatter.py b/test/test_formatter.py index 2258966..1bda9d9 100644 --- a/test/test_formatter.py +++ b/test/test_formatter.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2021-2022 Mike Fährmann +# Copyright 2021-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -23,6 +23,7 @@ class TestFormatter(unittest.TestCase): kwdict = { "a": "hElLo wOrLd", "b": "äöü", + "j": "げんそうきょう", "d": {"a": "foo", "b": 0, "c": None}, "l": ["a", "b", "c"], "n": None, @@ -133,7 +134,7 @@ class TestFormatter(unittest.TestCase): self._run_test("{d['a']}", "foo") self._run_test('{d["a"]}', "foo") - def test_slicing(self): + def test_slice_str(self): v = self.kwdict["a"] self._run_test("{a[1:10]}" , v[1:10]) self._run_test("{a[-10:-1]}", v[-10:-1]) @@ -165,6 +166,26 @@ class TestFormatter(unittest.TestCase): self._run_test("{a:[:50:2]}", v[:50:2]) self._run_test("{a:[::]}" , v) + def test_slice_bytes(self): + v = self.kwdict["j"] + self._run_test("{j[b1:10]}" , v[1:3]) + self._run_test("{j[b-10:-1]}", v[-3:-1]) + self._run_test("{j[b5:]}" , v[2:]) + self._run_test("{j[b50:]}" , v[50:]) + self._run_test("{j[b:5]}" , v[:1]) + self._run_test("{j[b:50]}" , v[:50]) + self._run_test("{j[b:]}" , v) + self._run_test("{j[b::]}" , v) + + self._run_test("{j:[b1:10]}" , v[1:3]) + self._run_test("{j:[b-10:-1]}", v[-3:-1]) + self._run_test("{j:[b5:]}" , v[2:]) + self._run_test("{j:[b50:]}" , v[50:]) + self._run_test("{j:[b:5]}" , v[:1]) + self._run_test("{j:[b:50]}" , v[:50]) + self._run_test("{j:[b:]}" , v) + self._run_test("{j:[b::]}" , v) + def test_maxlen(self): v = self.kwdict["a"] self._run_test("{a:L5/foo/}" , "foo") @@ -413,10 +434,10 @@ def noarg(): fmt4 = formatter.parse("\fM " + path + ":lengths") self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name") - self.assertEqual(fmt2.format_map(self.kwdict), "89") + self.assertEqual(fmt2.format_map(self.kwdict), "96") self.assertEqual(fmt3.format_map(self.kwdict), "'Title' by Name") - self.assertEqual(fmt4.format_map(self.kwdict), "89") + self.assertEqual(fmt4.format_map(self.kwdict), "96") with self.assertRaises(TypeError): self.assertEqual(fmt0.format_map(self.kwdict), "") |
