29 files changed, 778 insertions, 231 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2d352f5..c7e75a8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,22 +1,40 @@
-## 1.29.6 - 2025-05-04
+## 1.29.7 - 2025-05-23
 ### Extractors
 #### Additions
-- [manganelo] support `nelomanga.net` and mirror domains ([#7423](https://github.com/mikf/gallery-dl/issues/7423))
+- [mangadex] add `following` extractor ([#7487](https://github.com/mikf/gallery-dl/issues/7487))
+- [pixeldrain] add support for filesystem URLs ([#7473](https://github.com/mikf/gallery-dl/issues/7473))
 #### Fixes
-- [deviantart] unescape `\'` in JSON data ([#6653](https://github.com/mikf/gallery-dl/issues/6653))
-- [kemonoparty] revert to using default creator posts endpoint ([#7438](https://github.com/mikf/gallery-dl/issues/7438) [#7450](https://github.com/mikf/gallery-dl/issues/7450) [#7462](https://github.com/mikf/gallery-dl/issues/7462))
-- [pixiv:novel] fix `embeds` extraction by using AJAX API ([#7422](https://github.com/mikf/gallery-dl/issues/7422) [#7435](https://github.com/mikf/gallery-dl/issues/7435))
-- [scrolller] fix exception for albums with missing media ([#7428](https://github.com/mikf/gallery-dl/issues/7428))
-- [twitter] fix `404 Not Found ()` errors ([#7382](https://github.com/mikf/gallery-dl/issues/7382) [#7386](https://github.com/mikf/gallery-dl/issues/7386) [#7426](https://github.com/mikf/gallery-dl/issues/7426) [#7430](https://github.com/mikf/gallery-dl/issues/7430) [#7431](https://github.com/mikf/gallery-dl/issues/7431) [#7445](https://github.com/mikf/gallery-dl/issues/7445) [#7459](https://github.com/mikf/gallery-dl/issues/7459))
+- [bluesky] handle posts without `record` data ([#7499](https://github.com/mikf/gallery-dl/issues/7499))
+- [civitai] fix & improve video downloads ([#7502](https://github.com/mikf/gallery-dl/issues/7502))
+- [civitai] fix exception for images without `modelVersionId` ([#7432](https://github.com/mikf/gallery-dl/issues/7432))
+- [civitai] make metadata extraction non-fatal ([#7562](https://github.com/mikf/gallery-dl/issues/7562))
+- [fanbox] use `"browser": "firefox"` by default ([#7490](https://github.com/mikf/gallery-dl/issues/7490))
+- [idolcomplex] fix pagination logic ([#7549](https://github.com/mikf/gallery-dl/issues/7549))
+- [idolcomplex] fix 429 error during login by adding a 10s delay
+- [instagram:stories] fix `post_date` metadata ([#7521](https://github.com/mikf/gallery-dl/issues/7521))
+- [motherless] fix video gallery downloads ([#7530](https://github.com/mikf/gallery-dl/issues/7530))
+- [pinterest] handle `story_pin_product_sticker_block` blocks ([#7563](https://github.com/mikf/gallery-dl/issues/7563))
+- [subscribestar] fix `content` and `title` metadata ([#7486](https://github.com/mikf/gallery-dl/issues/7486) [#7526](https://github.com/mikf/gallery-dl/issues/7526))
 #### Improvements
-- [kemonoparty] add `endpoint` option ([#7438](https://github.com/mikf/gallery-dl/issues/7438) [#7450](https://github.com/mikf/gallery-dl/issues/7450) [#7462](https://github.com/mikf/gallery-dl/issues/7462))
-- [tumblr] improve error message for dashboard-only blogs ([#7455](https://github.com/mikf/gallery-dl/issues/7455))
-- [weasyl] support `/view/` URLs ([#7469](https://github.com/mikf/gallery-dl/issues/7469))
+- [arcalive] allow overriding default `User-Agent` header ([#7556](https://github.com/mikf/gallery-dl/issues/7556))
+- [fanbox] update API headers ([#7490](https://github.com/mikf/gallery-dl/issues/7490))
+- [flickr] add `info` option ([#4720](https://github.com/mikf/gallery-dl/issues/4720) [#6817](https://github.com/mikf/gallery-dl/issues/6817))
+- [flickr] add `profile` option
+- [instagram:stories] add `split` option ([#7521](https://github.com/mikf/gallery-dl/issues/7521))
+- [mangadex] implement login with client credentials
+- [mangadex] send `Authorization` header only when necessary
+- [mastodon] support Akkoma/Pleroma `/notice/:ID` URLs ([#7496](https://github.com/mikf/gallery-dl/issues/7496))
+- [mastodon] support Akkoma/Pleroma `/objects/:UUID` URLs ([#7497](https://github.com/mikf/gallery-dl/issues/7497))
+- [pixiv] implement sanity handling for ugoira works ([#4327](https://github.com/mikf/gallery-dl/issues/4327) [#6297](https://github.com/mikf/gallery-dl/issues/6297) [#7285](https://github.com/mikf/gallery-dl/issues/7285) [#7434](https://github.com/mikf/gallery-dl/issues/7434))
+- [twitter:ctid] reduce chance of generating the same ID
 #### Metadata
-- [chevereto] extract `date` metadata ([#7437](https://github.com/mikf/gallery-dl/issues/7437))
-- [civitai] implement retrieving `model` and `version` metadata ([#7432](https://github.com/mikf/gallery-dl/issues/7432))
-- [manganelo] extract more metadata
-### Post Processors
-- [directory] add `directory` post processor ([#7432](https://github.com/mikf/gallery-dl/issues/7432))
-### Miscellaneous
-- [job] do not reset skip count when `skip-filter` fails ([#7433](https://github.com/mikf/gallery-dl/issues/7433))
+- [civitai] provide proper `extension` for model files ([#7432](https://github.com/mikf/gallery-dl/issues/7432))
+- [flickr] provide `license_name` metadata
+- [sankaku] support new `tags` categories ([#7333](https://github.com/mikf/gallery-dl/issues/7333) [#7553](https://github.com/mikf/gallery-dl/issues/7553))
+- [vipergirls] provide `num` and `count` metadata ([#7479](https://github.com/mikf/gallery-dl/issues/7479))
+- [vipergirls] extract more metadata & rename fields ([#7479](https://github.com/mikf/gallery-dl/issues/7479))
+### Downloaders
+- [http] fix setting `mtime` per file ([#7529](https://github.com/mikf/gallery-dl/issues/7529))
+- [ytdl] improve temp/part file handling ([#6949](https://github.com/mikf/gallery-dl/issues/6949) [#7494](https://github.com/mikf/gallery-dl/issues/7494))
+### Cookies
+- support Zen browser ([#7233](https://github.com/mikf/gallery-dl/issues/7233) [#7546](https://github.com/mikf/gallery-dl/issues/7546))
diff --git a/PKG-INFO b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gallery_dl
-Version: 1.29.6
+Version: 1.29.7
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -133,9 +133,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for

-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.6/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.7/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.6/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.7/gallery-dl.bin>`__

 Nightly Builds
diff --git a/README.rst b/README.rst
@@ -77,9 +77,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for

-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.6/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.7/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.6/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.7/gallery-dl.bin>`__

 Nightly Builds
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index a50a0c0..77403b1 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2025-05-04" "1.29.6" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2025-05-23" "1.29.7" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index ba2e048..1c2a2a0 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2025-05-04" "1.29.6" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2025-05-23" "1.29.7" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
 .\" disable justification (adjust text to left margin only)
@@ -814,6 +814,8 @@ or a \f[I]list\f[] with IP and explicit port number as elements.
 .br
 * \f[I]"gallery-dl/VERSION (by mikf)"\f[]: \f[I][E621]\f[]
 .br
+* \f[I]"net.umanle.arca.android.playstore/0.9.75"\f[]: \f[I]arcalive\f[]
+.br
 * \f[I]"Patreon/72.2.28 (Android; Android 14; Scale/2.10)"\f[]: \f[I]patreon\f[]
 .br
 * \f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/LATEST.0.0.0 Safari/537.36"\f[]: \f[I]instagram\f[]
@@ -838,7 +840,7 @@ is enabled.
 .IP "Default:" 9
 .br
-* \f[I]"firefox"\f[]: \f[I]artstation\f[], \f[I]mangasee\f[], \f[I]twitter\f[]
+* \f[I]"firefox"\f[]: \f[I]artstation\f[], \f[I]fanbox\f[], \f[I]mangasee\f[], \f[I]twitter\f[]
 .br
 * \f[I]null\f[]: otherwise
@@ -2190,7 +2192,7 @@ Note: This requires 1 additional HTTP request per image or video.
 \f[I]true\f[]

 .IP "Description:" 4
-Download images rated NSFW.
+Download NSFW-rated images.
 .br
 * For \f[I]"api": "rest"\f[], this can be one of
 to set the highest returned mature content flag.
@@ -2201,9 +2203,9 @@
 * For \f[I]"api": "trpc"\f[], this can be an \f[I]integer\f[]
 whose bits select the returned mature content flags.
-For example, \f[I]12\f[] (\f[I]4|8\f[]) would return only
-\f[I]Mature\f[] and \f[I]X\f[] rated images,
-while \f[I]3\f[] (\f[I]1|2\f[]) would return only
+For example, \f[I]28\f[] (\f[I]4|8|16\f[]) would return only
+\f[I]R\f[], \f[I]X\f[], and \f[I]XXX\f[] rated images,
+while \f[I]3\f[] (\f[I]1|2\f[]) would return only
 \f[I]None\f[] and \f[I]Soft\f[] rated images,
@@ -2233,6 +2235,32 @@ Note: Set this option to an arbitrary letter, e.g., \f[I]"w"\f[],
 to download images in JPEG format at their original resolution.

+.SS extractor.civitai.quality-videos
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]"quality=100"\f[]
+
+.IP "Example:" 4
+.br
+* "+transcode=true,quality=100"
+.br
+* ["+", "transcode=true", "quality=100"]
+
+.IP "Description:" 4
+A (comma-separated) list of video quality options
+to pass with every video URL.
+
+Known available options include \f[I]original\f[], \f[I]quality\f[],
+and \f[I]transcode\f[].
+
+Use \f[I]+\f[] as first character to add the given options to the
+\f[I]quality\f[] ones.
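In gallery-dl.conf terms, the new option sits next to the existing civitai quality setting (compare the docs/gallery-dl.conf hunk further down). A minimal sketch, reusing the default quality value and the "+"-prefixed example from above, whose leading "+" merges the extra options into the quality ones:

    {
        "extractor": {
            "civitai": {
                "quality"       : "original=true",
                "quality-videos": "+transcode=true,quality=100"
            }
        }
    }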
+ + .SS extractor.cyberdrop.domain .IP "Type:" 6 \f[I]string\f[] @@ -3096,6 +3124,20 @@ Note: This requires 1 additional API call per photo. See \f[I]flickr.photos.getExif\f[] for details. +.SS extractor.flickr.info +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +For each photo, retrieve its "full" metadata as provided by +\f[I]flickr.photos.getInfo\f[] + +Note: This requires 1 additional API call per photo. + + .SS extractor.flickr.metadata .IP "Type:" 6 .br @@ -3124,6 +3166,20 @@ in \f[I]Flickr's API docs\f[] for possible field names. +.SS extractor.flickr.profile +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Extract additional \f[I]user\f[] profile metadata. + +Note: This requires 1 additional API call per user profile. +See \f[I]flickr.people.getInfo\f[] for details. + + .SS extractor.flickr.videos .IP "Type:" 6 \f[I]bool\f[] @@ -3583,6 +3639,18 @@ Download pre-merged video formats Do not download videos +.SS extractor.instagram.stories.split +.IP "Type:" 6 +.br +* \f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Split \f[I]stories\f[] elements into separate posts. + + .SS extractor.itaku.videos .IP "Type:" 6 \f[I]bool\f[] @@ -4471,6 +4539,17 @@ Download from video pins. Your account's \f[I]API key\f[] +.SS extractor.pixeldrain.recursive +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Recursively download files from subfolders. + + .SS extractor.pixiv.include .IP "Type:" 6 .br @@ -4975,6 +5054,47 @@ Format of \f[I]id\f[] metadata fields. Refresh download URLs before they expire. +.SS extractor.sankaku.tags +.IP "Type:" 6 +.br +* \f[I]bool\f[] +.br +* \f[I]string\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Group \f[I]tags\f[] by type and +.br +provide them as \f[I]tags_TYPE\f[] and \f[I]tag_string_TYPE\f[] metadata fields, +for example \f[I]tags_artist\f[] and \f[I]tags_character\f[]. +.br + +\f[I]true\f[] +Enable general \f[I]tags\f[] categories + +Requires: + +.br +* 1 additional API request per 100 tags per post + +\f[I]"extended"\f[] +Group \f[I]tags\f[] by the new, extended tag category system +used on \f[I]chan.sankakucomplex.com\f[] + +Requires: + +.br +* 1 additional HTTP request per post +.br +* logged-in \f[I]cookies\f[] +to fetch full \f[I]tags\f[] category data + +\f[I]false\f[] +Disable \f[I]tags\f[] categories + + .SS extractor.sankakucomplex.embeds .IP "Type:" 6 \f[I]bool\f[] @@ -8804,6 +8924,31 @@ and click "SUBMIT" as \f[I]"api-key"\f[] and \f[I]"api-secret"\f[] +.SS extractor.mangadex.client-id & .client-secret +.IP "Type:" 6 +\f[I]string\f[] + +.IP "How To:" 4 +.br +* login and go to your \f[I]User Settings\f[] +.br +* open the "API Clients" section +.br +* click "\f[I]+ Create\f[]" +.br +* choose a name +.br +* click "\f[I]✔️ Create\f[]" +.br +* wait for approval / reload the page +.br +* copy the value after "AUTOAPPROVED ACTIVE" in the form "personal-client-..." 
+and put it in your configuration file as \f[I]"client-id"\f[] +.br +* click "\f[I]Get Secret\f[]", then "\f[I]Copy Secret\f[]", +and paste it into your configuration file as \f[I]"client-secret"\f[] + + .SS extractor.reddit.client-id & .user-agent .IP "Type:" 6 \f[I]string\f[] diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 2df1ec3..eac3390 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -188,7 +188,8 @@ "include" : ["user-models", "user-posts"], "metadata": false, "nsfw" : true, - "quality" : "original=true" + "quality" : "original=true", + "quality-videos": "quality=100" }, "coomerparty": { @@ -277,7 +278,9 @@ "contexts": false, "exif" : false, + "info" : false, "metadata": false, + "profile" : false, "size-max": null, "videos" : true }, @@ -362,7 +365,11 @@ "order-files": "asc", "order-posts": "asc", "previews" : false, - "videos" : true + "videos" : true, + + "stories": { + "split": false + } }, "itaku": { @@ -408,6 +415,8 @@ }, "mangadex": { + "client-id" : "", + "client-secret": "", "username": "", "password": "", @@ -479,7 +488,8 @@ }, "pixeldrain": { - "api-key": null + "api-key" : null, + "recursive": false }, "pixiv": { diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 0c2a61e..c022f84 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: gallery_dl -Version: 1.29.6 +Version: 1.29.7 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -133,9 +133,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.6/gallery-dl.exe>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.7/gallery-dl.exe>`__ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__) -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.6/gallery-dl.bin>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.7/gallery-dl.bin>`__ Nightly Builds diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py index 71b0b6b..f03ad58 100644 --- a/gallery_dl/cookies.py +++ b/gallery_dl/cookies.py @@ -26,7 +26,9 @@ from . 
import aes, text, util SUPPORTED_BROWSERS_CHROMIUM = { "brave", "chrome", "chromium", "edge", "opera", "thorium", "vivaldi"} -SUPPORTED_BROWSERS = SUPPORTED_BROWSERS_CHROMIUM | {"firefox", "safari"} +SUPPORTED_BROWSERS_FIREFOX = {"firefox", "zen"} +SUPPORTED_BROWSERS = \ + SUPPORTED_BROWSERS_CHROMIUM | SUPPORTED_BROWSERS_FIREFOX | {"safari"} logger = logging.getLogger("cookies") @@ -34,8 +36,8 @@ logger = logging.getLogger("cookies") def load_cookies(browser_specification): browser_name, profile, keyring, container, domain = \ _parse_browser_specification(*browser_specification) - if browser_name == "firefox": - return load_cookies_firefox(profile, container, domain) + if browser_name in SUPPORTED_BROWSERS_FIREFOX: + return load_cookies_firefox(browser_name, profile, container, domain) elif browser_name == "safari": return load_cookies_safari(profile, domain) elif browser_name in SUPPORTED_BROWSERS_CHROMIUM: @@ -44,8 +46,10 @@ def load_cookies(browser_specification): raise ValueError("unknown browser '{}'".format(browser_name)) -def load_cookies_firefox(profile=None, container=None, domain=None): - path, container_id = _firefox_cookies_database(profile, container) +def load_cookies_firefox(browser_name, profile=None, + container=None, domain=None): + path, container_id = _firefox_cookies_database(browser_name, + profile, container) sql = ("SELECT name, value, host, path, isSecure, expiry " "FROM moz_cookies") @@ -83,7 +87,8 @@ def load_cookies_firefox(profile=None, container=None, domain=None): sql, parameters) ] - _log_info("Extracted %s cookies from Firefox", len(cookies)) + _log_info("Extracted %s cookies from %s", + len(cookies), browser_name.capitalize()) return cookies @@ -196,13 +201,14 @@ def load_cookies_chromium(browser_name, profile=None, # -------------------------------------------------------------------- # firefox -def _firefox_cookies_database(profile=None, container=None): +def _firefox_cookies_database(browser_name, profile=None, container=None): if not profile: - search_root = _firefox_browser_directory() + search_root = _firefox_browser_directory(browser_name) elif _is_path(profile): search_root = profile else: - search_root = os.path.join(_firefox_browser_directory(), profile) + search_root = os.path.join( + _firefox_browser_directory(browser_name), profile) path = _find_most_recently_used_file(search_root, "cookies.sqlite") if path is None: @@ -245,14 +251,27 @@ def _firefox_cookies_database(profile=None, container=None): return path, container_id -def _firefox_browser_directory(): +def _firefox_browser_directory(browser_name): + join = os.path.join + if sys.platform in ("win32", "cygwin"): - return os.path.expandvars( - r"%APPDATA%\Mozilla\Firefox\Profiles") - if sys.platform == "darwin": - return os.path.expanduser( - "~/Library/Application Support/Firefox/Profiles") - return os.path.expanduser("~/.mozilla/firefox") + appdata = os.path.expandvars("%APPDATA%") + return { + "firefox": join(appdata, R"Mozilla\Firefox\Profiles"), + "zen" : join(appdata, R"zen\Profiles") + }[browser_name] + elif sys.platform == "darwin": + appdata = os.path.expanduser("~/Library/Application Support") + return { + "firefox": join(appdata, R"Firefox/Profiles"), + "zen" : join(appdata, R"zen/Profiles") + }[browser_name] + else: + home = os.path.expanduser("~") + return { + "firefox": join(home, R".mozilla/firefox"), + "zen" : join(home, R".zen") + }[browser_name] # -------------------------------------------------------------------- diff --git a/gallery_dl/downloader/http.py 
b/gallery_dl/downloader/http.py index faea9e5..c58e2fb 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -322,7 +322,10 @@ class HttpDownloader(DownloaderBase): self.downloading = False if self.mtime: - kwdict.setdefault("_mtime", response.headers.get("Last-Modified")) + if "_http_lastmodified" in kwdict: + kwdict["_mtime"] = kwdict["_http_lastmodified"] + else: + kwdict["_mtime"] = response.headers.get("Last-Modified") else: kwdict["_mtime"] = None diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index 7a20dc2..1fc2f82 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -130,18 +130,27 @@ class YoutubeDLDownloader(DownloaderBase): if pathfmt.exists(): pathfmt.temppath = "" return True - if self.part and self.partdir: - pathfmt.temppath = os.path.join( - self.partdir, pathfmt.filename) - - self._set_outtmpl(ytdl_instance, pathfmt.temppath.replace("%", "%%")) self.out.start(pathfmt.path) + if self.part: + pathfmt.kwdict["extension"] = pathfmt.prefix + "part" + filename = pathfmt.build_filename(pathfmt.kwdict) + pathfmt.kwdict["extension"] = info_dict["ext"] + if self.partdir: + path = os.path.join(self.partdir, filename) + else: + path = pathfmt.realdirectory + filename + else: + path = pathfmt.realpath + + self._set_outtmpl(ytdl_instance, path.replace("%", "%%")) try: ytdl_instance.process_info(info_dict) except Exception as exc: self.log.debug("", exc_info=exc) return False + + pathfmt.temppath = info_dict["filepath"] return True def _download_playlist(self, ytdl_instance, pathfmt, info_dict): diff --git a/gallery_dl/extractor/arcalive.py b/gallery_dl/extractor/arcalive.py index 8c44256..3c39a1a 100644 --- a/gallery_dl/extractor/arcalive.py +++ b/gallery_dl/extractor/arcalive.py @@ -17,6 +17,7 @@ class ArcaliveExtractor(Extractor): """Base class for Arca.live extractors""" category = "arcalive" root = "https://arca.live" + useragent = "net.umanle.arca.android.playstore/0.9.75" request_interval = (0.5, 1.5) def _init(self): @@ -149,9 +150,7 @@ class ArcaliveAPI(): self.log = extractor.log self.root = extractor.root + "/api/app" - headers = extractor.session.headers - headers["User-Agent"] = "net.umanle.arca.android.playstore/0.9.75" - headers["X-Device-Token"] = util.generate_token(64) + extractor.session.headers["X-Device-Token"] = util.generate_token(64) def board(self, board_slug, params): endpoint = "/list/channel/" + board_slug diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py index 17b780e..ca88187 100644 --- a/gallery_dl/extractor/aryion.py +++ b/gallery_dl/extractor/aryion.py @@ -169,7 +169,7 @@ class AryionExtractor(Extractor): "<p>", "</p>"), "", "")), "filename" : fname, "extension": ext, - "_mtime" : lmod, + "_http_lastmodified": lmod, } diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py index ec274b8..6f4abd5 100644 --- a/gallery_dl/extractor/bluesky.py +++ b/gallery_dl/extractor/bluesky.py @@ -49,7 +49,11 @@ class BlueskyExtractor(Extractor): self.log.debug("Skipping %s (repost)", self._pid(post)) continue embed = post.get("embed") - post.update(post.pop("record")) + try: + post.update(post.pop("record")) + except Exception: + self.log.debug("Skipping %s (no 'record')", self._pid(post)) + continue while True: self._prepare(post) diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py index de8f86c..56fe851 100644 --- a/gallery_dl/extractor/civitai.py +++ b/gallery_dl/extractor/civitai.py @@ -45,6 +45,20 @@ class 
CivitaiExtractor(Extractor): self._image_quality = "original=true" self._image_ext = "png" + quality_video = self.config("quality-videos") + if quality_video: + if not isinstance(quality_video, str): + quality_video = ",".join(quality_video) + if quality_video[0] == "+": + quality_video = (self._image_quality + "," + + quality_video.lstrip("+,")) + self._video_quality = quality_video + elif quality_video is not None and quality: + self._video_quality = self._image_quality + else: + self._video_quality = "quality=100" + self._video_ext = "webm" + metadata = self.config("metadata") if metadata: if isinstance(metadata, str): @@ -82,9 +96,8 @@ class CivitaiExtractor(Extractor): "user": post.pop("user"), } if self._meta_version: - data["version"] = version = self.api.model_version( - post["modelVersionId"]).copy() - data["model"] = version.pop("model") + data["model"], data["version"] = \ + self._extract_meta_version(post) yield Message.Directory, data for file in self._image_results(images): @@ -95,26 +108,22 @@ class CivitaiExtractor(Extractor): images = self.images() if images: for image in images: - url = self._url(image) + if self._meta_generation: - image["generation"] = self.api.image_generationdata( - image["id"]) + image["generation"] = \ + self._extract_meta_generation(image) if self._meta_version: - if "modelVersionId" in image: - version_id = image["modelVersionId"] - else: - post = image["post"] = self.api.post( - image["postId"]) - post.pop("user", None) - version_id = post["modelVersionId"] - image["version"] = version = self.api.model_version( - version_id).copy() - image["model"] = version.pop("model") - + image["model"], image["version"] = \ + self._extract_meta_version(image, False) image["date"] = text.parse_datetime( image["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ") + + url = self._url(image) text.nameext_from_url(url, image) - image["extension"] = self._image_ext + if not image["extension"]: + image["extension"] = ( + self._video_ext if image.get("type") == "video" else + self._image_ext) yield Message.Directory, image yield Message.Url, url, image return @@ -130,20 +139,23 @@ class CivitaiExtractor(Extractor): def _url(self, image): url = image["url"] + video = image.get("type") == "video" + quality = self._video_quality if video else self._image_quality + if "/" in url: parts = url.rsplit("/", 3) image["uuid"] = parts[1] - parts[2] = self._image_quality + parts[2] = quality return "/".join(parts) - image["uuid"] = url + image["uuid"] = url name = image.get("name") if not name: mime = image.get("mimeType") or self._image_ext name = "{}.{}".format(image.get("id"), mime.rpartition("/")[2]) return ( "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/{}/{}/{}".format( - url, self._image_quality, name) + url, quality, name) ) def _image_results(self, images): @@ -154,11 +166,13 @@ class CivitaiExtractor(Extractor): "url" : self._url(file), }) if not data["extension"]: - data["extension"] = self._image_ext + data["extension"] = ( + self._video_ext if file.get("type") == "video" else + self._image_ext) if "id" not in file and data["filename"].isdecimal(): file["id"] = text.parse_int(data["filename"]) if self._meta_generation: - file["generation"] = self.api.image_generationdata(file["id"]) + file["generation"] = self._extract_meta_generation(file) yield data def _parse_query(self, value): @@ -166,6 +180,38 @@ class CivitaiExtractor(Extractor): value, {"tags", "reactions", "baseModels", "tools", "techniques", "types", "fileFormats"}) + def _extract_meta_generation(self, image): + try: 
+ return self.api.image_generationdata(image["id"]) + except Exception as exc: + return self.log.debug("", exc_info=exc) + + def _extract_meta_version(self, item, is_post=True): + try: + version_id = self._extract_version_id(item, is_post) + if version_id: + version = self.api.model_version(version_id).copy() + return version.pop("model", None), version + except Exception as exc: + self.log.debug("", exc_info=exc) + return None, None + + def _extract_version_id(self, item, is_post=True): + version_id = item.get("modelVersionId") + if version_id: + return version_id + + version_ids = item.get("modelVersionIds") + if version_ids: + return version_ids[0] + + if is_post: + return None + + item["post"] = post = self.api.post(item["postId"]) + post.pop("user", None) + return self._extract_version_id(post) + class CivitaiModelExtractor(CivitaiExtractor): subcategory = "model" @@ -235,16 +281,20 @@ class CivitaiModelExtractor(CivitaiExtractor): files = [] for num, file in enumerate(version["files"], 1): + name, sep, ext = file["name"].rpartition(".") + if not sep: + name = ext + ext = "bin" file["uuid"] = "model-{}-{}-{}".format( model["id"], version["id"], file["id"]) files.append({ "num" : num, "file" : file, - "filename" : file["name"], - "extension": "bin", - "url" : file.get("downloadUrl") or - "{}/api/download/models/{}".format( - self.root, version["id"]), + "filename" : name, + "extension": ext, + "url" : (file.get("downloadUrl") or + "{}/api/download/models/{}".format( + self.root, version["id"])), "_http_headers" : { "Authorization": self.api.headers.get("Authorization")}, "_http_validate": self._validate_file_model, diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py index 3b43134..8981c29 100644 --- a/gallery_dl/extractor/fanbox.py +++ b/gallery_dl/extractor/fanbox.py @@ -26,12 +26,18 @@ class FanboxExtractor(Extractor): directory_fmt = ("{category}", "{creatorId}") filename_fmt = "{id}_{num}.{extension}" archive_fmt = "{id}_{num}" + browser = "firefox" _warning = True def _init(self): self.headers = { - "Accept": "application/json, text/plain, */*", - "Origin": self.root, + "Accept" : "application/json, text/plain, */*", + "Origin" : "https://www.fanbox.cc", + "Referer": "https://www.fanbox.cc/", + "Cookie" : None, + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "same-site", } self.embeds = self.config("embeds", True) diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py index e85a375..eb68c3e 100644 --- a/gallery_dl/extractor/flickr.py +++ b/gallery_dl/extractor/flickr.py @@ -23,13 +23,10 @@ class FlickrExtractor(Extractor): request_interval = (1.0, 2.0) request_interval_min = 0.5 - def __init__(self, match): - Extractor.__init__(self, match) - self.item_id = match.group(1) - def _init(self): self.api = FlickrAPI(self) self.user = None + self.item_id = self.groups[0] def items(self): data = self.metadata() @@ -51,6 +48,8 @@ class FlickrExtractor(Extractor): def metadata(self): """Return general metadata""" self.user = self.api.urls_lookupUser(self.item_id) + if self.config("profile", False): + self.user.update(self.api.people_getInfo(self.user["nsid"])) return {"user": self.user} def photos(self): @@ -75,23 +74,26 @@ class FlickrImageExtractor(FlickrExtractor): r"|flic\.kr/p/([A-Za-z1-9]+))") example = "https://www.flickr.com/photos/USER/12345" - def __init__(self, match): - FlickrExtractor.__init__(self, match) - if not self.item_id: + def items(self): + item_id, enc_id = self.groups + if enc_id is not 
None: alphabet = ("123456789abcdefghijkmnopqrstu" "vwxyzABCDEFGHJKLMNPQRSTUVWXYZ") - self.item_id = util.bdecode(match.group(2), alphabet) + item_id = util.bdecode(enc_id, alphabet) - def items(self): - photo = self.api.photos_getInfo(self.item_id) + photo = self.api.photos_getInfo(item_id) - self.api._extract_metadata(photo) + self.api._extract_metadata(photo, False) if photo["media"] == "video" and self.api.videos: self.api._extract_video(photo) else: self.api._extract_photo(photo) - photo["user"] = photo["owner"] + if self.config("profile", False): + photo["user"] = self.api.people_getInfo(photo["owner"]["nsid"]) + else: + photo["user"] = photo["owner"] + photo["title"] = photo["title"]["_content"] photo["comments"] = text.parse_int(photo["comments"]["_content"]) photo["description"] = photo["description"]["_content"] @@ -120,11 +122,8 @@ class FlickrAlbumExtractor(FlickrExtractor): pattern = BASE_PATTERN + r"/photos/([^/?#]+)/(?:album|set)s(?:/(\d+))?" example = "https://www.flickr.com/photos/USER/albums/12345" - def __init__(self, match): - FlickrExtractor.__init__(self, match) - self.album_id = match.group(2) - def items(self): + self.album_id = self.groups[1] if self.album_id: return FlickrExtractor.items(self) return self._album_items() @@ -163,12 +162,9 @@ class FlickrGalleryExtractor(FlickrExtractor): pattern = BASE_PATTERN + r"/photos/([^/?#]+)/galleries/(\d+)" example = "https://www.flickr.com/photos/USER/galleries/12345/" - def __init__(self, match): - FlickrExtractor.__init__(self, match) - self.gallery_id = match.group(2) - def metadata(self): data = FlickrExtractor.metadata(self) + self.gallery_id = self.groups[1] data["gallery"] = self.api.galleries_getInfo(self.gallery_id) return data @@ -223,13 +219,10 @@ class FlickrSearchExtractor(FlickrExtractor): pattern = BASE_PATTERN + r"/search/?\?([^#]+)" example = "https://flickr.com/search/?text=QUERY" - def __init__(self, match): - FlickrExtractor.__init__(self, match) - self.search = text.parse_query(match.group(1)) + def metadata(self): + self.search = text.parse_query(self.groups[0]) if "text" not in self.search: self.search["text"] = "" - - def metadata(self): return {"search": self.search} def photos(self): @@ -275,13 +268,27 @@ class FlickrAPI(oauth.OAuth1API): "appletv" : 1, "iphone_wifi": 0, } + LICENSES = { + "0": "All Rights Reserved", + "1": "Attribution-NonCommercial-ShareAlike License", + "2": "Attribution-NonCommercial License", + "3": "Attribution-NonCommercial-NoDerivs License", + "4": "Attribution License", + "5": "Attribution-ShareAlike License", + "6": "Attribution-NoDerivs License", + "7": "No known copyright restrictions", + "8": "United States Government Work", + "9": "Public Domain Dedication (CC0)", + "10": "Public Domain Mark", + } def __init__(self, extractor): oauth.OAuth1API.__init__(self, extractor) - self.exif = extractor.config("exif", False) self.videos = extractor.config("videos", True) - self.contexts = extractor.config("contexts", False) + self.meta_exif = extractor.config("exif", False) + self.meta_info = extractor.config("info", False) + self.meta_contexts = extractor.config("contexts", False) self.maxsize = extractor.config("size-max") if isinstance(self.maxsize, str): @@ -321,6 +328,26 @@ class FlickrAPI(oauth.OAuth1API): params = {"group_id": group_id} return self._pagination("groups.pools.getPhotos", params) + def people_getInfo(self, user_id): + """Get information about a user.""" + params = {"user_id": user_id} + user = self._call("people.getInfo", params) + + try: + user = 
user["person"] + for key in ("description", "username", "realname", "location", + "profileurl", "photosurl", "mobileurl"): + if isinstance(user.get(key), dict): + user[key] = user[key]["_content"] + photos = user["photos"] + for key in ("count", "firstdate", "firstdatetaken"): + if isinstance(photos.get(key), dict): + photos[key] = photos[key]["_content"] + except Exception: + pass + + return user + def people_getPhotos(self, user_id): """Return photos from the given user's photostream.""" params = {"user_id": user_id} @@ -469,14 +496,15 @@ class FlickrAPI(oauth.OAuth1API): self._extract_metadata(photo) photo["id"] = text.parse_int(photo["id"]) - if "owner" in photo: + if "owner" not in photo: + photo["owner"] = self.extractor.user + elif not self.meta_info: photo["owner"] = { "nsid" : photo["owner"], "username" : photo["ownername"], "path_alias": photo["pathalias"], } - else: - photo["owner"] = self.extractor.user + del photo["pathalias"] del photo["ownername"] @@ -522,8 +550,23 @@ class FlickrAPI(oauth.OAuth1API): photo["width"] = photo["height"] = 0 return photo - def _extract_metadata(self, photo): - if self.exif: + def _extract_metadata(self, photo, info=True): + if info and self.meta_info: + try: + photo.update(self.photos_getInfo(photo["id"])) + photo["title"] = photo["title"]["_content"] + photo["comments"] = text.parse_int( + photo["comments"]["_content"]) + photo["description"] = photo["description"]["_content"] + photo["tags"] = [t["raw"] for t in photo["tags"]["tag"]] + photo["views"] = text.parse_int(photo["views"]) + photo["id"] = text.parse_int(photo["id"]) + except Exception as exc: + self.log.warning( + "Unable to retrieve 'info' data for %s (%s: %s)", + photo["id"], exc.__class__.__name__, exc) + + if self.meta_exif: try: photo.update(self.photos_getExif(photo["id"])) except Exception as exc: @@ -531,7 +574,7 @@ class FlickrAPI(oauth.OAuth1API): "Unable to retrieve 'exif' data for %s (%s: %s)", photo["id"], exc.__class__.__name__, exc) - if self.contexts: + if self.meta_contexts: try: photo.update(self.photos_getAllContexts(photo["id"])) except Exception as exc: @@ -539,6 +582,9 @@ class FlickrAPI(oauth.OAuth1API): "Unable to retrieve 'contexts' data for %s (%s: %s)", photo["id"], exc.__class__.__name__, exc) + if "license" in photo: + photo["license_name"] = self.LICENSES.get(photo["license"]) + @staticmethod def _clean_info(info): info["title"] = info["title"]["_content"] diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py index dfd9a31..8f4a10c 100644 --- a/gallery_dl/extractor/idolcomplex.py +++ b/gallery_dl/extractor/idolcomplex.py @@ -90,9 +90,11 @@ class IdolcomplexExtractor(SankakuExtractor): "user[password]": password, "commit" : "Login", } + self.sleep(10, "login") response = self.request(url, method="POST", headers=headers, data=data) - if not response.history or response.url.endswith("/user/home"): + if not response.history or response.url.endswith( + ("/users/login", "/user/home")): raise exception.AuthenticationError() return {c.name: c.value for c in response.history[0].cookies} @@ -187,32 +189,39 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor): return {"search_tags": " ".join(tags)} def post_ids(self): - params = {"tags": self.tags} + url = self.root + "/en/posts" + params = {"auto_page": "t"} if self.next: params["next"] = self.next else: params["page"] = self.start_page + params["tags"] = self.tags while True: - page = self.request(self.root, params=params, retries=10).text - pos = 
((page.find('id="more-popular-posts-link"') + 1) or - (page.find('<span class="thumb') + 1)) + response = self.request(url, params=params, retries=10) + if response.history and "/posts/premium" in response.url: + self.log.warning("HTTP redirect to %s", response.url) + page = response.text - yield from self.find_pids(page, pos) + yield from text.extract_iter(page, '"id":"', '"') - next_url = text.extract(page, 'next-page-url="', '"', pos)[0] - if not next_url: + next_page_url = text.extr(page, 'next-page-url="', '"') + if not next_page_url: return - next_params = text.parse_query(text.unquote(text.unescape( - text.unescape(next_url).lstrip("?/")))) + url, _, next_params = text.unquote( + text.unescape(text.unescape(next_page_url))).partition("?") + next_params = text.parse_query(next_params) if "next" in next_params: # stop if the same "next" value occurs twice in a row (#265) if "next" in params and params["next"] == next_params["next"]: return next_params["page"] = "2" + + if url[0] == "/": + url = self.root + url params = next_params @@ -225,10 +234,6 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor): example = "https://idol.sankakucomplex.com/pools/0123456789abcdef" per_page = 24 - def __init__(self, match): - IdolcomplexExtractor.__init__(self, match) - self.pool_id = match.group(1) - def skip(self, num): pages, posts = divmod(num, self.per_page) self.start_page += pages @@ -236,10 +241,13 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor): return num def metadata(self): - return {"pool": self.pool_id} + return {"pool": self.groups[0]} def post_ids(self): - url = self.root + "/pools/show/" + self.pool_id + if not self.logged_in: + self.log.warning("Login required") + + url = self.root + "/pools/show/" + self.groups[0] params = {"page": self.start_page} while True: @@ -260,9 +268,5 @@ class IdolcomplexPostExtractor(IdolcomplexExtractor): pattern = BASE_PATTERN + r"/posts?/(?:show/)?(\w+)" example = "https://idol.sankakucomplex.com/posts/0123456789abcdef" - def __init__(self, match): - IdolcomplexExtractor.__init__(self, match) - self.post_id = match.group(1) - def post_ids(self): - return (self.post_id,) + return (self.groups[0],) diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 0f88cac..624bba2 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -165,13 +165,16 @@ class InstagramExtractor(Extractor): if "items" in post: # story or highlight items = post["items"] reel_id = str(post["id"]).rpartition(":")[2] + expires = post.get("expiring_at") data = { - "expires": text.parse_timestamp(post.get("expiring_at")), + "expires": text.parse_timestamp(expires), "post_id": reel_id, "post_shortcode": shortcode_from_id(reel_id), } if "title" in post: data["highlight_title"] = post["title"] + if expires and not post.get("seen"): + post["seen"] = expires - 86400 else: # regular image/video post data = { @@ -583,7 +586,10 @@ class InstagramStoriesExtractor(InstagramExtractor): reel_id = self.highlight_id or self.api.user_id(self.user) reels = self.api.reels_media(reel_id) - if self.media_id and reels: + if not reels: + return () + + if self.media_id: reel = reels[0] for item in reel["items"]: if item["pk"] == self.media_id: @@ -592,6 +598,16 @@ class InstagramStoriesExtractor(InstagramExtractor): else: raise exception.NotFoundError("story") + elif self.config("split"): + reel = reels[0] + reels = [] + for item in reel["items"]: + item.pop("user", None) + copy = reel.copy() + copy.update(item) + copy["items"] = 
(item,) + reels.append(copy) + return reels diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index 7f87cff..42a508d 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -29,11 +29,8 @@ class MangadexExtractor(Extractor): useragent = util.USERAGENT _cache = {} - def __init__(self, match): - Extractor.__init__(self, match) - self.uuid = match.group(1) - def _init(self): + self.uuid = self.groups[0] self.api = MangadexAPI(self) def items(self): @@ -44,6 +41,12 @@ class MangadexExtractor(Extractor): self._cache[uuid] = data yield Message.Queue, self.root + "/chapter/" + uuid, data + def _items_manga(self): + data = {"_extractor": MangadexMangaExtractor} + for manga in self.manga(): + url = "{}/title/{}".format(self.root, manga["id"]) + yield Message.Queue, url, data + def _transform(self, chapter): relationships = defaultdict(list) for item in chapter["relationships"]: @@ -130,7 +133,7 @@ class MangadexChapterExtractor(MangadexExtractor): class MangadexMangaExtractor(MangadexExtractor): """Extractor for manga from mangadex.org""" subcategory = "manga" - pattern = BASE_PATTERN + r"/(?:title|manga)/(?!feed$)([0-9a-f-]+)" + pattern = BASE_PATTERN + r"/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)" example = ("https://mangadex.org/title" "/01234567-89ab-cdef-0123-456789abcdef") @@ -139,17 +142,29 @@ class MangadexMangaExtractor(MangadexExtractor): class MangadexFeedExtractor(MangadexExtractor): - """Extractor for chapters from your Followed Feed""" + """Extractor for chapters from your Updates Feed""" subcategory = "feed" - pattern = BASE_PATTERN + r"/title/feed$()" + pattern = BASE_PATTERN + r"/titles?/feed$()" example = "https://mangadex.org/title/feed" def chapters(self): return self.api.user_follows_manga_feed() +class MangadexFollowingExtractor(MangadexExtractor): + """Extractor for followed manga from your Library""" + subcategory = "following" + pattern = BASE_PATTERN + r"/titles?/follows(?:\?([^#]+))?$" + example = "https://mangadex.org/title/follows" + + items = MangadexExtractor._items_manga + + def manga(self): + return self.api.user_follows_manga() + + class MangadexListExtractor(MangadexExtractor): - """Extractor for mangadex lists""" + """Extractor for mangadex MDLists""" subcategory = "list" pattern = (BASE_PATTERN + r"/list/([0-9a-f-]+)(?:/[^/?#]*)?(?:\?tab=(\w+))?") @@ -161,17 +176,17 @@ class MangadexListExtractor(MangadexExtractor): if match.group(2) == "feed": self.subcategory = "list-feed" else: - self.items = self._items_titles + self.items = self._items_manga def chapters(self): return self.api.list_feed(self.uuid) - def _items_titles(self): - data = {"_extractor": MangadexMangaExtractor} - for item in self.api.list(self.uuid)["relationships"]: - if item["type"] == "manga": - url = "{}/title/{}".format(self.root, item["id"]) - yield Message.Queue, url, data + def manga(self): + return [ + item + for item in self.api.list(self.uuid)["relationships"] + if item["type"] == "manga" + ] class MangadexAuthorExtractor(MangadexExtractor): @@ -196,10 +211,18 @@ class MangadexAPI(): def __init__(self, extr): self.extractor = extr - self.headers = {} + self.headers = None + self.headers_auth = {} self.username, self.password = extr._get_auth_info() - if not self.username: + if self.username: + self.client_id = cid = extr.config("client-id") + self.client_secret = extr.config("client-secret") + if cid: + self._authenticate_impl = self._authenticate_impl_client + else: + self._authenticate_impl = 
self._authenticate_impl_legacy + else: self.authenticate = util.noop server = extr.config("api-server") @@ -218,10 +241,10 @@ class MangadexAPI(): return self._call("/chapter/" + uuid, params)["data"] def list(self, uuid): - return self._call("/list/" + uuid)["data"] + return self._call("/list/" + uuid, None, True)["data"] def list_feed(self, uuid): - return self._pagination_chapters("/list/" + uuid + "/feed") + return self._pagination_chapters("/list/" + uuid + "/feed", None, True) @memcache(keyarg=1) def manga(self, uuid): @@ -240,28 +263,73 @@ class MangadexAPI(): } return self._pagination_chapters("/manga/" + uuid + "/feed", params) + def user_follows_manga(self): + params = {"contentRating": None} + return self._pagination_manga( + "/user/follows/manga", params, True) + def user_follows_manga_feed(self): params = {"order[publishAt]": "desc"} - return self._pagination_chapters("/user/follows/manga/feed", params) + return self._pagination_chapters( + "/user/follows/manga/feed", params, True) def authenticate(self): - self.headers["Authorization"] = \ + self.headers_auth["Authorization"] = \ self._authenticate_impl(self.username, self.password) @cache(maxage=900, keyarg=1) - def _authenticate_impl(self, username, password): + def _authenticate_impl_client(self, username, password): + refresh_token = _refresh_token_cache((username, "personal")) + if refresh_token: + self.extractor.log.info("Refreshing access token") + data = { + "grant_type" : "refresh_token", + "refresh_token": refresh_token, + "client_id" : self.client_id, + "client_secret": self.client_secret, + } + else: + self.extractor.log.info("Logging in as %s", username) + data = { + "grant_type" : "password", + "username" : self.username, + "password" : self.password, + "client_id" : self.client_id, + "client_secret": self.client_secret, + } + + self.extractor.log.debug("Using client-id '%s…'", self.client_id[:24]) + url = ("https://auth.mangadex.org/realms/mangadex" + "/protocol/openid-connect/token") + data = self.extractor.request( + url, method="POST", data=data, fatal=None).json() + + try: + access_token = data["access_token"] + except Exception: + raise exception.AuthenticationError(data.get("error_description")) + + if refresh_token != data.get("refresh_token"): + _refresh_token_cache.update( + (username, "personal"), data["refresh_token"]) + + return "Bearer " + access_token + + @cache(maxage=900, keyarg=1) + def _authenticate_impl_legacy(self, username, password): refresh_token = _refresh_token_cache(username) if refresh_token: self.extractor.log.info("Refreshing access token") url = self.root + "/auth/refresh" - data = {"token": refresh_token} + json = {"token": refresh_token} else: self.extractor.log.info("Logging in as %s", username) url = self.root + "/auth/login" - data = {"username": username, "password": password} + json = {"username": username, "password": password} + self.extractor.log.debug("Using legacy login method") data = self.extractor.request( - url, method="POST", json=data, fatal=None).json() + url, method="POST", json=json, fatal=None).json() if data.get("result") != "ok": raise exception.AuthenticationError() @@ -269,13 +337,15 @@ class MangadexAPI(): _refresh_token_cache.update(username, data["token"]["refresh"]) return "Bearer " + data["token"]["session"] - def _call(self, endpoint, params=None): + def _call(self, endpoint, params=None, auth=False): url = self.root + endpoint + headers = self.headers_auth if auth else self.headers while True: - self.authenticate() + if auth: + self.authenticate() 
response = self.extractor.request( - url, params=params, headers=self.headers, fatal=None) + url, params=params, headers=headers, fatal=None) if response.status_code < 400: return response.json() @@ -284,12 +354,12 @@ class MangadexAPI(): self.extractor.wait(until=until) continue - msg = ", ".join('{title}: {detail}'.format_map(error) + msg = ", ".join('{title}: "{detail}"'.format_map(error) for error in response.json()["errors"]) raise exception.StopExtraction( "%s %s (%s)", response.status_code, response.reason, msg) - def _pagination_chapters(self, endpoint, params=None): + def _pagination_chapters(self, endpoint, params=None, auth=False): if params is None: params = {} @@ -299,21 +369,22 @@ class MangadexAPI(): params["translatedLanguage[]"] = lang params["includes[]"] = ("scanlation_group",) - return self._pagination(endpoint, params) + return self._pagination(endpoint, params, auth) - def _pagination_manga(self, endpoint, params=None): + def _pagination_manga(self, endpoint, params=None, auth=False): if params is None: params = {} - return self._pagination(endpoint, params) + return self._pagination(endpoint, params, auth) - def _pagination(self, endpoint, params): + def _pagination(self, endpoint, params, auth=False): config = self.extractor.config - ratings = config("ratings") - if ratings is None: - ratings = ("safe", "suggestive", "erotica", "pornographic") - params["contentRating[]"] = ratings + if "contentRating" not in params: + ratings = config("ratings") + if ratings is None: + ratings = ("safe", "suggestive", "erotica", "pornographic") + params["contentRating[]"] = ratings params["offset"] = 0 api_params = config("api-parameters") @@ -321,7 +392,7 @@ class MangadexAPI(): params.update(api_params) while True: - data = self._call(endpoint, params) + data = self._call(endpoint, params, auth) yield from data["data"] params["offset"] = data["offset"] + data["limit"] @@ -329,6 +400,6 @@ class MangadexAPI(): return -@cache(maxage=28*86400, keyarg=0) +@cache(maxage=90*86400, keyarg=0) def _refresh_token_cache(username): return None diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py index 5e78ad4..8b38474 100644 --- a/gallery_dl/extractor/mastodon.py +++ b/gallery_dl/extractor/mastodon.py @@ -196,11 +196,15 @@ class MastodonFollowingExtractor(MastodonExtractor): class MastodonStatusExtractor(MastodonExtractor): """Extractor for images from a status""" subcategory = "status" - pattern = (BASE_PATTERN + r"/(?:@[^/?#]+|(?:users/[^/?#]+/)?statuses)" - r"/(?!following)([^/?#]+)") + pattern = (BASE_PATTERN + r"/(?:@[^/?#]+|(?:users/[^/?#]+/)?" + r"(?:statuses|notice|objects()))/(?!following)([^/?#]+)") example = "https://mastodon.social/@USER/12345" def statuses(self): + if self.groups[-2] is not None: + url = "{}/objects/{}".format(self.root, self.item) + location = self.request_location(url) + self.item = location.rpartition("/")[2] return (MastodonAPI(self).status(self.item),) diff --git a/gallery_dl/extractor/motherless.py b/gallery_dl/extractor/motherless.py index c5b9322..ce83ded 100644 --- a/gallery_dl/extractor/motherless.py +++ b/gallery_dl/extractor/motherless.py @@ -23,21 +23,6 @@ class MotherlessExtractor(Extractor): filename_fmt = "{id} {title}.{extension}" archive_fmt = "{id}" - -class MotherlessMediaExtractor(MotherlessExtractor): - """Extractor for a single image/video from motherless.com""" - subcategory = "media" - pattern = (BASE_PATTERN + - r"/((?:g/[^/?#]+/|G[IV]?[A-Z0-9]+/)?" 
- r"(?!G)[A-Z0-9]+)") - example = "https://motherless.com/ABC123" - - def items(self): - file = self._extract_media(self.groups[0]) - url = file["url"] - yield Message.Directory, file - yield Message.Url, url, text.nameext_from_url(url, file) - def _extract_media(self, path): url = self.root + "/" + path page = self.request(url).text @@ -95,6 +80,21 @@ class MotherlessMediaExtractor(MotherlessExtractor): return "" +class MotherlessMediaExtractor(MotherlessExtractor): + """Extractor for a single image/video from motherless.com""" + subcategory = "media" + pattern = (BASE_PATTERN + + r"/((?:g/[^/?#]+/|G[IV]?[A-Z0-9]+/)?" + r"(?!G)[A-Z0-9]+)") + example = "https://motherless.com/ABC123" + + def items(self): + file = self._extract_media(self.groups[0]) + url = file["url"] + yield Message.Directory, file + yield Message.Url, url, text.nameext_from_url(url, file) + + class MotherlessGalleryExtractor(MotherlessExtractor): """Extractor for a motherless.com gallery""" subcategory = "gallery" @@ -119,6 +119,10 @@ class MotherlessGalleryExtractor(MotherlessExtractor): for num, thumb in enumerate(self._pagination(page), 1): file = self._parse_thumb_data(thumb) + + if file["type"] == "video": + file = self._extract_media(file["id"]) + file.update(data) file["num"] = num url = file["url"] @@ -151,17 +155,13 @@ class MotherlessGalleryExtractor(MotherlessExtractor): def _parse_thumb_data(self, thumb): extr = text.extract_from(thumb) + data = { "id" : extr('data-codename="', '"'), "type" : extr('data-mediatype="', '"'), "thumbnail": extr('class="static" src="', '"'), "title" : extr(' alt="', '"'), } - - type = data["type"] - url = data["thumbnail"].replace("thumb", type) - if type == "video": - url = "{}/{}.mp4".format(url.rpartition("/")[0], data["id"]) - data["url"] = url + data["url"] = data["thumbnail"].replace("thumb", data["type"]) return data diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index ad8c681..62fa9be 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -132,6 +132,9 @@ class PinterestExtractor(Extractor): "extension": "txt", "media_id": block.get("id")} + elif type == "story_pin_product_sticker_block": + continue + elif type == "story_pin_static_sticker_block": continue diff --git a/gallery_dl/extractor/pixeldrain.py b/gallery_dl/extractor/pixeldrain.py index 83f3577..7a4d1a5 100644 --- a/gallery_dl/extractor/pixeldrain.py +++ b/gallery_dl/extractor/pixeldrain.py @@ -96,3 +96,73 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor): file["date"] = self.parse_datetime(file["date_upload"]) text.nameext_from_url(file["name"], file) yield Message.Url, url, file + + +class PixeldrainFolderExtractor(PixeldrainExtractor): + """Extractor for pixeldrain filesystem files and directories""" + subcategory = "folder" + filename_fmt = "{filename[:230]}.{extension}" + archive_fmt = "{path}_{num}" + pattern = BASE_PATTERN + r"/(?:d|api/filesystem)/([^?]+)" + example = "https://pixeldrain.com/d/abcdefgh" + + def metadata(self, data): + return { + "type" : data["type"], + "path" : data["path"], + "name" : data["name"], + "mime_type" : data["file_type"], + "size" : data["file_size"], + "hash_sha256": data["sha256_sum"], + "date" : self.parse_datetime(data["created"]), + } + + def items(self): + recursive = self.config("recursive") + + url = "{}/api/filesystem/{}".format(self.root, self.groups[0]) + stat = self.request(url + "?stat").json() + + paths = stat["path"] + path = paths[stat["base_index"]] + if path["type"] == "dir": + 
children = [ + child + for child in stat["children"] + if child["name"] != ".search_index.gz" + ] + else: + children = (path,) + + folder = self.metadata(path) + folder["id"] = paths[0]["id"] + + yield Message.Directory, folder + + num = 0 + for child in children: + if child["type"] == "file": + num += 1 + url = "{}/api/filesystem{}?attach".format( + self.root, child["path"]) + share_url = "{}/d{}".format(self.root, child["path"]) + data = self.metadata(child) + data.update({ + "id" : folder["id"], + "num" : num, + "url" : url, + "share_url": share_url, + }) + data["filename"], _, data["extension"] = \ + child["name"].rpartition(".") + yield Message.Url, url, data + + elif child["type"] == "dir": + if recursive: + url = "{}/d{}".format(self.root, child["path"]) + child["_extractor"] = PixeldrainFolderExtractor + yield Message.Queue, url, child + + else: + self.log.debug("'%s' is of unknown type (%s)", + child.get("name"), child["type"]) diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index c063216..73c5c1c 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -136,7 +136,21 @@ class PixivExtractor(Extractor): self.log.warning("%s: 'limit_sanity_level' warning", work_id) if self.sanity_workaround: body = self._request_ajax("/illust/" + str(work_id)) - return self._extract_ajax(work, body) + if work["type"] == "ugoira": + if not self.load_ugoira: + return () + self.log.info("%s: Retrieving Ugoira AJAX metadata", + work["id"]) + try: + self._extract_ajax(work, body) + return self._extract_ugoira(work, url) + except Exception as exc: + self.log.debug("", exc_info=exc) + self.log.warning( + "%s: Unable to extract Ugoira URL. Provide " + "logged-in cookies to access it", work["id"]) + else: + return self._extract_ajax(work, body) elif limit_type == "limit_mypixiv_360.png": work["_mypixiv"] = True @@ -161,7 +175,12 @@ class PixivExtractor(Extractor): return () def _extract_ugoira(self, work, img_url): - ugoira = self.api.ugoira_metadata(work["id"]) + if work.get("_ajax"): + ugoira = self._request_ajax( + "/illust/" + str(work["id"]) + "/ugoira_meta") + img_url = ugoira["src"] + else: + ugoira = self.api.ugoira_metadata(work["id"]) work["_ugoira_frame_data"] = work["frames"] = frames = ugoira["frames"] work["_ugoira_original"] = self.load_ugoira_original work["_http_adjust_extension"] = False @@ -198,7 +217,10 @@ class PixivExtractor(Extractor): ] else: - zip_url = ugoira["zip_urls"]["medium"] + if work.get("_ajax"): + zip_url = ugoira["originalSrc"] + else: + zip_url = ugoira["zip_urls"]["medium"] work["date_url"] = self._date_from_url(zip_url) url = zip_url.replace("_ugoira600x600", "_ugoira1920x1080", 1) return ({"url": url},) diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index c7303f2..3485db9 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -47,6 +47,10 @@ class SankakuExtractor(BooruExtractor): def _init(self): self.api = SankakuAPI(self) + if self.config("tags") == "extended": + self._tags = self._tags_extended + self._tags_findall = re.compile( + r"tag-type-([^\"' ]+).*?\?tags=([^\"'&]+)").findall def _file_url(self, post): url = post["file_url"] @@ -85,6 +89,23 @@ class SankakuExtractor(BooruExtractor): post["tags_" + name] = values post["tag_string_" + name] = " ".join(values) + def _tags_extended(self, post, page): + try: + url = "https://chan.sankakucomplex.com/posts/" + post["id"] + page = self.request(url).text + except Exception as exc: + return 
self.log.warning( + "%s: Failed to extract extended tag categories (%s: %s)", + post["id"], exc.__class__.__name__, exc) + + tags = collections.defaultdict(list) + tag_sidebar = text.extr(page, '<ul id="tag-sidebar"', "</ul>") + for tag_type, tag_name in self._tags_findall(tag_sidebar): + tags[tag_type].append(text.unescape(text.unquote(tag_name))) + for type, values in tags.items(): + post["tags_" + type] = values + post["tag_string_" + type] = " ".join(values) + def _notes(self, post, page): if post.get("has_notes"): post["notes"] = self.api.notes(post["id"]) diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py index 1054a63..a83f2da 100644 --- a/gallery_dl/extractor/subscribestar.py +++ b/gallery_dl/extractor/subscribestar.py @@ -40,8 +40,14 @@ class SubscribestarExtractor(Extractor): for post_html in self.posts(): media = self._media_from_post(post_html) data = self._data_from_post(post_html) - data["title"] = text.unescape(text.extr( - data["content"], "<h1>", "</h1>")) + + content = data["content"] + if "<html><body>" in content: + data["content"] = content = text.extr( + content, "<body>", "</body>") + data["title"] = text.unescape( + text.rextract(content, "<h1>", "</h1>")[0] or "") + yield Message.Directory, data for num, item in enumerate(media, 1): item.update(data) @@ -189,7 +195,12 @@ class SubscribestarExtractor(Extractor): "author_nick": text.unescape(extr('>', '<')), "date" : self._parse_datetime(extr( 'class="post-date">', '</').rpartition(">")[2]), - "content" : extr('<body>', '</body>').strip(), + "content" : extr( + '<div class="post-content" data-role="post_content-text">', + '</div><div class="post-uploads for-youtube"').strip(), + "tags" : list(text.extract_iter(extr( + '<div class="post_tags for-post">', + '<div class="post-actions">'), '?tag=', '"')), } def _parse_datetime(self, dt): @@ -243,7 +254,12 @@ class SubscribestarPostExtractor(SubscribestarExtractor): "post_id" : text.parse_int(extr('data-id="', '"')), "date" : self._parse_datetime(extr( '<div class="section-title_date">', '<')), - "content" : extr('<body>', '</body>').strip(), + "content" : extr( + '<div class="post-content" data-role="post_content-text">', + '</div><div class="post-uploads for-youtube"').strip(), + "tags" : list(text.extract_iter(extr( + '<div class="post_tags for-post">', + '<div class="post-actions">'), '?tag=', '"')), "author_name": text.unescape(extr( 'class="star_link" href="/', '"')), "author_id" : text.parse_int(extr('data-user-id="', '"')), diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py index af3f32d..1dd3482 100644 --- a/gallery_dl/extractor/vipergirls.py +++ b/gallery_dl/extractor/vipergirls.py @@ -43,31 +43,40 @@ class VipergirlsExtractor(Extractor): def items(self): self.login() - posts = self.posts() + root = self.posts() + forum_title = root[1].attrib["title"] + thread_title = root[2].attrib["title"] like = self.config("like") if like: - user_hash = posts[0].get("hash") + user_hash = root[0].get("hash") if len(user_hash) < 16: self.log.warning("Login required to like posts") like = False - posts = posts.iter("post") + posts = root.iter("post") if self.page: util.advance(posts, (text.parse_int(self.page[5:]) - 1) * 15) for post in posts: + images = list(post) + data = post.attrib + data["forum_title"] = forum_title data["thread_id"] = self.thread_id + data["thread_title"] = thread_title + data["post_id"] = data.pop("id") + data["post_num"] = data.pop("number") + data["post_title"] = 
data.pop("title") + data["count"] = len(images) + del data["imagecount"] yield Message.Directory, data - - image = None - for image in post: - yield Message.Queue, image.attrib["main_url"], data - - if image is not None and like: - self.like(post, user_hash) + if images: + for data["num"], image in enumerate(images, 1): + yield Message.Queue, image.attrib["main_url"], data + if like: + self.like(post, user_hash) def login(self): if self.cookies_check(self.cookies_names): diff --git a/gallery_dl/transaction_id.py b/gallery_dl/transaction_id.py index 25f1775..89e3d5b 100644 --- a/gallery_dl/transaction_id.py +++ b/gallery_dl/transaction_id.py @@ -129,7 +129,9 @@ class ClientTransaction(): keyword="obfiowerehiring", rndnum=3): bytes_key = self.key_bytes - now = int(time.time()) - 1682924400 + nowf = time.time() + nowi = int(nowf) + now = nowi - 1682924400 bytes_time = ( (now ) & 0xFF, # noqa: E202 (now >> 8) & 0xFF, # noqa: E222 @@ -141,7 +143,7 @@ class ClientTransaction(): method, path, now, keyword, self.animation_key) bytes_hash = hashlib.sha256(payload.encode()).digest()[:16] - num = random.randrange(256) + num = (random.randrange(16) << 4) + int((nowf - nowi) * 16.0) result = bytes( byte ^ num for byte in itertools.chain( diff --git a/gallery_dl/version.py b/gallery_dl/version.py index d40dacd..e543a31 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.29.6" +__version__ = "1.29.7" __variant__ = None |
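Taken together with the docs/gallery-dl.conf additions above, the new options in this release can be exercised from a single config fragment. A hedged sketch: the mangadex credential values are left empty to be filled in per the client-id/client-secret How-To above, and the list form of extractor.cookies (a browser specification, here selecting the newly supported Zen browser) is assumed to follow the existing cookies option format:

    {
        "extractor": {
            "cookies": ["zen"],
            "mangadex": {
                "username"     : "",
                "password"     : "",
                "client-id"    : "",
                "client-secret": ""
            },
            "pixeldrain": {
                "recursive": true
            },
            "instagram": {
                "stories": {
                    "split": true
                }
            },
            "sankaku": {
                "tags": "extended"
            }
        }
    }

With client-id and client-secret set, the new MangadexAPI._authenticate_impl_client path performs an OpenID Connect token request against auth.mangadex.org and caches the refresh token (now for 90 days, per the _refresh_token_cache change above); without them, the legacy /auth/login flow is used.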