From 8a644b7a06c504263a478d3681eed10b4161b5be Mon Sep 17 00:00:00 2001
From: Unit 193
Date: Sat, 5 Jun 2021 20:55:36 -0400
Subject: New upstream version 1.17.5.

---
 CHANGELOG.md                            |  49 +++++
 PKG-INFO                                |   8 +-
 README.rst                              |   6 +-
 data/completion/_gallery-dl             |   2 +-
 data/man/gallery-dl.1                   |   4 +-
 data/man/gallery-dl.conf.5              | 157 ++++++++++++++-
 docs/gallery-dl.conf                    |   9 +-
 gallery_dl.egg-info/PKG-INFO            |   8 +-
 gallery_dl.egg-info/SOURCES.txt         |   1 +
 gallery_dl/__init__.py                  |   4 +-
 gallery_dl/cache.py                     |  37 ++--
 gallery_dl/config.py                    |   4 +-
 gallery_dl/downloader/http.py           |  17 +-
 gallery_dl/exception.py                 |   8 +-
 gallery_dl/extractor/35photo.py         |   7 +-
 gallery_dl/extractor/500px.py           |   8 +-
 gallery_dl/extractor/aryion.py          |  12 +-
 gallery_dl/extractor/danbooru.py        |   1 +
 gallery_dl/extractor/deviantart.py      |  10 +-
 gallery_dl/extractor/exhentai.py        |   2 +-
 gallery_dl/extractor/foolfuuka.py       |   2 +-
 gallery_dl/extractor/gelbooru.py        |  22 +++
 gallery_dl/extractor/imagebam.py        | 119 ++++++-----
 gallery_dl/extractor/imgur.py           |   6 +-
 gallery_dl/extractor/inkbunny.py        |  29 ++-
 gallery_dl/extractor/instagram.py       |   4 +-
 gallery_dl/extractor/kemonoparty.py     |  42 +++-
 gallery_dl/extractor/manganelo.py       |  63 +++---
 gallery_dl/extractor/mangapark.py       |   6 +-
 gallery_dl/extractor/nozomi.py          |   4 +-
 gallery_dl/extractor/patreon.py         |  18 +-
 gallery_dl/extractor/pillowfort.py      | 201 +++++++++++++------
 gallery_dl/extractor/pixiv.py           |  37 +++-
 gallery_dl/extractor/reactor.py         |   4 +-
 gallery_dl/extractor/readcomiconline.py |  26 +--
 gallery_dl/extractor/sankaku.py         |   7 +-
 gallery_dl/extractor/twitter.py         |  20 +-
 gallery_dl/extractor/unsplash.py        |   5 +-
 gallery_dl/extractor/weasyl.py          |   6 +-
 gallery_dl/extractor/weibo.py           |  24 ++-
 gallery_dl/extractor/wikiart.py         |   8 +-
 gallery_dl/job.py                       |  87 ++++----
 gallery_dl/option.py                    |   2 +-
 gallery_dl/output.py                    |  12 +-
 gallery_dl/postprocessor/ugoira.py      |  87 +++++---
 gallery_dl/text.py                      |   2 +-
 gallery_dl/util.py                      |  13 +-
 gallery_dl/version.py                   |   2 +-
 test/test_job.py                        | 338 ++++++++++++++++++++++++++++++++
 test/test_results.py                    |   2 +-
 test/test_text.py                       |   4 +
 test/test_util.py                       |   9 +
 52 files changed, 1213 insertions(+), 352 deletions(-)
 create mode 100644 test/test_job.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 59691b7..dcc1299 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,54 @@
 # Changelog
 
+## 1.17.5 - 2021-05-30
+### Additions
+- [kemonoparty] add `metadata` option ([#1548](https://github.com/mikf/gallery-dl/issues/1548))
+- [kemonoparty] add `type` metadata field ([#1556](https://github.com/mikf/gallery-dl/issues/1556))
+- [mangapark] recognize v2.mangapark URLs ([#1578](https://github.com/mikf/gallery-dl/issues/1578))
+- [patreon] extract user-defined `tags` ([#1539](https://github.com/mikf/gallery-dl/issues/1539), [#1540](https://github.com/mikf/gallery-dl/issues/1540))
+- [pillowfort] implement login with username & password ([#846](https://github.com/mikf/gallery-dl/issues/846))
+- [pillowfort] add `inline` and `external` options ([#846](https://github.com/mikf/gallery-dl/issues/846))
+- [pixiv] implement `max-posts` option ([#1558](https://github.com/mikf/gallery-dl/issues/1558))
+- [pixiv] add `metadata` option ([#1551](https://github.com/mikf/gallery-dl/issues/1551))
+- [twitter] add `text-tweets` option ([#570](https://github.com/mikf/gallery-dl/issues/570))
+- [weibo] extend `retweets` option ([#1542](https://github.com/mikf/gallery-dl/issues/1542))
+- [postprocessor:ugoira] support using the `image2` demuxer ([#1550](https://github.com/mikf/gallery-dl/issues/1550))
+- [postprocessor:ugoira] add `repeat-last-frame` option ([#1550](https://github.com/mikf/gallery-dl/issues/1550))
+- support `XDG_CONFIG_HOME` ([#1545](https://github.com/mikf/gallery-dl/issues/1545))
+- implement `parent-skip` and `"skip": "terminate"` options ([#1399](https://github.com/mikf/gallery-dl/issues/1399))
+### Changes
+- [twitter] resolve `t.co` URLs in `content` ([#1532](https://github.com/mikf/gallery-dl/issues/1532))
+### Fixes
+- [500px] update query hashes ([#1573](https://github.com/mikf/gallery-dl/issues/1573))
+- [aryion] find text posts in `recursive=false` mode ([#1568](https://github.com/mikf/gallery-dl/issues/1568))
+- [imagebam] fix extraction of NSFW images ([#1534](https://github.com/mikf/gallery-dl/issues/1534))
+- [imgur] update URL patterns ([#1561](https://github.com/mikf/gallery-dl/issues/1561))
+- [manganelo] update domain to `manganato.com`
+- [reactor] skip deleted/empty posts
+- [twitter] add missing retweet media entities ([#1555](https://github.com/mikf/gallery-dl/issues/1555))
+- fix ISO 639-1 code for Japanese (`jp` -> `ja`)
+
+## 1.17.4 - 2021-05-07
+### Additions
+- [gelbooru] add extractor for `/redirect.php` URLs ([#1530](https://github.com/mikf/gallery-dl/issues/1530))
+- [inkbunny] add `favorite` extractor ([#1521](https://github.com/mikf/gallery-dl/issues/1521))
+- add `output.skip` option
+- add an optional argument to `--clear-cache` to select which cache entries to remove ([#1230](https://github.com/mikf/gallery-dl/issues/1230))
+### Changes
+- [pixiv] update `translated-tags` option ([#1507](https://github.com/mikf/gallery-dl/issues/1507))
+  - rename to `tags`
+  - accept `"japanese"`, `"translated"`, and `"original"` as values
+### Fixes
+- [500px] update query hashes
+- [kemonoparty] fix download URLs ([#1514](https://github.com/mikf/gallery-dl/issues/1514))
+- [imagebam] fix extraction
+- [instagram] update query hashes
+- [nozomi] update default archive-fmt for `tag` and `search` extractors ([#1529](https://github.com/mikf/gallery-dl/issues/1529))
+- [pixiv] remove duplicate translated tags ([#1507](https://github.com/mikf/gallery-dl/issues/1507))
+- [readcomiconline] change domain to `readcomiconline.li` ([#1517](https://github.com/mikf/gallery-dl/issues/1517))
+- [sankaku] update invalid-token detection ([#1515](https://github.com/mikf/gallery-dl/issues/1515))
+- fix crash when using `--no-download` with `--ugoira-conv` ([#1507](https://github.com/mikf/gallery-dl/issues/1507))
+
 ## 1.17.3 - 2021-04-25
 ### Additions
 - [danbooru] add option for extended metadata extraction ([#1458](https://github.com/mikf/gallery-dl/issues/1458))

diff --git a/PKG-INFO b/PKG-INFO
index 3df2fe0..14d8ed3 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery_dl
-Version: 1.17.3
+Version: 1.17.5
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
         Prebuilt executable files with a Python interpreter and
         required Python packages included are available for
 
-        - `Windows `__
-        - `Linux `__
+        - `Windows `__
+        - `Linux `__
 
         | Executables build from the latest commit can be found at
         | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -190,6 +190,7 @@ Description: ==========
         Linux, macOS, etc.:
 
         * ``/etc/gallery-dl.conf``
+        * ``${XDG_CONFIG_HOME}/gallery-dl/config.json``
         * ``${HOME}/.config/gallery-dl/config.json``
         * ``${HOME}/.gallery-dl.conf``
@@ -220,6 +221,7 @@ Description: ==========
         ``inkbunny``,
         ``instagram``,
         ``mangoxo``,
+        ``pillowfort``,
         ``pinterest``,
         ``sankaku``,
         ``subscribestar``,

diff --git a/README.rst b/README.rst
index d659faf..66e71e7 100644
--- a/README.rst
+++ b/README.rst
@@ -64,8 +64,8 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows `__
-- `Linux `__
+- `Windows `__
+- `Linux `__
 
 | Executables build from the latest commit can be found at
 | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -179,6 +179,7 @@ Windows:
 Linux, macOS, etc.:
 
 * ``/etc/gallery-dl.conf``
+* ``${XDG_CONFIG_HOME}/gallery-dl/config.json``
 * ``${HOME}/.config/gallery-dl/config.json``
 * ``${HOME}/.gallery-dl.conf``
@@ -209,6 +210,7 @@ and optional for
 ``inkbunny``,
 ``instagram``,
 ``mangoxo``,
+``pillowfort``,
 ``pinterest``,
 ``sankaku``,
 ``subscribestar``,

diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 76afd8a..436260b 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -11,7 +11,7 @@ _arguments -C -S \
 {-i,--input-file}'[Download URLs found in FILE ("-" for stdin). More than one --input-file can be specified]':'':_files \
 --cookies'[File to load additional cookies from]':'':_files \
 --proxy'[Use the specified proxy]':'' \
---clear-cache'[Delete all cached login sessions, cookies, etc.]' \
+--clear-cache'[Delete all cached login sessions, cookies, etc.]':'' \
 {-q,--quiet}'[Activate quiet mode]' \
 {-v,--verbose}'[Print various debugging information]' \
 {-g,--get-urls}'[Print URLs instead of downloading]' \

diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 6a22a07..719b8b4 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2021-04-25" "1.17.3" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2021-05-30" "1.17.5" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
 
@@ -35,7 +35,7 @@ File to load additional cookies from
 .B "\-\-proxy" \f[I]URL\f[]
 Use the specified proxy
 .TP
-.B "\-\-clear\-cache"
+.B "\-\-clear\-cache" \f[I]MODULE\f[]
 Delete all cached login sessions, cookies, etc.
 .TP
 .B "\-q, \-\-quiet"

diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 0190b7f..f35f218 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2021-04-25" "1.17.3" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2021-05-30" "1.17.5" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
 .\" disable justification (adjust text to left margin only)
@@ -166,6 +166,17 @@ for any spawned child extractors.
 Overwrite any metadata provided by a child extractor
 with its parent's.
 
+.SS extractor.*.parent-skip
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Share number of skipped downloads between parent and child extractors.
+
+
 .SS extractor.*.path-restrict
 .IP "Type:" 6
 \f[I]string\f[] or \f[I]object\f[]
@@ -267,11 +278,17 @@ exists or its ID is in a \f[I]download archive\f[].
 * \f[I]false\f[]: Overwrite already existing files
 .br
-* \f[I]"abort"\f[]: Abort the current extractor run
+* \f[I]"abort"\f[]: Stop the current extractor run
 .br
-* \f[I]"abort:N"\f[]: Skip downloads and abort extractor run
+* \f[I]"abort:N"\f[]: Skip downloads and stop the current extractor run
 after \f[I]N\f[] consecutive skips
+.br
+* \f[I]"terminate"\f[]: Stop the current extractor run,
+including parent extractors
+.br
+* \f[I]"terminate:N"\f[]: Skip downloads and stop the current extractor run,
+including parent extractors, after \f[I]N\f[] consecutive skips
+
 .br
 * \f[I]"exit"\f[]: Exit the program altogether
 .br
@@ -357,6 +374,8 @@ and optional for
 .br
 * \f[I]mangoxo\f[]
 .br
+* \f[I]pillowfort\f[]
+.br
 * \f[I]pinterest\f[]
 .br
 * \f[I]sankaku\f[]
@@ -1286,7 +1305,8 @@ A (comma-separated) list of subcategories to include
 when processing a user profile.
 
 Possible values are
-\f[I]"posts"\f[], \f[I]reels\f[], \f[I]"stories"\f[], \f[I]"highlights"\f[], \f[I]"channel"\f[].
+\f[I]"posts"\f[], \f[I]"reels"\f[], \f[I]"channel"\f[], \f[I]"tagged"\f[],
+\f[I]"stories"\f[], \f[I]"highlights"\f[].
 
 You can use \f[I]"all"\f[] instead of listing all values separately.
@@ -1302,6 +1322,17 @@ You can use \f[I]"all"\f[] instead of listing all values separately.
 Download video files.
 
+.SS extractor.kemonoparty.metadata
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Extract \f[I]username\f[] metadata
+
+
 .SS extractor.khinsider.format
 .IP "Type:" 6
 \f[I]string\f[]
@@ -1434,6 +1465,28 @@ port than the default.
 Download subalbums.
 
+.SS extractor.pillowfort.external
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Follow links to external sites, e.g. Twitter,
+
+
+.SS extractor.pillowfort.inline
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Extract inline images.
+
+
 .SS extractor.pillowfort.reblogs
 .IP "Type:" 6
 \f[I]bool\f[]
@@ -1478,7 +1531,7 @@ Download from video pins.
 Download user avatars.
 
-.SS extractor.pixiv.work.related
+.SS extractor.pixiv.user.metadata
 .IP "Type:" 6
 \f[I]bool\f[]
 
@@ -1486,10 +1539,10 @@ Download user avatars.
 \f[I]false\f[]
 
 .IP "Description:" 4
-Also download related artworks.
+Fetch extended \f[I]user\f[] metadata.
 
 
-.SS extractor.pixiv.translated-tags
+.SS extractor.pixiv.work.related
 .IP "Type:" 6
 \f[I]bool\f[]
 
@@ -1497,7 +1550,25 @@ Also download related artworks.
 \f[I]false\f[]
 
 .IP "Description:" 4
-Provide translated ´tags`.
+Also download related artworks.
+
+
+.SS extractor.pixiv.tags
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"japanese"\f[]
+
+.IP "Description:" 4
+Controls the \f[I]tags\f[] metadata field.
+
+.br
+* "japanese": List of Japanese tags
+.br
+* "translated": List of translated tags
+.br
+* "original": Unmodified list with both Japanese and translated tags
 
 
 .SS extractor.pixiv.ugoira
@@ -1517,6 +1588,18 @@ Use an ugoira post processor to convert them
 to watchable videos.
 
 (Example__)
 
+.SS extractor.pixiv.max-posts
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]0\f[]
+
+.IP "Description:" 4
+When downloading galleries, this sets the maximum number of posts to get.
+A value of \f[I]0\f[] means no limit.
+
+
 .SS extractor.plurk.comments
 .IP "Type:" 6
 \f[I]bool\f[]
@@ -1848,6 +1931,21 @@ If this value is \f[I]"original"\f[],
 metadata for these files will be taken from the original Tweets,
 not the Retweets.
 
+.SS extractor.twitter.text-tweets
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Also emit metadata for text-only Tweets without media content.
+
+This only has an effect with a \f[I]metadata\f[] (or \f[I]exec\f[]) post processor
+with \f[I]"event": "post"\f[]
+and appropriate \f[I]filename\f[].
+
+
 .SS extractor.twitter.twitpic
 .IP "Type:" 6
 \f[I]bool\f[]
@@ -1965,7 +2063,10 @@ to use your account's browsing settings and filters.
 \f[I]true\f[]
 
 .IP "Description:" 4
-Extract media from retweeted posts.
+Fetch media from retweeted posts.
+
+If this value is \f[I]"original"\f[], metadata for these files
+will be taken from the original posts, not the retweeted posts.
 
 
 .SS extractor.weibo.videos
@@ -2287,6 +2388,17 @@ Controls whether the output strings should be shortened
 to fit on one console line.
 
+.SS output.skip
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Show skipped file downloads.
+
+
 .SS output.progress
 .IP "Type:" 6
 \f[I]bool\f[] or \f[I]string\f[]
@@ -2558,7 +2670,7 @@ The event for which metadata gets written to a file.
 The available events are:
 
 \f[I]init\f[]
-After post procesor initialization
+After post processor initialization
 and before the first file download
 \f[I]finalize\f[]
 On extractor shutdown, e.g. after all files were downloaded
@@ -2631,6 +2743,19 @@ Filename extension for the resulting video files.
 Additional FFmpeg command-line arguments.
 
+.SS ugoira.ffmpeg-demuxer
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]image2\f[]
+
+.IP "Description:" 4
+FFmpeg demuxer to read input files with. Possible values are
+"\f[I]image2\f[]" and
+"\f[I]concat\f[]".
+
+
 .SS ugoira.ffmpeg-location
 .IP "Type:" 6
 \f[I]Path\f[]
@@ -2714,6 +2839,18 @@ to the list of FFmpeg command-line arguments
 to reduce an odd width/height by 1 pixel and make them even.
 
+.SS ugoira.repeat-last-frame
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Allow repeating the last frame when necessary
+to prevent it from only being displayed for a very short amount of time.
+
+
 .SS zip.extension
 .IP "Type:" 6
 \f[I]string\f[]

diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 4eaf1b8..7497cd6 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -59,6 +59,8 @@
         },
         "deviantart":
         {
+            "client-id": null,
+            "client-secret": null,
             "extra": false,
             "flat": true,
             "folders": false,
@@ -174,6 +176,8 @@
         },
         "pillowfort":
         {
+            "external": false,
+            "inline": true,
             "reblogs": false
         },
         "pinterest":
@@ -183,8 +187,9 @@
         },
         "pixiv":
         {
+            "refresh-token": null,
             "avatar": false,
-            "translated-tags": false,
+            "tags": "japanese",
             "ugoira": true
         },
         "reactor":
@@ -254,6 +259,7 @@
             "quoted": true,
             "replies": true,
             "retweets": true,
+            "text-tweets": false,
             "twitpic": false,
             "users": "timeline",
             "videos": true
@@ -320,6 +326,7 @@
         "mode": "auto",
         "progress": true,
         "shorten": true,
+        "skip": true,
         "log": "[{name}][{levelname}] {message}",
         "logfile": null,
         "unsupportedfile": null

diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index e192d75..7fe851f 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery-dl
-Version: 1.17.3
+Version: 1.17.5
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
         Prebuilt executable files with a Python interpreter and
         required Python packages included are available for
 
-        - `Windows `__
-        - `Linux `__
+        - `Windows `__
+        - `Linux `__
 
         | Executables build from the latest commit can be found at
         | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -190,6 +190,7 @@ Description: ==========
         Linux, macOS, etc.:
 
         * ``/etc/gallery-dl.conf``
+        * ``${XDG_CONFIG_HOME}/gallery-dl/config.json``
         * ``${HOME}/.config/gallery-dl/config.json``
         * ``${HOME}/.gallery-dl.conf``
@@ -220,6 +221,7 @@ Description: ==========
         ``inkbunny``,
         ``instagram``,
         ``mangoxo``,
+        ``pillowfort``,
         ``pinterest``,
         ``sankaku``,
         ``subscribestar``,

diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 3cc2071..9655896 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -182,6 +182,7 @@ test/test_config.py
 test/test_cookies.py
 test/test_downloader.py
 test/test_extractor.py
+test/test_job.py
 test/test_oauth.py
 test/test_postprocessor.py
 test/test_results.py

diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 5bf229a..8154afc 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -186,7 +186,7 @@ def main():
         elif args.clear_cache:
             from . import cache
             log = logging.getLogger("cache")
-            cnt = cache.clear()
+            cnt = cache.clear(args.clear_cache)
 
             if cnt is None:
                 log.error("Database file not available")
@@ -249,6 +249,8 @@ def main():
                         retval |= jobtype(url.value).run()
                     else:
                         retval |= jobtype(url).run()
+                except exception.TerminateExtraction:
+                    pass
                 except exception.NoExtractorError:
                     log.error("No suitable extractor found for '%s'", url)
                     retval |= 64

diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py
index a874f63..5ab68bf 100644
--- a/gallery_dl/cache.py
+++ b/gallery_dl/cache.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2016-2020 Mike Fährmann
+# Copyright 2016-2021 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -168,24 +168,33 @@ def cache(maxage=3600, keyarg=None):
     return wrap
 
 
-def clear():
-    """Delete all database entries"""
+def clear(module="all"):
+    """Delete database entries for 'module'"""
     db = DatabaseCacheDecorator.db
+    if not db:
+        return None
 
-    if db:
-        rowcount = 0
-        cursor = db.cursor()
-        try:
+    rowcount = 0
+    cursor = db.cursor()
+    module = module.lower()
+
+    try:
+        if module == "all":
             cursor.execute("DELETE FROM data")
-        except sqlite3.OperationalError:
-            pass  # database is not initialized, can't be modified, etc.
         else:
-            rowcount = cursor.rowcount
-            db.commit()
+            cursor.execute(
+                "DELETE FROM data "
+                "WHERE key LIKE 'gallery_dl.extractor.' || ? || '.%'",
+                (module,)
+            )
+    except sqlite3.OperationalError:
+        pass  # database is not initialized, can't be modified, etc.
+    else:
+        rowcount = cursor.rowcount
+        db.commit()
+
         if rowcount:
             cursor.execute("VACUUM")
-        return rowcount
-
-    return None
+    return rowcount
 
 
 def _path():

diff --git a/gallery_dl/config.py b/gallery_dl/config.py
index e0a5459..953b1b1 100644
--- a/gallery_dl/config.py
+++ b/gallery_dl/config.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2015-2021 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -31,6 +31,8 @@ if util.WINDOWS:
 else:
     _default_configs = [
         "/etc/gallery-dl.conf",
+        "${XDG_CONFIG_HOME}/gallery-dl/config.json"
+        if os.environ.get("XDG_CONFIG_HOME") else
         "${HOME}/.config/gallery-dl/config.json",
         "${HOME}/.gallery-dl.conf",
     ]

diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index bc42d7c..76ec46f 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -80,6 +80,10 @@ class HttpDownloader(DownloaderBase):
         tries = 0
         msg = ""
 
+        kwdict = pathfmt.kwdict
+        adjust_extension = kwdict.get(
+            "_http_adjust_extension", self.adjust_extension)
+
         if self.part:
             pathfmt.part_enable(self.partdir)
 
@@ -105,7 +109,7 @@ class HttpDownloader(DownloaderBase):
             if self.headers:
                 headers.update(self.headers)
             # file-specific headers
-            extra = pathfmt.kwdict.get("_http_headers")
+            extra = kwdict.get("_http_headers")
             if extra:
                 headers.update(extra)
 
@@ -139,7 +143,7 @@ class HttpDownloader(DownloaderBase):
                 return False
 
             # check for invalid responses
-            validate = pathfmt.kwdict.get("_http_validate")
+            validate = kwdict.get("_http_validate")
             if validate and not validate(response):
                 self.log.warning("Invalid response")
                 return False
@@ -168,7 +172,7 @@ class HttpDownloader(DownloaderBase):
             content = response.iter_content(self.chunk_size)
 
             # check filename extension against file header
-            if self.adjust_extension and not offset and \
+            if adjust_extension and not offset and \
                     pathfmt.extension in FILE_SIGNATURES:
                 try:
                     file_header = next(
@@ -198,7 +202,7 @@ class HttpDownloader(DownloaderBase):
                 if file_header:
                     fp.write(file_header)
                 elif offset:
-                    if self.adjust_extension and \
+                    if adjust_extension and \
                             pathfmt.extension in FILE_SIGNATURES:
                         self._adjust_extension(pathfmt, fp.read(16))
                     fp.seek(offset)
@@ -222,10 +226,9 @@ class HttpDownloader(DownloaderBase):
         self.downloading = False
         if self.mtime:
-            pathfmt.kwdict.setdefault(
-                "_mtime", response.headers.get("Last-Modified"))
+            kwdict.setdefault("_mtime", response.headers.get("Last-Modified"))
         else:
-            pathfmt.kwdict["_mtime"] = None
+            kwdict["_mtime"] = None
 
         return True

diff --git a/gallery_dl/exception.py b/gallery_dl/exception.py
index f553d41..0433dc9 100644
--- a/gallery_dl/exception.py
+++ b/gallery_dl/exception.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2021 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -23,6 +23,7 @@ Exception
     +-- FilterError
     +-- NoExtractorError
     +-- StopExtraction
+    +-- TerminateExtraction
 """
 
 
@@ -109,3 +110,8 @@ class StopExtraction(GalleryDLException):
         GalleryDLException.__init__(self)
         self.message = message % args if args else message
         self.code = 1 if message else 0
+
+
+class TerminateExtraction(GalleryDLException):
+    """Terminate data extraction"""
+    code = 0

diff --git a/gallery_dl/extractor/35photo.py b/gallery_dl/extractor/35photo.py
index edb9d46..27634de 100644
--- a/gallery_dl/extractor/35photo.py
+++ b/gallery_dl/extractor/35photo.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2019-2020 Mike Fährmann
+# Copyright 2019-2021 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -104,7 +104,8 @@ class _35photoUserExtractor(_35photoExtractor):
                r"/(?!photo_|genre_|tags/|rating/)([^/?#]+)")
     test = (
         ("https://35photo.pro/liya", {
-            "pattern": r"https://m\d+.35photo.pro/photos_(main|series)/.*.jpg",
+            "pattern": r"https://([a-z][0-9]\.)?35photo\.pro"
+                       r"/photos_(main|series)/.*\.jpg",
             "count": 9,
         }),
         ("https://35photo.pro/suhoveev", {
@@ -214,7 +215,7 @@ class _35photoImageExtractor(_35photoExtractor):
     test = ("https://35photo.pro/photo_753340/", {
         "count": 1,
         "keyword": {
-            "url"  : r"re:https://m\d+.35photo.pro/photos_main/.*.jpg",
+            "url"  : r"re:https://35photo\.pro/photos_main/.*\.jpg",
             "id"   : 753340,
             "title" : "Winter walk",
             "description": str,

diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index 0583eb9..c2c5a66 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -146,7 +146,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
         }),
         # unavailable photos (#1335)
         ("https://500px.com/p/Light_Expression_Photography/galleries/street", {
-            "count": ">= 7",
+            "count": 0,
         }),
         ("https://500px.com/fashvamp/galleries/lera"),
     )
@@ -172,7 +172,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
         }
         gallery = self._request_graphql(
             "GalleriesDetailQueryRendererQuery", variables,
-            "fb8bb66d31b58903e2f01ebe66bbe7937b982753be3211855b7bce4e286c1a49",
+            "eda3c77ca4efe4b3347ec9c08befe3bd2c58099ebfb1f680d829fcd26d34f12d",
         )["gallery"]
 
         self._photos = gallery["photos"]
@@ -200,8 +200,8 @@ class _500pxGalleryExtractor(_500pxExtractor):
             variables["cursor"] = photos["pageInfo"]["endCursor"]
             photos = self._request_graphql(
                 "GalleriesDetailPaginationContainerQuery", variables,
-                "457c66d976f56863c81795f03e98cb54"
-                "3c7c6cdae7abeab8fe9e8e8a67479fa9",
+                "466cf6661a07e7fdca465edb39118efb"
+                "80fb157c6d3f620c7f518cdae0832c78",
             )["galleryByOwnerIdAndSlugOrToken"]["photos"]

diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index ded2ae3..0d0ad70 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2020 Mike Fährmann
+# Copyright 2020-2021 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -12,7 +12,6 @@ from .common import Extractor, Message
 from .. import text, util, exception
 from ..cache import cache
 
-
 BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4"
@@ -33,6 +32,8 @@ class AryionExtractor(Extractor):
         self._needle = "class='gallery-item' id='"
 
     def login(self):
+        if self._check_cookies(self.cookienames):
+            return
         username, password = self._get_auth_info()
         if username:
             self._update_cookies(self._login_impl(username, password))
@@ -73,8 +74,7 @@ class AryionExtractor(Extractor):
     def _pagination(self, url):
         while True:
             page = self.request(url).text
-            yield from text.extract_iter(
-                page, self._needle, "'")
+            yield from text.extract_iter(page, self._needle, "'")
 
             pos = page.find("Next >>")
             if pos < 0:
@@ -173,7 +173,7 @@ class AryionGalleryExtractor(AryionExtractor):
 
     def skip(self, num):
         if self.recursive:
-            num = 0
+            return 0
         self.offset += num
         return num
@@ -182,7 +182,7 @@ class AryionGalleryExtractor(AryionExtractor):
             url = "{}/g4/gallery/{}".format(self.root, self.user)
             return self._pagination(url)
         else:
-            self._needle = "class='thumb' href='/g4/view/"
+            self._needle = "thumb' href='/g4/view/"
             url = "{}/g4/latest.php?name={}".format(self.root, self.user)
             return util.advance(self._pagination(url), self.offset)

diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 1f86ea5..3b96a4e 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -61,6 +61,7 @@ class DanbooruExtractor(Extractor):
                     "{}/posts/{}.json?only=pixiv_ugoira_frame_data".format(
                         self.root, post["id"])
                 ).json()["pixiv_ugoira_frame_data"]["data"]
+                post["_http_adjust_extension"] = False
             else:
                 url = post["large_file_url"]
                 post["extension"] = "webm"

diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 47f589a..9a461a4 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -930,10 +930,12 @@ class DeviantartOAuthAPI():
         self.folders = extractor.config("folders", False)
         self.metadata = extractor.extra or extractor.config("metadata", False)
 
-        self.client_id = extractor.config(
-            "client-id", self.CLIENT_ID)
-        self.client_secret = extractor.config(
-            "client-secret", self.CLIENT_SECRET)
+        self.client_id = extractor.config("client-id")
+        if self.client_id:
+            self.client_secret = extractor.config("client-secret")
+        else:
+            self.client_id = self.CLIENT_ID
+            self.client_secret = self.CLIENT_SECRET
 
         token = extractor.config("refresh-token")
         if token is None or token == "cache":

diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 910da7d..64a6cb7 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -128,7 +128,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             "gid": 1200119,
             "height": int,
             "image_token": "re:[0-9a-f]{10}",
-            "lang": "jp",
+            "lang": "ja",
             "language": "Japanese",
             "parent": "",
             "rating": r"re:\d\.\d+",

diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 0bcec2b..5962b9e 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -135,7 +135,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
             "url": "61896d9d9a2edb556b619000a308a984307b6d30",
         }),
         ("https://thebarchive.com/b/thread/739772332/", {
-            "url": "e8b18001307d130d67db31740ce57c8561b5d80c",
+            "url": "07d39d2cb48f40fb337dc992993d965b0cd5f7cd",
         }),
     )

diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 863cead..df45d0d 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -8,8 +8,10 @@
 
 """Extractors for https://gelbooru.com/"""
 
+from .common import Extractor, Message
 from . import gelbooru_v02
 from .. import text, exception
+import binascii
 
 
 class GelbooruBase():
@@ -131,3 +133,23 @@ class GelbooruPostExtractor(GelbooruBase,
             }
         }),
     )
+
+
+class GelbooruRedirectExtractor(GelbooruBase, Extractor):
+    subcategory = "redirect"
+    pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com"
+               r"/redirect\.php\?s=([^&#]+)")
+    test = (("https://gelbooru.com/redirect.php?s=Ly9nZWxib29ydS5jb20vaW5kZXgu"
+             "cGhwP3BhZ2U9cG9zdCZzPXZpZXcmaWQ9MTgzMDA0Ng=="), {
+        "pattern": r"https://gelbooru.com/index.php"
+                   r"\?page=post&s=view&id=1830046"
+    })
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.redirect_url = text.ensure_http_scheme(
+            binascii.a2b_base64(match.group(1)).decode())
+
+    def items(self):
+        data = {"_extractor": GelbooruPostExtractor}
+        yield Message.Queue, self.redirect_url, data

diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py
index 76b2c38..9370840 100644
--- a/gallery_dl/extractor/imagebam.py
+++ b/gallery_dl/extractor/imagebam.py
@@ -1,12 +1,12 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2014-2019 Mike Fährmann
+# Copyright 2014-2021 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract images from http://www.imagebam.com/"""
+"""Extractors for https://www.imagebam.com/"""
 
 from .common import Extractor, Message
 from .. import text, exception
@@ -15,34 +15,44 @@ from .. import text, exception
 
 class ImagebamExtractor(Extractor):
     """Base class for imagebam extractors"""
     category = "imagebam"
-    root = "http://www.imagebam.com"
+    root = "https://www.imagebam.com"
+    cookies = None
 
-    def get_image_data(self, page_url, data):
-        """Fill 'data' and return image URL"""
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.key = match.group(1)
+        if self.cookies:
+            self.session.cookies = self.cookies
+
+    def get_image_data(self, data):
+        page_url = "{}/image/{}".format(self.root, data["image_key"])
         page = self.request(page_url).text
-        image_url = text.extract(page, 'property="og:image" content="', '"')[0]
-        data["extension"] = image_url.rpartition(".")[2]
-        data["image_key"] = page_url.rpartition("/")[2]
-        data["image_id"] = data["image_key"][6:]
-        return image_url
+        image_url, pos = text.extract(page, '", "")[0]
+        filename = text.unescape(text.extract(page, 'alt="', '"', pos)[0])
+        data["url"] = "https://images" + image_url
+        data["filename"], _, data["extension"] = filename.rpartition(".")
 
 
 class ImagebamGalleryExtractor(ImagebamExtractor):
     """Extractor for image galleries from imagebam.com"""
     subcategory = "gallery"
-    directory_fmt = ("{category}", "{title} - {gallery_key}")
-    filename_fmt = "{num:>03}-{image_key}.{extension}"
+    directory_fmt = ("{category}", "{title} {gallery_key}")
+    filename_fmt = "{num:>03} {filename}.{extension}"
     archive_fmt = "{gallery_key}_{image_key}"
     pattern = r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([0-9a-z]+)"
     test = (
-        ("http://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
+        ("https://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
             "url": "76d976788ae2757ac81694736b07b72356f5c4c8",
-            "keyword": "9e25b8827474ac93c54855e798d60aa3cbecbd7a",
+            "keyword": "b048478b1bbba3072a7fa9fcc40630b3efad1f6c",
             "content": "596e6bfa157f2c7169805d50075c2986549973a8",
         }),
         ("http://www.imagebam.com/gallery/op9dwcklwdrrguibnkoe7jxgvig30o5p", {
@@ -51,78 +61,67 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
             "url": "32ae6fe5dc3e4ca73ff6252e522d16473595d1d1",
         }),
         ("http://www.imagebam.com/gallery/gsl8teckymt4vbvx1stjkyk37j70va2c", {
-            "exception": exception.NotFoundError,
+            "exception": exception.HttpError,
         }),
     )
 
-    def __init__(self, match):
-        ImagebamExtractor.__init__(self, match)
-        self.gallery_key = match.group(1)
-
-    def items(self):
-        url = "{}/gallery/{}".format(self.root, self.gallery_key)
-        page = self.request_page(url)
-        if not page or ">Error<" in page:
-            raise exception.NotFoundError("gallery")
+    def items(self):
+        url = "{}/gallery/{}".format(self.root, self.key)
+        page = self.request(url).text
 
         data = self.get_metadata(page)
-        imgs = self.get_image_pages(page)
-        data["count"] = len(imgs)
-        data["gallery_key"] = self.gallery_key
+        keys = self.get_image_keys(page)
+        keys.reverse()
+        data["count"] = len(keys)
+        data["gallery_key"] = self.key
 
-        yield Message.Version, 1
         yield Message.Directory, data
-        for data["num"], page_url in enumerate(imgs, 1):
-            image_url = self.get_image_data(page_url, data)
-            yield Message.Url, image_url, data
+        for data["num"], data["image_key"] in enumerate(keys, 1):
+            self.get_image_data(data)
+            yield Message.Url, data["url"], data
 
     @staticmethod
     def get_metadata(page):
         """Return gallery metadata"""
-        return text.extract_all(page, (
-            ("title"      , "'> ", " ", ""),
-            ("description", ":#FCFCFC;'>", ""),
-        ))[0]
-
-    def get_image_pages(self, page):
-        """Return a list of all image pages"""
-        pages = []
+        title = text.extract(page, 'id="gallery-name">', '<')[0]
+        return {"title": text.unescape(title.strip())}
+
+    def get_image_keys(self, page):
+        """Return a list of all image keys"""
+        keys = []
         while True:
-            pages.extend(text.extract_iter(page, "", " | Kemono<")[0])
+        else:
+            username = None
+
         for post in self.posts():
             files = []
-            if post["file"]:
-                files.append(post["file"])
-            if post["attachments"]:
-                files.extend(post["attachments"])
+            append = files.append
+            file = post["file"]
+
+            if file:
+                file["type"] = "file"
+                append(file)
+            for attachment in post["attachments"]:
+                attachment["type"] = "attachment"
+                append(attachment)
 
             for path in find_inline(post["content"] or ""):
-                files.append({"path": path, "name": path})
+                append({"path": path, "name": path, "type": "inline"})
 
             post["date"] = text.parse_datetime(
                 post["published"], "%a, %d %b %Y %H:%M:%S %Z")
+            if username:
+                post["username"] = username
 
             yield Message.Directory, post
             for post["num"], file in enumerate(files, 1):
+                post["type"] = file["type"]
                 url = file["path"]
                 if url[0] == "/":
-                    url = self.root + url
+                    url = "https://data.kemono.party" + url
+                elif url.startswith("https://kemono.party/"):
+                    url = "https://data.kemono.party" + url[20:]
+
                 text.nameext_from_url(file["name"], post)
                 yield Message.Url, url, post
@@ -64,6 +81,7 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
         KemonopartyExtractor.__init__(self, match)
         service, user_id = match.groups()
         self.api_url = "{}/api/{}/user/{}".format(self.root, service, user_id)
+        self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
 
     def posts(self):
         url = self.api_url
@@ -84,7 +102,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
     pattern = BASE_PATTERN + r"/post/([^/?#]+)"
     test = (
         ("https://kemono.party/fanbox/user/6993449/post/506575", {
-            "pattern": r"https://kemono\.party/files/fanbox"
+            "pattern": r"https://data\.kemono\.party/files/fanbox"
                        r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
             "keyword": {
                 "added": "Wed, 06 May 2020 20:28:02 GMT",
@@ -101,16 +119,21 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
                 "shared_file": False,
                 "subcategory": "post",
                 "title": "c96取り置き",
+                "type": "file",
                 "user": "6993449",
             },
         }),
         # inline image (#1286)
         ("https://kemono.party/fanbox/user/7356311/post/802343", {
-            "pattern": r"https://kemono\.party/inline/fanbox"
+            "pattern": r"https://data\.kemono\.party/inline/fanbox"
                        r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg",
         }),
+        # kemono.party -> data.kemono.party
+        ("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
+            "pattern": r"https://data\.kemono\.party/(file|attachment)s"
+                       r"/gumroad/trylsc/IURjT/",
+        }),
         ("https://kemono.party/subscribestar/user/alcorart/post/184330"),
-        ("https://kemono.party/gumroad/user/trylsc/post/IURjT"),
     )
 
     def __init__(self, match):
@@ -118,6 +141,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
         service, user_id, post_id = match.groups()
         self.api_url = "{}/api/{}/user/{}/post/{}".format(
             self.root, service, user_id, post_id)
+        self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
 
     def posts(self):
         posts = self.request(self.api_url).json()

diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py
index f8e1473..833d18e 100644
--- a/gallery_dl/extractor/manganelo.py
+++ b/gallery_dl/extractor/manganelo.py
@@ -4,35 +4,23 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract manga-chapters and entire manga from https://manganelo.com/"""
+"""Extractors for https://manganato.com/"""
 
 from .common import ChapterExtractor, MangaExtractor
 from .. import text
 import re
 
+BASE_PATTERN = \
+    r"(?:https?://)?((?:(?:read)?manganato|(?:www\.)?manganelo)\.com)"
 
-class ManganeloBase():
-    """Base class for manganelo extractors"""
-    category = "manganelo"
-    root = "https://manganelo.com"
-
-    @staticmethod
-    def parse_page(page, data):
-        """Parse metadata on 'page' and add it to 'data'"""
-        text.extract_all(page, (
-            ("manga"  , '', ''),
-            ('author' , 'Author(s) :', ''),
-        ), values=data)
-        data["author"] = text.remove_html(data["author"])
-        return data
-
 
-class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
+class ManganeloChapterExtractor(ChapterExtractor):
     """Extractor for manga-chapters from manganelo.com"""
-    pattern = (r"(?:https?://)?(?:www\.)?manganelo\.com"
-               r"(/chapter/\w+/chapter_[^/?#]+)")
+    category = "manganelo"
+    root = "https://readmanganato.com"
+    pattern = BASE_PATTERN + r"(/(?:manga-\w+|chapter/\w+)/chapter[-_][^/?#]+)"
     test = (
-        ("https://manganelo.com/chapter/gq921227/chapter_23", {
+        ("https://readmanganato.com/manga-gn983696/chapter-23", {
             "pattern": r"https://s\d+\.\w+\.com/mangakakalot/g\d+/gq921227/"
                        r"vol3_chapter_23_24_yen/\d+\.jpg",
             "keyword": "3748087cf41abc97f991530e6fd53b291490d6d0",
             "content": "fbec629c71f66b246bfa0604204407c0d1c8ae38",
             "count": 39,
         }),
+        ("https://manganelo.com/chapter/gq921227/chapter_23"),
     )
 
     def __init__(self, match):
-        self.path = match.group(1)
-        ChapterExtractor.__init__(self, match, self.root + self.path)
+        domain, path = match.groups()
+        ChapterExtractor.__init__(self, match, "https://" + domain + path)
         self.session.headers['Referer'] = self.root
 
     def metadata(self, page):
         ]
 
 
-class ManganeloMangaExtractor(ManganeloBase, MangaExtractor):
+class ManganeloMangaExtractor(MangaExtractor):
     """Extractor for manga from manganelo.com"""
+    category = "manganelo"
+    root = "https://readmanganato.com"
     chapterclass = ManganeloChapterExtractor
-    pattern = (r"(?:https?://)?(?:www\.)?manganelo\.com"
-               r"(/(?:manga/|read_)\w+)")
+    pattern = BASE_PATTERN + r"(/(?:manga[-/]|read_)\w+)/?$"
     test = (
-        ("https://manganelo.com/manga/ol921234", {
-            "url": "6ba7f083a6944e414ad8214b74a0a40cb60d4562",
+        ("https://manganato.com/manga-gu983703", {
+            "pattern": ManganeloChapterExtractor.pattern,
+            "count": ">= 70",
         }),
         ("https://manganelo.com/manga/read_otome_no_teikoku", {
             "pattern": ManganeloChapterExtractor.pattern,
-            "count": ">= 40"
+            "count": ">= 40",
         }),
+        ("https://manganelo.com/manga/ol921234/"),
     )
 
+    def __init__(self, match):
+        domain, path = match.groups()
+        MangaExtractor.__init__(self, match, "https://" + domain + path)
+        self.session.headers['Referer'] = self.root
+
     def chapters(self, page):
         results = []
         data = self.parse_page(page, {"lang": "en", "language": "English"})
@@ -117,3 +114,13 @@ class ManganeloMangaExtractor(MangaExtractor):
             data["chapter"] = text.parse_int(chapter)
             data["chapter_minor"] = sep + minor
             results.append((url, data.copy()))
+
+    @staticmethod
+    def parse_page(page, data):
+        """Parse metadata on 'page' and add it to 'data'"""
+        text.extract_all(page, (
+            ("manga"  , '', ''),
+            ('author' , 'Author(s) :', ''),
+        ), values=data)
+        data["author"] = text.remove_html(data["author"])
+        return data

diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index 558e682..9b6d4ba 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -17,7 +17,7 @@ import re
 class MangaparkBase():
     """Base class for mangapark extractors"""
     category = "mangapark"
-    root_fmt = "https://mangapark.{}"
+    root_fmt = "https://v2.mangapark.{}"
     browser = "firefox"
 
     @staticmethod
@@ -51,7 +51,7 @@ class MangaparkBase():
 
 class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
     """Extractor for manga-chapters from mangapark.net"""
-    pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
+    pattern = (r"(?:https?://)?(?:www\.|v2\.)?mangapark\.(me|net|com)"
                r"/manga/([^?#]+/i\d+)")
     test = (
         ("https://mangapark.net/manga/gosu/i811653/c055/1", {
@@ -117,7 +117,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
 class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
     """Extractor for manga from mangapark.net"""
     chapterclass = MangaparkChapterExtractor
-    pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
+    pattern = (r"(?:https?://)?(?:www\.|v2\.)?mangapark\.(me|net|com)"
                r"(/manga/[^/?#]+)/?$")
     test = (
         ("https://mangapark.net/manga/aria", {

diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index e1081da..b74355d 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -158,7 +158,7 @@ class NozomiTagExtractor(NozomiExtractor):
     """Extractor for posts from tag searches on nozomi.la"""
     subcategory = "tag"
     directory_fmt = ("{category}", "{search_tags}")
-    archive_fmt = "t_{search_tags}_{postid}"
+    archive_fmt = "t_{search_tags}_{dataid}"
     pattern = r"(?:https?://)?nozomi\.la/tag/([^/?#]+)-(\d+)\."
     test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", {
         "pattern": r"^https://i.nozomi.la/\w/\w\w/\w+\.\w+$",
@@ -180,7 +180,7 @@ class NozomiSearchExtractor(NozomiExtractor):
     """Extractor for search results on nozomi.la"""
     subcategory = "search"
     directory_fmt = ("{category}", "{search_tags:J }")
-    archive_fmt = "t_{search_tags}_{postid}"
+    archive_fmt = "t_{search_tags}_{dataid}"
     pattern = r"(?:https?://)?nozomi\.la/search\.html\?q=([^&#]+)"
     test = ("https://nozomi.la/search.html?q=hibiscus%203:4_ratio#1", {
         "count": ">= 5",

diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 839e0b8..9c32d7a 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -117,12 +117,22 @@ class PatreonExtractor(Extractor):
         attr = post["attributes"]
         attr["id"] = text.parse_int(post["id"])
 
-        if post.get("current_user_can_view", True):
+        if attr.get("current_user_can_view", True):
+
+            relationships = post["relationships"]
 
             attr["images"] = self._files(post, included, "images")
             attr["attachments"] = self._files(post, included, "attachments")
             attr["date"] = text.parse_datetime(
                 attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
-            user = post["relationships"]["user"]
+
+            tags = relationships.get("user_defined_tags")
+            attr["tags"] = [
+                tag["id"].replace("user_defined;", "")
+                for tag in tags["data"]
+                if tag["type"] == "post_tag"
+            ] if tags else []
+
+            user = relationships["user"]
             attr["creator"] = (
                 self._user(user["links"]["related"]) or
                 included["user"][user["data"]["id"]])
@@ -299,6 +309,10 @@ class PatreonPostExtractor(PatreonExtractor):
         ("https://www.patreon.com/posts/19987002", {
             "count": 4,
         }),
+        # tags (#1539)
+        ("https://www.patreon.com/posts/free-post-12497641", {
+            "keyword": {"tags": ["AWMedia"]},
+        }),
         ("https://www.patreon.com/posts/not-found-123", {
             "exception": exception.NotFoundError,
         }),

diff --git a/gallery_dl/extractor/pillowfort.py b/gallery_dl/extractor/pillowfort.py
index cbd65d7..3c3fcd4 100644
--- a/gallery_dl/extractor/pillowfort.py
+++ b/gallery_dl/extractor/pillowfort.py
@@ -9,7 +9,9 @@
 """Extractors for https://www.pillowfort.social/"""
 
 from .common import Extractor, Message
-from .. import text
+from ..cache import cache
+from .. import text, exception
+import re
 
 BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social"
 
@@ -19,94 +21,171 @@ class PillowfortExtractor(Extractor):
     category = "pillowfort"
     root = "https://www.pillowfort.social"
     directory_fmt = ("{category}", "{username}")
-    filename_fmt = ("{post_id} {title|original_post[title]} "
+    filename_fmt = ("{post_id} {title|original_post[title]:?/ /}"
                     "{num:>02}.{extension}")
     archive_fmt = "{id}"
+    cookiedomain = "www.pillowfort.social"
 
     def __init__(self, match):
         Extractor.__init__(self, match)
         self.item = match.group(1)
-        self.reblogs = self.config("reblogs", False)
 
     def items(self):
-        for post in self.posts():
+        self.login()
+        inline = self.config("inline", True)
+        reblogs = self.config("reblogs", False)
+        external = self.config("external", False)
+
+        if inline:
+            inline = re.compile(r'src="(https://img\d+\.pillowfort\.social'
+                                r'/posts/[^"]+)').findall
 
+        for post in self.posts():
-            if "original_post" in post and not self.reblogs:
+            if "original_post" in post and not reblogs:
                 continue
 
-            files = post["media"]
-            del post["media"]
+            files = post.pop("media")
+            if inline:
+                for url in inline(post["content"]):
+                    files.append({"url": url})
 
             post["date"] = text.parse_datetime(
                 post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+            post["post_id"] = post.pop("id")
             yield Message.Directory, post
 
             post["num"] = 0
             for file in files:
                 url = file["url"]
-                if url:
-                    post.update(file)
+                if not url:
+                    continue
+
+                if file.get("embed_code"):
+                    if not external:
+                        continue
+                    msgtype = Message.Queue
+                else:
                     post["num"] += 1
+                    msgtype = Message.Url
+
+                post.update(file)
+                text.nameext_from_url(url, post)
+                post["hash"], _, post["filename"] = \
+                    post["filename"].partition("_")
+
+                if "id" not in file:
+                    post["id"] = post["hash"]
+
                 if "created_at" in file:
                     post["date"] = text.parse_datetime(
                         file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
-                    yield Message.Url, url, text.nameext_from_url(url, post)
+
+                yield msgtype, url, post
+
+    def login(self):
+        cget = self.session.cookies.get
+        if cget("_Pf_new_session", domain=self.cookiedomain) \
+                or cget("remember_user_token", domain=self.cookiedomain):
+            return
+
+        username, password = self._get_auth_info()
+        if username:
+            cookies = self._login_impl(username, password)
+            self._update_cookies(cookies)
+
+    @cache(maxage=14*24*3600, keyarg=1)
+    def _login_impl(self, username, password):
+        self.log.info("Logging in as %s", username)
+
+        url = "https://www.pillowfort.social/users/sign_in"
+        page = self.request(url).text
+        auth = text.extract(page, 'name="authenticity_token" value="', '"')[0]
+
+        headers = {"Origin": self.root, "Referer": url}
+        data = {
+            "utf8"              : "✓",
+            "authenticity_token": auth,
+            "user[email]"       : username,
+            "user[password]"    : password,
+            "user[remember_me]" : "1",
+        }
+        response = self.request(url, method="POST", headers=headers, data=data)
+
+        if not response.history:
+            raise exception.AuthenticationError()
+
+        return {
+            cookie.name: cookie.value
+            for cookie in response.history[0].cookies
+        }
 
 
 class PillowfortPostExtractor(PillowfortExtractor):
     """Extractor for a single pillowfort post"""
     subcategory = "post"
     pattern = BASE_PATTERN + r"/posts/(\d+)"
-    test = ("https://www.pillowfort.social/posts/27510", {
-        "pattern": r"https://img\d+\.pillowfort\.social/posts/\w+_out\d+\.png",
-        "count": 4,
-        "keyword": {
-            "avatar_url": str,
-            "col": 0,
-            "commentable": True,
-            "comments_count": int,
-            "community_id": None,
-            "content": str,
-            "created_at": str,
-            "date": "type:datetime",
-            "deleted": None,
-            "deleted_at": None,
-            "deleted_by_mod": None,
-            "deleted_for_flag_id": None,
-            "embed_code": None,
-            "id": int,
-            "last_activity": str,
-            "last_activity_elapsed": str,
-            "last_edited_at": None,
-            "likes_count": int,
-            "media_type": "picture",
-            "nsfw": False,
-            "num": int,
-            "original_post_id": None,
-            "original_post_user_id": None,
-            "picture_content_type": None,
-            "picture_file_name": None,
-            "picture_file_size": None,
-            "picture_updated_at": None,
-            "post_id": 27510,
-            "post_type": "picture",
-            "privacy": "public",
-            "reblog_copy_info": list,
-            "rebloggable": True,
-            "reblogged_from_post_id": None,
-            "reblogged_from_user_id": None,
-            "reblogs_count": int,
-            "row": int,
-            "small_image_url": None,
-            "tags": list,
-            "time_elapsed": str,
-            "timestamp": str,
-            "title": "What is Pillowfort.io? ",
-            "updated_at": str,
-            "url": r"re:https://img3.pillowfort.social/posts/.*\.png",
-            "user_id": 5,
-            "username": "Staff"
-        },
-    })
+    test = (
+        ("https://www.pillowfort.social/posts/27510", {
+            "pattern": r"https://img\d+\.pillowfort\.social"
+                       r"/posts/\w+_out\d+\.png",
+            "count": 4,
+            "keyword": {
+                "avatar_url": str,
+                "col": 0,
+                "commentable": True,
+                "comments_count": int,
+                "community_id": None,
+                "content": str,
+                "created_at": str,
+                "date": "type:datetime",
+                "deleted": None,
+                "deleted_at": None,
+                "deleted_by_mod": None,
+                "deleted_for_flag_id": None,
+                "embed_code": None,
+                "id": int,
+                "last_activity": str,
+                "last_activity_elapsed": str,
+                "last_edited_at": None,
+                "likes_count": int,
+                "media_type": "picture",
+                "nsfw": False,
+                "num": int,
+                "original_post_id": None,
+                "original_post_user_id": None,
+                "picture_content_type": None,
+                "picture_file_name": None,
+                "picture_file_size": None,
+                "picture_updated_at": None,
+                "post_id": 27510,
+                "post_type": "picture",
+                "privacy": "public",
+                "reblog_copy_info": list,
+                "rebloggable": True,
+                "reblogged_from_post_id": None,
+                "reblogged_from_user_id": None,
+                "reblogs_count": int,
+                "row": int,
+                "small_image_url": None,
+                "tags": list,
+                "time_elapsed": str,
+                "timestamp": str,
+                "title": "What is Pillowfort.io? ",
+                "updated_at": str,
+                "url": r"re:https://img3.pillowfort.social/posts/.*\.png",
+                "user_id": 5,
+                "username": "Staff"
+            },
+        }),
+        ("https://www.pillowfort.social/posts/1557500", {
+            "options": (("external", True), ("inline", False)),
+            "pattern": r"https://twitter\.com/Aliciawitdaart/status"
+                       r"/1282862493841457152",
+        }),
+        ("https://www.pillowfort.social/posts/1672518", {
+            "options": (("inline", True),),
+            "count": 3,
+        }),
+    )
 
     def posts(self):
         url = "{}/posts/{}/json/".format(self.root, self.item)

diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 8bfae06..8076fff 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -29,14 +29,28 @@ class PixivExtractor(Extractor):
         Extractor.__init__(self, match)
         self.api = PixivAppAPI(self)
         self.load_ugoira = self.config("ugoira", True)
-        self.translated_tags = self.config("translated-tags", False)
+        self.max_posts = self.config("max-posts", 0)
 
     def items(self):
-        tkey = "translated_name" if self.translated_tags else "name"
+        tags = self.config("tags", "japanese")
+        if tags == "original":
+            transform_tags = None
+        elif tags == "translated":
+            def transform_tags(work):
+                work["tags"] = list(set(
+                    tag["translated_name"] or tag["name"]
+                    for tag in work["tags"]))
+        else:
+            def transform_tags(work):
+                work["tags"] = [tag["name"] for tag in work["tags"]]
+
         ratings = {0: "General", 1: "R-18", 2: "R-18G"}
         metadata = self.metadata()
 
-        for work in self.works():
+        works = self.works()
+        if self.max_posts:
+            works = itertools.islice(works, self.max_posts)
+        for work in works:
             if not work["user"]["id"]:
                 continue
 
@@ -45,12 +59,10 @@ class PixivExtractor(Extractor):
             del work["meta_single_page"]
             del work["image_urls"]
             del work["meta_pages"]
+
+            if transform_tags:
+                transform_tags(work)
             work["num"] = 0
-            if self.translated_tags:
-                work["untranslated_tags"] = [
-                    tag["name"] for tag in work["tags"]
-                ]
-            work["tags"] = [tag[tkey] or tag["name"] for tag in work["tags"]]
             work["date"] = text.parse_datetime(work["create_date"])
             work["rating"] = ratings.get(work["x_restrict"])
             work["suffix"] = ""
@@ -66,6 +78,7 @@ class PixivExtractor(Extractor):
                 url = ugoira["zip_urls"]["medium"].replace(
                     "_ugoira600x600", "_ugoira1920x1080")
                 work["frames"] = ugoira["frames"]
+                work["_http_adjust_extension"] = False
                 yield Message.Url, url, text.nameext_from_url(url, work)
 
             elif work["page_count"] == 1:
@@ -115,7 +128,8 @@ class PixivUserExtractor(PixivExtractor):
         }),
         # deleted account
         ("http://www.pixiv.net/member_illust.php?id=173531", {
-            "count": 0,
+            "options": (("metadata", True),),
+            "exception": exception.NotFoundError,
         }),
         ("https://www.pixiv.net/en/users/173530"),
         ("https://www.pixiv.net/en/users/173530/manga"),
@@ -138,6 +152,11 @@ class PixivUserExtractor(PixivExtractor):
         self.user_id = u1 or u2 or u3
         self.tag = t1 or t2
 
+    def metadata(self):
+        if self.config("metadata"):
+            return {"user": self.api.user_detail(self.user_id)}
+        return {}
+
     def works(self):
         works = self.api.user_illusts(self.user_id)

diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index 971347b..c62a942 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -78,6 +78,8 @@ class ReactorExtractor(Extractor):
 
     def _parse_post(self, post):
         post, _, script = post.partition('