author     Unit 193 <unit193@unit193.net>    2021-06-22 22:30:44 -0400
committer  Unit 193 <unit193@unit193.net>    2021-06-22 22:30:44 -0400
commit     29b8ce4676815053724f96769fa09d42428a79af (patch)
tree       fe96f96b15332550c500a55ec92731117424ce8e
parent     df933b07457921cd21eb95a87bd74375b76613ab (diff)
parent     32de2b06db501c7de81678bce8e3e0c3e63d340c (diff)
download   gallery-dl-29b8ce4676815053724f96769fa09d42428a79af.tar.bz2
           gallery-dl-29b8ce4676815053724f96769fa09d42428a79af.tar.xz
           gallery-dl-29b8ce4676815053724f96769fa09d42428a79af.tar.zst

Update upstream source from tag 'upstream/1.18.0'

Update to upstream version '1.18.0'
with Debian dir ef07ea3c642369ef40d1f6eadc566c8be0eea8a9

48 files changed, 1056 insertions(+), 392 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index dcc1299..0a4c90c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,38 @@
 # Changelog
 
+## 1.18.0 - 2021-06-19
+### Additions
+- [foolfuuka] support `archive.wakarimasen.moe` ([#1595](https://github.com/mikf/gallery-dl/issues/1595))
+- [mangadex] implement login with username & password ([#1535](https://github.com/mikf/gallery-dl/issues/1535))
+- [mangadex] add extractor for a user's followed feed ([#1535](https://github.com/mikf/gallery-dl/issues/1535))
+- [pixiv] support fetching privately followed users ([#1628](https://github.com/mikf/gallery-dl/issues/1628))
+- implement conditional filenames ([#1394](https://github.com/mikf/gallery-dl/issues/1394))
+- implement `filter` option for post processors ([#1460](https://github.com/mikf/gallery-dl/issues/1460))
+- add `-T/--terminate` command-line option ([#1399](https://github.com/mikf/gallery-dl/issues/1399))
+- add `-P/--postprocessor` command-line option ([#1583](https://github.com/mikf/gallery-dl/issues/1583))
+### Changes
+- [kemonoparty] update default filenames and archive IDs ([#1514](https://github.com/mikf/gallery-dl/issues/1514))
+- [twitter] update default settings
+  - change `retweets` and `quoted` options from `true` to `false`
+  - change directory format for search results to the same as other extractors
+- require an argument for `--clear-cache`
+### Fixes
+- [500px] update GraphQL queries
+- [furaffinity] improve metadata extraction ([#1630](https://github.com/mikf/gallery-dl/issues/1630))
+- [hitomi] update image URL generation ([#1637](https://github.com/mikf/gallery-dl/issues/1637))
+- [idolcomplex] improve and fix pagination ([#1594](https://github.com/mikf/gallery-dl/issues/1594), [#1601](https://github.com/mikf/gallery-dl/issues/1601))
+- [instagram] fix login ([#1631](https://github.com/mikf/gallery-dl/issues/1631))
+- [instagram] update query hashes
+- [mangadex] update to API v5 ([#1535](https://github.com/mikf/gallery-dl/issues/1535))
+- [mangafox] improve URL pattern ([#1608](https://github.com/mikf/gallery-dl/issues/1608))
+- [oauth] prevent exceptions when reporting errors ([#1603](https://github.com/mikf/gallery-dl/issues/1603))
+- [philomena] fix tag escapes handling ([#1629](https://github.com/mikf/gallery-dl/issues/1629))
+- [redgifs] update API server address ([#1632](https://github.com/mikf/gallery-dl/issues/1632))
+- [sankaku] handle empty tags ([#1617](https://github.com/mikf/gallery-dl/issues/1617))
+- [subscribestar] improve attachment filenames ([#1609](https://github.com/mikf/gallery-dl/issues/1609))
+- [unsplash] update collections URL pattern ([#1627](https://github.com/mikf/gallery-dl/issues/1627))
+- [postprocessor:metadata] handle dicts in `mode:tags` ([#1598](https://github.com/mikf/gallery-dl/issues/1598))
+
 ## 1.17.5 - 2021-05-30
 ### Additions
 - [kemonoparty] add `metadata` option ([#1548](https://github.com/mikf/gallery-dl/issues/1548))
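The "conditional filenames" feature listed above is documented in the gallery-dl.conf.5 hunks further down in this diff: extractor.*.filename may now be an object mapping Python expressions to format strings, evaluated in order, with the empty key "" acting as the fallback. A minimal config sketch built from the documented example (the surrounding "extractor" nesting is added here for illustration and is not part of this commit):

    {
        "extractor": {
            "filename": {
                "extension == 'mp4'": "{id}_video.{extension}",
                "'nature' in title" : "{id}_{title}.{extension}",
                ""                  : "{id}_default.{extension}"
            }
        }
    }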
diff --git a/PKG-INFO b/PKG-INFO
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery_dl
-Version: 1.17.5
+Version: 1.18.0
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
         Prebuilt executable files with a Python interpreter and
         required Python packages included are available for
 
-        - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.exe>`__
-        - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.bin>`__
+        - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.exe>`__
+        - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.bin>`__
 
         | Executables build from the latest commit can be found at
         | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -220,6 +220,7 @@ Description: ==========
         ``imgbb``,
         ``inkbunny``,
         ``instagram``,
+        ``mangadex``,
         ``mangoxo``,
         ``pillowfort``,
         ``pinterest``,
diff --git a/README.rst b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -64,8 +64,8 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.bin>`__
 
 | Executables build from the latest commit can be found at
 | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -209,6 +209,7 @@ and optional for
 ``imgbb``,
 ``inkbunny``,
 ``instagram``,
+``mangadex``,
 ``mangoxo``,
 ``pillowfort``,
 ``pinterest``,
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 436260b..15806e8 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -11,7 +11,7 @@ _arguments -C -S \
 {-i,--input-file}'[Download URLs found in FILE ("-" for stdin). More than one --input-file can be specified]':'<file>':_files \
 --cookies'[File to load additional cookies from]':'<file>':_files \
 --proxy'[Use the specified proxy]':'<url>' \
---clear-cache'[Delete all cached login sessions, cookies, etc.]':'<module>' \
+--clear-cache'[Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)]':'<module>' \
 {-q,--quiet}'[Activate quiet mode]' \
 {-v,--verbose}'[Print various debugging information]' \
 {-g,--get-urls}'[Print URLs instead of downloading]' \
@@ -27,7 +27,6 @@ _arguments -C -S \
 --write-pages'[Write downloaded intermediary pages to files in the current directory to debug problems]' \
 {-r,--limit-rate}'[Maximum download rate (e.g. 500k or 2.5M)]':'<rate>' \
 {-R,--retries}'[Maximum number of retries for failed HTTP requests or -1 for infinite retries (default: 4)]':'<n>' \
-{-A,--abort}'[Abort extractor run after N consecutive file downloads have been skipped, e.g. if files with the same filename already exist]':'<n>' \
 --http-timeout'[Timeout for HTTP connections (default: 30.0)]':'<seconds>' \
 --sleep'[Number of seconds to sleep before each download]':'<seconds>' \
 --filesize-min'[Do not download files smaller than SIZE (e.g. 500k or 2.5M)]':'<size>' \
@@ -44,7 +43,9 @@ _arguments -C -S \
 {-u,--username}'[Username to login with]':'<user>' \
 {-p,--password}'[Password belonging to the given username]':'<pass>' \
 --netrc'[Enable .netrc authentication data]' \
---download-archive'[Record all downloaded files in the archive file and skip downloading any file already in it.]':'<file>':_files \
+--download-archive'[Record all downloaded files in the archive file and skip downloading any file already in it]':'<file>':_files \
+{-A,--abort}'[Stop current extractor run after N consecutive file downloads were skipped]':'<n>' \
+{-T,--terminate}'[Stop current and parent extractor runs after N consecutive file downloads were skipped]':'<n>' \
 --range'[Index-range(s) specifying which images to download. For example "5-10" or "1,3-5,10-"]':'<range>' \
 --chapter-range'[Like "--range", but applies to manga-chapters and other delegated URLs]':'<range>' \
 --filter'[Python expression controlling which images to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K". Example: --filter "image_width >= 1000 and rating in ("s", "q")"]':'<expr>' \
@@ -56,6 +57,7 @@ _arguments -C -S \
 --write-tags'[Write image tags to separate text files]' \
 --mtime-from-date'[Set file modification times according to "date" metadata]' \
 --exec'[Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"]':'<cmd>' \
---exec-after'[Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"]':'<cmd>' && rc=0
+--exec-after'[Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"]':'<cmd>' \
+{-P,--postprocessor}'[Activate the specified post processor]':'<name>' && rc=0
 
 return rc
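The two completion entries just added encode the new skip semantics: -A/--abort stops only the extractor run in which N consecutive downloads were skipped, while the new -T/--terminate also stops the parent runs that delegated to it (per the gallery_dl/__init__.py hunk later in this diff, they set the "skip" option to "abort:N" and "terminate:N" respectively). Hypothetical invocations with a placeholder URL:

    gallery-dl -A 3 https://example.org/user/somebody    # 3 skips in a row: stop this extractor only
    gallery-dl -T 3 https://example.org/user/somebody    # 3 skips in a row: stop parent extractors too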
For example "5-10" or "1,3-5,10-"]':'<range>' \ --chapter-range'[Like "--range", but applies to manga-chapters and other delegated URLs]':'<range>' \ --filter'[Python expression controlling which images to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K". Example: --filter "image_width >= 1000 and rating in ("s", "q")"]':'<expr>' \ @@ -56,6 +57,7 @@ _arguments -C -S \ --write-tags'[Write image tags to separate text files]' \ --mtime-from-date'[Set file modification times according to "date" metadata]' \ --exec'[Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"]':'<cmd>' \ ---exec-after'[Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"]':'<cmd>' && rc=0 +--exec-after'[Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"]':'<cmd>' \ +{-P,--postprocessor}'[Activate the specified post processor]':'<name>' && rc=0 return rc diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl index 9a3a63e..f3d1100 100644 --- a/data/completion/gallery-dl +++ b/data/completion/gallery-dl @@ -10,7 +10,7 @@ _gallery_dl() elif [[ "${prev}" =~ ^(-d|--dest)$ ]]; then COMPREPLY=( $(compgen -d -- "${cur}") ) else - COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --abort --http-timeout --sleep --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after" -- "${cur}") ) + COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") ) fi } diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 719b8b4..25da021 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2021-05-30" "1.17.5" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2021-06-19" "1.18.0" "gallery-dl Manual" .\" disable hyphenation .nh @@ -36,7 +36,7 @@ File to load additional cookies from Use the specified proxy .TP .B "\-\-clear\-cache" \f[I]MODULE\f[] -Delete all cached login sessions, cookies, etc. +Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything) .TP .B "\-q, \-\-quiet" Activate quiet mode @@ -83,9 +83,6 @@ Maximum download rate (e.g. 
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 719b8b4..25da021 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2021-05-30" "1.17.5" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2021-06-19" "1.18.0" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
 
@@ -36,7 +36,7 @@ File to load additional cookies from
 Use the specified proxy
 .TP
 .B "\-\-clear\-cache" \f[I]MODULE\f[]
-Delete all cached login sessions, cookies, etc.
+Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)
 .TP
 .B "\-q, \-\-quiet"
 Activate quiet mode
@@ -83,9 +83,6 @@ Maximum download rate (e.g. 500k or 2.5M)
 .B "\-R, \-\-retries" \f[I]N\f[]
 Maximum number of retries for failed HTTP requests or -1 for infinite retries (default: 4)
 .TP
-.B "\-A, \-\-abort" \f[I]N\f[]
-Abort extractor run after N consecutive file downloads have been skipped, e.g. if files with the same filename already exist
-.TP
 .B "\-\-http\-timeout" \f[I]SECONDS\f[]
 Timeout for HTTP connections (default: 30.0)
 .TP
@@ -132,7 +129,13 @@ Password belonging to the given username
 Enable .netrc authentication data
 .TP
 .B "\-\-download\-archive" \f[I]FILE\f[]
-Record all downloaded files in the archive file and skip downloading any file already in it.
+Record all downloaded files in the archive file and skip downloading any file already in it
+.TP
+.B "\-A, \-\-abort" \f[I]N\f[]
+Stop current extractor run after N consecutive file downloads were skipped
+.TP
+.B "\-T, \-\-terminate" \f[I]N\f[]
+Stop current and parent extractor runs after N consecutive file downloads were skipped
 .TP
 .B "\-\-range" \f[I]RANGE\f[]
 Index-range(s) specifying which images to download. For example '5-10' or '1,3-5,10-'
@@ -169,6 +172,9 @@ Execute CMD for each downloaded file. Example: --exec 'convert {} {}.png && rm {
 .TP
 .B "\-\-exec\-after" \f[I]CMD\f[]
 Execute CMD after all files were downloaded successfully. Example: --exec-after 'cd {} && convert * ../doc.pdf'
+.TP
+.B "\-P, \-\-postprocessor" \f[I]NAME\f[]
+Activate the specified post processor
 .SH EXAMPLES
 .TP
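The new -P/--postprocessor option activates a post processor by name from the command line, and the gallery-dl.conf.5 hunks below add a matching "filter" key: an expression, evaluated like image-filter, that must be true for the post processor to run on a given file. A sketch of the documented zip example as a config entry (the extractor.postprocessors placement follows the existing option documentation; the values are the ones shown in the hunk below):

    {
        "extractor": {
            "postprocessors": [{
                "name"       : "zip",
                "compression": "store",
                "extension"  : "cbz",
                "filter"     : "extension not in ('zip', 'rar')",
                "whitelist"  : ["mangadex", "exhentai", "nhentai"]
            }]
        }
    }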
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index f35f218..84e8e0e 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2021-05-30" "1.17.5" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2021-06-19" "1.18.0" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
 .\" disable justification (adjust text to left margin only)
@@ -75,14 +75,31 @@ those as makeshift comments by settings their values to arbitrary strings.
 .SH EXTRACTOR OPTIONS
 .SS extractor.*.filename
 .IP "Type:" 6
-\f[I]string\f[]
+\f[I]string\f[] or \f[I]object\f[]
 .IP "Example:" 4
+.br
+* .. code::
+
 "{manga}_c{chapter}_{page:>03}.{extension}"
+.br
+* .. code:: json
+
+{
+"extension == 'mp4'": "{id}_video.{extension}",
+"'nature' in title" : "{id}_{title}.{extension}",
+"" : "{id}_default.{extension}"
+}
+
+
 .IP "Description:" 4
-A \f[I]format string\f[] to build the resulting filename
-for a downloaded file.
+A \f[I]format string\f[] to build filenames for downloaded files with.
+
+If this is an \f[I]object\f[], it must contain Python expressions mapping to the
+filename format strings to use.
+These expressions are evaluated in the order as specified in Python 3.6+
+and in an undetermined order in Python 3.4 and 3.5.
 
 The available replacement keys depend on the extractor used. A list
 of keys for a specific one can be acquired by calling *gallery-dl*
@@ -358,9 +375,9 @@ and optional for
 .br
 * \f[I]aryion\f[]
 .br
-* \f[I]danbooru\f[]
+* \f[I]danbooru\f[] (*)
 .br
-* \f[I]e621\f[]
+* \f[I]e621\f[] (*)
 .br
 * \f[I]exhentai\f[]
 .br
@@ -372,6 +389,8 @@ and optional for
 .br
 * \f[I]instagram\f[]
 .br
+* \f[I]mangadex\f[]
+.br
 * \f[I]mangoxo\f[]
 .br
 * \f[I]pillowfort\f[]
@@ -392,7 +411,7 @@ These values can also be specified via the
 \f[I]-u/--username\f[] and \f[I]-p/--password\f[] command-line options
 or by using a \f[I].netrc\f[] file. (see Authentication_)
 
-Note: The password value for \f[I]danbooru\f[] and \f[I]e621\f[] should be
+(*) The password value for \f[I]danbooru\f[] and \f[I]e621\f[] should be
 the API key found in your user profile, not the actual account password.
@@ -1900,7 +1919,7 @@ Fetch media from all Tweets and replies in a \f[I]conversation
 \f[I]bool\f[]
 .IP "Default:" 9
-\f[I]true\f[]
+\f[I]false\f[]
 .IP "Description:" 4
 Fetch media from quoted Tweets.
@@ -1922,7 +1941,7 @@ Fetch media from replies to other Tweets.
 \f[I]bool\f[]
 .IP "Default:" 9
-\f[I]true\f[]
+\f[I]false\f[]
 .IP "Description:" 4
 Fetch media from Retweets.
@@ -3206,12 +3225,18 @@ logging output to a file.
 "name"       : "zip",
 "compression": "store",
 "extension"  : "cbz",
+"filter"     : "extension not in ('zip', 'rar')",
 "whitelist"  : ["mangadex", "exhentai", "nhentai"]
 }
 
 .IP "Description:" 4
 An \f[I]object\f[] containing a \f[I]"name"\f[] attribute specifying the
 post-processor type, as well as any of its \f[I]options\f[].
+
+It is possible to set a \f[I]"filter"\f[] expression similar to
+\f[I]image-filter\f[] to only run a post-processor
+conditionally.
+
 It is also possible set a \f[I]"whitelist"\f[] or \f[I]"blacklist"\f[]
 to only enable or disable a post-processor for the specified
 extractor categories.
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 7497cd6..9514c7a 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -256,9 +256,9 @@
         "password": null,
         "cards": false,
         "conversations": false,
-        "quoted": true,
+        "quoted": false,
         "replies": true,
-        "retweets": true,
+        "retweets": false,
         "text-tweets": false,
         "twitpic": false,
         "users": "timeline",
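The docs/gallery-dl.conf hunk above tracks the changed Twitter defaults ("quoted" and "retweets" are now false). Users who relied on the old behavior can opt back in explicitly; a minimal override:

    {
        "extractor": {
            "twitter": {
                "quoted"  : true,
                "retweets": true
            }
        }
    }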
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 7fe851f..b53c326 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery-dl
-Version: 1.17.5
+Version: 1.18.0
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
         Prebuilt executable files with a Python interpreter and
         required Python packages included are available for
 
-        - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.exe>`__
-        - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.bin>`__
+        - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.exe>`__
+        - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.bin>`__
 
         | Executables build from the latest commit can be found at
         | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -220,6 +220,7 @@ Description: ==========
         ``imgbb``,
         ``inkbunny``,
         ``instagram``,
+        ``mangadex``,
         ``mangoxo``,
         ``pillowfort``,
         ``pinterest``,
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 8154afc..d5893b7 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -6,23 +6,16 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-from __future__ import unicode_literals, print_function
+import sys
+import json
+import logging
+from . import version, config, option, output, extractor, job, util, exception
 
 __author__ = "Mike Fährmann"
 __copyright__ = "Copyright 2014-2021 Mike Fährmann"
 __license__ = "GPLv2"
 __maintainer__ = "Mike Fährmann"
 __email__ = "mike_faehrmann@web.de"
-
-import sys
-
-if sys.hexversion < 0x3040000:
-    sys.exit("Python 3.4+ required")
-
-import json
-import logging
-from . import version, config, option, output, extractor, job, util, exception
-
 __version__ = version.__version__
@@ -126,6 +119,8 @@ def main():
             config.set((), "postprocessors", args.postprocessors)
         if args.abort:
             config.set((), "skip", "abort:" + str(args.abort))
+        if args.terminate:
+            config.set((), "skip", "terminate:" + str(args.terminate))
         for opts in args.options:
             config.set(*opts)
diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py
index 5ab68bf..7a49b61 100644
--- a/gallery_dl/cache.py
+++ b/gallery_dl/cache.py
@@ -168,7 +168,7 @@ def cache(maxage=3600, keyarg=None):
     return wrap
 
 
-def clear(module="all"):
+def clear(module):
    """Delete database entries for 'module'"""
     db = DatabaseCacheDecorator.db
     if not db:
@@ -176,19 +176,18 @@
     rowcount = 0
     cursor = db.cursor()
-    module = module.lower()
 
     try:
-        if module == "all":
+        if module == "ALL":
             cursor.execute("DELETE FROM data")
         else:
             cursor.execute(
                 "DELETE FROM data "
                 "WHERE key LIKE 'gallery_dl.extractor.' || ? || '.%'",
-                (module,)
+                (module.lower(),)
             )
     except sqlite3.OperationalError:
-        pass  # database is not initialized, can't be modified, etc.
+        pass  # database not initialized, cannot be modified, etc.
     else:
         rowcount = cursor.rowcount
         db.commit()
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index c2c5a66..4cf5e48 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -11,7 +11,6 @@ from .common import Extractor, Message
 import json
 
-
 BASE_PATTERN = r"(?:https?://)?(?:web\.)?500px\.com"
@@ -78,15 +77,14 @@ class _500pxExtractor(Extractor):
         headers = {"Origin": self.root, "X-CSRF-Token": csrf_token}
         return self.request(url, headers=headers, params=params).json()
 
-    def _request_graphql(self, opname, variables, query_hash):
+    def _request_graphql(self, opname, variables):
         url = "https://api.500px.com/graphql"
-        params = {
+        data = {
             "operationName": opname,
             "variables"    : json.dumps(variables),
-            "extensions"   : '{"persistedQuery":{"version":1'
-                             ',"sha256Hash":"' + query_hash + '"}}',
+            "query"        : QUERIES[opname],
         }
-        return self.request(url, params=params).json()["data"]
+        return self.request(url, method="POST", json=data).json()["data"]
 
 
 class _500pxUserExtractor(_500pxExtractor):
@@ -111,8 +109,6 @@ class _500pxUserExtractor(_500pxExtractor):
         variables = {"username": self.user, "pageSize": 20}
         photos = self._request_graphql(
             "OtherPhotosQuery", variables,
-            "018a5e5117bd72bdf28066aad02c4f2d"
-            "8acdf7f6127215d231da60e24080eb1b",
         )["user"]["photos"]
 
         while True:
@@ -124,8 +120,6 @@ class _500pxUserExtractor(_500pxExtractor):
             variables["cursor"] = photos["pageInfo"]["endCursor"]
             photos = self._request_graphql(
                 "OtherPhotosPaginationContainerQuery", variables,
-                "b4af70d42c71a5e43f0be36ce60dc81e"
-                "9742ebc117cde197350f2b86b5977d98",
             )["userByUsername"]["photos"]
@@ -159,7 +153,6 @@ class _500pxGalleryExtractor(_500pxExtractor):
     def metadata(self):
         user = self._request_graphql(
             "ProfileRendererQuery", {"username": self.user_name},
-            "fcecc7028c308115b0defebc63acec3fe3c12df86a602c3e1785ba5cfb8fff47",
         )["profile"]
         self.user_id = str(user["legacyId"])
@@ -172,7 +165,6 @@ class 
_500pxGalleryExtractor(_500pxExtractor): } gallery = self._request_graphql( "GalleriesDetailQueryRendererQuery", variables, - "eda3c77ca4efe4b3347ec9c08befe3bd2c58099ebfb1f680d829fcd26d34f12d", )["gallery"] self._photos = gallery["photos"] @@ -200,8 +192,6 @@ class _500pxGalleryExtractor(_500pxExtractor): variables["cursor"] = photos["pageInfo"]["endCursor"] photos = self._request_graphql( "GalleriesDetailPaginationContainerQuery", variables, - "466cf6661a07e7fdca465edb39118efb" - "80fb157c6d3f620c7f518cdae0832c78", )["galleryByOwnerIdAndSlugOrToken"]["photos"] @@ -261,3 +251,394 @@ class _500pxImageExtractor(_500pxExtractor): def photos(self): edges = ({"node": {"legacyId": self.photo_id}},) return self._extend(edges) + + +QUERIES = { + + "OtherPhotosQuery": """\ +query OtherPhotosQuery($username: String!, $pageSize: Int) { + user: userByUsername(username: $username) { + ...OtherPhotosPaginationContainer_user_RlXb8 + id + } +} + +fragment OtherPhotosPaginationContainer_user_RlXb8 on User { + photos(first: $pageSize, privacy: PROFILE, sort: ID_DESC) { + edges { + node { + id + legacyId + canonicalPath + width + height + name + isLikedByMe + notSafeForWork + photographer: uploader { + id + legacyId + username + displayName + canonicalPath + followedByUsers { + isFollowedByMe + } + } + images(sizes: [33, 35]) { + size + url + jpegUrl + webpUrl + id + } + __typename + } + cursor + } + totalCount + pageInfo { + endCursor + hasNextPage + } + } +} +""", + + "OtherPhotosPaginationContainerQuery": """\ +query OtherPhotosPaginationContainerQuery($username: String!, $pageSize: Int, $cursor: String) { + userByUsername(username: $username) { + ...OtherPhotosPaginationContainer_user_3e6UuE + id + } +} + +fragment OtherPhotosPaginationContainer_user_3e6UuE on User { + photos(first: $pageSize, after: $cursor, privacy: PROFILE, sort: ID_DESC) { + edges { + node { + id + legacyId + canonicalPath + width + height + name + isLikedByMe + notSafeForWork + photographer: uploader { + id + legacyId + username + displayName + canonicalPath + followedByUsers { + isFollowedByMe + } + } + images(sizes: [33, 35]) { + size + url + jpegUrl + webpUrl + id + } + __typename + } + cursor + } + totalCount + pageInfo { + endCursor + hasNextPage + } + } +} +""", + + "ProfileRendererQuery": """\ +query ProfileRendererQuery($username: String!) 
{ + profile: userByUsername(username: $username) { + id + legacyId + userType: type + username + firstName + displayName + registeredAt + canonicalPath + avatar { + ...ProfileAvatar_avatar + id + } + userProfile { + firstname + lastname + state + country + city + about + id + } + socialMedia { + website + twitter + instagram + facebook + id + } + coverPhotoUrl + followedByUsers { + totalCount + isFollowedByMe + } + followingUsers { + totalCount + } + membership { + expiryDate + membershipTier: tier + photoUploadQuota + refreshPhotoUploadQuotaAt + paymentStatus + id + } + profileTabs { + tabs { + name + visible + } + } + ...EditCover_cover + photoStats { + likeCount + viewCount + } + photos(privacy: PROFILE) { + totalCount + } + licensingPhotos(status: ACCEPTED) { + totalCount + } + portfolio { + id + status + userDisabled + } + } +} + +fragment EditCover_cover on User { + coverPhotoUrl +} + +fragment ProfileAvatar_avatar on UserAvatar { + images(sizes: [MEDIUM, LARGE]) { + size + url + id + } +} +""", + + "GalleriesDetailQueryRendererQuery": """\ +query GalleriesDetailQueryRendererQuery($galleryOwnerLegacyId: ID!, $ownerLegacyId: String, $slug: String, $token: String, $pageSize: Int, $gallerySize: Int) { + galleries(galleryOwnerLegacyId: $galleryOwnerLegacyId, first: $gallerySize) { + edges { + node { + legacyId + description + name + privacy + canonicalPath + notSafeForWork + buttonName + externalUrl + cover { + images(sizes: [35, 33]) { + size + webpUrl + jpegUrl + id + } + id + } + photos { + totalCount + } + id + } + } + } + gallery: galleryByOwnerIdAndSlugOrToken(ownerLegacyId: $ownerLegacyId, slug: $slug, token: $token) { + ...GalleriesDetailPaginationContainer_gallery_RlXb8 + id + } +} + +fragment GalleriesDetailPaginationContainer_gallery_RlXb8 on Gallery { + id + legacyId + name + privacy + notSafeForWork + ownPhotosOnly + canonicalPath + publicSlug + lastPublishedAt + photosAddedSinceLastPublished + reportStatus + creator { + legacyId + id + } + cover { + images(sizes: [33, 32, 36, 2048]) { + url + size + webpUrl + id + } + id + } + description + externalUrl + buttonName + photos(first: $pageSize) { + totalCount + edges { + cursor + node { + id + legacyId + canonicalPath + name + description + category + uploadedAt + location + width + height + isLikedByMe + photographer: uploader { + id + legacyId + username + displayName + canonicalPath + avatar { + images(sizes: SMALL) { + url + id + } + id + } + followedByUsers { + totalCount + isFollowedByMe + } + } + images(sizes: [33, 32]) { + size + url + webpUrl + id + } + __typename + } + } + pageInfo { + endCursor + hasNextPage + } + } +} +""", + + "GalleriesDetailPaginationContainerQuery": """\ +query GalleriesDetailPaginationContainerQuery($ownerLegacyId: String, $slug: String, $token: String, $pageSize: Int, $cursor: String) { + galleryByOwnerIdAndSlugOrToken(ownerLegacyId: $ownerLegacyId, slug: $slug, token: $token) { + ...GalleriesDetailPaginationContainer_gallery_3e6UuE + id + } +} + +fragment GalleriesDetailPaginationContainer_gallery_3e6UuE on Gallery { + id + legacyId + name + privacy + notSafeForWork + ownPhotosOnly + canonicalPath + publicSlug + lastPublishedAt + photosAddedSinceLastPublished + reportStatus + creator { + legacyId + id + } + cover { + images(sizes: [33, 32, 36, 2048]) { + url + size + webpUrl + id + } + id + } + description + externalUrl + buttonName + photos(first: $pageSize, after: $cursor) { + totalCount + edges { + cursor + node { + id + legacyId + canonicalPath + name + description + category + 
uploadedAt + location + width + height + isLikedByMe + photographer: uploader { + id + legacyId + username + displayName + canonicalPath + avatar { + images(sizes: SMALL) { + url + id + } + id + } + followedByUsers { + totalCount + isFollowedByMe + } + } + images(sizes: [33, 32]) { + size + url + webpUrl + id + } + __typename + } + } + pageInfo { + endCursor + hasNextPage + } + } +} +""", + +} diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py index a057b84..e354cb7 100644 --- a/gallery_dl/extractor/cyberdrop.py +++ b/gallery_dl/extractor/cyberdrop.py @@ -8,7 +8,7 @@ from .common import Extractor, Message from .. import text -import base64 +import binascii class CyberdropAlbumExtractor(Extractor): @@ -52,7 +52,7 @@ class CyberdropAlbumExtractor(Extractor): yield Message.Directory, data for file_b64 in files: - file = base64.b64decode(file_b64.encode()).decode() + file = binascii.a2b_base64(file_b64).decode() text.nameext_from_url(file, data) data["filename"], _, data["id"] = data["filename"].rpartition("-") yield Message.Url, "https://f.cyberdrop.cc/" + file, data diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 9a461a4..70e268d 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -918,7 +918,7 @@ class DeviantartOAuthAPI(): def __init__(self, extractor): self.extractor = extractor self.log = extractor.log - self.headers = {} + self.headers = {"dA-minor-version": "20200519"} self.delay = extractor.config("wait-min", 0) self.delay_min = max(2, self.delay) diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 5962b9e..5ea3adb 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -90,7 +90,9 @@ BASE_PATTERN = FoolfuukaExtractor.update({ }, "thebarchive": { "root": "https://thebarchive.com", - "pattern": r"thebarchive\.com", + }, + "wakarimasen": { + "root": "https://archive.wakarimasen.moe", }, }) @@ -137,6 +139,9 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): ("https://thebarchive.com/b/thread/739772332/", { "url": "07d39d2cb48f40fb337dc992993d965b0cd5f7cd", }), + ("https://archive.wakarimasen.moe/a/thread/223157648/", { + "url": "fef0758d2eb81b1ba783051fd5ec491d70107a78", + }), ) def __init__(self, match): @@ -175,6 +180,7 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor): ("https://archive.nyafuu.org/c/"), ("https://rbt.asia/g/"), ("https://thebarchive.com/b/"), + ("https://archive.wakarimasen.moe/a/"), ) def __init__(self, match): @@ -218,6 +224,7 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor): ("https://archive.nyafuu.org/_/search/text/test/"), ("https://rbt.asia/_/search/text/test/"), ("https://thebarchive.com/_/search/text/test/"), + ("https://archive.wakarimasen.moe/a/search/text/test/"), ) def __init__(self, match): diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index a7b0356..86e1678 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Mike Fährmann +# Copyright 2020-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,7 +11,6 @@ from .common import Extractor, Message from .. 
import text, util - BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?furaffinity\.net" @@ -19,7 +18,7 @@ class FuraffinityExtractor(Extractor): """Base class for furaffinity extractors""" category = "furaffinity" directory_fmt = ("{category}", "{user!l}") - filename_fmt = "{id} {title}.{extension}" + filename_fmt = "{id}{title:? //}.{extension}" archive_fmt = "{id}" cookiedomain = ".furaffinity.net" root = "https://www.furaffinity.net" @@ -55,9 +54,6 @@ class FuraffinityExtractor(Extractor): def _parse_post(self, post_id): url = "{}/view/{}/".format(self.root, post_id) extr = text.extract_from(self.request(url).text) - title, _, artist = text.unescape(extr( - 'property="og:title" content="', '"')).rpartition(" by ") - artist_url = artist.replace("_", "").lower() path = extr('href="//d', '"') if not path: @@ -74,18 +70,16 @@ class FuraffinityExtractor(Extractor): rh = text.remove_html data = text.nameext_from_url(path, { - "id" : pi(post_id), - "title" : title, - "artist" : artist, - "artist_url": artist_url, - "user" : self.user or artist_url, - "url" : "https://d" + path + "id" : pi(post_id), + "url": "https://d" + path, }) tags = extr('class="tags-row">', '</section>') if tags: # new site layout data["tags"] = text.split_html(tags) + data["title"] = text.unescape(extr("<h2><p>", "</p></h2>")) + data["artist"] = extr("<strong>", "<") data["description"] = self._process_description(extr( 'class="section-body">', '</div>')) data["views"] = pi(rh(extr('class="views">', '</span>'))) @@ -100,6 +94,8 @@ class FuraffinityExtractor(Extractor): data["height"] = pi(extr("", "p")) else: # old site layout + data["title"] = text.unescape(extr("<h2>", "</h2>")) + data["artist"] = extr(">", "<") data["fa_category"] = extr("<b>Category:</b>", "<").strip() data["theme"] = extr("<b>Theme:</b>", "<").strip() data["species"] = extr("<b>Species:</b>", "<").strip() @@ -114,6 +110,9 @@ class FuraffinityExtractor(Extractor): data["rating"] = extr('<img alt="', ' ') data["description"] = self._process_description(extr( "</table>", "</table>")) + + data["artist_url"] = data["artist"].replace("_", "").lower() + data["user"] = self.user or data["artist_url"] data["date"] = text.parse_timestamp(data["filename"].partition(".")[0]) return data diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py index 4e62165..5732816 100644 --- a/gallery_dl/extractor/gfycat.py +++ b/gallery_dl/extractor/gfycat.py @@ -177,7 +177,10 @@ class GfycatAPI(): @cache(keyarg=1, maxage=3600) def _authenticate_impl(self, category): - url = "https://weblogin." + category + ".com/oauth/webtoken" + if category == "redgifs": + url = "https://api.redgifs.com/v1/oauth/webtoken" + else: + url = "https://weblogin." + category + ".com/oauth/webtoken" data = {"access_key": self.ACCESS_KEY} headers = {"Referer": self.extractor.root + "/", "Origin" : self.extractor.root} diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py index 93ef6f1..7ad06c9 100644 --- a/gallery_dl/extractor/hiperdex.py +++ b/gallery_dl/extractor/hiperdex.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Mike Fährmann +# Copyright 2020-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -13,14 +13,13 @@ from .. 
import text from ..cache import memcache import re - -BASE_PATTERN = r"(?:https?://)?(?:www\.)?hiperdex\.(?:com|net|info)" +BASE_PATTERN = r"(?:https?://)?(?:www\.)?hiperdex\d?\.(?:com|net|info)" class HiperdexBase(): """Base class for hiperdex extractors""" category = "hiperdex" - root = "https://hiperdex.com" + root = "https://hiperdex2.com" @memcache(keyarg=1) def manga_data(self, manga, page=None): @@ -66,8 +65,8 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): """Extractor for manga chapters from hiperdex.com""" pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))" test = ( - ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", { - "pattern": r"https://hiperdex.(com|net|info)/wp-content/uploads" + ("https://hiperdex2.com/manga/domestic-na-kanojo/154-5/", { + "pattern": r"https://hiperdex\d?.(com|net|info)/wp-content/uploads" r"/WP-manga/data/manga_\w+/[0-9a-f]{32}/\d+\.webp", "count": 9, "keyword": { @@ -107,7 +106,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor): chapterclass = HiperdexChapterExtractor pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$" test = ( - ("https://hiperdex.com/manga/youre-not-that-special/", { + ("https://hiperdex2.com/manga/youre-not-that-special/", { "count": 51, "pattern": HiperdexChapterExtractor.pattern, "keyword": { @@ -159,7 +158,7 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor): reverse = False pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/([^/?#]+))" test = ( - ("https://hiperdex.com/manga-artist/beck-ho-an/"), + ("https://hiperdex2.com/manga-artist/beck-ho-an/"), ("https://hiperdex.net/manga-artist/beck-ho-an/"), ("https://hiperdex.info/manga-artist/beck-ho-an/"), ("https://hiperdex.com/manga-author/viagra/", { diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 994e1b7..497509d 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -35,12 +35,12 @@ class HitomiGalleryExtractor(GalleryExtractor): }), # Game CG with scenes (#321) ("https://hitomi.la/galleries/733697.html", { - "url": "ec3fe9b708ee376ec579b90d053ad485c0777552", + "url": "8dfbcb1e51cec43a7112d58b7e92153155ada3b9", "count": 210, }), # fallback for galleries only available through /reader/ URLs ("https://hitomi.la/galleries/1045954.html", { - "url": "bf4ed4e726204da5bc37a236ca476a2a96081388", + "url": "a5af7fdca1f5c93c289af128914a8488ea345036", "count": 1413, }), # gallery with "broken" redirect @@ -140,8 +140,8 @@ class HitomiGalleryExtractor(GalleryExtractor): # see https://ltn.hitomi.la/common.js inum = int(ihash[-3:-1], 16) - frontends = 2 if inum < 0x30 else 3 - inum = 1 if inum < 0x09 else inum + frontends = 2 if inum < 0x70 else 3 + inum = 1 if inum < 0x49 else inum url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format( chr(97 + (inum % frontends)), diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py index 3d4bcfb..9701f1e 100644 --- a/gallery_dl/extractor/idolcomplex.py +++ b/gallery_dl/extractor/idolcomplex.py @@ -132,11 +132,16 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor): archive_fmt = "t_{search_tags}_{id}" pattern = r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)" test = ( - ("https://idol.sankakucomplex.com/?tags=lyumos+wreath", { - "count": ">= 6", + ("https://idol.sankakucomplex.com/?tags=lyumos", { + "count": 5, + "range": "18-22", "pattern": r"https://is\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}" r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+", }), + ("https://idol.sankakucomplex.com/?tags=order:favcount", { + 
"count": 5, + "range": "18-22", + }), ("https://idol.sankakucomplex.com" "/?tags=lyumos+wreath&page=3&next=694215"), ) @@ -184,21 +189,21 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor): while True: page = self.request(self.root, params=params, retries=10).text pos = page.find("<div id=more-popular-posts-link>") + 1 + yield from text.extract_iter(page, '" id=p', '>', pos) - ids = list(text.extract_iter(page, '" id=p', '>', pos)) - if not ids: + next_url = text.extract(page, 'next-page-url="', '"', pos)[0] + if not next_url: return - yield from ids - next_qs = text.extract(page, 'next-page-url="/?', '"', pos)[0] - next_id = text.parse_query(next_qs).get("next") - - # stop if the same "next" parameter occurs twice in a row (#265) - if "next" in params and params["next"] == next_id: - return + next_params = text.parse_query(text.unescape( + next_url).lstrip("?/")) - params["next"] = next_id or (text.parse_int(ids[-1]) - 1) - params["page"] = "2" + if "next" in next_params: + # stop if the same "next" value occurs twice in a row (#265) + if "next" in params and params["next"] == next_params["next"]: + return + next_params["page"] = "2" + params = next_params class IdolcomplexPoolExtractor(IdolcomplexExtractor): diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index e3db789..b015556 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -120,10 +120,7 @@ class InstagramExtractor(Extractor): if not self._check_cookies(self.cookienames): username, password = self._get_auth_info() if username: - self.session.cookies.set( - "ig_cb", "2", domain="www.instagram.com") self._update_cookies(self._login_impl(username, password)) - self.session.cookies.set( "csrftoken", self.csrf_token, domain=self.cookiedomain) @@ -131,33 +128,42 @@ class InstagramExtractor(Extractor): def _login_impl(self, username, password): self.log.info("Logging in as %s", username) - page = self.request(self.root + "/accounts/login/").text + url = self.root + "/accounts/login/" + page = self.request(url).text + headers = { - "Referer" : self.root + "/accounts/login/", + "X-Web-Device-Id" : text.extract(page, '"device_id":"', '"')[0], "X-IG-App-ID" : "936619743392459", + "X-ASBD-ID" : "437806", + "X-IG-WWW-Claim" : "0", "X-Requested-With": "XMLHttpRequest", + "Referer" : url, } + url = self.root + "/data/shared_data/" + data = self.request(url, headers=headers).json() - response = self.request(self.root + "/web/__mid/", headers=headers) - headers["X-CSRFToken"] = response.cookies["csrftoken"] - headers["X-Instagram-AJAX"] = text.extract( - page, '"rollout_hash":"', '"')[0] - - url = self.root + "/accounts/login/ajax/" + headers["X-CSRFToken"] = data["config"]["csrf_token"] + headers["X-Instagram-AJAX"] = data["rollout_hash"] + headers["Origin"] = self.root data = { "username" : username, "enc_password" : "#PWD_INSTAGRAM_BROWSER:0:{}:{}".format( int(time.time()), password), - "queryParams" : "{}", - "optIntoOneTap": "false", + "queryParams" : "{}", + "optIntoOneTap" : "false", + "stopDeletionNonce" : "", + "trustedDeviceRecords": "{}", } + url = self.root + "/accounts/login/ajax/" response = self.request(url, method="POST", headers=headers, data=data) if not response.json().get("authenticated"): raise exception.AuthenticationError() + + cget = self.session.cookies.get return { - key: self.session.cookies.get(key) - for key in ("sessionid", "mid", "csrftoken") + name: cget(name) + for name in ("sessionid", "mid", "ig_did") } def _parse_post_graphql(self, 
post): @@ -408,7 +414,7 @@ class InstagramPostsExtractor(InstagramExtractor): url = "{}/{}/".format(self.root, self.item) user = self._extract_profile_page(url) - query_hash = "32b14723a678bd4628d70c1f877b94c9" + query_hash = "7ea6ae3cf6fb05e73fcbe1732b1d2a42" variables = {"id": user["id"], "first": 50} edge = self._get_edge_data(user, "edge_owner_to_timeline_media") return self._pagination_graphql(query_hash, variables, edge) @@ -427,7 +433,7 @@ class InstagramTaggedExtractor(InstagramExtractor): url = "{}/{}/".format(self.root, self.item) user = self._extract_profile_page(url) - query_hash = "31fe64d9463cbbe58319dced405c6206" + query_hash = "be13233562af2d229b008d2976b998b5" variables = {"id": user["id"], "first": 50} edge = self._get_edge_data(user, None) return self._pagination_graphql(query_hash, variables, edge) @@ -613,13 +619,13 @@ class InstagramPostExtractor(InstagramExtractor): ) def posts(self): - query_hash = "d4e8ae69cb68f66329dcebe82fb69f6d" + query_hash = "971f52b26328008c768b7d8e4ac9ce3c" variables = { "shortcode" : self.item, "child_comment_count" : 3, "fetch_comment_count" : 40, "parent_comment_count" : 24, - "has_threaded_comments": True + "has_threaded_comments": True, } data = self._request_graphql(query_hash, variables) media = data.get("shortcode_media") diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 1b5e5e9..8c51d5d 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -20,8 +20,8 @@ class KemonopartyExtractor(Extractor): category = "kemonoparty" root = "https://kemono.party" directory_fmt = ("{category}", "{service}", "{user}") - filename_fmt = "{id}_{title}_{filename}.{extension}" - archive_fmt = "{service}_{user}_{id}_{filename}.{extension}" + filename_fmt = "{id}_{title}_{num:>02}_{filename}.{extension}" + archive_fmt = "{service}_{user}_{id}_{num}" def items(self): find_inline = re.compile(r'src="(/inline/[^"]+)').findall diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index 6a88d58..0fe46b1 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -10,202 +10,270 @@ from .common import Extractor, Message from .. 
import text, util, exception -from ..cache import memcache +from ..cache import cache, memcache +from collections import defaultdict + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)" class MangadexExtractor(Extractor): """Base class for mangadex extractors""" category = "mangadex" + directory_fmt = ( + "{category}", "{manga}", + "{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}") + filename_fmt = ( + "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}") + archive_fmt = "{chapter_id}_{page}" root = "https://mangadex.org" - api_root = "https://api.mangadex.org" - - # mangadex-to-iso639-1 codes - iso639_map = { - "br": "pt", - "ct": "ca", - "gb": "en", - "vn": "vi", - } + _cache = {} def __init__(self, match): Extractor.__init__(self, match) + self.api = MangadexAPI(self) + self.uuid = match.group(1) + + def items(self): + for chapter in self.chapters(): + uuid = chapter["data"]["id"] + data = self._transform(chapter) + data["_extractor"] = MangadexChapterExtractor + self._cache[uuid] = (chapter, data) + yield Message.Queue, self.root + "/chapter/" + uuid, data - server = self.config("api-server") - if server is not None: - self.api_root = server.rstrip("/") + def _transform(self, chapter): + relationships = defaultdict(list) + for item in chapter["relationships"]: + relationships[item["type"]].append(item["id"]) + manga = self.api.manga(relationships["manga"][0]) + for item in manga["relationships"]: + relationships[item["type"]].append(item["id"]) - def chapter_data(self, chapter_id): - """Request API results for 'chapter_id'""" - url = "{}/v2/chapter/{}".format(self.api_root, chapter_id) - return self.request(url).json()["data"] + cattributes = chapter["data"]["attributes"] + mattributes = manga["data"]["attributes"] + lang = cattributes["translatedLanguage"].partition("-")[0] - @memcache(keyarg=1) - def manga_data(self, manga_id): - """Request API results for 'manga_id'""" - url = "{}/v2/manga/{}".format(self.api_root, manga_id) - return self.request(url).json()["data"] - - def manga_chapters(self, manga_id): - """Request chapter list for 'manga_id'""" - url = "{}/v2/manga/{}/chapters".format(self.api_root, manga_id) - data = self.request(url).json()["data"] - - groups = { - group["id"]: group["name"] - for group in data["groups"] + if cattributes["chapter"]: + chnum, sep, minor = cattributes["chapter"].partition(".") + else: + chnum, sep, minor = 0, "", "" + + data = { + "manga" : mattributes["title"]["en"], + "manga_id": manga["data"]["id"], + "title" : cattributes["title"], + "volume" : text.parse_int(cattributes["volume"]), + "chapter" : text.parse_int(chnum), + "chapter_minor": sep + minor, + "chapter_id": chapter["data"]["id"], + "date" : text.parse_datetime(cattributes["publishAt"]), + "lang" : lang, + "language": util.code_to_language(lang), + "count" : len(cattributes["data"]), } - for chapter in data["chapters"]: - cgroups = chapter["groups"] - for idx, group_id in enumerate(cgroups): - cgroups[idx] = groups[group_id] - yield chapter + if self.config("metadata"): + data["artist"] = [ + self.api.author(uuid)["data"]["attributes"]["name"] + for uuid in relationships["artist"]] + data["author"] = [ + self.api.author(uuid)["data"]["attributes"]["name"] + for uuid in relationships["author"]] + data["group"] = [ + self.api.group(uuid)["data"]["attributes"]["name"] + for uuid in relationships["scanlation_group"]] + + return data class MangadexChapterExtractor(MangadexExtractor): """Extractor for manga-chapters from mangadex.org""" subcategory = 
"chapter" - directory_fmt = ( - "{category}", "{manga}", - "{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}") - filename_fmt = ( - "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}") - archive_fmt = "{chapter_id}_{page}" - pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)/chapter/(\d+)" + pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)" test = ( - ("https://mangadex.org/chapter/122094", { - "keyword": "89d1b24b4baa1fb737d32711d9f2ade6ea426987", + ("https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa", { + "keyword": "f6c2b908df06eb834d56193dfe1fa1f7c2c4dccd", # "content": "50383a4c15124682057b197d40261641a98db514", }), # oneshot - ("https://mangadex.cc/chapter/138086", { + ("https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831", { + "options": (("metadata", True),), "count": 64, - "keyword": "c53a0e4c12250578a4e630281085875e59532c03", + "keyword": "6abcbe1e24eeb1049dc931958853cd767ee483fb", }), # MANGA Plus (#1154) - ("https://mangadex.org/chapter/1122815", { - "exception": exception.HttpError, + ("https://mangadex.org/chapter/8d50ed68-8298-4ac9-b63d-cb2aea143dd0", { + "exception": exception.StopExtraction, }), ) - def __init__(self, match): - MangadexExtractor.__init__(self, match) - self.chapter_id = match.group(1) - def items(self): - cdata = self.chapter_data(self.chapter_id) - if "server" not in cdata: - if cdata["status"] == "external": - raise exception.StopExtraction( - "Chapter is not available on MangaDex and can be read on " - "the official publisher's website at %s.", cdata["pages"]) - raise exception.StopExtraction("No download server available.") - mdata = self.manga_data(cdata["mangaId"]) - - chapter, sep, minor = cdata["chapter"].partition(".") - lang = self.iso639_map.get(cdata["language"], cdata["language"]) - - base = cdata["server"] + cdata["hash"] + "/" - if base[0] == "/": - base = text.urljoin(self.root, base) - - if "serverFallback" in cdata: - fallback = cdata["serverFallback"] + cdata["hash"] + "/" - else: - fallback = None - - data = { - "manga" : text.unescape(mdata["title"]), - "manga_id": mdata["id"], - "artist" : mdata["artist"], - "author" : mdata["author"], - "title" : text.unescape(cdata["title"]), - "volume" : text.parse_int(cdata["volume"]), - "chapter" : text.parse_int(chapter), - "chapter_minor": sep + minor, - "chapter_id": cdata["id"], - "group" : [group["name"] for group in cdata["groups"]], - "date" : text.parse_timestamp(cdata["timestamp"]), - "lang" : lang, - "language": util.code_to_language(lang), - "count" : len(cdata["pages"]), - } - + try: + chapter, data = self._cache.pop(self.uuid) + except KeyError: + chapter = self.api.chapter(self.uuid) + data = self._transform(chapter) yield Message.Directory, data - for data["page"], page in enumerate(cdata["pages"], 1): - if fallback: - data["_fallback"] = (fallback + page,) - yield Message.Url, base + page, text.nameext_from_url(page, data) + + cattributes = chapter["data"]["attributes"] + base = "{}/data/{}/".format( + self.api.athome_server(self.uuid)["baseUrl"], cattributes["hash"]) + for data["page"], page in enumerate(cattributes["data"], 1): + text.nameext_from_url(page, data) + yield Message.Url, base + page, data class MangadexMangaExtractor(MangadexExtractor): """Extractor for manga from mangadex.org""" subcategory = "manga" - categorytransfer = True - pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)" - r"/(?:title|manga)/(\d+)") + pattern = BASE_PATTERN + r"/(?:title|manga)/(?!feed$)([0-9a-f-]+)" test = ( - 
("https://mangadex.org/manga/2946/souten-no-koumori", { - "pattern": r"https://mangadex.org/chapter/\d+", + ("https://mangadex.org/title/f90c4398-8aad-4f51-8a1f-024ca09fdcbc", { "keyword": { "manga" : "Souten no Koumori", - "manga_id": 2946, + "manga_id": "f90c4398-8aad-4f51-8a1f-024ca09fdcbc", "title" : "re:One[Ss]hot", "volume" : 0, "chapter" : 0, "chapter_minor": "", - "chapter_id": int, - "group" : list, + "chapter_id": str, "date" : "type:datetime", "lang" : str, "language": str, }, }), - ("https://mangadex.cc/manga/13318/dagashi-kashi/chapters/2/", { + ("https://mangadex.cc/manga/d0c88e3b-ea64-4e07-9841-c1d2ac982f4a/", { + "options": (("lang", "en"),), "count": ">= 100", }), - ("https://mangadex.org/title/13004/yorumori-no-kuni-no-sora-ni", { - "count": 0, + ("https://mangadex.org/title/7c1e2742-a086-4fd3-a3be-701fd6cf0be9", { + "count": 1, }), ) - def __init__(self, match): - MangadexExtractor.__init__(self, match) - self.manga_id = match.group(1) + def chapters(self): + return self.api.manga_feed(self.uuid) - def items(self): - yield Message.Version, 1 - for data in self.chapters(): - url = "{}/chapter/{}".format(self.root, data["chapter_id"]) - yield Message.Queue, url, data + +class MangadexFeedExtractor(MangadexExtractor): + """Extractor for chapters from your Followed Feed""" + subcategory = "feed" + pattern = BASE_PATTERN + r"/title/feed$()" + test = ("https://mangadex.org/title/feed",) def chapters(self): - """Return a sorted list of chapter-metadata dicts""" - manga = self.manga_data(int(self.manga_id)) - results = [] - - for cdata in self.manga_chapters(self.manga_id): - chapter, sep, minor = cdata["chapter"].partition(".") - lang = self.iso639_map.get(cdata["language"], cdata["language"]) - results.append({ - "manga" : text.unescape(manga["title"]), - "manga_id": text.parse_int(self.manga_id), - "artist" : manga["artist"], - "author" : manga["author"], - "title" : text.unescape(cdata["title"]), - "volume" : text.parse_int(cdata["volume"]), - "chapter" : text.parse_int(chapter), - "chapter_minor": sep + minor, - "chapter_id": text.parse_int(cdata["id"]), - "group" : cdata["groups"], - "date" : text.parse_timestamp(cdata["timestamp"]), - "lang" : lang, - "language": util.code_to_language(lang), - "_extractor": MangadexChapterExtractor, - }) - - results.sort( - key=lambda x: (x["chapter"], x["chapter_minor"]), - reverse=self.config("chapter-reverse", False), - ) - return results + return self.api.user_follows_manga_feed() + + +class MangadexAPI(): + """Interface for the MangaDex API v5""" + + def __init__(self, extr): + self.extractor = extr + self.headers = {} + + self.username, self.password = self.extractor._get_auth_info() + if not self.username: + self.authenticate = util.noop + + server = extr.config("api-server") + self.root = ("https://api.mangadex.org" if server is None + else text.ensure_http_scheme(server).rstrip("/")) + + def athome_server(self, uuid): + return self._call("/at-home/server/" + uuid) + + @memcache(keyarg=1) + def author(self, uuid): + return self._call("/author/" + uuid) + + def chapter(self, uuid): + return self._call("/chapter/" + uuid) + + @memcache(keyarg=1) + def group(self, uuid): + return self._call("/group/" + uuid) + + @memcache(keyarg=1) + def manga(self, uuid): + return self._call("/manga/" + uuid) + + def manga_feed(self, uuid): + config = self.extractor.config + order = "desc" if config("chapter-reverse") else "asc" + params = { + "order[volume]" : order, + "order[chapter]" : order, + "translatedLanguage[]": config("lang"), + } + return 
self._pagination("/manga/" + uuid + "/feed", params) + + def user_follows_manga_feed(self): + params = { + "order[publishAt]" : "desc", + "translatedLanguage[]": self.extractor.config("lang"), + } + return self._pagination("/user/follows/manga/feed", params) + + def authenticate(self): + self.headers["Authorization"] = \ + self._authenticate_impl(self.username, self.password) + + @cache(maxage=900, keyarg=1) + def _authenticate_impl(self, username, password): + refresh_token = _refresh_token_cache(username) + if refresh_token: + self.extractor.log.info("Refreshing access token") + url = self.root + "/auth/refresh" + data = {"token": refresh_token} + else: + self.extractor.log.info("Logging in as %s", username) + url = self.root + "/auth/login" + data = {"username": username, "password": password} + + data = self.extractor.request( + url, method="POST", json=data, fatal=None).json() + if data.get("result") != "ok": + raise exception.AuthenticationError() + + if refresh_token != data["token"]["refresh"]: + _refresh_token_cache.update(username, data["token"]["refresh"]) + return "Bearer " + data["token"]["session"] + + def _call(self, endpoint, params=None): + url = self.root + endpoint + + while True: + self.authenticate() + response = self.extractor.request( + url, params=params, headers=self.headers, fatal=None) + + if response.status_code < 400: + return response.json() + if response.status_code == 429: + until = response.headers.get("X-RateLimit-Retry-After") + self.extractor.wait(until=until) + continue + + msg = ", ".join('{title}: {detail}'.format_map(error) + for error in response.json()["errors"]) + raise exception.StopExtraction( + "%s %s (%s)", response.status_code, response.reason, msg) + + def _pagination(self, endpoint, params=None): + if params is None: + params = {} + params["offset"] = 0 + + while True: + data = self._call(endpoint, params) + yield from data["results"] + + params["offset"] = data["offset"] + data["limit"] + if params["offset"] >= data["total"]: + return + + +@cache(maxage=28*24*3600, keyarg=0) +def _refresh_token_cache(username): + return None diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py index a123783..a9d504e 100644 --- a/gallery_dl/extractor/mangafox.py +++ b/gallery_dl/extractor/mangafox.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2019 Mike Fährmann +# Copyright 2017-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract manga-chapters and entire manga from https://fanfox.net/""" +"""Extractors for from https://fanfox.net/""" from .common import ChapterExtractor from .. import text @@ -15,14 +15,15 @@ from .. 
import text class MangafoxChapterExtractor(ChapterExtractor): """Extractor for manga-chapters from fanfox.net""" category = "mangafox" - pattern = (r"(?:https?://)?(?:www\.|m\.)?(?:mangafox\.me|fanfox\.net)" - r"(/manga/[^/]+/((?:v(\d+)/)?c(\d+)([^/?#]*)))") + pattern = (r"(?:https?://)?(?:www\.|m\.)?(?:fanfox\.net|mangafox\.me)" + r"(/manga/[^/]+/((?:v([^/?#]+)/)?c(\d+)([^/?#]*)))") test = ( ("http://fanfox.net/manga/kidou_keisatsu_patlabor/v05/c006.2/1.html", { "keyword": "5661dab258d42d09d98f194f7172fb9851a49766", "content": "5c50c252dcf12ffecf68801f4db8a2167265f66c", }), ("http://mangafox.me/manga/kidou_keisatsu_patlabor/v05/c006.2/"), + ("http://fanfox.net/manga/black_clover/vTBD/c295/1.html"), ) root = "https://m.fanfox.net" diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index 483c657..c798ad0 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -73,6 +73,9 @@ class OAuthBase(Extractor): print(url, end="\n\n", flush=True) return (recv or self.recv)() + def error(self, msg): + return self.send("Remote server reported an error:\n\n" + str(msg)) + def _oauth1_authorization_flow( self, request_token_url, authorize_url, access_token_url): """Perform the OAuth 1.0a authorization flow""" @@ -135,8 +138,7 @@ class OAuthBase(Extractor): )) return if "error" in params: - self.send(params["error"]) - return + return self.error(params) # exchange the authorization code for a token data = { @@ -156,8 +158,7 @@ class OAuthBase(Extractor): # check token response if "error" in data: - self.send(data["error"]) - return + return self.error(data) token = data[key] token_name = key.replace("_", "-") diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py index 3cfcb0e..64fc938 100644 --- a/gallery_dl/extractor/philomena.py +++ b/gallery_dl/extractor/philomena.py @@ -107,11 +107,11 @@ class PhilomenaPostExtractor(PhilomenaExtractor): "source_url": "https://www.deviantart.com/speccysy/art" "/Afternoon-Flight-215193985", "spoilered": False, - "tag_count": 37, + "tag_count": 38, "tag_ids": list, "tags": list, "thumbnails_generated": True, - "updated_at": "2021-04-07T06:01:30Z", + "updated_at": "2021-05-28T17:39:38Z", "uploader": "Clover the Clever", "uploader_id": 211188, "upvotes": int, @@ -149,6 +149,10 @@ class PhilomenaSearchExtractor(PhilomenaExtractor): "range": "40-60", "count": 21, }), + (("https://derpibooru.org/tags/" + "artist-colon--dash-_-fwslash--fwslash-%255Bkorroki%255D_aternak"), { + "count": ">= 2", + }), ("https://ponybooru.org/search?q=cute", { "range": "40-60", "count": 21, @@ -159,7 +163,18 @@ class PhilomenaSearchExtractor(PhilomenaExtractor): PhilomenaExtractor.__init__(self, match) groups = match.groups() if groups[-1]: - self.params = {"q": groups[-1]} + q = groups[-1] + for old, new in ( + ("-colon-" , ":"), + ("-dash-" , "-"), + ("-dot-" , "."), + ("-plus-" , "+"), + ("-fwslash-", "/"), + ("-bwslash-", "\\"), + ): + if old in q: + q = q.replace(old, new) + self.params = {"q": text.unquote(text.unquote(q))} else: self.params = text.parse_query(groups[-2]) diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 8076fff..ff07a57 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -321,34 +321,30 @@ class PixivFavoriteExtractor(PixivExtractor): def __init__(self, match): uid, kind, self.tag, query = match.groups() + query = text.parse_query(query) - if query: - self.query = text.parse_query(query) - uid = self.query.get("id") + if not uid: + uid = 
query.get("id") if not uid: self.subcategory = "bookmark" - elif self.query.get("type") == "user": - self.subcategory = "following" - self.items = self._items_following - else: - self.query = {} - if kind == "following": - self.subcategory = "following" - self.items = self._items_following + + if kind == "following" or query.get("type") == "user": + self.subcategory = "following" + self.items = self._items_following PixivExtractor.__init__(self, match) + self.query = query self.user_id = uid def works(self): tag = None - restrict = "public" - if "tag" in self.query: tag = text.unquote(self.query["tag"]) elif self.tag: tag = text.unquote(self.tag) - if "rest" in self.query and self.query["rest"] == "hide": + restrict = "public" + if self.query.get("rest") == "hide": restrict = "private" return self.api.user_bookmarks_illust(self.user_id, tag, restrict) @@ -364,9 +360,11 @@ class PixivFavoriteExtractor(PixivExtractor): return {"user_bookmark": user} def _items_following(self): - yield Message.Version, 1 + restrict = "public" + if self.query.get("rest") == "hide": + restrict = "private" - for preview in self.api.user_following(self.user_id): + for preview in self.api.user_following(self.user_id, restrict): user = preview["user"] user["_extractor"] = PixivUserExtractor url = "https://www.pixiv.net/users/{}".format(user["id"]) @@ -622,8 +620,8 @@ class PixivAppAPI(): params = {"user_id": user_id} return self._call("v1/user/detail", params)["user"] - def user_following(self, user_id): - params = {"user_id": user_id} + def user_following(self, user_id, restrict="public"): + params = {"user_id": user_id, "restrict": restrict} return self._pagination("v1/user/following", params, "user_previews") def user_illusts(self, user_id): diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py index 8611dcb..576564c 100644 --- a/gallery_dl/extractor/redgifs.py +++ b/gallery_dl/extractor/redgifs.py @@ -71,6 +71,6 @@ class RedgifsImageExtractor(RedgifsExtractor): class RedgifsAPI(GfycatAPI): - API_ROOT = "https://napi.redgifs.com" + API_ROOT = "https://api.redgifs.com" ACCESS_KEY = ("dBLwVuGn9eq4dtXLs8WSfpjcYFY7bPQe" "AqGPSFgqeW5B9uzj2cMVhF63pTFF4Rg9") diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index 9808cb8..2ea6f57 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -54,14 +54,16 @@ class SankakuExtractor(BooruExtractor): def _prepare(post): post["created_at"] = post["created_at"]["s"] post["date"] = text.parse_timestamp(post["created_at"]) - post["tags"] = [tag["name"] for tag in post["tags"]] + post["tags"] = [tag["name"] for tag in post["tags"] if tag["name"]] post["tag_string"] = " ".join(post["tags"]) def _extended_tags(self, post): tags = collections.defaultdict(list) types = self.TAG_TYPES for tag in post["tags"]: - tags[types[tag["type"]]].append(tag["name"]) + name = tag["name"] + if name: + tags[types[tag["type"]]].append(name) for key, value in tags.items(): post["tags_" + key] = value post["tag_string_" + key] = " ".join(value) @@ -160,6 +162,15 @@ class SankakuPostExtractor(SankakuExtractor): "pattern": r"https://s\.sankakucomplex\.com" r"/data/13/3c/133cda3bfde249c504284493903fb985\.jpg", }), + # empty tags (#1617) + ("https://sankaku.app/post/show/20758561", { + "options": (("tags", True),), + "count": 1, + "keyword": { + "tags": list, + "tags_general": ["key(mangaka)", "key(mangaka)"], + }, + }), ("https://beta.sankakucomplex.com/post/show/360451"), ("https://chan.sankakucomplex.com/post/show/360451"), 
) diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py index 753f266..83836e5 100644 --- a/gallery_dl/extractor/subscribestar.py +++ b/gallery_dl/extractor/subscribestar.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Mike Fährmann +# Copyright 2020-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -13,7 +13,6 @@ from .. import text, exception from ..cache import cache import json - BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)" @@ -45,8 +44,8 @@ class SubscribestarExtractor(Extractor): yield Message.Directory, data for item in media: item.update(data) - url = item["url"] - yield Message.Url, url, text.nameext_from_url(url, item) + text.nameext_from_url(item.get("name") or item["url"], item) + yield Message.Url, item["url"], item def posts(self): """Yield HTML content of all relevant posts""" @@ -105,6 +104,8 @@ class SubscribestarExtractor(Extractor): media.append({ "id" : text.parse_int(text.extract( att, 'data-upload-id="', '"')[0]), + "name": text.unescape(text.extract( + att, 'doc_preview-title">', '<')[0] or ""), "url" : text.extract(att, 'href="', '"')[0], "type": "attachment", }) diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index afeebb0..5550f96 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -33,10 +33,10 @@ class TwitterExtractor(Extractor): Extractor.__init__(self, match) self.user = match.group(1) self.textonly = self.config("text-tweets", False) - self.retweets = self.config("retweets", True) + self.retweets = self.config("retweets", False) self.replies = self.config("replies", True) self.twitpic = self.config("twitpic", False) - self.quoted = self.config("quoted", True) + self.quoted = self.config("quoted", False) self.videos = self.config("videos", True) self.cards = self.config("cards", False) self._user_cache = {} @@ -44,7 +44,6 @@ class TwitterExtractor(Extractor): def items(self): self.login() metadata = self.metadata() - yield Message.Version, 1 for tweet in self.tweets(): @@ -406,7 +405,6 @@ class TwitterFollowingExtractor(TwitterExtractor): class TwitterSearchExtractor(TwitterExtractor): """Extractor for all images from a search timeline""" subcategory = "search" - directory_fmt = ("{category}", "Search", "{search}") pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)" test = ("https://twitter.com/search?q=nature", { "range": "1-40", @@ -456,14 +454,14 @@ class TwitterTweetExtractor(TwitterExtractor): "options": (("replies", False),), "count": 0, }), - # quoted tweet (#526, #854) + # "quoted" option (#854) ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", { + "options": (("quoted", True),), "pattern": r"https://pbs\.twimg\.com/media/Ea[KG].+=jpg", "count": 8, }), - # "quoted" option (#854) + # quoted tweet (#526, #854) ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", { - "options": (("quoted", False),), "pattern": r"https://pbs\.twimg\.com/media/EaK.+=jpg", "count": 4, }), @@ -499,6 +497,7 @@ class TwitterTweetExtractor(TwitterExtractor): }), # retweet with missing media entities (#1555) ("https://twitter.com/morino_ya/status/1392763691599237121", { + "options": (("retweets", True),), "count": 4, }), ) diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py index e89a5b7..6cfc69e 100644 --- a/gallery_dl/extractor/unsplash.py +++ 
b/gallery_dl/extractor/unsplash.py @@ -172,13 +172,16 @@ class UnsplashFavoriteExtractor(UnsplashExtractor): class UnsplashCollectionExtractor(UnsplashExtractor): """Extractor for an unsplash collection""" subcategory = "collection" - pattern = BASE_PATTERN + r"/collections/(\d+)" - test = ("https://unsplash.com/collections/3178572/winter", { - "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+" - r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$", - "range": "1-30", - "count": 30, - }) + pattern = BASE_PATTERN + r"/collections/([^/?#]+)" + test = ( + ("https://unsplash.com/collections/3178572/winter", { + "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+" + r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$", + "range": "1-30", + "count": 30, + }), + ("https://unsplash.com/collections/_8qJQ2bCMWE/2021.05"), + ) def photos(self): url = "{}/napi/collections/{}/photos".format(self.root, self.item) diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 164c2a9..dddc03a 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -12,6 +12,7 @@ import time import errno import logging import operator +import functools import collections from . import extractor, downloader, postprocessor from . import config, text, util, output, exception @@ -375,17 +376,17 @@ class DownloadJob(Job): def initialize(self, kwdict=None): """Delayed initialization of PathFormat, etc.""" - config = self.extractor.config + cfg = self.extractor.config pathfmt = self.pathfmt = util.PathFormat(self.extractor) if kwdict: pathfmt.set_directory(kwdict) - self.sleep = config("sleep") - if not config("download", True): + self.sleep = cfg("sleep") + if not cfg("download", True): # monkey-patch method to do nothing and always return True self.download = pathfmt.fix_extension - archive = config("archive") + archive = cfg("archive") if archive: path = util.expand_path(archive) try: @@ -399,7 +400,7 @@ class DownloadJob(Job): else: self.extractor.log.debug("Using download archive '%s'", path) - skip = config("skip", True) + skip = cfg("skip", True) if skip: self._skipexc = None if skip == "enumerate": @@ -427,7 +428,10 @@ class DownloadJob(Job): category = self.extractor.category basecategory = self.extractor.basecategory + pp_conf = config.get((), "postprocessor") or {} for pp_dict in postprocessors: + if isinstance(pp_dict, str): + pp_dict = pp_conf.get(pp_dict) or {"name": pp_dict} whitelist = pp_dict.get("whitelist") if whitelist and category not in whitelist and \ @@ -459,6 +463,23 @@ class DownloadJob(Job): for callback in self.hooks["init"]: callback(pathfmt) + def register_hooks(self, hooks, options=None): + expr = options.get("filter") if options else None + + if expr: + condition = util.compile_expression(expr) + for hook, callback in hooks.items(): + self.hooks[hook].append(functools.partial( + self._call_hook, callback, condition)) + else: + for hook, callback in hooks.items(): + self.hooks[hook].append(callback) + + @staticmethod + def _call_hook(callback, condition, pathfmt): + if condition(pathfmt.kwdict): + callback(pathfmt) + def _build_blacklist(self): wlist = self.extractor.config("whitelist") if wlist is not None: diff --git a/gallery_dl/option.py b/gallery_dl/option.py index 6018542..a046a27 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -114,8 +114,9 @@ def build_parser(): ) general.add_argument( "--clear-cache", - dest="clear_cache", metavar="MODULE", nargs="?", const="all", - help="Delete all cached login sessions, cookies, etc.", + dest="clear_cache", metavar="MODULE", + 
help="Delete cached login sessions, cookies, etc. for MODULE " + "(ALL to delete everything)", ) output = parser.add_argument_group("Output Options") @@ -204,13 +205,6 @@ def build_parser(): "or -1 for infinite retries (default: 4)"), ) downloader.add_argument( - "-A", "--abort", - dest="abort", metavar="N", type=int, - help=("Abort extractor run after N consecutive file downloads have " - "been skipped, e.g. if files with the same filename already " - "exist"), - ) - downloader.add_argument( "--http-timeout", dest="timeout", metavar="SECONDS", type=float, action=ConfigAction, help="Timeout for HTTP connections (default: 30.0)", @@ -301,7 +295,19 @@ def build_parser(): "--download-archive", dest="archive", metavar="FILE", action=ConfigAction, help=("Record all downloaded files in the archive file and " - "skip downloading any file already in it."), + "skip downloading any file already in it"), + ) + selection.add_argument( + "-A", "--abort", + dest="abort", metavar="N", type=int, + help=("Stop current extractor run " + "after N consecutive file downloads were skipped"), + ) + selection.add_argument( + "-T", "--terminate", + dest="terminate", metavar="N", type=int, + help=("Stop current and parent extractor runs " + "after N consecutive file downloads were skipped"), ) selection.add_argument( "--range", @@ -335,7 +341,7 @@ def build_parser(): postprocessor.add_argument( "--zip", dest="postprocessors", - action="append_const", const={"name": "zip"}, + action="append_const", const="zip", help="Store downloaded files in a ZIP archive", ) postprocessor.add_argument( @@ -362,7 +368,7 @@ def build_parser(): postprocessor.add_argument( "--write-metadata", dest="postprocessors", - action="append_const", const={"name": "metadata"}, + action="append_const", const="metadata", help="Write metadata to separate JSON files", ) postprocessor.add_argument( @@ -374,7 +380,7 @@ def build_parser(): postprocessor.add_argument( "--mtime-from-date", dest="postprocessors", - action="append_const", const={"name": "mtime"}, + action="append_const", const="mtime", help="Set file modification times according to 'date' metadata", ) postprocessor.add_argument( @@ -392,6 +398,11 @@ def build_parser(): help=("Execute CMD after all files were downloaded successfully. 
" "Example: --exec-after 'cd {} && convert * ../doc.pdf'"), ) + postprocessor.add_argument( + "-P", "--postprocessor", + dest="postprocessors", metavar="NAME", action="append", + help="Activate the specified post processor", + ) parser.add_argument( "urls", diff --git a/gallery_dl/postprocessor/classify.py b/gallery_dl/postprocessor/classify.py index eda092d..34af1d9 100644 --- a/gallery_dl/postprocessor/classify.py +++ b/gallery_dl/postprocessor/classify.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2020 Mike Fährmann +# Copyright 2018-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -31,9 +31,8 @@ class ClassifyPP(PostProcessor): for directory, exts in mapping.items() for ext in exts } - - job.hooks["prepare"].append(self.prepare) - job.hooks["file"].append(self.move) + job.register_hooks( + {"prepare": self.prepare, "file": self.move}, options) def prepare(self, pathfmt): ext = pathfmt.extension diff --git a/gallery_dl/postprocessor/compare.py b/gallery_dl/postprocessor/compare.py index ca416c9..1bca593 100644 --- a/gallery_dl/postprocessor/compare.py +++ b/gallery_dl/postprocessor/compare.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Mike Fährmann +# Copyright 2020-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -18,11 +18,12 @@ class ComparePP(PostProcessor): PostProcessor.__init__(self, job) if options.get("shallow"): self._compare = self._compare_size - job.hooks["file"].append( + + job.register_hooks({"file": ( self.enumerate if options.get("action") == "enumerate" else self.compare - ) + )}, options) def compare(self, pathfmt): try: diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py index 2514219..8fed723 100644 --- a/gallery_dl/postprocessor/exec.py +++ b/gallery_dl/postprocessor/exec.py @@ -41,8 +41,7 @@ class ExecPP(PostProcessor): events = ("after",) elif isinstance(events, str): events = events.split(",") - for event in events: - job.hooks[event].append(execute) + job.register_hooks({event: execute for event in events}, options) def exec_list(self, pathfmt, status=None): if status: diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py index 49696a0..ef1d304 100644 --- a/gallery_dl/postprocessor/metadata.py +++ b/gallery_dl/postprocessor/metadata.py @@ -57,8 +57,7 @@ class MetadataPP(PostProcessor): events = ("file",) elif isinstance(events, str): events = events.split(",") - for event in events: - job.hooks[event].append(self.run) + job.register_hooks({event: self.run for event in events}, options) def run(self, pathfmt): directory = self._directory(pathfmt) @@ -103,11 +102,18 @@ class MetadataPP(PostProcessor): if not tags: return - if not isinstance(tags, list): + if isinstance(tags, str): taglist = tags.split(", ") if len(taglist) < len(tags) / 16: taglist = tags.split(" ") tags = taglist + elif isinstance(tags, dict): + taglists = tags.values() + tags = [] + extend = tags.extend + for taglist in taglists: + extend(taglist) + tags.sort() fp.write("\n".join(tags) + "\n") diff --git a/gallery_dl/postprocessor/mtime.py b/gallery_dl/postprocessor/mtime.py index e4c28ea..d2f1915 100644 --- a/gallery_dl/postprocessor/mtime.py +++ b/gallery_dl/postprocessor/mtime.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 
2019-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -17,7 +17,7 @@ class MtimePP(PostProcessor): def __init__(self, job, options): PostProcessor.__init__(self, job) self.key = options.get("key", "date") - job.hooks["file"].append(self.run) + job.register_hooks({"file": self.run}, options) def run(self, pathfmt): mtime = pathfmt.kwdict.get(self.key) diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py index ac094b7..e5bdebc 100644 --- a/gallery_dl/postprocessor/ugoira.py +++ b/gallery_dl/postprocessor/ugoira.py @@ -55,8 +55,8 @@ class UgoiraPP(PostProcessor): else: self.prevent_odd = False - job.hooks["prepare"].append(self.prepare) - job.hooks["file"].append(self.convert) + job.register_hooks( + {"prepare": self.prepare, "file": self.convert}, options) def prepare(self, pathfmt): self._frames = None diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py index e820280..1c4bd03 100644 --- a/gallery_dl/postprocessor/zip.py +++ b/gallery_dl/postprocessor/zip.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2020 Mike Fährmann +# Copyright 2018-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -38,8 +38,10 @@ class ZipPP(PostProcessor): self.args = (self.path[:-1] + ext, "a", self.COMPRESSION_ALGORITHMS[algorithm], True) - job.hooks["file"].append( - self.write_safe if options.get("mode") == "safe" else self.write) + job.register_hooks({ + "file": + self.write_safe if options.get("mode") == "safe" else self.write, + }, options) job.hooks["finalize"].append(self.finalize) def write(self, pathfmt, zfile=None): @@ -56,7 +58,7 @@ class ZipPP(PostProcessor): def write_safe(self, pathfmt): with zipfile.ZipFile(*self.args) as zfile: - self._write(pathfmt, zfile) + self.write(pathfmt, zfile) def finalize(self, pathfmt, status): if self.zfile: diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 78663a0..fbede3e 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -21,6 +21,7 @@ import sqlite3 import binascii import datetime import operator +import functools import itertools import urllib.parse from http.cookiejar import Cookie @@ -346,8 +347,6 @@ CODES = { "zh": "Chinese", } -SPECIAL_EXTRACTORS = {"oauth", "recursive", "test"} - class UniversalNone(): """None-style object that supports more operations than None itself""" @@ -373,6 +372,20 @@ class UniversalNone(): NONE = UniversalNone() WINDOWS = (os.name == "nt") SENTINEL = object() +SPECIAL_EXTRACTORS = {"oauth", "recursive", "test"} +GLOBALS = { + "parse_int": text.parse_int, + "urlsplit" : urllib.parse.urlsplit, + "datetime" : datetime.datetime, + "abort" : raises(exception.StopExtraction), + "terminate": raises(exception.TerminateExtraction), + "re" : re, +} + + +def compile_expression(expr, name="<expr>", globals=GLOBALS): + code_object = compile(expr, name, "eval") + return functools.partial(eval, code_object, globals) def build_predicate(predicates): @@ -472,20 +485,13 @@ class UniquePredicate(): class FilterPredicate(): """Predicate; True if evaluating the given expression returns True""" - def __init__(self, filterexpr, target="image"): + def __init__(self, expr, target="image"): name = "<{} filter>".format(target) - self.codeobj = compile(filterexpr, name, "eval") - self.globals = { - "parse_int": text.parse_int, - "urlsplit" : 
urllib.parse.urlsplit, - "datetime" : datetime.datetime, - "abort" : raises(exception.StopExtraction), - "re" : re, - } + self.expr = compile_expression(expr, name) - def __call__(self, url, kwds): + def __call__(self, _, kwdict): try: - return eval(self.codeobj, self.globals, kwds) + return self.expr(kwdict) except exception.GalleryDLException: raise except Exception as exc: @@ -749,25 +755,30 @@ class PathFormat(): } def __init__(self, extractor): - filename_fmt = extractor.config("filename") - if filename_fmt is None: - filename_fmt = extractor.filename_fmt - - directory_fmt = extractor.config("directory") - if directory_fmt is None: - directory_fmt = extractor.directory_fmt - - extension_map = extractor.config("extension-map") - if extension_map is None: - extension_map = self.EXTENSION_MAP - self.extension_map = extension_map.get + config = extractor.config + kwdefault = config("keywords-default") - kwdefault = extractor.config("keywords-default") + filename_fmt = config("filename") try: + if filename_fmt is None: + filename_fmt = extractor.filename_fmt + elif isinstance(filename_fmt, dict): + self.filename_conditions = [ + (compile_expression(expr), + Formatter(fmt, kwdefault).format_map) + for expr, fmt in filename_fmt.items() if expr + ] + self.build_filename = self.build_filename_conditional + filename_fmt = filename_fmt.get("", extractor.filename_fmt) + self.filename_formatter = Formatter( filename_fmt, kwdefault).format_map except Exception as exc: raise exception.FilenameFormatError(exc) + + directory_fmt = config("directory") + if directory_fmt is None: + directory_fmt = extractor.directory_fmt try: self.directory_formatters = [ Formatter(dirfmt, kwdefault).format_map @@ -784,7 +795,7 @@ class PathFormat(): basedir = extractor._parentdir if not basedir: - basedir = extractor.config("base-directory") + basedir = config("base-directory") if basedir is None: basedir = "." + os.sep + "gallery-dl" + os.sep elif basedir: @@ -795,8 +806,13 @@ class PathFormat(): basedir += os.sep self.basedirectory = basedir - restrict = extractor.config("path-restrict", "auto") - replace = extractor.config("path-replace", "_") + extension_map = config("extension-map") + if extension_map is None: + extension_map = self.EXTENSION_MAP + self.extension_map = extension_map.get + + restrict = config("path-restrict", "auto") + replace = config("path-replace", "_") if restrict == "auto": restrict = "\\\\|/<>:\"?*" if WINDOWS else "/" elif restrict == "unix": @@ -807,7 +823,7 @@ class PathFormat(): restrict = "^0-9A-Za-z_." 
self.clean_segment = self._build_cleanfunc(restrict, replace) - remove = extractor.config("path-remove", "\x00-\x1f\x7f") + remove = config("path-remove", "\x00-\x1f\x7f") self.clean_path = self._build_cleanfunc(remove, "") @staticmethod @@ -927,6 +943,19 @@ class PathFormat(): except Exception as exc: raise exception.FilenameFormatError(exc) + def build_filename_conditional(self): + kwdict = self.kwdict + + try: + for condition, formatter in self.filename_conditions: + if condition(kwdict): + break + else: + formatter = self.filename_formatter + return self.clean_path(self.clean_segment(formatter(kwdict))) + except Exception as exc: + raise exception.FilenameFormatError(exc) + def build_path(self): """Combine directory and filename to full paths""" if self._create_directory: diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 018554e..1a3e0e4 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.17.5" +__version__ = "1.18.0" @@ -1,6 +1,8 @@ [flake8] exclude = gallery_dl/__init__.py,gallery_dl/__main__.py,setup.py,build,scripts,archive ignore = E203,E226,W504 +per-file-ignores = + gallery_dl/extractor/500px.py: E501 [egg_info] tag_build = @@ -7,9 +7,6 @@ import os.path import warnings from setuptools import setup -if sys.hexversion < 0x3040000: - sys.exit("Python 3.4+ required") - def read(fname): path = os.path.join(os.path.dirname(__file__), fname) diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py index 6bf887c..00c17b2 100644 --- a/test/test_postprocessor.py +++ b/test/test_postprocessor.py @@ -30,13 +30,17 @@ class MockPostprocessorModule(Mock): class FakeJob(): - def __init__(self): - self.extractor = extractor.find("test:") - self.pathfmt = util.PathFormat(self.extractor) + def __init__(self, extr=extractor.find("test:")): + self.extractor = extr + self.pathfmt = util.PathFormat(extr) self.out = output.NullOutput() self.get_logger = logging.getLogger self.hooks = collections.defaultdict(list) + def register_hooks(self, hooks, options): + for hook, callback in hooks.items(): + self.hooks[hook].append(callback) + class TestPostprocessorModule(unittest.TestCase): @@ -239,6 +243,15 @@ class MetadataTest(BasePostprocessorTest): self._trigger() self.assertEqual(self._output(m), "foo\nbar\nbaz\n") + def test_metadata_tags_dict(self): + self._create( + {"mode": "tags"}, + {"tags": {"g": ["foobar1", "foobar2"], "m": ["foobarbaz"]}}, + ) + with patch("builtins.open", mock_open()) as m: + self._trigger() + self.assertEqual(self._output(m), "foobar1\nfoobar2\nfoobarbaz\n") + def test_metadata_custom(self): def test(pp_info): pp = self._create(pp_info, {"foo": "bar"}) diff --git a/test/test_results.py b/test/test_results.py index bf2496b..5b22ecd 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -312,7 +312,7 @@ def setup_test_config(): config.set(("extractor", "mangoxo") , "password", "5zbQF10_5u25259Ma") for category in ("danbooru", "instagram", "twitter", "subscribestar", - "e621", "inkbunny", "tapas", "pillowfort"): + "e621", "inkbunny", "tapas", "pillowfort", "mangadex"): config.set(("extractor", category), "username", None) config.set(("extractor", "mastodon.social"), "access-token", diff --git a/test/test_util.py b/test/test_util.py index e2f5084..d90d5ad 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -493,6 +493,30 @@ class TestOther(unittest.TestCase): def test_noop(self): 
self.assertEqual(util.noop(), None) + def test_compile_expression(self): + expr = util.compile_expression("1 + 2 * 3") + self.assertEqual(expr(), 7) + self.assertEqual(expr({"a": 1, "b": 2, "c": 3}), 7) + self.assertEqual(expr({"a": 9, "b": 9, "c": 9}), 7) + + expr = util.compile_expression("a + b * c") + self.assertEqual(expr({"a": 1, "b": 2, "c": 3}), 7) + self.assertEqual(expr({"a": 9, "b": 9, "c": 9}), 90) + + with self.assertRaises(NameError): + expr() + with self.assertRaises(NameError): + expr({"a": 2}) + + with self.assertRaises(SyntaxError): + util.compile_expression("") + with self.assertRaises(SyntaxError): + util.compile_expression("x++") + + expr = util.compile_expression("1 and abort()") + with self.assertRaises(exception.StopExtraction): + expr() + def test_generate_token(self): tokens = set() for _ in range(100): |