From 32de2b06db501c7de81678bce8e3e0c3e63d340c Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Tue, 22 Jun 2021 22:30:36 -0400 Subject: New upstream version 1.18.0. --- CHANGELOG.md | 33 +++ PKG-INFO | 7 +- README.rst | 5 +- data/completion/_gallery-dl | 10 +- data/completion/gallery-dl | 2 +- data/man/gallery-dl.1 | 18 +- data/man/gallery-dl.conf.5 | 43 +++- docs/gallery-dl.conf | 4 +- gallery_dl.egg-info/PKG-INFO | 7 +- gallery_dl/__init__.py | 17 +- gallery_dl/cache.py | 9 +- gallery_dl/extractor/500px.py | 409 ++++++++++++++++++++++++++++++++-- gallery_dl/extractor/cyberdrop.py | 4 +- gallery_dl/extractor/deviantart.py | 2 +- gallery_dl/extractor/foolfuuka.py | 9 +- gallery_dl/extractor/furaffinity.py | 23 +- gallery_dl/extractor/gfycat.py | 5 +- gallery_dl/extractor/hiperdex.py | 15 +- gallery_dl/extractor/hitomi.py | 8 +- gallery_dl/extractor/idolcomplex.py | 31 +-- gallery_dl/extractor/instagram.py | 44 ++-- gallery_dl/extractor/kemonoparty.py | 4 +- gallery_dl/extractor/mangadex.py | 354 +++++++++++++++++------------ gallery_dl/extractor/mangafox.py | 9 +- gallery_dl/extractor/oauth.py | 9 +- gallery_dl/extractor/philomena.py | 21 +- gallery_dl/extractor/pixiv.py | 34 ++- gallery_dl/extractor/redgifs.py | 2 +- gallery_dl/extractor/sankaku.py | 15 +- gallery_dl/extractor/subscribestar.py | 9 +- gallery_dl/extractor/twitter.py | 13 +- gallery_dl/extractor/unsplash.py | 17 +- gallery_dl/job.py | 31 ++- gallery_dl/option.py | 37 +-- gallery_dl/postprocessor/classify.py | 7 +- gallery_dl/postprocessor/compare.py | 7 +- gallery_dl/postprocessor/exec.py | 3 +- gallery_dl/postprocessor/metadata.py | 12 +- gallery_dl/postprocessor/mtime.py | 4 +- gallery_dl/postprocessor/ugoira.py | 4 +- gallery_dl/postprocessor/zip.py | 10 +- gallery_dl/util.py | 89 +++++--- gallery_dl/version.py | 2 +- setup.cfg | 2 + setup.py | 3 - test/test_postprocessor.py | 19 +- test/test_results.py | 2 +- test/test_util.py | 24 ++ 48 files changed, 1056 insertions(+), 392 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dcc1299..0a4c90c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,38 @@ # Changelog +## 1.18.0 - 2021-06-19 +### Additions +- [foolfuuka] support `archive.wakarimasen.moe` ([#1595](https://github.com/mikf/gallery-dl/issues/1595)) +- [mangadex] implement login with username & password ([#1535](https://github.com/mikf/gallery-dl/issues/1535)) +- [mangadex] add extractor for a user's followed feed ([#1535](https://github.com/mikf/gallery-dl/issues/1535)) +- [pixiv] support fetching privately followed users ([#1628](https://github.com/mikf/gallery-dl/issues/1628)) +- implement conditional filenames ([#1394](https://github.com/mikf/gallery-dl/issues/1394)) +- implement `filter` option for post processors ([#1460](https://github.com/mikf/gallery-dl/issues/1460)) +- add `-T/--terminate` command-line option ([#1399](https://github.com/mikf/gallery-dl/issues/1399)) +- add `-P/--postprocessor` command-line option ([#1583](https://github.com/mikf/gallery-dl/issues/1583)) +### Changes +- [kemonoparty] update default filenames and archive IDs ([#1514](https://github.com/mikf/gallery-dl/issues/1514)) +- [twitter] update default settings + - change `retweets` and `quoted` options from `true` to `false` + - change directory format for search results to the same as other extractors +- require an argument for `--clear-cache` +### Fixes +- [500px] update GraphQL queries +- [furaffinity] improve metadata extraction ([#1630](https://github.com/mikf/gallery-dl/issues/1630)) +- [hitomi] update image URL generation ([#1637](https://github.com/mikf/gallery-dl/issues/1637)) +- [idolcomplex] improve and fix pagination ([#1594](https://github.com/mikf/gallery-dl/issues/1594), [#1601](https://github.com/mikf/gallery-dl/issues/1601)) +- [instagram] fix login ([#1631](https://github.com/mikf/gallery-dl/issues/1631)) +- [instagram] update query hashes +- [mangadex] update to API v5 ([#1535](https://github.com/mikf/gallery-dl/issues/1535)) +- [mangafox] improve URL pattern ([#1608](https://github.com/mikf/gallery-dl/issues/1608)) +- [oauth] prevent exceptions when reporting errors ([#1603](https://github.com/mikf/gallery-dl/issues/1603)) +- [philomena] fix tag escapes handling ([#1629](https://github.com/mikf/gallery-dl/issues/1629)) +- [redgifs] update API server address ([#1632](https://github.com/mikf/gallery-dl/issues/1632)) +- [sankaku] handle empty tags ([#1617](https://github.com/mikf/gallery-dl/issues/1617)) +- [subscribestar] improve attachment filenames ([#1609](https://github.com/mikf/gallery-dl/issues/1609)) +- [unsplash] update collections URL pattern ([#1627](https://github.com/mikf/gallery-dl/issues/1627)) +- [postprocessor:metadata] handle dicts in `mode:tags` ([#1598](https://github.com/mikf/gallery-dl/issues/1598)) + ## 1.17.5 - 2021-05-30 ### Additions - [kemonoparty] add `metadata` option ([#1548](https://github.com/mikf/gallery-dl/issues/1548)) diff --git a/PKG-INFO b/PKG-INFO index 14d8ed3..ef2b047 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.17.5 +Version: 1.18.0 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -75,8 +75,8 @@ Description: ========== Prebuilt executable files with a Python interpreter and required Python packages included are available for - - `Windows `__ - - `Linux `__ + - `Windows `__ + - `Linux `__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml @@ -220,6 +220,7 @@ Description: ========== ``imgbb``, ``inkbunny``, ``instagram``, + ``mangadex``, ``mangoxo``, ``pillowfort``, ``pinterest``, diff --git a/README.rst b/README.rst index 66e71e7..f3a42fc 100644 --- a/README.rst +++ b/README.rst @@ -64,8 +64,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml @@ -209,6 +209,7 @@ and optional for ``imgbb``, ``inkbunny``, ``instagram``, +``mangadex``, ``mangoxo``, ``pillowfort``, ``pinterest``, diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl index 436260b..15806e8 100644 --- a/data/completion/_gallery-dl +++ b/data/completion/_gallery-dl @@ -11,7 +11,7 @@ _arguments -C -S \ {-i,--input-file}'[Download URLs found in FILE ("-" for stdin). More than one --input-file can be specified]':'':_files \ --cookies'[File to load additional cookies from]':'':_files \ --proxy'[Use the specified proxy]':'' \ ---clear-cache'[Delete all cached login sessions, cookies, etc.]':'' \ +--clear-cache'[Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)]':'' \ {-q,--quiet}'[Activate quiet mode]' \ {-v,--verbose}'[Print various debugging information]' \ {-g,--get-urls}'[Print URLs instead of downloading]' \ @@ -27,7 +27,6 @@ _arguments -C -S \ --write-pages'[Write downloaded intermediary pages to files in the current directory to debug problems]' \ {-r,--limit-rate}'[Maximum download rate (e.g. 500k or 2.5M)]':'' \ {-R,--retries}'[Maximum number of retries for failed HTTP requests or -1 for infinite retries (default: 4)]':'' \ -{-A,--abort}'[Abort extractor run after N consecutive file downloads have been skipped, e.g. if files with the same filename already exist]':'' \ --http-timeout'[Timeout for HTTP connections (default: 30.0)]':'' \ --sleep'[Number of seconds to sleep before each download]':'' \ --filesize-min'[Do not download files smaller than SIZE (e.g. 500k or 2.5M)]':'' \ @@ -44,7 +43,9 @@ _arguments -C -S \ {-u,--username}'[Username to login with]':'' \ {-p,--password}'[Password belonging to the given username]':'' \ --netrc'[Enable .netrc authentication data]' \ ---download-archive'[Record all downloaded files in the archive file and skip downloading any file already in it.]':'':_files \ +--download-archive'[Record all downloaded files in the archive file and skip downloading any file already in it]':'':_files \ +{-A,--abort}'[Stop current extractor run after N consecutive file downloads were skipped]':'' \ +{-T,--terminate}'[Stop current and parent extractor runs after N consecutive file downloads were skipped]':'' \ --range'[Index-range(s) specifying which images to download. For example "5-10" or "1,3-5,10-"]':'' \ --chapter-range'[Like "--range", but applies to manga-chapters and other delegated URLs]':'' \ --filter'[Python expression controlling which images to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K". Example: --filter "image_width >= 1000 and rating in ("s", "q")"]':'' \ @@ -56,6 +57,7 @@ _arguments -C -S \ --write-tags'[Write image tags to separate text files]' \ --mtime-from-date'[Set file modification times according to "date" metadata]' \ --exec'[Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"]':'' \ ---exec-after'[Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"]':'' && rc=0 +--exec-after'[Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"]':'' \ +{-P,--postprocessor}'[Activate the specified post processor]':'' && rc=0 return rc diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl index 9a3a63e..f3d1100 100644 --- a/data/completion/gallery-dl +++ b/data/completion/gallery-dl @@ -10,7 +10,7 @@ _gallery_dl() elif [[ "${prev}" =~ ^(-d|--dest)$ ]]; then COMPREPLY=( $(compgen -d -- "${cur}") ) else - COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --abort --http-timeout --sleep --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after" -- "${cur}") ) + COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") ) fi } diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 719b8b4..25da021 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2021-05-30" "1.17.5" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2021-06-19" "1.18.0" "gallery-dl Manual" .\" disable hyphenation .nh @@ -36,7 +36,7 @@ File to load additional cookies from Use the specified proxy .TP .B "\-\-clear\-cache" \f[I]MODULE\f[] -Delete all cached login sessions, cookies, etc. +Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything) .TP .B "\-q, \-\-quiet" Activate quiet mode @@ -83,9 +83,6 @@ Maximum download rate (e.g. 500k or 2.5M) .B "\-R, \-\-retries" \f[I]N\f[] Maximum number of retries for failed HTTP requests or -1 for infinite retries (default: 4) .TP -.B "\-A, \-\-abort" \f[I]N\f[] -Abort extractor run after N consecutive file downloads have been skipped, e.g. if files with the same filename already exist -.TP .B "\-\-http\-timeout" \f[I]SECONDS\f[] Timeout for HTTP connections (default: 30.0) .TP @@ -132,7 +129,13 @@ Password belonging to the given username Enable .netrc authentication data .TP .B "\-\-download\-archive" \f[I]FILE\f[] -Record all downloaded files in the archive file and skip downloading any file already in it. +Record all downloaded files in the archive file and skip downloading any file already in it +.TP +.B "\-A, \-\-abort" \f[I]N\f[] +Stop current extractor run after N consecutive file downloads were skipped +.TP +.B "\-T, \-\-terminate" \f[I]N\f[] +Stop current and parent extractor runs after N consecutive file downloads were skipped .TP .B "\-\-range" \f[I]RANGE\f[] Index-range(s) specifying which images to download. For example '5-10' or '1,3-5,10-' @@ -169,6 +172,9 @@ Execute CMD for each downloaded file. Example: --exec 'convert {} {}.png && rm { .TP .B "\-\-exec\-after" \f[I]CMD\f[] Execute CMD after all files were downloaded successfully. Example: --exec-after 'cd {} && convert * ../doc.pdf' +.TP +.B "\-P, \-\-postprocessor" \f[I]NAME\f[] +Activate the specified post processor .SH EXAMPLES .TP diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index f35f218..84e8e0e 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2021-05-30" "1.17.5" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2021-06-19" "1.18.0" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -75,14 +75,31 @@ those as makeshift comments by settings their values to arbitrary strings. .SH EXTRACTOR OPTIONS .SS extractor.*.filename .IP "Type:" 6 -\f[I]string\f[] +\f[I]string\f[] or \f[I]object\f[] .IP "Example:" 4 +.br +* .. code:: + "{manga}_c{chapter}_{page:>03}.{extension}" +.br +* .. code:: json + +{ +"extension == 'mp4'": "{id}_video.{extension}", +"'nature' in title" : "{id}_{title}.{extension}", +"" : "{id}_default.{extension}" +} + + .IP "Description:" 4 -A \f[I]format string\f[] to build the resulting filename -for a downloaded file. +A \f[I]format string\f[] to build filenames for downloaded files with. + +If this is an \f[I]object\f[], it must contain Python expressions mapping to the +filename format strings to use. +These expressions are evaluated in the order as specified in Python 3.6+ +and in an undetermined order in Python 3.4 and 3.5. The available replacement keys depend on the extractor used. A list of keys for a specific one can be acquired by calling *gallery-dl* @@ -358,9 +375,9 @@ and optional for .br * \f[I]aryion\f[] .br -* \f[I]danbooru\f[] +* \f[I]danbooru\f[] (*) .br -* \f[I]e621\f[] +* \f[I]e621\f[] (*) .br * \f[I]exhentai\f[] .br @@ -372,6 +389,8 @@ and optional for .br * \f[I]instagram\f[] .br +* \f[I]mangadex\f[] +.br * \f[I]mangoxo\f[] .br * \f[I]pillowfort\f[] @@ -392,7 +411,7 @@ These values can also be specified via the \f[I]-u/--username\f[] and \f[I]-p/--password\f[] command-line options or by using a \f[I].netrc\f[] file. (see Authentication_) -Note: The password value for \f[I]danbooru\f[] and \f[I]e621\f[] should be +(*) The password value for \f[I]danbooru\f[] and \f[I]e621\f[] should be the API key found in your user profile, not the actual account password. @@ -1900,7 +1919,7 @@ Fetch media from all Tweets and replies in a \f[I]conversation \f[I]bool\f[] .IP "Default:" 9 -\f[I]true\f[] +\f[I]false\f[] .IP "Description:" 4 Fetch media from quoted Tweets. @@ -1922,7 +1941,7 @@ Fetch media from replies to other Tweets. \f[I]bool\f[] .IP "Default:" 9 -\f[I]true\f[] +\f[I]false\f[] .IP "Description:" 4 Fetch media from Retweets. @@ -3206,12 +3225,18 @@ logging output to a file. "name" : "zip", "compression": "store", "extension" : "cbz", +"filter" : "extension not in ('zip', 'rar')", "whitelist" : ["mangadex", "exhentai", "nhentai"] } .IP "Description:" 4 An \f[I]object\f[] containing a \f[I]"name"\f[] attribute specifying the post-processor type, as well as any of its \f[I]options\f[]. + +It is possible to set a \f[I]"filter"\f[] expression similar to +\f[I]image-filter\f[] to only run a post-processor +conditionally. + It is also possible set a \f[I]"whitelist"\f[] or \f[I]"blacklist"\f[] to only enable or disable a post-processor for the specified extractor categories. diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 7497cd6..9514c7a 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -256,9 +256,9 @@ "password": null, "cards": false, "conversations": false, - "quoted": true, + "quoted": false, "replies": true, - "retweets": true, + "retweets": false, "text-tweets": false, "twitpic": false, "users": "timeline", diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 7fe851f..b53c326 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.17.5 +Version: 1.18.0 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -75,8 +75,8 @@ Description: ========== Prebuilt executable files with a Python interpreter and required Python packages included are available for - - `Windows `__ - - `Linux `__ + - `Windows `__ + - `Linux `__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml @@ -220,6 +220,7 @@ Description: ========== ``imgbb``, ``inkbunny``, ``instagram``, + ``mangadex``, ``mangoxo``, ``pillowfort``, ``pinterest``, diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 8154afc..d5893b7 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -6,23 +6,16 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -from __future__ import unicode_literals, print_function +import sys +import json +import logging +from . import version, config, option, output, extractor, job, util, exception __author__ = "Mike Fährmann" __copyright__ = "Copyright 2014-2021 Mike Fährmann" __license__ = "GPLv2" __maintainer__ = "Mike Fährmann" __email__ = "mike_faehrmann@web.de" - -import sys - -if sys.hexversion < 0x3040000: - sys.exit("Python 3.4+ required") - -import json -import logging -from . import version, config, option, output, extractor, job, util, exception - __version__ = version.__version__ @@ -126,6 +119,8 @@ def main(): config.set((), "postprocessors", args.postprocessors) if args.abort: config.set((), "skip", "abort:" + str(args.abort)) + if args.terminate: + config.set((), "skip", "terminate:" + str(args.terminate)) for opts in args.options: config.set(*opts) diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py index 5ab68bf..7a49b61 100644 --- a/gallery_dl/cache.py +++ b/gallery_dl/cache.py @@ -168,7 +168,7 @@ def cache(maxage=3600, keyarg=None): return wrap -def clear(module="all"): +def clear(module): """Delete database entries for 'module'""" db = DatabaseCacheDecorator.db if not db: @@ -176,19 +176,18 @@ def clear(module="all"): rowcount = 0 cursor = db.cursor() - module = module.lower() try: - if module == "all": + if module == "ALL": cursor.execute("DELETE FROM data") else: cursor.execute( "DELETE FROM data " "WHERE key LIKE 'gallery_dl.extractor.' || ? || '.%'", - (module,) + (module.lower(),) ) except sqlite3.OperationalError: - pass # database is not initialized, can't be modified, etc. + pass # database not initialized, cannot be modified, etc. else: rowcount = cursor.rowcount db.commit() diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py index c2c5a66..4cf5e48 100644 --- a/gallery_dl/extractor/500px.py +++ b/gallery_dl/extractor/500px.py @@ -11,7 +11,6 @@ from .common import Extractor, Message import json - BASE_PATTERN = r"(?:https?://)?(?:web\.)?500px\.com" @@ -78,15 +77,14 @@ class _500pxExtractor(Extractor): headers = {"Origin": self.root, "X-CSRF-Token": csrf_token} return self.request(url, headers=headers, params=params).json() - def _request_graphql(self, opname, variables, query_hash): + def _request_graphql(self, opname, variables): url = "https://api.500px.com/graphql" - params = { + data = { "operationName": opname, "variables" : json.dumps(variables), - "extensions" : '{"persistedQuery":{"version":1' - ',"sha256Hash":"' + query_hash + '"}}', + "query" : QUERIES[opname], } - return self.request(url, params=params).json()["data"] + return self.request(url, method="POST", json=data).json()["data"] class _500pxUserExtractor(_500pxExtractor): @@ -111,8 +109,6 @@ class _500pxUserExtractor(_500pxExtractor): variables = {"username": self.user, "pageSize": 20} photos = self._request_graphql( "OtherPhotosQuery", variables, - "018a5e5117bd72bdf28066aad02c4f2d" - "8acdf7f6127215d231da60e24080eb1b", )["user"]["photos"] while True: @@ -124,8 +120,6 @@ class _500pxUserExtractor(_500pxExtractor): variables["cursor"] = photos["pageInfo"]["endCursor"] photos = self._request_graphql( "OtherPhotosPaginationContainerQuery", variables, - "b4af70d42c71a5e43f0be36ce60dc81e" - "9742ebc117cde197350f2b86b5977d98", )["userByUsername"]["photos"] @@ -159,7 +153,6 @@ class _500pxGalleryExtractor(_500pxExtractor): def metadata(self): user = self._request_graphql( "ProfileRendererQuery", {"username": self.user_name}, - "fcecc7028c308115b0defebc63acec3fe3c12df86a602c3e1785ba5cfb8fff47", )["profile"] self.user_id = str(user["legacyId"]) @@ -172,7 +165,6 @@ class _500pxGalleryExtractor(_500pxExtractor): } gallery = self._request_graphql( "GalleriesDetailQueryRendererQuery", variables, - "eda3c77ca4efe4b3347ec9c08befe3bd2c58099ebfb1f680d829fcd26d34f12d", )["gallery"] self._photos = gallery["photos"] @@ -200,8 +192,6 @@ class _500pxGalleryExtractor(_500pxExtractor): variables["cursor"] = photos["pageInfo"]["endCursor"] photos = self._request_graphql( "GalleriesDetailPaginationContainerQuery", variables, - "466cf6661a07e7fdca465edb39118efb" - "80fb157c6d3f620c7f518cdae0832c78", )["galleryByOwnerIdAndSlugOrToken"]["photos"] @@ -261,3 +251,394 @@ class _500pxImageExtractor(_500pxExtractor): def photos(self): edges = ({"node": {"legacyId": self.photo_id}},) return self._extend(edges) + + +QUERIES = { + + "OtherPhotosQuery": """\ +query OtherPhotosQuery($username: String!, $pageSize: Int) { + user: userByUsername(username: $username) { + ...OtherPhotosPaginationContainer_user_RlXb8 + id + } +} + +fragment OtherPhotosPaginationContainer_user_RlXb8 on User { + photos(first: $pageSize, privacy: PROFILE, sort: ID_DESC) { + edges { + node { + id + legacyId + canonicalPath + width + height + name + isLikedByMe + notSafeForWork + photographer: uploader { + id + legacyId + username + displayName + canonicalPath + followedByUsers { + isFollowedByMe + } + } + images(sizes: [33, 35]) { + size + url + jpegUrl + webpUrl + id + } + __typename + } + cursor + } + totalCount + pageInfo { + endCursor + hasNextPage + } + } +} +""", + + "OtherPhotosPaginationContainerQuery": """\ +query OtherPhotosPaginationContainerQuery($username: String!, $pageSize: Int, $cursor: String) { + userByUsername(username: $username) { + ...OtherPhotosPaginationContainer_user_3e6UuE + id + } +} + +fragment OtherPhotosPaginationContainer_user_3e6UuE on User { + photos(first: $pageSize, after: $cursor, privacy: PROFILE, sort: ID_DESC) { + edges { + node { + id + legacyId + canonicalPath + width + height + name + isLikedByMe + notSafeForWork + photographer: uploader { + id + legacyId + username + displayName + canonicalPath + followedByUsers { + isFollowedByMe + } + } + images(sizes: [33, 35]) { + size + url + jpegUrl + webpUrl + id + } + __typename + } + cursor + } + totalCount + pageInfo { + endCursor + hasNextPage + } + } +} +""", + + "ProfileRendererQuery": """\ +query ProfileRendererQuery($username: String!) { + profile: userByUsername(username: $username) { + id + legacyId + userType: type + username + firstName + displayName + registeredAt + canonicalPath + avatar { + ...ProfileAvatar_avatar + id + } + userProfile { + firstname + lastname + state + country + city + about + id + } + socialMedia { + website + twitter + instagram + facebook + id + } + coverPhotoUrl + followedByUsers { + totalCount + isFollowedByMe + } + followingUsers { + totalCount + } + membership { + expiryDate + membershipTier: tier + photoUploadQuota + refreshPhotoUploadQuotaAt + paymentStatus + id + } + profileTabs { + tabs { + name + visible + } + } + ...EditCover_cover + photoStats { + likeCount + viewCount + } + photos(privacy: PROFILE) { + totalCount + } + licensingPhotos(status: ACCEPTED) { + totalCount + } + portfolio { + id + status + userDisabled + } + } +} + +fragment EditCover_cover on User { + coverPhotoUrl +} + +fragment ProfileAvatar_avatar on UserAvatar { + images(sizes: [MEDIUM, LARGE]) { + size + url + id + } +} +""", + + "GalleriesDetailQueryRendererQuery": """\ +query GalleriesDetailQueryRendererQuery($galleryOwnerLegacyId: ID!, $ownerLegacyId: String, $slug: String, $token: String, $pageSize: Int, $gallerySize: Int) { + galleries(galleryOwnerLegacyId: $galleryOwnerLegacyId, first: $gallerySize) { + edges { + node { + legacyId + description + name + privacy + canonicalPath + notSafeForWork + buttonName + externalUrl + cover { + images(sizes: [35, 33]) { + size + webpUrl + jpegUrl + id + } + id + } + photos { + totalCount + } + id + } + } + } + gallery: galleryByOwnerIdAndSlugOrToken(ownerLegacyId: $ownerLegacyId, slug: $slug, token: $token) { + ...GalleriesDetailPaginationContainer_gallery_RlXb8 + id + } +} + +fragment GalleriesDetailPaginationContainer_gallery_RlXb8 on Gallery { + id + legacyId + name + privacy + notSafeForWork + ownPhotosOnly + canonicalPath + publicSlug + lastPublishedAt + photosAddedSinceLastPublished + reportStatus + creator { + legacyId + id + } + cover { + images(sizes: [33, 32, 36, 2048]) { + url + size + webpUrl + id + } + id + } + description + externalUrl + buttonName + photos(first: $pageSize) { + totalCount + edges { + cursor + node { + id + legacyId + canonicalPath + name + description + category + uploadedAt + location + width + height + isLikedByMe + photographer: uploader { + id + legacyId + username + displayName + canonicalPath + avatar { + images(sizes: SMALL) { + url + id + } + id + } + followedByUsers { + totalCount + isFollowedByMe + } + } + images(sizes: [33, 32]) { + size + url + webpUrl + id + } + __typename + } + } + pageInfo { + endCursor + hasNextPage + } + } +} +""", + + "GalleriesDetailPaginationContainerQuery": """\ +query GalleriesDetailPaginationContainerQuery($ownerLegacyId: String, $slug: String, $token: String, $pageSize: Int, $cursor: String) { + galleryByOwnerIdAndSlugOrToken(ownerLegacyId: $ownerLegacyId, slug: $slug, token: $token) { + ...GalleriesDetailPaginationContainer_gallery_3e6UuE + id + } +} + +fragment GalleriesDetailPaginationContainer_gallery_3e6UuE on Gallery { + id + legacyId + name + privacy + notSafeForWork + ownPhotosOnly + canonicalPath + publicSlug + lastPublishedAt + photosAddedSinceLastPublished + reportStatus + creator { + legacyId + id + } + cover { + images(sizes: [33, 32, 36, 2048]) { + url + size + webpUrl + id + } + id + } + description + externalUrl + buttonName + photos(first: $pageSize, after: $cursor) { + totalCount + edges { + cursor + node { + id + legacyId + canonicalPath + name + description + category + uploadedAt + location + width + height + isLikedByMe + photographer: uploader { + id + legacyId + username + displayName + canonicalPath + avatar { + images(sizes: SMALL) { + url + id + } + id + } + followedByUsers { + totalCount + isFollowedByMe + } + } + images(sizes: [33, 32]) { + size + url + webpUrl + id + } + __typename + } + } + pageInfo { + endCursor + hasNextPage + } + } +} +""", + +} diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py index a057b84..e354cb7 100644 --- a/gallery_dl/extractor/cyberdrop.py +++ b/gallery_dl/extractor/cyberdrop.py @@ -8,7 +8,7 @@ from .common import Extractor, Message from .. import text -import base64 +import binascii class CyberdropAlbumExtractor(Extractor): @@ -52,7 +52,7 @@ class CyberdropAlbumExtractor(Extractor): yield Message.Directory, data for file_b64 in files: - file = base64.b64decode(file_b64.encode()).decode() + file = binascii.a2b_base64(file_b64).decode() text.nameext_from_url(file, data) data["filename"], _, data["id"] = data["filename"].rpartition("-") yield Message.Url, "https://f.cyberdrop.cc/" + file, data diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 9a461a4..70e268d 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -918,7 +918,7 @@ class DeviantartOAuthAPI(): def __init__(self, extractor): self.extractor = extractor self.log = extractor.log - self.headers = {} + self.headers = {"dA-minor-version": "20200519"} self.delay = extractor.config("wait-min", 0) self.delay_min = max(2, self.delay) diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 5962b9e..5ea3adb 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -90,7 +90,9 @@ BASE_PATTERN = FoolfuukaExtractor.update({ }, "thebarchive": { "root": "https://thebarchive.com", - "pattern": r"thebarchive\.com", + }, + "wakarimasen": { + "root": "https://archive.wakarimasen.moe", }, }) @@ -137,6 +139,9 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): ("https://thebarchive.com/b/thread/739772332/", { "url": "07d39d2cb48f40fb337dc992993d965b0cd5f7cd", }), + ("https://archive.wakarimasen.moe/a/thread/223157648/", { + "url": "fef0758d2eb81b1ba783051fd5ec491d70107a78", + }), ) def __init__(self, match): @@ -175,6 +180,7 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor): ("https://archive.nyafuu.org/c/"), ("https://rbt.asia/g/"), ("https://thebarchive.com/b/"), + ("https://archive.wakarimasen.moe/a/"), ) def __init__(self, match): @@ -218,6 +224,7 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor): ("https://archive.nyafuu.org/_/search/text/test/"), ("https://rbt.asia/_/search/text/test/"), ("https://thebarchive.com/_/search/text/test/"), + ("https://archive.wakarimasen.moe/a/search/text/test/"), ) def __init__(self, match): diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index a7b0356..86e1678 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Mike Fährmann +# Copyright 2020-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,7 +11,6 @@ from .common import Extractor, Message from .. import text, util - BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?furaffinity\.net" @@ -19,7 +18,7 @@ class FuraffinityExtractor(Extractor): """Base class for furaffinity extractors""" category = "furaffinity" directory_fmt = ("{category}", "{user!l}") - filename_fmt = "{id} {title}.{extension}" + filename_fmt = "{id}{title:? //}.{extension}" archive_fmt = "{id}" cookiedomain = ".furaffinity.net" root = "https://www.furaffinity.net" @@ -55,9 +54,6 @@ class FuraffinityExtractor(Extractor): def _parse_post(self, post_id): url = "{}/view/{}/".format(self.root, post_id) extr = text.extract_from(self.request(url).text) - title, _, artist = text.unescape(extr( - 'property="og:title" content="', '"')).rpartition(" by ") - artist_url = artist.replace("_", "").lower() path = extr('href="//d', '"') if not path: @@ -74,18 +70,16 @@ class FuraffinityExtractor(Extractor): rh = text.remove_html data = text.nameext_from_url(path, { - "id" : pi(post_id), - "title" : title, - "artist" : artist, - "artist_url": artist_url, - "user" : self.user or artist_url, - "url" : "https://d" + path + "id" : pi(post_id), + "url": "https://d" + path, }) tags = extr('class="tags-row">', '') if tags: # new site layout data["tags"] = text.split_html(tags) + data["title"] = text.unescape(extr("

", "

")) + data["artist"] = extr("", "<") data["description"] = self._process_description(extr( 'class="section-body">', '')) data["views"] = pi(rh(extr('class="views">', ''))) @@ -100,6 +94,8 @@ class FuraffinityExtractor(Extractor): data["height"] = pi(extr("", "p")) else: # old site layout + data["title"] = text.unescape(extr("

", "

")) + data["artist"] = extr(">", "<") data["fa_category"] = extr("Category:", "<").strip() data["theme"] = extr("Theme:", "<").strip() data["species"] = extr("Species:", "<").strip() @@ -114,6 +110,9 @@ class FuraffinityExtractor(Extractor): data["rating"] = extr('', ' ')
             data[", "")) + + data["artist_url"] = data["artist"].replace("_", "").lower() + data["user"] = self.user or data["artist_url"] data["date"] = text.parse_timestamp(data["filename"].partition(".")[0]) return data diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py index 4e62165..5732816 100644 --- a/gallery_dl/extractor/gfycat.py +++ b/gallery_dl/extractor/gfycat.py @@ -177,7 +177,10 @@ class GfycatAPI(): @cache(keyarg=1, maxage=3600) def _authenticate_impl(self, category): - url = "https://weblogin." + category + ".com/oauth/webtoken" + if category == "redgifs": + url = "https://api.redgifs.com/v1/oauth/webtoken" + else: + url = "https://weblogin." + category + ".com/oauth/webtoken" data = {"access_key": self.ACCESS_KEY} headers = {"Referer": self.extractor.root + "/", "Origin" : self.extractor.root} diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py index 93ef6f1..7ad06c9 100644 --- a/gallery_dl/extractor/hiperdex.py +++ b/gallery_dl/extractor/hiperdex.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Mike Fährmann +# Copyright 2020-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -13,14 +13,13 @@ from .. import text from ..cache import memcache import re - -BASE_PATTERN = r"(?:https?://)?(?:www\.)?hiperdex\.(?:com|net|info)" +BASE_PATTERN = r"(?:https?://)?(?:www\.)?hiperdex\d?\.(?:com|net|info)" class HiperdexBase(): """Base class for hiperdex extractors""" category = "hiperdex" - root = "https://hiperdex.com" + root = "https://hiperdex2.com" @memcache(keyarg=1) def manga_data(self, manga, page=None): @@ -66,8 +65,8 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): """Extractor for manga chapters from hiperdex.com""" pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))" test = ( - ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", { - "pattern": r"https://hiperdex.(com|net|info)/wp-content/uploads" + ("https://hiperdex2.com/manga/domestic-na-kanojo/154-5/", { + "pattern": r"https://hiperdex\d?.(com|net|info)/wp-content/uploads" r"/WP-manga/data/manga_\w+/[0-9a-f]{32}/\d+\.webp", "count": 9, "keyword": { @@ -107,7 +106,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor): chapterclass = HiperdexChapterExtractor pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$" test = ( - ("https://hiperdex.com/manga/youre-not-that-special/", { + ("https://hiperdex2.com/manga/youre-not-that-special/", { "count": 51, "pattern": HiperdexChapterExtractor.pattern, "keyword": { @@ -159,7 +158,7 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor): reverse = False pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/([^/?#]+))" test = ( - ("https://hiperdex.com/manga-artist/beck-ho-an/"), + ("https://hiperdex2.com/manga-artist/beck-ho-an/"), ("https://hiperdex.net/manga-artist/beck-ho-an/"), ("https://hiperdex.info/manga-artist/beck-ho-an/"), ("https://hiperdex.com/manga-author/viagra/", { diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 994e1b7..497509d 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -35,12 +35,12 @@ class HitomiGalleryExtractor(GalleryExtractor): }), # Game CG with scenes (#321) ("https://hitomi.la/galleries/733697.html", { - "url": "ec3fe9b708ee376ec579b90d053ad485c0777552", + "url": "8dfbcb1e51cec43a7112d58b7e92153155ada3b9", "count": 210, }), # fallback for galleries only available through /reader/ URLs ("https://hitomi.la/galleries/1045954.html", { - "url": "bf4ed4e726204da5bc37a236ca476a2a96081388", + "url": "a5af7fdca1f5c93c289af128914a8488ea345036", "count": 1413, }), # gallery with "broken" redirect @@ -140,8 +140,8 @@ class HitomiGalleryExtractor(GalleryExtractor): # see https://ltn.hitomi.la/common.js inum = int(ihash[-3:-1], 16) - frontends = 2 if inum < 0x30 else 3 - inum = 1 if inum < 0x09 else inum + frontends = 2 if inum < 0x70 else 3 + inum = 1 if inum < 0x49 else inum url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format( chr(97 + (inum % frontends)), diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py index 3d4bcfb..9701f1e 100644 --- a/gallery_dl/extractor/idolcomplex.py +++ b/gallery_dl/extractor/idolcomplex.py @@ -132,11 +132,16 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor): archive_fmt = "t_{search_tags}_{id}" pattern = r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)" test = ( - ("https://idol.sankakucomplex.com/?tags=lyumos+wreath", { - "count": ">= 6", + ("https://idol.sankakucomplex.com/?tags=lyumos", { + "count": 5, + "range": "18-22", "pattern": r"https://is\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}" r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+", }), + ("https://idol.sankakucomplex.com/?tags=order:favcount", { + "count": 5, + "range": "18-22", + }), ("https://idol.sankakucomplex.com" "/?tags=lyumos+wreath&page=3&next=694215"), ) @@ -184,21 +189,21 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor): while True: page = self.request(self.root, params=params, retries=10).text pos = page.find("