From bc435e826dbe37969d9cbe280f58810d054932cc Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Thu, 9 Jan 2020 22:31:29 -0500 Subject: New upstream version 1.12.2 --- PKG-INFO | 8 ++-- README.rst | 6 +-- data/man/gallery-dl.1 | 2 +- data/man/gallery-dl.conf.5 | 16 ++++++- gallery_dl.egg-info/PKG-INFO | 8 ++-- gallery_dl/cache.py | 2 +- gallery_dl/extractor/artstation.py | 59 ++++++++++++++---------- gallery_dl/extractor/common.py | 23 +++++++++- gallery_dl/extractor/deviantart.py | 87 +++++++++++++++++++++++++++++------- gallery_dl/extractor/directlink.py | 2 + gallery_dl/extractor/exhentai.py | 2 +- gallery_dl/extractor/flickr.py | 3 +- gallery_dl/extractor/foolfuuka.py | 4 +- gallery_dl/extractor/imagefap.py | 28 +++++++----- gallery_dl/extractor/imgur.py | 2 +- gallery_dl/extractor/luscious.py | 4 +- gallery_dl/extractor/mangadex.py | 22 ++++----- gallery_dl/extractor/mangahere.py | 9 ++++ gallery_dl/extractor/mastodon.py | 3 +- gallery_dl/extractor/newgrounds.py | 2 +- gallery_dl/extractor/pinterest.py | 2 +- gallery_dl/extractor/pixiv.py | 50 +++++++++++++++++---- gallery_dl/extractor/reddit.py | 12 ++--- gallery_dl/extractor/smugmug.py | 3 +- gallery_dl/extractor/tumblr.py | 27 ++++------- gallery_dl/extractor/twitter.py | 58 +++++++++++++++++++----- gallery_dl/extractor/wikiart.py | 4 +- gallery_dl/job.py | 21 +++++++-- gallery_dl/postprocessor/metadata.py | 31 +++++++++---- gallery_dl/util.py | 11 ++++- gallery_dl/version.py | 2 +- test/test_postprocessor.py | 29 +++++++++++- 32 files changed, 392 insertions(+), 150 deletions(-) diff --git a/PKG-INFO b/PKG-INFO index b7094a1..0e6429e 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.12.1 +Version: 1.12.2 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -95,8 +95,8 @@ Description: ========== put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows `__ - - `Linux `__ + - `Windows `__ + - `Linux `__ These executables include a Python 3.7 interpreter and all required Python packages. @@ -241,7 +241,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.1.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.2.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/README.rst b/README.rst index 90ca29a..e6846b6 100644 --- a/README.rst +++ b/README.rst @@ -84,8 +84,8 @@ Download a standalone executable file, put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ These executables include a Python 3.7 interpreter and all required Python packages. @@ -230,7 +230,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.1.tar.gz +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.2.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index a530760..7249537 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2019-12-22" "1.12.1" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2020-01-05" "1.12.2" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 07f1b88..7e7993a 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2019-12-22" "1.12.1" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2020-01-05" "1.12.2" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -1662,6 +1662,20 @@ Select how to write metadata. * \f[I]"custom"\f[]: result of applying \f[I]metadata.content-format\f[] to a file's metadata dictionary +.SS metadata.directory +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"."\f[] + +.IP "Example:" 4 +"metadata" + +.IP "Description:" 4 +Directory where metadata files are stored in relative to the +current target location for file downloads. + .SS metadata.extension .IP "Type:" 6 \f[I]string\f[] diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index a8700a6..3aa6d61 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.12.1 +Version: 1.12.2 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -95,8 +95,8 @@ Description: ========== put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows `__ - - `Linux `__ + - `Windows `__ + - `Linux `__ These executables include a Python 3.7 interpreter and all required Python packages. @@ -241,7 +241,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.1.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.2.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py index 1824195..c48b53f 100644 --- a/gallery_dl/cache.py +++ b/gallery_dl/cache.py @@ -37,7 +37,7 @@ class CacheDecorator(): def update(self, key, value): self.cache[key] = value - def invalidate(self, key): + def invalidate(self, key=""): try: del self.cache[key] except KeyError: diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index 2892bd4..ceda29c 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -83,14 +83,20 @@ class ArtstationExtractor(Extractor): response = self.request(url, notfound="user") return response.json() - def _pagination(self, url, params=None): - if not params: - params = {} + def _pagination(self, url, params=None, json=None): + if json: + params = json + kwargs = {"json": json} + else: + if not params: + params = {} + kwargs = {"params": params} + params["page"] = 1 total = 0 while True: - data = self.request(url, params=params).json() + data = self.request(url, **kwargs).json() yield from data["data"] total += len(data["data"]) @@ -268,34 +274,38 @@ class ArtstationChallengeExtractor(ArtstationExtractor): class ArtstationSearchExtractor(ArtstationExtractor): """Extractor for artstation search results""" subcategory = "search" - directory_fmt = ("{category}", "Searches", "{search[searchterm]}") - archive_fmt = "s_{search[searchterm]}_{asset[id]}" + directory_fmt = ("{category}", "Searches", "{search[query]}") + archive_fmt = "s_{search[query]}_{asset[id]}" pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com" r"/search/?\?([^#]+)") - test = ("https://www.artstation.com/search?sorting=recent&q=ancient",) + test = ("https://www.artstation.com/search?q=ancient&sort_by=rank", { + "range": "1-20", + "count": 20, + }) def __init__(self, match): ArtstationExtractor.__init__(self, match) query = text.parse_query(match.group(1)) - self.searchterm = query.get("q", "") - self.order = query.get("sorting", "recent").lower() + self.query = query.get("q", "") + self.sorting = query.get("sort_by", "rank").lower() def metadata(self): return {"search": { - "searchterm": self.searchterm, - "order": self.order, + "query" : self.query, + "sorting": self.sorting, }} def projects(self): - order = "likes_count" if self.order == "likes" else "published_at" - url = "{}/search/projects.json".format(self.root) - params = { - "direction": "desc", - "order": order, - "q": self.searchterm, - # "show_pro_first": "true", - } - return self._pagination(url, params) + url = "{}/api/v2/search/projects.json".format(self.root) + return self._pagination(url, json={ + "additional_fields": "[]", + "filters" : "[]", + "page" : None, + "per_page" : "50", + "pro_first" : "1", + "query" : self.query, + "sorting" : self.sorting, + }) class ArtstationArtworkExtractor(ArtstationExtractor): @@ -305,7 +315,10 @@ class ArtstationArtworkExtractor(ArtstationExtractor): archive_fmt = "A_{asset[id]}" pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com" r"/artwork/?\?([^#]+)") - test = ("https://www.artstation.com/artwork?sorting=latest",) + test = ("https://www.artstation.com/artwork?sorting=latest", { + "range": "1-20", + "count": 20, + }) def __init__(self, match): ArtstationExtractor.__init__(self, match) @@ -316,9 +329,7 @@ class ArtstationArtworkExtractor(ArtstationExtractor): def projects(self): url = "{}/projects.json".format(self.root) - params = self.query.copy() - params["page"] = 1 - return self._pagination(url, params) + return self._pagination(url, self.query.copy()) class ArtstationImageExtractor(ArtstationExtractor): diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index a1a4890..380bcc7 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2019 Mike Fährmann +# Copyright 2014-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -122,6 +122,24 @@ class Extractor(): raise exception.HttpError(msg) + def wait(self, *, seconds=None, until=None, reason=None, adjust=1): + now = datetime.datetime.now() + + if seconds: + seconds = float(seconds) + until = now + datetime.timedelta(seconds=seconds) + elif until: + until = datetime.datetime.fromtimestamp(float(until)) + seconds = (until - now).total_seconds() + else: + raise ValueError("Either 'seconds' or 'until' is required") + + if reason: + t = until.time() + isotime = "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second) + self.log.info("Waiting until %s for %s.", isotime, reason) + time.sleep(seconds + adjust) + def _get_auth_info(self): """Return authentication information as (username, password) tuple""" username = self.config("username") @@ -170,6 +188,9 @@ class Extractor(): def _init_cookies(self): """Populate the session's cookiejar""" + if self.cookiedomain is None: + return + cookies = self.config("cookies") if cookies: if isinstance(cookies, dict): diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 604966f..02a14e3 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2019 Mike Fährmann +# Copyright 2015-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -31,6 +31,7 @@ class DeviantartExtractor(Extractor): category = "deviantart" directory_fmt = ("{category}", "{username}") filename_fmt = "{category}_{index}_{title}.{extension}" + cookiedomain = None root = "https://www.deviantart.com" def __init__(self, match=None): @@ -475,7 +476,7 @@ class DeviantartFavoriteExtractor(DeviantartExtractor): subcategory = "favorite" directory_fmt = ("{category}", "{username}", "Favourites") archive_fmt = "f_{_username}_{index}.{extension}" - pattern = BASE_PATTERN + r"/favourites/?(?:\?catpath=/)?$" + pattern = BASE_PATTERN + r"/favourites(?:/all|/?\?catpath=)?/?$" test = ( ("https://www.deviantart.com/h3813067/favourites/", { "options": (("metadata", True), ("flat", False)), # issue #271 @@ -484,8 +485,10 @@ class DeviantartFavoriteExtractor(DeviantartExtractor): ("https://www.deviantart.com/h3813067/favourites/", { "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", }), + ("https://www.deviantart.com/h3813067/favourites/all"), ("https://www.deviantart.com/h3813067/favourites/?catpath=/"), ("https://h3813067.deviantart.com/favourites/"), + ("https://h3813067.deviantart.com/favourites/all"), ("https://h3813067.deviantart.com/favourites/?catpath=/"), ) @@ -573,12 +576,22 @@ class DeviantartPopularExtractor(DeviantartExtractor): directory_fmt = ("{category}", "Popular", "{popular[range]}", "{popular[search]}") archive_fmt = "P_{popular[range]}_{popular[search]}_{index}.{extension}" - pattern = (r"(?:https?://)?www\.deviantart\.com" - r"((?:/\w+)*)/(?:popular-([^/?&#]+))/?(?:\?([^#]*))?") + pattern = (r"(?:https?://)?www\.deviantart\.com/(?:" + r"search(?:/deviations)?" + r"|(?:deviations/?)?\?order=(popular-[^/?&#]+)" + r"|((?:[\w-]+/)*)(popular-[^/?&#]+)" + r")/?(?:\?([^#]*))?") test = ( + ("https://www.deviantart.com/?order=popular-all-time", { + "options": (("original", False),), + "range": "1-30", + "count": 30, + }), ("https://www.deviantart.com/popular-24-hours/?q=tree+house", { "options": (("original", False),), }), + ("https://www.deviantart.com/search?q=tree"), + ("https://www.deviantart.com/search/deviations?order=popular-1-week"), ("https://www.deviantart.com/artisan/popular-all-time/?q=tree"), ) @@ -587,13 +600,20 @@ class DeviantartPopularExtractor(DeviantartExtractor): self.search_term = self.time_range = self.category_path = None self.user = "" - path, trange, query = match.groups() + trange1, path, trange2, query = match.groups() + trange = trange1 or trange2 + query = text.parse_query(query) + + if not trange: + trange = query.get("order") + if path: - self.category_path = path.lstrip("/") + self.category_path = path.strip("/") if trange: + trange = trange[8:] if trange.startswith("popular-") else "" self.time_range = trange.replace("-", "").replace("hours", "hr") if query: - self.search_term = text.parse_query(query).get("q") + self.search_term = query.get("q") self.popular = { "search": self.search_term or "", @@ -739,6 +759,15 @@ class DeviantartExtractorV2(DeviantartExtractor): deviation["target"] = target return deviation + def _pagination(self, url, params, headers=None): + while True: + data = self.request(url, params=params, headers=headers).json() + yield from data["results"] + + if not data["hasMore"]: + return + params["offset"] = data["nextOffset"] + class DeviantartDeviationExtractor(DeviantartExtractorV2): """Extractor for single deviations""" @@ -863,15 +892,40 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2): "Referer": "{}/{}/gallery/scraps".format(self.root, self.user), } - while True: - data = self.request(url, params=params, headers=headers).json() + for obj in self._pagination(url, params, headers): + yield obj["deviation"] - for obj in data["results"]: - yield obj["deviation"] - if not data["hasMore"]: - return - params["offset"] = data["nextOffset"] +class DeviantartFollowingExtractor(DeviantartExtractorV2): + subcategory = "following" + pattern = BASE_PATTERN + "/about#watching$" + test = ("https://www.deviantart.com/shimoda7/about#watching", { + "pattern": DeviantartUserExtractor.pattern, + "range": "1-50", + "count": 50, + }) + + def items(self): + url = "{}/_napi/da-user-profile/api/module/watching".format(self.root) + params = { + "username": self.user, + "moduleid": self._module_id(self.user), + "offset" : "0", + "limit" : "24", + } + + yield Message.Version, 1 + for user in self._pagination(url, params): + url = "{}/{}".format(self.root, user["username"]) + yield Message.Queue, url, user + + def _module_id(self, username): + url = "{}/{}/about".format(self.root, username) + page = self.request(url).text + pos = page.find('\\"type\\":\\"watching\\"') + if pos < 0: + raise exception.NotFoundError("module") + return text.rextract(page, '\\"id\\":', ',', pos)[0].strip('" ') class DeviantartAPI(): @@ -1076,7 +1130,7 @@ class DeviantartAPI(): return data def _pagination(self, endpoint, params, extend=True): - public = True + public = warn = True while True: data = self._call(endpoint, params, public=public) if "results" not in data: @@ -1089,7 +1143,8 @@ class DeviantartAPI(): self.log.debug("Switching to private access token") public = False continue - elif data["has_more"]: + elif data["has_more"] and warn: + warn = False self.log.warning( "Private deviations detected! Run 'gallery-dl " "oauth:deviantart' and follow the instructions to " diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py index 80db096..1d17658 100644 --- a/gallery_dl/extractor/directlink.py +++ b/gallery_dl/extractor/directlink.py @@ -55,9 +55,11 @@ class DirectlinkExtractor(Extractor): for key, value in data.items(): if value: data[key] = text.unquote(value) + data["path"], _, name = data["path"].rpartition("/") data["filename"], _, ext = name.rpartition(".") data["extension"] = ext.lower() + data["_http_headers"] = {"Referer": self.url} yield Message.Version, 1 yield Message.Directory, data diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index cba9627..6cc3abc 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -23,7 +23,7 @@ BASE_PATTERN = r"(?:https?://)?(e[x-]|g\.e-)hentai\.org" class ExhentaiExtractor(Extractor): """Base class for exhentai extractors""" category = "exhentai" - directory_fmt = ("{category}", "{gallery_id} {title}") + directory_fmt = ("{category}", "{gallery_id} {title[:247]}") filename_fmt = ( "{gallery_id}_{num:>04}_{image_token}_{filename}.{extension}") archive_fmt = "{gallery_id}_{num}" diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py index bd34bdb..967fd9c 100644 --- a/gallery_dl/extractor/flickr.py +++ b/gallery_dl/extractor/flickr.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2019 Mike Fährmann +# Copyright 2017-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -16,6 +16,7 @@ class FlickrExtractor(Extractor): """Base class for flickr extractors""" category = "flickr" filename_fmt = "{category}_{id}.{extension}" + cookiedomain = None def __init__(self, match): Extractor.__init__(self, match) diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 428f3c3..4af9d4a 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -120,8 +120,8 @@ EXTRACTORS = { }, "fireden": { "root": "https://boards.fireden.net", - "test-thread": ("https://boards.fireden.net/a/thread/159803223/", { - "url": "01b7baacfb0656a68e566368290e3072b27f86c9", + "test-thread": ("https://boards.fireden.net/sci/thread/11264294/", { + "url": "3adfe181ee86a8c23021c705f623b3657a9b0a43", }), }, "nyafuu": { diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py index d6eea7f..fd97605 100644 --- a/gallery_dl/extractor/imagefap.py +++ b/gallery_dl/extractor/imagefap.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2019 Mike Fährmann +# Copyright 2016-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -13,6 +13,9 @@ from .. import text import json +BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com" + + class ImagefapExtractor(Extractor): """Base class for imagefap extractors""" category = "imagefap" @@ -29,8 +32,8 @@ class ImagefapExtractor(Extractor): class ImagefapGalleryExtractor(ImagefapExtractor): """Extractor for image galleries from imagefap.com""" subcategory = "gallery" - pattern = (r"(?:https?://)?(?:www\.)?imagefap\.com/" - r"(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)") + pattern = BASE_PATTERN + r"/(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)" + test = ( ("https://www.imagefap.com/pictures/7102714", { "pattern": r"https://cdn.imagefap.com/images/full/\d+/\d+/\d+.jpg", @@ -42,6 +45,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor): "keyword": "3e24eace5b09639b881ebd393165862feb46adde", }), ("https://www.imagefap.com/gallery.php?gid=7102714"), + ("https://beta.imagefap.com/gallery.php?gid=7102714"), ) def __init__(self, match): @@ -99,11 +103,14 @@ class ImagefapGalleryExtractor(ImagefapExtractor): class ImagefapImageExtractor(ImagefapExtractor): """Extractor for single images from imagefap.com""" subcategory = "image" - pattern = r"(?:https?://)?(?:www\.)?imagefap\.com/photo/(\d+)" - test = ("https://www.imagefap.com/photo/1369341772/", { - "pattern": r"https://cdn.imagefap.com/images/full/\d+/\d+/\d+.jpg", - "keyword": "8894e45f7262020d8d66ce59917315def1fc475b", - }) + pattern = BASE_PATTERN + r"/photo/(\d+)" + test = ( + ("https://www.imagefap.com/photo/1369341772/", { + "pattern": r"https://cdn.imagefap.com/images/full/\d+/\d+/\d+.jpg", + "keyword": "8894e45f7262020d8d66ce59917315def1fc475b", + }), + ("https://beta.imagefap.com/photo/1369341772/"), + ) def __init__(self, match): ImagefapExtractor.__init__(self, match) @@ -143,8 +150,8 @@ class ImagefapUserExtractor(ImagefapExtractor): """Extractor for all galleries from a user at imagefap.com""" subcategory = "user" categorytransfer = True - pattern = (r"(?:https?://)?(?:www\.)?imagefap\.com/" - r"(?:profile(?:\.php\?user=|/)([^/?&#]+)" + pattern = (BASE_PATTERN + + r"/(?:profile(?:\.php\?user=|/)([^/?&#]+)" r"|usergallery\.php\?userid=(\d+))") test = ( ("https://www.imagefap.com/profile/LucyRae/galleries", { @@ -154,6 +161,7 @@ class ImagefapUserExtractor(ImagefapExtractor): "url": "d941aa906f56a75972a7a5283030eb9a8d27a4fd", }), ("https://www.imagefap.com/profile.php?user=LucyRae"), + ("https://beta.imagefap.com/profile.php?user=LucyRae"), ) def __init__(self, match): diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index ce3e1ce..6ff6588 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -153,7 +153,7 @@ class ImgurAlbumExtractor(ImgurExtractor): "is_album" : True, "layout" : "blog", "link" : "https://imgur.com/a/TcBmP", - "nsfw" : False, + "nsfw" : True, "privacy" : "hidden", "section" : None, "title" : "138", diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index 0aeeb4a..c80cf14 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -56,7 +56,7 @@ class LusciousAlbumExtractor(LusciousExtractor): "__typename" : "Album", "audiences" : list, "content" : "Hentai", - "cover" : "re:https://cdnio.luscious.net/.+/277031/", + "cover" : "re:https://\\w+.luscious.net/.+/277031/", "created" : 1479625853, "created_by" : "NTRshouldbeillegal", "date" : "type:datetime", @@ -102,7 +102,7 @@ class LusciousAlbumExtractor(LusciousExtractor): }, }), ("https://luscious.net/albums/virgin-killer-sweater_282582/", { - "url": "21cc68a7548f4d71dfd67d8caf96349dde7e791c", + "url": "0be0cc279be1de99f727764819e03435e2a79915", }), ("https://luscious.net/albums/not-found_277035/", { "exception": exception.NotFoundError, diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index d0eb2a9..558aa9d 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract manga-chapters and entire manga from https://mangadex.org/""" +"""Extract manga-chapters and entire manga from https://mangadex.cc/""" from .common import Extractor, Message from .. import text, util @@ -16,7 +16,7 @@ from ..cache import memcache class MangadexExtractor(Extractor): """Base class for mangadex extractors""" category = "mangadex" - root = "https://mangadex.org" + root = "https://mangadex.cc" # mangadex-to-iso639-1 codes iso639_map = { @@ -39,7 +39,7 @@ class MangadexExtractor(Extractor): class MangadexChapterExtractor(MangadexExtractor): - """Extractor for manga-chapters from mangadex.org""" + """Extractor for manga-chapters from mangadex.cc""" subcategory = "chapter" directory_fmt = ( "{category}", "{manga}", @@ -47,14 +47,14 @@ class MangadexChapterExtractor(MangadexExtractor): filename_fmt = ( "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}") archive_fmt = "{chapter_id}_{page}" - pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)/chapter/(\d+)" + pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc|com)/chapter/(\d+)" test = ( - ("https://mangadex.org/chapter/122094", { + ("https://mangadex.cc/chapter/122094", { "keyword": "1c834dca33025f521e1874aee1f71c51e28ebf99", - "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f", + # "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f", }), # oneshot - ("https://mangadex.org/chapter/138086", { + ("https://mangadex.cc/chapter/138086", { "count": 64, "keyword": "178777bd0352fb19eb934cbee5630d16e3fb60ab", }), @@ -107,14 +107,14 @@ class MangadexChapterExtractor(MangadexExtractor): class MangadexMangaExtractor(MangadexExtractor): - """Extractor for manga from mangadex.org""" + """Extractor for manga from mangadex.cc""" subcategory = "manga" categorytransfer = True - pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)" + pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc|com)" r"/(?:title|manga)/(\d+)") test = ( - ("https://mangadex.org/manga/2946/souten-no-koumori", { - "pattern": r"https://mangadex.org/chapter/\d+", + ("https://mangadex.cc/manga/2946/souten-no-koumori", { + "pattern": r"https://mangadex.cc/chapter/\d+", "keywords": { "manga": "Souten no Koumori", "manga_id": 2946, diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py index e15acbe..52cc672 100644 --- a/gallery_dl/extractor/mangahere.py +++ b/gallery_dl/extractor/mangahere.py @@ -97,10 +97,19 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor): "url": "654850570aa03825cd57e2ae2904af489602c523", "keyword": "c8084d89a9ea6cf40353093669f9601a39bf5ca2", }), + # adult filter (#556) + ("http://www.mangahere.cc/manga/gunnm_mars_chronicle/", { + "pattern": MangahereChapterExtractor.pattern, + "count": ">= 50", + }), ("https://www.mangahere.co/manga/aria/"), ("https://m.mangahere.co/manga/aria/"), ) + def __init__(self, match): + MangaExtractor.__init__(self, match) + self.session.cookies.set("isAdult", "1", domain="www.mangahere.cc") + def chapters(self, page): results = [] manga, pos = text.extract(page, '', '<').replace(",", "")) data["score"] = text.parse_float(extr('id="score_number">', '<')) data["tags"] = text.split_html(extr( - '
', '
')) + '
', '
')) data["artist"] = [ text.extract(user, '//', '.')[0] for user in text.extract_iter(page, '
', '>') diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index bcdd082..b72a896 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -175,7 +175,7 @@ class PinterestPinitExtractor(PinterestExtractor): "https://pin.it/None", "https://www.pinterest.com"): raise exception.NotFoundError("pin") - yield Message.Queue, location, {} + yield Message.Queue, location, {"_extractor": PinterestPinExtractor} class PinterestAPI(): diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 7901149..36fa0fe 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2019 Mike Fährmann +# Copyright 2014-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -13,6 +13,7 @@ from .. import text, exception from ..cache import cache from datetime import datetime, timedelta import hashlib +import time class PixivExtractor(Extractor): @@ -21,6 +22,7 @@ class PixivExtractor(Extractor): directory_fmt = ("{category}", "{user[id]} {user[account]}") filename_fmt = "{id}_p{num}.{extension}" archive_fmt = "{id}{suffix}.{extension}" + cookiedomain = None def __init__(self, match): Extractor.__init__(self, match) @@ -141,10 +143,11 @@ class PixivMeExtractor(PixivExtractor): def items(self): url = "https://pixiv.me/" + self.account + data = {"_extractor": PixivUserExtractor} response = self.request( url, method="HEAD", allow_redirects=False, notfound="user") yield Message.Version, 1 - yield Message.Queue, response.headers["Location"], {} + yield Message.Queue, response.headers["Location"], data class PixivWorkExtractor(PixivExtractor): @@ -217,6 +220,11 @@ class PixivFavoriteExtractor(PixivExtractor): ("https://www.pixiv.net/bookmark.php", { "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba", }), + # followed users (#515) + ("https://www.pixiv.net/bookmark.php?id=173530&type=user", { + "pattern": PixivUserExtractor.pattern, + "count": ">= 12", + }), # touch URLs ("https://touch.pixiv.net/bookmark.php?id=173530"), ("https://touch.pixiv.net/bookmark.php"), @@ -227,6 +235,9 @@ class PixivFavoriteExtractor(PixivExtractor): self.query = text.parse_query(match.group(1)) if "id" not in self.query: self.subcategory = "bookmark" + elif self.query.get("type") == "user": + self.subcategory = "following" + self.items = self._items_following def works(self): tag = None @@ -249,6 +260,15 @@ class PixivFavoriteExtractor(PixivExtractor): self.user_id = user["id"] return {"user_bookmark": user} + def _items_following(self): + yield Message.Version, 1 + + for preview in self.api.user_following(self.query["id"]): + user = preview["user"] + user["_extractor"] = PixivUserExtractor + url = "https://www.pixiv.net/member.php?id={}".format(user["id"]) + yield Message.Queue, url, user + class PixivRankingExtractor(PixivExtractor): """Extractor for pixiv ranking pages""" @@ -493,6 +513,10 @@ class PixivAppAPI(): params = {"user_id": user_id} return self._call("v1/user/detail", params)["user"] + def user_following(self, user_id): + params = {"user_id": user_id} + return self._pagination("v1/user/following", params, "user_previews") + def user_illusts(self, user_id): params = {"user_id": user_id} return self._pagination("v1/user/illusts", params) @@ -506,17 +530,25 @@ class PixivAppAPI(): self.login() response = self.extractor.request(url, params=params, fatal=False) + data = response.json() + + if "error" in data: + if response.status_code == 404: + raise exception.NotFoundError() + + error = data["error"] + if "rate limit" in (error.get("message") or "").lower(): + self.log.info("Waiting two minutes for API rate limit reset.") + time.sleep(120) + return self._call(endpoint, params) + raise exception.StopExtraction("API request failed: %s", error) - if response.status_code < 400: - return response.json() - if response.status_code == 404: - raise exception.NotFoundError() - raise exception.StopExtraction("API request failed: %s", response.text) + return data - def _pagination(self, endpoint, params): + def _pagination(self, endpoint, params, key="illusts"): while True: data = self._call(endpoint, params) - yield from data["illusts"] + yield from data[key] if not data["next_url"]: return diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index 656148e..4c83019 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2019 Mike Fährmann +# Copyright 2017-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,12 +11,12 @@ from .common import Extractor, Message from .. import text, util, extractor, exception from ..cache import cache -import time class RedditExtractor(Extractor): """Base class for reddit extractors""" category = "reddit" + cookiedomain = None def __init__(self, match): Extractor.__init__(self, match) @@ -277,11 +277,13 @@ class RedditAPI(): params["raw_json"] = 1 self.authenticate() response = self.extractor.request(url, params=params, fatal=None) + remaining = response.headers.get("x-ratelimit-remaining") if remaining and float(remaining) < 2: - wait = int(response.headers["x-ratelimit-reset"]) - self.log.info("Waiting %d seconds for ratelimit reset", wait) - time.sleep(wait) + reset = response.headers["x-ratelimit-reset"] + self.extractor.wait(seconds=reset, reason="rate limit reset") + return self._call(endpoint, params) + data = response.json() if "error" in data: if data["error"] == 403: diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py index 0c13825..69b8cb9 100644 --- a/gallery_dl/extractor/smugmug.py +++ b/gallery_dl/extractor/smugmug.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2019 Mike Fährmann +# Copyright 2018-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -21,6 +21,7 @@ class SmugmugExtractor(Extractor): category = "smugmug" filename_fmt = ("{category}_{User[NickName]:?/_/}" "{Image[UploadKey]}_{Image[ImageKey]}.{extension}") + cookiedomain = None empty_user = { "Uri": "", "ResponseLevel": "Public", diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 1d37419..a1f2199 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2019 Mike Fährmann +# Copyright 2016-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -12,7 +12,6 @@ from .common import Extractor, Message from .. import text, oauth, extractor, exception from datetime import datetime, timedelta import re -import time def _original_inline_image(url): @@ -45,6 +44,7 @@ class TumblrExtractor(Extractor): directory_fmt = ("{category}", "{name}") filename_fmt = "{category}_{blog_name}_{id}_{num:>02}.{extension}" archive_fmt = "{id}_{num}" + cookiedomain = None def __init__(self, match): Extractor.__init__(self, match) @@ -407,27 +407,18 @@ class TumblrAPI(oauth.OAuth1API): # daily rate limit if response.headers.get("x-ratelimit-perday-remaining") == "0": reset = response.headers.get("x-ratelimit-perday-reset") + t = (datetime.now() + timedelta(seconds=float(reset))).time() + + self.log.error("Daily API rate limit exceeded") raise exception.StopExtraction( - "Daily API rate limit exceeded: aborting; " - "rate limit will reset at %s", self._to_time(reset), - ) + "Aborting - Rate limit will reset at %s", + "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second)) # hourly rate limit reset = response.headers.get("x-ratelimit-perhour-reset") if reset: - self.log.info( - "Hourly API rate limit exceeded; waiting until " - "%s for rate limit reset", self._to_time(reset), - ) - time.sleep(int(reset) + 1) + self.log.info("Hourly API rate limit exceeded") + self.extractor.wait(seconds=reset, reason="rate limit reset") return self._call(blog, endpoint, params) raise exception.StopExtraction(data) - - @staticmethod - def _to_time(reset): - try: - reset_time = datetime.now() + timedelta(seconds=int(reset)) - except (ValueError, TypeError): - return "?" - return reset_time.strftime("%H:%M:%S") diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 8ef966f..610e0ee 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -53,10 +53,12 @@ class TwitterExtractor(Extractor): if self.videos == "ytdl": data["extension"] = None - url = "ytdl:{}/{}/status/{}".format( - self.root, data["user"], data["tweet_id"]) + url = "ytdl:{}/i/web/status/{}".format( + self.root, data["tweet_id"]) else: url = self._video_from_tweet(data["tweet_id"]) + if not url: + continue ext = text.ext_from_url(url) if ext == "m3u8": url = "ytdl:" + url @@ -155,6 +157,16 @@ class TwitterExtractor(Extractor): cl, _, cr = content.rpartition("pic.twitter.com/") data["content"] = cl if cl and len(cr) < 16 else content + if extr('
')[2]), + } + return data def _video_from_tweet(self, tweet_id): @@ -173,19 +185,28 @@ class TwitterExtractor(Extractor): if self.logged_in: headers["x-twitter-auth-type"] = "OAuth2Session" else: - token = self._guest_token(headers) + token = _guest_token(self, headers) cookies = {"gt": token} headers["x-guest-token"] = token - data = self.request(url, cookies=cookies, headers=headers).json() - return data["track"]["playbackUrl"] + response = self.request( + url, cookies=cookies, headers=headers, fatal=None) + + if response.status_code == 429 or \ + response.headers.get("x-rate-limit-remaining") == "0": + if self.logged_in: + reset = response.headers.get("x-rate-limit-reset") + self.wait(until=reset, reason="rate limit reset") + else: + _guest_token.invalidate() + return self._video_from_tweet(tweet_id) - @memcache() - def _guest_token(self, headers): - return self.request( - "https://api.twitter.com/1.1/guest/activate.json", - method="POST", headers=headers, - ).json().get("guest_token") + elif response.status_code >= 400: + self.log.warning("Unable to fetch video data for %s ('%s %s')", + tweet_id, response.status_code, response.reason) + return None + + return response.json()["track"]["playbackUrl"] def _tweets_from_api(self, url, max_position=None): params = { @@ -313,12 +334,17 @@ class TwitterTweetExtractor(TwitterExtractor): # Reply to another tweet (#403) ("https://twitter.com/tyson_hesse/status/1103767554424598528", { "options": (("videos", "ytdl"),), - "pattern": r"ytdl:https://twitter.com/.+/1103767554424598528", + "pattern": r"ytdl:https://twitter.com/i/web.+/1103767554424598528", }), # /i/web/ URL ("https://twitter.com/i/web/status/1155074198240292865", { "pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig", }), + # quoted tweet (#526) + ("https://twitter.com/Meiyu_miu/status/1070693241413021696", { + "count": 4, + "keyword": "0c627af2b8cdccc7e0da8fd221155c4a4a3141a8", + }), ) def __init__(self, match): @@ -342,3 +368,11 @@ class TwitterTweetExtractor(TwitterExtractor): end = page.index('class="js-tweet-stats-container') beg = page.rindex('