diff options
author | Unit 193 <unit193@ubuntu.com> | 2020-01-09 22:31:31 -0500 |
---|---|---|
committer | Unit 193 <unit193@ubuntu.com> | 2020-01-09 22:31:31 -0500 |
commit | 89bf167db5a998a217135f55593391a337bdad31 (patch) | |
tree | adcd59e7b6f49e0826ef07f0f0e138282a8ae108 | |
parent | 3bf3f951e09ae597552e35996d843b554e593c78 (diff) | |
parent | bc435e826dbe37969d9cbe280f58810d054932cc (diff) | |
download | gallery-dl-89bf167db5a998a217135f55593391a337bdad31.tar.bz2 gallery-dl-89bf167db5a998a217135f55593391a337bdad31.tar.xz gallery-dl-89bf167db5a998a217135f55593391a337bdad31.tar.zst |
Update upstream source from tag 'upstream/1.12.2'
Update to upstream version '1.12.2'
with Debian dir 767bd062bb1809128547cb7c3ace169e3501bbcc
32 files changed, 392 insertions, 150 deletions
@@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.12.1 +Version: 1.12.2 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -95,8 +95,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.bin>`__ These executables include a Python 3.7 interpreter and all required Python packages. @@ -241,7 +241,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.1.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.2.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -84,8 +84,8 @@ Download a standalone executable file, put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.bin>`__ These executables include a Python 3.7 interpreter and all required Python packages. @@ -230,7 +230,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.1.tar.gz +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.2.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index a530760..7249537 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2019-12-22" "1.12.1" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2020-01-05" "1.12.2" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 07f1b88..7e7993a 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2019-12-22" "1.12.1" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2020-01-05" "1.12.2" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -1662,6 +1662,20 @@ Select how to write metadata. * \f[I]"custom"\f[]: result of applying \f[I]metadata.content-format\f[] to a file's metadata dictionary +.SS metadata.directory +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"."\f[] + +.IP "Example:" 4 +"metadata" + +.IP "Description:" 4 +Directory where metadata files are stored in relative to the +current target location for file downloads. + .SS metadata.extension .IP "Type:" 6 \f[I]string\f[] diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index a8700a6..3aa6d61 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.12.1 +Version: 1.12.2 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -95,8 +95,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.bin>`__ These executables include a Python 3.7 interpreter and all required Python packages. @@ -241,7 +241,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.1.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.2.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py index 1824195..c48b53f 100644 --- a/gallery_dl/cache.py +++ b/gallery_dl/cache.py @@ -37,7 +37,7 @@ class CacheDecorator(): def update(self, key, value): self.cache[key] = value - def invalidate(self, key): + def invalidate(self, key=""): try: del self.cache[key] except KeyError: diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index 2892bd4..ceda29c 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -83,14 +83,20 @@ class ArtstationExtractor(Extractor): response = self.request(url, notfound="user") return response.json() - def _pagination(self, url, params=None): - if not params: - params = {} + def _pagination(self, url, params=None, json=None): + if json: + params = json + kwargs = {"json": json} + else: + if not params: + params = {} + kwargs = {"params": params} + params["page"] = 1 total = 0 while True: - data = self.request(url, params=params).json() + data = self.request(url, **kwargs).json() yield from data["data"] total += len(data["data"]) @@ -268,34 +274,38 @@ class ArtstationChallengeExtractor(ArtstationExtractor): class ArtstationSearchExtractor(ArtstationExtractor): """Extractor for artstation search results""" subcategory = "search" - directory_fmt = ("{category}", "Searches", "{search[searchterm]}") - archive_fmt = "s_{search[searchterm]}_{asset[id]}" + directory_fmt = ("{category}", "Searches", "{search[query]}") + archive_fmt = "s_{search[query]}_{asset[id]}" pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com" r"/search/?\?([^#]+)") - test = ("https://www.artstation.com/search?sorting=recent&q=ancient",) + test = ("https://www.artstation.com/search?q=ancient&sort_by=rank", { + "range": "1-20", + "count": 20, + }) def __init__(self, match): ArtstationExtractor.__init__(self, match) query = text.parse_query(match.group(1)) - self.searchterm = query.get("q", "") - self.order = query.get("sorting", "recent").lower() + self.query = query.get("q", "") + self.sorting = query.get("sort_by", "rank").lower() def metadata(self): return {"search": { - "searchterm": self.searchterm, - "order": self.order, + "query" : self.query, + "sorting": self.sorting, }} def projects(self): - order = "likes_count" if self.order == "likes" else "published_at" - url = "{}/search/projects.json".format(self.root) - params = { - "direction": "desc", - "order": order, - "q": self.searchterm, - # "show_pro_first": "true", - } - return self._pagination(url, params) + url = "{}/api/v2/search/projects.json".format(self.root) + return self._pagination(url, json={ + "additional_fields": "[]", + "filters" : "[]", + "page" : None, + "per_page" : "50", + "pro_first" : "1", + "query" : self.query, + "sorting" : self.sorting, + }) class ArtstationArtworkExtractor(ArtstationExtractor): @@ -305,7 +315,10 @@ class ArtstationArtworkExtractor(ArtstationExtractor): archive_fmt = "A_{asset[id]}" pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com" r"/artwork/?\?([^#]+)") - test = ("https://www.artstation.com/artwork?sorting=latest",) + test = ("https://www.artstation.com/artwork?sorting=latest", { + "range": "1-20", + "count": 20, + }) def __init__(self, match): ArtstationExtractor.__init__(self, match) @@ -316,9 +329,7 @@ class ArtstationArtworkExtractor(ArtstationExtractor): def projects(self): url = "{}/projects.json".format(self.root) - params = self.query.copy() - params["page"] = 1 - return self._pagination(url, params) + return self._pagination(url, self.query.copy()) class ArtstationImageExtractor(ArtstationExtractor): diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index a1a4890..380bcc7 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2019 Mike Fährmann +# Copyright 2014-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -122,6 +122,24 @@ class Extractor(): raise exception.HttpError(msg) + def wait(self, *, seconds=None, until=None, reason=None, adjust=1): + now = datetime.datetime.now() + + if seconds: + seconds = float(seconds) + until = now + datetime.timedelta(seconds=seconds) + elif until: + until = datetime.datetime.fromtimestamp(float(until)) + seconds = (until - now).total_seconds() + else: + raise ValueError("Either 'seconds' or 'until' is required") + + if reason: + t = until.time() + isotime = "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second) + self.log.info("Waiting until %s for %s.", isotime, reason) + time.sleep(seconds + adjust) + def _get_auth_info(self): """Return authentication information as (username, password) tuple""" username = self.config("username") @@ -170,6 +188,9 @@ class Extractor(): def _init_cookies(self): """Populate the session's cookiejar""" + if self.cookiedomain is None: + return + cookies = self.config("cookies") if cookies: if isinstance(cookies, dict): diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 604966f..02a14e3 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2019 Mike Fährmann +# Copyright 2015-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -31,6 +31,7 @@ class DeviantartExtractor(Extractor): category = "deviantart" directory_fmt = ("{category}", "{username}") filename_fmt = "{category}_{index}_{title}.{extension}" + cookiedomain = None root = "https://www.deviantart.com" def __init__(self, match=None): @@ -475,7 +476,7 @@ class DeviantartFavoriteExtractor(DeviantartExtractor): subcategory = "favorite" directory_fmt = ("{category}", "{username}", "Favourites") archive_fmt = "f_{_username}_{index}.{extension}" - pattern = BASE_PATTERN + r"/favourites/?(?:\?catpath=/)?$" + pattern = BASE_PATTERN + r"/favourites(?:/all|/?\?catpath=)?/?$" test = ( ("https://www.deviantart.com/h3813067/favourites/", { "options": (("metadata", True), ("flat", False)), # issue #271 @@ -484,8 +485,10 @@ class DeviantartFavoriteExtractor(DeviantartExtractor): ("https://www.deviantart.com/h3813067/favourites/", { "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", }), + ("https://www.deviantart.com/h3813067/favourites/all"), ("https://www.deviantart.com/h3813067/favourites/?catpath=/"), ("https://h3813067.deviantart.com/favourites/"), + ("https://h3813067.deviantart.com/favourites/all"), ("https://h3813067.deviantart.com/favourites/?catpath=/"), ) @@ -573,12 +576,22 @@ class DeviantartPopularExtractor(DeviantartExtractor): directory_fmt = ("{category}", "Popular", "{popular[range]}", "{popular[search]}") archive_fmt = "P_{popular[range]}_{popular[search]}_{index}.{extension}" - pattern = (r"(?:https?://)?www\.deviantart\.com" - r"((?:/\w+)*)/(?:popular-([^/?&#]+))/?(?:\?([^#]*))?") + pattern = (r"(?:https?://)?www\.deviantart\.com/(?:" + r"search(?:/deviations)?" + r"|(?:deviations/?)?\?order=(popular-[^/?&#]+)" + r"|((?:[\w-]+/)*)(popular-[^/?&#]+)" + r")/?(?:\?([^#]*))?") test = ( + ("https://www.deviantart.com/?order=popular-all-time", { + "options": (("original", False),), + "range": "1-30", + "count": 30, + }), ("https://www.deviantart.com/popular-24-hours/?q=tree+house", { "options": (("original", False),), }), + ("https://www.deviantart.com/search?q=tree"), + ("https://www.deviantart.com/search/deviations?order=popular-1-week"), ("https://www.deviantart.com/artisan/popular-all-time/?q=tree"), ) @@ -587,13 +600,20 @@ class DeviantartPopularExtractor(DeviantartExtractor): self.search_term = self.time_range = self.category_path = None self.user = "" - path, trange, query = match.groups() + trange1, path, trange2, query = match.groups() + trange = trange1 or trange2 + query = text.parse_query(query) + + if not trange: + trange = query.get("order") + if path: - self.category_path = path.lstrip("/") + self.category_path = path.strip("/") if trange: + trange = trange[8:] if trange.startswith("popular-") else "" self.time_range = trange.replace("-", "").replace("hours", "hr") if query: - self.search_term = text.parse_query(query).get("q") + self.search_term = query.get("q") self.popular = { "search": self.search_term or "", @@ -739,6 +759,15 @@ class DeviantartExtractorV2(DeviantartExtractor): deviation["target"] = target return deviation + def _pagination(self, url, params, headers=None): + while True: + data = self.request(url, params=params, headers=headers).json() + yield from data["results"] + + if not data["hasMore"]: + return + params["offset"] = data["nextOffset"] + class DeviantartDeviationExtractor(DeviantartExtractorV2): """Extractor for single deviations""" @@ -863,15 +892,40 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2): "Referer": "{}/{}/gallery/scraps".format(self.root, self.user), } - while True: - data = self.request(url, params=params, headers=headers).json() + for obj in self._pagination(url, params, headers): + yield obj["deviation"] - for obj in data["results"]: - yield obj["deviation"] - if not data["hasMore"]: - return - params["offset"] = data["nextOffset"] +class DeviantartFollowingExtractor(DeviantartExtractorV2): + subcategory = "following" + pattern = BASE_PATTERN + "/about#watching$" + test = ("https://www.deviantart.com/shimoda7/about#watching", { + "pattern": DeviantartUserExtractor.pattern, + "range": "1-50", + "count": 50, + }) + + def items(self): + url = "{}/_napi/da-user-profile/api/module/watching".format(self.root) + params = { + "username": self.user, + "moduleid": self._module_id(self.user), + "offset" : "0", + "limit" : "24", + } + + yield Message.Version, 1 + for user in self._pagination(url, params): + url = "{}/{}".format(self.root, user["username"]) + yield Message.Queue, url, user + + def _module_id(self, username): + url = "{}/{}/about".format(self.root, username) + page = self.request(url).text + pos = page.find('\\"type\\":\\"watching\\"') + if pos < 0: + raise exception.NotFoundError("module") + return text.rextract(page, '\\"id\\":', ',', pos)[0].strip('" ') class DeviantartAPI(): @@ -1076,7 +1130,7 @@ class DeviantartAPI(): return data def _pagination(self, endpoint, params, extend=True): - public = True + public = warn = True while True: data = self._call(endpoint, params, public=public) if "results" not in data: @@ -1089,7 +1143,8 @@ class DeviantartAPI(): self.log.debug("Switching to private access token") public = False continue - elif data["has_more"]: + elif data["has_more"] and warn: + warn = False self.log.warning( "Private deviations detected! Run 'gallery-dl " "oauth:deviantart' and follow the instructions to " diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py index 80db096..1d17658 100644 --- a/gallery_dl/extractor/directlink.py +++ b/gallery_dl/extractor/directlink.py @@ -55,9 +55,11 @@ class DirectlinkExtractor(Extractor): for key, value in data.items(): if value: data[key] = text.unquote(value) + data["path"], _, name = data["path"].rpartition("/") data["filename"], _, ext = name.rpartition(".") data["extension"] = ext.lower() + data["_http_headers"] = {"Referer": self.url} yield Message.Version, 1 yield Message.Directory, data diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index cba9627..6cc3abc 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -23,7 +23,7 @@ BASE_PATTERN = r"(?:https?://)?(e[x-]|g\.e-)hentai\.org" class ExhentaiExtractor(Extractor): """Base class for exhentai extractors""" category = "exhentai" - directory_fmt = ("{category}", "{gallery_id} {title}") + directory_fmt = ("{category}", "{gallery_id} {title[:247]}") filename_fmt = ( "{gallery_id}_{num:>04}_{image_token}_{filename}.{extension}") archive_fmt = "{gallery_id}_{num}" diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py index bd34bdb..967fd9c 100644 --- a/gallery_dl/extractor/flickr.py +++ b/gallery_dl/extractor/flickr.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2019 Mike Fährmann +# Copyright 2017-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -16,6 +16,7 @@ class FlickrExtractor(Extractor): """Base class for flickr extractors""" category = "flickr" filename_fmt = "{category}_{id}.{extension}" + cookiedomain = None def __init__(self, match): Extractor.__init__(self, match) diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 428f3c3..4af9d4a 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -120,8 +120,8 @@ EXTRACTORS = { }, "fireden": { "root": "https://boards.fireden.net", - "test-thread": ("https://boards.fireden.net/a/thread/159803223/", { - "url": "01b7baacfb0656a68e566368290e3072b27f86c9", + "test-thread": ("https://boards.fireden.net/sci/thread/11264294/", { + "url": "3adfe181ee86a8c23021c705f623b3657a9b0a43", }), }, "nyafuu": { diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py index d6eea7f..fd97605 100644 --- a/gallery_dl/extractor/imagefap.py +++ b/gallery_dl/extractor/imagefap.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2019 Mike Fährmann +# Copyright 2016-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -13,6 +13,9 @@ from .. import text import json +BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com" + + class ImagefapExtractor(Extractor): """Base class for imagefap extractors""" category = "imagefap" @@ -29,8 +32,8 @@ class ImagefapExtractor(Extractor): class ImagefapGalleryExtractor(ImagefapExtractor): """Extractor for image galleries from imagefap.com""" subcategory = "gallery" - pattern = (r"(?:https?://)?(?:www\.)?imagefap\.com/" - r"(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)") + pattern = BASE_PATTERN + r"/(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)" + test = ( ("https://www.imagefap.com/pictures/7102714", { "pattern": r"https://cdn.imagefap.com/images/full/\d+/\d+/\d+.jpg", @@ -42,6 +45,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor): "keyword": "3e24eace5b09639b881ebd393165862feb46adde", }), ("https://www.imagefap.com/gallery.php?gid=7102714"), + ("https://beta.imagefap.com/gallery.php?gid=7102714"), ) def __init__(self, match): @@ -99,11 +103,14 @@ class ImagefapGalleryExtractor(ImagefapExtractor): class ImagefapImageExtractor(ImagefapExtractor): """Extractor for single images from imagefap.com""" subcategory = "image" - pattern = r"(?:https?://)?(?:www\.)?imagefap\.com/photo/(\d+)" - test = ("https://www.imagefap.com/photo/1369341772/", { - "pattern": r"https://cdn.imagefap.com/images/full/\d+/\d+/\d+.jpg", - "keyword": "8894e45f7262020d8d66ce59917315def1fc475b", - }) + pattern = BASE_PATTERN + r"/photo/(\d+)" + test = ( + ("https://www.imagefap.com/photo/1369341772/", { + "pattern": r"https://cdn.imagefap.com/images/full/\d+/\d+/\d+.jpg", + "keyword": "8894e45f7262020d8d66ce59917315def1fc475b", + }), + ("https://beta.imagefap.com/photo/1369341772/"), + ) def __init__(self, match): ImagefapExtractor.__init__(self, match) @@ -143,8 +150,8 @@ class ImagefapUserExtractor(ImagefapExtractor): """Extractor for all galleries from a user at imagefap.com""" subcategory = "user" categorytransfer = True - pattern = (r"(?:https?://)?(?:www\.)?imagefap\.com/" - r"(?:profile(?:\.php\?user=|/)([^/?&#]+)" + pattern = (BASE_PATTERN + + r"/(?:profile(?:\.php\?user=|/)([^/?&#]+)" r"|usergallery\.php\?userid=(\d+))") test = ( ("https://www.imagefap.com/profile/LucyRae/galleries", { @@ -154,6 +161,7 @@ class ImagefapUserExtractor(ImagefapExtractor): "url": "d941aa906f56a75972a7a5283030eb9a8d27a4fd", }), ("https://www.imagefap.com/profile.php?user=LucyRae"), + ("https://beta.imagefap.com/profile.php?user=LucyRae"), ) def __init__(self, match): diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index ce3e1ce..6ff6588 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -153,7 +153,7 @@ class ImgurAlbumExtractor(ImgurExtractor): "is_album" : True, "layout" : "blog", "link" : "https://imgur.com/a/TcBmP", - "nsfw" : False, + "nsfw" : True, "privacy" : "hidden", "section" : None, "title" : "138", diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index 0aeeb4a..c80cf14 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -56,7 +56,7 @@ class LusciousAlbumExtractor(LusciousExtractor): "__typename" : "Album", "audiences" : list, "content" : "Hentai", - "cover" : "re:https://cdnio.luscious.net/.+/277031/", + "cover" : "re:https://\\w+.luscious.net/.+/277031/", "created" : 1479625853, "created_by" : "NTRshouldbeillegal", "date" : "type:datetime", @@ -102,7 +102,7 @@ class LusciousAlbumExtractor(LusciousExtractor): }, }), ("https://luscious.net/albums/virgin-killer-sweater_282582/", { - "url": "21cc68a7548f4d71dfd67d8caf96349dde7e791c", + "url": "0be0cc279be1de99f727764819e03435e2a79915", }), ("https://luscious.net/albums/not-found_277035/", { "exception": exception.NotFoundError, diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index d0eb2a9..558aa9d 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract manga-chapters and entire manga from https://mangadex.org/""" +"""Extract manga-chapters and entire manga from https://mangadex.cc/""" from .common import Extractor, Message from .. import text, util @@ -16,7 +16,7 @@ from ..cache import memcache class MangadexExtractor(Extractor): """Base class for mangadex extractors""" category = "mangadex" - root = "https://mangadex.org" + root = "https://mangadex.cc" # mangadex-to-iso639-1 codes iso639_map = { @@ -39,7 +39,7 @@ class MangadexExtractor(Extractor): class MangadexChapterExtractor(MangadexExtractor): - """Extractor for manga-chapters from mangadex.org""" + """Extractor for manga-chapters from mangadex.cc""" subcategory = "chapter" directory_fmt = ( "{category}", "{manga}", @@ -47,14 +47,14 @@ class MangadexChapterExtractor(MangadexExtractor): filename_fmt = ( "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}") archive_fmt = "{chapter_id}_{page}" - pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)/chapter/(\d+)" + pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc|com)/chapter/(\d+)" test = ( - ("https://mangadex.org/chapter/122094", { + ("https://mangadex.cc/chapter/122094", { "keyword": "1c834dca33025f521e1874aee1f71c51e28ebf99", - "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f", + # "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f", }), # oneshot - ("https://mangadex.org/chapter/138086", { + ("https://mangadex.cc/chapter/138086", { "count": 64, "keyword": "178777bd0352fb19eb934cbee5630d16e3fb60ab", }), @@ -107,14 +107,14 @@ class MangadexChapterExtractor(MangadexExtractor): class MangadexMangaExtractor(MangadexExtractor): - """Extractor for manga from mangadex.org""" + """Extractor for manga from mangadex.cc""" subcategory = "manga" categorytransfer = True - pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)" + pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc|com)" r"/(?:title|manga)/(\d+)") test = ( - ("https://mangadex.org/manga/2946/souten-no-koumori", { - "pattern": r"https://mangadex.org/chapter/\d+", + ("https://mangadex.cc/manga/2946/souten-no-koumori", { + "pattern": r"https://mangadex.cc/chapter/\d+", "keywords": { "manga": "Souten no Koumori", "manga_id": 2946, diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py index e15acbe..52cc672 100644 --- a/gallery_dl/extractor/mangahere.py +++ b/gallery_dl/extractor/mangahere.py @@ -97,10 +97,19 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor): "url": "654850570aa03825cd57e2ae2904af489602c523", "keyword": "c8084d89a9ea6cf40353093669f9601a39bf5ca2", }), + # adult filter (#556) + ("http://www.mangahere.cc/manga/gunnm_mars_chronicle/", { + "pattern": MangahereChapterExtractor.pattern, + "count": ">= 50", + }), ("https://www.mangahere.co/manga/aria/"), ("https://m.mangahere.co/manga/aria/"), ) + def __init__(self, match): + MangaExtractor.__init__(self, match) + self.session.cookies.set("isAdult", "1", domain="www.mangahere.cc") + def chapters(self, page): results = [] manga, pos = text.extract(page, '<meta name="og:title" content="', '"') diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py index a325264..36e0b62 100644 --- a/gallery_dl/extractor/mastodon.py +++ b/gallery_dl/extractor/mastodon.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -19,6 +19,7 @@ class MastodonExtractor(Extractor): directory_fmt = ("mastodon", "{instance}", "{account[username]}") filename_fmt = "{category}_{id}_{media[id]}.{extension}" archive_fmt = "{media[id]}" + cookiedomain = None instance = None root = None diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 5454e52..54e60b0 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -98,7 +98,7 @@ class NewgroundsExtractor(Extractor): 'id="faves_load">', '<').replace(",", "")) data["score"] = text.parse_float(extr('id="score_number">', '<')) data["tags"] = text.split_html(extr( - '<dd class="tags momag">', '</dd>')) + '<dd class="tags">', '</dd>')) data["artist"] = [ text.extract(user, '//', '.')[0] for user in text.extract_iter(page, '<div class="item-user">', '>') diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index bcdd082..b72a896 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -175,7 +175,7 @@ class PinterestPinitExtractor(PinterestExtractor): "https://pin.it/None", "https://www.pinterest.com"): raise exception.NotFoundError("pin") - yield Message.Queue, location, {} + yield Message.Queue, location, {"_extractor": PinterestPinExtractor} class PinterestAPI(): diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 7901149..36fa0fe 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2019 Mike Fährmann +# Copyright 2014-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -13,6 +13,7 @@ from .. import text, exception from ..cache import cache from datetime import datetime, timedelta import hashlib +import time class PixivExtractor(Extractor): @@ -21,6 +22,7 @@ class PixivExtractor(Extractor): directory_fmt = ("{category}", "{user[id]} {user[account]}") filename_fmt = "{id}_p{num}.{extension}" archive_fmt = "{id}{suffix}.{extension}" + cookiedomain = None def __init__(self, match): Extractor.__init__(self, match) @@ -141,10 +143,11 @@ class PixivMeExtractor(PixivExtractor): def items(self): url = "https://pixiv.me/" + self.account + data = {"_extractor": PixivUserExtractor} response = self.request( url, method="HEAD", allow_redirects=False, notfound="user") yield Message.Version, 1 - yield Message.Queue, response.headers["Location"], {} + yield Message.Queue, response.headers["Location"], data class PixivWorkExtractor(PixivExtractor): @@ -217,6 +220,11 @@ class PixivFavoriteExtractor(PixivExtractor): ("https://www.pixiv.net/bookmark.php", { "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba", }), + # followed users (#515) + ("https://www.pixiv.net/bookmark.php?id=173530&type=user", { + "pattern": PixivUserExtractor.pattern, + "count": ">= 12", + }), # touch URLs ("https://touch.pixiv.net/bookmark.php?id=173530"), ("https://touch.pixiv.net/bookmark.php"), @@ -227,6 +235,9 @@ class PixivFavoriteExtractor(PixivExtractor): self.query = text.parse_query(match.group(1)) if "id" not in self.query: self.subcategory = "bookmark" + elif self.query.get("type") == "user": + self.subcategory = "following" + self.items = self._items_following def works(self): tag = None @@ -249,6 +260,15 @@ class PixivFavoriteExtractor(PixivExtractor): self.user_id = user["id"] return {"user_bookmark": user} + def _items_following(self): + yield Message.Version, 1 + + for preview in self.api.user_following(self.query["id"]): + user = preview["user"] + user["_extractor"] = PixivUserExtractor + url = "https://www.pixiv.net/member.php?id={}".format(user["id"]) + yield Message.Queue, url, user + class PixivRankingExtractor(PixivExtractor): """Extractor for pixiv ranking pages""" @@ -493,6 +513,10 @@ class PixivAppAPI(): params = {"user_id": user_id} return self._call("v1/user/detail", params)["user"] + def user_following(self, user_id): + params = {"user_id": user_id} + return self._pagination("v1/user/following", params, "user_previews") + def user_illusts(self, user_id): params = {"user_id": user_id} return self._pagination("v1/user/illusts", params) @@ -506,17 +530,25 @@ class PixivAppAPI(): self.login() response = self.extractor.request(url, params=params, fatal=False) + data = response.json() + + if "error" in data: + if response.status_code == 404: + raise exception.NotFoundError() + + error = data["error"] + if "rate limit" in (error.get("message") or "").lower(): + self.log.info("Waiting two minutes for API rate limit reset.") + time.sleep(120) + return self._call(endpoint, params) + raise exception.StopExtraction("API request failed: %s", error) - if response.status_code < 400: - return response.json() - if response.status_code == 404: - raise exception.NotFoundError() - raise exception.StopExtraction("API request failed: %s", response.text) + return data - def _pagination(self, endpoint, params): + def _pagination(self, endpoint, params, key="illusts"): while True: data = self._call(endpoint, params) - yield from data["illusts"] + yield from data[key] if not data["next_url"]: return diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index 656148e..4c83019 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2019 Mike Fährmann +# Copyright 2017-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,12 +11,12 @@ from .common import Extractor, Message from .. import text, util, extractor, exception from ..cache import cache -import time class RedditExtractor(Extractor): """Base class for reddit extractors""" category = "reddit" + cookiedomain = None def __init__(self, match): Extractor.__init__(self, match) @@ -277,11 +277,13 @@ class RedditAPI(): params["raw_json"] = 1 self.authenticate() response = self.extractor.request(url, params=params, fatal=None) + remaining = response.headers.get("x-ratelimit-remaining") if remaining and float(remaining) < 2: - wait = int(response.headers["x-ratelimit-reset"]) - self.log.info("Waiting %d seconds for ratelimit reset", wait) - time.sleep(wait) + reset = response.headers["x-ratelimit-reset"] + self.extractor.wait(seconds=reset, reason="rate limit reset") + return self._call(endpoint, params) + data = response.json() if "error" in data: if data["error"] == 403: diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py index 0c13825..69b8cb9 100644 --- a/gallery_dl/extractor/smugmug.py +++ b/gallery_dl/extractor/smugmug.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2019 Mike Fährmann +# Copyright 2018-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -21,6 +21,7 @@ class SmugmugExtractor(Extractor): category = "smugmug" filename_fmt = ("{category}_{User[NickName]:?/_/}" "{Image[UploadKey]}_{Image[ImageKey]}.{extension}") + cookiedomain = None empty_user = { "Uri": "", "ResponseLevel": "Public", diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 1d37419..a1f2199 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2019 Mike Fährmann +# Copyright 2016-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -12,7 +12,6 @@ from .common import Extractor, Message from .. import text, oauth, extractor, exception from datetime import datetime, timedelta import re -import time def _original_inline_image(url): @@ -45,6 +44,7 @@ class TumblrExtractor(Extractor): directory_fmt = ("{category}", "{name}") filename_fmt = "{category}_{blog_name}_{id}_{num:>02}.{extension}" archive_fmt = "{id}_{num}" + cookiedomain = None def __init__(self, match): Extractor.__init__(self, match) @@ -407,27 +407,18 @@ class TumblrAPI(oauth.OAuth1API): # daily rate limit if response.headers.get("x-ratelimit-perday-remaining") == "0": reset = response.headers.get("x-ratelimit-perday-reset") + t = (datetime.now() + timedelta(seconds=float(reset))).time() + + self.log.error("Daily API rate limit exceeded") raise exception.StopExtraction( - "Daily API rate limit exceeded: aborting; " - "rate limit will reset at %s", self._to_time(reset), - ) + "Aborting - Rate limit will reset at %s", + "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second)) # hourly rate limit reset = response.headers.get("x-ratelimit-perhour-reset") if reset: - self.log.info( - "Hourly API rate limit exceeded; waiting until " - "%s for rate limit reset", self._to_time(reset), - ) - time.sleep(int(reset) + 1) + self.log.info("Hourly API rate limit exceeded") + self.extractor.wait(seconds=reset, reason="rate limit reset") return self._call(blog, endpoint, params) raise exception.StopExtraction(data) - - @staticmethod - def _to_time(reset): - try: - reset_time = datetime.now() + timedelta(seconds=int(reset)) - except (ValueError, TypeError): - return "?" - return reset_time.strftime("%H:%M:%S") diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 8ef966f..610e0ee 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -53,10 +53,12 @@ class TwitterExtractor(Extractor): if self.videos == "ytdl": data["extension"] = None - url = "ytdl:{}/{}/status/{}".format( - self.root, data["user"], data["tweet_id"]) + url = "ytdl:{}/i/web/status/{}".format( + self.root, data["tweet_id"]) else: url = self._video_from_tweet(data["tweet_id"]) + if not url: + continue ext = text.ext_from_url(url) if ext == "m3u8": url = "ytdl:" + url @@ -155,6 +157,16 @@ class TwitterExtractor(Extractor): cl, _, cr = content.rpartition("pic.twitter.com/") data["content"] = cl if cl and len(cr) < 16 else content + if extr('<div class="QuoteTweet', '>'): + data["retweet_id"] = text.parse_int(extr('data-item-id="', '"')) + data["retweeter"] = data["user"]["name"] + data["author"] = { + "name" : extr('data-screen-name="', '"'), + "id" : text.parse_int(extr('data-user-id="' , '"')), + "nick" : text.unescape(extr( + 'QuoteTweet-fullname', '<').partition('>')[2]), + } + return data def _video_from_tweet(self, tweet_id): @@ -173,19 +185,28 @@ class TwitterExtractor(Extractor): if self.logged_in: headers["x-twitter-auth-type"] = "OAuth2Session" else: - token = self._guest_token(headers) + token = _guest_token(self, headers) cookies = {"gt": token} headers["x-guest-token"] = token - data = self.request(url, cookies=cookies, headers=headers).json() - return data["track"]["playbackUrl"] + response = self.request( + url, cookies=cookies, headers=headers, fatal=None) + + if response.status_code == 429 or \ + response.headers.get("x-rate-limit-remaining") == "0": + if self.logged_in: + reset = response.headers.get("x-rate-limit-reset") + self.wait(until=reset, reason="rate limit reset") + else: + _guest_token.invalidate() + return self._video_from_tweet(tweet_id) - @memcache() - def _guest_token(self, headers): - return self.request( - "https://api.twitter.com/1.1/guest/activate.json", - method="POST", headers=headers, - ).json().get("guest_token") + elif response.status_code >= 400: + self.log.warning("Unable to fetch video data for %s ('%s %s')", + tweet_id, response.status_code, response.reason) + return None + + return response.json()["track"]["playbackUrl"] def _tweets_from_api(self, url, max_position=None): params = { @@ -313,12 +334,17 @@ class TwitterTweetExtractor(TwitterExtractor): # Reply to another tweet (#403) ("https://twitter.com/tyson_hesse/status/1103767554424598528", { "options": (("videos", "ytdl"),), - "pattern": r"ytdl:https://twitter.com/.+/1103767554424598528", + "pattern": r"ytdl:https://twitter.com/i/web.+/1103767554424598528", }), # /i/web/ URL ("https://twitter.com/i/web/status/1155074198240292865", { "pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig", }), + # quoted tweet (#526) + ("https://twitter.com/Meiyu_miu/status/1070693241413021696", { + "count": 4, + "keyword": "0c627af2b8cdccc7e0da8fd221155c4a4a3141a8", + }), ) def __init__(self, match): @@ -342,3 +368,11 @@ class TwitterTweetExtractor(TwitterExtractor): end = page.index('class="js-tweet-stats-container') beg = page.rindex('<div class="tweet ', 0, end) return (page[beg:end],) + + +@memcache() +def _guest_token(extr, headers): + return extr.request( + "https://api.twitter.com/1.1/guest/activate.json", + method="POST", headers=headers, + ).json().get("guest_token") diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py index ac289df..b614cab 100644 --- a/gallery_dl/extractor/wikiart.py +++ b/gallery_dl/extractor/wikiart.py @@ -69,8 +69,8 @@ class WikiartArtistExtractor(WikiartExtractor): directory_fmt = ("{category}", "{artist[artistName]}") pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)" test = ("https://www.wikiart.org/en/thomas-cole", { - "url": "f1eee8158f5b8b7380382ab730a8f53884715c8b", - "keyword": "c61f5a4774b977106000e9554d19cfb9438a7032", + "url": "9049e52e897b9ae6586df4c2c4f827d0a19dafa3", + "keyword": "c3168b21a993707c41efb7674e8c90d53a79d483", }) def __init__(self, match): diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 88b6a55..699f057 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -182,6 +182,7 @@ class DownloadJob(Job): self.downloaders = {} self.postprocessors = None self.out = output.select() + self.visited = parent.visited if parent else set() def handle_url(self, url, kwdict, fallback=None): """Download the resource specified in 'url'""" @@ -261,6 +262,10 @@ class DownloadJob(Job): pp.run_metadata(pathfmt) def handle_queue(self, url, kwdict): + if url in self.visited: + return + self.visited.add(url) + if "_extractor" in kwdict: extr = kwdict["_extractor"].from_url(url) else: @@ -422,11 +427,19 @@ class KeywordJob(Job): self.print_kwdict(kwdict) def handle_queue(self, url, kwdict): - if not kwdict: + if not util.filter_dict(kwdict): self.extractor.log.info( - "This extractor delegates work to other extractors " - "and does not provide any keywords on its own. Try " - "'gallery-dl -K \"%s\"' instead.", url) + "This extractor only spawns other extractors " + "and does not provide any metadata on its own.") + + if "_extractor" in kwdict: + self.extractor.log.info( + "Showing results for '%s' instead:\n", url) + extr = kwdict["_extractor"].from_url(url) + KeywordJob(extr, self).run() + else: + self.extractor.log.info( + "Try 'gallery-dl -K \"%s\"' instead.", url) else: print("Keywords for --chapter-filter:") print("------------------------------") diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py index bc26484..aa50dfd 100644 --- a/gallery_dl/postprocessor/metadata.py +++ b/gallery_dl/postprocessor/metadata.py @@ -10,6 +10,7 @@ from .common import PostProcessor from .. import util +import os class MetadataPP(PostProcessor): @@ -32,32 +33,46 @@ class MetadataPP(PostProcessor): self.ascii = options.get("ascii", False) ext = "json" + directory = options.get("directory") + if directory: + self._directory = self._directory_custom + sep = os.sep + (os.altsep or "") + self.metadir = directory.rstrip(sep) + os.sep + extfmt = options.get("extension-format") if extfmt: - self.path = self._path_format + self._filename = self._filename_custom self.extfmt = util.Formatter(extfmt).format_map else: - self.path = self._path_append self.extension = options.get("extension", ext) if options.get("bypost"): self.run_metadata, self.run = self.run, self.run_metadata def run(self, pathfmt): - with open(self.path(pathfmt), "w", encoding="utf-8") as file: + path = self._directory(pathfmt) + self._filename(pathfmt) + with open(path, "w", encoding="utf-8") as file: self.write(file, pathfmt.kwdict) - def _path_append(self, pathfmt): - return "{}.{}".format(pathfmt.realpath, self.extension) + def _directory(self, pathfmt): + return pathfmt.realdirectory + + def _directory_custom(self, pathfmt): + directory = os.path.join(pathfmt.realdirectory, self.metadir) + os.makedirs(directory, exist_ok=True) + return directory + + def _filename(self, pathfmt): + return pathfmt.filename + "." + self.extension - def _path_format(self, pathfmt): + def _filename_custom(self, pathfmt): kwdict = pathfmt.kwdict ext = kwdict["extension"] kwdict["extension"] = pathfmt.extension kwdict["extension"] = pathfmt.prefix + self.extfmt(kwdict) - path = pathfmt.realdirectory + pathfmt.build_filename() + filename = pathfmt.build_filename() kwdict["extension"] = ext - return path + return filename def _write_custom(self, file, kwdict): file.write(self.contentfmt(kwdict)) diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 48ae0be..f426829 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -740,8 +740,15 @@ class DownloadArchive(): con.isolation_level = None self.close = con.close self.cursor = con.cursor() - self.cursor.execute("CREATE TABLE IF NOT EXISTS archive " - "(entry PRIMARY KEY) WITHOUT ROWID") + + try: + self.cursor.execute("CREATE TABLE IF NOT EXISTS archive " + "(entry PRIMARY KEY) WITHOUT ROWID") + except sqlite3.OperationalError: + # fallback for missing WITHOUT ROWID support (#553) + self.cursor.execute("CREATE TABLE IF NOT EXISTS archive " + "(entry PRIMARY KEY)") + self.keygen = (extractor.category + extractor.config( "archive-format", extractor.archive_fmt) ).format_map diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 2ac7ceb..4b83107 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.12.1" +__version__ = "1.12.2" diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py index 17f82c9..629b0d7 100644 --- a/test/test_postprocessor.py +++ b/test/test_postprocessor.py @@ -7,6 +7,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. +import os import os.path import zipfile import tempfile @@ -156,7 +157,6 @@ class MetadataTest(BasePostprocessorTest): "_private" : "world", }) - self.assertEqual(pp.path , pp._path_append) self.assertEqual(pp.write , pp._write_json) self.assertEqual(pp.ascii , True) self.assertEqual(pp.indent , 2) @@ -242,7 +242,7 @@ class MetadataTest(BasePostprocessorTest): "extension-format": "json", }) - self.assertEqual(pp.path, pp._path_format) + self.assertEqual(pp._filename, pp._filename_custom) with patch("builtins.open", mock_open()) as m: pp.prepare(self.pathfmt) @@ -264,6 +264,31 @@ class MetadataTest(BasePostprocessorTest): path = self.pathfmt.realdirectory + "file.2.EXT-data:tESt" m.assert_called_once_with(path, "w", encoding="utf-8") + def test_metadata_directory(self): + pp = self._create({ + "directory": "metadata", + }) + + with patch("builtins.open", mock_open()) as m: + pp.prepare(self.pathfmt) + pp.run(self.pathfmt) + + path = self.pathfmt.realdirectory + "metadata/file.ext.json" + m.assert_called_once_with(path, "w", encoding="utf-8") + + def test_metadata_directory_2(self): + pp = self._create({ + "directory" : "metadata////", + "extension-format": "json", + }) + + with patch("builtins.open", mock_open()) as m: + pp.prepare(self.pathfmt) + pp.run(self.pathfmt) + + path = self.pathfmt.realdirectory + "metadata/file.json" + m.assert_called_once_with(path, "w", encoding="utf-8") + @staticmethod def _output(mock): return "".join( |