diff options
author | Unit 193 <unit193@ubuntu.com> | 2020-03-28 23:01:51 -0400 |
---|---|---|
committer | Unit 193 <unit193@ubuntu.com> | 2020-03-28 23:01:51 -0400 |
commit | e4887ae6b00c50fbbde531cc274c77b076bd821d (patch) | |
tree | 051849d0ce8ed35aa229ba828a2dfe1faf10c5c0 | |
parent | e8cc000750de972384f2f34d02d42222b4018ae9 (diff) | |
download | gallery-dl-e4887ae6b00c50fbbde531cc274c77b076bd821d.tar.bz2 gallery-dl-e4887ae6b00c50fbbde531cc274c77b076bd821d.tar.xz gallery-dl-e4887ae6b00c50fbbde531cc274c77b076bd821d.tar.zst |
New upstream version 1.13.3 (upstream/1.13.3)
-rw-r--r-- | PKG-INFO | 8 | ||||
-rw-r--r-- | README.rst | 6 | ||||
-rw-r--r-- | data/man/gallery-dl.1 | 2 | ||||
-rw-r--r-- | data/man/gallery-dl.conf.5 | 2 | ||||
-rw-r--r-- | gallery_dl.egg-info/PKG-INFO | 8 | ||||
-rw-r--r-- | gallery_dl/__init__.py | 12 | ||||
-rw-r--r-- | gallery_dl/extractor/35photo.py | 54 | ||||
-rw-r--r-- | gallery_dl/extractor/danbooru.py | 17 | ||||
-rw-r--r-- | gallery_dl/extractor/deviantart.py | 12 | ||||
-rw-r--r-- | gallery_dl/extractor/e621.py | 111 | ||||
-rw-r--r-- | gallery_dl/extractor/instagram.py | 56 | ||||
-rw-r--r-- | gallery_dl/extractor/mangadex.py | 14 | ||||
-rw-r--r-- | gallery_dl/extractor/mangapark.py | 57 | ||||
-rw-r--r-- | gallery_dl/extractor/newgrounds.py | 2 | ||||
-rw-r--r-- | gallery_dl/extractor/nozomi.py | 101 | ||||
-rw-r--r-- | gallery_dl/extractor/piczel.py | 16 | ||||
-rw-r--r-- | gallery_dl/extractor/sexcom.py | 20 | ||||
-rw-r--r-- | gallery_dl/extractor/simplyhentai.py | 1 | ||||
-rw-r--r-- | gallery_dl/extractor/twitter.py | 2 | ||||
-rw-r--r-- | gallery_dl/output.py | 13 | ||||
-rw-r--r-- | gallery_dl/util.py | 6 | ||||
-rw-r--r-- | gallery_dl/version.py | 2 | ||||
-rw-r--r-- | test/test_results.py | 5 |
23 files changed, 278 insertions, 249 deletions
@@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.13.2 +Version: 1.13.3 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.2/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.2/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -247,7 +247,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.2.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.3.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -83,8 +83,8 @@ Download a standalone executable file, put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.2/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.2/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -236,7 +236,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.2.tar.gz +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.3.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. 
_Python: https://www.python.org/downloads/ diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 304c345..af9ac7d 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2020-03-14" "1.13.2" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2020-03-28" "1.13.3" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 4ad93f8..9a374da 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2020-03-14" "1.13.2" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2020-03-28" "1.13.3" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 92ded16..c9ca17b 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.13.2 +Version: 1.13.3 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.2/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.2/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -247,7 +247,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. 
_configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.2.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.3.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 6fba5e2..e71a5b0 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -108,7 +108,7 @@ def parse_inputfile(file, log): def main(): try: - if sys.stdout.encoding.lower() != "utf-8": + if sys.stdout and sys.stdout.encoding.lower() != "utf-8": output.replace_std_streams() parser = option.build_parser() @@ -205,11 +205,13 @@ def main(): if args.inputfile: try: if args.inputfile == "-": - file = sys.stdin + if sys.stdin: + urls += parse_inputfile(sys.stdin, log) + else: + log.warning("input file: stdin is not readable") else: - file = open(args.inputfile, encoding="utf-8") - urls += parse_inputfile(file, log) - file.close() + with open(args.inputfile, encoding="utf-8") as file: + urls += parse_inputfile(file, log) except OSError as exc: log.warning("input file: %s", exc) diff --git a/gallery_dl/extractor/35photo.py b/gallery_dl/extractor/35photo.py index d3e9276..e33aa2d 100644 --- a/gallery_dl/extractor/35photo.py +++ b/gallery_dl/extractor/35photo.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -101,7 +101,7 @@ class _35photoUserExtractor(_35photoExtractor): """Extractor for all images of a user on 35photo.pro""" subcategory = "user" pattern = (r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro" - r"/(?!photo_|genre_|rating/)([^/?&#]+)") + 
r"/(?!photo_|genre_|tags/|rating/)([^/?&#]+)") test = ( ("https://35photo.pro/liya", { "pattern": r"https://m\d+.35photo.pro/photos_(main|series)/.*.jpg", @@ -137,25 +137,49 @@ class _35photoUserExtractor(_35photoExtractor): }) +class _35photoTagExtractor(_35photoExtractor): + """Extractor for all photos from a tag listing""" + subcategory = "tag" + directory_fmt = ("{category}", "Tags", "{search_tag}") + archive_fmt = "t{search_tag}_{id}_{num}" + pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/tags/([^/?&#]+)" + test = ("https://35photo.pro/tags/landscape/", { + "range": "1-25", + "count": 25, + }) + + def __init__(self, match): + _35photoExtractor.__init__(self, match) + self.tag = match.group(1) + + def metadata(self): + return {"search_tag": text.unquote(self.tag).lower()} + + def photos(self): + num = 1 + + while True: + url = "{}/tags/{}/list_{}/".format(self.root, self.tag, num) + page = self.request(url).text + prev = None + + for photo_id in text.extract_iter(page, "35photo.pro/photo_", "/"): + if photo_id != prev: + prev = photo_id + yield photo_id + + if not prev: + return + num += 1 + + class _35photoGenreExtractor(_35photoExtractor): """Extractor for images of a specific genre on 35photo.pro""" subcategory = "genre" directory_fmt = ("{category}", "Genre", "{genre}") archive_fmt = "g{genre_id}_{id}_{num}" pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/genre_(\d+)(/new/)?" 
- test = ( - ("https://35photo.pro/genre_109/", { - "range": "1-30", - }), - ("https://35photo.pro/genre_103/", { - "range": "1-30", - "count": 30, - }), - ("https://35photo.pro/genre_103/new/", { - "range": "1-30", - "count": 30, - }), - ) + test = ("https://35photo.pro/genre_109/",) def __init__(self, match): _35photoExtractor.__init__(self, match) diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 3fdeaf9..3a0d0ef 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -27,10 +27,10 @@ class DanbooruExtractor(SharedConfigMixin, Extractor): filename_fmt = "{category}_{id}_{md5}.{extension}" page_limit = 1000 page_start = None - per_page = 100 + per_page = 200 def __init__(self, match): - Extractor.__init__(self, match) + super().__init__(match) self.root = "https://{}.donmai.us".format(match.group(1)) self.ugoira = self.config("ugoira", True) self.params = {} @@ -83,6 +83,8 @@ class DanbooruExtractor(SharedConfigMixin, Extractor): while True: posts = self.request(url, params=params).json() + if "posts" in posts: + posts = posts["posts"] yield from posts if len(posts) < self.per_page: @@ -114,7 +116,7 @@ class DanbooruTagExtractor(DanbooruExtractor): ) def __init__(self, match): - DanbooruExtractor.__init__(self, match) + super().__init__(match) self.params["tags"] = text.unquote(match.group(2).replace("+", " ")) def metadata(self): @@ -132,7 +134,7 @@ class DanbooruPoolExtractor(DanbooruExtractor): }) def __init__(self, match): - DanbooruExtractor.__init__(self, match) + super().__init__(match) self.pool_id = match.group(2) self.params["tags"] = "pool:" + self.pool_id @@ -160,12 +162,13 @@ class DanbooruPostExtractor(DanbooruExtractor): ) def __init__(self, match): - DanbooruExtractor.__init__(self, match) + super().__init__(match) self.post_id = match.group(2) def posts(self): url = "{}/posts/{}.json".format(self.root, self.post_id) - return (self.request(url).json(),) + post = 
self.request(url).json() + return (post["post"] if "post" in post else post,) class DanbooruPopularExtractor(DanbooruExtractor): @@ -184,7 +187,7 @@ class DanbooruPopularExtractor(DanbooruExtractor): ) def __init__(self, match): - DanbooruExtractor.__init__(self, match) + super().__init__(match) self.params.update(text.parse_query(match.group(2))) def metadata(self): diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 90b27d1..d6669d1 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -1041,10 +1041,18 @@ class DeviantartAPI(): } response = self.extractor.request( url, headers=headers, params=params, fatal=None) - if response.status_code == 404: + code = response.status_code + + if code == 404: raise exception.StopExtraction( "Your account must use the Eclipse interface.") - return response.json() + elif code == 403 and b"Request blocked." in response.content: + raise exception.StopExtraction( + "Requests to deviantart.com blocked due to too much traffic.") + try: + return response.json() + except Exception: + return {"error": response.text} def deviation_metadata(self, deviations): """ Fetch deviation metadata for a set of deviations""" diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py index bc3f67a..5c5c36c 100644 --- a/gallery_dl/extractor/e621.py +++ b/gallery_dl/extractor/e621.py @@ -8,41 +8,34 @@ """Extractors for https://e621.net/""" -from .common import Extractor, Message, SharedConfigMixin -from .. import text -import datetime +from .common import Extractor, Message +from . 
import danbooru import time BASE_PATTERN = r"(?:https?://)?e(621|926)\.net" -class E621Extractor(SharedConfigMixin, Extractor): +class E621Extractor(danbooru.DanbooruExtractor): """Base class for e621 extractors""" - basecategory = "booru" category = "e621" filename_fmt = "{category}_{id}_{file[md5]}.{extension}" page_limit = 750 page_start = None - per_page = 200 + per_page = 320 _last_request = 0 def __init__(self, match): - Extractor.__init__(self, match) + super().__init__(match) self.root = "https://e{}.net".format(match.group(1)) - self.params = {} - - username, api_key = self._get_auth_info() - if username: - self.log.debug("Using HTTP Basic Auth for user '%s'", username) - self.session.auth = (username, api_key) def request(self, url, **kwargs): diff = time.time() - E621Extractor._last_request if diff < 1.0: - self.log.debug("Sleeping for %s seconds", diff) - time.sleep(diff) - kwargs["headers"] = {"User-Agent": "gallery-dl/1.13.0 (by mikf)"} + delay = 1.0 - diff + self.log.debug("Sleeping for %s seconds", delay) + time.sleep(delay) + kwargs["headers"] = {"User-Agent": "gallery-dl/1.14.0 (by mikf)"} response = Extractor.request(self, url, **kwargs) E621Extractor._last_request = time.time() return response @@ -63,31 +56,9 @@ class E621Extractor(SharedConfigMixin, Extractor): yield Message.Directory, post yield Message.Url, file["url"], post - def metadata(self): - return {} - - def posts(self): - return self._pagination(self.root + "/posts.json") - - def _pagination(self, url): - params = self.params.copy() - params["limit"] = self.per_page - tags = params.get("tags", "") - - while True: - posts = self.request(url, params=params).json()["posts"] - yield from posts - - if len(posts) < self.per_page: - return - params["tags"] = "id:<{} {}".format(posts[-1]["id"], tags) - -class E621TagExtractor(E621Extractor): +class E621TagExtractor(E621Extractor, danbooru.DanbooruTagExtractor): """Extractor for e621 posts from tag searches""" - subcategory = "tag" - 
directory_fmt = ("{category}", "{search_tags}") - archive_fmt = "t_{search_tags}_{id}" pattern = BASE_PATTERN + r"/posts?(?:\?.*?tags=|/index/\d+/)([^&#]+)" test = ( ("https://e621.net/posts?tags=anry", { @@ -99,19 +70,9 @@ class E621TagExtractor(E621Extractor): ("https://e621.net/post?tags=anry"), ) - def __init__(self, match): - E621Extractor.__init__(self, match) - self.params["tags"] = text.unquote(match.group(2).replace("+", " ")) - - def metadata(self): - return {"search_tags": self.params["tags"]} - -class E621PoolExtractor(E621Extractor): +class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor): """Extractor for e621 pools""" - subcategory = "pool" - directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name]}") - archive_fmt = "p_{pool[id]}_{id}" pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)" test = ( ("https://e621.net/pools/73", { @@ -121,23 +82,9 @@ class E621PoolExtractor(E621Extractor): ("https://e621.net/pool/show/73"), ) - def __init__(self, match): - E621Extractor.__init__(self, match) - self.pool_id = match.group(2) - self.params["tags"] = "pool:" + self.pool_id - - def metadata(self): - url = "{}/pools/{}.json".format(self.root, self.pool_id) - pool = self.request(url).json() - pool["name"] = pool["name"].replace("_", " ") - del pool["post_ids"] - return {"pool": pool} - -class E621PostExtractor(E621Extractor): +class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor): """Extractor for single e621 posts""" - subcategory = "post" - archive_fmt = "{id}" pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)" test = ( ("https://e621.net/posts/535", { @@ -147,20 +94,9 @@ class E621PostExtractor(E621Extractor): ("https://e621.net/post/show/535"), ) - def __init__(self, match): - E621Extractor.__init__(self, match) - self.post_id = match.group(2) - - def posts(self): - url = "{}/posts/{}.json".format(self.root, self.post_id) - return (self.request(url).json()["post"],) - -class E621PopularExtractor(E621Extractor): +class 
E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor): """Extractor for popular images from e621""" - subcategory = "popular" - directory_fmt = ("{category}", "popular", "{scale}", "{date}") - archive_fmt = "P_{scale[0]}_{date}_{id}" pattern = BASE_PATTERN + r"/explore/posts/popular(?:\?([^#]*))?" test = ( ("https://e621.net/explore/posts/popular"), @@ -170,24 +106,3 @@ class E621PopularExtractor(E621Extractor): "count": ">= 70", }) ) - - def __init__(self, match): - E621Extractor.__init__(self, match) - self.params.update(text.parse_query(match.group(2))) - - def metadata(self): - scale = self.params.get("scale", "day") - date = self.params.get("date") or datetime.date.today().isoformat() - date = date[:10] - - if scale == "week": - date = datetime.date.fromisoformat(date) - date = (date - datetime.timedelta(days=date.weekday())).isoformat() - elif scale == "month": - date = date[:-3] - - return {"date": date, "scale": scale} - - def posts(self): - url = self.root + "/explore/posts/popular.json" - return self._pagination(url) diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 96afea1..4af12f1 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2019 Leonardo Taccari +# Copyright 2018-2020 Leonardo Taccari # Copyright 2018-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify @@ -12,6 +12,7 @@ from .common import Extractor, Message from .. 
import text, exception from ..cache import cache +import itertools import json @@ -208,8 +209,10 @@ class InstagramExtractor(Extractor): media_data = { 'owner_id': media['owner']['id'], 'username': media['owner']['username'], - 'date': text.parse_timestamp(media['taken_at_timestamp']), - 'expires': text.parse_timestamp(media['expiring_at_timestamp']), + 'date' : text.parse_timestamp( + media['taken_at_timestamp']), + 'expires' : text.parse_timestamp( + media['expiring_at_timestamp']), 'media_id': media['id'], 'typename': media['__typename'], 'display_url': media['display_url'], @@ -268,7 +271,10 @@ class InstagramExtractor(Extractor): # Deal with different structure of pages: the first page # has interesting data in `entry_data', next pages in `data'. if 'entry_data' in shared_data: - base_shared_data = shared_data['entry_data'][psdf['page']][0]['graphql'] + entry_data = shared_data['entry_data'] + if 'HttpErrorPage' in entry_data: + return + base_shared_data = entry_data[psdf['page']][0]['graphql'] # variables_id is available only in the first page variables_id = base_shared_data[psdf['node']][psdf['node_id']] @@ -404,12 +410,38 @@ class InstagramStoriesExtractor(InstagramExtractor): return self._extract_stories(url) +class InstagramSavedExtractor(InstagramExtractor): + """Extractor for ProfilePage saved media""" + subcategory = "saved" + pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" + r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)" + r"([^/?&#]+)/saved") + test = ("https://www.instagram.com/instagram/saved/",) + + def __init__(self, match): + InstagramExtractor.__init__(self, match) + self.username = match.group(1) + + def instagrams(self): + url = '{}/{}/saved/'.format(self.root, self.username) + shared_data = self._extract_shared_data(url) + + return self._extract_page(shared_data, { + 'page': 'ProfilePage', + 'node': 'user', + 'node_id': 'id', + 'variables_id': 'id', + 'edge_to_medias': 'edge_saved_media', + 'query_hash': 
'8c86fed24fa03a8a2eea2a70a80c7b6b', + }) + + class InstagramUserExtractor(InstagramExtractor): """Extractor for ProfilePage""" subcategory = "user" pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)" - r"([^/?&#]+)/?$") + r"([^/?&#]+)/?(?:$|[?#])") test = ( ("https://www.instagram.com/instagram/", { "range": "1-16", @@ -421,6 +453,7 @@ class InstagramUserExtractor(InstagramExtractor): "range": "1-2", "count": 2, }), + ("https://www.instagram.com/instagram/?hl=en"), ) def __init__(self, match): @@ -431,10 +464,7 @@ class InstagramUserExtractor(InstagramExtractor): url = '{}/{}/'.format(self.root, self.username) shared_data = self._extract_shared_data(url) - if self.config('highlights'): - yield from self._extract_story_highlights(shared_data) - - yield from self._extract_page(shared_data, { + instagrams = self._extract_page(shared_data, { 'page': 'ProfilePage', 'node': 'user', 'node_id': 'id', @@ -443,6 +473,14 @@ class InstagramUserExtractor(InstagramExtractor): 'query_hash': 'f2405b236d85e8296cf30347c9f08c2a', }) + if self.config('highlights'): + instagrams = itertools.chain( + self._extract_story_highlights(shared_data), + instagrams, + ) + + return instagrams + class InstagramChannelExtractor(InstagramExtractor): """Extractor for ProfilePage channel""" diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index 9fd9f3f..38c90df 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -50,13 +50,13 @@ class MangadexChapterExtractor(MangadexExtractor): pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)/chapter/(\d+)" test = ( ("https://mangadex.org/chapter/122094", { - "keyword": "1c834dca33025f521e1874aee1f71c51e28ebf99", + "keyword": "ef1084c2845825979e150512fed8fdc209baf05a", "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f", }), # oneshot ("https://mangadex.cc/chapter/138086", { "count": 64, - "keyword": 
"178777bd0352fb19eb934cbee5630d16e3fb60ab", + "keyword": "f3da80e57b1acfe1bede7d6ebe82a4bae3f9101a", }), ) @@ -93,7 +93,7 @@ class MangadexChapterExtractor(MangadexExtractor): "chapter_minor": sep + minor, "chapter_id": cdata["id"], "group": mdata["chapter"][self.chapter_id]["group_name"], - "date": cdata["timestamp"], + "date": text.parse_timestamp(cdata["timestamp"]), "lang": util.language_to_code(cdata["lang_name"]), "language": cdata["lang_name"], } @@ -115,16 +115,16 @@ class MangadexMangaExtractor(MangadexExtractor): test = ( ("https://mangadex.org/manga/2946/souten-no-koumori", { "pattern": r"https://mangadex.org/chapter/\d+", - "keywords": { + "keyword": { "manga": "Souten no Koumori", "manga_id": 2946, - "title": "Oneshot", + "title": "re:One[Ss]hot", "volume": 0, "chapter": 0, "chapter_minor": "", "chapter_id": int, "group": str, - "date": int, + "date": "type:datetime", "lang": str, "language": str, }, @@ -169,7 +169,7 @@ class MangadexMangaExtractor(MangadexExtractor): "chapter_minor": sep + minor, "chapter_id": text.parse_int(chid), "group": text.unescape(info["group_name"]), - "date": info["timestamp"], + "date": text.parse_timestamp(info["timestamp"]), "lang": lang, "language": util.code_to_language(lang), "_extractor": MangadexChapterExtractor, diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py index ee11231..228324f 100644 --- a/gallery_dl/extractor/mangapark.py +++ b/gallery_dl/extractor/mangapark.py @@ -1,16 +1,17 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2019 Mike Fährmann +# Copyright 2015-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract manga-chapters and entire manga from https://mangapark.me/""" +"""Extractors for https://mangapark.net/""" from .common import ChapterExtractor, MangaExtractor from .. 
import text, exception import json +import re class MangaparkBase(): @@ -37,26 +38,35 @@ class MangaparkBase(): elif key == "e": data["chapter_minor"] = "v" + value + @staticmethod + def parse_chapter_title(title, data): + match = re.search(r"(?i)(?:vol(?:ume)?[ .]*(\d+) )?" + r"ch(?:apter)?[ .]*(\d+)(\.\w+)?", title) + if match: + vol, ch, data["chapter_minor"] = match.groups() + data["volume"] = text.parse_int(vol) + data["chapter"] = text.parse_int(ch) + class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor): - """Extractor for manga-chapters from mangapark.me""" + """Extractor for manga-chapters from mangapark.net""" pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)" r"/manga/([^?&#]+/i\d+)") test = ( - ("https://mangapark.me/manga/gosu/i811615/c55/1", { + ("https://mangapark.net/manga/gosu/i811615/c55/1", { "count": 50, "keyword": "373d678048d29492f9763743ccaa9b6d840f17cf", }), - (("https://mangapark.me/manga" + (("https://mangapark.net/manga" "/ad-astra-per-aspera-hata-kenjirou/i662054/c001.2/1"), { "count": 40, "keyword": "8e9cce4ed0e25d12a45e02f840d6f32ef838e257", }), - ("https://mangapark.me/manga/gekkan-shoujo-nozaki-kun/i655476/c70/1", { + ("https://mangapark.net/manga/gekkan-shoujo-nozaki-kun/i655476/c70", { "count": 15, "keyword": "19f730617074d65f91c0781f429de324890925bf", }), - ("https://mangapark.net/manga/gosu/i811615/c55/1"), + ("https://mangapark.me/manga/gosu/i811615/c55/1"), ("https://mangapark.com/manga/gosu/i811615/c55/1"), ) @@ -78,7 +88,10 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor): if not data["path"]: raise exception.NotFoundError("chapter") + self.parse_chapter_path(data["path"], data) + if "chapter" not in data: + self.parse_chapter_title(data["title"], data) data["manga"], _, data["type"] = data["manga"].rpartition(" ") data["manga"] = text.unescape(data["manga"]) @@ -89,8 +102,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor): return data def images(self, page): - data 
= json.loads(text.extract( - page, "var _load_pages =", ";")[0] or "[]") + data = json.loads(text.extract(page, "var _load_pages =", ";")[0]) return [ (text.urljoin(self.root, item["u"]), { "width": text.parse_int(item["w"]), @@ -101,16 +113,16 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor): class MangaparkMangaExtractor(MangaparkBase, MangaExtractor): - """Extractor for manga from mangapark.me""" + """Extractor for manga from mangapark.net""" chapterclass = MangaparkChapterExtractor pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)" r"(/manga/[^/?&#]+)/?$") test = ( - ("https://mangapark.me/manga/aria", { - "url": "a58be23ef3874fe9705b0b41dd462b67eaaafd9a", - "keyword": "b3b5a30aa2a326bc0ca8b74c65b5ecd4bf676ebf", + ("https://mangapark.net/manga/aria", { + "url": "9b0b31e4992260876f56d7bfc8ff0ae71295c4f4", + "keyword": "6e44744a28d01b889b1e8291847abd84b591590d", }), - ("https://mangapark.net/manga/aria"), + ("https://mangapark.me/manga/aria"), ("https://mangapark.com/manga/aria"), ) @@ -128,13 +140,22 @@ class MangaparkMangaExtractor(MangaparkBase, MangaExtractor): data["stream"] = text.parse_int(text.extract(stream, '', '"')[0]) for chapter in text.extract_iter(stream, '<li ', '</li>'): - path , pos = text.extract(chapter, 'href="', '"') - title, pos = text.extract(chapter, '>: </span>', '<', pos) - count, pos = text.extract(chapter, ' of ', ' ', pos) + path , pos = text.extract(chapter, 'href="', '"') + title1, pos = text.extract(chapter, '>', '<', pos) + title2, pos = text.extract(chapter, '>: </span>', '<', pos) + count , pos = text.extract(chapter, ' of ', ' ', pos) self.parse_chapter_path(path[8:], data) - data["title"] = title.strip() if title else "" + if "chapter" not in data: + self.parse_chapter_title(title1, data) + + if title2: + data["title"] = title2.strip() + else: + data["title"] = title1.partition(":")[2].strip() + data["count"] = text.parse_int(count) results.append((self.root + path, data.copy())) + 
data.pop("chapter", None) return results diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 21afeae..1f10319 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -38,7 +38,7 @@ class NewgroundsExtractor(Extractor): try: post = self.extract_post(post_url) url = post.get("url") - except OSError: + except Exception: url = None if url: diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py index dfe31e3..a936370 100644 --- a/gallery_dl/extractor/nozomi.py +++ b/gallery_dl/extractor/nozomi.py @@ -16,8 +16,8 @@ class NozomiExtractor(Extractor): """Base class for nozomi extractors""" category = "nozomi" root = "https://nozomi.la" - filename_fmt = "{postid}.{extension}" - archive_fmt = "{postid}" + filename_fmt = "{postid} {dataid}.{extension}" + archive_fmt = "{dataid}" def items(self): yield Message.Version, 1 @@ -37,24 +37,27 @@ class NozomiExtractor(Extractor): post_id, response.status_code, response.reason) continue - image = response.json() - image["tags"] = self._list(image.get("general")) - image["artist"] = self._list(image.get("artist")) - image["copyright"] = self._list(image.get("copyright")) - image["character"] = self._list(image.get("character")) - image["is_video"] = bool(image.get("is_video")) - image["date"] = text.parse_datetime( - image["date"] + ":00", "%Y-%m-%d %H:%M:%S%z") - image["url"] = text.urljoin(self.root, image["imageurl"]) - text.nameext_from_url(image["url"], image) - image.update(data) + post = response.json() + post["tags"] = self._list(post.get("general")) + post["artist"] = self._list(post.get("artist")) + post["copyright"] = self._list(post.get("copyright")) + post["character"] = self._list(post.get("character")) + post["date"] = text.parse_datetime( + post["date"] + ":00", "%Y-%m-%d %H:%M:%S%z") + post.update(data) + images = post["imageurls"] for key in ("general", "imageurl", "imageurls"): - if key in image: - del image[key] + if key 
in post: + del post[key] - yield Message.Directory, image - yield Message.Url, image["url"], image + yield Message.Directory, post + for image in images: + post["url"] = url = text.urljoin(self.root, image["imageurl"]) + text.nameext_from_url(url, post) + post["is_video"] = bool(image.get("is_video")) + post["dataid"] = post["filename"] + yield Message.Url, url, post def metadata(self): return {} @@ -64,9 +67,7 @@ class NozomiExtractor(Extractor): @staticmethod def _list(src): - if not src: - return [] - return [x["tagname_display"] for x in src] + return [x["tagname_display"] for x in src] if src else () @staticmethod def _unpack(b): @@ -78,29 +79,37 @@ class NozomiPostExtractor(NozomiExtractor): """Extractor for individual posts on nozomi.la""" subcategory = "post" pattern = r"(?:https?://)?nozomi\.la/post/(\d+)" - test = ("https://nozomi.la/post/3649262.html", { - "url": "f4522adfc8159355fd0476de28761b5be0f02068", - "content": "cd20d2c5149871a0b80a1b0ce356526278964999", - "keyword": { - "artist" : ["hammer (sunset beach)"], - "character": ["patchouli knowledge"], - "copyright": ["touhou"], - "dataid" : "re:aaa9f7c632cde1e1a5baaff3fb6a6d857ec73df7fdc5cf5a", - "date" : "dt:2016-07-26 02:32:03", - "extension": "jpg", - "favorites": int, - "filename" : str, - "height" : 768, - "is_video" : False, - "postid" : 3649262, - "source" : "danbooru", - "sourceid" : 2434215, - "tags" : list, - "type" : "jpg", - "url" : str, - "width" : 1024, - }, - }) + test = ( + ("https://nozomi.la/post/3649262.html", { + "url": "f4522adfc8159355fd0476de28761b5be0f02068", + "content": "cd20d2c5149871a0b80a1b0ce356526278964999", + "keyword": { + "artist" : ["hammer (sunset beach)"], + "character": ["patchouli knowledge"], + "copyright": ["touhou"], + "dataid" : "re:aaa9f7c632cde1e1a5baaff3fb6a6d857ec73df7fdc5", + "date" : "dt:2016-07-26 02:32:03", + "extension": "jpg", + "favorites": int, + "filename" : str, + "height" : 768, + "is_video" : False, + "postid" : 3649262, + "source" : 
"danbooru", + "sourceid" : 2434215, + "tags" : list, + "type" : "jpg", + "url" : str, + "width" : 1024, + }, + }), + # multiple images per post + ("https://nozomi.la/post/25588032.html", { + "url": "6aa3b7db385abcc9d374bdffd19187bccbf8f228", + "keyword": "0aa99cbaaeada2984a1fbf912274409c6ba106d4", + "count": 7, + }), + ) def __init__(self, match): NozomiExtractor.__init__(self, match) @@ -118,8 +127,8 @@ class NozomiTagExtractor(NozomiExtractor): pattern = r"(?:https?://)?nozomi\.la/tag/([^/?&#]+)-\d+\." test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", { "pattern": r"^https://i.nozomi.la/\w/\w\w/\w+\.\w+$", - "count": ">= 75", - "range": "1-75", + "count": ">= 25", + "range": "1-25", }) def __init__(self, match): @@ -182,4 +191,4 @@ class NozomiSearchExtractor(NozomiExtractor): else: result.update(items) - return result + return sorted(result, reverse=True) diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py index 35f9f91..41b1039 100644 --- a/gallery_dl/extractor/piczel.py +++ b/gallery_dl/extractor/piczel.py @@ -10,6 +10,7 @@ from .common import Extractor, Message from .. 
import text +import json class PiczelExtractor(Extractor): @@ -49,7 +50,6 @@ class PiczelExtractor(Extractor): def _pagination(self, url, folder_id=None): params = { - "hideNsfw" : "false", "from_id" : None, "folder_id": folder_id, } @@ -59,7 +59,10 @@ class PiczelExtractor(Extractor): if not data: return params["from_id"] = data[-1]["id"] - yield from data + + for post in data: + if not folder_id or folder_id == post["folder_id"]: + yield post class PiczelUserExtractor(PiczelExtractor): @@ -97,7 +100,7 @@ class PiczelFolderExtractor(PiczelExtractor): def posts(self): url = "{}/api/users/{}/gallery".format(self.root, self.user) - return self._pagination(url, self.folder_id) + return self._pagination(url, int(self.folder_id)) class PiczelImageExtractor(PiczelExtractor): @@ -134,5 +137,8 @@ class PiczelImageExtractor(PiczelExtractor): self.image_id = match.group(1) def posts(self): - url = "{}/api/gallery/image/{}".format(self.root, self.image_id) - return (self.request(url).json(),) + url = "{}/gallery/image/{}".format(self.root, self.image_id) + page = self.request(url).text + data = json.loads(text.extract( + page, 'window.__PRELOADED_STATE__ =', '</script>')[0]) + return (data["gallery"]["images"]["byId"][self.image_id],) diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py index 521b034..b21ad32 100644 --- a/gallery_dl/extractor/sexcom.py +++ b/gallery_dl/extractor/sexcom.py @@ -101,22 +101,22 @@ class SexcomPinExtractor(SexcomExtractor): pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+)(?!.*#related$)" test = ( # picture - ("https://www.sex.com/pin/56714360/", { - "pattern": "https://cdn.sex.com/images/.+/2018/10/02/20037816.jpg", - "content": "e579e3283fea812d0545a3f79734b79bc3c51acb", + ("https://www.sex.com/pin/21241874-sexy-ecchi-girls-166/", { + "pattern": "https://cdn.sex.com/images/.+/2014/08/26/7637609.jpg", + "content": "ebe1814dadfebf15d11c6af4f6afb1a50d6c2a1c", "keyword": { "comments" : int, - "date" : "dt:2018-10-02 
21:18:17", + "date" : "dt:2014-10-19 15:45:44", "extension": "jpg", - "filename" : "20037816", + "filename" : "7637609", "likes" : int, - "pin_id" : 56714360, + "pin_id" : 21241874, "repins" : int, "tags" : list, "thumbnail": str, - "title" : "Pin #56714360", + "title" : "Sexy Ecchi Girls 166", "type" : "picture", - "uploader" : "alguem", + "uploader" : "mangazeta", "url" : str, }, }), @@ -149,8 +149,8 @@ class SexcomRelatedPinExtractor(SexcomPinExtractor): subcategory = "related-pin" directory_fmt = ("{category}", "related {original_pin[pin_id]}") pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+).*#related$" - test = ("https://www.sex.com/pin/56714360/#related", { - "count": ">= 22", + test = ("https://www.sex.com/pin/21241874/#related", { + "count": ">= 20", }) def metadata(self): diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py index 82a61da..abf9995 100644 --- a/gallery_dl/extractor/simplyhentai.py +++ b/gallery_dl/extractor/simplyhentai.py @@ -143,6 +143,7 @@ class SimplyhentaiVideoExtractor(Extractor): "pattern": r"https://www\.googleapis\.com/drive/v3/files" r"/0B1ecQ8ZVLm3JcHZzQzBnVy1ZUmc\?alt=media&key=[\w-]+", "keyword": "706790708b14773efc1e075ddd3b738a375348a5", + "options": (("verify", False),), "count": 1, }), (("https://videos.simply-hentai.com" diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 2a04463..cbb075c 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -459,7 +459,7 @@ class TwitterBookmarkExtractor(TwitterExtractor): "Referer": self.root + "/i/bookmarks", "x-csrf-token": self.session.cookies.get("ct0"), "x-twitter-active-user": "yes", - "x-twitter-auth-type": "Auth2Session", + "x-twitter-auth-type": "OAuth2Session", "x-twitter-client-language": "en", } diff --git a/gallery_dl/output.py b/gallery_dl/output.py index f084950..9e2f8a6 100644 --- a/gallery_dl/output.py +++ b/gallery_dl/output.py @@ -149,12 +149,13 @@ def 
replace_std_streams(errors="replace"): """Replace standard streams and set their error handlers to 'errors'""" for name in ("stdout", "stdin", "stderr"): stream = getattr(sys, name) - setattr(sys, name, stream.__class__( - stream.buffer, - errors=errors, - newline=stream.newlines, - line_buffering=stream.line_buffering, - )) + if stream: + setattr(sys, name, stream.__class__( + stream.buffer, + errors=errors, + newline=stream.newlines, + line_buffering=stream.line_buffering, + )) # -------------------------------------------------------------------- diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 232047c..47fad9e 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -726,6 +726,7 @@ class PathFormat(): def set_directory(self, kwdict): """Build directory path and create it if necessary""" + windows = os.name == "nt" # Build path segments by applying 'kwdict' to directory format strings segments = [] @@ -733,6 +734,9 @@ class PathFormat(): try: for formatter in self.directory_formatters: segment = formatter(kwdict).strip() + if windows: + # remove trailing dots and spaces (#647) + segment = segment.rstrip(". ") if segment: append(self.clean_segment(segment)) except Exception as exc: @@ -747,7 +751,7 @@ class PathFormat(): directory += sep self.directory = directory - if os.name == "nt": + if windows: # Enable longer-than-260-character paths on Windows directory = "\\\\?\\" + os.path.abspath(directory) diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 9171f15..37d133e 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.13.2" +__version__ = "1.13.3" diff --git a/test/test_results.py b/test/test_results.py index 538abfa..b697d15 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -27,11 +27,8 @@ TRAVIS_SKIP = { # temporary issues, etc. 
BROKEN = { - "35photo", - "mangapark", + "myportfolio", "photobucket", - "sexcom", - "hentaicafe", "worldthree", } |