diff options
author | Unit 193 <unit193@ubuntu.com> | 2020-04-29 17:42:33 -0400 |
---|---|---|
committer | Unit 193 <unit193@ubuntu.com> | 2020-04-29 17:42:33 -0400 |
commit | 32535ac322d26c3d2de455a2eb8225a8e7ff3c04 (patch) | |
tree | 82f2b2982fbbb414e1eb27f7d589c7cd6e7c1801 | |
parent | 7b5f108ad4bb896ce6330868829ff288ef763f4b (diff) | |
parent | d5b96ce44b7809f5ae01e3e9d70a1d58fe21ccf5 (diff) | |
download | gallery-dl-32535ac322d26c3d2de455a2eb8225a8e7ff3c04.tar.bz2 gallery-dl-32535ac322d26c3d2de455a2eb8225a8e7ff3c04.tar.xz gallery-dl-32535ac322d26c3d2de455a2eb8225a8e7ff3c04.tar.zst |
Update upstream source from tag 'upstream/1.13.5'
Update to upstream version '1.13.5'
with Debian dir 5854d7424ad212665647d815ebbf3bac5f95fa38
27 files changed, 376 insertions, 186 deletions
@@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.13.4 +Version: 1.13.5 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.5/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.5/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -247,7 +247,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.4.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.5.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -83,8 +83,8 @@ Download a standalone executable file, put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.5/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.5/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -236,7 +236,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.4.tar.gz +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.5.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index e7e1566..9ef6a93 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2020-04-12" "1.13.4" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2020-04-27" "1.13.5" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 2fd4dba..6c48a70 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2020-04-12" "1.13.4" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2020-04-27" "1.13.5" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 45381a6..a0c6286 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.13.4 +Version: 1.13.5 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.5/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.5/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -247,7 +247,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.4.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.5.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py index 6ba5480..e3ebd1a 100644 --- a/gallery_dl/cloudflare.py +++ b/gallery_dl/cloudflare.py @@ -13,7 +13,7 @@ import time import operator import collections import urllib.parse -from . import text, exception +from . import text from .cache import memcache @@ -39,12 +39,16 @@ def solve_challenge(session, response, kwargs): page = response.text url = root + text.unescape(text.extract(page, 'action="', '"')[0]) - params["r"] = text.extract(page, 'name="r" value="', '"')[0] - params["jschl_vc"] = text.extract(page, 'name="jschl_vc" value="', '"')[0] - params["pass"] = text.extract(page, 'name="pass" value="', '"')[0] - params["jschl_answer"] = solve_js_challenge(page, parsed.netloc) headers["Referer"] = response.url + for inpt in text.extract_iter(page, "<input ", ">"): + name = text.extract(inpt, 'name="', '"')[0] + if name == "jschl_answer": + value = solve_js_challenge(page, parsed.netloc) + else: + value = text.unescape(text.extract(inpt, 'value="', '"')[0]) + params[name] = value + time.sleep(4) cf_kwargs["allow_redirects"] = False @@ -54,14 +58,13 @@ def solve_challenge(session, response, kwargs): cookie.name: cookie.value for cookie in cf_response.cookies } + if not cookies: import logging log = logging.getLogger("cloudflare") - rtype = "CAPTCHA" if is_captcha(cf_response) else "Unexpected" - log.error("%s response", rtype) log.debug("Headers:\n%s", cf_response.headers) log.debug("Content:\n%s", cf_response.text) - raise exception.StopExtraction() + return cf_response, None, None domain = next(iter(cf_response.cookies)).domain cookies["__cfduid"] = response.cookies.get("__cfduid", "") diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py index 07c2e14..7ecdef7 100644 --- a/gallery_dl/extractor/500px.py +++ b/gallery_dl/extractor/500px.py @@ -12,6 +12,9 @@ from .common import Extractor, Message from .. import text +BASE_PATTERN = r"(?:https?://)?(?:web\.)?500px\.com" + + class _500pxExtractor(Extractor): """Base class for 500px extractors""" category = "500px" @@ -86,13 +89,15 @@ class _500pxExtractor(Extractor): class _500pxUserExtractor(_500pxExtractor): """Extractor for photos from a user's photostream on 500px.com""" subcategory = "user" - pattern = (r"(?:https?://)?500px\.com" - r"/(?!photo/)([^/?&#]+)/?(?:$|\?|#)") - test = ("https://500px.com/light_expression_photography", { - "pattern": r"https?://drscdn.500px.org/photo/\d+/m%3D4096/v2", - "range": "1-99", - "count": 99, - }) + pattern = BASE_PATTERN + r"/(?!photo/)([^/?&#]+)/?(?:$|\?|#)" + test = ( + ("https://500px.com/light_expression_photography", { + "pattern": r"https?://drscdn.500px.org/photo/\d+/m%3D4096/v2", + "range": "1-99", + "count": 99, + }), + ("https://web.500px.com/light_expression_photography"), + ) def __init__(self, match): _500pxExtractor.__init__(self, match) @@ -120,8 +125,7 @@ class _500pxGalleryExtractor(_500pxExtractor): """Extractor for photo galleries on 500px.com""" subcategory = "gallery" directory_fmt = ("{category}", "{user[username]}", "{gallery[name]}") - pattern = (r"(?:https?://)?500px\.com" - r"/(?!photo/)([^/?&#]+)/galleries/([^/?&#]+)") + pattern = BASE_PATTERN + r"/(?!photo/)([^/?&#]+)/galleries/([^/?&#]+)" test = ("https://500px.com/fashvamp/galleries/lera", { "url": "002dc81dee5b4a655f0e31ad8349e8903b296df6", "count": 3, @@ -171,7 +175,7 @@ class _500pxGalleryExtractor(_500pxExtractor): class _500pxImageExtractor(_500pxExtractor): """Extractor for individual images from 500px.com""" subcategory = "image" - pattern = r"(?:https?://)?500px\.com/photo/(\d+)" + pattern = BASE_PATTERN + r"/photo/(\d+)" test = ("https://500px.com/photo/222049255/queen-of-coasts", { "url": "fbdf7df39325cae02f5688e9f92935b0e7113315", "count": 1, diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py index d8f55bd..7575de9 100644 --- a/gallery_dl/extractor/aryion.py +++ b/gallery_dl/extractor/aryion.py @@ -28,21 +28,18 @@ class AryionExtractor(Extractor): self.user = match.group(1) self.offset = 0 - def items(self): - for post_id in util.advance(self.posts(), self.offset): - post = self._parse_post(post_id) - if post: - yield Message.Directory, post - yield Message.Url, post["url"], post - - def posts(self): - return () + def posts(self, url): + while True: + page = self.request(url).text + yield from text.extract_iter( + page, "class='thumb' href='/g4/view/", "'") - def skip(self, num): - self.offset += num - return num + pos = page.find("Next >>") + if pos < 0: + return + url = self.root + text.rextract(page, "href='", "'", pos)[0] - def _parse_post(self, post_id): + def parse_post(self, post_id): url = "{}/g4/data.php?id={}".format(self.root, post_id) with self.request(url, method="HEAD", fatal=False) as response: @@ -50,18 +47,24 @@ class AryionExtractor(Extractor): return None headers = response.headers - # ignore folders - if headers["content-type"] == "application/x-folder": - return None + # folder + if headers["content-type"] in ( + "application/x-folder", + "application/x-comic-folder-nomerge", + ): + return False - # get filename from 'content-disposition' header + # get filename from 'Content-Disposition' header cdis = headers["content-disposition"] fname, _, ext = text.extract( cdis, 'filename="', '"')[0].rpartition(".") if not fname: fname, ext = ext, fname - # fix 'last-modified' header + # get file size from 'Content-Length' header + clen = headers.get("content-length") + + # fix 'Last-Modified' header lmod = headers["last-modified"] if lmod[22] != ":": lmod = "{}:{} GMT".format(lmod[:22], lmod[22:24]) @@ -79,8 +82,8 @@ class AryionExtractor(Extractor): "artist": artist, "path" : text.split_html(extr("cookiecrumb'>", '</span'))[4:-1:2], "date" : extr("class='pretty-date' title='", "'"), + "size" : text.parse_int(clen), "views" : text.parse_int(extr("Views</b>:", "<").replace(",", "")), - "size" : text.parse_bytes(extr("File size</b>:", "<")[:-2]), "width" : text.parse_int(extr("Resolution</b>:", "x")), "height": text.parse_int(extr("", "<")), "comments" : text.parse_int(extr("Comments</b>:", "<")), @@ -88,9 +91,9 @@ class AryionExtractor(Extractor): "tags" : text.split_html(extr("class='taglist'>", "</span>")), "description": text.unescape(text.remove_html(extr( "<p>", "</p>"), "", "")), - "filename" : fname, - "extension" : ext, - "_mtime" : lmod, + "filename" : fname, + "extension": ext, + "_mtime" : lmod, } d1, _, d2 = data["date"].partition(",") @@ -114,48 +117,66 @@ class AryionGalleryExtractor(AryionExtractor): ("https://aryion.com/g4/latest.php?name=jameshoward"), ) - def posts(self): - url = "{}/g4/latest.php?name={}".format(self.root, self.user) - - while True: - page = self.request(url).text - yield from text.extract_iter( - page, "class='thumb' href='/g4/view/", "'") + def skip(self, num): + self.offset += num + return num - pos = page.find("Next >>") - if pos < 0: - return - url = self.root + text.rextract(page, "href='", "'", pos)[0] + def items(self): + url = "{}/g4/latest.php?name={}".format(self.root, self.user) + for post_id in util.advance(self.posts(url), self.offset): + post = self.parse_post(post_id) + if post: + yield Message.Directory, post + yield Message.Url, post["url"], post class AryionPostExtractor(AryionExtractor): """Extractor for individual posts on eka's portal""" subcategory = "post" pattern = BASE_PATTERN + r"/view/(\d+)" - test = ("https://aryion.com/g4/view/510079", { - "url": "f233286fa5558c07ae500f7f2d5cb0799881450e", - "keyword": { - "artist" : "jameshoward", - "user" : "jameshoward", - "filename" : "jameshoward-510079-subscribestar_150", - "extension": "jpg", - "id" : 510079, - "width" : 1665, - "height" : 1619, - "size" : 784241, - "title" : "I'm on subscribestar now too!", - "description": r"re:Doesn't hurt to have a backup, right\?", - "tags" : ["Non-Vore", "subscribestar"], - "date" : "dt:2019-02-16 19:30:00", - "path" : [], - "views" : int, - "favorites": int, - "comments" : int, - "_mtime" : "Sat, 16 Feb 2019 19:30:34 GMT", - }, - }) - - def posts(self): + test = ( + ("https://aryion.com/g4/view/510079", { + "url": "f233286fa5558c07ae500f7f2d5cb0799881450e", + "keyword": { + "artist" : "jameshoward", + "user" : "jameshoward", + "filename" : "jameshoward-510079-subscribestar_150", + "extension": "jpg", + "id" : 510079, + "width" : 1665, + "height" : 1619, + "size" : 784239, + "title" : "I'm on subscribestar now too!", + "description": r"re:Doesn't hurt to have a backup, right\?", + "tags" : ["Non-Vore", "subscribestar"], + "date" : "dt:2019-02-16 19:30:00", + "path" : [], + "views" : int, + "favorites": int, + "comments" : int, + "_mtime" : "Sat, 16 Feb 2019 19:30:34 GMT", + }, + }), + # folder (#694) + ("https://aryion.com/g4/view/588928", { + "pattern": pattern, + "count": ">= 8", + }), + ) + + def items(self): post_id = self.user self.user = None - return (post_id,) + post = self.parse_post(post_id) + + if post: + yield Message.Directory, post + yield Message.Url, post["url"], post + + elif post is False: + folder_url = "{}/g4/view/{}".format(self.root, post_id) + data = {"_extractor": AryionPostExtractor} + + for post_id in self.posts(folder_url): + url = "{}/g4/view/{}".format(self.root, post_id) + yield Message.Queue, url, data diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index 162e9cc..be0027a 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2019 Mike Fährmann +# Copyright 2015-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -171,7 +171,8 @@ class GelbooruPoolMixin(PoolMixin): name, pos = text.extract(page, "<h4>Pool: ", "</h4>") if not name: raise exception.NotFoundError("pool") - self.posts = list(text.extract_iter(page, 'id="p', '"', pos)) + self.posts = list(text.extract_iter( + page, 'class="thumb" id="p', '"', pos)) return { "pool": text.parse_int(self.pool), diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 8986c99..3a282c2 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -99,18 +99,20 @@ class Extractor(): return response if notfound and code == 404: raise exception.NotFoundError(notfound) + + reason = response.reason if cloudflare.is_challenge(response): self.log.info("Solving Cloudflare challenge") response, domain, cookies = cloudflare.solve_challenge( session, response, kwargs) - if response.status_code >= 400: - continue - cloudflare.cookies.update(self.category, (domain, cookies)) - return response + if cookies: + cloudflare.cookies.update( + self.category, (domain, cookies)) + return response if cloudflare.is_captcha(response): self.log.warning("Cloudflare CAPTCHA") - msg = "'{} {}' for '{}'".format(code, response.reason, url) + msg = "'{} {}' for '{}'".format(code, reason, url) if code < 500 and code != 429 and code != 430: break diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 2dcf0b7..ca722b8 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -813,6 +813,7 @@ class DeviantartFollowingExtractor(DeviantartExtractor): yield Message.Version, 1 for user in eclipse_api.user_watching(self.user, self.offset): url = "{}/{}".format(self.root, user["username"]) + user["_extractor"] = DeviantartUserExtractor yield Message.Queue, url, user diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py index 1f8c567..3cc263c 100644 --- a/gallery_dl/extractor/foolslide.py +++ b/gallery_dl/extractor/foolslide.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2019 Mike Fährmann +# Copyright 2016-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -19,6 +19,7 @@ from .common import ( from .. import text, util import base64 import json +import re class FoolslideBase(SharedConfigMixin): @@ -85,7 +86,12 @@ class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor): data = None if self.decode == "base64": - base64_data = text.extract(page, 'atob("', '"')[0] + pos = page.find("'fromCharCode'") + if pos >= 0: + blob = text.extract(page, "'", "'", pos+15)[0] + base64_data = re.sub(r"[a-zA-Z]", _decode_jaiminisbox, blob) + else: + base64_data = text.extract(page, 'atob("', '"')[0] if base64_data: data = base64.b64decode(base64_data.encode()).decode() elif self.decode == "double": @@ -120,6 +126,16 @@ class FoolslideMangaExtractor(FoolslideBase, MangaExtractor): }))) +def _decode_jaiminisbox(match): + c = match.group(0) + + # ord("Z") == 90, ord("z") == 122 + N = 90 if c <= "Z" else 122 + C = ord(c) + 13 + + return chr(C if N >= C else (C - 26)) + + EXTRACTORS = { "dokireader": { "root": "https://kobato.hologfx.com/reader", @@ -140,8 +156,8 @@ EXTRACTORS = { "pattern": r"(?:www\.)?jaiminisbox\.com/reader", "extra": {"decode": "base64"}, "test-chapter": ( - ("https://jaiminisbox.com/reader/read/uratarou/en/0/1/", { - "keyword": "6009af77cc9c05528ab1fdda47b1ad9d4811c673", + ("https://jaiminisbox.com/reader/read/fire-force/en/0/215/", { + "keyword": "6d2b5c0b34344156b0301ff2733389dfe36a7604", }), ("https://jaiminisbox.com/reader/read/red-storm/en/0/336/", { "keyword": "53c6dddf3e5a61b6002a886ccd7e3354e973299a", diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index ba60e19..9af7274 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -233,3 +233,29 @@ class FuraffinityUserExtractor(FuraffinityExtractor): (FuraffinityScrapsExtractor , base.format("scraps")), (FuraffinityFavoriteExtractor, base.format("favorites")), ), ("gallery",)) + + +class FuraffinityFollowingExtractor(FuraffinityExtractor): + """Extractor for a furaffinity user's watched users""" + subcategory = "following" + pattern = BASE_PATTERN + "/watchlist/by/([^/?&#]+)" + test = ("https://www.furaffinity.net/watchlist/by/mirlinthloth/", { + "pattern": FuraffinityUserExtractor.pattern, + "range": "176-225", + "count": 50, + }) + + def items(self): + url = "{}/watchlist/by/{}/".format(self.root, self.user) + data = {"_extractor": FuraffinityUserExtractor} + + while True: + page = self.request(url).text + + for path in text.extract_iter(page, '<a href="', '"'): + yield Message.Queue, self.root + path, data + + path = text.rextract(page, 'action="', '"')[0] + if url.endswith(path): + return + url = self.root + path diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py index 85cfe49..3883445 100644 --- a/gallery_dl/extractor/hiperdex.py +++ b/gallery_dl/extractor/hiperdex.py @@ -61,25 +61,28 @@ class HiperdexBase(): class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): """Extractor for manga chapters from hiperdex.com""" - pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.com" + pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.(?:com|net)" r"(/manga/([^/?&#]+)/([^/?&#]+))") - test = ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", { - "pattern": r"https://hiperdex.com/wp-content/uploads" - r"/WP-manga/data/manga_\w+/[0-9a-f]{32}/\d+\.webp", - "count": 9, - "keyword": { - "artist" : "Sasuga Kei", - "author" : "Sasuga Kei", - "chapter": 154, - "chapter_minor": ".5", - "description": "re:Natsuo Fujii is in love with his teacher, Hina", - "genre" : list, - "manga" : "Domestic na Kanojo", - "release": 2014, - "score" : float, - "type" : "Manga", - }, - }) + test = ( + ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", { + "pattern": r"https://hiperdex.com/wp-content/uploads" + r"/WP-manga/data/manga_\w+/[0-9a-f]{32}/\d+\.webp", + "count": 9, + "keyword": { + "artist" : "Sasuga Kei", + "author" : "Sasuga Kei", + "chapter": 154, + "chapter_minor": ".5", + "description": "re:Natsuo Fujii is in love with his teacher, ", + "genre" : list, + "manga" : "Domestic na Kanojo", + "release": 2014, + "score" : float, + "type" : "Manga", + }, + }), + ("https://hiperdex.net/manga/domestic-na-kanojo/154-5/"), + ) def __init__(self, match): path, self.manga, self.chapter = match.groups() @@ -99,24 +102,28 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): class HiperdexMangaExtractor(HiperdexBase, MangaExtractor): """Extractor for manga from hiperdex.com""" chapterclass = HiperdexChapterExtractor - pattern = r"(?:https?://)?(?:www\.)?hiperdex\.com(/manga/([^/?&#]+))/?$" - test = ("https://hiperdex.com/manga/youre-not-that-special/", { - "count": 51, - "pattern": HiperdexChapterExtractor.pattern, - "keyword": { - "artist" : "Bolp", - "author" : "Abyo4", - "chapter": int, - "chapter_minor": "", - "description": "re:I didn’t think much of the creepy girl in ", - "genre" : list, - "manga" : "You're Not That Special!", - "release": 2019, - "score" : float, - "status" : "Completed", - "type" : "Manhwa", - }, - }) + pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.(?:com|net)" + r"(/manga/([^/?&#]+))/?$") + test = ( + ("https://hiperdex.com/manga/youre-not-that-special/", { + "count": 51, + "pattern": HiperdexChapterExtractor.pattern, + "keyword": { + "artist" : "Bolp", + "author" : "Abyo4", + "chapter": int, + "chapter_minor": "", + "description": "re:I didn’t think much of the creepy girl in ", + "genre" : list, + "manga" : "You’re Not That Special!", + "release": 2019, + "score" : float, + "status" : "Completed", + "type" : "Manhwa", + }, + }), + ("https://hiperdex.net/manga/youre-not-that-special/"), + ) def __init__(self, match): path, self.manga = match.groups() @@ -147,10 +154,11 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor): categorytransfer = False chapterclass = HiperdexMangaExtractor reverse = False - pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.com" + pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.(?:com|net)" r"(/manga-a(?:rtist|uthor)/([^/?&#]+))") test = ( ("https://hiperdex.com/manga-artist/beck-ho-an/"), + ("https://hiperdex.net/manga-artist/beck-ho-an/"), ("https://hiperdex.com/manga-author/viagra/", { "pattern": HiperdexMangaExtractor.pattern, "count": ">= 6", diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 3baf819..da53113 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -8,7 +8,8 @@ """Extractors for https://hitomi.la/""" -from .common import GalleryExtractor +from .common import GalleryExtractor, Extractor, Message +from .nozomi import decode_nozomi from .. import text, util import string import json @@ -46,6 +47,10 @@ class HitomiGalleryExtractor(GalleryExtractor): ("https://hitomi.la/cg/scathacha-sama-okuchi-ecchi-1291900.html", { "count": 10, }), + # no tags + ("https://hitomi.la/cg/1615823.html", { + "count": 22, + }), ("https://hitomi.la/manga/amazon-no-hiyaku-867789.html"), ("https://hitomi.la/manga/867789.html"), ("https://hitomi.la/doujinshi/867789.html"), @@ -75,14 +80,18 @@ class HitomiGalleryExtractor(GalleryExtractor): if language: language = language.capitalize() + date = info.get("date") + if date: + date += ":00" + tags = [] - for tinfo in info["tags"]: - tag = tinfo["tag"] + for tinfo in info.get("tags") or (): + tag = string.capwords(tinfo["tag"]) if tinfo.get("female"): tag += " ♀" elif tinfo.get("male"): tag += " ♂" - tags.append(string.capwords(tag)) + tags.append(tag) return { "gallery_id": text.parse_int(info["id"]), @@ -90,9 +99,8 @@ class HitomiGalleryExtractor(GalleryExtractor): "type" : info["type"].capitalize(), "language" : language, "lang" : util.language_to_code(language), + "date" : text.parse_datetime(date, "%Y-%m-%d %H:%M:%S%z"), "tags" : tags, - "date" : text.parse_datetime( - info["date"] + ":00", "%Y-%m-%d %H:%M:%S%z"), } def _data_from_gallery_page(self, info): @@ -142,3 +150,35 @@ class HitomiGalleryExtractor(GalleryExtractor): ) result.append((url, idata)) return result + + +class HitomiTagExtractor(Extractor): + """Extractor for galleries from tag searches on hitomi.la""" + category = "hitomi" + subcategory = "tag" + pattern = (r"(?:https?://)?hitomi\.la/" + r"(tag|artist|group|series|type|character)/" + r"([^/?&#]+)-\d+\.html") + test = ( + ("https://hitomi.la/tag/screenshots-japanese-1.html", { + "pattern": HitomiGalleryExtractor.pattern, + "count": ">= 35", + }), + ("https://hitomi.la/artist/a1-all-1.html"), + ("https://hitomi.la/group/initial%2Dg-all-1.html"), + ("https://hitomi.la/series/amnesia-all-1.html"), + ("https://hitomi.la/type/doujinshi-all-1.html"), + ("https://hitomi.la/character/a2-all-1.html"), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.type, self.tag = match.groups() + + def items(self): + url = "https://ltn.hitomi.la/{}/{}.nozomi".format(self.type, self.tag) + data = {"_extractor": HitomiGalleryExtractor} + + for gallery_id in decode_nozomi(self.request(url).content): + url = "https://hitomi.la/galleries/{}.html".format(gallery_id) + yield Message.Queue, url, data diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py index d0aa4f2..8d2c937 100644 --- a/gallery_dl/extractor/imgbb.py +++ b/gallery_dl/extractor/imgbb.py @@ -28,9 +28,17 @@ class ImgbbExtractor(Extractor): def items(self): self.login() - response = self.request(self.page_url, params={"sort": self.sort}) - if response.history and response.url.startswith(self.root): - raise exception.NotFoundError(self.subcategory) + + url = self.page_url + params = {"sort": self.sort} + while True: + response = self.request(url, params=params, allow_redirects=False) + if response.status_code < 300: + break + url = response.headers["location"] + if url.startswith(self.root): + raise exception.NotFoundError(self.subcategory) + page = response.text data = self.metadata(page) first = True @@ -151,12 +159,15 @@ class ImgbbAlbumExtractor(ImgbbExtractor): } def images(self, page): + url = text.extract(page, '"og:url" content="', '"')[0] + album_id = url.rpartition("/")[2].partition("?")[0] + return self._pagination(page, "https://ibb.co/json", { "from" : "album", - "albumid" : self.album_id, + "albumid" : album_id, "params_hidden[list]" : "images", "params_hidden[from]" : "album", - "params_hidden[albumid]": self.album_id, + "params_hidden[albumid]": album_id, }) diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 4af12f1..ea39cab 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -131,7 +131,8 @@ class InstagramExtractor(Extractor): 'owner_id': media['owner']['id'], 'username': media['owner']['username'], 'fullname': media['owner']['full_name'], - "post_shortcode": media['shortcode'], + 'post_id': media['id'], + 'post_shortcode': media['shortcode'], 'description': text.parse_unicode_escapes('\n'.join( edge['node']['text'] for edge in media['edge_media_to_caption']['edges'] @@ -140,9 +141,11 @@ class InstagramExtractor(Extractor): medias = [] if media['__typename'] == 'GraphSidecar': - for n in media['edge_sidecar_to_children']['edges']: - children = n['node'] + for num, edge in enumerate( + media['edge_sidecar_to_children']['edges'], 1): + children = edge['node'] media_data = { + 'num': num, 'media_id': children['id'], 'shortcode': children['shortcode'], 'typename': children['__typename'], @@ -320,6 +323,7 @@ class InstagramImageExtractor(InstagramExtractor): "likes": int, "media_id": "1922949326347663701", "shortcode": "BqvsDleB3lV", + "post_id": "1922949326347663701", "post_shortcode": "BqvsDleB3lV", "typename": "GraphImage", "username": "instagram", @@ -333,7 +337,9 @@ class InstagramImageExtractor(InstagramExtractor): "keyword": { "sidecar_media_id": "1875629777499953996", "sidecar_shortcode": "BoHk1haB5tM", + "post_id": "1875629777499953996", "post_shortcode": "BoHk1haB5tM", + "num": int, "likes": int, "username": "instagram", } diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py index 8809589..348453d 100644 --- a/gallery_dl/extractor/kissmanga.py +++ b/gallery_dl/extractor/kissmanga.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2019 Mike Fährmann +# Copyright 2015-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -126,7 +126,7 @@ class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor): (aes.aes_cbc_decrypt_text( data, key, iv).partition("&")[0], None) for data in text.extract_iter( - page, 'lstImages.push(wrapKA("', '"' + page, 'push(wrapKA("', '"' ) ] except UnicodeDecodeError: diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py index 228324f..3d64acd 100644 --- a/gallery_dl/extractor/mangapark.py +++ b/gallery_dl/extractor/mangapark.py @@ -55,7 +55,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor): test = ( ("https://mangapark.net/manga/gosu/i811615/c55/1", { "count": 50, - "keyword": "373d678048d29492f9763743ccaa9b6d840f17cf", + "keyword": "2bb16a50dbac9577ead62b41db9a01a0419c0ae2", }), (("https://mangapark.net/manga" "/ad-astra-per-aspera-hata-kenjirou/i662054/c001.2/1"), { diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py index 3f07d21..4f0e38d 100644 --- a/gallery_dl/extractor/mastodon.py +++ b/gallery_dl/extractor/mastodon.py @@ -68,8 +68,8 @@ class MastodonUserExtractor(MastodonExtractor): self.account_name = match.group(1) def statuses(self): - results = self.api.account_search("@" + self.account_name, 1) - for account in results: + handle = "@{}@{}".format(self.account_name, self.instance) + for account in self.api.account_search(handle, 1): if account["username"] == self.account_name: break else: @@ -121,7 +121,10 @@ class MastodonAPI(): return self._call("statuses/" + status_id).json() def _call(self, endpoint, params=None): - url = "{}/api/v1/{}".format(self.root, endpoint) + if endpoint.startswith("http"): + url = endpoint + else: + url = "{}/api/v1/{}".format(self.root, endpoint) while True: response = self.extractor.request( @@ -143,7 +146,7 @@ class MastodonAPI(): def _pagination(self, endpoint, params): url = "{}/api/v1/{}".format(self.root, endpoint) while url: - response = self._call(endpoint, params) + response = self._call(url, params) yield from response.json() url = response.links.get("next") @@ -181,6 +184,7 @@ def generate_extractors(): Extr.instance = instance Extr.pattern = (r"(?:https?://)?" + pattern + r"/@([^/?&#]+)(?:/media)?/?$") + Extr.test = info.get("test-user") Extr.root = root Extr.access_token = token symtable[Extr.__name__] = Extr @@ -193,6 +197,7 @@ def generate_extractors(): Extr.category = category Extr.instance = instance Extr.pattern = r"(?:https?://)?" + pattern + r"/@[^/?&#]+/(\d+)" + Extr.test = info.get("test-status") Extr.root = root Extr.access_token = token symtable[Extr.__name__] = Extr @@ -204,6 +209,15 @@ EXTRACTORS = { "access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48", "client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo", "client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI", + "test-user" : ("https://mastodon.social/@jk", { + "pattern": r"https://files.mastodon.social/media_attachments" + r"/files/\d+/\d+/\d+/original/\w+", + "range": "1-60", + "count": 60, + }), + "test-status" : ("https://mastodon.social/@jk/103794036899778366", { + "count": 4, + }), }, "pawoo.net": { "category" : "pawoo", diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 1f10319..bb87a69 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -345,7 +345,7 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor): subcategory = "favorite" directory_fmt = ("{category}", "{user}", "Favorites") pattern = (r"(?:https?://)?([^.]+)\.newgrounds\.com" - r"/favorites(?:/(art|audio|movies))?/?") + r"/favorites(?!/following)(?:/(art|audio|movies))?/?") test = ( ("https://tomfulp.newgrounds.com/favorites/art", { "range": "1-10", @@ -381,10 +381,39 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor): if response.history: return - favs = list(text.extract_iter( - response.text, 'href="//www.newgrounds.com', '"')) - for path in favs: - yield self.root + path + favs = self._extract_favorites(response.text) + yield from favs + if len(favs) < 24: return num += 1 + + def _extract_favorites(self, page): + return [ + self.root + path + for path in text.extract_iter( + page, 'href="//www.newgrounds.com', '"') + ] + + +class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor): + """Extractor for a newgrounds user's favorited users""" + subcategory = "following" + pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/favorites/(following)" + test = ("https://tomfulp.newgrounds.com/favorites/following", { + "pattern": NewgroundsUserExtractor.pattern, + "range": "76-125", + "count": 50, + }) + + def items(self): + data = {"_extractor": NewgroundsUserExtractor} + for url in self._pagination(self.kind): + yield Message.Queue, url, data + + @staticmethod + def _extract_favorites(page): + return [ + "https://" + user.rpartition('"')[2] + for user in text.extract_iter(page, 'class="item-user', '"><img') + ] diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py index a936370..abf88cd 100644 --- a/gallery_dl/extractor/nozomi.py +++ b/gallery_dl/extractor/nozomi.py @@ -12,6 +12,11 @@ from .common import Extractor, Message from .. import text +def decode_nozomi(n): + for i in range(0, len(n), 4): + yield (n[i] << 24) + (n[i+1] << 16) + (n[i+2] << 8) + n[i+3] + + class NozomiExtractor(Extractor): """Base class for nozomi extractors""" category = "nozomi" @@ -69,11 +74,6 @@ class NozomiExtractor(Extractor): def _list(src): return [x["tagname_display"] for x in src] if src else () - @staticmethod - def _unpack(b): - for i in range(0, len(b), 4): - yield (b[i] << 24) + (b[i+1] << 16) + (b[i+2] << 8) + b[i+3] - class NozomiPostExtractor(NozomiExtractor): """Extractor for individual posts on nozomi.la""" @@ -145,7 +145,7 @@ class NozomiTagExtractor(NozomiExtractor): while True: headers = {"Range": "bytes={}-{}".format(i, i+255)} response = self.request(url, headers=headers) - yield from self._unpack(response.content) + yield from decode_nozomi(response.content) i += 256 cr = response.headers.get("Content-Range", "").rpartition("/")[2] @@ -176,7 +176,7 @@ class NozomiSearchExtractor(NozomiExtractor): def nozomi(path): url = "https://j.nozomi.la/" + path + ".nozomi" - return self._unpack(self.request(url).content) + return decode_nozomi(self.request(url).content) for tag in self.tags: if tag[0] == "-": diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 0d51df2..18c10a6 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -47,7 +47,7 @@ class PatreonExtractor(Extractor): self._attachments(post), self._content(post), ): - fhash = url.rsplit("/", 2)[1] + fhash = url.split("/")[9].partition("?")[0] if fhash not in hashes: hashes.add(fhash) post["hash"] = fhash @@ -194,7 +194,7 @@ class PatreonCreatorExtractor(PatreonExtractor): subcategory = "creator" pattern = (r"(?:https?://)?(?:www\.)?patreon\.com" r"/(?!(?:home|join|posts|login|signup)(?:$|[/?&#]))" - r"([^/?&#]+)/?") + r"(?:user(?:/posts)?/?\?([^#]+)|([^/?&#]+)/?)") test = ( ("https://www.patreon.com/koveliana", { "range": "1-25", @@ -216,14 +216,21 @@ class PatreonCreatorExtractor(PatreonExtractor): ("https://www.patreon.com/kovelianot", { "exception": exception.NotFoundError, }), + ("https://www.patreon.com/user?u=2931440"), + ("https://www.patreon.com/user/posts/?u=2931440"), ) def __init__(self, match): PatreonExtractor.__init__(self, match) - self.creator = match.group(1).lower() + self.query, self.creator = match.groups() def posts(self): - url = "{}/{}".format(self.root, self.creator) + if self.creator: + url = "{}/{}".format(self.root, self.creator.lower()) + else: + query = text.parse_query(self.query) + url = "{}/user?u={}".format(self.root, query.get("u")) + page = self.request(url, notfound="creator").text campaign_id = text.extract(page, "/campaign/", "/")[0] diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 03ce3dd..3a274c7 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -63,13 +63,11 @@ class TwitterExtractor(Extractor): url = self._video_from_tweet(data["tweet_id"]) if not url: continue - ext = text.ext_from_url(url) - if ext == "m3u8": + text.nameext_from_url(url, data) + if data["extension"] == "m3u8": url = "ytdl:" + url data["extension"] = "mp4" data["_ytdl_extra"] = {"protocol": "m3u8_native"} - else: - data["extension"] = ext data["num"] = 1 yield Message.Url, url, data @@ -149,6 +147,7 @@ class TwitterExtractor(Extractor): extr = text.extract_from(tweet) data = { "tweet_id" : text.parse_int(extr('data-tweet-id="' , '"')), + "reply" : bool(extr('data-is-reply-to="' , '"')), "retweet_id": text.parse_int(extr('data-retweet-id="', '"')), "retweeter" : extr('data-retweeter="' , '"'), "author" : { @@ -286,7 +285,7 @@ class TwitterTimelineExtractor(TwitterExtractor): ("https://twitter.com/supernaturepics", { "range": "1-40", "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4", - "keyword": "37f4d35affd733d458d3b235b4a55f619a86f794", + "keyword": "4a3d28cc9f7a39e27333d56f3fe19e6e07ee979e", }), ("https://mobile.twitter.com/supernaturepics?p=i"), ) @@ -344,13 +343,13 @@ class TwitterTweetExtractor(TwitterExtractor): test = ( ("https://twitter.com/supernaturepics/status/604341487988576256", { "url": "0e801d2f98142dd87c3630ded9e4be4a4d63b580", - "keyword": "3fa3623e8d9a204597238e2f1f6433da19c63b4a", + "keyword": "76e018cf3f4c8b82d3bdd425e01e28078c98373b", "content": "ab05e1d8d21f8d43496df284d31e8b362cd3bcab", }), # 4 images ("https://twitter.com/perrypumas/status/894001459754180609", { "url": "c8a262a9698cb733fb27870f5a8f75faf77d79f6", - "keyword": "49165725116ac52193a3861e8f5534e47a706b62", + "keyword": "c9251b1fd79d547b0c6b4577f06c937d0e9b63d2", }), # video ("https://twitter.com/perrypumas/status/1065692031626829824", { diff --git a/gallery_dl/extractor/xhamster.py b/gallery_dl/extractor/xhamster.py index 0422589..a338216 100644 --- a/gallery_dl/extractor/xhamster.py +++ b/gallery_dl/extractor/xhamster.py @@ -13,7 +13,8 @@ from .. import text import json -BASE_PATTERN = r"(?:https?://)?((?:[^.]+\.)?xhamster\d?\.(?:com|one|desi))" +BASE_PATTERN = (r"(?:https?://)?((?:[^.]+\.)?xhamster" + r"(?:\d?\.(?:com|one|desi)|\.porncache\.net))") class XhamsterExtractor(Extractor): @@ -79,6 +80,7 @@ class XhamsterGalleryExtractor(XhamsterExtractor): ("https://xhamster.desi/photos/gallery/11748968"), ("https://xhamster2.com/photos/gallery/11748968"), ("https://en.xhamster.com/photos/gallery/11748968"), + ("https://xhamster.porncache.net/photos/gallery/11748968"), ) def __init__(self, match): diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 7905500..73920c2 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.13.4" +__version__ = "1.13.5" diff --git a/test/test_results.py b/test/test_results.py index 9064810..bfed2ca 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -27,9 +27,6 @@ TRAVIS_SKIP = { # temporary issues, etc. BROKEN = { - "dokireader", - "mangafox", - "mangahere", "photobucket", "worldthree", } @@ -317,6 +314,9 @@ def setup_test_config(): config.set(("extractor", "mangoxo") , "username", "LiQiang3") config.set(("extractor", "mangoxo") , "password", "5zbQF10_5u25259Ma") + config.set(("extractor", "mastodon.social"), "access-token", + "Blf9gVqG7GytDTfVMiyYQjwVMQaNACgf3Ds3IxxVDUQ") + config.set(("extractor", "deviantart"), "client-id", "7777") config.set(("extractor", "deviantart"), "client-secret", "ff14994c744d9208e5caeec7aab4a026") |