From 4366125d2580982abb57bc65a26fc1fb8ef2a5df Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Tue, 21 Jan 2020 01:08:43 -0500 Subject: New upstream version 1.12.3 --- PKG-INFO | 12 ++--- README.rst | 9 ++-- data/man/gallery-dl.1 | 2 +- data/man/gallery-dl.conf.5 | 38 ++++++++++++++- gallery_dl.egg-info/PKG-INFO | 12 ++--- gallery_dl.egg-info/SOURCES.txt | 2 +- gallery_dl.egg-info/requires.txt | 4 -- gallery_dl/downloader/common.py | 7 ++- gallery_dl/downloader/http.py | 10 ++-- gallery_dl/extractor/__init__.py | 3 +- gallery_dl/extractor/common.py | 6 ++- gallery_dl/extractor/erolord.py | 64 ------------------------- gallery_dl/extractor/hentaifoundry.py | 88 ++++++++++++++++++++++------------- gallery_dl/extractor/hitomi.py | 32 +++++++------ gallery_dl/extractor/imgur.py | 2 +- gallery_dl/extractor/issuu.py | 36 +++++++------- gallery_dl/extractor/livedoor.py | 3 +- gallery_dl/extractor/mangadex.py | 25 +++++----- gallery_dl/extractor/pinterest.py | 9 ++-- gallery_dl/extractor/pixiv.py | 73 +++++++++++++++++++++-------- gallery_dl/extractor/shopify.py | 3 +- gallery_dl/extractor/slickpic.py | 5 +- gallery_dl/extractor/twitter.py | 34 +++++++++++++- gallery_dl/extractor/xhamster.py | 2 +- gallery_dl/job.py | 3 +- gallery_dl/postprocessor/__init__.py | 3 +- gallery_dl/postprocessor/compare.py | 62 ++++++++++++++++++++++++ gallery_dl/postprocessor/zip.py | 21 +++------ gallery_dl/util.py | 33 +++++++++---- gallery_dl/version.py | 4 +- setup.py | 4 -- test/test_downloader.py | 1 + test/test_results.py | 13 ++++-- 33 files changed, 378 insertions(+), 247 deletions(-) delete mode 100644 gallery_dl/extractor/erolord.py create mode 100644 gallery_dl/postprocessor/compare.py diff --git a/PKG-INFO b/PKG-INFO index 0e6429e..e86eb0c 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.12.2 +Version: 1.12.3 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -33,7 +33,6 @@ Description: ========== - FFmpeg_: Pixiv Ugoira to WebM conversion - youtube-dl_: Video downloads - - pyOpenSSL_: Access Cloudflare protected sites Installation @@ -95,10 +94,10 @@ Description: ========== put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows `__ - - `Linux `__ + - `Windows `__ + - `Linux `__ - These executables include a Python 3.7 interpreter + These executables include a Python 3.8 interpreter and all required Python packages. @@ -241,7 +240,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.2.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.3.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -282,5 +281,4 @@ Classifier: Topic :: Internet :: WWW/HTTP Classifier: Topic :: Multimedia :: Graphics Classifier: Topic :: Utilities Requires-Python: >=3.4 -Provides-Extra: cloudflare Provides-Extra: video diff --git a/README.rst b/README.rst index e6846b6..f450c81 100644 --- a/README.rst +++ b/README.rst @@ -22,7 +22,6 @@ Optional - FFmpeg_: Pixiv Ugoira to WebM conversion - youtube-dl_: Video downloads -- pyOpenSSL_: Access Cloudflare protected sites Installation @@ -84,10 +83,10 @@ Download a standalone executable file, put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ -These executables include a Python 3.7 interpreter +These executables include a Python 3.8 interpreter and all required Python packages. @@ -230,7 +229,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.2.tar.gz +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.3.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 7249537..a2cd77d 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2020-01-05" "1.12.2" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2020-01-19" "1.12.3" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 7e7993a..d7bb941 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2020-01-05" "1.12.2" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2020-01-19" "1.12.3" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -1207,6 +1207,16 @@ Extract tweet text as \f[I]content\f[] metadata. .IP "Description:" 4 Extract images from retweets. +.SS extractor.twitter.twitpic +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Extract \f[I]TwitPic \f[] embeds. + .SS extractor.twitter.videos .IP "Type:" 6 \f[I]bool\f[] or \f[I]string\f[] @@ -1594,6 +1604,32 @@ be stored in them. Files with an extension not listed will be ignored and stored in their default location. +.SS compare.action +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"replace"\f[] + +.IP "Description:" 4 +The action to take when files do not compare as equal. + + +* \f[I]"replace"\f[]: Replace/Overwrite the old version with the new one + +* \f[I]"enumerate"\f[]: Add an enumeration index to the filename of the new +version like \f[I]skip = "enumerate" \f[] + +.SS compare.shallow +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Only compare file sizes. Do not read and compare their content. + .SS exec.async .IP "Type:" 6 \f[I]bool\f[] diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 3aa6d61..8f4897f 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.12.2 +Version: 1.12.3 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -33,7 +33,6 @@ Description: ========== - FFmpeg_: Pixiv Ugoira to WebM conversion - youtube-dl_: Video downloads - - pyOpenSSL_: Access Cloudflare protected sites Installation @@ -95,10 +94,10 @@ Description: ========== put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows `__ - - `Linux `__ + - `Windows `__ + - `Linux `__ - These executables include a Python 3.7 interpreter + These executables include a Python 3.8 interpreter and all required Python packages. @@ -241,7 +240,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.2.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.3.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -282,5 +281,4 @@ Classifier: Topic :: Internet :: WWW/HTTP Classifier: Topic :: Multimedia :: Graphics Classifier: Topic :: Utilities Requires-Python: >=3.4 -Provides-Extra: cloudflare Provides-Extra: video diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 513b6c7..bbe9bbe 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -48,7 +48,6 @@ gallery_dl/extractor/deviantart.py gallery_dl/extractor/directlink.py gallery_dl/extractor/dynastyscans.py gallery_dl/extractor/e621.py -gallery_dl/extractor/erolord.py gallery_dl/extractor/exhentai.py gallery_dl/extractor/fallenangels.py gallery_dl/extractor/flickr.py @@ -148,6 +147,7 @@ gallery_dl/extractor/yuki.py gallery_dl/postprocessor/__init__.py gallery_dl/postprocessor/classify.py gallery_dl/postprocessor/common.py +gallery_dl/postprocessor/compare.py gallery_dl/postprocessor/exec.py gallery_dl/postprocessor/metadata.py gallery_dl/postprocessor/mtime.py diff --git a/gallery_dl.egg-info/requires.txt b/gallery_dl.egg-info/requires.txt index 821055e..44dd863 100644 --- a/gallery_dl.egg-info/requires.txt +++ b/gallery_dl.egg-info/requires.txt @@ -1,8 +1,4 @@ requests>=2.11.0 -[cloudflare] -pyOpenSSL>=19.0.0 -cryptography>=2.8.0 - [video] youtube-dl diff --git a/gallery_dl/downloader/common.py b/gallery_dl/downloader/common.py index 596c956..eca1284 100644 --- a/gallery_dl/downloader/common.py +++ b/gallery_dl/downloader/common.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2019 Mike Fährmann +# Copyright 2014-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -20,10 +20,13 @@ class DownloaderBase(): def __init__(self, extractor, output): self.session = extractor.session self.out = output - self.log = logging.getLogger("downloader." + self.scheme) self.part = self.config("part", True) self.partdir = self.config("part-directory") + self.log = logging.getLogger("downloader." + self.scheme) + self.log.job = extractor.log.job + self.log.extractor = extractor + if self.partdir: self.partdir = util.expand_path(self.partdir) os.makedirs(self.partdir, exist_ok=True) diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index fab96ba..9cd2aa6 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2019 Mike Fährmann +# Copyright 2014-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,12 +8,11 @@ """Downloader module for http:// and https:// URLs""" -import os import time import mimetypes from requests.exceptions import RequestException, ConnectionError, Timeout from .common import DownloaderBase -from .. import text +from .. import text, util from ssl import SSLError try: @@ -57,10 +56,7 @@ class HttpDownloader(DownloaderBase): finally: # remove file from incomplete downloads if self.downloading and not self.part: - try: - os.unlink(pathfmt.temppath) - except (OSError, AttributeError): - pass + util.remove_file(pathfmt.temppath) def _download_impl(self, url, pathfmt): response = None diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 9ff3746..66203fe 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2019 Mike Fährmann +# Copyright 2015-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -25,7 +25,6 @@ modules = [ "deviantart", "dynastyscans", "e621", - "erolord", "exhentai", "fallenangels", "flickr", diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 380bcc7..55b15d4 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -344,7 +344,11 @@ class GalleryExtractor(Extractor): for data[self.enum], (url, imgdata) in images: if imgdata: data.update(imgdata) - yield Message.Url, url, text.nameext_from_url(url, data) + if "extension" not in imgdata: + text.nameext_from_url(url, data) + else: + text.nameext_from_url(url, data) + yield Message.Url, url, data def login(self): """Login and set necessary cookies""" diff --git a/gallery_dl/extractor/erolord.py b/gallery_dl/extractor/erolord.py deleted file mode 100644 index 8628039..0000000 --- a/gallery_dl/extractor/erolord.py +++ /dev/null @@ -1,64 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2019 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extract images from http://erolord.com/""" - -from .common import GalleryExtractor -from .. import text, util -import json - - -class ErolordGalleryExtractor(GalleryExtractor): - """Extractor for image galleries from erolord.com""" - category = "erolord" - root = "http://erolord.com" - pattern = r"(?:https?://)?(?:www\.)?erolord.com(/doujin/(\d+)/?)" - test = ("http://erolord.com/doujin/2189055/", { - "url": "7ce6d10a3934102b95c9718a34ccd3d35f55d85f", - "keyword": { - "title" : "Amazon No Hiyaku | Amazon Elixir", - "gallery_id": 2189055, - "count" : 16, - "artist" : ["Morris"], - "group" : list, - "parody" : list, - "characters": list, - "tags" : list, - "lang" : "en", - "language" : "English", - }, - }) - - def __init__(self, match): - GalleryExtractor.__init__(self, match) - self.gallery_id = match.group(2) - - def metadata(self, page): - extr = text.extract_from(page) - split = text.split_html - title, _, language = extr('

', '

').rpartition(" ") - language = language.strip("[]") - - return { - "gallery_id": text.parse_int(self.gallery_id), - "title" : text.unescape(title), - # double quotes for anime, circle, tags - # single quotes for characters, artist - "parody" : split(extr('class="sp1">Anime:' , "\r")), - "characters": split(extr("class='sp1'>Characters:", "\r")), - "artist" : split(extr("class='sp1'>Artist:" , "\r")), - "group" : split(extr('class="sp1">Circle:' , "\r")), - "tags" : split(extr('class="sp1">Tags:' , "\r")), - "lang" : util.language_to_code(language), - "language" : language, - } - - def images(self, page): - url = self.root + text.extract(page, 'id="d1">', '') - _ , pos = text.extract(page, 'id="picBox"', '', pos) - width , pos = text.extract(page, 'width="', '"', pos) - height, pos = text.extract(page, 'height="', '"', pos) - url , pos = text.extract(page, 'src="', '"', pos) - - title, _, artist = title.rpartition(" - ")[0].rpartition(" by ") - - data = text.nameext_from_url(url, { - "title": text.unescape(title), - "artist": text.unescape(artist), - "index": text.parse_int(index), - "width": text.parse_int(width), - "height": text.parse_int(height), - }) - if not data["extension"]: - data["extension"] = "jpg" - return text.urljoin(self.root, url), data + url = text.urljoin(self.root, path) + page = self.request(url).text + extr = text.extract_from(page, page.index('id="picBox"')) + + data = { + "title" : text.unescape(extr('class="imageTitle">', '<')), + "artist" : text.unescape(extr('/profile">', '<')), + "width" : text.parse_int(extr('width="', '"')), + "height" : text.parse_int(extr('height="', '"')), + "index" : text.parse_int(path.rsplit("/", 2)[1]), + "src" : "https:" + text.unescape(extr('src="', '"')), + "description": text.unescape(text.remove_html(extr( + '>Description', '') + .replace("\r\n", "\n"), "", "")), + "ratings" : [text.unescape(r) for r in text.extract_iter(extr( + "class='ratings_box'", ""), "title='", "'")], + "media" : text.unescape(extr("Media\t\t", "<")), + "date" : text.parse_datetime(extr("datetime='", "'")), + "views" : text.parse_int(extr("Views\t\t", "<")), + "tags" : text.split_html(extr( + "Keywords", ""))[::2], + "score" : text.parse_int(extr('Score\t\t', '<')), + } + + return text.nameext_from_url(data["src"], data) def set_filters(self): """Set site-internal filters to show all images""" @@ -127,7 +132,6 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor): test = ( ("https://www.hentai-foundry.com/pictures/user/Tenpura", { "url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28", - "keyword": "63ad576f87f82fa166ca4676761762f7f8496cf5", }), ("https://www.hentai-foundry.com/pictures/user/Tenpura/page/3"), ("https://www.hentai-foundry.com/user/Tenpura/profile"), @@ -153,7 +157,6 @@ class HentaifoundryScrapsExtractor(HentaifoundryExtractor): test = ( ("https://www.hentai-foundry.com/pictures/user/Evulchibi/scraps", { "url": "7cd9c6ec6258c4ab8c44991f7731be82337492a7", - "keyword": "40b07a9822b6b868fea2fa9b1c0b212ae8735da7", }), ("https://www.hentai-foundry.com" "/pictures/user/Evulchibi/scraps/page/3"), @@ -181,7 +184,6 @@ class HentaifoundryFavoriteExtractor(HentaifoundryExtractor): test = ( ("https://www.hentai-foundry.com/user/Tenpura/faves/pictures", { "url": "56f9ae2e89fe855e9fe1da9b81e5ec6212b0320b", - "keyword": "2b9478725e66d46ea043fa87476bbd28546958e7", }), ("https://www.hentai-foundry.com" "/user/Tenpura/faves/pictures/page/3"), @@ -201,7 +203,10 @@ class HentaifoundryRecentExtractor(HentaifoundryExtractor): archive_fmt = "r_{index}" pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com" r"/pictures/recent/(\d+-\d+-\d+)(?:/page/(\d+))?") - test = ("http://www.hentai-foundry.com/pictures/recent/2018-09-20",) + test = ("http://www.hentai-foundry.com/pictures/recent/2018-09-20", { + "pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/]+/\d+/", + "range": "20-30", + }) def __init__(self, match): HentaifoundryExtractor.__init__(self, match, "", match.group(2)) @@ -220,7 +225,10 @@ class HentaifoundryPopularExtractor(HentaifoundryExtractor): archive_fmt = "p_{index}" pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com" r"/pictures/popular(?:/page/(\d+))?") - test = ("http://www.hentai-foundry.com/pictures/popular",) + test = ("http://www.hentai-foundry.com/pictures/popular", { + "pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/]+/\d+/", + "range": "20-30", + }) def __init__(self, match): HentaifoundryExtractor.__init__(self, match, "", match.group(1)) @@ -236,8 +244,22 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor): (("https://www.hentai-foundry.com" "/pictures/user/Tenpura/407501/shimakaze"), { "url": "fbf2fd74906738094e2575d2728e8dc3de18a8a3", - "keyword": "cbb9381e6c2acce58db4adf4efc0ad7d138bddc4", "content": "91bf01497c39254b6dfb234a18e8f01629c77fd1", + "keyword": { + "artist" : "Tenpura", + "date" : "type:datetime", + "description": "Thank you!", + "height" : 700, + "index" : 407501, + "media" : "Other digital art", + "ratings": ["Sexual content", "Contains female nudity"], + "score" : int, + "tags" : ["kancolle", "kantai", "collection", "shimakaze"], + "title" : "shimakaze", + "user" : "Tenpura", + "views" : int, + "width" : 495, + }, }), ("https://www.hentai-foundry.com/pictures/user/Tenpura/340853/", { "exception": exception.HttpError, @@ -253,12 +275,12 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor): def items(self): post_url = "{}/pictures/user/{}/{}/?enterAgree=1".format( self.root, self.user, self.index) - url, data = self.get_image_metadata(post_url) + data = self.get_image_metadata(post_url) data["user"] = self.user yield Message.Version, 1 yield Message.Directory, data - yield Message.Url, url, data + yield Message.Url, data["src"], data def skip(self, _): return 0 diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index e53b051..d6fdcf2 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2019 Mike Fährmann +# Copyright 2015-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -23,7 +23,7 @@ class HitomiGalleryExtractor(GalleryExtractor): r"/(?:[^/?&#]+-)?(\d+)") test = ( ("https://hitomi.la/galleries/867789.html", { - "pattern": r"https://aa.hitomi.la/galleries/867789/\d+.jpg", + "pattern": r"https://[a-c]a.hitomi.la/images/./../[0-9a-f]+.jpg", "keyword": "6701f8f588f119ef84cd29bdf99a399417b0a6a2", "count": 16, }), @@ -34,12 +34,12 @@ class HitomiGalleryExtractor(GalleryExtractor): }), ("https://hitomi.la/galleries/733697.html", { # Game CG with scenes (#321) - "url": "c2a84185f467450b8b9b72fbe40c0649029ce007", + "url": "21064f9e3c244aca87f1a91967a3fbe79032c4ce", "count": 210, }), ("https://hitomi.la/galleries/1045954.html", { # fallback for galleries only available through /reader/ URLs - "url": "055c898a36389719799d6bce76889cc4ea4421fc", + "url": "0a67f5e6c3c6a384b578e328f4817fa6ccdf856a", "count": 1413, }), ("https://hitomi.la/manga/amazon-no-hiyaku-867789.html"), @@ -96,12 +96,6 @@ class HitomiGalleryExtractor(GalleryExtractor): return data def images(self, page): - # see https://ltn.hitomi.la/common.js - offset = text.parse_int(self.gallery_id[-1]) % 3 - subdomain = chr(97 + offset) + "a" - base = "https://{}.hitomi.la/galleries/{}/".format( - subdomain, self.gallery_id) - # set Referer header before image downloads (#239) self.session.headers["Referer"] = self.gallery_url @@ -109,10 +103,20 @@ class HitomiGalleryExtractor(GalleryExtractor): url = "https://ltn.hitomi.la/galleries/{}.js".format(self.gallery_id) page = self.request(url).text - return [ - (base + image["name"], None) - for image in json.loads(page.partition("=")[2]) - ] + result = [] + for image in json.loads(page.partition("=")[2]): + ihash = image["hash"] + idata = text.nameext_from_url(image["name"]) + + # see https://ltn.hitomi.la/common.js + offset = int(ihash[-3:-1], 16) % 3 + url = "https://{}a.hitomi.la/images/{}/{}/{}.{}".format( + chr(97 + offset), + ihash[-1], ihash[-3:-1], ihash, + idata["extension"], + ) + result.append((url, idata)) + return result @staticmethod def _prep(value): diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 6ff6588..5084e80 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -153,7 +153,7 @@ class ImgurAlbumExtractor(ImgurExtractor): "is_album" : True, "layout" : "blog", "link" : "https://imgur.com/a/TcBmP", - "nsfw" : True, + "nsfw" : bool, "privacy" : "hidden", "section" : None, "title" : "138", diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py index 12d7487..49d68ef 100644 --- a/gallery_dl/extractor/issuu.py +++ b/gallery_dl/extractor/issuu.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -32,23 +32,23 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor): "count" : 36, "keyword": { "document": { - "access" : "public", - "contentRating": dict, - "date" : "type:datetime", - "description" : "re:Motions, the brand new publication by Is", - "documentId" : r"re:\d+-d99ec95935f15091b040cb8060f05510", - "documentName" : "motions-1-2019", - "downloadState": "NOT_AVAILABLE", - "id" : r"re:\d+-d99ec95935f15091b040cb8060f05510", - "isConverting" : False, - "isQuarantined": False, - "lang" : "en", - "language" : "English", - "pageCount" : 36, - "publicationId": "d99ec95935f15091b040cb8060f05510", - "sections" : list, - "title" : "Motions by Issuu - Issue 1", - "userName" : "issuu", + "access" : "public", + "articleStories": list, + "contentRating" : dict, + "date" : "type:datetime", + "description" : "re:Motions, the brand new publication by I", + "documentId" : r"re:\d+-d99ec95935f15091b040cb8060f05510", + "documentName" : "motions-1-2019", + "downloadState" : "NOT_AVAILABLE", + "id" : r"re:\d+-d99ec95935f15091b040cb8060f05510", + "isConverting" : False, + "isQuarantined" : False, + "lang" : "en", + "language" : "English", + "pageCount" : 36, + "publicationId" : "d99ec95935f15091b040cb8060f05510", + "title" : "Motions by Issuu - Issue 1", + "userName" : "issuu", }, "extension": "jpg", "filename" : r"re:page_\d+", diff --git a/gallery_dl/extractor/livedoor.py b/gallery_dl/extractor/livedoor.py index e47b7db..9d2383f 100644 --- a/gallery_dl/extractor/livedoor.py +++ b/gallery_dl/extractor/livedoor.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -89,6 +89,7 @@ class LivedoorBlogExtractor(LivedoorExtractor): ("http://blog.livedoor.jp/zatsu_ke/", { "range": "1-50", "count": 50, + "archive": False, "pattern": r"https?://livedoor.blogimg.jp/\w+/imgs/\w/\w/\w+\.\w+", "keyword": { "post": { diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index 558aa9d..9fd9f3f 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2019 Mike Fährmann +# Copyright 2018-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract manga-chapters and entire manga from https://mangadex.cc/""" +"""Extractors for https://mangadex.org/""" from .common import Extractor, Message from .. import text, util @@ -16,7 +16,7 @@ from ..cache import memcache class MangadexExtractor(Extractor): """Base class for mangadex extractors""" category = "mangadex" - root = "https://mangadex.cc" + root = "https://mangadex.org" # mangadex-to-iso639-1 codes iso639_map = { @@ -39,7 +39,7 @@ class MangadexExtractor(Extractor): class MangadexChapterExtractor(MangadexExtractor): - """Extractor for manga-chapters from mangadex.cc""" + """Extractor for manga-chapters from mangadex.org""" subcategory = "chapter" directory_fmt = ( "{category}", "{manga}", @@ -47,11 +47,11 @@ class MangadexChapterExtractor(MangadexExtractor): filename_fmt = ( "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}") archive_fmt = "{chapter_id}_{page}" - pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc|com)/chapter/(\d+)" + pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)/chapter/(\d+)" test = ( - ("https://mangadex.cc/chapter/122094", { + ("https://mangadex.org/chapter/122094", { "keyword": "1c834dca33025f521e1874aee1f71c51e28ebf99", - # "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f", + "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f", }), # oneshot ("https://mangadex.cc/chapter/138086", { @@ -107,14 +107,14 @@ class MangadexChapterExtractor(MangadexExtractor): class MangadexMangaExtractor(MangadexExtractor): - """Extractor for manga from mangadex.cc""" + """Extractor for manga from mangadex.org""" subcategory = "manga" categorytransfer = True - pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc|com)" + pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)" r"/(?:title|manga)/(\d+)") test = ( - ("https://mangadex.cc/manga/2946/souten-no-koumori", { - "pattern": r"https://mangadex.cc/chapter/\d+", + ("https://mangadex.org/manga/2946/souten-no-koumori", { + "pattern": r"https://mangadex.org/chapter/\d+", "keywords": { "manga": "Souten no Koumori", "manga_id": 2946, @@ -129,13 +129,12 @@ class MangadexMangaExtractor(MangadexExtractor): "language": str, }, }), - ("https://mangadex.org/manga/13318/dagashi-kashi/chapters/2/", { + ("https://mangadex.cc/manga/13318/dagashi-kashi/chapters/2/", { "count": ">= 100", }), ("https://mangadex.org/title/13004/yorumori-no-kuni-no-sora-ni", { "count": 0, }), - ("https://mangadex.org/title/2946/souten-no-koumori"), ) def __init__(self, match): diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index b72a896..24a0a55 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -60,9 +60,8 @@ class PinterestPinExtractor(PinterestExtractor): test = ( ("https://www.pinterest.com/pin/858146903966145189/", { "url": "afb3c26719e3a530bb0e871c480882a801a4e8a5", - # image version depends on CDN server used - # "content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947", - # "content": "4c435a66f6bb82bb681db2ecc888f76cf6c5f9ca", + "content": ("4c435a66f6bb82bb681db2ecc888f76cf6c5f9ca", + "d3e24bc9f7af585e8c23b9136956bd45a4d9b947"), }), ("https://www.pinterest.com/pin/858146903966145188/", { "exception": exception.NotFoundError, @@ -171,9 +170,7 @@ class PinterestPinitExtractor(PinterestExtractor): self.shortened_id) response = self.request(url, method="HEAD", allow_redirects=False) location = response.headers.get("Location") - if not location or location in ("https://api.pinterest.com/None", - "https://pin.it/None", - "https://www.pinterest.com"): + if not location or not PinterestPinExtractor.pattern.match(location): raise exception.NotFoundError("pin") yield Message.Queue, location, {"_extractor": PinterestPinExtractor} diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 36fa0fe..8a10028 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -84,14 +84,20 @@ class PixivExtractor(Extractor): class PixivUserExtractor(PixivExtractor): """Extractor for works of a pixiv-user""" subcategory = "user" - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/" - r"(?:member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?" + pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:" + r"(?:en/)?users/(\d+)(?:/(?:artworks|illustrations|manga)" + r"(?:/([^/?&#]+))?)?/?(?:$|[?#])" + r"|member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?" r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))") test = ( - ("http://www.pixiv.net/member_illust.php?id=173530", { + ("https://www.pixiv.net/en/users/173530/artworks", { "url": "852c31ad83b6840bacbce824d85f2a997889efb7", }), # illusts with specific tag + (("https://www.pixiv.net/en/users/173530/artworks" + "/%E6%89%8B%E3%81%B6%E3%82%8D"), { + "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658", + }), (("https://www.pixiv.net/member_illust.php?id=173530" "&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), { "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658", @@ -99,6 +105,10 @@ class PixivUserExtractor(PixivExtractor): ("http://www.pixiv.net/member_illust.php?id=173531", { "exception": exception.NotFoundError, }), + ("https://www.pixiv.net/en/users/173530"), + ("https://www.pixiv.net/en/users/173530/manga"), + ("https://www.pixiv.net/en/users/173530/illustrations"), + ("https://www.pixiv.net/member_illust.php?id=173530"), ("https://www.pixiv.net/u/173530"), ("https://www.pixiv.net/user/173530"), ("https://www.pixiv.net/mypage.php#id=173530"), @@ -108,14 +118,19 @@ class PixivUserExtractor(PixivExtractor): def __init__(self, match): PixivExtractor.__init__(self, match) - self.user_id = match.group(1) or match.group(3) - self.query = text.parse_query(match.group(2)) + u1, t1, u2, t2, u3 = match.groups() + if t1: + t1 = text.unquote(t1) + elif t2: + t2 = text.parse_query(t2).get("tag") + self.user_id = u1 or u2 or u3 + self.tag = t1 or t2 def works(self): works = self.api.user_illusts(self.user_id) - if "tag" in self.query: - tag = text.unquote(self.query["tag"]).lower() + if self.tag: + tag = self.tag.lower() works = ( work for work in works if tag in [t["name"].lower() for t in work["tags"]] @@ -130,7 +145,7 @@ class PixivMeExtractor(PixivExtractor): pattern = r"(?:https?://)?pixiv\.me/([^/?&#]+)" test = ( ("https://pixiv.me/del_shannon", { - "url": "0b1a18c3e3553c44ee6e0ccc36a7fd906c498e8f", + "url": "29c295ce75150177e6b0a09089a949804c708fbf", }), ("https://pixiv.me/del_shanno", { "exception": exception.NotFoundError, @@ -205,9 +220,13 @@ class PixivFavoriteExtractor(PixivExtractor): directory_fmt = ("{category}", "bookmarks", "{user_bookmark[id]} {user_bookmark[account]}") archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}" - pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" - r"/bookmark\.php(?:\?([^#]*))?") + pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/" + r"(?:(?:en/)?users/(\d+)/(bookmarks/artworks|following)" + r"|bookmark\.php(?:\?([^#]*))?)") test = ( + ("https://www.pixiv.net/en/users/173530/bookmarks/artworks", { + "url": "e717eb511500f2fa3497aaee796a468ecf685cc4", + }), ("https://www.pixiv.net/bookmark.php?id=173530", { "url": "e717eb511500f2fa3497aaee796a468ecf685cc4", }), @@ -221,6 +240,11 @@ class PixivFavoriteExtractor(PixivExtractor): "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba", }), # followed users (#515) + ("https://www.pixiv.net/en/users/173530/following", { + "pattern": PixivUserExtractor.pattern, + "count": ">= 12", + }), + # followed users (#515) ("https://www.pixiv.net/bookmark.php?id=173530&type=user", { "pattern": PixivUserExtractor.pattern, "count": ">= 12", @@ -231,13 +255,24 @@ class PixivFavoriteExtractor(PixivExtractor): ) def __init__(self, match): + uid, kind, query = match.groups() + + if query: + self.query = text.parse_query(query) + uid = self.query.get("id", -1) + if not uid: + self.subcategory = "bookmark" + elif self.query.get("type") == "user": + self.subcategory = "following" + self.items = self._items_following + else: + self.query = {} + if kind == "following": + self.subcategory = "following" + self.items = self._items_following + PixivExtractor.__init__(self, match) - self.query = text.parse_query(match.group(1)) - if "id" not in self.query: - self.subcategory = "bookmark" - elif self.query.get("type") == "user": - self.subcategory = "following" - self.items = self._items_following + self.user_id = uid def works(self): tag = None @@ -251,8 +286,8 @@ class PixivFavoriteExtractor(PixivExtractor): return self.api.user_bookmarks_illust(self.user_id, tag, restrict) def get_metadata(self, user=None): - if "id" in self.query: - user = self.api.user_detail(self.query["id"]) + if self.user_id: + user = self.api.user_detail(self.user_id) else: self.api.login() user = self.api.user @@ -263,7 +298,7 @@ class PixivFavoriteExtractor(PixivExtractor): def _items_following(self): yield Message.Version, 1 - for preview in self.api.user_following(self.query["id"]): + for preview in self.api.user_following(self.user_id): user = preview["user"] user["_extractor"] = PixivUserExtractor url = "https://www.pixiv.net/member.php?id={}".format(user["id"]) diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py index b2498a0..28ee46c 100644 --- a/gallery_dl/extractor/shopify.py +++ b/gallery_dl/extractor/shopify.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -116,6 +116,7 @@ EXTRACTORS = { ("https://www.fashionnova.com/collections/mini-dresses", { "range": "1-20", "count": 20, + "archive": False, }), ("https://www.fashionnova.com/collections/mini-dresses/?page=1"), ("https://www.fashionnova.com/collections/mini-dresses#1"), diff --git a/gallery_dl/extractor/slickpic.py b/gallery_dl/extractor/slickpic.py index 127cce8..1063716 100644 --- a/gallery_dl/extractor/slickpic.py +++ b/gallery_dl/extractor/slickpic.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -41,7 +41,8 @@ class SlickpicAlbumExtractor(SlickpicExtractor): }), ("https://mattcrandall.slickpic.com/albums/LamborghiniMurcielago/", { "range": "34", - "content": "cec6630e659dc72db1ee1a9a6f3b525189261988", + "content": ("cec6630e659dc72db1ee1a9a6f3b525189261988", + "6f81e1e74c6cd6db36844e7211eef8e7cd30055d"), }), ) diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 610e0ee..dc558c0 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -30,6 +30,7 @@ class TwitterExtractor(Extractor): self._user_dict = None self.logged_in = False self.retweets = self.config("retweets", True) + self.twitpic = self.config("twitpic", False) self.content = self.config("content", False) self.videos = self.config("videos", False) @@ -79,6 +80,26 @@ class TwitterExtractor(Extractor): urls = [url + size for size in self.sizes] yield Message.Urllist, urls, data + if self.twitpic and "//twitpic.com/" in tweet: + urls = [ + url for url in text.extract_iter( + tweet, 'data-expanded-url="', '"') + if "//twitpic.com/" in url + ] + + if "num" not in data: + if urls: + yield Message.Directory, data + data["num"] = 0 + + for data["num"], url in enumerate(urls, data["num"]+1): + response = self.request(url, fatal=False) + if response.status_code >= 400: + continue + url = text.extract( + response.text, 'name="twitter:image" value="', '"')[0] + yield Message.Url, url, text.nameext_from_url(url, data) + def metadata(self): """Return general metadata""" return {} @@ -231,8 +252,11 @@ class TwitterExtractor(Extractor): data["items_html"], '
=2.11.0", ], extras_require={ - "cloudflare": [ - "pyOpenSSL>=19.0.0", - "cryptography>=2.8.0", - ], "video": [ "youtube-dl", ], diff --git a/test/test_downloader.py b/test/test_downloader.py index a7c4ce6..c43b533 100644 --- a/test/test_downloader.py +++ b/test/test_downloader.py @@ -98,6 +98,7 @@ class TestDownloaderBase(unittest.TestCase): @classmethod def setUpClass(cls): cls.extractor = extractor.find("test:") + cls.extractor.log.job = None cls.dir = tempfile.TemporaryDirectory() cls.fnum = 0 config.set((), "base-directory", cls.dir.name) diff --git a/test/test_results.py b/test/test_results.py index 869ff83..e87b4b8 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2015-2019 Mike Fährmann +# Copyright 2015-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -21,12 +21,12 @@ TRAVIS_SKIP = { "exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie", "bobx", "archivedmoe", "archiveofsins", "thebarchive", "fireden", "4plebs", "sankaku", "idolcomplex", "mangahere", "readcomiconline", "mangadex", - "sankakucomplex", "warosu", "fuskator", + "sankakucomplex", "warosu", "fuskator", "patreon", } # temporary issues, etc. BROKEN = { - "erolord", + "imxto", "mangapark", "photobucket", } @@ -109,7 +109,12 @@ class TestExtractorResults(unittest.TestCase): self.assertEqual(result["url"], tjob.url_hash.hexdigest()) if "content" in result: - self.assertEqual(result["content"], tjob.content_hash.hexdigest()) + expected = result["content"] + digest = tjob.content_hash.hexdigest() + if isinstance(expected, str): + self.assertEqual(digest, expected, "content") + else: # assume iterable + self.assertIn(digest, expected, "content") if "keyword" in result: expected = result["keyword"] -- cgit v1.2.3