diff options
| -rw-r--r-- | CHANGELOG.md | 13 | ||||
| -rw-r--r-- | README.rst | 6 | ||||
| -rw-r--r-- | docs/configuration.rst | 12 | ||||
| -rw-r--r-- | docs/gallery-dl.conf | 3 | ||||
| -rw-r--r-- | docs/supportedsites.rst | 4 | ||||
| -rw-r--r-- | gallery_dl/extractor/__init__.py | 2 | ||||
| -rw-r--r-- | gallery_dl/extractor/booru.py | 7 | ||||
| -rw-r--r-- | gallery_dl/extractor/danbooru.py | 13 | ||||
| -rw-r--r-- | gallery_dl/extractor/exhentai.py | 17 | ||||
| -rw-r--r-- | gallery_dl/extractor/fuskator.py | 110 | ||||
| -rw-r--r-- | gallery_dl/extractor/hentaicafe.py | 2 | ||||
| -rw-r--r-- | gallery_dl/extractor/hentaifoundry.py | 4 | ||||
| -rw-r--r-- | gallery_dl/extractor/lineblog.py | 73 | ||||
| -rw-r--r-- | gallery_dl/extractor/livedoor.py | 51 | ||||
| -rw-r--r-- | gallery_dl/extractor/myportfolio.py | 6 | ||||
| -rw-r--r-- | gallery_dl/extractor/pixiv.py | 13 | ||||
| -rw-r--r-- | gallery_dl/extractor/simplyhentai.py | 163 | ||||
| -rw-r--r-- | gallery_dl/extractor/tumblr.py | 2 | ||||
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 37 | ||||
| -rw-r--r-- | gallery_dl/extractor/vsco.py | 15 | ||||
| -rw-r--r-- | gallery_dl/job.py | 2 | ||||
| -rw-r--r-- | gallery_dl/postprocessor/ugoira.py | 32 | ||||
| -rw-r--r-- | gallery_dl/version.py | 2 | ||||
| -rwxr-xr-x | scripts/supportedsites.py | 1 | ||||
| -rw-r--r-- | test/test_results.py | 10 |
25 files changed, 482 insertions, 118 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index a2b5109..4cde46b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # Changelog +## 1.10.4 - 2019-09-08 +### Additions +- Support for + - `lineblog` - https://www.lineblog.me/ ([#404](https://github.com/mikf/gallery-dl/issues/404)) + - `fuskator` - https://fuskator.com/ ([#407](https://github.com/mikf/gallery-dl/issues/407)) +- `ugoira` option for `danbooru` to download pre-rendered ugoira animations ([#406](https://github.com/mikf/gallery-dl/issues/406)) +### Fixes +- Download the correct files from `twitter` replies ([#403](https://github.com/mikf/gallery-dl/issues/403)) +- Prevent crash when trying to use unavailable downloader modules ([#405](https://github.com/mikf/gallery-dl/issues/405)) +- Fix `pixiv` authentication ([#411](https://github.com/mikf/gallery-dl/issues/411)) +- Improve `exhentai` image limit checks +- Miscellaneous fixes for `hentaicafe`, `simplyhentai`, `tumblr` + ## 1.10.3 - 2019-08-30 ### Additions - Provide `filename` metadata for all `deviantart` files ([#392](https://github.com/mikf/gallery-dl/issues/392), [#400](https://github.com/mikf/gallery-dl/issues/400)) @@ -78,8 +78,8 @@ Download a standalone executable file, put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.3/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.3/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.4/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.4/gallery-dl.bin>`__ These executables include a Python 3.7 interpreter and all required Python packages. @@ -224,7 +224,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.3.zip +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.4.zip .. _dev: https://github.com/mikf/gallery-dl/archive/master.zip .. _Python: https://www.python.org/downloads/ diff --git a/docs/configuration.rst b/docs/configuration.rst index d69406d..e384f2c 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -486,6 +486,18 @@ Description Try to follow external URLs of embedded players. =========== ===== +extractor.danbooru.ugoira +------------------------- +=========== ===== +Type ``bool`` +Default ``true`` +Description Controls the download target for Ugoira posts. + + * ``true``: Original ZIP archives + * ``false``: Converted video files +=========== ===== + + extractor.deviantart.extra -------------------------- =========== ===== diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 6439437..eff6da1 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -19,7 +19,8 @@ "danbooru": { "username": null, - "password": null + "password": null, + "ugoira": true }, "deviantart": { diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 05c8555..925185c 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -33,6 +33,7 @@ Fallen Angels Scans https://www.fascans.com/ Chapters, Manga Fashion Nova https://www.fashionnova.com/ Collections, Products Fireden https://boards.fireden.net/ Threads Flickr https://www.flickr.com/ |flickr-C| Optional (OAuth) +Fuskator https://fuskator.com/ Galleries, Search Results Futaba Channel https://www.2chan.net/ Threads Gelbooru https://gelbooru.com/ Pools, Posts, Tag-Searches Gfycat https://gfycat.com/ individual Images @@ -61,6 +62,7 @@ Kirei Cake https://reader.kireicake.com/ Chapters, Manga KissManga https://kissmanga.com/ Chapters, Manga Komikcast https://komikcast.com/ Chapters, Manga Konachan https://konachan.com/ Pools, Popular Images, Posts, Tag-Searches +LINE BLOG https://www.lineblog.me/ Blogs, Posts livedoor Blog http://blog.livedoor.jp/ Blogs, Posts Luscious https://luscious.net/ Albums, Search Results Optional Manga Fox https://fanfox.net/ Chapters @@ -101,7 +103,7 @@ Sankaku Complex https://www.sankakucomplex.com/ Articles, Tag-Searches Sen Manga https://raw.senmanga.com/ Chapters Sense-Scans http://sensescans.com/reader/ Chapters, Manga Sex.com https://www.sex.com/ Boards, Pins, related Pins, Search Results -Simply Hentai https://www.simply-hentai.com/ Galleries +Simply Hentai https://www.simply-hentai.com/ Galleries, individual Images, Videos SlickPic https://www.slickpic.com/ Images from Users, Albums SlideShare https://www.slideshare.net/ Presentations SmugMug https://www.smugmug.com/ |smugmug-C| Optional (OAuth) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 0b24111..351c5df 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -29,6 +29,7 @@ modules = [ "exhentai", "fallenangels", "flickr", + "fuskator", "gelbooru", "gfycat", "hbrowse", @@ -53,6 +54,7 @@ modules = [ "kissmanga", "komikcast", "konachan", + "lineblog", "livedoor", "luscious", "mangadex", diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index 54a8878..ac45e0b 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -27,6 +27,7 @@ class BooruExtractor(SharedConfigMixin, Extractor): page_start = 1 page_limit = None sort = False + ugoira = True def __init__(self, match): super().__init__(match) @@ -51,7 +52,11 @@ class BooruExtractor(SharedConfigMixin, Extractor): for image in images: try: - url = image["file_url"] + if "pixiv_ugoira_frame_data" in image and \ + "large_file_url" in image and not self.ugoira: + url = image["large_file_url"] + else: + url = image["file_url"] except KeyError: continue if url.startswith("/"): diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 211c340..e8d3abf 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -28,6 +28,7 @@ class DanbooruExtractor(booru.DanbooruPageMixin, booru.BooruExtractor): self.scheme = "https" if self.subdomain == "danbooru" else "http" self.api_url = "{scheme}://{subdomain}.donmai.us/posts.json".format( scheme=self.scheme, subdomain=self.subdomain) + self.ugoira = self.config("ugoira", True) username, api_key = self._get_auth_info() if username: @@ -63,9 +64,15 @@ class DanbooruPoolExtractor(booru.PoolMixin, DanbooruExtractor): class DanbooruPostExtractor(booru.PostMixin, DanbooruExtractor): """Extractor for single images from danbooru""" pattern = BASE_PATTERN + r"/posts/(?P<post>\d+)" - test = ("https://danbooru.donmai.us/posts/294929", { - "content": "5e255713cbf0a8e0801dc423563c34d896bb9229", - }) + test = ( + ("https://danbooru.donmai.us/posts/294929", { + "content": "5e255713cbf0a8e0801dc423563c34d896bb9229", + }), + ("https://danbooru.donmai.us/posts/3613024", { + "pattern": r"https?://.+\.webm$", + "options": (("ugoira", False),) + }) + ) class DanbooruPopularExtractor(booru.PopularMixin, DanbooruExtractor): diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 1833b1a..75e19d6 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -121,7 +121,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): r"|/s/([\da-f]{10})/(\d+)-(\d+))") test = ( ("https://exhentai.org/g/1200119/d55c44d3d0/", { - "keyword": "1b353fad00dff0665b1746cdd151ab5cc326df23", + "keyword": "3eeae7bde70dd992402d4cc0230ea0f2c4af46c5", "content": "e9891a4c017ed0bb734cd1efba5cd03f594d31ff", }), ("https://exhentai.org/g/960461/4f0e369d82/", { @@ -151,9 +151,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): def items(self): self.login() - if self.limits: - self._init_limits() - if self.gallery_token: gpage = self._gallery_page() self.image_token = text.extract(gpage, 'hentai.org/s/', '"')[0] @@ -308,15 +305,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): raise exception.NotFoundError("image page") return page - def _init_limits(self): - self._update_limits() - if self._remaining <= 0: - self.log.error("Image limit reached!") - ExhentaiExtractor.LIMIT = True - raise exception.StopExtraction() - def _check_limits(self, data): - if data["num"] % 20 == 0: + if not self._remaining or data["num"] % 20 == 0: self._update_limits() self._remaining -= data["cost"] @@ -360,7 +350,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): "width": text.parse_int(parts[0]), "height": text.parse_int(parts[2]), "size": size, - "cost": 1 + math.ceil(size * 5 / 1024 / 1024) + # 1 initial point + 1 per 0.1 MB + "cost": 1 + math.ceil(size / 104857.6) } diff --git a/gallery_dl/extractor/fuskator.py b/gallery_dl/extractor/fuskator.py new file mode 100644 index 0000000..dbcf2f2 --- /dev/null +++ b/gallery_dl/extractor/fuskator.py @@ -0,0 +1,110 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://fuskator.com/""" + +from .common import GalleryExtractor, Extractor, Message +from .. import text +import time + + +class FuskatorGalleryExtractor(GalleryExtractor): + """Extractor for image galleries on fuskator.com""" + category = "fuskator" + root = "https://fuskator.com" + pattern = r"(?:https?://)?fuskator\.com/(?:thumbs|expanded)/([^/?&#]+)" + test = ( + ("https://fuskator.com/thumbs/d0GnIzXrSKU/", { + "pattern": r"https://i\d+.fuskator.com/large/d0GnIzXrSKU/.+\.jpg", + "count": 22, + "keyword": { + "gallery_id": 473023, + "gallery_hash": "d0GnIzXrSKU", + "title": "re:Shaved Brunette Babe Maria Ryabushkina with ", + "views": int, + "score": float, + "count": 22, + "tags": list, + }, + }), + ("https://fuskator.com/expanded/gXpKzjgIidA/index.html"), + ) + + def __init__(self, match): + self.gallery_hash = match.group(1) + url = "{}/thumbs/{}/".format(self.root, self.gallery_hash) + GalleryExtractor.__init__(self, match, url) + + def metadata(self, page): + headers = { + "Referer" : self.chapter_url, + "X-Requested-With": "XMLHttpRequest", + } + auth = self.request( + self.root + "/ajax/auth.aspx", method="POST", headers=headers, + ).text + + params = { + "X-Auth": auth, + "hash" : self.gallery_hash, + "_" : int(time.time()), + } + self.data = data = self.request( + self.root + "/ajax/gal.aspx", params=params, headers=headers, + ).json() + + title = text.extract(page, "<title>", "</title>")[0].strip() + title, _, gallery_id = title.rpartition("#") + + return { + "gallery_id" : text.parse_int(gallery_id), + "gallery_hash": self.gallery_hash, + "title" : text.unescape(title[:-15]), + "views" : data["hits"], + "score" : data["rating"], + "tags" : data["tags"].split(","), + "count" : len(data["images"]), + } + + def images(self, page): + for image in self.data["images"]: + yield "https:" + image["imageUrl"], image + + +class FuskatorSearchExtractor(Extractor): + """Extractor for search results on fuskator.com""" + category = "fuskator" + subcategory = "search" + root = "https://fuskator.com" + pattern = r"(?:https?://)?fuskator\.com(/(?:search|page)/.+)" + test = ( + ("https://fuskator.com/search/red_swimsuit/", { + "pattern": FuskatorGalleryExtractor.pattern, + "count": ">= 40", + }), + ("https://fuskator.com/page/3/swimsuit/quality/"), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.path = match.group(1) + + def items(self): + url = self.root + self.path + data = {"_extractor": FuskatorGalleryExtractor} + + while True: + page = self.request(url).text + for path in text.extract_iter( + page, 'class="pic_pad"><a href="', '"'): + yield Message.Queue, self.root + path, data + + pages = text.extract(page, 'class="pages"><span>', '>>><')[0] + if not pages: + return + url = self.root + text.rextract(pages, 'href="', '"')[0] diff --git a/gallery_dl/extractor/hentaicafe.py b/gallery_dl/extractor/hentaicafe.py index e95467b..679b3ad 100644 --- a/gallery_dl/extractor/hentaicafe.py +++ b/gallery_dl/extractor/hentaicafe.py @@ -10,6 +10,7 @@ from . import foolslide from .. import text +from .common import Extractor from ..cache import memcache import re @@ -64,6 +65,7 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor): ) root = "https://hentai.cafe" reverse = False + request = Extractor.request chapterclass = HentaicafeChapterExtractor def chapters(self, page): diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index d31f66f..b6b9876 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -152,8 +152,8 @@ class HentaifoundryScrapsExtractor(HentaifoundryExtractor): r"/pictures/user/([^/]+)/scraps(?:/page/(\d+))?") test = ( ("https://www.hentai-foundry.com/pictures/user/Evulchibi/scraps", { - "url": "00a11e30b73ff2b00a1fba0014f08d49da0a68ec", - "keyword": "410c6c900cfd23a8dd1e53dfcc97a79ea68c3359", + "url": "7cd9c6ec6258c4ab8c44991f7731be82337492a7", + "keyword": "40b07a9822b6b868fea2fa9b1c0b212ae8735da7", }), ("https://www.hentai-foundry.com" "/pictures/user/Evulchibi/scraps/page/3"), diff --git a/gallery_dl/extractor/lineblog.py b/gallery_dl/extractor/lineblog.py new file mode 100644 index 0000000..a1daa39 --- /dev/null +++ b/gallery_dl/extractor/lineblog.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://www.lineblog.me/""" + +from .livedoor import LivedoorBlogExtractor, LivedoorPostExtractor +from .. import text + + +class LineblogBase(): + """Base class for lineblog extractors""" + category = "lineblog" + root = "https://lineblog.me" + + def _images(self, post): + imgs = [] + body = post.pop("body") + + for num, img in enumerate(text.extract_iter(body, "<img ", ">"), 1): + src = text.extract(img, 'src="', '"')[0] + alt = text.extract(img, 'alt="', '"')[0] + + if not src: + continue + if src.startswith("https://obs.line-scdn.") and src.count("/") > 3: + src = src.rpartition("/")[0] + + imgs.append(text.nameext_from_url(alt or src, { + "url" : src, + "num" : num, + "hash": src.rpartition("/")[2], + "post": post, + })) + + return imgs + + +class LineblogBlogExtractor(LineblogBase, LivedoorBlogExtractor): + """Extractor for a user's blog on lineblog.me""" + pattern = r"(?:https?://)?lineblog\.me/(\w+)/?(?:$|[?&#])" + test = ("https://lineblog.me/mamoru_miyano/", { + "range": "1-20", + "count": 20, + "pattern": r"https://obs.line-scdn.net/[\w-]+$", + "keyword": { + "post": { + "categories" : tuple, + "date" : "type:datetime", + "description": str, + "id" : int, + "tags" : list, + "title" : str, + "user" : "mamoru_miyano" + }, + "filename": str, + "hash" : r"re:\w{32,}", + "num" : int, + }, + }) + + +class LineblogPostExtractor(LineblogBase, LivedoorPostExtractor): + """Extractor for blog posts on lineblog.me""" + pattern = r"(?:https?://)?lineblog\.me/(\w+)/archives/(\d+)" + test = ("https://lineblog.me/mamoru_miyano/archives/1919150.html", { + "url": "24afeb4044c554f80c374b52bf8109c6f1c0c757", + "keyword": "76a38e2c0074926bd3362f66f9fc0e6c41591dcb", + }) diff --git a/gallery_dl/extractor/livedoor.py b/gallery_dl/extractor/livedoor.py index ed72f4c..e922f61 100644 --- a/gallery_dl/extractor/livedoor.py +++ b/gallery_dl/extractor/livedoor.py @@ -38,17 +38,19 @@ class LivedoorExtractor(Extractor): def _load(self, data, body): extr = text.extract_from(data) - tags = text.extract(body, '</dt><dd>', '</dl>')[0] + tags = text.extract(body, 'class="article-tags">', '</dl>')[0] + about = extr('rdf:about="', '"') return { - "id" : text.parse_int(extr("id : '", "'")), - "title" : text.unescape(extr("title : '", "'")), - "categories": [extr("name:'", "'"), extr("name:'", "'")], - "date" : text.parse_datetime( - extr("date : '", "'"), "%Y-%m-%d %H:%M:%S"), - "tags" : text.split_html(tags), - "user" : self.user, - "body" : body, + "id" : text.parse_int( + about.rpartition("/")[2].partition(".")[0]), + "title" : text.unescape(extr('dc:title="', '"')), + "categories" : extr('dc:subject="', '"').partition(",")[::2], + "description": extr('dc:description="', '"'), + "date" : text.parse_datetime(extr('dc:date="', '"')), + "tags" : text.split_html(tags)[1:] if tags else [], + "user" : self.user, + "body" : body, } def _images(self, post): @@ -90,16 +92,17 @@ class LivedoorBlogExtractor(LivedoorExtractor): "pattern": r"https?://livedoor.blogimg.jp/\w+/imgs/\w/\w/\w+\.\w+", "keyword": { "post": { - "categories": list, - "date": "type:datetime", - "id": int, - "tags": list, - "title": str, - "user": "zatsu_ke" + "categories" : tuple, + "date" : "type:datetime", + "description": str, + "id" : int, + "tags" : list, + "title" : str, + "user" : "zatsu_ke" }, "filename": str, - "hash": r"re:\w{4,}", - "num": int, + "hash" : r"re:\w{4,}", + "num" : int, }, }), ("http://blog.livedoor.jp/uotapo/", { @@ -110,11 +113,10 @@ class LivedoorBlogExtractor(LivedoorExtractor): def posts(self): url = "{}/{}".format(self.root, self.user) - while url: extr = text.extract_from(self.request(url).text) while True: - data = extr('.articles.push(', ');') + data = extr('<rdf:RDF', '</rdf:RDF>') if not data: break body = extr('class="article-body-inner">', @@ -130,15 +132,15 @@ class LivedoorPostExtractor(LivedoorExtractor): test = ( ("http://blog.livedoor.jp/zatsu_ke/archives/51493859.html", { "url": "8826fe623f19dc868e7538e8519bf8491e92a0a2", - "keyword": "52fcba9253a000c339bcd658572d252e282626af", + "keyword": "83993111d5d0c08d021196802dd36b73f04c7057", }), ("http://blog.livedoor.jp/amaumauma/archives/7835811.html", { "url": "fc1d6a9557245b5a27d3a10bf0fa9922ef377215", - "keyword": "0229072abb5cd8a221df72e0ffdfc13336c0e9ce", + "keyword": "fd700760c98897c3125328e157972f905fd34aaa", }), ("http://blog.livedoor.jp/uotapo/archives/1050616939.html", { "url": "3f3581807ec4776e6a67ed7985a22494d4bc4904", - "keyword": "2eb3e383c68e909c4dd3d563c16d0b6e2fe6627b", + "keyword": "9e319413a42e08d32f0dcbe8aa3b452ad41aa906", }), ) @@ -150,7 +152,6 @@ class LivedoorPostExtractor(LivedoorExtractor): url = "{}/{}/archives/{}.html".format( self.root, self.user, self.post_id) extr = text.extract_from(self.request(url).text) - data = extr('articles :', '</script>') - body = extr('class="article-body-inner">', - 'class="article-footer">') + data = extr('<rdf:RDF', '</rdf:RDF>') + body = extr('class="article-body-inner">', 'class="article-footer">') return (self._load(data, body),) diff --git a/gallery_dl/extractor/myportfolio.py b/gallery_dl/extractor/myportfolio.py index 1515f53..95799cf 100644 --- a/gallery_dl/extractor/myportfolio.py +++ b/gallery_dl/extractor/myportfolio.py @@ -23,9 +23,9 @@ class MyportfolioGalleryExtractor(Extractor): r"(?:https?://)?([^.]+\.myportfolio\.com))" r"(/[^/?&#]+)?") test = ( - ("https://hannahcosgrove.myportfolio.com/robyn", { - "url": "93b5430e765e53564b13e7d9c64c30c286011a6b", - "keyword": "25cb3dbdad6b011242a133f30ec598318b7512e8", + ("https://hannahcosgrove.myportfolio.com/niamh-1", { + "url": "8cbd73a73e5bf3b4f5d1b1d4a1eb114c01a72a66", + "keyword": "7a460bb5641e648ae70702ff91c2fb11054b0e0b", }), ("https://hannahcosgrove.myportfolio.com/lfw", { "pattern": r"https://hannahcosgrove\.myportfolio\.com/[^/?&#+]+$", diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 4f8ee9c..8e6a74e 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -12,6 +12,7 @@ from .common import Extractor, Message from .. import text, exception from ..cache import cache from datetime import datetime, timedelta +import hashlib class PixivExtractor(Extractor): @@ -395,6 +396,8 @@ class PixivAppAPI(): """ CLIENT_ID = "MOBrBDS8blbauoSck0ZfDbtuzpyT" CLIENT_SECRET = "lsACyCD94FhDUtGTXi3QzcFE2uU1hqtDaKeqrdwj" + HASH_SECRET = ("28c1fdd170a5204386cb1313c7077b34" + "f83e4aaf4aa829ce78c231e05b0bae2c") def __init__(self, extractor): self.extractor = extractor @@ -406,7 +409,6 @@ class PixivAppAPI(): "client-id", self.CLIENT_ID) self.client_secret = extractor.config( "client-secret", self.CLIENT_SECRET) - extractor.session.headers.update({ "App-OS": "ios", "App-OS-Version": "10.3.1", @@ -440,8 +442,15 @@ class PixivAppAPI(): data["username"] = username data["password"] = password + time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00") + headers = { + "X-Client-Time": time, + "X-Client-Hash": hashlib.md5( + (time + self.HASH_SECRET).encode()).hexdigest(), + } + response = self.extractor.request( - url, method="POST", data=data, fatal=False) + url, method="POST", headers=headers, data=data, fatal=False) if response.status_code >= 400: raise exception.AuthenticationError() diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py index 8567155..a6a3da0 100644 --- a/gallery_dl/extractor/simplyhentai.py +++ b/gallery_dl/extractor/simplyhentai.py @@ -8,16 +8,14 @@ """Extract hentai-manga from https://www.simply-hentai.com/""" -from .common import GalleryExtractor +from .common import GalleryExtractor, Extractor, Message from .. import text, util, exception -import json class SimplyhentaiGalleryExtractor(GalleryExtractor): """Extractor for image galleries from simply-hentai.com""" category = "simplyhentai" archive_fmt = "{image_id}" - root = "https://www.simply-hentai.com" pattern = (r"(?:https?://)?(?!videos\.)([\w-]+\.simply-hentai\.com" r"(?!/(?:album|gifs?|images?|series)(?:/|$))" r"(?:/(?!(?:page|all-pages)(?:/|\.|$))[^/?&#]+)+)") @@ -25,7 +23,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor): (("https://original-work.simply-hentai.com" "/amazon-no-hiyaku-amazon-elixir"), { "url": "258289249990502c3138719cb89e995a60861e49", - "keyword": "8b2400e4b466e8f46802fa5a6b917d2788bb7e8e", + "keyword": "eba83ccdbab3022a2280c77aa747f9458196138b", }), ("https://www.simply-hentai.com/notfound", { "exception": exception.GalleryDLException, @@ -42,30 +40,145 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor): self.session.headers["Referer"] = url def metadata(self, page): - path = text.extract(page, '<a class="preview" href="', '"')[0] - if not path: + extr = text.extract_from(page) + split = text.split_html + + self.chapter_url = extr('<link rel="canonical" href="', '"') + title = extr('<meta property="og:title" content="', '"') + if not title: raise exception.NotFoundError("gallery") - page = self.request(self.root + path).text - data = json.loads(text.unescape(text.extract( - page, 'data-react-class="Reader" data-react-props="', '"')[0])) - self.manga = manga = data["manga"] - - return { - "title" : manga["title"], - "parody" : manga["series"]["title"], - "language" : manga["language"]["name"], - "lang" : util.language_to_code(manga["language"]["name"]), - "characters": [x["name"] for x in manga["characters"]], - "tags" : [x["name"] for x in manga["tags"]], - "artist" : [x["name"] for x in manga["artists"]], - "gallery_id": text.parse_int(text.extract( - manga["images"][0]["sizes"]["full"], "/Album/", "/")[0]), - "date" : text.parse_datetime( - manga["publish_date"], "%Y-%m-%dT%H:%M:%S.%f%z"), + data = { + "title" : text.unescape(title), + "gallery_id": text.parse_int(extr('/Album/', '/')), + "parody" : split(extr('box-title">Series</div>', '</div>')), + "language" : text.remove_html(extr( + 'box-title">Language</div>', '</div>')) or None, + "characters": split(extr('box-title">Characters</div>', '</div>')), + "tags" : split(extr('box-title">Tags</div>', '</div>')), + "artist" : split(extr('box-title">Artists</div>', '</div>')), + "date" : text.parse_datetime(text.remove_html( + extr('Uploaded', '</div>')), "%d.%m.%Y"), } + data["lang"] = util.language_to_code(data["language"]) + return data def images(self, _): + url = self.chapter_url + "/all-pages" + headers = {"Accept": "application/json"} + images = self.request(url, headers=headers).json() return [ - (image["sizes"]["full"], {"image_id": image["id"]}) - for image in self.manga["images"] + (urls["full"], {"image_id": text.parse_int(image_id)}) + for image_id, urls in sorted(images.items()) ] + + +class SimplyhentaiImageExtractor(Extractor): + """Extractor for individual images from simply-hentai.com""" + category = "simplyhentai" + subcategory = "image" + directory_fmt = ("{category}", "{type}s") + filename_fmt = "{category}_{token}{title:?_//}.{extension}" + archive_fmt = "{token}" + pattern = (r"(?:https?://)?(?:www\.)?(simply-hentai\.com" + r"/(image|gif)/[^/?&#]+)") + test = ( + (("https://www.simply-hentai.com/image" + "/pheromomania-vol-1-kanzenban-isao-3949d8b3-400c-4b6"), { + "url": "0338eb137830ab6f81e5f410d3936ef785d063d9", + "keyword": "e10e5588481cab68329ef6ec1e5325206b2079a2", + }), + ("https://www.simply-hentai.com/gif/8915dfcf-0b6a-47c", { + "url": "11c060d7ec4dfd0bd105300b6e1fd454674a5af1", + "keyword": "dd97a4bb449c397d6fec9f43a1303c0fb168ae65", + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.page_url = "https://www." + match.group(1) + self.type = match.group(2) + + def items(self): + extr = text.extract_from(self.request(self.page_url).text) + title = extr('"og:title" content="' , '"') + descr = extr('"og:description" content="', '"') + url = extr('"image":"' , '&') + url = extr(""content":"", "&") or url + + tags = text.extract(descr, " tagged with ", " online for free ")[0] + if tags: + tags = tags.split(", ") + tags[-1] = tags[-1].partition(" ")[2] + else: + tags = [] + + data = text.nameext_from_url(url, { + "title": text.unescape(title) if title else "", + "tags": tags, + "type": self.type, + }) + data["token"] = data["filename"].rpartition("_")[2] + + yield Message.Version, 1 + yield Message.Directory, data + yield Message.Url, url, data + + +class SimplyhentaiVideoExtractor(Extractor): + """Extractor for hentai videos from simply-hentai.com""" + category = "simplyhentai" + subcategory = "video" + directory_fmt = ("{category}", "{type}s") + filename_fmt = "{title}{episode:?_//>02}.{extension}" + archive_fmt = "{title}_{episode}" + pattern = r"(?:https?://)?(videos\.simply-hentai\.com/[^/?&#]+)" + test = ( + ("https://videos.simply-hentai.com/creamy-pie-episode-02", { + "pattern": r"https://www\.googleapis\.com/drive/v3/files" + r"/0B1ecQ8ZVLm3JcHZzQzBnVy1ZUmc\?alt=media&key=[\w-]+", + "keyword": "706790708b14773efc1e075ddd3b738a375348a5", + "count": 1, + }), + (("https://videos.simply-hentai.com" + "/1715-tifa-in-hentai-gang-bang-3d-movie"), { + "url": "ad9a36ae06c601b6490e3c401834b4949d947eb0", + "keyword": "f9dad94fbde9c95859e631ff4f07297a9567b874", + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.page_url = "https://" + match.group(1) + + def items(self): + page = self.request(self.page_url).text + + title, pos = text.extract(page, "<title>", "</title>") + tags , pos = text.extract(page, ">Tags</div>", "</div>", pos) + date , pos = text.extract(page, ">Upload Date</div>", "</div>", pos) + title = title.rpartition(" - ")[0] + + if "<video" in page: + video_url = text.extract(page, '<source src="', '"', pos)[0] + episode = 0 + else: + # video url from myhentai.tv embed + pos = page.index('<div class="video-frame-container">', pos) + embed_url = text.extract(page, 'src="', '"', pos)[0].replace( + "embedplayer.php?link=", "embed.php?name=") + embed_page = self.request(embed_url).text + video_url = text.extract(embed_page, '"file":"', '"')[0] + title, _, episode = title.rpartition(" Episode ") + + data = text.nameext_from_url(video_url, { + "title": text.unescape(title), + "episode": text.parse_int(episode), + "tags": text.split_html(tags)[::2], + "type": "video", + "date": text.parse_datetime(text.remove_html( + date), "%B %d, %Y %H:%M"), + }) + + yield Message.Version, 1 + yield Message.Directory, data + yield Message.Url, video_url, data diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 024d6e9..8abbaf7 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -109,7 +109,7 @@ class TumblrExtractor(Extractor): yield self._prepare_image(photo["url"], post) url = post.get("audio_url") # type: "audio" - if url: + if url and url.startswith("https://a.tumblr.com/"): yield self._prepare(url, post) url = post.get("video_url") # type: "video" diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 3672a6d..2fa69d5 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -151,12 +151,15 @@ class TwitterTimelineExtractor(TwitterExtractor): """Extractor for all images from a user's timeline""" subcategory = "timeline" pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" - r"/([^/?&#]+)/?$") - test = ("https://twitter.com/supernaturepics", { - "range": "1-40", - "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4", - "keyword": "7210d679606240405e0cf62cbc67596e81a7a250", - }) + r"/([^/?&#]+)/?(?:$|[?#])") + test = ( + ("https://twitter.com/supernaturepics", { + "range": "1-40", + "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4", + "keyword": "7210d679606240405e0cf62cbc67596e81a7a250", + }), + ("https://mobile.twitter.com/supernaturepics?p=i"), + ) def tweets(self): url = "{}/i/profiles/show/{}/timeline/tweets".format( @@ -169,10 +172,13 @@ class TwitterMediaExtractor(TwitterExtractor): subcategory = "media" pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" r"/([^/?&#]+)/media(?!\w)") - test = ("https://twitter.com/supernaturepics/media", { - "range": "1-40", - "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4", - }) + test = ( + ("https://twitter.com/supernaturepics/media", { + "range": "1-40", + "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4", + }), + ("https://mobile.twitter.com/supernaturepics/media#t"), + ) def tweets(self): url = "{}/i/profiles/show/{}/media_timeline".format( @@ -206,6 +212,11 @@ class TwitterTweetExtractor(TwitterExtractor): "options": (("content", True),), "keyword": "b13b6c4cd0b0c15b2ea7685479e7fedde3c47b9e", }), + # Reply to another tweet (#403) + ("https://twitter.com/tyson_hesse/status/1103767554424598528", { + "options": (("videos", True),), + "pattern": r"ytdl:https://twitter.com/.*/1103767554424598528$", + }), ) def __init__(self, match): @@ -216,7 +227,9 @@ class TwitterTweetExtractor(TwitterExtractor): return {"user": self.user, "tweet_id": self.tweet_id} def tweets(self): + self.session.cookies.clear() url = "{}/{}/status/{}".format(self.root, self.user, self.tweet_id) page = self.request(url).text - return (text.extract( - page, '<div class="tweet ', 'class="js-tweet-stats-container')[0],) + end = page.index('class="js-tweet-stats-container') + beg = page.rindex('<div class="tweet ', 0, end) + return (page[beg:end],) diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py index 639ec82..6cc5911 100644 --- a/gallery_dl/extractor/vsco.py +++ b/gallery_dl/extractor/vsco.py @@ -86,12 +86,15 @@ class VscoExtractor(Extractor): class VscoUserExtractor(VscoExtractor): """Extractor for images from a user on vsco.co""" subcategory = "user" - pattern = BASE_PATTERN + r"/images/" - test = ("https://vsco.co/missuri/images/1", { - "range": "1-80", - "count": 80, - "pattern": r"https://im\.vsco\.co/[^/]+/[0-9a-f/]+/vsco\w+\.\w+", - }) + pattern = BASE_PATTERN + r"(?:/images(?:/\d+)?)?/?(?:$|[?#])" + test = ( + ("https://vsco.co/missuri/images/1", { + "range": "1-80", + "count": 80, + "pattern": r"https://im\.vsco\.co/[^/]+/[0-9a-f/]+/vsco\w+\.\w+", + }), + ("https://vsco.co/missuri"), + ) def images(self): url = "{}/{}/images/1".format(self.root, self.user) diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 6d81e66..b6b5a6f 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -297,7 +297,7 @@ class DownloadJob(Job): instance = None self.log.error("'%s:' URLs are not supported/enabled", scheme) - if klass.scheme == "http": + if klass and klass.scheme == "http": self.downloaders["http"] = self.downloaders["https"] = instance else: self.downloaders[scheme] = instance diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py index 0dbb796..162eb9e 100644 --- a/gallery_dl/postprocessor/ugoira.py +++ b/gallery_dl/postprocessor/ugoira.py @@ -103,20 +103,26 @@ class UgoiraPP(PostProcessor): # invoke ffmpeg pathfmt.set_extension(self.extension) - if self.twopass: - if "-f" not in args: - args += ["-f", self.extension] - args += ["-passlogfile", tempdir + "/ffmpeg2pass", "-pass"] - self._exec(args + ["1", "-y", os.devnull]) - self._exec(args + ["2", pathfmt.realpath]) + try: + if self.twopass: + if "-f" not in args: + args += ["-f", self.extension] + args += ["-passlogfile", tempdir + "/ffmpeg2pass", "-pass"] + self._exec(args + ["1", "-y", os.devnull]) + self._exec(args + ["2", pathfmt.realpath]) + else: + args.append(pathfmt.realpath) + self._exec(args) + except OSError as exc: + print() + self.log.error("Unable to invoke FFmpeg (%s: %s)", + exc.__class__.__name__, exc) + pathfmt.realpath = pathfmt.temppath else: - args.append(pathfmt.realpath) - self._exec(args) - - if self.delete: - pathfmt.delete = True - else: - pathfmt.set_extension("zip") + if self.delete: + pathfmt.delete = True + else: + pathfmt.set_extension("zip") def _exec(self, args): out = None if self.output else subprocess.DEVNULL diff --git a/gallery_dl/version.py b/gallery_dl/version.py index cbb8fe7..5209e95 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.10.3" +__version__ = "1.10.4" diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 78963aa..2213ffa 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -45,6 +45,7 @@ CATEGORY_MAP = { "jaiminisbox" : "Jaimini's Box", "kireicake" : "Kirei Cake", "kissmanga" : "KissManga", + "lineblog" : "LINE BLOG", "livedoor" : "livedoor Blog", "mangadex" : "MangaDex", "mangafox" : "Manga Fox", diff --git a/test/test_results.py b/test/test_results.py index a69cc81..fb29a87 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -29,8 +29,6 @@ BROKEN = { "8chan", "imgth", "mangapark", - "mangoxo", - "tumblr", } @@ -286,12 +284,14 @@ def setup_test_config(): config.set(("extractor", "timeout"), 60) config.set(("extractor", "username"), name) config.set(("extractor", "password"), name) - config.set(("extractor", "nijie", "username"), email) - config.set(("extractor", "seiga", "username"), email) + config.set(("extractor", "nijie" , "username"), email) + config.set(("extractor", "seiga" , "username"), email) config.set(("extractor", "danbooru" , "username"), None) config.set(("extractor", "instagram", "username"), None) config.set(("extractor", "twitter" , "username"), None) - config.set(("extractor", "mangoxo" , "password"), "VZ8DL3983u") + + config.set(("extractor", "mangoxo" , "username"), "LiQiang3") + config.set(("extractor", "mangoxo" , "password"), "5zbQF10_5u25259Ma") config.set(("extractor", "deviantart", "client-id"), "7777") config.set(("extractor", "deviantart", "client-secret"), |
