-rw-r--r--  CHANGELOG.md                            13
-rw-r--r--  README.rst                               6
-rw-r--r--  docs/configuration.rst                  12
-rw-r--r--  docs/gallery-dl.conf                     3
-rw-r--r--  docs/supportedsites.rst                  4
-rw-r--r--  gallery_dl/extractor/__init__.py         2
-rw-r--r--  gallery_dl/extractor/booru.py            7
-rw-r--r--  gallery_dl/extractor/danbooru.py        13
-rw-r--r--  gallery_dl/extractor/exhentai.py        17
-rw-r--r--  gallery_dl/extractor/fuskator.py       110
-rw-r--r--  gallery_dl/extractor/hentaicafe.py       2
-rw-r--r--  gallery_dl/extractor/hentaifoundry.py    4
-rw-r--r--  gallery_dl/extractor/lineblog.py        73
-rw-r--r--  gallery_dl/extractor/livedoor.py        51
-rw-r--r--  gallery_dl/extractor/myportfolio.py      6
-rw-r--r--  gallery_dl/extractor/pixiv.py           13
-rw-r--r--  gallery_dl/extractor/simplyhentai.py   163
-rw-r--r--  gallery_dl/extractor/tumblr.py           2
-rw-r--r--  gallery_dl/extractor/twitter.py         37
-rw-r--r--  gallery_dl/extractor/vsco.py            15
-rw-r--r--  gallery_dl/job.py                        2
-rw-r--r--  gallery_dl/postprocessor/ugoira.py      32
-rw-r--r--  gallery_dl/version.py                    2
-rwxr-xr-x  scripts/supportedsites.py                1
-rw-r--r--  test/test_results.py                    10
25 files changed, 482 insertions(+), 118 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a2b5109..4cde46b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,18 @@
# Changelog
+## 1.10.4 - 2019-09-08
+### Additions
+- Support for
+ - `lineblog` - https://www.lineblog.me/ ([#404](https://github.com/mikf/gallery-dl/issues/404))
+ - `fuskator` - https://fuskator.com/ ([#407](https://github.com/mikf/gallery-dl/issues/407))
+- `ugoira` option for `danbooru` to download pre-rendered ugoira animations ([#406](https://github.com/mikf/gallery-dl/issues/406))
+### Fixes
+- Download the correct files from `twitter` replies ([#403](https://github.com/mikf/gallery-dl/issues/403))
+- Prevent crash when trying to use unavailable downloader modules ([#405](https://github.com/mikf/gallery-dl/issues/405))
+- Fix `pixiv` authentication ([#411](https://github.com/mikf/gallery-dl/issues/411))
+- Improve `exhentai` image limit checks
+- Miscellaneous fixes for `hentaicafe`, `simplyhentai`, `tumblr`
+
## 1.10.3 - 2019-08-30
### Additions
- Provide `filename` metadata for all `deviantart` files ([#392](https://github.com/mikf/gallery-dl/issues/392), [#400](https://github.com/mikf/gallery-dl/issues/400))
diff --git a/README.rst b/README.rst
index 2e934f8..f9b3e87 100644
--- a/README.rst
+++ b/README.rst
@@ -78,8 +78,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.3/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.3/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.4/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.4/gallery-dl.bin>`__
These executables include a Python 3.7 interpreter
and all required Python packages.
@@ -224,7 +224,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.3.zip
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.4.zip
.. _dev: https://github.com/mikf/gallery-dl/archive/master.zip
.. _Python: https://www.python.org/downloads/
diff --git a/docs/configuration.rst b/docs/configuration.rst
index d69406d..e384f2c 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -486,6 +486,18 @@ Description Try to follow external URLs of embedded players.
=========== =====
+extractor.danbooru.ugoira
+-------------------------
+=========== =====
+Type ``bool``
+Default ``true``
+Description Controls the download target for Ugoira posts.
+
+ * ``true``: Original ZIP archives
+ * ``false``: Converted video files
+=========== =====
+
+
extractor.deviantart.extra
--------------------------
=========== =====
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 6439437..eff6da1 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -19,7 +19,8 @@
"danbooru":
{
"username": null,
- "password": null
+ "password": null,
+ "ugoira": true
},
"deviantart":
{
diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst
index 05c8555..925185c 100644
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@@ -33,6 +33,7 @@ Fallen Angels Scans https://www.fascans.com/ Chapters, Manga
Fashion Nova https://www.fashionnova.com/ Collections, Products
Fireden https://boards.fireden.net/ Threads
Flickr https://www.flickr.com/ |flickr-C| Optional (OAuth)
+Fuskator https://fuskator.com/ Galleries, Search Results
Futaba Channel https://www.2chan.net/ Threads
Gelbooru https://gelbooru.com/ Pools, Posts, Tag-Searches
Gfycat https://gfycat.com/ individual Images
@@ -61,6 +62,7 @@ Kirei Cake https://reader.kireicake.com/ Chapters, Manga
KissManga https://kissmanga.com/ Chapters, Manga
Komikcast https://komikcast.com/ Chapters, Manga
Konachan https://konachan.com/ Pools, Popular Images, Posts, Tag-Searches
+LINE BLOG https://www.lineblog.me/ Blogs, Posts
livedoor Blog http://blog.livedoor.jp/ Blogs, Posts
Luscious https://luscious.net/ Albums, Search Results Optional
Manga Fox https://fanfox.net/ Chapters
@@ -101,7 +103,7 @@ Sankaku Complex https://www.sankakucomplex.com/ Articles, Tag-Searches
Sen Manga https://raw.senmanga.com/ Chapters
Sense-Scans http://sensescans.com/reader/ Chapters, Manga
Sex.com https://www.sex.com/ Boards, Pins, related Pins, Search Results
-Simply Hentai https://www.simply-hentai.com/ Galleries
+Simply Hentai https://www.simply-hentai.com/ Galleries, individual Images, Videos
SlickPic https://www.slickpic.com/ Images from Users, Albums
SlideShare https://www.slideshare.net/ Presentations
SmugMug https://www.smugmug.com/ |smugmug-C| Optional (OAuth)
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 0b24111..351c5df 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -29,6 +29,7 @@ modules = [
"exhentai",
"fallenangels",
"flickr",
+ "fuskator",
"gelbooru",
"gfycat",
"hbrowse",
@@ -53,6 +54,7 @@ modules = [
"kissmanga",
"komikcast",
"konachan",
+ "lineblog",
"livedoor",
"luscious",
"mangadex",
diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py
index 54a8878..ac45e0b 100644
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -27,6 +27,7 @@ class BooruExtractor(SharedConfigMixin, Extractor):
page_start = 1
page_limit = None
sort = False
+ ugoira = True
def __init__(self, match):
super().__init__(match)
@@ -51,7 +52,11 @@ class BooruExtractor(SharedConfigMixin, Extractor):
for image in images:
try:
- url = image["file_url"]
+ if "pixiv_ugoira_frame_data" in image and \
+ "large_file_url" in image and not self.ugoira:
+ url = image["large_file_url"]
+ else:
+ url = image["file_url"]
except KeyError:
continue
if url.startswith("/"):
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 211c340..e8d3abf 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -28,6 +28,7 @@ class DanbooruExtractor(booru.DanbooruPageMixin, booru.BooruExtractor):
self.scheme = "https" if self.subdomain == "danbooru" else "http"
self.api_url = "{scheme}://{subdomain}.donmai.us/posts.json".format(
scheme=self.scheme, subdomain=self.subdomain)
+ self.ugoira = self.config("ugoira", True)
username, api_key = self._get_auth_info()
if username:
@@ -63,9 +64,15 @@ class DanbooruPoolExtractor(booru.PoolMixin, DanbooruExtractor):
class DanbooruPostExtractor(booru.PostMixin, DanbooruExtractor):
"""Extractor for single images from danbooru"""
pattern = BASE_PATTERN + r"/posts/(?P<post>\d+)"
- test = ("https://danbooru.donmai.us/posts/294929", {
- "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
- })
+ test = (
+ ("https://danbooru.donmai.us/posts/294929", {
+ "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
+ }),
+ ("https://danbooru.donmai.us/posts/3613024", {
+ "pattern": r"https?://.+\.webm$",
+ "options": (("ugoira", False),)
+ })
+ )
class DanbooruPopularExtractor(booru.PopularMixin, DanbooruExtractor):
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 1833b1a..75e19d6 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -121,7 +121,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
r"|/s/([\da-f]{10})/(\d+)-(\d+))")
test = (
("https://exhentai.org/g/1200119/d55c44d3d0/", {
- "keyword": "1b353fad00dff0665b1746cdd151ab5cc326df23",
+ "keyword": "3eeae7bde70dd992402d4cc0230ea0f2c4af46c5",
"content": "e9891a4c017ed0bb734cd1efba5cd03f594d31ff",
}),
("https://exhentai.org/g/960461/4f0e369d82/", {
@@ -151,9 +151,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
def items(self):
self.login()
- if self.limits:
- self._init_limits()
-
if self.gallery_token:
gpage = self._gallery_page()
self.image_token = text.extract(gpage, 'hentai.org/s/', '"')[0]
@@ -308,15 +305,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
raise exception.NotFoundError("image page")
return page
- def _init_limits(self):
- self._update_limits()
- if self._remaining <= 0:
- self.log.error("Image limit reached!")
- ExhentaiExtractor.LIMIT = True
- raise exception.StopExtraction()
-
def _check_limits(self, data):
- if data["num"] % 20 == 0:
+ if not self._remaining or data["num"] % 20 == 0:
self._update_limits()
self._remaining -= data["cost"]
@@ -360,7 +350,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"width": text.parse_int(parts[0]),
"height": text.parse_int(parts[2]),
"size": size,
- "cost": 1 + math.ceil(size * 5 / 1024 / 1024)
+ # 1 initial point + 1 per 0.1 MB
+ "cost": 1 + math.ceil(size / 104857.6)
}
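
Worked example for the revised cost formula above: 104857.6 bytes is 0.1 MiB, so a 2 MiB image costs 1 + ceil(2097152 / 104857.6) = 1 + 20 = 21 points. A minimal sketch (the image_cost helper is hypothetical, not part of the extractor):

    import math

    def image_cost(size):
        # 1 initial point + 1 per 0.1 MiB (104857.6 = 0.1 * 1024 * 1024)
        return 1 + math.ceil(size / 104857.6)

    assert image_cost(2 * 1024 * 1024) == 21  # 2 MiB -> 21 points
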
diff --git a/gallery_dl/extractor/fuskator.py b/gallery_dl/extractor/fuskator.py
new file mode 100644
index 0000000..dbcf2f2
--- /dev/null
+++ b/gallery_dl/extractor/fuskator.py
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://fuskator.com/"""
+
+from .common import GalleryExtractor, Extractor, Message
+from .. import text
+import time
+
+
+class FuskatorGalleryExtractor(GalleryExtractor):
+ """Extractor for image galleries on fuskator.com"""
+ category = "fuskator"
+ root = "https://fuskator.com"
+ pattern = r"(?:https?://)?fuskator\.com/(?:thumbs|expanded)/([^/?&#]+)"
+ test = (
+ ("https://fuskator.com/thumbs/d0GnIzXrSKU/", {
+ "pattern": r"https://i\d+.fuskator.com/large/d0GnIzXrSKU/.+\.jpg",
+ "count": 22,
+ "keyword": {
+ "gallery_id": 473023,
+ "gallery_hash": "d0GnIzXrSKU",
+ "title": "re:Shaved Brunette Babe Maria Ryabushkina with ",
+ "views": int,
+ "score": float,
+ "count": 22,
+ "tags": list,
+ },
+ }),
+ ("https://fuskator.com/expanded/gXpKzjgIidA/index.html"),
+ )
+
+ def __init__(self, match):
+ self.gallery_hash = match.group(1)
+ url = "{}/thumbs/{}/".format(self.root, self.gallery_hash)
+ GalleryExtractor.__init__(self, match, url)
+
+ def metadata(self, page):
+ headers = {
+ "Referer" : self.chapter_url,
+ "X-Requested-With": "XMLHttpRequest",
+ }
+ auth = self.request(
+ self.root + "/ajax/auth.aspx", method="POST", headers=headers,
+ ).text
+
+ params = {
+ "X-Auth": auth,
+ "hash" : self.gallery_hash,
+ "_" : int(time.time()),
+ }
+ self.data = data = self.request(
+ self.root + "/ajax/gal.aspx", params=params, headers=headers,
+ ).json()
+
+ title = text.extract(page, "<title>", "</title>")[0].strip()
+ title, _, gallery_id = title.rpartition("#")
+
+ return {
+ "gallery_id" : text.parse_int(gallery_id),
+ "gallery_hash": self.gallery_hash,
+ "title" : text.unescape(title[:-15]),
+ "views" : data["hits"],
+ "score" : data["rating"],
+ "tags" : data["tags"].split(","),
+ "count" : len(data["images"]),
+ }
+
+ def images(self, page):
+ for image in self.data["images"]:
+ yield "https:" + image["imageUrl"], image
+
+
+class FuskatorSearchExtractor(Extractor):
+ """Extractor for search results on fuskator.com"""
+ category = "fuskator"
+ subcategory = "search"
+ root = "https://fuskator.com"
+ pattern = r"(?:https?://)?fuskator\.com(/(?:search|page)/.+)"
+ test = (
+ ("https://fuskator.com/search/red_swimsuit/", {
+ "pattern": FuskatorGalleryExtractor.pattern,
+ "count": ">= 40",
+ }),
+ ("https://fuskator.com/page/3/swimsuit/quality/"),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.path = match.group(1)
+
+ def items(self):
+ url = self.root + self.path
+ data = {"_extractor": FuskatorGalleryExtractor}
+
+ while True:
+ page = self.request(url).text
+ for path in text.extract_iter(
+ page, 'class="pic_pad"><a href="', '"'):
+ yield Message.Queue, self.root + path, data
+
+ pages = text.extract(page, 'class="pages"><span>', '>&gt;&gt;<')[0]
+ if not pages:
+ return
+ url = self.root + text.rextract(pages, 'href="', '"')[0]
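
For context, the gallery metadata above comes from a two-step AJAX handshake: an empty POST to /ajax/auth.aspx returns a token, which /ajax/gal.aspx then expects as its X-Auth query parameter. A standalone sketch under those assumptions (hypothetical fetch_gallery_data helper, using plain requests instead of the extractor's session):

    import time
    import requests

    def fetch_gallery_data(gallery_hash):
        root = "https://fuskator.com"
        headers = {
            "Referer": "{}/thumbs/{}/".format(root, gallery_hash),
            "X-Requested-With": "XMLHttpRequest",
        }
        # step 1: obtain the auth token
        auth = requests.post(root + "/ajax/auth.aspx", headers=headers).text
        # step 2: send it along with the gallery hash and a cache-busting timestamp
        params = {"X-Auth": auth, "hash": gallery_hash, "_": int(time.time())}
        return requests.get(
            root + "/ajax/gal.aspx", params=params, headers=headers).json()
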
diff --git a/gallery_dl/extractor/hentaicafe.py b/gallery_dl/extractor/hentaicafe.py
index e95467b..679b3ad 100644
--- a/gallery_dl/extractor/hentaicafe.py
+++ b/gallery_dl/extractor/hentaicafe.py
@@ -10,6 +10,7 @@
from . import foolslide
from .. import text
+from .common import Extractor
from ..cache import memcache
import re
@@ -64,6 +65,7 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor):
)
root = "https://hentai.cafe"
reverse = False
+ request = Extractor.request
chapterclass = HentaicafeChapterExtractor
def chapters(self, page):
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index d31f66f..b6b9876 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -152,8 +152,8 @@ class HentaifoundryScrapsExtractor(HentaifoundryExtractor):
r"/pictures/user/([^/]+)/scraps(?:/page/(\d+))?")
test = (
("https://www.hentai-foundry.com/pictures/user/Evulchibi/scraps", {
- "url": "00a11e30b73ff2b00a1fba0014f08d49da0a68ec",
- "keyword": "410c6c900cfd23a8dd1e53dfcc97a79ea68c3359",
+ "url": "7cd9c6ec6258c4ab8c44991f7731be82337492a7",
+ "keyword": "40b07a9822b6b868fea2fa9b1c0b212ae8735da7",
}),
("https://www.hentai-foundry.com"
"/pictures/user/Evulchibi/scraps/page/3"),
diff --git a/gallery_dl/extractor/lineblog.py b/gallery_dl/extractor/lineblog.py
new file mode 100644
index 0000000..a1daa39
--- /dev/null
+++ b/gallery_dl/extractor/lineblog.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.lineblog.me/"""
+
+from .livedoor import LivedoorBlogExtractor, LivedoorPostExtractor
+from .. import text
+
+
+class LineblogBase():
+ """Base class for lineblog extractors"""
+ category = "lineblog"
+ root = "https://lineblog.me"
+
+ def _images(self, post):
+ imgs = []
+ body = post.pop("body")
+
+ for num, img in enumerate(text.extract_iter(body, "<img ", ">"), 1):
+ src = text.extract(img, 'src="', '"')[0]
+ alt = text.extract(img, 'alt="', '"')[0]
+
+ if not src:
+ continue
+ if src.startswith("https://obs.line-scdn.") and src.count("/") > 3:
+ src = src.rpartition("/")[0]
+
+ imgs.append(text.nameext_from_url(alt or src, {
+ "url" : src,
+ "num" : num,
+ "hash": src.rpartition("/")[2],
+ "post": post,
+ }))
+
+ return imgs
+
+
+class LineblogBlogExtractor(LineblogBase, LivedoorBlogExtractor):
+ """Extractor for a user's blog on lineblog.me"""
+ pattern = r"(?:https?://)?lineblog\.me/(\w+)/?(?:$|[?&#])"
+ test = ("https://lineblog.me/mamoru_miyano/", {
+ "range": "1-20",
+ "count": 20,
+ "pattern": r"https://obs.line-scdn.net/[\w-]+$",
+ "keyword": {
+ "post": {
+ "categories" : tuple,
+ "date" : "type:datetime",
+ "description": str,
+ "id" : int,
+ "tags" : list,
+ "title" : str,
+ "user" : "mamoru_miyano"
+ },
+ "filename": str,
+ "hash" : r"re:\w{32,}",
+ "num" : int,
+ },
+ })
+
+
+class LineblogPostExtractor(LineblogBase, LivedoorPostExtractor):
+ """Extractor for blog posts on lineblog.me"""
+ pattern = r"(?:https?://)?lineblog\.me/(\w+)/archives/(\d+)"
+ test = ("https://lineblog.me/mamoru_miyano/archives/1919150.html", {
+ "url": "24afeb4044c554f80c374b52bf8109c6f1c0c757",
+ "keyword": "76a38e2c0074926bd3362f66f9fc0e6c41591dcb",
+ })
diff --git a/gallery_dl/extractor/livedoor.py b/gallery_dl/extractor/livedoor.py
index ed72f4c..e922f61 100644
--- a/gallery_dl/extractor/livedoor.py
+++ b/gallery_dl/extractor/livedoor.py
@@ -38,17 +38,19 @@ class LivedoorExtractor(Extractor):
def _load(self, data, body):
extr = text.extract_from(data)
- tags = text.extract(body, '</dt><dd>', '</dl>')[0]
+ tags = text.extract(body, 'class="article-tags">', '</dl>')[0]
+ about = extr('rdf:about="', '"')
return {
- "id" : text.parse_int(extr("id : '", "'")),
- "title" : text.unescape(extr("title : '", "'")),
- "categories": [extr("name:'", "'"), extr("name:'", "'")],
- "date" : text.parse_datetime(
- extr("date : '", "'"), "%Y-%m-%d %H:%M:%S"),
- "tags" : text.split_html(tags),
- "user" : self.user,
- "body" : body,
+ "id" : text.parse_int(
+ about.rpartition("/")[2].partition(".")[0]),
+ "title" : text.unescape(extr('dc:title="', '"')),
+ "categories" : extr('dc:subject="', '"').partition(",")[::2],
+ "description": extr('dc:description="', '"'),
+ "date" : text.parse_datetime(extr('dc:date="', '"')),
+ "tags" : text.split_html(tags)[1:] if tags else [],
+ "user" : self.user,
+ "body" : body,
}
def _images(self, post):
@@ -90,16 +92,17 @@ class LivedoorBlogExtractor(LivedoorExtractor):
"pattern": r"https?://livedoor.blogimg.jp/\w+/imgs/\w/\w/\w+\.\w+",
"keyword": {
"post": {
- "categories": list,
- "date": "type:datetime",
- "id": int,
- "tags": list,
- "title": str,
- "user": "zatsu_ke"
+ "categories" : tuple,
+ "date" : "type:datetime",
+ "description": str,
+ "id" : int,
+ "tags" : list,
+ "title" : str,
+ "user" : "zatsu_ke"
},
"filename": str,
- "hash": r"re:\w{4,}",
- "num": int,
+ "hash" : r"re:\w{4,}",
+ "num" : int,
},
}),
("http://blog.livedoor.jp/uotapo/", {
@@ -110,11 +113,10 @@ class LivedoorBlogExtractor(LivedoorExtractor):
def posts(self):
url = "{}/{}".format(self.root, self.user)
-
while url:
extr = text.extract_from(self.request(url).text)
while True:
- data = extr('.articles.push(', ');')
+ data = extr('<rdf:RDF', '</rdf:RDF>')
if not data:
break
body = extr('class="article-body-inner">',
@@ -130,15 +132,15 @@ class LivedoorPostExtractor(LivedoorExtractor):
test = (
("http://blog.livedoor.jp/zatsu_ke/archives/51493859.html", {
"url": "8826fe623f19dc868e7538e8519bf8491e92a0a2",
- "keyword": "52fcba9253a000c339bcd658572d252e282626af",
+ "keyword": "83993111d5d0c08d021196802dd36b73f04c7057",
}),
("http://blog.livedoor.jp/amaumauma/archives/7835811.html", {
"url": "fc1d6a9557245b5a27d3a10bf0fa9922ef377215",
- "keyword": "0229072abb5cd8a221df72e0ffdfc13336c0e9ce",
+ "keyword": "fd700760c98897c3125328e157972f905fd34aaa",
}),
("http://blog.livedoor.jp/uotapo/archives/1050616939.html", {
"url": "3f3581807ec4776e6a67ed7985a22494d4bc4904",
- "keyword": "2eb3e383c68e909c4dd3d563c16d0b6e2fe6627b",
+ "keyword": "9e319413a42e08d32f0dcbe8aa3b452ad41aa906",
}),
)
@@ -150,7 +152,6 @@ class LivedoorPostExtractor(LivedoorExtractor):
url = "{}/{}/archives/{}.html".format(
self.root, self.user, self.post_id)
extr = text.extract_from(self.request(url).text)
- data = extr('articles :', '</script>')
- body = extr('class="article-body-inner">',
- 'class="article-footer">')
+ data = extr('<rdf:RDF', '</rdf:RDF>')
+ body = extr('class="article-body-inner">', 'class="article-footer">')
return (self._load(data, body),)
diff --git a/gallery_dl/extractor/myportfolio.py b/gallery_dl/extractor/myportfolio.py
index 1515f53..95799cf 100644
--- a/gallery_dl/extractor/myportfolio.py
+++ b/gallery_dl/extractor/myportfolio.py
@@ -23,9 +23,9 @@ class MyportfolioGalleryExtractor(Extractor):
r"(?:https?://)?([^.]+\.myportfolio\.com))"
r"(/[^/?&#]+)?")
test = (
- ("https://hannahcosgrove.myportfolio.com/robyn", {
- "url": "93b5430e765e53564b13e7d9c64c30c286011a6b",
- "keyword": "25cb3dbdad6b011242a133f30ec598318b7512e8",
+ ("https://hannahcosgrove.myportfolio.com/niamh-1", {
+ "url": "8cbd73a73e5bf3b4f5d1b1d4a1eb114c01a72a66",
+ "keyword": "7a460bb5641e648ae70702ff91c2fb11054b0e0b",
}),
("https://hannahcosgrove.myportfolio.com/lfw", {
"pattern": r"https://hannahcosgrove\.myportfolio\.com/[^/?&#+]+$",
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 4f8ee9c..8e6a74e 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -12,6 +12,7 @@ from .common import Extractor, Message
from .. import text, exception
from ..cache import cache
from datetime import datetime, timedelta
+import hashlib
class PixivExtractor(Extractor):
@@ -395,6 +396,8 @@ class PixivAppAPI():
"""
CLIENT_ID = "MOBrBDS8blbauoSck0ZfDbtuzpyT"
CLIENT_SECRET = "lsACyCD94FhDUtGTXi3QzcFE2uU1hqtDaKeqrdwj"
+ HASH_SECRET = ("28c1fdd170a5204386cb1313c7077b34"
+ "f83e4aaf4aa829ce78c231e05b0bae2c")
def __init__(self, extractor):
self.extractor = extractor
@@ -406,7 +409,6 @@ class PixivAppAPI():
"client-id", self.CLIENT_ID)
self.client_secret = extractor.config(
"client-secret", self.CLIENT_SECRET)
-
extractor.session.headers.update({
"App-OS": "ios",
"App-OS-Version": "10.3.1",
@@ -440,8 +442,15 @@ class PixivAppAPI():
data["username"] = username
data["password"] = password
+ time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00")
+ headers = {
+ "X-Client-Time": time,
+ "X-Client-Hash": hashlib.md5(
+ (time + self.HASH_SECRET).encode()).hexdigest(),
+ }
+
response = self.extractor.request(
- url, method="POST", data=data, fatal=False)
+ url, method="POST", headers=headers, data=data, fatal=False)
if response.status_code >= 400:
raise exception.AuthenticationError()
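
The authentication fix boils down to two extra headers on the token request: the client time and an MD5 digest of that time salted with HASH_SECRET. A minimal standalone sketch of the computation (the client_headers helper is hypothetical):

    import hashlib
    from datetime import datetime

    HASH_SECRET = ("28c1fdd170a5204386cb1313c7077b34"
                   "f83e4aaf4aa829ce78c231e05b0bae2c")

    def client_headers():
        # the server checks that X-Client-Hash == md5(X-Client-Time + secret)
        time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00")
        return {
            "X-Client-Time": time,
            "X-Client-Hash": hashlib.md5(
                (time + HASH_SECRET).encode()).hexdigest(),
        }
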
diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py
index 8567155..a6a3da0 100644
--- a/gallery_dl/extractor/simplyhentai.py
+++ b/gallery_dl/extractor/simplyhentai.py
@@ -8,16 +8,14 @@
"""Extract hentai-manga from https://www.simply-hentai.com/"""
-from .common import GalleryExtractor
+from .common import GalleryExtractor, Extractor, Message
from .. import text, util, exception
-import json
class SimplyhentaiGalleryExtractor(GalleryExtractor):
"""Extractor for image galleries from simply-hentai.com"""
category = "simplyhentai"
archive_fmt = "{image_id}"
- root = "https://www.simply-hentai.com"
pattern = (r"(?:https?://)?(?!videos\.)([\w-]+\.simply-hentai\.com"
r"(?!/(?:album|gifs?|images?|series)(?:/|$))"
r"(?:/(?!(?:page|all-pages)(?:/|\.|$))[^/?&#]+)+)")
@@ -25,7 +23,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
(("https://original-work.simply-hentai.com"
"/amazon-no-hiyaku-amazon-elixir"), {
"url": "258289249990502c3138719cb89e995a60861e49",
- "keyword": "8b2400e4b466e8f46802fa5a6b917d2788bb7e8e",
+ "keyword": "eba83ccdbab3022a2280c77aa747f9458196138b",
}),
("https://www.simply-hentai.com/notfound", {
"exception": exception.GalleryDLException,
@@ -42,30 +40,145 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
self.session.headers["Referer"] = url
def metadata(self, page):
- path = text.extract(page, '<a class="preview" href="', '"')[0]
- if not path:
+ extr = text.extract_from(page)
+ split = text.split_html
+
+ self.chapter_url = extr('<link rel="canonical" href="', '"')
+ title = extr('<meta property="og:title" content="', '"')
+ if not title:
raise exception.NotFoundError("gallery")
- page = self.request(self.root + path).text
- data = json.loads(text.unescape(text.extract(
- page, 'data-react-class="Reader" data-react-props="', '"')[0]))
- self.manga = manga = data["manga"]
-
- return {
- "title" : manga["title"],
- "parody" : manga["series"]["title"],
- "language" : manga["language"]["name"],
- "lang" : util.language_to_code(manga["language"]["name"]),
- "characters": [x["name"] for x in manga["characters"]],
- "tags" : [x["name"] for x in manga["tags"]],
- "artist" : [x["name"] for x in manga["artists"]],
- "gallery_id": text.parse_int(text.extract(
- manga["images"][0]["sizes"]["full"], "/Album/", "/")[0]),
- "date" : text.parse_datetime(
- manga["publish_date"], "%Y-%m-%dT%H:%M:%S.%f%z"),
+ data = {
+ "title" : text.unescape(title),
+ "gallery_id": text.parse_int(extr('/Album/', '/')),
+ "parody" : split(extr('box-title">Series</div>', '</div>')),
+ "language" : text.remove_html(extr(
+ 'box-title">Language</div>', '</div>')) or None,
+ "characters": split(extr('box-title">Characters</div>', '</div>')),
+ "tags" : split(extr('box-title">Tags</div>', '</div>')),
+ "artist" : split(extr('box-title">Artists</div>', '</div>')),
+ "date" : text.parse_datetime(text.remove_html(
+ extr('Uploaded', '</div>')), "%d.%m.%Y"),
}
+ data["lang"] = util.language_to_code(data["language"])
+ return data
def images(self, _):
+ url = self.chapter_url + "/all-pages"
+ headers = {"Accept": "application/json"}
+ images = self.request(url, headers=headers).json()
return [
- (image["sizes"]["full"], {"image_id": image["id"]})
- for image in self.manga["images"]
+ (urls["full"], {"image_id": text.parse_int(image_id)})
+ for image_id, urls in sorted(images.items())
]
+
+
+class SimplyhentaiImageExtractor(Extractor):
+ """Extractor for individual images from simply-hentai.com"""
+ category = "simplyhentai"
+ subcategory = "image"
+ directory_fmt = ("{category}", "{type}s")
+ filename_fmt = "{category}_{token}{title:?_//}.{extension}"
+ archive_fmt = "{token}"
+ pattern = (r"(?:https?://)?(?:www\.)?(simply-hentai\.com"
+ r"/(image|gif)/[^/?&#]+)")
+ test = (
+ (("https://www.simply-hentai.com/image"
+ "/pheromomania-vol-1-kanzenban-isao-3949d8b3-400c-4b6"), {
+ "url": "0338eb137830ab6f81e5f410d3936ef785d063d9",
+ "keyword": "e10e5588481cab68329ef6ec1e5325206b2079a2",
+ }),
+ ("https://www.simply-hentai.com/gif/8915dfcf-0b6a-47c", {
+ "url": "11c060d7ec4dfd0bd105300b6e1fd454674a5af1",
+ "keyword": "dd97a4bb449c397d6fec9f43a1303c0fb168ae65",
+ }),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.page_url = "https://www." + match.group(1)
+ self.type = match.group(2)
+
+ def items(self):
+ extr = text.extract_from(self.request(self.page_url).text)
+ title = extr('"og:title" content="' , '"')
+ descr = extr('"og:description" content="', '"')
+ url = extr('&quot;image&quot;:&quot;' , '&')
+ url = extr("&quot;content&quot;:&quot;", "&") or url
+
+ tags = text.extract(descr, " tagged with ", " online for free ")[0]
+ if tags:
+ tags = tags.split(", ")
+ tags[-1] = tags[-1].partition(" ")[2]
+ else:
+ tags = []
+
+ data = text.nameext_from_url(url, {
+ "title": text.unescape(title) if title else "",
+ "tags": tags,
+ "type": self.type,
+ })
+ data["token"] = data["filename"].rpartition("_")[2]
+
+ yield Message.Version, 1
+ yield Message.Directory, data
+ yield Message.Url, url, data
+
+
+class SimplyhentaiVideoExtractor(Extractor):
+ """Extractor for hentai videos from simply-hentai.com"""
+ category = "simplyhentai"
+ subcategory = "video"
+ directory_fmt = ("{category}", "{type}s")
+ filename_fmt = "{title}{episode:?_//>02}.{extension}"
+ archive_fmt = "{title}_{episode}"
+ pattern = r"(?:https?://)?(videos\.simply-hentai\.com/[^/?&#]+)"
+ test = (
+ ("https://videos.simply-hentai.com/creamy-pie-episode-02", {
+ "pattern": r"https://www\.googleapis\.com/drive/v3/files"
+ r"/0B1ecQ8ZVLm3JcHZzQzBnVy1ZUmc\?alt=media&key=[\w-]+",
+ "keyword": "706790708b14773efc1e075ddd3b738a375348a5",
+ "count": 1,
+ }),
+ (("https://videos.simply-hentai.com"
+ "/1715-tifa-in-hentai-gang-bang-3d-movie"), {
+ "url": "ad9a36ae06c601b6490e3c401834b4949d947eb0",
+ "keyword": "f9dad94fbde9c95859e631ff4f07297a9567b874",
+ }),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.page_url = "https://" + match.group(1)
+
+ def items(self):
+ page = self.request(self.page_url).text
+
+ title, pos = text.extract(page, "<title>", "</title>")
+ tags , pos = text.extract(page, ">Tags</div>", "</div>", pos)
+ date , pos = text.extract(page, ">Upload Date</div>", "</div>", pos)
+ title = title.rpartition(" - ")[0]
+
+ if "<video" in page:
+ video_url = text.extract(page, '<source src="', '"', pos)[0]
+ episode = 0
+ else:
+ # video url from myhentai.tv embed
+ pos = page.index('<div class="video-frame-container">', pos)
+ embed_url = text.extract(page, 'src="', '"', pos)[0].replace(
+ "embedplayer.php?link=", "embed.php?name=")
+ embed_page = self.request(embed_url).text
+ video_url = text.extract(embed_page, '"file":"', '"')[0]
+ title, _, episode = title.rpartition(" Episode ")
+
+ data = text.nameext_from_url(video_url, {
+ "title": text.unescape(title),
+ "episode": text.parse_int(episode),
+ "tags": text.split_html(tags)[::2],
+ "type": "video",
+ "date": text.parse_datetime(text.remove_html(
+ date), "%B %d, %Y %H:%M"),
+ })
+
+ yield Message.Version, 1
+ yield Message.Directory, data
+ yield Message.Url, video_url, data
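
The rewritten images() above relies on requesting the gallery URL plus /all-pages with an "Accept: application/json" header, which (as assumed here) returns a mapping of image ID to size variants. A sketch with plain requests:

    import requests

    def gallery_image_urls(gallery_url):
        # e.g. gallery_url = "https://original-work.simply-hentai.com/..."
        images = requests.get(
            gallery_url + "/all-pages",
            headers={"Accept": "application/json"},
        ).json()
        return [urls["full"] for _, urls in sorted(images.items())]
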
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 024d6e9..8abbaf7 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -109,7 +109,7 @@ class TumblrExtractor(Extractor):
yield self._prepare_image(photo["url"], post)
url = post.get("audio_url") # type: "audio"
- if url:
+ if url and url.startswith("https://a.tumblr.com/"):
yield self._prepare(url, post)
url = post.get("video_url") # type: "video"
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 3672a6d..2fa69d5 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -151,12 +151,15 @@ class TwitterTimelineExtractor(TwitterExtractor):
"""Extractor for all images from a user's timeline"""
subcategory = "timeline"
pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
- r"/([^/?&#]+)/?$")
- test = ("https://twitter.com/supernaturepics", {
- "range": "1-40",
- "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",
- "keyword": "7210d679606240405e0cf62cbc67596e81a7a250",
- })
+ r"/([^/?&#]+)/?(?:$|[?#])")
+ test = (
+ ("https://twitter.com/supernaturepics", {
+ "range": "1-40",
+ "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",
+ "keyword": "7210d679606240405e0cf62cbc67596e81a7a250",
+ }),
+ ("https://mobile.twitter.com/supernaturepics?p=i"),
+ )
def tweets(self):
url = "{}/i/profiles/show/{}/timeline/tweets".format(
@@ -169,10 +172,13 @@ class TwitterMediaExtractor(TwitterExtractor):
subcategory = "media"
pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
r"/([^/?&#]+)/media(?!\w)")
- test = ("https://twitter.com/supernaturepics/media", {
- "range": "1-40",
- "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",
- })
+ test = (
+ ("https://twitter.com/supernaturepics/media", {
+ "range": "1-40",
+ "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",
+ }),
+ ("https://mobile.twitter.com/supernaturepics/media#t"),
+ )
def tweets(self):
url = "{}/i/profiles/show/{}/media_timeline".format(
@@ -206,6 +212,11 @@ class TwitterTweetExtractor(TwitterExtractor):
"options": (("content", True),),
"keyword": "b13b6c4cd0b0c15b2ea7685479e7fedde3c47b9e",
}),
+ # Reply to another tweet (#403)
+ ("https://twitter.com/tyson_hesse/status/1103767554424598528", {
+ "options": (("videos", True),),
+ "pattern": r"ytdl:https://twitter.com/.*/1103767554424598528$",
+ }),
)
def __init__(self, match):
@@ -216,7 +227,9 @@ class TwitterTweetExtractor(TwitterExtractor):
return {"user": self.user, "tweet_id": self.tweet_id}
def tweets(self):
+ self.session.cookies.clear()
url = "{}/{}/status/{}".format(self.root, self.user, self.tweet_id)
page = self.request(url).text
- return (text.extract(
- page, '<div class="tweet ', 'class="js-tweet-stats-container')[0],)
+ end = page.index('class="js-tweet-stats-container')
+ beg = page.rindex('<div class="tweet ', 0, end)
+ return (page[beg:end],)
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 639ec82..6cc5911 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -86,12 +86,15 @@ class VscoExtractor(Extractor):
class VscoUserExtractor(VscoExtractor):
"""Extractor for images from a user on vsco.co"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/images/"
- test = ("https://vsco.co/missuri/images/1", {
- "range": "1-80",
- "count": 80,
- "pattern": r"https://im\.vsco\.co/[^/]+/[0-9a-f/]+/vsco\w+\.\w+",
- })
+ pattern = BASE_PATTERN + r"(?:/images(?:/\d+)?)?/?(?:$|[?#])"
+ test = (
+ ("https://vsco.co/missuri/images/1", {
+ "range": "1-80",
+ "count": 80,
+ "pattern": r"https://im\.vsco\.co/[^/]+/[0-9a-f/]+/vsco\w+\.\w+",
+ }),
+ ("https://vsco.co/missuri"),
+ )
def images(self):
url = "{}/{}/images/1".format(self.root, self.user)
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 6d81e66..b6b5a6f 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -297,7 +297,7 @@ class DownloadJob(Job):
instance = None
self.log.error("'%s:' URLs are not supported/enabled", scheme)
- if klass.scheme == "http":
+ if klass and klass.scheme == "http":
self.downloaders["http"] = self.downloaders["https"] = instance
else:
self.downloaders[scheme] = instance
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index 0dbb796..162eb9e 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -103,20 +103,26 @@ class UgoiraPP(PostProcessor):
# invoke ffmpeg
pathfmt.set_extension(self.extension)
- if self.twopass:
- if "-f" not in args:
- args += ["-f", self.extension]
- args += ["-passlogfile", tempdir + "/ffmpeg2pass", "-pass"]
- self._exec(args + ["1", "-y", os.devnull])
- self._exec(args + ["2", pathfmt.realpath])
+ try:
+ if self.twopass:
+ if "-f" not in args:
+ args += ["-f", self.extension]
+ args += ["-passlogfile", tempdir + "/ffmpeg2pass", "-pass"]
+ self._exec(args + ["1", "-y", os.devnull])
+ self._exec(args + ["2", pathfmt.realpath])
+ else:
+ args.append(pathfmt.realpath)
+ self._exec(args)
+ except OSError as exc:
+ print()
+ self.log.error("Unable to invoke FFmpeg (%s: %s)",
+ exc.__class__.__name__, exc)
+ pathfmt.realpath = pathfmt.temppath
else:
- args.append(pathfmt.realpath)
- self._exec(args)
-
- if self.delete:
- pathfmt.delete = True
- else:
- pathfmt.set_extension("zip")
+ if self.delete:
+ pathfmt.delete = True
+ else:
+ pathfmt.set_extension("zip")
def _exec(self, args):
out = None if self.output else subprocess.DEVNULL
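
For reference, the twopass branch follows FFmpeg's standard two-pass pattern: pass 1 analyzes the input and writes statistics to the -passlogfile, discarding its output, and pass 2 reads those statistics to produce the final file. A sketch of that pattern outside the postprocessor (hypothetical encode_twopass helper; args is a prepared ffmpeg argument list):

    import os
    import subprocess

    def encode_twopass(args, logprefix, output):
        base = args + ["-passlogfile", logprefix, "-pass"]
        subprocess.check_call(base + ["1", "-y", os.devnull])  # analysis pass
        subprocess.check_call(base + ["2", output])            # encoding pass
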
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index cbb8fe7..5209e95 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.10.3"
+__version__ = "1.10.4"
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 78963aa..2213ffa 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -45,6 +45,7 @@ CATEGORY_MAP = {
"jaiminisbox" : "Jaimini's Box",
"kireicake" : "Kirei Cake",
"kissmanga" : "KissManga",
+ "lineblog" : "LINE BLOG",
"livedoor" : "livedoor Blog",
"mangadex" : "MangaDex",
"mangafox" : "Manga Fox",
diff --git a/test/test_results.py b/test/test_results.py
index a69cc81..fb29a87 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -29,8 +29,6 @@ BROKEN = {
"8chan",
"imgth",
"mangapark",
- "mangoxo",
- "tumblr",
}
@@ -286,12 +284,14 @@ def setup_test_config():
config.set(("extractor", "timeout"), 60)
config.set(("extractor", "username"), name)
config.set(("extractor", "password"), name)
- config.set(("extractor", "nijie", "username"), email)
- config.set(("extractor", "seiga", "username"), email)
+ config.set(("extractor", "nijie" , "username"), email)
+ config.set(("extractor", "seiga" , "username"), email)
config.set(("extractor", "danbooru" , "username"), None)
config.set(("extractor", "instagram", "username"), None)
config.set(("extractor", "twitter" , "username"), None)
- config.set(("extractor", "mangoxo" , "password"), "VZ8DL3983u")
+
+ config.set(("extractor", "mangoxo" , "username"), "LiQiang3")
+ config.set(("extractor", "mangoxo" , "password"), "5zbQF10_5u25259Ma")
config.set(("extractor", "deviantart", "client-id"), "7777")
config.set(("extractor", "deviantart", "client-secret"),