author     Unit 193 <unit193@unit193.net>    2020-12-30 18:41:56 -0500
committer  Unit 193 <unit193@unit193.net>    2020-12-30 18:41:56 -0500
commit     aa3aaca425176a2017255ff474c5af2d6bb8525b (patch)
tree       c3f876074e58f3d1d053a459c7e1738526a8788d
parent     176b3d1c174fb9fb74514eed0c1dc1e9636e89b9 (diff)
parent     87a5aa088ce33a1196ff409b76a9ea8233bdc634 (diff)
download   gallery-dl-aa3aaca425176a2017255ff474c5af2d6bb8525b.tar.bz2
           gallery-dl-aa3aaca425176a2017255ff474c5af2d6bb8525b.tar.xz
           gallery-dl-aa3aaca425176a2017255ff474c5af2d6bb8525b.tar.zst
Update upstream source from tag 'upstream/1.16.1'
Update to upstream version '1.16.1' with Debian dir e25ca57b32218e964e40338f4bc0b67a7dec5108
-rw-r--r--  CHANGELOG.md                          22
-rw-r--r--  PKG-INFO                              13
-rw-r--r--  README.rst                            11
-rw-r--r--  data/man/gallery-dl.1                  2
-rw-r--r--  data/man/gallery-dl.conf.5            33
-rw-r--r--  gallery_dl.egg-info/PKG-INFO          13
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt        1
-rw-r--r--  gallery_dl/cache.py                    2
-rw-r--r--  gallery_dl/extractor/__init__.py       1
-rw-r--r--  gallery_dl/extractor/booru.py         30
-rw-r--r--  gallery_dl/extractor/danbooru.py      38
-rw-r--r--  gallery_dl/extractor/e621.py          30
-rw-r--r--  gallery_dl/extractor/gelbooru.py       9
-rw-r--r--  gallery_dl/extractor/hentaicafe.py    20
-rw-r--r--  gallery_dl/extractor/instagram.py    166
-rw-r--r--  gallery_dl/extractor/keenspot.py      11
-rw-r--r--  gallery_dl/extractor/mangadex.py       5
-rw-r--r--  gallery_dl/extractor/mangapanda.py   118
-rw-r--r--  gallery_dl/extractor/moebooru.py      18
-rw-r--r--  gallery_dl/extractor/nijie.py          4
-rw-r--r--  gallery_dl/extractor/pinterest.py     69
-rw-r--r--  gallery_dl/extractor/pixiv.py          5
-rw-r--r--  gallery_dl/extractor/pornhub.py       24
-rw-r--r--  gallery_dl/extractor/reactor.py        7
-rw-r--r--  gallery_dl/extractor/sankaku.py      210
-rw-r--r--  gallery_dl/extractor/seiga.py          6
-rw-r--r--  gallery_dl/extractor/webtoons.py       2
-rw-r--r--  gallery_dl/extractor/wikiart.py        2
-rw-r--r--  gallery_dl/postprocessor/exec.py      12
-rw-r--r--  gallery_dl/util.py                     2
-rw-r--r--  gallery_dl/version.py                  2
-rw-r--r--  test/test_results.py                   2
32 files changed, 516 insertions(+), 374 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c536269..3531352 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,27 @@
# Changelog
+## 1.16.1 - 2020-12-27
+### Additions
+- [instagram] add `include` option ([#1180](https://github.com/mikf/gallery-dl/issues/1180))
+- [pinterest] implement video support ([#1189](https://github.com/mikf/gallery-dl/issues/1189))
+- [sankaku] reimplement login support ([#1176](https://github.com/mikf/gallery-dl/issues/1176), [#1182](https://github.com/mikf/gallery-dl/issues/1182))
+- [sankaku] add support for sankaku.app URLs ([#1193](https://github.com/mikf/gallery-dl/issues/1193))
+### Changes
+- [e621] return pool posts in order ([#1195](https://github.com/mikf/gallery-dl/issues/1195))
+- [hentaicafe] prefer title of `/hc.fyi/` pages ([#1106](https://github.com/mikf/gallery-dl/issues/1106))
+- [hentaicafe] simplify default filenames
+- [sankaku] normalize `created_at` metadata ([#1190](https://github.com/mikf/gallery-dl/issues/1190))
+- [postprocessor:exec] do not add missing `{}` to command ([#1185](https://github.com/mikf/gallery-dl/issues/1185))
+### Fixes
+- [booru] improve error handling
+- [instagram] warn about private profiles ([#1187](https://github.com/mikf/gallery-dl/issues/1187))
+- [keenspot] improve redirect handling
+- [mangadex] respect `chapter-reverse` settings ([#1194](https://github.com/mikf/gallery-dl/issues/1194))
+- [pixiv] output debug message on failed login attempts ([#1192](https://github.com/mikf/gallery-dl/issues/1192))
+- increase SQLite connection timeouts ([#1173](https://github.com/mikf/gallery-dl/issues/1173))
+### Removals
+- [mangapanda] remove module
+
## 1.16.0 - 2020-12-12
### Additions
- [booru] implement generalized extractors for `*booru` and `moebooru` sites
diff --git a/PKG-INFO b/PKG-INFO
index 049e111..ea38c4b 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.16.0
+Version: 1.16.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.0/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.0/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.1/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.1/gallery-dl.bin>`__
These executables include a Python interpreter
and all required Python packages.
@@ -236,6 +236,7 @@ Description: ==========
``instagram``,
``luscious``,
``pinterest``,
+ ``sankaku``,
``subscribestar``,
``tsumino``,
and ``twitter``.
@@ -275,8 +276,8 @@ Description: ==========
option in your configuration file by specifying
- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon
- | (e.g. `cookies.txt <https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg>`__ for Chrome,
- `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/?src=search>`__ for Firefox)
+ | (e.g. `Get cookies.txt <https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/>`__ for Chrome,
+ `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/>`__ for Firefox)
- | a list of name-value pairs gathered from your browser's web developer tools
| (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__,
@@ -331,7 +332,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.0.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.1.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/README.rst b/README.rst
index 4bfb821..03d5367 100644
--- a/README.rst
+++ b/README.rst
@@ -83,8 +83,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.0/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.0/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.1/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.1/gallery-dl.bin>`__
These executables include a Python interpreter
and all required Python packages.
@@ -225,6 +225,7 @@ and optional for
``instagram``,
``luscious``,
``pinterest``,
+``sankaku``,
``subscribestar``,
``tsumino``,
and ``twitter``.
@@ -264,8 +265,8 @@ This can be done via the
option in your configuration file by specifying
- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon
- | (e.g. `cookies.txt <https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg>`__ for Chrome,
- `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/?src=search>`__ for Firefox)
+ | (e.g. `Get cookies.txt <https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/>`__ for Chrome,
+ `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/>`__ for Firefox)
- | a list of name-value pairs gathered from your browser's web developer tools
| (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__,
@@ -320,7 +321,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.0.tar.gz
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.1.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
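
The cookie instructions above allow either a cookies.txt file or raw name-value pairs. A minimal sketch of both forms, set through gallery-dl's config module (the target sites here are placeholders):

from gallery_dl import config

# 1) path to a Mozilla/Netscape-format cookies.txt exported by an addon
config.set(("extractor", "instagram"), "cookies", "~/cookies-instagram.txt")

# 2) name-value pairs copied from the browser's web developer tools
config.set(("extractor", "twitter"), "cookies", {
    "auth_token": "0123456789abcdef",  # placeholder value
})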
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index af6eaf3..c3df997 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2020-12-12" "1.16.0" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2020-12-27" "1.16.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 8c291fb..40efa15 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2020-12-12" "1.16.0" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2020-12-27" "1.16.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -346,6 +346,8 @@ and optional for
.br
* \f[I]pinterest\f[]
.br
+* \f[I]sankaku\f[]
+.br
* \f[I]subscribestar\f[]
.br
* \f[I]tsumino\f[]
@@ -1191,16 +1193,24 @@ Value of the \f[I]orderby\f[] parameter for submission searches.
for details)
-.SS extractor.instagram.highlights
+.SS extractor.instagram.include
.IP "Type:" 6
-\f[I]bool\f[]
+\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
-\f[I]false\f[]
+\f[I]"posts"\f[]
+
+.IP "Example:" 4
+"stories,highlights,posts" or ["stories", "highlights", "posts"]
.IP "Description:" 4
-Include *Story Highlights* when downloading a user profile.
-(requires authentication)
+A (comma-separated) list of subcategories to include
+when processing a user profile.
+
+Possible values are
+\f[I]"posts"\f[], \f[I]"stories"\f[], \f[I]"highlights"\f[], \f[I]"channel"\f[].
+
+You can use \f[I]"all"\f[] instead of listing all values separately.
.SS extractor.instagram.videos
@@ -1335,6 +1345,17 @@ Download subalbums.
Include pins from board sections.
+.SS extractor.pinterest.videos
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download from video pins.
+
+
.SS extractor.pixiv.user.avatar
.IP "Type:" 6
\f[I]bool\f[]
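
The two hunks above document the reworked extractor.instagram.include option and the new extractor.pinterest.videos option. A short sketch of setting both via gallery-dl's config module (same call style as in test/test_results.py further down):

from gallery_dl import config

# include stories, highlights, and timeline posts for a user profile;
# the comma-separated string "stories,highlights,posts" works as well
config.set(("extractor", "instagram"), "include",
           ["stories", "highlights", "posts"])

# skip video pins on Pinterest (the default is true)
config.set(("extractor", "pinterest"), "videos", False)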
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index e0eda0d..7b2006e 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.16.0
+Version: 1.16.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.0/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.0/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.1/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.1/gallery-dl.bin>`__
These executables include a Python interpreter
and all required Python packages.
@@ -236,6 +236,7 @@ Description: ==========
``instagram``,
``luscious``,
``pinterest``,
+ ``sankaku``,
``subscribestar``,
``tsumino``,
and ``twitter``.
@@ -275,8 +276,8 @@ Description: ==========
option in your configuration file by specifying
- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon
- | (e.g. `cookies.txt <https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg>`__ for Chrome,
- `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/?src=search>`__ for Firefox)
+ | (e.g. `Get cookies.txt <https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/>`__ for Chrome,
+ `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/>`__ for Firefox)
- | a list of name-value pairs gathered from your browser's web developer tools
| (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__,
@@ -331,7 +332,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.0.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.1.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index fd1b4a1..3b28345 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -97,7 +97,6 @@ gallery_dl/extractor/mangadex.py
gallery_dl/extractor/mangafox.py
gallery_dl/extractor/mangahere.py
gallery_dl/extractor/mangakakalot.py
-gallery_dl/extractor/mangapanda.py
gallery_dl/extractor/mangapark.py
gallery_dl/extractor/mangareader.py
gallery_dl/extractor/mangastream.py
diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py
index 3886091..a874f63 100644
--- a/gallery_dl/cache.py
+++ b/gallery_dl/cache.py
@@ -210,6 +210,6 @@ try:
os.close(os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600))
DatabaseCacheDecorator.db = sqlite3.connect(
- dbfile, timeout=30, check_same_thread=False)
+ dbfile, timeout=60, check_same_thread=False)
except (OSError, TypeError, sqlite3.OperationalError):
cache = memcache # noqa: F811
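
This hunk, together with the util.py change further down, raises SQLite connection timeouts (#1173). A standalone illustration of what the two arguments do:

import sqlite3

# timeout=60: writers wait up to 60 seconds on a locked database instead of
# failing immediately with sqlite3.OperationalError ("database is locked");
# check_same_thread=False: the one shared connection may be used from
# threads other than the one that created it
con = sqlite3.connect("archive.sqlite3", timeout=60, check_same_thread=False)
con.execute("CREATE TABLE IF NOT EXISTS archive (entry TEXT PRIMARY KEY)")
con.commit()
con.close()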
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 611603e..b38cddc 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -66,7 +66,6 @@ modules = [
"mangafox",
"mangahere",
"mangakakalot",
- "mangapanda",
"mangapark",
"mangareader",
"mangastream",
diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py
index 517df93..64cde80 100644
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -13,6 +13,7 @@ from .. import text, util, exception
from xml.etree import ElementTree
import collections
+import operator
import re
@@ -25,15 +26,25 @@ class BooruExtractor(Extractor):
def items(self):
self.login()
- extended_tags = self.config("tags", False)
data = self.metadata()
+ tags = self.config("tags", False)
+
for post in self.posts():
try:
- url = self._prepare_post(post, extended_tags)
- except KeyError:
+ url = self._file_url(post)
+ if url[0] == "/":
+ url = self.root + url
+ except (KeyError, TypeError):
+ self.log.debug("Unable to fetch download URL for post %s "
+ "(md5: %s)", post.get("id"), post.get("md5"))
continue
+
+ if tags:
+ self._extended_tags(post)
+ self._prepare(post)
post.update(data)
text.nameext_from_url(url, post)
+
yield Message.Directory, post
yield Message.Url, url, post
@@ -53,17 +64,14 @@ class BooruExtractor(Extractor):
"""Return an iterable with post objects"""
return ()
- def _prepare_post(self, post, extended_tags=False):
- url = post["file_url"]
- if url[0] == "/":
- url = self.root + url
- if extended_tags:
- self._fetch_extended_tags(post)
+ _file_url = operator.itemgetter("file_url")
+
+ @staticmethod
+ def _prepare(post):
post["date"] = text.parse_datetime(
post["created_at"], "%a %b %d %H:%M:%S %z %Y")
- return url
- def _fetch_extended_tags(self, post, page=None):
+ def _extended_tags(self, post, page=None):
if not page:
url = "{}/index.php?page=post&s=view&id={}".format(
self.root, post["id"])
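
The refactor above splits the old _prepare_post() into a class-level _file_url accessor plus a _prepare() hook: operator.itemgetter("file_url") stored on the class behaves like a one-argument static lookup, and subclasses override it when a site needs custom URL logic. A toy model of the pattern (class names invented for illustration):

import operator

class Base:
    _file_url = operator.itemgetter("file_url")  # default: post["file_url"]

    def url_for(self, post):
        url = self._file_url(post)
        return "https://example.org" + url if url[0] == "/" else url

class Custom(Base):
    @staticmethod
    def _file_url(post):  # site-specific override, as gelbooru.py does below
        return post["file_url"].replace(".webm", ".mp4")

print(Base().url_for({"file_url": "/data/img.jpg"}))
print(Custom().url_for({"file_url": "https://cdn.example.org/v.webm"}))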
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index ca37cb4..33797f9 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -12,7 +12,6 @@ from .common import Extractor, Message
from .. import text
import datetime
-
BASE_PATTERN = (
r"(?:https?://)?"
r"(danbooru|hijiribe|sonohara|safebooru)"
@@ -33,7 +32,6 @@ class DanbooruExtractor(Extractor):
super().__init__(match)
self.root = "https://{}.donmai.us".format(match.group(1))
self.ugoira = self.config("ugoira", False)
- self.params = {}
username, api_key = self._get_auth_info()
if username:
@@ -71,13 +69,16 @@ class DanbooruExtractor(Extractor):
yield Message.Url, url, post
def metadata(self):
- return {}
+ return ()
def posts(self):
- return self._pagination(self.root + "/posts.json")
+ return ()
+
+ def _pagination(self, endpoint, params=None, pagenum=False):
+ url = self.root + endpoint
- def _pagination(self, url, pagenum=False):
- params = self.params.copy()
+ if params is None:
+ params = {}
params["limit"] = self.per_page
params["page"] = self.page_start
@@ -122,10 +123,14 @@ class DanbooruTagExtractor(DanbooruExtractor):
def __init__(self, match):
super().__init__(match)
- self.params["tags"] = text.unquote(match.group(2).replace("+", " "))
+ self.tags = text.unquote(match.group(2).replace("+", " "))
def metadata(self):
- return {"search_tags": self.params["tags"]}
+ return {"search_tags": self.tags}
+
+ def posts(self):
+ params = {"tags": self.tags}
+ return self._pagination("/posts.json", params)
class DanbooruPoolExtractor(DanbooruExtractor):
@@ -141,15 +146,19 @@ class DanbooruPoolExtractor(DanbooruExtractor):
def __init__(self, match):
super().__init__(match)
self.pool_id = match.group(2)
- self.params["tags"] = "pool:" + self.pool_id
+ self.post_ids = ()
def metadata(self):
url = "{}/pools/{}.json".format(self.root, self.pool_id)
pool = self.request(url).json()
pool["name"] = pool["name"].replace("_", " ")
- del pool["post_ids"]
+ self.post_ids = pool.pop("post_ids")
return {"pool": pool}
+ def posts(self):
+ params = {"tags": "pool:" + self.pool_id}
+ return self._pagination("/posts.json", params)
+
class DanbooruPostExtractor(DanbooruExtractor):
"""Extractor for single danbooru posts"""
@@ -193,10 +202,9 @@ class DanbooruPopularExtractor(DanbooruExtractor):
def __init__(self, match):
super().__init__(match)
- self.params.update(text.parse_query(match.group(2)))
+ self.params = text.parse_query(match.group(2))
def metadata(self):
- self.page_start = self.page_start or 1
scale = self.params.get("scale", "day")
date = self.params.get("date") or datetime.date.today().isoformat()
@@ -209,5 +217,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
return {"date": date, "scale": scale}
def posts(self):
- url = self.root + "/explore/posts/popular.json"
- return self._pagination(url, True)
+ if self.page_start is None:
+ self.page_start = 1
+ return self._pagination(
+ "/explore/posts/popular.json", self.params, True)
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
index 591fe33..4ad19cd 100644
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -11,7 +11,6 @@
from .common import Extractor, Message
from . import danbooru
-
BASE_PATTERN = r"(?:https?://)?e(621|926)\.net"
@@ -39,9 +38,9 @@ class E621Extractor(danbooru.DanbooruExtractor):
file = post["file"]
if not file["url"]:
- ihash = file["md5"]
+ md5 = file["md5"]
file["url"] = "https://static1.{}/data/{}/{}/{}.{}".format(
- self.root[8:], ihash[0:2], ihash[2:4], ihash, file["ext"])
+ self.root[8:], md5[0:2], md5[2:4], md5, file["ext"])
post["filename"] = file["md5"]
post["extension"] = file["ext"]
@@ -69,12 +68,33 @@ class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor):
pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
test = (
("https://e621.net/pools/73", {
- "url": "842f2fb065c7c339486a9b1d689020b8569888ed",
- "content": "c2c87b7a9150509496cddc75ccab08109922876a",
+ "url": "1bd09a72715286a79eea3b7f09f51b3493eb579a",
+ "content": "91abe5d5334425d9787811d7f06d34c77974cd22",
}),
("https://e621.net/pool/show/73"),
)
+ def posts(self):
+ self.log.info("Fetching posts of pool %s", self.pool_id)
+
+ id_to_post = {
+ post["id"]: post
+ for post in self._pagination(
+ "/posts.json", {"tags": "pool:" + self.pool_id})
+ }
+
+ posts = []
+ append = posts.append
+ for num, pid in enumerate(self.post_ids, 1):
+ if pid in id_to_post:
+ post = id_to_post[pid]
+ post["num"] = num
+ append(post)
+ else:
+ self.log.warning("Post %s is unavailable", pid)
+
+ return posts
+
class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
"""Extractor for single e621 posts"""
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index b0614e2..7a28e9c 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -17,11 +17,12 @@ class GelbooruBase():
category = "gelbooru"
root = "https://gelbooru.com"
- def _prepare_post(self, post, extended_tags=False):
- url = booru.BooruExtractor._prepare_post(self, post, extended_tags)
- if url.startswith("https://mp4.gelbooru.com/"):
+ @staticmethod
+ def _file_url(post):
+ url = post["file_url"]
+ if url.startswith(("https://mp4.gelbooru.com/", "https://video-cdn")):
md5 = post["md5"]
- return "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
+ url = "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
md5[0:2], md5[2:4], md5)
return url
diff --git a/gallery_dl/extractor/hentaicafe.py b/gallery_dl/extractor/hentaicafe.py
index e12670a..462d3e9 100644
--- a/gallery_dl/extractor/hentaicafe.py
+++ b/gallery_dl/extractor/hentaicafe.py
@@ -19,6 +19,7 @@ class HentaicafeChapterExtractor(foolslide.FoolslideChapterExtractor):
"""Extractor for manga-chapters from hentai.cafe"""
category = "hentaicafe"
directory_fmt = ("{category}", "{manga}")
+ filename_fmt = "c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}"
pattern = (r"(?:https?://)?(?:www\.)?hentai\.cafe"
r"(/manga/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)")
test = ("https://hentai.cafe/manga/read/saitom-box/en/0/1/", {
@@ -32,13 +33,14 @@ class HentaicafeChapterExtractor(foolslide.FoolslideChapterExtractor):
manga, _, chapter_string = info.partition(" :: ")
data = self._data(self.gallery_url.split("/")[5])
- data["manga"] = manga
+ if "manga" not in data:
+ data["manga"] = manga
data["chapter_string"] = chapter_string.rstrip(" :")
return self.parse_chapter_url(self.gallery_url, data)
@memcache(keyarg=1)
def _data(self, manga):
- return {"artist": [], "tags": []}
+ return {"artist": (), "tags": ()}
class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor):
@@ -50,17 +52,17 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor):
# single chapter
("https://hentai.cafe/hazuki-yuuto-summer-blues/", {
"url": "f8e24a07d6fbb7c6a6ec5ad8ad8faf2436f8751b",
- "keyword": "5af1c570bb5f533a32b3375f9cdaa17a0152ba67",
+ "keyword": "ced644ff94ea22e1991a5e44bf37c38a7e2ac2b3",
}),
# multi-chapter
("https://hentai.cafe/saitom-saitom-box/", {
"url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076",
- "keyword": "3c28517d356cac6acbd9895c9eeefae505304078",
+ "keyword": "4c2262d680286a54357c334c1faca8f1b0e692e9",
}),
# new-style URL
("https://hentai.cafe/hc.fyi/2782", {
"url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076",
- "keyword": "3c28517d356cac6acbd9895c9eeefae505304078",
+ "keyword": "4c2262d680286a54357c334c1faca8f1b0e692e9",
}),
# foolslide URL
("https://hentai.cafe/manga/series/saitom-box/", {
@@ -80,16 +82,18 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor):
chapters.reverse()
return chapters
- url , pos = text.extract(page, '<link rel="canonical" href="', '"')
+ manga , pos = text.extract(page, '<title>', '<')
+ url , pos = text.extract(page, 'rel="canonical" href="', '"', pos)
tags , pos = text.extract(page, "<p>Tags: ", "</br>", pos)
artist, pos = text.extract(page, "\nArtists: ", "</br>", pos)
- manga , pos = text.extract(page, "/manga/read/", "/", pos)
+ key , pos = text.extract(page, "/manga/read/", "/", pos)
data = {
+ "manga" : text.unescape(manga.rpartition(" | ")[0]),
"manga_id": text.parse_int(url.rpartition("/")[2]),
"tags" : text.split_html(tags)[::2],
"artist" : text.split_html(artist),
}
- HentaicafeChapterExtractor._data(manga).update(data)
+ HentaicafeChapterExtractor._data(key).update(data)
return [
(url, data)
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 9870824..930c8b4 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -12,11 +12,13 @@
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
-import itertools
import json
import time
import re
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?instagram\.com"
+USER_PATTERN = BASE_PATTERN + r"/(?!(?:p|tv|reel|explore|stories)/)([^/?#]+)"
+
class InstagramExtractor(Extractor):
"""Base class for instagram extractors"""
@@ -31,6 +33,7 @@ class InstagramExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
+ self.item = match.group(1)
self.www_claim = "0"
self.csrf_token = util.generate_csrf_token()
self._find_tags = re.compile(r"#\w+").findall
@@ -68,15 +71,18 @@ class InstagramExtractor(Extractor):
def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs)
+
if response.history and "/accounts/login/" in response.request.url:
if self._cursor:
self.log.info("Use '-o cursor=%s' to continue downloading "
"from the current position", self._cursor)
raise exception.StopExtraction(
- "Redirected to login page (%s)", response.request.url)
+ "HTTP redirect to login page (%s)", response.request.url)
+
www_claim = response.headers.get("x-ig-set-www-claim")
if www_claim is not None:
self.www_claim = www_claim
+
return response
def _api_request(self, endpoint, params):
@@ -322,10 +328,11 @@ class InstagramExtractor(Extractor):
cursor = self.config("cursor")
if cursor:
return {
- "edges": (),
+ "edges" : (),
"page_info": {
- "end_cursor": cursor,
+ "end_cursor" : cursor,
"has_next_page": True,
+ "_virtual" : True,
},
}
return user[key]
@@ -338,6 +345,10 @@ class InstagramExtractor(Extractor):
info = data["page_info"]
if not info["has_next_page"]:
return
+ elif not data["edges"] and "_virtual" not in info:
+ s = "" if self.item.endswith("s") else "s"
+ raise exception.StopExtraction(
+ "%s'%s posts are private", self.item, s)
variables["after"] = self._cursor = info["end_cursor"]
self.log.debug("Cursor: %s", self._cursor)
@@ -346,80 +357,62 @@ class InstagramExtractor(Extractor):
class InstagramUserExtractor(InstagramExtractor):
- """Extractor for ProfilePage"""
+ """Extractor for an Instagram user profile"""
subcategory = "user"
- pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
- r"/(?!(?:p|explore|directory|accounts|stories|tv|reel)/)"
- r"([^/?#]+)/?(?:$|[?#])")
+ pattern = USER_PATTERN + r"/?(?:$|[?#])"
test = (
- ("https://www.instagram.com/instagram/", {
- "range": "1-16",
- "count": ">= 16",
- }),
- # ("https://www.instagram.com/instagram/", {
- # "options": (("highlights", True),),
- # "pattern": InstagramStoriesExtractor.pattern,
- # "range": "1-2",
- # "count": 2,
- # }),
+ ("https://www.instagram.com/instagram/"),
("https://www.instagram.com/instagram/?hl=en"),
)
- def __init__(self, match):
- InstagramExtractor.__init__(self, match)
- self.user = match.group(1)
+ def items(self):
+ if self.config("highlights"):
+ self.log.warning("'highlights' is deprecated, "
+ "use '\"include\": \"…,highlights\"' instead")
+ default = ("highlights", "posts")
+ else:
+ default = ("posts",)
+
+ base = "{}/{}/".format(self.root, self.item)
+ stories = "{}/stories/{}/".format(self.root, self.item)
+ return self._dispatch_extractors((
+ (InstagramStoriesExtractor , stories),
+ (InstagramHighlightsExtractor, base + "highlights/"),
+ (InstagramPostsExtractor , base + "posts/"),
+ (InstagramChannelExtractor , base + "channel/"),
+ ), default)
+
+
+class InstagramPostsExtractor(InstagramExtractor):
+ """Extractor for ProfilePage posts"""
+ subcategory = "posts"
+ pattern = USER_PATTERN + r"/posts"
+ test = ("https://www.instagram.com/instagram/posts/", {
+ "range": "1-16",
+ "count": ">= 16",
+ })
def posts(self):
- url = "{}/{}/".format(self.root, self.user)
+ url = "{}/{}/".format(self.root, self.item)
user = self._extract_profile_page(url)
- if user.get("highlight_reel_count") and self.config("highlights"):
- query_hash = "d4d88dc1500312af6f937f7b804c68c3"
- variables = {
- "user_id": user["id"],
- "include_chaining": False,
- "include_reel": True,
- "include_suggested_users": False,
- "include_logged_out_extras": False,
- "include_highlight_reels": True,
- "include_live_status": True,
- }
- data = self._graphql_request(query_hash, variables)
- highlights = [
- {
- "__typename": "GraphReel",
- "id" : "highlight:" + edge["node"]["id"],
- }
- for edge in data["user"]["edge_highlight_reels"]["edges"]
- ]
- else:
- highlights = None
-
query_hash = "003056d32c2554def87228bc3fd9668a"
variables = {"id": user["id"], "first": 50}
edge = self._get_edge_data(user, "edge_owner_to_timeline_media")
- posts = self._pagination(query_hash, variables, edge)
-
- return itertools.chain(highlights, posts) if highlights else posts
+ return self._pagination(query_hash, variables, edge)
class InstagramChannelExtractor(InstagramExtractor):
"""Extractor for ProfilePage channel"""
subcategory = "channel"
- pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
- r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)"
- r"([^/?#]+)/channel")
+ pattern = USER_PATTERN + r"/channel"
test = ("https://www.instagram.com/instagram/channel/", {
"range": "1-16",
"count": ">= 16",
})
- def __init__(self, match):
- InstagramExtractor.__init__(self, match)
- self.user = match.group(1)
-
def posts(self):
- url = "{}/{}/channel/".format(self.root, self.user)
+ url = "{}/{}/channel/".format(self.root, self.item)
user = self._extract_profile_page(url)
query_hash = "bc78b344a68ed16dd5d7f264681c4c76"
@@ -431,17 +424,11 @@ class InstagramChannelExtractor(InstagramExtractor):
class InstagramSavedExtractor(InstagramExtractor):
"""Extractor for ProfilePage saved media"""
subcategory = "saved"
- pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
- r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)"
- r"([^/?#]+)/saved")
+ pattern = USER_PATTERN + r"([^/?#]+)/saved"
test = ("https://www.instagram.com/instagram/saved/",)
- def __init__(self, match):
- InstagramExtractor.__init__(self, match)
- self.user = match.group(1)
-
def posts(self):
- url = "{}/{}/saved/".format(self.root, self.user)
+ url = "{}/{}/saved/".format(self.root, self.item)
user = self._extract_profile_page(url)
query_hash = "2ce1d673055b99250e93b6f88f878fde"
@@ -454,22 +441,17 @@ class InstagramTagExtractor(InstagramExtractor):
"""Extractor for TagPage"""
subcategory = "tag"
directory_fmt = ("{category}", "{subcategory}", "{tag}")
- pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
- r"/explore/tags/([^/?#]+)")
+ pattern = BASE_PATTERN + r"/explore/tags/([^/?#]+)"
test = ("https://www.instagram.com/explore/tags/instagram/", {
"range": "1-16",
"count": ">= 16",
})
- def __init__(self, match):
- InstagramExtractor.__init__(self, match)
- self.tag = match.group(1)
-
def metadata(self):
- return {"tag": self.tag}
+ return {"tag": self.item}
def posts(self):
- url = "{}/explore/tags/{}/".format(self.root, self.tag)
+ url = "{}/explore/tags/{}/".format(self.root, self.item)
data = self._extract_shared_data(url)
hashtag = data["entry_data"]["TagPage"][0]["graphql"]["hashtag"]
@@ -599,21 +581,20 @@ class InstagramPostExtractor(InstagramExtractor):
("https://www.instagram.com/reel/CDg_6Y1pxWu/"),
)
- def __init__(self, match):
- InstagramExtractor.__init__(self, match)
- self.shortcode = match.group(1)
-
def posts(self):
query_hash = "a9441f24ac73000fa17fe6e6da11d59d"
variables = {
- "shortcode" : self.shortcode,
+ "shortcode" : self.item,
"child_comment_count" : 3,
"fetch_comment_count" : 40,
"parent_comment_count" : 24,
"has_threaded_comments": True
}
data = self._graphql_request(query_hash, variables)
- return (data["shortcode_media"],)
+ media = data.get("shortcode_media")
+ if not media:
+ raise exception.NotFoundError("post")
+ return (media,)
class InstagramStoriesExtractor(InstagramExtractor):
@@ -644,3 +625,34 @@ class InstagramStoriesExtractor(InstagramExtractor):
reel_id = user["id"]
return ({"__typename": "GraphReel", "id": reel_id},)
+
+
+class InstagramHighlightsExtractor(InstagramExtractor):
+ """Extractor for all Instagram story highlights of a user"""
+ subcategory = "highlights"
+ pattern = USER_PATTERN + r"/highlights"
+ test = ("https://www.instagram.com/instagram/highlights",)
+
+ def posts(self):
+ url = "{}/{}/".format(self.root, self.item)
+ user = self._extract_profile_page(url)
+
+ query_hash = "d4d88dc1500312af6f937f7b804c68c3"
+ variables = {
+ "user_id": user["id"],
+ "include_chaining": False,
+ "include_reel": True,
+ "include_suggested_users": False,
+ "include_logged_out_extras": False,
+ "include_highlight_reels": True,
+ "include_live_status": True,
+ }
+ data = self._graphql_request(query_hash, variables)
+
+ return [
+ {
+ "__typename": "GraphReel",
+ "id" : "highlight:" + edge["node"]["id"],
+ }
+ for edge in data["user"]["edge_highlight_reels"]["edges"]
+ ]
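
InstagramUserExtractor.items() above now delegates to per-subcategory extractors according to the include option; _dispatch_extractors itself is not part of this diff, so the following is only a hedged sketch of the selection it presumably performs (all names illustrative):

def dispatch(extractors, include):
    """Yield (extractor_class, url) pairs whose subcategory is included."""
    if include == "all":
        include = [cls.subcategory for cls, _ in extractors]
    elif isinstance(include, str):
        include = include.split(",")
    wanted = set(include)
    for cls, url in extractors:
        if cls.subcategory in wanted:
            yield cls, url

class Posts:
    subcategory = "posts"

class Highlights:
    subcategory = "highlights"

pairs = (
    (Highlights, "https://www.instagram.com/instagram/highlights/"),
    (Posts, "https://www.instagram.com/instagram/posts/"),
)
for cls, url in dispatch(pairs, "highlights,posts"):
    print(cls.subcategory, url)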
diff --git a/gallery_dl/extractor/keenspot.py b/gallery_dl/extractor/keenspot.py
index 5902333..0cbea67 100644
--- a/gallery_dl/extractor/keenspot.py
+++ b/gallery_dl/extractor/keenspot.py
@@ -39,7 +39,7 @@ class KeenspotComicExtractor(Extractor):
}),
("http://twokinds.keenspot.com/comic/1066/", { # "random" access
"range": "1-3",
- "url": "97e2a6ed8ba1709314f2449f84b6b1ce5db21c04",
+ "url": "6a784e11370abfb343dcad9adbb7718f9b7be350",
})
)
@@ -58,7 +58,14 @@ class KeenspotComicExtractor(Extractor):
yield Message.Version, 1
yield Message.Directory, data
- url = self._first(self.request(self.root + "/").text)
+ with self.request(self.root + "/") as response:
+ if response.history:
+ url = response.request.url
+ self.root = url[:url.index("/", 8)]
+ page = response.text
+ del response
+
+ url = self._first(page)
if self.path:
url = self.root + self.path
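
The keenspot change above follows a site redirect and rebases self.root on the final URL. With the requests library, response.history is non-empty exactly when redirects were followed, and the final location is available on the response; a standalone demonstration:

import requests

response = requests.get("http://twokinds.keenspot.com/")
if response.history:
    url = response.url              # final URL after redirects
    root = url[:url.index("/", 8)]  # scheme + host, as in the diff
    print("redirected to", root)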
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 96c81c7..dca8995 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -196,5 +196,8 @@ class MangadexMangaExtractor(MangadexExtractor):
"_extractor": MangadexChapterExtractor,
})
- results.sort(key=lambda x: (x["chapter"], x["chapter_minor"]))
+ results.sort(
+ key=lambda x: (x["chapter"], x["chapter_minor"]),
+ reverse=self.config("chapter-reverse", False),
+ )
return results
diff --git a/gallery_dl/extractor/mangapanda.py b/gallery_dl/extractor/mangapanda.py
deleted file mode 100644
index 155a9b6..0000000
--- a/gallery_dl/extractor/mangapanda.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2015-2020 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for http://www.mangapanda.com/"""
-
-from .common import ChapterExtractor, MangaExtractor
-from .. import text
-
-
-class MangapandaBase():
- """Base class for mangapanda extractors"""
- category = "mangapanda"
- root = "http://www.mangapanda.com"
-
- @staticmethod
- def parse_page(page, data):
- """Parse metadata on 'page' and add it to 'data'"""
- text.extract_all(page, (
- ("manga" , '<h2 class="aname">', '</h2>'),
- ("release", '>Year of Release:</td>\n<td>', '</td>'),
- ('author' , '>Author:</td>\n<td>', '</td>'),
- ('artist' , '>Artist:</td>\n<td>', '</td>'),
- ), values=data)
- data["manga"] = data["manga"].strip()
- data["author"] = text.unescape(data["author"])
- data["artist"] = text.unescape(data["artist"])
- return data
-
-
-class MangapandaChapterExtractor(MangapandaBase, ChapterExtractor):
- """Extractor for manga-chapters from mangapanda.com"""
- archive_fmt = "{manga}_{chapter}_{page}"
- pattern = r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/?#]+)/(\d+))"
- test = ("http://www.mangapanda.com/red-storm/2", {
- "url": "1f633f776e950531ba9b1e81965316458e785261",
- "keyword": "b24df4b9cc36383fb6a44e06d32a3884a4dcb5fb",
- })
-
- def __init__(self, match):
- path, self.url_title, self.chapter = match.groups()
- ChapterExtractor.__init__(self, match, self.root + path)
-
- def metadata(self, chapter_page):
- page = self.request(self.root + self.url_title).text
- data = self.parse_page(page, {
- "chapter": text.parse_int(self.chapter),
- "lang": "en",
- "language": "English",
- })
- text.extract_all(page, (
- ('title', ' ' + self.chapter + '</a> : ', '</td>'),
- ('date', '<td>', '</td>'),
- ), page.index('<div id="chapterlist">'), data)
- data["count"] = text.parse_int(text.extract(
- chapter_page, '</select> of ', '<')[0]
- )
- return data
-
- def images(self, page):
- while True:
- next_url, image_url, image_data = self.get_image_metadata(page)
- yield image_url, image_data
-
- if not next_url:
- return
- page = self.request(next_url).text
-
- def get_image_metadata(self, page):
- """Collect next url, image-url and metadata for one manga-page"""
- extr = text.extract
- width = None
- test , pos = extr(page, "document['pu']", '')
- if test is None:
- return None, None, None
- if page.find("document['imgwidth']", pos, pos+200) != -1:
- width , pos = extr(page, "document['imgwidth'] = ", ";", pos)
- height, pos = extr(page, "document['imgheight'] = ", ";", pos)
- _ , pos = extr(page, '<div id="imgholder">', '')
- url, pos = extr(page, ' href="', '"', pos)
- if width is None:
- width , pos = extr(page, '<img id="img" width="', '"', pos)
- height, pos = extr(page, ' height="', '"', pos)
- image, pos = extr(page, ' src="', '"', pos)
- return self.root + url, image, {
- "width": text.parse_int(width),
- "height": text.parse_int(height),
- }
-
-
-class MangapandaMangaExtractor(MangapandaBase, MangaExtractor):
- """Extractor for manga from mangapanda.com"""
- chapterclass = MangapandaChapterExtractor
- reverse = False
- pattern = r"(?:https?://)?(?:www\.)?mangapanda\.com(/[^/?#]+)/?$"
- test = ("http://www.mangapanda.com/mushishi", {
- "url": "50a1ba730b85426b904da256c80f68ba6a8a2566",
- "keyword": "031b3ea085921c552de017ecbb9b906e462229c9",
- })
-
- def chapters(self, page):
- results = []
- data = self.parse_page(page, {"lang": "en", "language": "English"})
-
- needle = '<div class="chico_manga"></div>\n<a href="'
- pos = page.index('<div id="chapterlist">')
- while True:
- url, pos = text.extract(page, needle, '"', pos)
- if not url:
- return results
- data["title"], pos = text.extract(page, '</a> : ', '</td>', pos)
- data["date"] , pos = text.extract(page, '<td>', '</td>', pos)
- data["chapter"] = text.parse_int(url.rpartition("/")[2])
- results.append((self.root + url, data.copy()))
diff --git a/gallery_dl/extractor/moebooru.py b/gallery_dl/extractor/moebooru.py
index cbc8680..0ac55cd 100644
--- a/gallery_dl/extractor/moebooru.py
+++ b/gallery_dl/extractor/moebooru.py
@@ -23,16 +23,11 @@ class MoebooruExtractor(BooruExtractor):
filename_fmt = "{category}_{id}_{md5}.{extension}"
page_start = 1
- def _prepare_post(self, post, extended_tags=False):
- url = post["file_url"]
- if url[0] == "/":
- url = self.root + url
- if extended_tags:
- self._fetch_extended_tags(post)
+ @staticmethod
+ def _prepare(post):
post["date"] = text.parse_timestamp(post["created_at"])
- return url
- def _fetch_extended_tags(self, post):
+ def _extended_tags(self, post):
url = "{}/post/show/{}".format(self.root, post["id"])
page = self.request(url).text
html = text.extract(page, '<ul id="tag-', '</ul>')[0]
@@ -217,13 +212,6 @@ EXTRACTORS = {
}),
"test-post": ("https://hypnohub.net/post/show/73964", {
"content": "02d5f5a8396b621a6efc04c5f8ef1b7225dfc6ee",
- "options": (("tags", True),),
- "keyword": {
- "tags_artist": "gonoike_biwa icontrol_(manipper)",
- "tags_character": "komaru_naegi",
- "tags_copyright": "dangan_ronpa dangan_ronpa_another_episode",
- "tags_general": str,
- },
}),
"test-popular": (
("https://hypnohub.net/post/popular_by_month?month=6&year=2014", {
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 2394acf..e558513 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -100,6 +100,10 @@ class NijieExtractor(AsynchronousMixin, Extractor):
@cache(maxage=150*24*3600, keyarg=1)
def _login_impl(self, username, password):
+ if not username or not password:
+ raise exception.AuthenticationError(
+ "Username and password required")
+
self.log.info("Logging in as %s", username)
url = "{}/login_int.php".format(self.root)
data = {"email": username, "password": password, "save": "on"}
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index aa11289..739e67e 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -14,7 +14,6 @@ from ..cache import cache
import itertools
import json
-
BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"
@@ -31,29 +30,59 @@ class PinterestExtractor(Extractor):
def items(self):
self.api.login()
data = self.metadata()
- yield Message.Version, 1
- yield Message.Directory, data
+ videos = self.config("videos", True)
+ yield Message.Directory, data
for pin in self.pins():
- if "images" in pin:
- url, pin_data = self.data_from_pin(pin)
- pin_data.update(data)
- yield Message.Url, url, pin_data
+
+ try:
+ media = self._media_from_pin(pin)
+ except Exception:
+ self.log.debug("Unable to fetch download URL for pin %s",
+ pin.get("id"))
+ continue
+
+ if not videos and media.get("duration") is not None:
+ continue
+
+ pin.update(data)
+ pin.update(media)
+ url = media["url"]
+ text.nameext_from_url(url, pin)
+
+ if pin["extension"] == "m3u8":
+ url = "ytdl:" + url
+ pin["extension"] = "mp4"
+ pin["_ytdl_extra"] = {"protocol": "m3u8_native"}
+
+ yield Message.Url, url, pin
def metadata(self):
"""Return general metadata"""
def pins(self):
- """Return all relevant pin-objects"""
+ """Return all relevant pin objects"""
@staticmethod
- def data_from_pin(pin):
- """Get image url and metadata from a pin-object"""
- img = pin["images"]["orig"]
- url = img["url"]
- pin["width"] = img["width"]
- pin["height"] = img["height"]
- return url, text.nameext_from_url(url, pin)
+ def _media_from_pin(pin):
+ videos = pin.get("videos")
+ if videos:
+ video_formats = videos["video_list"]
+
+ for fmt in ("V_HLSV4", "V_HLSV3_WEB", "V_HLSV3_MOBILE"):
+ if fmt in video_formats:
+ media = video_formats[fmt]
+ break
+ else:
+ media = max(video_formats.values(),
+ key=lambda x: x.get("width", 0))
+
+ if "V_720P" in video_formats:
+ media["_fallback"] = (video_formats["V_720P"]["url"],)
+
+ return media
+
+ return pin["images"]["orig"]
class PinterestPinExtractor(PinterestExtractor):
@@ -66,6 +95,11 @@ class PinterestPinExtractor(PinterestExtractor):
"content": ("4c435a66f6bb82bb681db2ecc888f76cf6c5f9ca",
"d3e24bc9f7af585e8c23b9136956bd45a4d9b947"),
}),
+ # video pin (#1189)
+ ("https://www.pinterest.com/pin/422564377542934214/", {
+ "pattern": r"https://v\.pinimg\.com/videos/mc/hls/d7/22/ff"
+ r"/d722ff00ab2352981b89974b37909de8.m3u8",
+ }),
("https://www.pinterest.com/pin/858146903966145188/", {
"exception": exception.NotFoundError,
}),
@@ -78,7 +112,7 @@ class PinterestPinExtractor(PinterestExtractor):
def metadata(self):
self.pin = self.api.pin(self.pin_id)
- return self.data_from_pin(self.pin)[1]
+ return self.pin
def pins(self):
return (self.pin,)
@@ -173,8 +207,7 @@ class PinterestRelatedPinExtractor(PinterestPinExtractor):
})
def metadata(self):
- pin = self.api.pin(self.pin_id)
- return {"original_pin": self.data_from_pin(pin)[1]}
+ return {"original_pin": self.api.pin(self.pin_id)}
def pins(self):
return self.api.pin_related(self.pin_id)
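
_media_from_pin() above implements the new video support (#1189): prefer known HLS variants, otherwise fall back to the widest stream, and keep a V_720P direct URL as a download fallback. The same selection on made-up data:

video_formats = {
    "V_720P"     : {"url": "https://v.example.org/720.mp4", "width": 720},
    "V_HLSV3_WEB": {"url": "https://v.example.org/pl.m3u8", "width": 1080},
}

for fmt in ("V_HLSV4", "V_HLSV3_WEB", "V_HLSV3_MOBILE"):
    if fmt in video_formats:
        media = video_formats[fmt]
        break
else:
    media = max(video_formats.values(), key=lambda x: x.get("width", 0))

if "V_720P" in video_formats:
    media["_fallback"] = (video_formats["V_720P"]["url"],)

print(media["url"])  # the V_HLSV3_WEB playlist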
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index a813d0e..8aee058 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -522,6 +522,10 @@ class PixivAppAPI():
@cache(maxage=3600, keyarg=1)
def _login_impl(self, username, password):
+ if not username or not password:
+ raise exception.AuthenticationError(
+ "Username and password required")
+
url = "https://oauth.secure.pixiv.net/auth/token"
data = {
"client_id": self.client_id,
@@ -550,6 +554,7 @@ class PixivAppAPI():
response = self.extractor.request(
url, method="POST", headers=headers, data=data, fatal=False)
if response.status_code >= 400:
+ self.log.debug(response.text)
raise exception.AuthenticationError()
data = response.json()["response"]
diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py
index 1856c82..61e3d41 100644
--- a/gallery_dl/extractor/pornhub.py
+++ b/gallery_dl/extractor/pornhub.py
@@ -29,26 +29,26 @@ class PornhubGalleryExtractor(PornhubExtractor):
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/album/(\d+)"
test = (
- ("https://www.pornhub.com/album/17218841", {
+ ("https://www.pornhub.com/album/19289801", {
"pattern": r"https://\w+.phncdn.com/pics/albums/\d+/\d+/\d+/\d+/",
- "count": 81,
+ "count": 308,
"keyword": {
- "id": int,
- "num": int,
- "score": int,
- "views": int,
+ "id" : int,
+ "num" : int,
+ "score" : int,
+ "views" : int,
"caption": str,
- "user": "Unknown",
+ "user" : "Danika Mori",
"gallery": {
- "id" : 17218841,
+ "id" : 19289801,
"score": int,
"views": int,
"tags" : list,
- "title": "Hentai/Ecchi 41",
+ "title": "Danika Mori Best Moments",
},
},
}),
- ("https://www.pornhub.com/album/37180171", {
+ ("https://www.pornhub.com/album/69040172", {
"exception": exception.AuthorizationError,
}),
)
@@ -118,10 +118,10 @@ class PornhubGalleryExtractor(PornhubExtractor):
class PornhubUserExtractor(PornhubExtractor):
"""Extractor for all galleries of a pornhub user"""
subcategory = "user"
- pattern = (BASE_PATTERN + r"/(users|model)/([^/?#]+)"
+ pattern = (BASE_PATTERN + r"/(users|model|pornstar)/([^/?#]+)"
"(?:/photos(?:/(public|private|favorites))?)?/?$")
test = (
- ("https://www.pornhub.com/users/flyings0l0/photos/public", {
+ ("https://www.pornhub.com/pornstar/danika-mori/photos", {
"pattern": PornhubGalleryExtractor.pattern,
"count": ">= 6",
}),
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index cfbab1d..aa0ba6d 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -231,12 +231,13 @@ class JoyreactorSearchExtractor(ReactorSearchExtractor):
category = "joyreactor"
pattern = JR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)"
test = (
- ("http://joyreactor.cc/search/Cirno+Gifs", {
+ ("http://joyreactor.cc/search/Cirno", {
"range": "1-25",
"count": ">= 20",
}),
- ("http://joyreactor.com/search?q=Cirno+Gifs", {
- "count": 0, # no search results on joyreactor.com
+ ("http://joyreactor.com/search?q=Cirno", {
+ "range": "1-25",
+ "count": ">= 20",
}),
)
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 438dd9f..9e64eac 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -6,13 +6,15 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://chan.sankakucomplex.com/"""
+"""Extractors for https://sankaku.app/"""
from .booru import BooruExtractor
from .. import text, exception
+from ..cache import cache
import collections
-BASE_PATTERN = r"(?:https?://)?(?:beta|chan)\.sankakucomplex\.com"
+BASE_PATTERN = r"(?:https?://)?" \
+ r"(?:sankaku\.app|(?:beta|chan)\.sankakucomplex\.com)"
class SankakuExtractor(BooruExtractor):
@@ -20,8 +22,8 @@ class SankakuExtractor(BooruExtractor):
basecategory = "booru"
category = "sankaku"
filename_fmt = "{category}_{id}_{md5}.{extension}"
- request_interval_min = 1.0
- per_page = 100
+ cookiedomain = None
+ _warning = True
TAG_TYPES = {
0: "general",
@@ -36,17 +38,24 @@ class SankakuExtractor(BooruExtractor):
9: "meta",
}
- def _prepare_post(self, post, extended_tags=False):
+ def skip(self, num):
+ return 0
+
+ def _file_url(self, post):
url = post["file_url"]
- if url[0] == "/":
- url = self.root + url
- if extended_tags:
- self._fetch_extended_tags(post)
- post["date"] = text.parse_timestamp(post["created_at"]["s"])
- post["tags"] = [tag["name"] for tag in post["tags"]]
+ if not url and self._warning:
+ self.log.warning(
+ "Login required to download 'contentious_content' posts")
+ SankakuExtractor._warning = False
return url
- def _fetch_extended_tags(self, post):
+ @staticmethod
+ def _prepare(post):
+ post["created_at"] = post["created_at"]["s"]
+ post["date"] = text.parse_timestamp(post["created_at"])
+ post["tags"] = [tag["name"] for tag in post["tags"]]
+
+ def _extended_tags(self, post):
tags = collections.defaultdict(list)
types = self.TAG_TYPES
for tag in post["tags"]:
@@ -54,44 +63,21 @@ class SankakuExtractor(BooruExtractor):
for key, value in tags.items():
post["tags_" + key] = value
- def _api_request(self, endpoint, params=None):
- url = "https://capi-v2.sankakucomplex.com" + endpoint
- while True:
- response = self.request(url, params=params, fatal=False)
- if response.status_code == 429:
- self.wait(until=response.headers.get("X-RateLimit-Reset"))
- continue
- return response.json()
-
- def _pagination(self, params):
- params["lang"] = "en"
- params["limit"] = str(self.per_page)
-
- while True:
- data = self._api_request("/posts/keyset", params)
- if not data.get("success", True):
- raise exception.StopExtraction(data.get("code"))
- yield from data["data"]
-
- params["next"] = data["meta"]["next"]
- if not params["next"]:
- return
- if "page" in params:
- del params["page"]
-
class SankakuTagExtractor(SankakuExtractor):
- """Extractor for images from chan.sankakucomplex.com by search-tags"""
+ """Extractor for images from sankaku.app by search-tags"""
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = BASE_PATTERN + r"/\?([^#]*)"
test = (
- ("https://beta.sankakucomplex.com/?tags=bonocho", {
+ ("https://sankaku.app/?tags=bonocho", {
"count": 5,
"pattern": r"https://c?s\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
}),
+ ("https://beta.sankakucomplex.com/?tags=bonocho"),
+ ("https://chan.sankakucomplex.com/?tags=bonocho"),
# error on five or more tags
("https://chan.sankakucomplex.com/?tags=bonocho+a+b+c+d", {
"options": (("username", None),),
@@ -111,19 +97,21 @@ class SankakuTagExtractor(SankakuExtractor):
return {"search_tags": self.tags}
def posts(self):
- return self._pagination({"tags": self.tags})
+ params = {"tags": self.tags}
+ return SankakuAPI(self).posts_keyset(params)
class SankakuPoolExtractor(SankakuExtractor):
- """Extractor for image pools or books from chan.sankakucomplex.com"""
+ """Extractor for image pools or books from sankaku.app"""
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name_en]}")
archive_fmt = "p_{pool}_{id}"
pattern = BASE_PATTERN + r"/(?:books|pool/show)/(\d+)"
test = (
- ("https://beta.sankakucomplex.com/books/90", {
+ ("https://sankaku.app/books/90", {
"count": 5,
}),
+ ("https://beta.sankakucomplex.com/books/90"),
("https://chan.sankakucomplex.com/pool/show/90"),
)
@@ -132,7 +120,7 @@ class SankakuPoolExtractor(SankakuExtractor):
self.pool_id = match.group(1)
def metadata(self):
- pool = self._api_request("/pools/" + self.pool_id)
+ pool = SankakuAPI(self).pools(self.pool_id)
self._posts = pool.pop("posts")
return {"pool": pool}
@@ -141,12 +129,12 @@ class SankakuPoolExtractor(SankakuExtractor):
class SankakuPostExtractor(SankakuExtractor):
- """Extractor for single images from chan.sankakucomplex.com"""
+ """Extractor for single posts from sankaku.app"""
subcategory = "post"
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/post/show/(\d+)"
test = (
- ("https://beta.sankakucomplex.com/post/show/360451", {
+ ("https://sankaku.app/post/show/360451", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
"options": (("tags", True),),
"keyword": {
@@ -158,6 +146,12 @@ class SankakuPostExtractor(SankakuExtractor):
"tags_general" : list,
},
}),
+ # 'contentious_content'
+ ("https://sankaku.app/post/show/21418978", {
+ "pattern": r"https://s\.sankakucomplex\.com"
+ r"/data/13/3c/133cda3bfde249c504284493903fb985\.jpg",
+ }),
+ ("https://beta.sankakucomplex.com/post/show/360451"),
("https://chan.sankakucomplex.com/post/show/360451"),
)
@@ -166,4 +160,128 @@ class SankakuPostExtractor(SankakuExtractor):
self.post_id = match.group(1)
def posts(self):
- return self._pagination({"tags": "id:" + self.post_id})
+ return SankakuAPI(self).posts(self.post_id)
+
+
+class SankakuAPI():
+ """Interface for the sankaku.app API"""
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.headers = {"Accept": "application/vnd.sankaku.api+json;v=2"}
+
+ self.username, self.password = self.extractor._get_auth_info()
+ if not self.username:
+ self.authenticate = lambda: None
+
+ def pools(self, pool_id):
+ params = {"lang": "en"}
+ return self._call("/pools/" + pool_id, params)
+
+ def posts(self, post_id):
+ params = {
+ "lang" : "en",
+ "page" : "1",
+ "limit": "1",
+ "tags" : "id_range:" + post_id,
+ }
+ return self._call("/posts", params)
+
+ def posts_keyset(self, params):
+ return self._pagination("/posts/keyset", params)
+
+ def authenticate(self):
+ self.headers["Authorization"] = \
+ _authenticate_impl(self.extractor, self.username, self.password)
+
+ def _call(self, endpoint, params=None):
+ url = "https://capi-v2.sankakucomplex.com" + endpoint
+ for _ in range(5):
+ self.authenticate()
+ response = self.extractor.request(
+ url, params=params, headers=self.headers, fatal=False)
+
+ if response.status_code == 429:
+ self.extractor.wait(
+ until=response.headers.get("X-RateLimit-Reset"))
+ continue
+
+ data = response.json()
+ try:
+ success = data.get("success", True)
+ except AttributeError:
+ success = True
+ if not success:
+ code = data.get("code")
+ if code == "invalid_token":
+ _authenticate_impl.invalidate(self.username)
+ continue
+ raise exception.StopExtraction(code)
+ return data
+
+ def _pagination(self, endpoint, params):
+ params["lang"] = "en"
+ params["limit"] = str(self.extractor.per_page)
+
+ while True:
+ data = self._call(endpoint, params)
+ yield from data["data"]
+
+ params["next"] = data["meta"]["next"]
+ if not params["next"]:
+ return
+
+
+@cache(maxage=365*24*3600, keyarg=1)
+def _authenticate_impl(extr, username, password):
+ extr.log.info("Logging in as %s", username)
+ headers = {"Accept": "application/vnd.sankaku.api+json;v=2"}
+
+ # get initial access_token
+ url = "https://login.sankakucomplex.com/auth/token"
+ data = {"login": username, "password": password}
+ response = extr.request(
+ url, method="POST", headers=headers, json=data, fatal=False)
+ data = response.json()
+
+ if response.status_code >= 400 or not data.get("success"):
+ raise exception.AuthenticationError(data.get("error"))
+ access_token = data["access_token"]
+
+ # start openid auth
+ url = "https://login.sankakucomplex.com/oidc/auth"
+ params = {
+ "response_type": "code",
+ "scope" : "openid",
+ "client_id" : "sankaku-web-app",
+ "redirect_uri" : "https://sankaku.app/sso/callback",
+ "state" : "return_uri=https://sankaku.app/",
+ "theme" : "black",
+ "lang" : "undefined",
+ }
+ page = extr.request(url, params=params).text
+ submit_url = text.extract(page, 'submitUrl = "', '"')[0]
+
+ # get code from initial access_token
+ url = "https://login.sankakucomplex.com" + submit_url
+ data = {
+ "accessToken": access_token,
+ "nonce" : "undefined",
+ }
+ response = extr.request(url, method="POST", data=data)
+ query = text.parse_query(response.request.url.partition("?")[2])
+
+ # get final access_token from code
+ url = "https://capi-v2.sankakucomplex.com/sso/finalize?lang=en"
+ data = {
+ "code" : query["code"],
+ "client_id" : "sankaku-web-app",
+ "redirect_uri": "https://sankaku.app/sso/callback",
+ }
+ response = extr.request(
+ url, method="POST", headers=headers, json=data, fatal=False)
+ data = response.json()
+
+ if response.status_code >= 400 or not data.get("success"):
+ raise exception.AuthenticationError(data.get("error"))
+ return "Bearer " + data["access_token"]
diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py
index b32a170..7f9130d 100644
--- a/gallery_dl/extractor/seiga.py
+++ b/gallery_dl/extractor/seiga.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2019 Mike Fährmann
+# Copyright 2016-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -54,6 +54,10 @@ class SeigaExtractor(Extractor):
@cache(maxage=7*24*3600, keyarg=1)
def _login_impl(self, username, password):
+ if not username or not password:
+ raise exception.AuthenticationError(
+ "Username and password required")
+
self.log.info("Logging in as %s", username)
url = "https://account.nicovideo.jp/api/v1/login"
data = {"mail_tel": username, "password": password}
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index a3dc6a0..5d3ca89 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -47,7 +47,7 @@ class WebtoonsEpisodeExtractor(WebtoonsExtractor):
(("https://www.webtoons.com/en/comedy/safely-endangered"
"/ep-572-earth/viewer?title_no=352&episode_no=572"), {
"url": "11041d71a3f92728305c11a228e77cf0f7aa02ef",
- "content": "4f7701a750368e377d65900e6e8f64a5f9cb9c86",
+ "content": "1ce950324f14018b691c42b0ede57fa25618abeb",
"count": 5,
}),
)
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py
index 4efc92c..9238590 100644
--- a/gallery_dl/extractor/wikiart.py
+++ b/gallery_dl/extractor/wikiart.py
@@ -116,7 +116,7 @@ class WikiartArtistsExtractor(WikiartExtractor):
pattern = (BASE_PATTERN + r"/artists-by-([\w-]+)/([\w-]+)")
test = ("https://www.wikiart.org/en/artists-by-century/12", {
"pattern": WikiartArtistExtractor.pattern,
- "count": 7,
+ "count": ">= 8",
})
def __init__(self, match):
diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py
index 205f42e..5a54a77 100644
--- a/gallery_dl/postprocessor/exec.py
+++ b/gallery_dl/postprocessor/exec.py
@@ -30,8 +30,6 @@ class ExecPP(PostProcessor):
args = options["command"]
if isinstance(args, str):
- if "{}" not in args:
- args += " {}"
self.args = args
execute = self.exec_string
else:
@@ -77,12 +75,12 @@ class ExecPP(PostProcessor):
self.log.debug("Running '%s'", args)
retcode = subprocess.Popen(args, shell=shell).wait()
if retcode:
- self.log.warning(
- "Executing '%s' returned with non-zero exit status (%d)",
- " ".join(args) if isinstance(args, list) else args, retcode)
+ self.log.warning("'%s' returned with non-zero exit status (%d)",
+ args, retcode)
- def _exec_async(self, args):
- subprocess.Popen(args, shell=self.shell)
+ def _exec_async(self, args, shell):
+ self.log.debug("Running '%s'", args)
+ subprocess.Popen(args, shell=shell)
__postprocessor__ = ExecPP
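
The exec change above stops appending a missing `{}` to string commands (#1185), so the file-path placeholder must now be written explicitly. A config sketch (assumed option layout for the exec postprocessor, following its documented "command" option):

from gallery_dl import config

config.set((), "postprocessors", [{
    "name"   : "exec",
    "command": "convert {} {}.png",  # "{}" expands to the file path; it is
                                     # no longer added automatically
}])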
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 4c0d17b..d91d29a 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -956,7 +956,7 @@ class PathFormat():
class DownloadArchive():
def __init__(self, path, extractor):
- con = sqlite3.connect(path)
+ con = sqlite3.connect(path, timeout=60, check_same_thread=False)
con.isolation_level = None
self.close = con.close
self.cursor = con.cursor()
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 0b01ad2..21541be 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.16.0"
+__version__ = "1.16.1"
diff --git a/test/test_results.py b/test/test_results.py
index 4e9f4b2..f7356d5 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -30,7 +30,6 @@ TRAVIS_SKIP = {
# temporary issues, etc.
BROKEN = {
- "dokireader",
"imagevenue",
"photobucket",
}
@@ -312,6 +311,7 @@ def setup_test_config():
config.set(("extractor", "nijie") , "username", email)
config.set(("extractor", "seiga") , "username", email)
config.set(("extractor", "pinterest") , "username", email2)
+ config.set(("extractor", "pinterest") , "username", None) # login broken
config.set(("extractor", "newgrounds"), "username", "d1618111")
config.set(("extractor", "newgrounds"), "password", "d1618111")