author    Unit 193 <unit193@unit193.net>  2021-02-16 21:36:00 -0500
committer Unit 193 <unit193@unit193.net>  2021-02-16 21:36:00 -0500
commit    b99b946e32279961452a2c1143d9cc1b1c2db32b (patch)
tree      4644c7bd326a5ded783609ac3aafa0c357815603
parent    039a989d1d4351612c15d117ce0c388eb816b0c1 (diff)
parent    fc83315c164afd74734adf27e0f7fec2011904aa (diff)
download  gallery-dl-b99b946e32279961452a2c1143d9cc1b1c2db32b.tar.bz2
          gallery-dl-b99b946e32279961452a2c1143d9cc1b1c2db32b.tar.xz
          gallery-dl-b99b946e32279961452a2c1143d9cc1b1c2db32b.tar.zst
Update upstream source from tag 'upstream/1.16.5'
Update to upstream version '1.16.5' with Debian dir cf3daef99ed1e95e13091ce2976ff5f6c8dbecf6
-rw-r--r--  CHANGELOG.md                          |  26
-rw-r--r--  PKG-INFO                              |  12
-rw-r--r--  README.rst                            |  10
-rw-r--r--  data/man/gallery-dl.1                 |   2
-rw-r--r--  data/man/gallery-dl.conf.5            |  15
-rw-r--r--  gallery_dl.egg-info/PKG-INFO          |  12
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt       |   2
-rw-r--r--  gallery_dl/downloader/http.py         |   6
-rw-r--r--  gallery_dl/extractor/2chan.py         |   4
-rw-r--r--  gallery_dl/extractor/500px.py         |   4
-rw-r--r--  gallery_dl/extractor/__init__.py      |   2
-rw-r--r--  gallery_dl/extractor/behance.py       |  17
-rw-r--r--  gallery_dl/extractor/deviantart.py    |  46
-rw-r--r--  gallery_dl/extractor/erome.py         | 131
-rw-r--r--  gallery_dl/extractor/furaffinity.py   |  20
-rw-r--r--  gallery_dl/extractor/hentaifox.py     |   3
-rw-r--r--  gallery_dl/extractor/imagehosts.py    |  48
-rw-r--r--  gallery_dl/extractor/inkbunny.py      |  11
-rw-r--r--  gallery_dl/extractor/kemonoparty.py   |  83
-rw-r--r--  gallery_dl/extractor/mangadex.py      |   5
-rw-r--r--  gallery_dl/extractor/nsfwalbum.py     |  15
-rw-r--r--  gallery_dl/extractor/oauth.py         |  73
-rw-r--r--  gallery_dl/extractor/paheal.py        |   8
-rw-r--r--  gallery_dl/extractor/patreon.py       |  23
-rw-r--r--  gallery_dl/extractor/pillowfort.py    | 136
-rw-r--r--  gallery_dl/extractor/pixiv.py         |  55
-rw-r--r--  gallery_dl/extractor/sankaku.py       |   2
-rw-r--r--  gallery_dl/extractor/slideshare.py    |   4
-rw-r--r--  gallery_dl/extractor/twitter.py       |   8
-rw-r--r--  gallery_dl/extractor/vanillarock.py   |   6
-rw-r--r--  gallery_dl/extractor/webtoons.py      |   1
-rw-r--r--  gallery_dl/postprocessor/metadata.py  |   2
-rw-r--r--  gallery_dl/version.py                 |   4
33 files changed, 631 insertions(+), 165 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8629536..893b944 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,31 @@
# Changelog
+## 1.16.5 - 2021-02-14
+### Additions
+- [behance] support `video` modules ([#1282](https://github.com/mikf/gallery-dl/issues/1282))
+- [erome] add `album`, `user`, and `search` extractors ([#409](https://github.com/mikf/gallery-dl/issues/409))
+- [hentaifox] support searching by group ([#1294](https://github.com/mikf/gallery-dl/issues/1294))
+- [imgclick] add `image` extractor ([#1307](https://github.com/mikf/gallery-dl/issues/1307))
+- [kemonoparty] extract inline images ([#1286](https://github.com/mikf/gallery-dl/issues/1286))
+- [kemonoparty] support URLs with non-numeric user and post IDs ([#1303](https://github.com/mikf/gallery-dl/issues/1303))
+- [pillowfort] add `user` and `post` extractors ([#846](https://github.com/mikf/gallery-dl/issues/846))
+### Changes
+- [kemonoparty] include `service` in directories and archive keys
+- [pixiv] require a `refresh-token` to login ([#1304](https://github.com/mikf/gallery-dl/issues/1304))
+- [snap] use `core18` as base
+### Fixes
+- [500px] update query hashes
+- [deviantart] update parameters for `/browse/popular` ([#1267](https://github.com/mikf/gallery-dl/issues/1267))
+- [deviantart] provide filename extension for original file downloads ([#1272](https://github.com/mikf/gallery-dl/issues/1272))
+- [deviantart] fix `folders` option ([#1302](https://github.com/mikf/gallery-dl/issues/1302))
+- [inkbunny] add `sid` parameter to private file downloads ([#1281](https://github.com/mikf/gallery-dl/issues/1281))
+- [kemonoparty] fix absolute file URLs
+- [mangadex] revert to `https://mangadex.org/api/` and add `api-server` option ([#1310](https://github.com/mikf/gallery-dl/issues/1310))
+- [nsfwalbum] use fallback for deleted content ([#1259](https://github.com/mikf/gallery-dl/issues/1259))
+- [sankaku] update `invalid token` detection ([#1309](https://github.com/mikf/gallery-dl/issues/1309))
+- [slideshare] fix extraction
+- [postprocessor:metadata] fix crash with `extension-format` ([#1285](https://github.com/mikf/gallery-dl/issues/1285))
+
## 1.16.4 - 2021-01-23
### Additions
- [furaffinity] add `descriptions` option ([#1231](https://github.com/mikf/gallery-dl/issues/1231))
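The pixiv, mangadex, and pillowfort entries in the 1.16.5 changelog above all introduce or change configuration options. A minimal sketch of setting them, assuming the usual ~/.config/gallery-dl/config.json location and placeholder values (not part of this diff):

    import json
    import pathlib

    config = {
        "extractor": {
            "pixiv": {
                # 1.16.5 drops username/password login for pixiv;
                # obtain a token via `gallery-dl oauth:pixiv`
                "refresh-token": "<your-refresh-token>",
            },
            "mangadex": {
                # new option; this value is the default API server
                "api-server": "https://mangadex.org/api",
            },
            "pillowfort": {
                # new option documented in gallery-dl.conf(5) below
                "reblogs": False,
            },
        },
    }

    path = pathlib.Path("~/.config/gallery-dl/config.json").expanduser()
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(config, indent=4))
    print("wrote", path)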
diff --git a/PKG-INFO b/PKG-INFO
index bdacf73..a89521e 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.16.4
+Version: 1.16.5
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.5/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.5/gallery-dl.bin>`__
These executables include a Python interpreter
and all required Python packages.
@@ -225,7 +225,7 @@ Description: ==========
Some extractors require you to provide valid login credentials in the form of
a username & password pair. This is necessary for
- ``pixiv``, ``nijie``, and ``seiga``
+ ``nijie`` and ``seiga``
and optional for
``aryion``,
``danbooru``,
@@ -248,7 +248,7 @@ Description: ==========
{
"extractor": {
- "pixiv": {
+ "seiga": {
"username": "<username>",
"password": "<password>"
}
@@ -332,7 +332,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.4.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.5.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/README.rst b/README.rst
index 1ddebcf..cec53a4 100644
--- a/README.rst
+++ b/README.rst
@@ -83,8 +83,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.5/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.5/gallery-dl.bin>`__
These executables include a Python interpreter
and all required Python packages.
@@ -214,7 +214,7 @@ Username & Password
Some extractors require you to provide valid login credentials in the form of
a username & password pair. This is necessary for
-``pixiv``, ``nijie``, and ``seiga``
+``nijie`` and ``seiga``
and optional for
``aryion``,
``danbooru``,
@@ -237,7 +237,7 @@ You can set the necessary information in your configuration file
{
"extractor": {
- "pixiv": {
+ "seiga": {
"username": "<username>",
"password": "<password>"
}
@@ -321,7 +321,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.4.tar.gz
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.5.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 619e84f..02639b8 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2021-01-23" "1.16.4" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2021-02-14" "1.16.5" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 413a40b..408cb61 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2021-01-23" "1.16.4" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2021-02-14" "1.16.5" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -321,8 +321,6 @@ another site.
Specifying a username and password is required for
.br
-* \f[I]pixiv\f[]
-.br
* \f[I]nijie\f[]
.br
* \f[I]seiga\f[]
@@ -1391,6 +1389,17 @@ port than the default.
Download subalbums.
+.SS extractor.pillowfort.reblogs
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Extract media from reblogged posts.
+
+
.SS extractor.pinterest.sections
.IP "Type:" 6
\f[I]bool\f[]
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index f1a1ebe..b87c59d 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.16.4
+Version: 1.16.5
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.5/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.5/gallery-dl.bin>`__
These executables include a Python interpreter
and all required Python packages.
@@ -225,7 +225,7 @@ Description: ==========
Some extractors require you to provide valid login credentials in the form of
a username & password pair. This is necessary for
- ``pixiv``, ``nijie``, and ``seiga``
+ ``nijie`` and ``seiga``
and optional for
``aryion``,
``danbooru``,
@@ -248,7 +248,7 @@ Description: ==========
{
"extractor": {
- "pixiv": {
+ "seiga": {
"username": "<username>",
"password": "<password>"
}
@@ -332,7 +332,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.4.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.5.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index d4907de..066ac90 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -56,6 +56,7 @@ gallery_dl/extractor/deviantart.py
gallery_dl/extractor/directlink.py
gallery_dl/extractor/dynastyscans.py
gallery_dl/extractor/e621.py
+gallery_dl/extractor/erome.py
gallery_dl/extractor/exhentai.py
gallery_dl/extractor/fallenangels.py
gallery_dl/extractor/flickr.py
@@ -121,6 +122,7 @@ gallery_dl/extractor/patreon.py
gallery_dl/extractor/photobucket.py
gallery_dl/extractor/photovogue.py
gallery_dl/extractor/piczel.py
+gallery_dl/extractor/pillowfort.py
gallery_dl/extractor/pinterest.py
gallery_dl/extractor/pixiv.py
gallery_dl/extractor/pixnet.py
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 179a552..8d72dc2 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -134,6 +134,12 @@ class HttpDownloader(DownloaderBase):
self.log.warning(msg)
return False
+ # check for invalid responses
+ validate = pathfmt.kwdict.get("_http_validate")
+ if validate and not validate(response):
+ self.log.warning("Invalid response")
+ return False
+
# set missing filename extension from MIME type
if not pathfmt.extension:
pathfmt.set_extension(self._find_extension(response))
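The new check lets an extractor veto a response before it is written to disk: it puts a callable under the "_http_validate" key of the metadata it yields, and the downloader calls it with the requests.Response (the nsfwalbum change further below uses this). A standalone sketch of the hook's semantics, using plain requests and a hypothetical placeholder URL rather than a real extractor:

    import requests

    def validate(response):
        # reject responses that were redirected to a placeholder image
        return not response.url.endswith("/no_image.jpg")

    response = requests.get("https://httpbin.org/image/jpeg", timeout=30)
    if validate(response):
        print("ok:", len(response.content), "bytes")
    else:
        print("Invalid response")  # the warning HttpDownloader logs above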
diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py
index d34209f..f5d2a4c 100644
--- a/gallery_dl/extractor/2chan.py
+++ b/gallery_dl/extractor/2chan.py
@@ -22,8 +22,8 @@ class _2chanThreadExtractor(Extractor):
url_fmt = "https://{server}.2chan.net/{board}/src/{filename}"
pattern = r"(?:https?://)?([^.]+)\.2chan\.net/([^/]+)/res/(\d+)"
test = ("http://dec.2chan.net/70/res/4752.htm", {
- "url": "20c211ae7c06b18ec345a057fe0b68dde979b051",
- "keyword": "23a529b46313b927fc94b577e5e1fdb3aa164ac1",
+ "url": "f49aa31340e9a3429226af24e19e01f5b819ca1f",
+ "keyword": "44599c21b248e79692b2eb2da12699bd0ed5640a",
})
def __init__(self, match):
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index df9941a..81b11fd 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2020 Mike Fährmann
+# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -153,7 +153,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
def metadata(self):
user = self._request_graphql(
"ProfileRendererQuery", {"username": self.user_name},
- "5a17a9af1830b58b94a912995b7947b24f27f1301c6ea8ab71a9eb1a6a86585b",
+ "105058632482dd2786fd5775745908dc928f537b28e28356b076522757d65c19",
)["profile"]
self.user_id = str(user["legacyId"])
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index a69bacc..923a78b 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -28,6 +28,7 @@ modules = [
"deviantart",
"dynastyscans",
"e621",
+ "erome",
"exhentai",
"fallenangels",
"flickr",
@@ -86,6 +87,7 @@ modules = [
"photobucket",
"photovogue",
"piczel",
+ "pillowfort",
"pinterest",
"pixiv",
"pixnet",
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index a817174..b081cc9 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2019 Mike Fährmann
+# Copyright 2018-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -83,6 +83,11 @@ class BehanceGalleryExtractor(BehanceExtractor):
"count": 20,
"url": "6bebff0d37f85349f9ad28bd8b76fd66627c1e2f",
}),
+ # 'video' modules (#1282)
+ ("https://www.behance.net/gallery/101185577/COLCCI", {
+ "pattern": r"ytdl:https://adobeprod-a\.akamaihd\.net/",
+ "count": 3,
+ }),
)
def __init__(self, match):
@@ -120,8 +125,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
page, 'id="beconfig-store_state">', '</script>')[0])
return self._update(data["project"]["project"])
- @staticmethod
- def get_images(data):
+ def get_images(self, data):
"""Extract image results from an API response"""
result = []
append = result.append
@@ -133,6 +137,13 @@ class BehanceGalleryExtractor(BehanceExtractor):
url = module["sizes"]["original"]
append((url, module))
+ elif mtype == "video":
+ page = self.request(module["src"]).text
+ url = text.extract(page, '<source src="', '"')[0]
+ if text.ext_from_url(url) == "m3u8":
+ url = "ytdl:" + url
+ append((url, module))
+
elif mtype == "media_collection":
for component in module["components"]:
url = component["sizes"]["source"]
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index a58401e..2eb3b28 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -176,10 +176,11 @@ class DeviantartExtractor(Extractor):
@staticmethod
def commit(deviation, target):
url = target["src"]
+ name = target.get("filename") or url
target = target.copy()
target["filename"] = deviation["filename"]
deviation["target"] = target
- deviation["extension"] = target["extension"] = text.ext_from_url(url)
+ deviation["extension"] = target["extension"] = text.ext_from_url(name)
return Message.Url, url, deviation
def _commit_journal_html(self, deviation, journal):
@@ -722,7 +723,7 @@ class DeviantartPopularExtractor(DeviantartExtractor):
def deviations(self):
return self.api.browse_popular(
- self.search_term, self.time_range, self.category_path, self.offset)
+ self.search_term, self.time_range, self.offset)
def prepare(self, deviation):
DeviantartExtractor.prepare(self, deviation)
@@ -917,13 +918,16 @@ class DeviantartOAuthAPI():
self.client_id,
)
- def browse_popular(self, query=None, timerange=None,
- category_path=None, offset=0):
+ def browse_popular(self, query=None, timerange=None, offset=0):
"""Yield popular deviations"""
endpoint = "browse/popular"
- params = {"q": query, "offset": offset, "limit": 120,
- "timerange": timerange, "category_path": category_path,
- "mature_content": self.mature}
+ params = {
+ "q" : query,
+ "limit" : 50 if self.metadata else 120,
+ "timerange" : timerange,
+ "offset" : offset,
+ "mature_content": self.mature,
+ }
return self._pagination(endpoint, params)
def browse_user_journals(self, username, offset=0):
@@ -1127,13 +1131,31 @@ class DeviantartOAuthAPI():
self.log.info("Collecting folder information for '%s'", username)
folders = self.gallery_folders(username)
+ # create 'folderid'-to-'folder' mapping
+ fmap = {
+ folder["folderid"]: folder
+ for folder in folders
+ }
+
# add parent names to folders, but ignore "Featured" as parent
- fmap = {}
featured = folders[0]["folderid"]
- for folder in folders:
- if folder["parent"] and folder["parent"] != featured:
- folder["name"] = fmap[folder["parent"]] + "/" + folder["name"]
- fmap[folder["folderid"]] = folder["name"]
+ done = False
+
+ while not done:
+ done = True
+ for folder in folders:
+ parent = folder["parent"]
+ if not parent:
+ pass
+ elif parent == featured:
+ folder["parent"] = None
+ else:
+ parent = fmap[parent]
+ if parent["parent"]:
+ done = False
+ else:
+ folder["name"] = parent["name"] + "/" + folder["name"]
+ folder["parent"] = None
# map deviationids to folder names
dmap = collections.defaultdict(list)
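The reworked loop above resolves nested gallery folders iteratively, so a folder's full "parent/child" name is only built once its parent's own name is complete, and "Featured" never appears as a prefix. A standalone sketch of the same logic with made-up sample data:

    folders = [
        {"folderid": "f0", "parent": None, "name": "Featured"},
        {"folderid": "f1", "parent": "f0", "name": "Sketches"},
        {"folderid": "f2", "parent": "f1", "name": "Inks"},
        {"folderid": "f3", "parent": "f2", "name": "2021"},
    ]

    fmap = {folder["folderid"]: folder for folder in folders}
    featured = folders[0]["folderid"]
    done = False

    while not done:
        done = True
        for folder in folders:
            parent = folder["parent"]
            if not parent:
                pass
            elif parent == featured:
                folder["parent"] = None
            else:
                parent = fmap[parent]
                if parent["parent"]:
                    done = False  # parent not resolved yet; try another pass
                else:
                    folder["name"] = parent["name"] + "/" + folder["name"]
                    folder["parent"] = None

    print([folder["name"] for folder in folders])
    # ['Featured', 'Sketches', 'Sketches/Inks', 'Sketches/Inks/2021']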
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
new file mode 100644
index 0000000..1c6ebb4
--- /dev/null
+++ b/gallery_dl/extractor/erome.py
@@ -0,0 +1,131 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.erome.com/"""
+
+from .common import Extractor, Message
+from .. import text, util
+from ..cache import cache
+import itertools
+import time
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?erome\.com"
+
+
+class EromeExtractor(Extractor):
+ category = "erome"
+ directory_fmt = ("{category}", "{user}")
+ filename_fmt = "{album_id} {title} {num:>02}.{extension}"
+ archive_fmt = "{album_id}_{num}"
+ root = "https://www.erome.com"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.item = match.group(1)
+ self.__cookies = True
+
+ def items(self):
+ for album_id in self.albums():
+ url = "{}/a/{}".format(self.root, album_id)
+ page = self.request(url).text
+
+ title, pos = text.extract(
+ page, 'property="og:title" content="', '"')
+ pos = page.index('<div class="user-profile', pos)
+ user, pos = text.extract(
+ page, 'href="https://www.erome.com/', '"', pos)
+ data = {
+ "album_id": album_id,
+ "title" : text.unescape(title),
+ "user" : text.unquote(user),
+ }
+
+ yield Message.Directory, data
+ groups = page.split('<div class="media-group"')
+ for data["num"], group in enumerate(util.advance(groups, 1), 1):
+ url = (text.extract(group, '<source src="', '"')[0] or
+ text.extract(group, 'data-src="', '"')[0])
+ yield Message.Url, url, text.nameext_from_url(url, data)
+
+ def albums(self):
+ return ()
+
+ def request(self, url, **kwargs):
+ if self.__cookies:
+ self.__cookies = False
+ self.session.cookies.update(_cookie_cache())
+
+ for _ in range(5):
+ response = Extractor.request(self, url, **kwargs)
+ if response.cookies:
+ _cookie_cache.update("", response.cookies)
+ if response.content.find(
+ b"<title>Please wait a few moments</title>", 0, 600) < 0:
+ return response
+ time.sleep(5)
+
+ def _pagination(self, url, params):
+ for params["page"] in itertools.count(1):
+ page = self.request(url, params=params).text
+
+ album_ids = EromeAlbumExtractor.pattern.findall(page)
+ yield from album_ids
+
+ if len(album_ids) < 36:
+ return
+
+
+class EromeAlbumExtractor(EromeExtractor):
+ """Extractor for albums on erome.com"""
+ subcategory = "album"
+ pattern = BASE_PATTERN + r"/a/(\w+)"
+ test = ("https://www.erome.com/a/UHUX1B73", {
+ "pattern": r"https://s\d+\.erome\.com/342/UHUX1B73/\w+",
+ "count": 5,
+ "keyword": {
+ "album_id": "UHUX1B73",
+ "num": int,
+ "title": "Ryan Ryans",
+ "user": "gutiquq",
+ },
+ })
+
+ def albums(self):
+ return (self.item,)
+
+
+class EromeUserExtractor(EromeExtractor):
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/(?!a/|search\?)([^/?#]+)"
+ test = ("https://www.erome.com/gutiquq", {
+ "range": "1-25",
+ "count": 25,
+ })
+
+ def albums(self):
+ url = "{}/{}".format(self.root, self.item)
+ return self._pagination(url, {})
+
+
+class EromeSearchExtractor(EromeExtractor):
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"/search\?q=([^&#]+)"
+ test = ("https://www.erome.com/search?q=cute", {
+ "range": "1-25",
+ "count": 25,
+ })
+
+ def albums(self):
+ url = self.root + "/search"
+ params = {"q": text.unquote(self.item)}
+ return self._pagination(url, params)
+
+
+@cache()
+def _cookie_cache():
+ return ()
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index df5a73e..a7b0356 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -186,7 +186,8 @@ class FuraffinityGalleryExtractor(FuraffinityExtractor):
subcategory = "gallery"
pattern = BASE_PATTERN + r"/gallery/([^/?#]+)"
test = ("https://www.furaffinity.net/gallery/mirlinthloth/", {
- "pattern": r"https://d\d?.facdn.net/art/mirlinthloth/\d+/\d+.\w+\.\w+",
+ "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
+ r"/art/mirlinthloth/\d+/\d+.\w+\.\w+",
"range": "45-50",
"count": 6,
})
@@ -198,7 +199,8 @@ class FuraffinityScrapsExtractor(FuraffinityExtractor):
directory_fmt = ("{category}", "{user!l}", "Scraps")
pattern = BASE_PATTERN + r"/scraps/([^/?#]+)"
test = ("https://www.furaffinity.net/scraps/mirlinthloth/", {
- "pattern": r"https://d\d?.facdn.net/art/[^/]+(/stories)?/\d+/\d+.\w+.",
+ "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
+ r"/art/[^/]+(/stories)?/\d+/\d+.\w+.",
"count": ">= 3",
})
@@ -209,7 +211,8 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
directory_fmt = ("{category}", "{user!l}", "Favorites")
pattern = BASE_PATTERN + r"/favorites/([^/?#]+)"
test = ("https://www.furaffinity.net/favorites/mirlinthloth/", {
- "pattern": r"https://d\d?.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+",
+ "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
+ r"/art/[^/]+/\d+/\d+.\w+\.\w+",
"range": "45-50",
"count": 6,
})
@@ -224,7 +227,8 @@ class FuraffinitySearchExtractor(FuraffinityExtractor):
directory_fmt = ("{category}", "Search", "{search}")
pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
test = ("https://www.furaffinity.net/search/?q=cute", {
- "pattern": r"https://d\d?.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+",
+ "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
+ r"/art/[^/]+/\d+/\d+.\w+\.\w+",
"range": "45-50",
"count": 6,
})
@@ -243,9 +247,9 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)"
test = (
("https://www.furaffinity.net/view/21835115/", {
- "pattern": r"https://d\d*\.facdn\.net/(download/)?art/mirlinthloth"
- r"/music/1488278723/1480267446.mirlinthloth_dj_fennmink"
- r"_-_bude_s_4_ever\.mp3",
+ "pattern": r"https://d\d*\.f(uraffinity|acdn)\.net/(download/)?art"
+ r"/mirlinthloth/music/1488278723/1480267446.mirlinthlot"
+ r"h_dj_fennmink_-_bude_s_4_ever\.mp3",
"keyword": {
"artist" : "mirlinthloth",
"artist_url" : "mirlinthloth",
@@ -256,7 +260,7 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
"id" : 21835115,
"tags" : list,
"title" : "Bude's 4 Ever",
- "url" : r"re:https://d\d?.facdn.net/art/mirlinthloth/m",
+ "url" : r"re:https://d\d?\.f(uraffinity|acdn)\.net/art",
"user" : "mirlinthloth",
"views" : int,
"favorites" : int,
diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py
index 84ad3af..093f3fe 100644
--- a/gallery_dl/extractor/hentaifox.py
+++ b/gallery_dl/extractor/hentaifox.py
@@ -82,12 +82,13 @@ class HentaifoxSearchExtractor(HentaifoxBase, Extractor):
"""Extractor for search results and listings on hentaifox.com"""
subcategory = "search"
pattern = (r"(?:https?://)?(?:www\.)?hentaifox\.com"
- r"(/(?:parody|tag|artist|character|search)/[^/?%#]+)")
+ r"(/(?:parody|tag|artist|character|search|group)/[^/?%#]+)")
test = (
("https://hentaifox.com/parody/touhou-project/"),
("https://hentaifox.com/character/reimu-hakurei/"),
("https://hentaifox.com/artist/distance/"),
("https://hentaifox.com/search/touhou/"),
+ ("https://hentaifox.com/group/v-slash/"),
("https://hentaifox.com/tag/heterochromia/", {
"pattern": HentaifoxGalleryExtractor.pattern,
"count": ">= 60",
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index fe3afbb..abb6d10 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2020 Mike Fährmann
+# Copyright 2016-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -19,9 +19,8 @@ class ImagehostImageExtractor(Extractor):
basecategory = "imagehost"
subcategory = "image"
archive_fmt = "{token}"
- https = False
- method = "post"
- params = "simple"
+ https = True
+ params = None
cookies = None
encoding = None
@@ -30,6 +29,7 @@ class ImagehostImageExtractor(Extractor):
self.page_url = "http{}://{}".format(
"s" if self.https else "", match.group(1))
self.token = match.group(2)
+
if self.params == "simple":
self.params = {
"imgContinue": "Continue+to+image+...+",
@@ -42,14 +42,11 @@ class ImagehostImageExtractor(Extractor):
"adb": "1",
"next": "Continue+to+image+...+",
}
- else:
- self.params = {}
- self.method = "get"
def items(self):
page = self.request(
self.page_url,
- method=self.method,
+ method=("POST" if self.params else "GET"),
data=self.params,
cookies=self.cookies,
encoding=self.encoding,
@@ -91,7 +88,7 @@ class ImxtoImageExtractor(ImagehostImageExtractor):
"exception": exception.NotFoundError,
}),
)
- https = True
+ params = "simple"
encoding = "utf-8"
def __init__(self, match):
@@ -122,7 +119,7 @@ class AcidimgImageExtractor(ImagehostImageExtractor):
"keyword": "a8bb9ab8b2f6844071945d31f8c6e04724051f37",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
})
- https = True
+ params = "simple"
encoding = "utf-8"
def get_info(self, page):
@@ -143,7 +140,7 @@ class ImagevenueImageExtractor(ImagehostImageExtractor):
"url": "46812995d557f2c6adf0ebd0e631e6e4e45facde",
"content": "59ec819cbd972dd9a71f25866fbfc416f2f215b3",
})
- params = None
+ https = False
def get_info(self, page):
url = text.extract(page, "SRC='", "'")[0]
@@ -159,8 +156,6 @@ class ImagetwistImageExtractor(ImagehostImageExtractor):
"keyword": "d1060a4c2e3b73b83044e20681712c0ffdd6cfef",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
})
- https = True
- params = None
@property
@memcache(maxage=3*3600)
@@ -182,8 +177,6 @@ class ImgspiceImageExtractor(ImagehostImageExtractor):
"keyword": "100e310a19a2fa22d87e1bbc427ecb9f6501e0c0",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
})
- https = True
- params = None
def get_info(self, page):
pos = page.find('id="imgpreview"')
@@ -204,8 +197,6 @@ class PixhostImageExtractor(ImagehostImageExtractor):
"keyword": "3bad6d59db42a5ebbd7842c2307e1c3ebd35e6b0",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
})
- https = True
- params = None
cookies = {"pixhostads": "1", "pixhosttest": "1"}
def get_info(self, page):
@@ -224,8 +215,6 @@ class PostimgImageExtractor(ImagehostImageExtractor):
"keyword": "2d05808d04e4e83e33200db83521af06e3147a84",
"content": "cfaa8def53ed1a575e0c665c9d6d8cf2aac7a0ee",
})
- https = True
- params = None
def get_info(self, page):
url , pos = text.extract(page, 'id="main-image" src="', '"')
@@ -243,8 +232,6 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor):
"keyword": "704757ca8825f51cec516ec44c1e627c1f2058ca",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
})
- https = True
- params = None
def get_info(self, page):
url = text.extract(page, 'src="', '"', page.index("<img "))[0]
@@ -259,9 +246,24 @@ class ViprImageExtractor(ImagehostImageExtractor):
"url": "88f6a3ecbf3356a11ae0868b518c60800e070202",
"keyword": "c432e8a1836b0d97045195b745731c2b1bb0e771",
})
- https = True
- params = None
def get_info(self, page):
url = text.extract(page, '<img src="', '"')[0]
return url, url
+
+
+class ImgclickImageExtractor(ImagehostImageExtractor):
+ """Extractor for single images from imgclick.net"""
+ category = "imgclick"
+ pattern = r"(?:https?://)?((?:www\.)?imgclick\.net/([^/?#]+))"
+ test = ("http://imgclick.net/4tbrre1oxew9/test-_-_.png.html", {
+ "url": "b967f2d372ffb9f5d3a927c6dd560e120b10a808",
+ "keyword": "6895256143eab955622fc149aa367777a8815ba3",
+ "content": "0c8768055e4e20e7c7259608b67799171b691140",
+ })
+ params = "complex"
+
+ def get_info(self, page):
+ url , pos = text.extract(page, '<br><img src="', '"')
+ filename, pos = text.extract(page, 'alt="', '"', pos)
+ return url, filename
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index 6051db0..9b5331a 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -54,7 +54,11 @@ class InkbunnyExtractor(Extractor):
post["date"] = text.parse_datetime(
file["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
text.nameext_from_url(file["file_name"], post)
- yield Message.Url, file["file_url_full"], post
+
+ url = file["file_url_full"]
+ if "/private_files/" in url:
+ url += "?sid=" + self.api.session_id
+ yield Message.Url, url, post
class InkbunnyUserExtractor(InkbunnyExtractor):
@@ -154,7 +158,10 @@ class InkbunnyPostExtractor(InkbunnyExtractor):
self.submission_id = match.group(1)
def posts(self):
- return self.api.detail(({"submission_id": self.submission_id},))
+ submissions = self.api.detail(({"submission_id": self.submission_id},))
+ if submissions[0] is None:
+ raise exception.NotFoundError("submission")
+ return submissions
class InkbunnyAPI():
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index a5b5e00..377e00b 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -10,17 +10,22 @@
from .common import Extractor, Message
from .. import text
+import re
+
+BASE_PATTERN = r"(?:https?://)?kemono\.party/([^/?#]+)/user/([^/?#]+)"
class KemonopartyExtractor(Extractor):
"""Base class for kemonoparty extractors"""
category = "kemonoparty"
root = "https://kemono.party"
- directory_fmt = ("{category}", "{user}")
+ directory_fmt = ("{category}", "{service}", "{user}")
filename_fmt = "{id}_{title}_{filename}.{extension}"
- archive_fmt = "{user}_{id}_{filename}.{extension}"
+ archive_fmt = "{service}_{user}_{id}_{filename}.{extension}"
def items(self):
+ find_inline = re.compile(r'src="(/inline/[^"]+)').findall
+
for post in self.posts():
files = []
@@ -28,23 +33,32 @@ class KemonopartyExtractor(Extractor):
files.append(post["file"])
if post["attachments"]:
files.extend(post["attachments"])
+ for path in find_inline(post["content"] or ""):
+ files.append({"path": path, "name": path})
+
post["date"] = text.parse_datetime(
post["published"], "%a, %d %b %Y %H:%M:%S %Z")
yield Message.Directory, post
for post["num"], file in enumerate(files, 1):
+ url = file["path"]
+ if url[0] == "/":
+ url = self.root + url
text.nameext_from_url(file["name"], post)
- yield Message.Url, self.root + file["path"], post
+ yield Message.Url, url, post
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
subcategory = "user"
- pattern = r"(?:https?://)?kemono\.party/([^/?#]+)/user/(\d+)/?(?:$|[?#])"
- test = ("https://kemono.party/fanbox/user/6993449", {
- "range": "1-25",
- "count": 25,
- })
+ pattern = BASE_PATTERN + r"/?(?:$|[?#])"
+ test = (
+ ("https://kemono.party/fanbox/user/6993449", {
+ "range": "1-25",
+ "count": 25,
+ }),
+ ("https://kemono.party/subscribestar/user/alcorart"),
+ )
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
@@ -67,28 +81,37 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
class KemonopartyPostExtractor(KemonopartyExtractor):
"""Extractor for a single kemono.party post"""
subcategory = "post"
- pattern = r"(?:https?://)?kemono\.party/([^/?#]+)/user/(\d+)/post/(\d+)"
- test = ("https://kemono.party/fanbox/user/6993449/post/506575", {
- "pattern": r"https://kemono\.party/files/fanbox"
- r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
- "keyword": {
- "added": "Wed, 06 May 2020 20:28:02 GMT",
- "content": str,
- "date": "dt:2019-08-11 02:09:04",
- "edited": None,
- "embed": dict,
- "extension": "jpeg",
- "filename": "P058kDFYus7DbqAkGlfWTlOr",
- "id": "506575",
- "num": 1,
- "published": "Sun, 11 Aug 2019 02:09:04 GMT",
- "service": "fanbox",
- "shared_file": False,
- "subcategory": "post",
- "title": "c96取り置き",
- "user": "6993449",
- },
- })
+ pattern = BASE_PATTERN + r"/post/([^/?#]+)"
+ test = (
+ ("https://kemono.party/fanbox/user/6993449/post/506575", {
+ "pattern": r"https://kemono\.party/files/fanbox"
+ r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
+ "keyword": {
+ "added": "Wed, 06 May 2020 20:28:02 GMT",
+ "content": str,
+ "date": "dt:2019-08-11 02:09:04",
+ "edited": None,
+ "embed": dict,
+ "extension": "jpeg",
+ "filename": "P058kDFYus7DbqAkGlfWTlOr",
+ "id": "506575",
+ "num": 1,
+ "published": "Sun, 11 Aug 2019 02:09:04 GMT",
+ "service": "fanbox",
+ "shared_file": False,
+ "subcategory": "post",
+ "title": "c96取り置き",
+ "user": "6993449",
+ },
+ }),
+ # inline image (#1286)
+ ("https://kemono.party/fanbox/user/7356311/post/802343", {
+ "pattern": r"https://kemono\.party/inline/fanbox"
+ r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg",
+ }),
+ ("https://kemono.party/subscribestar/user/alcorart/post/184330"),
+ ("https://kemono.party/gumroad/user/trylsc/post/IURjT"),
+ )
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
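The inline-image support added above amounts to collecting relative "/inline/..." paths from a post's HTML content and prefixing them with the site root before download. A standalone sketch, reusing the path from the test case and a made-up content string:

    import re

    root = "https://kemono.party"
    find_inline = re.compile(r'src="(/inline/[^"]+)').findall

    content = '<img src="/inline/fanbox/uaozO4Yga6ydkGIJFAQDixfE.jpeg">'
    files = [{"path": path, "name": path} for path in find_inline(content)]

    for file in files:
        url = file["path"]
        if url[0] == "/":
            url = root + url
        print(url)
    # https://kemono.party/inline/fanbox/uaozO4Yga6ydkGIJFAQDixfE.jpeg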
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 2156ecf..d59e5bb 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -17,7 +17,6 @@ class MangadexExtractor(Extractor):
"""Base class for mangadex extractors"""
category = "mangadex"
root = "https://mangadex.org"
- api_root = "https://api.mangadex.org"
# mangadex-to-iso639-1 codes
iso639_map = {
@@ -27,6 +26,10 @@ class MangadexExtractor(Extractor):
"vn": "vi",
}
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.api_root = self.config("api-server") or "https://mangadex.org/api"
+
def chapter_data(self, chapter_id):
"""Request API results for 'chapter_id'"""
url = "{}/v2/chapter/{}".format(self.api_root, chapter_id)
diff --git a/gallery_dl/extractor/nsfwalbum.py b/gallery_dl/extractor/nsfwalbum.py
index 8f1f3f2..9c4d686 100644
--- a/gallery_dl/extractor/nsfwalbum.py
+++ b/gallery_dl/extractor/nsfwalbum.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -44,17 +44,24 @@ class NsfwalbumAlbumExtractor(GalleryExtractor):
iframe = self.root + "/iframe_image.php?id="
backend = self.root + "/backend.php"
for image_id in text.extract_iter(page, 'data-img-id="', '"'):
- spirit = text.extract(self.request(
- iframe + image_id).text, 'giraffe.annihilate("', '"')[0]
- params = {"spirit": self._annihilate(spirit), "photo": image_id}
+ spirit = self._annihilate(text.extract(self.request(
+ iframe + image_id).text, 'giraffe.annihilate("', '"')[0])
+ params = {"spirit": spirit, "photo": image_id}
data = self.request(backend, params=params).json()
yield data[0], {
"id" : text.parse_int(image_id),
"width" : text.parse_int(data[1]),
"height": text.parse_int(data[2]),
+ "_http_validate": self._validate_response,
+ "_fallback": ("{}/imageProxy.php?photoId={}&spirit={}".format(
+ self.root, image_id, spirit),),
}
@staticmethod
+ def _validate_response(response):
+ return not response.request.url.endswith("/no_image.jpg")
+
+ @staticmethod
def _annihilate(value, base=6):
return "".join(
chr(ord(char) ^ base)
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 4bb2c48..2ec7165 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2020 Mike Fährmann
+# Copyright 2017-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,10 +9,12 @@
"""Utility classes to setup OAuth and link accounts to gallery-dl"""
from .common import Extractor, Message
-from . import deviantart, flickr, reddit, smugmug, tumblr
+from . import deviantart, flickr, pixiv, reddit, smugmug, tumblr
from .. import text, oauth, util, config, exception
from ..cache import cache
import urllib.parse
+import hashlib
+import base64
REDIRECT_URI_LOCALHOST = "http://localhost:6414/"
REDIRECT_URI_HTTPS = "https://mikf.github.io/gallery-dl/oauth-redirect.html"
@@ -62,14 +64,14 @@ class OAuthBase(Extractor):
self.client.send(b"HTTP/1.1 200 OK\r\n\r\n" + msg.encode())
self.client.close()
- def open(self, url, params):
+ def open(self, url, params, recv=None):
"""Open 'url' in browser amd return response parameters"""
import webbrowser
url += "?" + urllib.parse.urlencode(params)
if not self.config("browser", True) or not webbrowser.open(url):
print("Please open this URL in your browser:")
print(url, end="\n\n", flush=True)
- return self.recv()
+ return (recv or self.recv)()
def _oauth1_authorization_flow(
self, request_token_url, authorize_url, access_token_url):
@@ -362,6 +364,69 @@ class OAuthMastodon(OAuthBase):
return data
+class OAuthPixiv(OAuthBase):
+ subcategory = "pixiv"
+ pattern = "oauth:pixiv$"
+
+ def items(self):
+ yield Message.Version, 1
+
+ code_verifier = util.generate_token(32)
+ digest = hashlib.sha256(code_verifier.encode("ascii")).digest()
+ code_challenge = base64.urlsafe_b64encode(
+ digest).rstrip(b"=").decode("ascii")
+
+ url = "https://app-api.pixiv.net/web/v1/login"
+ params = {
+ "code_challenge": code_challenge,
+ "code_challenge_method": "S256",
+ "client": "pixiv-android",
+ }
+ code = self.open(url, params, self._input)
+
+ url = "https://oauth.secure.pixiv.net/auth/token"
+ headers = {
+ "User-Agent": "PixivAndroidApp/5.0.234 (Android 11; Pixel 5)",
+ }
+ data = {
+ "client_id" : self.oauth_config(
+ "client-id" , pixiv.PixivAppAPI.CLIENT_ID),
+ "client_secret" : self.oauth_config(
+ "client-secret", pixiv.PixivAppAPI.CLIENT_SECRET),
+ "code" : code,
+ "code_verifier" : code_verifier,
+ "grant_type" : "authorization_code",
+ "include_policy": "true",
+ "redirect_uri" : "https://app-api.pixiv.net"
+ "/web/v1/users/auth/pixiv/callback",
+ }
+ data = self.session.post(url, headers=headers, data=data).json()
+
+ if "error" in data:
+ print(data)
+ if data["error"] == "invalid_request":
+ print("'code' expired, try again")
+ return
+
+ token = data["refresh_token"]
+ if self.cache:
+ username = self.oauth_config("username")
+ pixiv._refresh_token_cache.update(username, token)
+ self.log.info("Writing 'refresh-token' to cache")
+
+ print(self._generate_message(("refresh-token",), (token,)))
+
+ def _input(self):
+ print("""
+1) Open your browser's Developer Tools (F12) and switch to the Network tab
+2) Login
+3) Select the last network monitor entry ('callback?state=...')
+4) Copy its 'code' query parameter, paste it below, and press Enter
+""")
+ code = input("code: ")
+ return code.rpartition("=")[2].strip()
+
+
MASTODON_MSG_TEMPLATE = """
Your 'access-token' is
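The new `oauth:pixiv` helper above implements a PKCE flow: a random verifier stays local, only its S256 challenge is sent with the login URL, and the verifier is later exchanged together with the copied 'code' for a refresh token. A compact standalone illustration of the challenge computation, with secrets.token_urlsafe standing in for util.generate_token:

    import base64
    import hashlib
    import secrets

    code_verifier = secrets.token_urlsafe(32)
    digest = hashlib.sha256(code_verifier.encode("ascii")).digest()
    code_challenge = base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

    print("verifier :", code_verifier)
    print("challenge:", code_challenge)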
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index abcc33d..05cbcdf 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -115,10 +115,12 @@ class PahealPostExtractor(PahealExtractor):
tags , pos = text.extract(page, ": ", "<")
md5 , pos = text.extract(page, "/_thumbs/", "/", pos)
url , pos = text.extract(page, "id='main_image' src='", "'", pos)
- width , pos = text.extract(page, "data-width='", "'", pos)
- height, pos = text.extract(page, "data-height='", "'", pos)
+ width , pos = text.extract(page, "data-width=", " ", pos)
+ height, pos = text.extract(page, "data-height=", " ", pos)
return ({
"id": self.post_id, "md5": md5, "tags": tags, "file_url": url,
- "width": width, "height": height, "size": 0,
+ "size" : 0,
+ "width" : width.strip("'\""),
+ "height": height.strip("'\""),
},)
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index ad259f4..688c005 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -34,6 +34,10 @@ class PatreonExtractor(Extractor):
PatreonExtractor._warning = False
for post in self.posts():
+
+ if not post.get("current_user_can_view", True):
+ self.log.warning("Not allowed to view post %s", post["id"])
+ continue
post["num"] = 0
hashes = set()
@@ -113,14 +117,17 @@ class PatreonExtractor(Extractor):
"""Process and extend a 'post' object"""
attr = post["attributes"]
attr["id"] = text.parse_int(post["id"])
- attr["images"] = self._files(post, included, "images")
- attr["attachments"] = self._files(post, included, "attachments")
- attr["date"] = text.parse_datetime(
- attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
- user = post["relationships"]["user"]
- attr["creator"] = (
- self._user(user["links"]["related"]) or
- included["user"][user["data"]["id"]])
+
+ if post.get("current_user_can_view", True):
+ attr["images"] = self._files(post, included, "images")
+ attr["attachments"] = self._files(post, included, "attachments")
+ attr["date"] = text.parse_datetime(
+ attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ user = post["relationships"]["user"]
+ attr["creator"] = (
+ self._user(user["links"]["related"]) or
+ included["user"][user["data"]["id"]])
+
return attr
@staticmethod
diff --git a/gallery_dl/extractor/pillowfort.py b/gallery_dl/extractor/pillowfort.py
new file mode 100644
index 0000000..cbd65d7
--- /dev/null
+++ b/gallery_dl/extractor/pillowfort.py
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.pillowfort.social/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social"
+
+
+class PillowfortExtractor(Extractor):
+ """Base class for pillowfort extractors"""
+ category = "pillowfort"
+ root = "https://www.pillowfort.social"
+ directory_fmt = ("{category}", "{username}")
+ filename_fmt = ("{post_id} {title|original_post[title]} "
+ "{num:>02}.{extension}")
+ archive_fmt = "{id}"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.item = match.group(1)
+ self.reblogs = self.config("reblogs", False)
+
+ def items(self):
+ for post in self.posts():
+
+ if "original_post" in post and not self.reblogs:
+ continue
+
+ files = post["media"]
+ del post["media"]
+
+ post["date"] = text.parse_datetime(
+ post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ yield Message.Directory, post
+
+ post["num"] = 0
+ for file in files:
+ url = file["url"]
+ if url:
+ post.update(file)
+ post["num"] += 1
+ post["date"] = text.parse_datetime(
+ file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ yield Message.Url, url, text.nameext_from_url(url, post)
+
+
+class PillowfortPostExtractor(PillowfortExtractor):
+ """Extractor for a single pillowfort post"""
+ subcategory = "post"
+ pattern = BASE_PATTERN + r"/posts/(\d+)"
+ test = ("https://www.pillowfort.social/posts/27510", {
+ "pattern": r"https://img\d+\.pillowfort\.social/posts/\w+_out\d+\.png",
+ "count": 4,
+ "keyword": {
+ "avatar_url": str,
+ "col": 0,
+ "commentable": True,
+ "comments_count": int,
+ "community_id": None,
+ "content": str,
+ "created_at": str,
+ "date": "type:datetime",
+ "deleted": None,
+ "deleted_at": None,
+ "deleted_by_mod": None,
+ "deleted_for_flag_id": None,
+ "embed_code": None,
+ "id": int,
+ "last_activity": str,
+ "last_activity_elapsed": str,
+ "last_edited_at": None,
+ "likes_count": int,
+ "media_type": "picture",
+ "nsfw": False,
+ "num": int,
+ "original_post_id": None,
+ "original_post_user_id": None,
+ "picture_content_type": None,
+ "picture_file_name": None,
+ "picture_file_size": None,
+ "picture_updated_at": None,
+ "post_id": 27510,
+ "post_type": "picture",
+ "privacy": "public",
+ "reblog_copy_info": list,
+ "rebloggable": True,
+ "reblogged_from_post_id": None,
+ "reblogged_from_user_id": None,
+ "reblogs_count": int,
+ "row": int,
+ "small_image_url": None,
+ "tags": list,
+ "time_elapsed": str,
+ "timestamp": str,
+ "title": "What is Pillowfort.io? ",
+ "updated_at": str,
+ "url": r"re:https://img3.pillowfort.social/posts/.*\.png",
+ "user_id": 5,
+ "username": "Staff"
+ },
+ })
+
+ def posts(self):
+ url = "{}/posts/{}/json/".format(self.root, self.item)
+ return (self.request(url).json(),)
+
+
+class PillowfortUserExtractor(PillowfortExtractor):
+ """Extractor for all posts of a pillowfort user"""
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/(?!posts/)([^/?#]+)"
+ test = ("https://www.pillowfort.social/Pome", {
+ "pattern": r"https://img\d+\.pillowfort\.social/posts/",
+ "range": "1-15",
+ "count": 15,
+ })
+
+ def posts(self):
+ url = "{}/{}/json/".format(self.root, self.item)
+ params = {"p": 1}
+
+ while True:
+ posts = self.request(url, params=params).json()["posts"]
+ yield from posts
+
+ if len(posts) < 20:
+ return
+ params["p"] += 1
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index a872ada..be976e9 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -510,49 +510,48 @@ class PixivAppAPI():
def __init__(self, extractor):
self.extractor = extractor
self.log = extractor.log
- self.username, self.password = extractor._get_auth_info()
+ self.username = extractor._get_auth_info()[0]
self.user = None
+ extractor.session.headers.update({
+ "App-OS" : "ios",
+ "App-OS-Version": "13.1.2",
+ "App-Version" : "7.7.6",
+ "User-Agent" : "PixivIOSApp/7.7.6 (iOS 13.1.2; iPhone11,8)",
+ "Referer" : "https://app-api.pixiv.net/",
+ })
+
self.client_id = extractor.config(
"client-id", self.CLIENT_ID)
self.client_secret = extractor.config(
"client-secret", self.CLIENT_SECRET)
- extractor.session.headers.update({
- "App-OS": "ios",
- "App-OS-Version": "10.3.1",
- "App-Version": "6.7.1",
- "User-Agent": "PixivIOSApp/6.7.1 (iOS 10.3.1; iPhone8,1)",
- "Referer": "https://app-api.pixiv.net/",
- })
+
+ token = extractor.config("refresh-token")
+ if token is None or token == "cache":
+ token = _refresh_token_cache(self.username)
+ self.refresh_token = token
def login(self):
"""Login and gain an access token"""
- self.user, auth = self._login_impl(self.username, self.password)
+ self.user, auth = self._login_impl(self.username)
self.extractor.session.headers["Authorization"] = auth
@cache(maxage=3600, keyarg=1)
- def _login_impl(self, username, password):
- if not username or not password:
+ def _login_impl(self, username):
+ if not self.refresh_token:
raise exception.AuthenticationError(
- "Username and password required")
+ "'refresh-token' required.\n"
+ "Run `gallery-dl oauth:pixiv` to get one.")
+ self.log.info("Refreshing access token")
url = "https://oauth.secure.pixiv.net/auth/token"
data = {
- "client_id": self.client_id,
- "client_secret": self.client_secret,
- "get_secure_url": 1,
+ "client_id" : self.client_id,
+ "client_secret" : self.client_secret,
+ "grant_type" : "refresh_token",
+ "refresh_token" : self.refresh_token,
+ "get_secure_url": "1",
}
- refresh_token = _refresh_token_cache(username)
-
- if refresh_token:
- self.log.info("Refreshing access token")
- data["grant_type"] = "refresh_token"
- data["refresh_token"] = refresh_token
- else:
- self.log.info("Logging in as %s", username)
- data["grant_type"] = "password"
- data["username"] = username
- data["password"] = password
time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00")
headers = {
@@ -565,11 +564,9 @@ class PixivAppAPI():
url, method="POST", headers=headers, data=data, fatal=False)
if response.status_code >= 400:
self.log.debug(response.text)
- raise exception.AuthenticationError()
+ raise exception.AuthenticationError("Invalid refresh token")
data = response.json()["response"]
- if not refresh_token:
- _refresh_token_cache.update(username, data["refresh_token"])
return data["user"], "Bearer " + data["access_token"]
def illust_detail(self, illust_id):
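With the password grant removed, pixiv logins now hinge entirely on the refresh-token option: an explicit value is used directly, while None or the literal string "cache" falls back to the token stored by `gallery-dl oauth:pixiv`. A tiny standalone illustration of that selection, with a placeholder cached value:

    def pick_refresh_token(configured, cached):
        # mirrors the lookup above: explicit value wins, None/"cache" use the cache
        if configured is None or configured == "cache":
            return cached
        return configured

    cached = "token-from-oauth-pixiv"  # placeholder
    print(pick_refresh_token(None, cached))      # -> token-from-oauth-pixiv
    print(pick_refresh_token("cache", cached))   # -> token-from-oauth-pixiv
    print(pick_refresh_token("abc123", cached))  # -> abc123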
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index e98b630..a5f0138 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -242,7 +242,7 @@ class SankakuAPI():
success = True
if not success:
code = data.get("code")
- if code == "invalid_token":
+ if code and code.endswith(("invalid-token", "invalid_token")):
_authenticate_impl.invalidate(self.username)
continue
raise exception.StopExtraction(code)
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index 8f668df..0b970cc 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2019 Mike Fährmann, Leonardo Taccari
+# Copyright 2016-2021 Mike Fährmann, Leonardo Taccari
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -61,7 +61,7 @@ class SlidesharePresentationExtractor(Extractor):
title, pos = text.extract(
page, '<span class="j-title-breadcrumb">', '</span>', pos)
views, pos = text.extract(
- page, '<span class="notranslate pippin-data">', 'views<', pos)
+ page, '<span class="notranslate">', 'views<', pos)
published, pos = text.extract(
page, '<time datetime="', '"', pos)
alt_descr, pos = text.extract(
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 4034732..7b6bf21 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -26,6 +26,7 @@ class TwitterExtractor(Extractor):
filename_fmt = "{tweet_id}_{num}.{extension}"
archive_fmt = "{tweet_id}_{retweet_id}_{num}"
cookiedomain = ".twitter.com"
+ cookienames = ("auth_token",)
root = "https://twitter.com"
def __init__(self, match):
@@ -231,9 +232,10 @@ class TwitterExtractor(Extractor):
"""Yield all relevant tweet objects"""
def login(self):
- username, password = self._get_auth_info()
- if username:
- self._update_cookies(self._login_impl(username, password))
+ if not self._check_cookies(self.cookienames):
+ username, password = self._get_auth_info()
+ if username:
+ self._update_cookies(self._login_impl(username, password))
@cache(maxage=360*24*3600, keyarg=1)
def _login_impl(self, username, password):
diff --git a/gallery_dl/extractor/vanillarock.py b/gallery_dl/extractor/vanillarock.py
index e10c642..32eaa36 100644
--- a/gallery_dl/extractor/vanillarock.py
+++ b/gallery_dl/extractor/vanillarock.py
@@ -37,7 +37,7 @@ class VanillarockPostExtractor(VanillarockExtractor):
def items(self):
extr = text.extract_from(self.request(self.root + self.path).text)
- name = extr("<title>", "</title>")
+ name = extr('<h1 class="entry-title">', "<")
imgs = []
while True:
@@ -48,7 +48,7 @@ class VanillarockPostExtractor(VanillarockExtractor):
data = {
"count": len(imgs),
- "title": text.unescape(name.rpartition(" | ")[0]),
+ "title": text.unescape(name),
"path" : self.path.strip("/"),
"date" : text.parse_datetime(extr(
'<div class="date">', '</div>'), "%Y-%m-%d %H:%M"),
@@ -76,7 +76,7 @@ class VanillarockTagExtractor(VanillarockExtractor):
"%ad%e7%94%bb%e5%83%8f/%e8%90%8c%e3%81%88%e3%83%bb%e3%82%bd%e3%83%95"
"%e3%83%88%e3%82%a8%e3%83%ad"), {
"pattern": VanillarockPostExtractor.pattern,
- "count": 3,
+ "count": ">= 5",
}),
)
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index 4449e19..1a26264 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -48,6 +48,7 @@ class WebtoonsEpisodeExtractor(WebtoonsExtractor):
"/ep-572-earth/viewer?title_no=352&episode_no=572"), {
"url": "11041d71a3f92728305c11a228e77cf0f7aa02ef",
"content": ("1748c7e82b6db910fa179f6dc7c4281b0f680fa7",
+ "42055e44659f6ffc410b3fb6557346dfbb993df3",
"49e1f2def04c6f7a6a3dacf245a1cd9abe77a6a9"),
"count": 5,
}),
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 71a67c1..c08f111 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -91,7 +91,7 @@ class MetadataPP(PostProcessor):
def _filename_extfmt(self, pathfmt):
kwdict = pathfmt.kwdict
- ext = kwdict["extension"]
+ ext = kwdict.get("extension")
kwdict["extension"] = pathfmt.extension
kwdict["extension"] = pathfmt.prefix + self._extension_fmt(kwdict)
filename = pathfmt.build_filename()
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 572d3bb..8244a95 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -1,9 +1,9 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2020 Mike Fährmann
+# Copyright 2016-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.16.4"
+__version__ = "1.16.5"