about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Unit 193 <unit193@ubuntu.com> 2020-04-14 18:18:46 -0400
committer: Unit 193 <unit193@ubuntu.com> 2020-04-14 18:18:46 -0400
commit 4c497ac833ee559fc1d4cf91c31ae0ac6a0fdb08 (patch)
tree 263b54798896833eba7cd89a5f7b5cbb102c1f61
parent 093c726dcd0772f0a3c7607be9d3025d46f60ea7 (diff)
parent cf188f30e1c27bdb900fa2623a9ff91b944633b2 (diff)
download gallery-dl-4c497ac833ee559fc1d4cf91c31ae0ac6a0fdb08.tar.bz2
gallery-dl-4c497ac833ee559fc1d4cf91c31ae0ac6a0fdb08.tar.xz
gallery-dl-4c497ac833ee559fc1d4cf91c31ae0ac6a0fdb08.tar.zst
Update upstream source from tag 'upstream/1.13.4'
Update to upstream version '1.13.4' with Debian dir 60f0f28d8e471cd35c47195c7dba1f1d50a2b576
-rw-r--r-- PKG-INFO 12
-rw-r--r-- README.rst 10
-rw-r--r-- data/man/gallery-dl.1 2
-rw-r--r-- data/man/gallery-dl.conf.5 2
-rw-r--r-- gallery_dl.egg-info/PKG-INFO 12
-rw-r--r-- gallery_dl.egg-info/SOURCES.txt 1
-rw-r--r-- gallery_dl/config.py 10
-rw-r--r-- gallery_dl/downloader/http.py 6
-rw-r--r-- gallery_dl/extractor/__init__.py 1
-rw-r--r-- gallery_dl/extractor/aryion.py 161
-rw-r--r-- gallery_dl/extractor/common.py 24
-rw-r--r-- gallery_dl/extractor/deviantart.py 428
-rw-r--r-- gallery_dl/extractor/hentainexus.py 34
-rw-r--r-- gallery_dl/extractor/hiperdex.py 57
-rw-r--r-- gallery_dl/extractor/luscious.py 6
-rw-r--r-- gallery_dl/extractor/mastodon.py 68
-rw-r--r-- gallery_dl/extractor/myportfolio.py 25
-rw-r--r-- gallery_dl/extractor/oauth.py 4
-rw-r--r-- gallery_dl/extractor/piczel.py 8
-rw-r--r-- gallery_dl/extractor/realbooru.py 2
-rw-r--r-- gallery_dl/extractor/reddit.py 3
-rw-r--r-- gallery_dl/extractor/tumblr.py 2
-rw-r--r-- gallery_dl/extractor/twitter.py 3
-rw-r--r-- gallery_dl/extractor/vsco.py 2
-rw-r--r-- gallery_dl/extractor/weibo.py 8
-rw-r--r-- gallery_dl/text.py 8
-rw-r--r-- gallery_dl/util.py 17
-rw-r--r-- gallery_dl/version.py 2
-rw-r--r-- test/test_extractor.py 122
-rw-r--r-- test/test_results.py 4
-rw-r--r-- test/test_text.py 6
31 files changed, 690 insertions, 360 deletions
diff --git a/PKG-INFO b/PKG-INFO
index 1cec073..110cf95 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.13.3
+Version: 1.13.4
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -247,7 +247,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.3.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.4.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
@@ -263,8 +263,8 @@ Description: ==========
.. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg
:target: https://pypi.org/project/gallery-dl/
- .. |build| image:: https://travis-ci.org/mikf/gallery-dl.svg?branch=master
- :target: https://travis-ci.org/mikf/gallery-dl
+ .. |build| image:: https://travis-ci.com/mikf/gallery-dl.svg?branch=master
+ :target: https://travis-ci.com/mikf/gallery-dl
.. |gitter| image:: https://badges.gitter.im/gallery-dl/main.svg
:target: https://gitter.im/gallery-dl/main
diff --git a/README.rst b/README.rst
index 37f07b7..3e78d9a 100644
--- a/README.rst
+++ b/README.rst
@@ -83,8 +83,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -236,7 +236,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.3.tar.gz
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.4.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
@@ -252,8 +252,8 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg
:target: https://pypi.org/project/gallery-dl/
-.. |build| image:: https://travis-ci.org/mikf/gallery-dl.svg?branch=master
- :target: https://travis-ci.org/mikf/gallery-dl
+.. |build| image:: https://travis-ci.com/mikf/gallery-dl.svg?branch=master
+ :target: https://travis-ci.com/mikf/gallery-dl
.. |gitter| image:: https://badges.gitter.im/gallery-dl/main.svg
:target: https://gitter.im/gallery-dl/main
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index af9ac7d..e7e1566 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2020-03-28" "1.13.3" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2020-04-12" "1.13.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 9a374da..2fd4dba 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2020-03-28" "1.13.3" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2020-04-12" "1.13.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index c9ca17b..45381a6 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.13.3
+Version: 1.13.4
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.4/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -247,7 +247,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.3.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.4.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
@@ -263,8 +263,8 @@ Description: ==========
.. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg
:target: https://pypi.org/project/gallery-dl/
- .. |build| image:: https://travis-ci.org/mikf/gallery-dl.svg?branch=master
- :target: https://travis-ci.org/mikf/gallery-dl
+ .. |build| image:: https://travis-ci.com/mikf/gallery-dl.svg?branch=master
+ :target: https://travis-ci.com/mikf/gallery-dl
.. |gitter| image:: https://badges.gitter.im/gallery-dl/main.svg
:target: https://gitter.im/gallery-dl/main
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index ecb052c..1df3675 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -39,6 +39,7 @@ gallery_dl/extractor/8muses.py
gallery_dl/extractor/__init__.py
gallery_dl/extractor/adultempire.py
gallery_dl/extractor/artstation.py
+gallery_dl/extractor/aryion.py
gallery_dl/extractor/bcy.py
gallery_dl/extractor/behance.py
gallery_dl/extractor/blogger.py
diff --git a/gallery_dl/config.py b/gallery_dl/config.py
index 785ffc3..c2787ad 100644
--- a/gallery_dl/config.py
+++ b/gallery_dl/config.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -35,6 +35,14 @@ else:
]
+if getattr(sys, "frozen", False):
+ # look for config file in PyInstaller executable directory (#682)
+ _default_configs.append(os.path.join(
+ os.path.dirname(sys.executable),
+ "gallery-dl.conf",
+ ))
+
+
# --------------------------------------------------------------------
# public interface
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 844e422..64a2978 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -164,7 +164,11 @@ class HttpDownloader(DownloaderBase):
self.downloading = False
if self.mtime:
- pathfmt.kwdict["_mtime"] = response.headers.get("Last-Modified")
+ pathfmt.kwdict.setdefault(
+ "_mtime", response.headers.get("Last-Modified"))
+ else:
+ pathfmt.kwdict["_mtime"] = None
+
return True
def receive(self, response, file):
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 74c553d..2c87eb3 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -19,6 +19,7 @@ modules = [
"8muses",
"adultempire",
"artstation",
+ "aryion",
"bcy",
"behance",
"blogger",
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
new file mode 100644
index 0000000..d8f55bd
--- /dev/null
+++ b/gallery_dl/extractor/aryion.py
@@ -0,0 +1,161 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://aryion.com/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4"
+
+
+class AryionExtractor(Extractor):
+ """Base class for aryion extractors"""
+ category = "aryion"
+ directory_fmt = ("{category}", "{user!l}", "{path:J - }")
+ filename_fmt = "{id} {title}.{extension}"
+ archive_fmt = "{id}"
+ root = "https://aryion.com"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user = match.group(1)
+ self.offset = 0
+
+ def items(self):
+ for post_id in util.advance(self.posts(), self.offset):
+ post = self._parse_post(post_id)
+ if post:
+ yield Message.Directory, post
+ yield Message.Url, post["url"], post
+
+ def posts(self):
+ return ()
+
+ def skip(self, num):
+ self.offset += num
+ return num
+
+ def _parse_post(self, post_id):
+ url = "{}/g4/data.php?id={}".format(self.root, post_id)
+ with self.request(url, method="HEAD", fatal=False) as response:
+
+ if response.status_code >= 400:
+ return None
+ headers = response.headers
+
+ # ignore folders
+ if headers["content-type"] == "application/x-folder":
+ return None
+
+ # get filename from 'content-disposition' header
+ cdis = headers["content-disposition"]
+ fname, _, ext = text.extract(
+ cdis, 'filename="', '"')[0].rpartition(".")
+ if not fname:
+ fname, ext = ext, fname
+
+ # fix 'last-modified' header
+ lmod = headers["last-modified"]
+ if lmod[22] != ":":
+ lmod = "{}:{} GMT".format(lmod[:22], lmod[22:24])
+
+ post_url = "{}/g4/view/{}".format(self.root, post_id)
+ extr = text.extract_from(self.request(post_url).text)
+
+ title, _, artist = text.unescape(extr(
+ "<title>g4 :: ", "<")).rpartition(" by ")
+ data = {
+ "id" : text.parse_int(post_id),
+ "url" : url,
+ "user" : self.user or artist,
+ "title" : title,
+ "artist": artist,
+ "path" : text.split_html(extr("cookiecrumb'>", '</span'))[4:-1:2],
+ "date" : extr("class='pretty-date' title='", "'"),
+ "views" : text.parse_int(extr("Views</b>:", "<").replace(",", "")),
+ "size" : text.parse_bytes(extr("File size</b>:", "<")[:-2]),
+ "width" : text.parse_int(extr("Resolution</b>:", "x")),
+ "height": text.parse_int(extr("", "<")),
+ "comments" : text.parse_int(extr("Comments</b>:", "<")),
+ "favorites": text.parse_int(extr("Favorites</b>:", "<")),
+ "tags" : text.split_html(extr("class='taglist'>", "</span>")),
+ "description": text.unescape(text.remove_html(extr(
+ "<p>", "</p>"), "", "")),
+ "filename" : fname,
+ "extension" : ext,
+ "_mtime" : lmod,
+ }
+
+ d1, _, d2 = data["date"].partition(",")
+ data["date"] = text.parse_datetime(
+ d1[:-2] + d2, "%b %d %Y %I:%M %p", -5)
+
+ return data
+
+
+class AryionGalleryExtractor(AryionExtractor):
+ """Extractor for a user's gallery on eka's portal"""
+ subcategory = "gallery"
+ pattern = BASE_PATTERN + r"/(?:gallery/|user/|latest.php\?name=)([^/?&#]+)"
+ test = (
+ ("https://aryion.com/g4/gallery/jameshoward", {
+ "pattern": r"https://aryion\.com/g4/data\.php\?id=\d+$",
+ "range": "48-52",
+ "count": 5,
+ }),
+ ("https://aryion.com/g4/user/jameshoward"),
+ ("https://aryion.com/g4/latest.php?name=jameshoward"),
+ )
+
+ def posts(self):
+ url = "{}/g4/latest.php?name={}".format(self.root, self.user)
+
+ while True:
+ page = self.request(url).text
+ yield from text.extract_iter(
+ page, "class='thumb' href='/g4/view/", "'")
+
+ pos = page.find("Next &gt;&gt;")
+ if pos < 0:
+ return
+ url = self.root + text.rextract(page, "href='", "'", pos)[0]
+
+
+class AryionPostExtractor(AryionExtractor):
+ """Extractor for individual posts on eka's portal"""
+ subcategory = "post"
+ pattern = BASE_PATTERN + r"/view/(\d+)"
+ test = ("https://aryion.com/g4/view/510079", {
+ "url": "f233286fa5558c07ae500f7f2d5cb0799881450e",
+ "keyword": {
+ "artist" : "jameshoward",
+ "user" : "jameshoward",
+ "filename" : "jameshoward-510079-subscribestar_150",
+ "extension": "jpg",
+ "id" : 510079,
+ "width" : 1665,
+ "height" : 1619,
+ "size" : 784241,
+ "title" : "I'm on subscribestar now too!",
+ "description": r"re:Doesn't hurt to have a backup, right\?",
+ "tags" : ["Non-Vore", "subscribestar"],
+ "date" : "dt:2019-02-16 19:30:00",
+ "path" : [],
+ "views" : int,
+ "favorites": int,
+ "comments" : int,
+ "_mtime" : "Sat, 16 Feb 2019 19:30:34 GMT",
+ },
+ })
+
+ def posts(self):
+ post_id = self.user
+ self.user = None
+ return (post_id,)
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 19ee182..8986c99 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -122,23 +122,33 @@ class Extractor():
raise exception.HttpError(msg)
- def wait(self, *, seconds=None, until=None, reason=None, adjust=1):
- now = datetime.datetime.now()
+ def wait(self, *, seconds=None, until=None, adjust=1.0,
+ reason="rate limit reset"):
+ now = time.time()
if seconds:
seconds = float(seconds)
- until = now + datetime.timedelta(seconds=seconds)
+ until = now + seconds
elif until:
- until = datetime.datetime.fromtimestamp(float(until))
- seconds = (until - now).total_seconds()
+ if isinstance(until, datetime.datetime):
+ # convert to UTC timestamp
+ epoch = datetime.datetime(1970, 1, 1)
+ until = (until - epoch) / datetime.timedelta(0, 1)
+ else:
+ until = float(until)
+ seconds = until - now
else:
raise ValueError("Either 'seconds' or 'until' is required")
+ seconds += adjust
+ if seconds <= 0.0:
+ return
+
if reason:
- t = until.time()
+ t = datetime.datetime.fromtimestamp(until).time()
isotime = "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second)
self.log.info("Waiting until %s for %s.", isotime, reason)
- time.sleep(seconds + adjust)
+ time.sleep(seconds)
def _get_auth_info(self):
"""Return authentication information as (username, password) tuple"""
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index d6669d1..2dcf0b7 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -27,14 +27,15 @@ BASE_PATTERN = (
class DeviantartExtractor(Extractor):
- """Base class for deviantart extractors using the OAuth API"""
+ """Base class for deviantart extractors"""
category = "deviantart"
directory_fmt = ("{category}", "{username}")
filename_fmt = "{category}_{index}_{title}.{extension}"
cookiedomain = None
root = "https://www.deviantart.com"
+ _last_request = 0
- def __init__(self, match=None):
+ def __init__(self, match):
Extractor.__init__(self, match)
self.offset = 0
self.flat = self.config("flat", True)
@@ -43,10 +44,10 @@ class DeviantartExtractor(Extractor):
self.original = self.config("original", True)
self.user = match.group(1) or match.group(2)
self.group = False
- self.api = DeviantartAPI(self)
+ self.api = None
if self.quality:
- self.quality = "q_{}".format(self.quality)
+ self.quality = ",q_{}".format(self.quality)
if self.original != "image":
self._update_content = self._update_content_default
@@ -64,6 +65,8 @@ class DeviantartExtractor(Extractor):
return num
def items(self):
+ self.api = DeviantartOAuthAPI(self)
+
if self.user:
profile = self.api.user_profile(self.user)
self.group = not profile
@@ -95,12 +98,12 @@ class DeviantartExtractor(Extractor):
# https://github.com/r888888888/danbooru/issues/4069
intermediary, count = re.subn(
r"(/f/[^/]+/[^/]+)/v\d+/.*",
- r"/intermediary\1", content["src"])
+ r"/intermediary\1", content["src"], 1)
if count and self._check_url(intermediary):
content["src"] = intermediary
if self.quality:
content["src"] = re.sub(
- r"q_\d+", self.quality, content["src"])
+ r",q_\d+", self.quality, content["src"], 1)
yield self.commit(deviation, content)
@@ -266,6 +269,23 @@ class DeviantartExtractor(Extractor):
def _check_url(self, url):
return self.request(url, method="HEAD", fatal=False).status_code < 400
+ def _limited_request(self, url, **kwargs):
+ """Limits HTTP requests to one every 2 seconds"""
+ kwargs["fatal"] = None
+ diff = time.time() - DeviantartExtractor._last_request
+ if diff < 2.0:
+ delay = 2.0 - diff
+ self.log.debug("Sleeping %.2f seconds", delay)
+ time.sleep(delay)
+
+ while True:
+ response = self.request(url, **kwargs)
+ if response.status_code != 403 or \
+ b"Request blocked." not in response.content:
+ DeviantartExtractor._last_request = time.time()
+ return response
+ self.wait(seconds=180)
+
class DeviantartUserExtractor(DeviantartExtractor):
"""Extractor for an artist's user profile"""
@@ -293,6 +313,9 @@ class DeviantartUserExtractor(DeviantartExtractor):
), ("gallery",))
+###############################################################################
+# OAuth #######################################################################
+
class DeviantartGalleryExtractor(DeviantartExtractor):
"""Extractor for all deviations from an artist's gallery"""
subcategory = "gallery"
@@ -439,7 +462,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
}),
# multiple stash items
("https://sta.sh/21jf51j7pzl2", {
- "pattern": pattern,
+ "options": (("original", False),),
"count": 4,
}),
# downloadable, but no "content" field (#307)
@@ -447,8 +470,13 @@ class DeviantartStashExtractor(DeviantartExtractor):
"pattern": r"https://api-da\.wixmp\.com/_api/download/file",
"count": 1,
}),
+ # mixed folders and images (#659)
+ ("https://sta.sh/215twi387vfj", {
+ "options": (("original", False),),
+ "count": 4,
+ }),
("https://sta.sh/abcdefghijkl", {
- "exception": exception.HttpError,
+ "count": 0,
}),
)
@@ -459,21 +487,31 @@ class DeviantartStashExtractor(DeviantartExtractor):
self.user = None
self.stash_id = match.group(1)
- def deviations(self):
- url = "https://sta.sh/" + self.stash_id
- page = self.request(url).text
- deviation_id = text.extract(page, '//deviation/', '"')[0]
+ def deviations(self, stash_id=None):
+ if stash_id is None:
+ stash_id = self.stash_id
+ url = "https://sta.sh/" + stash_id
+ page = self._limited_request(url).text
- if deviation_id:
- return (self.api.deviation(deviation_id),)
+ if stash_id[0] == "0":
+ uuid = text.extract(page, '//deviation/', '"')[0]
+ if uuid:
+ yield self.api.deviation(uuid)
+ return
- else:
- data = {"_extractor": DeviantartStashExtractor}
- page = text.extract(page, 'id="stash-body"', 'class="footer"')[0]
- return [
- (url, data)
- for url in text.extract_iter(page, '<a href="', '"')
- ]
+ for item in text.extract_iter(
+ page, 'class="stash-thumb-container', '</div>'):
+ url = text.extract(item, '<a href="', '"')[0]
+
+ if url:
+ stash_id = url.rpartition("/")[2]
+ else:
+ stash_id = text.extract(item, 'gmi-stashid="', '"')[0]
+ stash_id = "2" + util.bencode(text.parse_int(
+ stash_id), "0123456789abcdefghijklmnopqrstuvwxyz")
+
+ if len(stash_id) > 2:
+ yield from self.deviations(stash_id)
class DeviantartFavoriteExtractor(DeviantartExtractor):
@@ -635,148 +673,10 @@ class DeviantartPopularExtractor(DeviantartExtractor):
deviation["popular"] = self.popular
-class DeviantartExtractorV2(DeviantartExtractor):
- """Base class for deviantart extractors using the NAPI"""
- cookiedomain = ".deviantart.com"
- cookienames = ("auth", "auth_secure", "userinfo")
- _warning = True
-
- def items(self):
- if self.original and not self._check_cookies(self.cookienames):
- self.original = False
- if self._warning:
- DeviantartExtractorV2._warning = False
- self.log.warning("No session cookies set: "
- "Disabling original file downloads.")
-
- yield Message.Version, 1
- for deviation in self.deviations():
- data = self.api.deviation_extended_fetch(
- deviation["deviationId"],
- deviation["author"]["username"],
- "journal" if deviation["isJournal"] else "art",
- )
-
- if "deviation" not in data:
- self.log.warning("Unable to fetch deviation ID %s",
- deviation["deviationId"])
- self.log.debug("Server response: %s", data)
- continue
-
- deviation = self._extract(data)
- if not deviation:
- continue
-
- yield Message.Directory, deviation
- yield Message.Url, deviation["target"]["src"], deviation
- if self.extra:
- for match in DeviantartStashExtractor.pattern.finditer(
- deviation["description"]):
- deviation["_extractor"] = DeviantartStashExtractor
- yield Message.Queue, match.group(0), deviation
-
- def _extract(self, data):
- deviation = data["deviation"]
- extended = deviation["extended"]
- media = deviation["media"]
- del deviation["extended"]
- del deviation["media"]
-
- # prepare deviation metadata
- deviation["description"] = extended.get("description", "")
- deviation["username"] = deviation["author"]["username"]
- deviation["_username"] = deviation["username"].lower()
- deviation["stats"] = extended["stats"]
- deviation["stats"]["comments"] = data["comments"]["total"]
- deviation["index"] = deviation["deviationId"]
- deviation["tags"] = [t["name"] for t in extended.get("tags") or ()]
- deviation["date"] = text.parse_datetime(
- deviation["publishedTime"])
- deviation["category_path"] = "/".join(
- extended[key]["displayNameEn"]
- for key in ("typeFacet", "contentFacet", "categoryFacet")
- if key in extended
- )
-
- # extract download target
- target = media["types"][-1]
- src = token = None
-
- if "textContent" in deviation:
- if not self.commit_journal:
- return None
- journal = deviation["textContent"]
- journal["html"] = journal["html"]["markup"]
- src = self.commit_journal(deviation, journal)[1]
-
- elif target["t"] == "gif":
- src = target["b"]
- token = media["token"][0]
-
- elif "download" in extended and self.original:
- target = extended["download"]
- src = target["url"]
- del target["url"]
-
- elif target["t"] == "video":
- # select largest video
- target = max(media["types"],
- key=lambda x: text.parse_int(x.get("q", "")[:-1]))
- src = target["b"]
-
- elif target["t"] == "flash":
- src = target["s"]
- if src.startswith("https://sandbox.deviantart.com"):
- # extract SWF file from "sandbox"
- src = text.extract(
- self.request(src).text, 'id="sandboxembed" src="', '"')[0]
-
- else:
- src = media["baseUri"]
- if "token" in media:
- token = media["token"][0]
-
- if "c" in target:
- src += "/" + target["c"].replace(
- "<prettyName>", media["prettyName"])
- if src.startswith("https://images-wixmp-"):
- if deviation["index"] <= 790677560:
- # https://github.com/r888888888/danbooru/issues/4069
- intermediary, count = re.subn(
- r"(/f/[^/]+/[^/]+)/v\d+/.*", r"/intermediary\1", src)
- if count and self._check_url(intermediary):
- src = intermediary
- if self.quality:
- src = re.sub(r"q_\d+", self.quality, src)
-
- # filename and extension metadata
- alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
- sub = re.compile(r"\W").sub
- deviation["filename"] = "".join((
- sub("_", deviation["title"].lower()), "_by_",
- sub("_", deviation["author"]["username"].lower()), "-d",
- util.bencode(deviation["index"], alphabet),
- ))
- if "extension" not in deviation:
- deviation["extension"] = text.ext_from_url(src)
-
- if token:
- src = src + "?token=" + token
- target["src"] = src
- deviation["target"] = target
- return deviation
-
- def _pagination(self, url, params, headers=None):
- while True:
- data = self.request(url, params=params, headers=headers).json()
- yield from data["results"]
+###############################################################################
+# Eclipse #####################################################################
- if not data["hasMore"]:
- return
- params["offset"] = data["nextOffset"]
-
-
-class DeviantartDeviationExtractor(DeviantartExtractorV2):
+class DeviantartDeviationExtractor(DeviantartExtractor):
"""Extractor for single deviations"""
subcategory = "deviation"
archive_fmt = "{index}.{extension}"
@@ -784,16 +684,13 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2):
test = (
(("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
"options": (("original", 0),),
- # "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
+ "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
}),
("https://www.deviantart.com/zzz/art/zzz-1234567890", {
- "count": 0,
+ "exception": exception.NotFoundError,
}),
(("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), {
- # "pattern": (r"https://www.deviantart.com/download/261986576"
- # r"/[\w-]+\.jpg\?token=\w+&ts=\d+"),
- "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
- r"/intermediary/f/[^/]+/[^.]+\.jpg")
+ "pattern": r"https://api-da\.wixmp\.com/_api/download/file",
}),
# wixmp URL rewrite
(("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), {
@@ -809,10 +706,10 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2):
"pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
r"/f/[^/]+/[^.]+\.gif\?token="),
}),
- # external URLs from description (#302)
+ # sta.sh URLs from description (#302)
(("https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498"), {
"options": (("extra", 1), ("original", 0)),
- "pattern": r"https?://sta\.sh/\w+$",
+ "pattern": DeviantartStashExtractor.pattern,
"range": "2-",
"count": 4,
}),
@@ -823,33 +720,21 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2):
"filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5",
"extension": "mp4",
"target": {
- "d": 306,
- "f": 19367585,
- "h": 720,
- "q": "720p",
- "t": "video",
- "w": 1364,
+ "duration": 306,
+ "filesize": 19367585,
+ "quality": "720p",
"src": str,
},
}
}),
- # archive
- ("https://www.deviantart.com/itsvenue/art/-brush-pngs-14-763300948", {
- # "pattern": r"https://.+deviantart.com/download/763300948/.*rar",
- "pattern": r"https://images-wixmp-\w+\.wixmp\.com/i/.*\.png"
- }),
- # swf
- ("https://www.deviantart.com/ikatxfruti/art/Bang-Bang-528130222", {
- "pattern": r"https://images-wixmp-.*wixmp.com/f/.*\.swf",
- }),
# journal
("https://www.deviantart.com/shimoda7/journal/ARTility-583755752", {
- "url": "f33f8127ab71819be7de849175b6d5f8b37bb629",
+ "url": "d34b2c9f873423e665a1b8ced20fcb75951694a3",
"pattern": "text:<!DOCTYPE html>\n",
}),
# journal-like post with isJournal == False (#419)
("https://www.deviantart.com/gliitchlord/art/brashstrokes-812942668", {
- "url": "1534d6ea0561247ab921d07505e57a9d663a833b",
+ "url": "e2e0044bd255304412179b6118536dbd9bb3bb0e",
"pattern": "text:<!DOCTYPE html>\n",
}),
# old-style URLs
@@ -863,19 +748,20 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2):
skip = Extractor.skip
def __init__(self, match):
- DeviantartExtractorV2.__init__(self, match)
+ DeviantartExtractor.__init__(self, match)
self.type = match.group(3)
self.deviation_id = match.group(4)
def deviations(self):
- return ({
- "deviationId": self.deviation_id,
- "author" : {"username": self.user},
- "isJournal" : self.type == "journal",
- },)
+ deviation = DeviantartEclipseAPI(self).deviation_extended_fetch(
+ self.deviation_id, self.user, self.type)
+ if "error" in deviation:
+ raise exception.NotFoundError("deviation")
+ return (self.api.deviation(
+ deviation["deviation"]["extended"]["deviationUuid"]),)
-class DeviantartScrapsExtractor(DeviantartExtractorV2):
+class DeviantartScrapsExtractor(DeviantartExtractor):
"""Extractor for an artist's scraps"""
subcategory = "scraps"
directory_fmt = ("{category}", "{username}", "Scraps")
@@ -888,24 +774,31 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2):
("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps"),
("https://shimoda7.deviantart.com/gallery/?catpath=scraps"),
)
+ cookiedomain = ".deviantart.com"
+ cookienames = ("auth", "auth_secure", "userinfo")
+ _warning = True
def deviations(self):
- url = self.root + "/_napi/da-user-profile/api/gallery/contents"
- params = {
- "username" : self.user,
- "offset" : self.offset,
- "limit" : "24",
- "scraps_folder": "true",
- }
- headers = {
- "Referer": "{}/{}/gallery/scraps".format(self.root, self.user),
- }
+ eclipse_api = DeviantartEclipseAPI(self)
+ if self._warning:
+ DeviantartScrapsExtractor._warning = False
+ if not self._check_cookies(self.cookienames):
+ self.log.warning(
+ "No session cookies set: Unable to fetch mature scraps.")
+
+ for obj in eclipse_api.gallery_scraps(self.user, self.offset):
+ deviation = obj["deviation"]
+ deviation_uuid = eclipse_api.deviation_extended_fetch(
+ deviation["deviationId"],
+ deviation["author"]["username"],
+ "journal" if deviation["isJournal"] else "art",
+ )["deviation"]["extended"]["deviationUuid"]
- for obj in self._pagination(url, params, headers):
- yield obj["deviation"]
+ yield self.api.deviation(deviation_uuid)
-class DeviantartFollowingExtractor(DeviantartExtractorV2):
+class DeviantartFollowingExtractor(DeviantartExtractor):
+ """Extractor for user's watched users"""
subcategory = "following"
pattern = BASE_PATTERN + "/about#watching$"
test = ("https://www.deviantart.com/shimoda7/about#watching", {
@@ -915,30 +808,19 @@ class DeviantartFollowingExtractor(DeviantartExtractorV2):
})
def items(self):
- url = "{}/_napi/da-user-profile/api/module/watching".format(self.root)
- params = {
- "username": self.user,
- "moduleid": self._module_id(self.user),
- "offset" : "0",
- "limit" : "24",
- }
+ eclipse_api = DeviantartEclipseAPI(self)
yield Message.Version, 1
- for user in self._pagination(url, params):
+ for user in eclipse_api.user_watching(self.user, self.offset):
url = "{}/{}".format(self.root, user["username"])
yield Message.Queue, url, user
- def _module_id(self, username):
- url = "{}/{}/about".format(self.root, username)
- page = self.request(url).text
- pos = page.find('\\"type\\":\\"watching\\"')
- if pos < 0:
- raise exception.NotFoundError("module")
- return text.rextract(page, '\\"id\\":', ',', pos)[0].strip('" ')
+###############################################################################
+# API Interfaces ##############################################################
-class DeviantartAPI():
- """Minimal interface for the DeviantArt API
+class DeviantartOAuthAPI():
+ """Interface for the DeviantArt OAuth API
Ref: https://www.deviantart.com/developers/http/v1/20160316
"""
@@ -1029,31 +911,6 @@ class DeviantartAPI():
params = {"mature_content": self.mature}
return self._call(endpoint, params)
- def deviation_extended_fetch(self, deviation_id, user, kind):
- url = ("https://www.deviantart.com/_napi/da-browse/shared_api"
- "/deviation/extended_fetch")
- headers = {"Referer": "https://www.deviantart.com/"}
- params = {
- "deviationid" : deviation_id,
- "username" : user,
- "type" : kind,
- "include_session": "false",
- }
- response = self.extractor.request(
- url, headers=headers, params=params, fatal=None)
- code = response.status_code
-
- if code == 404:
- raise exception.StopExtraction(
- "Your account must use the Eclipse interface.")
- elif code == 403 and b"Request blocked." in response.content:
- raise exception.StopExtraction(
- "Requests to deviantart.com blocked due to too much traffic.")
- try:
- return response.json()
- except Exception:
- return {"error": response.text}
-
def deviation_metadata(self, deviations):
""" Fetch deviation metadata for a set of deviations"""
if not deviations:
@@ -1225,11 +1082,84 @@ class DeviantartAPI():
return dmap
+class DeviantartEclipseAPI():
+ """Interface to the DeviantArt Eclipse API"""
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.log = extractor.log
+
+ def deviation_extended_fetch(self, deviation_id, user=None, kind=None):
+ endpoint = "da-browse/shared_api/deviation/extended_fetch"
+ params = {
+ "deviationid" : deviation_id,
+ "username" : user,
+ "type" : kind,
+ "include_session": "false",
+ }
+ return self._call(endpoint, params)
+
+ def gallery_scraps(self, user, offset=None):
+ endpoint = "da-user-profile/api/gallery/contents"
+ params = {
+ "username" : user,
+ "offset" : offset,
+ "limit" : "24",
+ "scraps_folder": "true",
+ }
+ return self._pagination(endpoint, params)
+
+ def user_watching(self, user, offset=None):
+ endpoint = "da-user-profile/api/module/watching"
+ params = {
+ "username": user,
+ "moduleid": self._module_id_watching(user),
+ "offset" : None,
+ "limit" : "24",
+ }
+ return self._pagination(endpoint, params)
+
+ def _call(self, endpoint, params=None):
+ url = "https://www.deviantart.com/_napi/" + endpoint
+ headers = {"Referer": "https://www.deviantart.com/"}
+
+ response = self.extractor._limited_request(
+ url, params=params, headers=headers, fatal=None)
+
+ if response.status_code == 404:
+ raise exception.StopExtraction(
+ "Your account must use the Eclipse interface.")
+ try:
+ return response.json()
+ except Exception:
+ return {"error": response.text}
+
+ def _pagination(self, endpoint, params=None):
+ while True:
+ data = self._call(endpoint, params)
+ yield from data["results"]
+
+ if not data["hasMore"]:
+ return
+ params["offset"] = data["nextOffset"]
+
+ def _module_id_watching(self, user):
+ url = "{}/{}/about".format(self.extractor.root, user)
+ page = self.extractor._limited_request(url).text
+ pos = page.find('\\"type\\":\\"watching\\"')
+ if pos < 0:
+ raise exception.NotFoundError("module")
+ return text.rextract(page, '\\"id\\":', ',', pos)[0].strip('" ')
+
+
@cache(maxage=10*365*24*3600, keyarg=0)
def _refresh_token_cache(original_token, new_token=None):
return new_token or original_token
+###############################################################################
+# Journal Formats #############################################################
+
SHADOW_TEMPLATE = """
<span class="shadow">
<img src="{src}" class="smshadow" width="{width}" height="{height}">
diff --git a/gallery_dl/extractor/hentainexus.py b/gallery_dl/extractor/hentainexus.py
index ad97eba..ef64942 100644
--- a/gallery_dl/extractor/hentainexus.py
+++ b/gallery_dl/extractor/hentainexus.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -22,7 +22,7 @@ class HentainexusGalleryExtractor(GalleryExtractor):
test = (
("https://hentainexus.com/view/5688", {
"url": "746d0043e20030f1171aae5ea113176607302517",
- "keyword": "77702b42f8f76ecfe5d8a14cfbbcbd855eb14d7f",
+ "keyword": "5e5bb4b1553b1c6e126b198f9ae017a1a5d0a5ad",
}),
("https://hentainexus.com/read/5688"),
)
@@ -42,6 +42,8 @@ class HentainexusGalleryExtractor(GalleryExtractor):
"title" : extr('<h1 class="title">', '</h1>'),
"artist" : rmve(extr('viewcolumn">Artist</td>' , '</td>')),
"book" : rmve(extr('viewcolumn">Book</td>' , '</td>')),
+ "circle" : rmve(extr('viewcolumn">Circle</td>' , '</td>')),
+ "event" : rmve(extr('viewcolumn">Event</td>' , '</td>')),
"language" : rmve(extr('viewcolumn">Language</td>' , '</td>')),
"magazine" : rmve(extr('viewcolumn">Magazine</td>' , '</td>')),
"parody" : rmve(extr('viewcolumn">Parody</td>' , '</td>')),
@@ -49,8 +51,36 @@ class HentainexusGalleryExtractor(GalleryExtractor):
"description": rmve(extr('viewcolumn">Description</td>', '</td>')),
}
data["lang"] = util.language_to_code(data["language"])
+ data["type"] = "Doujinshi" if 'doujin' in data["tags"] else "Manga"
+ data["title_conventional"] = self.join_title(
+ data["event"],
+ data["circle"],
+ data["artist"],
+ data["title"],
+ data["parody"],
+ data["book"],
+ data["magazine"],
+ )
return data
+ @staticmethod
+ def join_title(event, circle, artist, title, parody, book, magazine):
+ jt = ''
+ if event:
+ jt += '({}) '.format(event)
+ if circle:
+ jt += '[{} ({})] '.format(circle, artist)
+ else:
+ jt += '[{}] '.format(artist)
+ jt += title
+ if parody.lower() != 'original work':
+ jt += ' ({})'.format(parody)
+ if book:
+ jt += ' ({})'.format(book)
+ if magazine:
+ jt += ' ({})'.format(magazine)
+ return jt
+
def images(self, page):
url = "{}/read/{}".format(self.root, self.gallery_id)
extr = text.extract_from(self.request(url).text)
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index e0b0f50..85cfe49 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -64,7 +64,9 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.com"
r"(/manga/([^/?&#]+)/([^/?&#]+))")
test = ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", {
- "url": "111bc3ee14ce91d78c275770ef63b56c9ac15d8d",
+ "pattern": r"https://hiperdex.com/wp-content/uploads"
+ r"/WP-manga/data/manga_\w+/[0-9a-f]{32}/\d+\.webp",
+ "count": 9,
"keyword": {
"artist" : "Sasuga Kei",
"author" : "Sasuga Kei",
@@ -89,7 +91,8 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
def images(self, page):
return [
(url.strip(), None)
- for url in re.findall(r'id="image-\d+"\s+src="([^"]+)', page)
+ for url in re.findall(
+ r'id="image-\d+"\s+(?:data-)?src="([^"]+)', page)
]
@@ -122,16 +125,44 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
def chapters(self, page):
self.manga_data(self.manga, page)
results = []
- last = None
-
- page = text.extract(page, 'class="page-content-listing', '</ul>')[0]
- for match in HiperdexChapterExtractor.pattern.finditer(page):
- path = match.group(1)
- if last != path:
- last = path
- results.append((
- self.root + path,
- self.chapter_data(path.rpartition("/")[2]),
- ))
+ shortlink = text.extract(page, "rel='shortlink' href='", "'")[0]
+ data = {
+ "action": "manga_get_chapters",
+ "manga" : shortlink.rpartition("=")[2],
+ }
+ url = self.root + "/wp-admin/admin-ajax.php"
+ page = self.request(url, method="POST", data=data).text
+
+ for url in text.extract_iter(page, 'href="', '"', 320):
+ chapter = url.rpartition("/")[2]
+ results.append((url, self.chapter_data(chapter)))
+
+ return results
+
+
+class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
+ """Extractor for an artists's manga on hiperdex.com"""
+ subcategory = "artist"
+ categorytransfer = False
+ chapterclass = HiperdexMangaExtractor
+ reverse = False
+ pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.com"
+ r"(/manga-a(?:rtist|uthor)/([^/?&#]+))")
+ test = (
+ ("https://hiperdex.com/manga-artist/beck-ho-an/"),
+ ("https://hiperdex.com/manga-author/viagra/", {
+ "pattern": HiperdexMangaExtractor.pattern,
+ "count": ">= 6",
+ }),
+ )
+
+ def __init__(self, match):
+ MangaExtractor.__init__(self, match, self.root + match.group(1) + "/")
+
+ def chapters(self, page):
+ results = []
+ for info in text.extract_iter(page, 'id="manga-item-', '<img'):
+ url = text.extract(info, 'href="', '"')[0]
+ results.append((url, {}))
return results
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index c31de1c..7561c64 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -61,7 +61,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
"created_by" : "NTRshouldbeillegal",
"date" : "dt:2016-11-20 07:10:53",
"description" : "Enjoy.",
- "download_url": "/download/824778/277031/",
+ "download_url": "re:/download/(r/)?824778/277031/",
"genres" : list,
"id" : 277031,
"is_manga" : True,
@@ -72,7 +72,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
"permissions" : list,
"rating" : float,
"slug" : "okinami-no-koigokoro",
- "status" : "not_moderated",
+ "status" : str,
"tags" : list,
"title" : "Okinami no Koigokoro",
"url" : "/albums/okinami-no-koigokoro_277031/",
@@ -92,7 +92,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
"like_status" : "none",
"position" : int,
"resolution" : r"re:\d+x\d+",
- "status" : "not_moderated",
+ "status" : str,
"tags" : list,
"thumbnail" : str,
"title" : str,
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 36e0b62..3f07d21 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -9,7 +9,7 @@
"""Extractors for mastodon instances"""
from .common import Extractor, Message
-from .. import text, config, exception
+from .. import text, util, config, exception
import re
@@ -108,7 +108,7 @@ class MastodonAPI():
def account_search(self, query, limit=40):
"""Search for content"""
params = {"q": query, "limit": limit}
- return self._call("accounts/search", params)
+ return self._call("accounts/search", params).json()
def account_statuses(self, account_id):
"""Get an account's statuses"""
@@ -118,28 +118,38 @@ class MastodonAPI():
def status(self, status_id):
"""Fetch a Status"""
- return self._call("statuses/" + status_id)
+ return self._call("statuses/" + status_id).json()
def _call(self, endpoint, params=None):
url = "{}/api/v1/{}".format(self.root, endpoint)
- response = self.extractor.request(
- url, params=params, headers=self.headers)
- return self._parse(response)
+
+ while True:
+ response = self.extractor.request(
+ url, params=params, headers=self.headers, fatal=None)
+ code = response.status_code
+
+ if code < 400:
+ return response
+ if code == 404:
+ raise exception.NotFoundError()
+ if code == 429:
+ self.extractor.wait(until=text.parse_datetime(
+ response.headers["x-ratelimit-reset"],
+ "%Y-%m-%dT%H:%M:%S.%fZ",
+ ))
+ continue
+ raise exception.StopExtraction(response.json().get("error"))
def _pagination(self, endpoint, params):
url = "{}/api/v1/{}".format(self.root, endpoint)
while url:
- response = self.extractor.request(
- url, params=params, headers=self.headers)
- yield from self._parse(response)
- url = response.links.get("next", {}).get("url")
+ response = self._call(endpoint, params)
+ yield from response.json()
- @staticmethod
- def _parse(response):
- """Parse an API response"""
- if response.status_code == 404:
- raise exception.NotFoundError()
- return response.json()
+ url = response.links.get("next")
+ if not url:
+ return
+ url = url["url"]
def generate_extractors():
@@ -148,7 +158,7 @@ def generate_extractors():
symtable = globals()
extractors = config.get(("extractor",), "mastodon")
if extractors:
- EXTRACTORS.update(extractors)
+ util.combine_dict(EXTRACTORS, extractors)
config.set(("extractor",), "mastodon", EXTRACTORS)
for instance, info in EXTRACTORS.items():
@@ -189,14 +199,26 @@ def generate_extractors():
EXTRACTORS = {
+ "mastodon.social": {
+ "category" : "mastodon.social",
+ "access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48",
+ "client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo",
+ "client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI",
+ },
"pawoo.net": {
"category" : "pawoo",
- "access-token" : "286462927198d0cf3e24683e91c8259a"
- "ac4367233064e0570ca18df2ac65b226",
- "client-id" : "97b142b6904abf97a1068d51a7bc2f2f"
- "cf9323cef81f13cb505415716dba7dac",
- "client-secret": "e45bef4bad45b38abf7d9ef88a646b73"
- "75e7fb2532c31a026327a93549236481",
+ "access-token" : "c12c9d275050bce0dc92169a28db09d7"
+ "0d62d0a75a8525953098c167eacd3668",
+ "client-id" : "978a25f843ec01e53d09be2c290cd75c"
+ "782bc3b7fdbd7ea4164b9f3c3780c8ff",
+ "client-secret": "9208e3d4a7997032cf4f1b0e12e5df38"
+ "8428ef1fadb446dcfeb4f5ed6872d97b",
+ },
+ "baraag.net": {
+ "category" : "baraag",
+ "access-token" : "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0",
+ "client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o",
+ "client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY",
},
}
diff --git a/gallery_dl/extractor/myportfolio.py b/gallery_dl/extractor/myportfolio.py
index 95799cf..51b314a 100644
--- a/gallery_dl/extractor/myportfolio.py
+++ b/gallery_dl/extractor/myportfolio.py
@@ -23,18 +23,24 @@ class MyportfolioGalleryExtractor(Extractor):
r"(?:https?://)?([^.]+\.myportfolio\.com))"
r"(/[^/?&#]+)?")
test = (
- ("https://hannahcosgrove.myportfolio.com/niamh-1", {
- "url": "8cbd73a73e5bf3b4f5d1b1d4a1eb114c01a72a66",
- "keyword": "7a460bb5641e648ae70702ff91c2fb11054b0e0b",
+ ("https://andrewling.myportfolio.com/volvo-xc-90-hybrid", {
+ "url": "acea0690c76db0e5cf267648cefd86e921bc3499",
+ "keyword": "6ac6befe2ee0af921d24cf1dd4a4ed71be06db6d",
}),
- ("https://hannahcosgrove.myportfolio.com/lfw", {
- "pattern": r"https://hannahcosgrove\.myportfolio\.com/[^/?&#+]+$",
- "count": ">= 8",
+ ("https://andrewling.myportfolio.com/", {
+ "pattern": r"https://andrewling\.myportfolio\.com/[^/?&#+]+$",
+ "count": ">= 6",
}),
+ # no explicit title
+ ("https://stevenilousphotography.myportfolio.com/society", {
+ "keyword": "49e7ff6322645c22b409280656202c2736a380c9",
+ }),
+ # custom domain
("myportfolio:https://tooco.com.ar/6-of-diamonds-paradise-bird", {
"count": 3,
}),
("myportfolio:https://tooco.com.ar/", {
+ "pattern": pattern,
"count": ">= 40",
}),
)
@@ -80,8 +86,11 @@ class MyportfolioGalleryExtractor(Extractor):
title, pos = text.extract(
page, '<h1 ', '</h1>', pos)
- title = title.partition(">")[2]
- user = user[:-len(title)-3]
+ if title:
+ title = title.partition(">")[2]
+ user = user[:-len(title)-3]
+ else:
+ user, _, title = user.partition(" - ")
return {
"user": text.unescape(user),
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 2f5b429..c06721c 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -178,9 +178,9 @@ class OAuthDeviantart(OAuthBase):
self._oauth2_authorization_code_grant(
self.oauth_config(
- "client-id", deviantart.DeviantartAPI.CLIENT_ID),
+ "client-id", deviantart.DeviantartOAuthAPI.CLIENT_ID),
self.oauth_config(
- "client-secret", deviantart.DeviantartAPI.CLIENT_SECRET),
+ "client-secret", deviantart.DeviantartOAuthAPI.CLIENT_SECRET),
"https://www.deviantart.com/oauth2/authorize",
"https://www.deviantart.com/oauth2/token",
scope="browse",
diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py
index 41b1039..064967d 100644
--- a/gallery_dl/extractor/piczel.py
+++ b/gallery_dl/extractor/piczel.py
@@ -10,7 +10,6 @@
from .common import Extractor, Message
from .. import text
-import json
class PiczelExtractor(Extractor):
@@ -137,8 +136,5 @@ class PiczelImageExtractor(PiczelExtractor):
self.image_id = match.group(1)
def posts(self):
- url = "{}/gallery/image/{}".format(self.root, self.image_id)
- page = self.request(url).text
- data = json.loads(text.extract(
- page, 'window.__PRELOADED_STATE__ =', '</script>')[0])
- return (data["gallery"]["images"]["byId"][self.image_id],)
+ url = "{}/api/gallery/{}".format(self.root, self.image_id)
+ return (self.request(url).json(),)
diff --git a/gallery_dl/extractor/realbooru.py b/gallery_dl/extractor/realbooru.py
index 70b4833..f6bb4df 100644
--- a/gallery_dl/extractor/realbooru.py
+++ b/gallery_dl/extractor/realbooru.py
@@ -53,7 +53,7 @@ class RealbooruPostExtractor(booru.PostMixin, RealbooruExtractor):
"options": (("tags", True),),
"keyword": {
"tags_general" : str,
- "tags_metadata": "tagme",
+ "tags_metadata": "cute tagme",
"tags_model" : "jennifer_lawrence",
},
})
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index a312c1c..d0232cc 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -313,8 +313,7 @@ class RedditAPI():
remaining = response.headers.get("x-ratelimit-remaining")
if remaining and float(remaining) < 2:
- reset = response.headers["x-ratelimit-reset"]
- self.extractor.wait(seconds=reset, reason="rate limit reset")
+ self.extractor.wait(seconds=response.headers["x-ratelimit-reset"])
return self._call(endpoint, params)
data = response.json()
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 0505fa9..7e99823 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -418,7 +418,7 @@ class TumblrAPI(oauth.OAuth1API):
reset = response.headers.get("x-ratelimit-perhour-reset")
if reset:
self.log.info("Hourly API rate limit exceeded")
- self.extractor.wait(seconds=reset, reason="rate limit reset")
+ self.extractor.wait(seconds=reset)
return self._call(blog, endpoint, params)
raise exception.StopExtraction(data)
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index cbb075c..03ce3dd 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -224,8 +224,7 @@ class TwitterExtractor(Extractor):
if response.status_code == 429 or \
response.headers.get("x-rate-limit-remaining") == "0":
if self.logged_in:
- reset = response.headers.get("x-rate-limit-reset")
- self.wait(until=reset, reason="rate limit reset")
+ self.wait(until=response.headers.get("x-rate-limit-reset"))
else:
_guest_token.invalidate()
return self._video_from_tweet(tweet_id)
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index a020064..0306112 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -172,7 +172,7 @@ class VscoCollectionExtractor(VscoExtractor):
url = "{}/api/2.0/collections/{}/medias".format(self.root, cid)
params = {"page": 2, "size": "20"}
return self._pagination(url, params, tkn, "medias", (
- data["medias"]["byId"][mid]["media"]
+ data["medias"]["byId"][mid["id"]]["media"]
for mid in data
["collections"]["byCollectionId"][cid]["byPage"]["1"]["collection"]
))
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 6a779d9..9539c2f 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -118,7 +118,7 @@ class WeiboStatusExtractor(WeiboExtractor):
"""Extractor for images from a status on weibo.cn"""
subcategory = "status"
pattern = (r"(?:https?://)?(?:www\.|m\.)?weibo\.c(?:om|n)"
- r"/(?:detail|status|\d+)/(\d+)")
+ r"/(?:detail|status|\d+)/(\w+)")
test = (
("https://m.weibo.cn/detail/4323047042991618", {
"pattern": r"https?://wx\d+.sinaimg.cn/large/\w+.jpg",
@@ -130,6 +130,10 @@ class WeiboStatusExtractor(WeiboExtractor):
("https://m.weibo.cn/status/4268682979207023", {
"exception": exception.NotFoundError,
}),
+ # non-numeric status ID (#664)
+ ("https://weibo.com/3314883543/Iy7fj4qVg", {
+ "pattern": r"https?://f.video.weibocdn.com/\w+\.mp4\?label=mp4_hd",
+ }),
("https://m.weibo.cn/status/4339748116375525"),
("https://m.weibo.cn/5746766133/4339748116375525"),
)
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index a3f4e0a..3bb6390 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -233,7 +233,7 @@ def parse_timestamp(ts, default=None):
return default
-def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z"):
+def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z", utcoffset=0):
"""Create a datetime object by parsing 'date_string'"""
try:
if format.endswith("%z") and date_string[-3] == ":":
@@ -244,7 +244,11 @@ def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z"):
d = datetime.datetime.strptime(ds, format)
o = d.utcoffset()
if o is not None:
- d = d.replace(tzinfo=None) - o # convert to naive UTC
+ # convert to naive UTC
+ d = d.replace(tzinfo=None) - o
+ elif utcoffset:
+ # apply manual UTC offset
+ d += datetime.timedelta(0, utcoffset * -3600)
return d
except (TypeError, IndexError, KeyError):
return None
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 47fad9e..83cf84b 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -840,16 +840,15 @@ class PathFormat():
shutil.copyfile(self.temppath, self.realpath)
os.unlink(self.temppath)
- if "_mtime" in self.kwdict:
+ mtime = self.kwdict.get("_mtime")
+ if mtime:
# Set file modification time
- mtime = self.kwdict["_mtime"]
- if mtime:
- try:
- if isinstance(mtime, str):
- mtime = mktime_tz(parsedate_tz(mtime))
- os.utime(self.realpath, (time.time(), mtime))
- except Exception:
- pass
+ try:
+ if isinstance(mtime, str):
+ mtime = mktime_tz(parsedate_tz(mtime))
+ os.utime(self.realpath, (time.time(), mtime))
+ except Exception:
+ pass
class DownloadArchive():
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 37d133e..7905500 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.13.3"
+__version__ = "1.13.4"
diff --git a/test/test_extractor.py b/test/test_extractor.py
index 2555b58..e6f4963 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -1,17 +1,22 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2018-2019 Mike Fährmann
+# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
import sys
-import unittest
+import time
import string
+from datetime import datetime, timedelta
+
+import unittest
+from unittest.mock import patch
from gallery_dl import extractor
+from gallery_dl.extractor import mastodon
from gallery_dl.extractor.common import Extractor, Message
from gallery_dl.extractor.directlink import DirectlinkExtractor as DLExtractor
@@ -26,7 +31,7 @@ class FakeExtractor(Extractor):
yield Message.Url, "text:foobar", {}
-class TestExtractor(unittest.TestCase):
+class TestExtractorModule(unittest.TestCase):
VALID_URIS = (
"https://example.org/file.jpg",
"tumblr:foobar",
@@ -170,5 +175,116 @@ class TestExtractor(unittest.TestCase):
self.assertEqual(expected, extr.__name__)
+class TestExtractorWait(unittest.TestCase):
+
+ def test_wait_seconds(self):
+ extr = extractor.find("test:")
+ seconds = 5
+ until = time.time() + seconds
+
+ with patch("time.sleep") as sleep, patch.object(extr, "log") as log:
+ extr.wait(seconds=seconds)
+
+ sleep.assert_called_once_with(6.0)
+
+ calls = log.info.mock_calls
+ self.assertEqual(len(calls), 1)
+ self._assert_isotime(calls[0][1][1], until)
+
+ def test_wait_until(self):
+ extr = extractor.find("test:")
+ until = time.time() + 5
+
+ with patch("time.sleep") as sleep, patch.object(extr, "log") as log:
+ extr.wait(until=until)
+
+ calls = sleep.mock_calls
+ self.assertEqual(len(calls), 1)
+ self.assertAlmostEqual(calls[0][1][0], 6.0, places=1)
+
+ calls = log.info.mock_calls
+ self.assertEqual(len(calls), 1)
+ self._assert_isotime(calls[0][1][1], until)
+
+ def test_wait_until_datetime(self):
+ extr = extractor.find("test:")
+ until = datetime.utcnow() + timedelta(seconds=5)
+ until_local = datetime.now() + timedelta(seconds=5)
+
+ with patch("time.sleep") as sleep, patch.object(extr, "log") as log:
+ extr.wait(until=until)
+
+ calls = sleep.mock_calls
+ self.assertEqual(len(calls), 1)
+ self.assertAlmostEqual(calls[0][1][0], 6.0, places=1)
+
+ calls = log.info.mock_calls
+ self.assertEqual(len(calls), 1)
+ self._assert_isotime(calls[0][1][1], until_local)
+
+ def _assert_isotime(self, output, until):
+ if not isinstance(until, datetime):
+ until = datetime.fromtimestamp(until)
+ o = self._isotime_to_seconds(output)
+ u = self._isotime_to_seconds(until.time().isoformat()[:8])
+ self.assertLess(o-u, 1.0)
+
+ @staticmethod
+ def _isotime_to_seconds(isotime):
+ parts = isotime.split(":")
+ return int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2])
+
+
+class TextExtractorOAuth(unittest.TestCase):
+
+ @classmethod
+ def setUpClass(cls):
+ mastodon.generate_extractors()
+
+ def test_oauth1(self):
+ for category in ("flickr", "smugmug", "tumblr"):
+ extr = extractor.find("oauth:" + category)
+
+ with patch.object(extr, "_oauth1_authorization_flow") as m:
+ for msg in extr:
+ pass
+ self.assertEqual(len(m.mock_calls), 1)
+
+ def test_oauth2(self):
+ for category in ("deviantart", "reddit"):
+ extr = extractor.find("oauth:" + category)
+
+ with patch.object(extr, "_oauth2_authorization_code_grant") as m:
+ for msg in extr:
+ pass
+ self.assertEqual(len(m.mock_calls), 1)
+
+ def test_oauth2_mastodon(self):
+ extr = extractor.find("oauth:mastodon:pawoo.net")
+
+ with patch.object(extr, "_oauth2_authorization_code_grant") as m, \
+ patch.object(extr, "_register") as r:
+ for msg in extr:
+ pass
+ self.assertEqual(len(r.mock_calls), 0)
+ self.assertEqual(len(m.mock_calls), 1)
+
+ def test_oauth2_mastodon_unknown(self):
+ extr = extractor.find("oauth:mastodon:example.com")
+
+ with patch.object(extr, "_oauth2_authorization_code_grant") as m, \
+ patch.object(extr, "_register") as r:
+ r.return_value = {
+ "client-id" : "foo",
+ "client-secret": "bar",
+ }
+
+ for msg in extr:
+ pass
+
+ self.assertEqual(len(r.mock_calls), 1)
+ self.assertEqual(len(m.mock_calls), 1)
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/test/test_results.py b/test/test_results.py
index b697d15..9064810 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -27,7 +27,9 @@ TRAVIS_SKIP = {
# temporary issues, etc.
BROKEN = {
- "myportfolio",
+ "dokireader",
+ "mangafox",
+ "mangahere",
"photobucket",
"worldthree",
}
diff --git a/test/test_text.py b/test/test_text.py
index 6a6d83a..0390823 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2015-2018 Mike Fährmann
+# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -376,6 +376,10 @@ class TestText(unittest.TestCase):
datetime.datetime(2019, 5, 7, 12, 25, 2),
)
self.assertEqual(
+ f("2019-05-07T21:25:02", "%Y-%m-%dT%H:%M:%S", utcoffset=9),
+ datetime.datetime(2019, 5, 7, 12, 25, 2),
+ )
+ self.assertEqual(
f("2019-05-07 21:25:02"),
"2019-05-07 21:25:02",
)