about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Unit 193 <unit193@ubuntu.com> 2020-01-09 22:31:31 -0500
committer: Unit 193 <unit193@ubuntu.com> 2020-01-09 22:31:31 -0500
commit 89bf167db5a998a217135f55593391a337bdad31 (patch)
tree adcd59e7b6f49e0826ef07f0f0e138282a8ae108
parent 3bf3f951e09ae597552e35996d843b554e593c78 (diff)
parent bc435e826dbe37969d9cbe280f58810d054932cc (diff)
download gallery-dl-89bf167db5a998a217135f55593391a337bdad31.tar.bz2
gallery-dl-89bf167db5a998a217135f55593391a337bdad31.tar.xz
gallery-dl-89bf167db5a998a217135f55593391a337bdad31.tar.zst
Update upstream source from tag 'upstream/1.12.2'
Update to upstream version '1.12.2' with Debian dir 767bd062bb1809128547cb7c3ace169e3501bbcc
-rw-r--r-- PKG-INFO 8
-rw-r--r-- README.rst 6
-rw-r--r-- data/man/gallery-dl.1 2
-rw-r--r-- data/man/gallery-dl.conf.5 16
-rw-r--r-- gallery_dl.egg-info/PKG-INFO 8
-rw-r--r-- gallery_dl/cache.py 2
-rw-r--r-- gallery_dl/extractor/artstation.py 59
-rw-r--r-- gallery_dl/extractor/common.py 23
-rw-r--r-- gallery_dl/extractor/deviantart.py 87
-rw-r--r-- gallery_dl/extractor/directlink.py 2
-rw-r--r-- gallery_dl/extractor/exhentai.py 2
-rw-r--r-- gallery_dl/extractor/flickr.py 3
-rw-r--r-- gallery_dl/extractor/foolfuuka.py 4
-rw-r--r-- gallery_dl/extractor/imagefap.py 28
-rw-r--r-- gallery_dl/extractor/imgur.py 2
-rw-r--r-- gallery_dl/extractor/luscious.py 4
-rw-r--r-- gallery_dl/extractor/mangadex.py 22
-rw-r--r-- gallery_dl/extractor/mangahere.py 9
-rw-r--r-- gallery_dl/extractor/mastodon.py 3
-rw-r--r-- gallery_dl/extractor/newgrounds.py 2
-rw-r--r-- gallery_dl/extractor/pinterest.py 2
-rw-r--r-- gallery_dl/extractor/pixiv.py 50
-rw-r--r-- gallery_dl/extractor/reddit.py 12
-rw-r--r-- gallery_dl/extractor/smugmug.py 3
-rw-r--r-- gallery_dl/extractor/tumblr.py 27
-rw-r--r-- gallery_dl/extractor/twitter.py 58
-rw-r--r-- gallery_dl/extractor/wikiart.py 4
-rw-r--r-- gallery_dl/job.py 21
-rw-r--r-- gallery_dl/postprocessor/metadata.py 31
-rw-r--r-- gallery_dl/util.py 11
-rw-r--r-- gallery_dl/version.py 2
-rw-r--r-- test/test_postprocessor.py 29
32 files changed, 392 insertions, 150 deletions
diff --git a/PKG-INFO b/PKG-INFO
index b7094a1..0e6429e 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.12.1
+Version: 1.12.2
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -95,8 +95,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.bin>`__
These executables include a Python 3.7 interpreter
and all required Python packages.
@@ -241,7 +241,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.1.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.2.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/README.rst b/README.rst
index 90ca29a..e6846b6 100644
--- a/README.rst
+++ b/README.rst
@@ -84,8 +84,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.bin>`__
These executables include a Python 3.7 interpreter
and all required Python packages.
@@ -230,7 +230,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.1.tar.gz
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.2.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index a530760..7249537 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2019-12-22" "1.12.1" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2020-01-05" "1.12.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 07f1b88..7e7993a 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2019-12-22" "1.12.1" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2020-01-05" "1.12.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1662,6 +1662,20 @@ Select how to write metadata.
* \f[I]"custom"\f[]: result of applying \f[I]metadata.content-format\f[]
to a file's metadata dictionary
+.SS metadata.directory
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"."\f[]
+
+.IP "Example:" 4
+"metadata"
+
+.IP "Description:" 4
+Directory where metadata files are stored in relative to the
+current target location for file downloads.
+
.SS metadata.extension
.IP "Type:" 6
\f[I]string\f[]
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index a8700a6..3aa6d61 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.12.1
+Version: 1.12.2
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -95,8 +95,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.1/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.bin>`__
These executables include a Python 3.7 interpreter
and all required Python packages.
@@ -241,7 +241,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.1.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.2.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py
index 1824195..c48b53f 100644
--- a/gallery_dl/cache.py
+++ b/gallery_dl/cache.py
@@ -37,7 +37,7 @@ class CacheDecorator():
def update(self, key, value):
self.cache[key] = value
- def invalidate(self, key):
+ def invalidate(self, key=""):
try:
del self.cache[key]
except KeyError:
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index 2892bd4..ceda29c 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -83,14 +83,20 @@ class ArtstationExtractor(Extractor):
response = self.request(url, notfound="user")
return response.json()
- def _pagination(self, url, params=None):
- if not params:
- params = {}
+ def _pagination(self, url, params=None, json=None):
+ if json:
+ params = json
+ kwargs = {"json": json}
+ else:
+ if not params:
+ params = {}
+ kwargs = {"params": params}
+
params["page"] = 1
total = 0
while True:
- data = self.request(url, params=params).json()
+ data = self.request(url, **kwargs).json()
yield from data["data"]
total += len(data["data"])
@@ -268,34 +274,38 @@ class ArtstationChallengeExtractor(ArtstationExtractor):
class ArtstationSearchExtractor(ArtstationExtractor):
"""Extractor for artstation search results"""
subcategory = "search"
- directory_fmt = ("{category}", "Searches", "{search[searchterm]}")
- archive_fmt = "s_{search[searchterm]}_{asset[id]}"
+ directory_fmt = ("{category}", "Searches", "{search[query]}")
+ archive_fmt = "s_{search[query]}_{asset[id]}"
pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
r"/search/?\?([^#]+)")
- test = ("https://www.artstation.com/search?sorting=recent&q=ancient",)
+ test = ("https://www.artstation.com/search?q=ancient&sort_by=rank", {
+ "range": "1-20",
+ "count": 20,
+ })
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
query = text.parse_query(match.group(1))
- self.searchterm = query.get("q", "")
- self.order = query.get("sorting", "recent").lower()
+ self.query = query.get("q", "")
+ self.sorting = query.get("sort_by", "rank").lower()
def metadata(self):
return {"search": {
- "searchterm": self.searchterm,
- "order": self.order,
+ "query" : self.query,
+ "sorting": self.sorting,
}}
def projects(self):
- order = "likes_count" if self.order == "likes" else "published_at"
- url = "{}/search/projects.json".format(self.root)
- params = {
- "direction": "desc",
- "order": order,
- "q": self.searchterm,
- # "show_pro_first": "true",
- }
- return self._pagination(url, params)
+ url = "{}/api/v2/search/projects.json".format(self.root)
+ return self._pagination(url, json={
+ "additional_fields": "[]",
+ "filters" : "[]",
+ "page" : None,
+ "per_page" : "50",
+ "pro_first" : "1",
+ "query" : self.query,
+ "sorting" : self.sorting,
+ })
class ArtstationArtworkExtractor(ArtstationExtractor):
@@ -305,7 +315,10 @@ class ArtstationArtworkExtractor(ArtstationExtractor):
archive_fmt = "A_{asset[id]}"
pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
r"/artwork/?\?([^#]+)")
- test = ("https://www.artstation.com/artwork?sorting=latest",)
+ test = ("https://www.artstation.com/artwork?sorting=latest", {
+ "range": "1-20",
+ "count": 20,
+ })
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
@@ -316,9 +329,7 @@ class ArtstationArtworkExtractor(ArtstationExtractor):
def projects(self):
url = "{}/projects.json".format(self.root)
- params = self.query.copy()
- params["page"] = 1
- return self._pagination(url, params)
+ return self._pagination(url, self.query.copy())
class ArtstationImageExtractor(ArtstationExtractor):
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index a1a4890..380bcc7 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2019 Mike Fährmann
+# Copyright 2014-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -122,6 +122,24 @@ class Extractor():
raise exception.HttpError(msg)
+ def wait(self, *, seconds=None, until=None, reason=None, adjust=1):
+ now = datetime.datetime.now()
+
+ if seconds:
+ seconds = float(seconds)
+ until = now + datetime.timedelta(seconds=seconds)
+ elif until:
+ until = datetime.datetime.fromtimestamp(float(until))
+ seconds = (until - now).total_seconds()
+ else:
+ raise ValueError("Either 'seconds' or 'until' is required")
+
+ if reason:
+ t = until.time()
+ isotime = "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second)
+ self.log.info("Waiting until %s for %s.", isotime, reason)
+ time.sleep(seconds + adjust)
+
def _get_auth_info(self):
"""Return authentication information as (username, password) tuple"""
username = self.config("username")
@@ -170,6 +188,9 @@ class Extractor():
def _init_cookies(self):
"""Populate the session's cookiejar"""
+ if self.cookiedomain is None:
+ return
+
cookies = self.config("cookies")
if cookies:
if isinstance(cookies, dict):
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 604966f..02a14e3 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -31,6 +31,7 @@ class DeviantartExtractor(Extractor):
category = "deviantart"
directory_fmt = ("{category}", "{username}")
filename_fmt = "{category}_{index}_{title}.{extension}"
+ cookiedomain = None
root = "https://www.deviantart.com"
def __init__(self, match=None):
@@ -475,7 +476,7 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
subcategory = "favorite"
directory_fmt = ("{category}", "{username}", "Favourites")
archive_fmt = "f_{_username}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/favourites/?(?:\?catpath=/)?$"
+ pattern = BASE_PATTERN + r"/favourites(?:/all|/?\?catpath=)?/?$"
test = (
("https://www.deviantart.com/h3813067/favourites/", {
"options": (("metadata", True), ("flat", False)), # issue #271
@@ -484,8 +485,10 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
("https://www.deviantart.com/h3813067/favourites/", {
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
}),
+ ("https://www.deviantart.com/h3813067/favourites/all"),
("https://www.deviantart.com/h3813067/favourites/?catpath=/"),
("https://h3813067.deviantart.com/favourites/"),
+ ("https://h3813067.deviantart.com/favourites/all"),
("https://h3813067.deviantart.com/favourites/?catpath=/"),
)
@@ -573,12 +576,22 @@ class DeviantartPopularExtractor(DeviantartExtractor):
directory_fmt = ("{category}", "Popular",
"{popular[range]}", "{popular[search]}")
archive_fmt = "P_{popular[range]}_{popular[search]}_{index}.{extension}"
- pattern = (r"(?:https?://)?www\.deviantart\.com"
- r"((?:/\w+)*)/(?:popular-([^/?&#]+))/?(?:\?([^#]*))?")
+ pattern = (r"(?:https?://)?www\.deviantart\.com/(?:"
+ r"search(?:/deviations)?"
+ r"|(?:deviations/?)?\?order=(popular-[^/?&#]+)"
+ r"|((?:[\w-]+/)*)(popular-[^/?&#]+)"
+ r")/?(?:\?([^#]*))?")
test = (
+ ("https://www.deviantart.com/?order=popular-all-time", {
+ "options": (("original", False),),
+ "range": "1-30",
+ "count": 30,
+ }),
("https://www.deviantart.com/popular-24-hours/?q=tree+house", {
"options": (("original", False),),
}),
+ ("https://www.deviantart.com/search?q=tree"),
+ ("https://www.deviantart.com/search/deviations?order=popular-1-week"),
("https://www.deviantart.com/artisan/popular-all-time/?q=tree"),
)
@@ -587,13 +600,20 @@ class DeviantartPopularExtractor(DeviantartExtractor):
self.search_term = self.time_range = self.category_path = None
self.user = ""
- path, trange, query = match.groups()
+ trange1, path, trange2, query = match.groups()
+ trange = trange1 or trange2
+ query = text.parse_query(query)
+
+ if not trange:
+ trange = query.get("order")
+
if path:
- self.category_path = path.lstrip("/")
+ self.category_path = path.strip("/")
if trange:
+ trange = trange[8:] if trange.startswith("popular-") else ""
self.time_range = trange.replace("-", "").replace("hours", "hr")
if query:
- self.search_term = text.parse_query(query).get("q")
+ self.search_term = query.get("q")
self.popular = {
"search": self.search_term or "",
@@ -739,6 +759,15 @@ class DeviantartExtractorV2(DeviantartExtractor):
deviation["target"] = target
return deviation
+ def _pagination(self, url, params, headers=None):
+ while True:
+ data = self.request(url, params=params, headers=headers).json()
+ yield from data["results"]
+
+ if not data["hasMore"]:
+ return
+ params["offset"] = data["nextOffset"]
+
class DeviantartDeviationExtractor(DeviantartExtractorV2):
"""Extractor for single deviations"""
@@ -863,15 +892,40 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2):
"Referer": "{}/{}/gallery/scraps".format(self.root, self.user),
}
- while True:
- data = self.request(url, params=params, headers=headers).json()
+ for obj in self._pagination(url, params, headers):
+ yield obj["deviation"]
- for obj in data["results"]:
- yield obj["deviation"]
- if not data["hasMore"]:
- return
- params["offset"] = data["nextOffset"]
+class DeviantartFollowingExtractor(DeviantartExtractorV2):
+ subcategory = "following"
+ pattern = BASE_PATTERN + "/about#watching$"
+ test = ("https://www.deviantart.com/shimoda7/about#watching", {
+ "pattern": DeviantartUserExtractor.pattern,
+ "range": "1-50",
+ "count": 50,
+ })
+
+ def items(self):
+ url = "{}/_napi/da-user-profile/api/module/watching".format(self.root)
+ params = {
+ "username": self.user,
+ "moduleid": self._module_id(self.user),
+ "offset" : "0",
+ "limit" : "24",
+ }
+
+ yield Message.Version, 1
+ for user in self._pagination(url, params):
+ url = "{}/{}".format(self.root, user["username"])
+ yield Message.Queue, url, user
+
+ def _module_id(self, username):
+ url = "{}/{}/about".format(self.root, username)
+ page = self.request(url).text
+ pos = page.find('\\"type\\":\\"watching\\"')
+ if pos < 0:
+ raise exception.NotFoundError("module")
+ return text.rextract(page, '\\"id\\":', ',', pos)[0].strip('" ')
class DeviantartAPI():
@@ -1076,7 +1130,7 @@ class DeviantartAPI():
return data
def _pagination(self, endpoint, params, extend=True):
- public = True
+ public = warn = True
while True:
data = self._call(endpoint, params, public=public)
if "results" not in data:
@@ -1089,7 +1143,8 @@ class DeviantartAPI():
self.log.debug("Switching to private access token")
public = False
continue
- elif data["has_more"]:
+ elif data["has_more"] and warn:
+ warn = False
self.log.warning(
"Private deviations detected! Run 'gallery-dl "
"oauth:deviantart' and follow the instructions to "
diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py
index 80db096..1d17658 100644
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@@ -55,9 +55,11 @@ class DirectlinkExtractor(Extractor):
for key, value in data.items():
if value:
data[key] = text.unquote(value)
+
data["path"], _, name = data["path"].rpartition("/")
data["filename"], _, ext = name.rpartition(".")
data["extension"] = ext.lower()
+ data["_http_headers"] = {"Referer": self.url}
yield Message.Version, 1
yield Message.Directory, data
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index cba9627..6cc3abc 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -23,7 +23,7 @@ BASE_PATTERN = r"(?:https?://)?(e[x-]|g\.e-)hentai\.org"
class ExhentaiExtractor(Extractor):
"""Base class for exhentai extractors"""
category = "exhentai"
- directory_fmt = ("{category}", "{gallery_id} {title}")
+ directory_fmt = ("{category}", "{gallery_id} {title[:247]}")
filename_fmt = (
"{gallery_id}_{num:>04}_{image_token}_{filename}.{extension}")
archive_fmt = "{gallery_id}_{num}"
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index bd34bdb..967fd9c 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2019 Mike Fährmann
+# Copyright 2017-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -16,6 +16,7 @@ class FlickrExtractor(Extractor):
"""Base class for flickr extractors"""
category = "flickr"
filename_fmt = "{category}_{id}.{extension}"
+ cookiedomain = None
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 428f3c3..4af9d4a 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -120,8 +120,8 @@ EXTRACTORS = {
},
"fireden": {
"root": "https://boards.fireden.net",
- "test-thread": ("https://boards.fireden.net/a/thread/159803223/", {
- "url": "01b7baacfb0656a68e566368290e3072b27f86c9",
+ "test-thread": ("https://boards.fireden.net/sci/thread/11264294/", {
+ "url": "3adfe181ee86a8c23021c705f623b3657a9b0a43",
}),
},
"nyafuu": {
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index d6eea7f..fd97605 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2019 Mike Fährmann
+# Copyright 2016-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -13,6 +13,9 @@ from .. import text
import json
+BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com"
+
+
class ImagefapExtractor(Extractor):
"""Base class for imagefap extractors"""
category = "imagefap"
@@ -29,8 +32,8 @@ class ImagefapExtractor(Extractor):
class ImagefapGalleryExtractor(ImagefapExtractor):
"""Extractor for image galleries from imagefap.com"""
subcategory = "gallery"
- pattern = (r"(?:https?://)?(?:www\.)?imagefap\.com/"
- r"(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)")
+ pattern = BASE_PATTERN + r"/(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)"
+
test = (
("https://www.imagefap.com/pictures/7102714", {
"pattern": r"https://cdn.imagefap.com/images/full/\d+/\d+/\d+.jpg",
@@ -42,6 +45,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
"keyword": "3e24eace5b09639b881ebd393165862feb46adde",
}),
("https://www.imagefap.com/gallery.php?gid=7102714"),
+ ("https://beta.imagefap.com/gallery.php?gid=7102714"),
)
def __init__(self, match):
@@ -99,11 +103,14 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
class ImagefapImageExtractor(ImagefapExtractor):
"""Extractor for single images from imagefap.com"""
subcategory = "image"
- pattern = r"(?:https?://)?(?:www\.)?imagefap\.com/photo/(\d+)"
- test = ("https://www.imagefap.com/photo/1369341772/", {
- "pattern": r"https://cdn.imagefap.com/images/full/\d+/\d+/\d+.jpg",
- "keyword": "8894e45f7262020d8d66ce59917315def1fc475b",
- })
+ pattern = BASE_PATTERN + r"/photo/(\d+)"
+ test = (
+ ("https://www.imagefap.com/photo/1369341772/", {
+ "pattern": r"https://cdn.imagefap.com/images/full/\d+/\d+/\d+.jpg",
+ "keyword": "8894e45f7262020d8d66ce59917315def1fc475b",
+ }),
+ ("https://beta.imagefap.com/photo/1369341772/"),
+ )
def __init__(self, match):
ImagefapExtractor.__init__(self, match)
@@ -143,8 +150,8 @@ class ImagefapUserExtractor(ImagefapExtractor):
"""Extractor for all galleries from a user at imagefap.com"""
subcategory = "user"
categorytransfer = True
- pattern = (r"(?:https?://)?(?:www\.)?imagefap\.com/"
- r"(?:profile(?:\.php\?user=|/)([^/?&#]+)"
+ pattern = (BASE_PATTERN +
+ r"/(?:profile(?:\.php\?user=|/)([^/?&#]+)"
r"|usergallery\.php\?userid=(\d+))")
test = (
("https://www.imagefap.com/profile/LucyRae/galleries", {
@@ -154,6 +161,7 @@ class ImagefapUserExtractor(ImagefapExtractor):
"url": "d941aa906f56a75972a7a5283030eb9a8d27a4fd",
}),
("https://www.imagefap.com/profile.php?user=LucyRae"),
+ ("https://beta.imagefap.com/profile.php?user=LucyRae"),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index ce3e1ce..6ff6588 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -153,7 +153,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
"is_album" : True,
"layout" : "blog",
"link" : "https://imgur.com/a/TcBmP",
- "nsfw" : False,
+ "nsfw" : True,
"privacy" : "hidden",
"section" : None,
"title" : "138",
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index 0aeeb4a..c80cf14 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -56,7 +56,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
"__typename" : "Album",
"audiences" : list,
"content" : "Hentai",
- "cover" : "re:https://cdnio.luscious.net/.+/277031/",
+ "cover" : "re:https://\\w+.luscious.net/.+/277031/",
"created" : 1479625853,
"created_by" : "NTRshouldbeillegal",
"date" : "type:datetime",
@@ -102,7 +102,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
},
}),
("https://luscious.net/albums/virgin-killer-sweater_282582/", {
- "url": "21cc68a7548f4d71dfd67d8caf96349dde7e791c",
+ "url": "0be0cc279be1de99f727764819e03435e2a79915",
}),
("https://luscious.net/albums/not-found_277035/", {
"exception": exception.NotFoundError,
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index d0eb2a9..558aa9d 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract manga-chapters and entire manga from https://mangadex.org/"""
+"""Extract manga-chapters and entire manga from https://mangadex.cc/"""
from .common import Extractor, Message
from .. import text, util
@@ -16,7 +16,7 @@ from ..cache import memcache
class MangadexExtractor(Extractor):
"""Base class for mangadex extractors"""
category = "mangadex"
- root = "https://mangadex.org"
+ root = "https://mangadex.cc"
# mangadex-to-iso639-1 codes
iso639_map = {
@@ -39,7 +39,7 @@ class MangadexExtractor(Extractor):
class MangadexChapterExtractor(MangadexExtractor):
- """Extractor for manga-chapters from mangadex.org"""
+ """Extractor for manga-chapters from mangadex.cc"""
subcategory = "chapter"
directory_fmt = (
"{category}", "{manga}",
@@ -47,14 +47,14 @@ class MangadexChapterExtractor(MangadexExtractor):
filename_fmt = (
"{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
archive_fmt = "{chapter_id}_{page}"
- pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)/chapter/(\d+)"
+ pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc|com)/chapter/(\d+)"
test = (
- ("https://mangadex.org/chapter/122094", {
+ ("https://mangadex.cc/chapter/122094", {
"keyword": "1c834dca33025f521e1874aee1f71c51e28ebf99",
- "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f",
+ # "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f",
}),
# oneshot
- ("https://mangadex.org/chapter/138086", {
+ ("https://mangadex.cc/chapter/138086", {
"count": 64,
"keyword": "178777bd0352fb19eb934cbee5630d16e3fb60ab",
}),
@@ -107,14 +107,14 @@ class MangadexChapterExtractor(MangadexExtractor):
class MangadexMangaExtractor(MangadexExtractor):
- """Extractor for manga from mangadex.org"""
+ """Extractor for manga from mangadex.cc"""
subcategory = "manga"
categorytransfer = True
- pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)"
+ pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc|com)"
r"/(?:title|manga)/(\d+)")
test = (
- ("https://mangadex.org/manga/2946/souten-no-koumori", {
- "pattern": r"https://mangadex.org/chapter/\d+",
+ ("https://mangadex.cc/manga/2946/souten-no-koumori", {
+ "pattern": r"https://mangadex.cc/chapter/\d+",
"keywords": {
"manga": "Souten no Koumori",
"manga_id": 2946,
diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py
index e15acbe..52cc672 100644
--- a/gallery_dl/extractor/mangahere.py
+++ b/gallery_dl/extractor/mangahere.py
@@ -97,10 +97,19 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor):
"url": "654850570aa03825cd57e2ae2904af489602c523",
"keyword": "c8084d89a9ea6cf40353093669f9601a39bf5ca2",
}),
+ # adult filter (#556)
+ ("http://www.mangahere.cc/manga/gunnm_mars_chronicle/", {
+ "pattern": MangahereChapterExtractor.pattern,
+ "count": ">= 50",
+ }),
("https://www.mangahere.co/manga/aria/"),
("https://m.mangahere.co/manga/aria/"),
)
+ def __init__(self, match):
+ MangaExtractor.__init__(self, match)
+ self.session.cookies.set("isAdult", "1", domain="www.mangahere.cc")
+
def chapters(self, page):
results = []
manga, pos = text.extract(page, '<meta name="og:title" content="', '"')
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index a325264..36e0b62 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -19,6 +19,7 @@ class MastodonExtractor(Extractor):
directory_fmt = ("mastodon", "{instance}", "{account[username]}")
filename_fmt = "{category}_{id}_{media[id]}.{extension}"
archive_fmt = "{media[id]}"
+ cookiedomain = None
instance = None
root = None
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 5454e52..54e60b0 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -98,7 +98,7 @@ class NewgroundsExtractor(Extractor):
'id="faves_load">', '<').replace(",", ""))
data["score"] = text.parse_float(extr('id="score_number">', '<'))
data["tags"] = text.split_html(extr(
- '<dd class="tags momag">', '</dd>'))
+ '<dd class="tags">', '</dd>'))
data["artist"] = [
text.extract(user, '//', '.')[0]
for user in text.extract_iter(page, '<div class="item-user">', '>')
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index bcdd082..b72a896 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -175,7 +175,7 @@ class PinterestPinitExtractor(PinterestExtractor):
"https://pin.it/None",
"https://www.pinterest.com"):
raise exception.NotFoundError("pin")
- yield Message.Queue, location, {}
+ yield Message.Queue, location, {"_extractor": PinterestPinExtractor}
class PinterestAPI():
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 7901149..36fa0fe 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2019 Mike Fährmann
+# Copyright 2014-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -13,6 +13,7 @@ from .. import text, exception
from ..cache import cache
from datetime import datetime, timedelta
import hashlib
+import time
class PixivExtractor(Extractor):
@@ -21,6 +22,7 @@ class PixivExtractor(Extractor):
directory_fmt = ("{category}", "{user[id]} {user[account]}")
filename_fmt = "{id}_p{num}.{extension}"
archive_fmt = "{id}{suffix}.{extension}"
+ cookiedomain = None
def __init__(self, match):
Extractor.__init__(self, match)
@@ -141,10 +143,11 @@ class PixivMeExtractor(PixivExtractor):
def items(self):
url = "https://pixiv.me/" + self.account
+ data = {"_extractor": PixivUserExtractor}
response = self.request(
url, method="HEAD", allow_redirects=False, notfound="user")
yield Message.Version, 1
- yield Message.Queue, response.headers["Location"], {}
+ yield Message.Queue, response.headers["Location"], data
class PixivWorkExtractor(PixivExtractor):
@@ -217,6 +220,11 @@ class PixivFavoriteExtractor(PixivExtractor):
("https://www.pixiv.net/bookmark.php", {
"url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
}),
+ # followed users (#515)
+ ("https://www.pixiv.net/bookmark.php?id=173530&type=user", {
+ "pattern": PixivUserExtractor.pattern,
+ "count": ">= 12",
+ }),
# touch URLs
("https://touch.pixiv.net/bookmark.php?id=173530"),
("https://touch.pixiv.net/bookmark.php"),
@@ -227,6 +235,9 @@ class PixivFavoriteExtractor(PixivExtractor):
self.query = text.parse_query(match.group(1))
if "id" not in self.query:
self.subcategory = "bookmark"
+ elif self.query.get("type") == "user":
+ self.subcategory = "following"
+ self.items = self._items_following
def works(self):
tag = None
@@ -249,6 +260,15 @@ class PixivFavoriteExtractor(PixivExtractor):
self.user_id = user["id"]
return {"user_bookmark": user}
+ def _items_following(self):
+ yield Message.Version, 1
+
+ for preview in self.api.user_following(self.query["id"]):
+ user = preview["user"]
+ user["_extractor"] = PixivUserExtractor
+ url = "https://www.pixiv.net/member.php?id={}".format(user["id"])
+ yield Message.Queue, url, user
+
class PixivRankingExtractor(PixivExtractor):
"""Extractor for pixiv ranking pages"""
@@ -493,6 +513,10 @@ class PixivAppAPI():
params = {"user_id": user_id}
return self._call("v1/user/detail", params)["user"]
+ def user_following(self, user_id):
+ params = {"user_id": user_id}
+ return self._pagination("v1/user/following", params, "user_previews")
+
def user_illusts(self, user_id):
params = {"user_id": user_id}
return self._pagination("v1/user/illusts", params)
@@ -506,17 +530,25 @@ class PixivAppAPI():
self.login()
response = self.extractor.request(url, params=params, fatal=False)
+ data = response.json()
+
+ if "error" in data:
+ if response.status_code == 404:
+ raise exception.NotFoundError()
+
+ error = data["error"]
+ if "rate limit" in (error.get("message") or "").lower():
+ self.log.info("Waiting two minutes for API rate limit reset.")
+ time.sleep(120)
+ return self._call(endpoint, params)
+ raise exception.StopExtraction("API request failed: %s", error)
- if response.status_code < 400:
- return response.json()
- if response.status_code == 404:
- raise exception.NotFoundError()
- raise exception.StopExtraction("API request failed: %s", response.text)
+ return data
- def _pagination(self, endpoint, params):
+ def _pagination(self, endpoint, params, key="illusts"):
while True:
data = self._call(endpoint, params)
- yield from data["illusts"]
+ yield from data[key]
if not data["next_url"]:
return
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 656148e..4c83019 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2019 Mike Fährmann
+# Copyright 2017-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,12 +11,12 @@
from .common import Extractor, Message
from .. import text, util, extractor, exception
from ..cache import cache
-import time
class RedditExtractor(Extractor):
"""Base class for reddit extractors"""
category = "reddit"
+ cookiedomain = None
def __init__(self, match):
Extractor.__init__(self, match)
@@ -277,11 +277,13 @@ class RedditAPI():
params["raw_json"] = 1
self.authenticate()
response = self.extractor.request(url, params=params, fatal=None)
+
remaining = response.headers.get("x-ratelimit-remaining")
if remaining and float(remaining) < 2:
- wait = int(response.headers["x-ratelimit-reset"])
- self.log.info("Waiting %d seconds for ratelimit reset", wait)
- time.sleep(wait)
+ reset = response.headers["x-ratelimit-reset"]
+ self.extractor.wait(seconds=reset, reason="rate limit reset")
+ return self._call(endpoint, params)
+
data = response.json()
if "error" in data:
if data["error"] == 403:
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index 0c13825..69b8cb9 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2019 Mike Fährmann
+# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,6 +21,7 @@ class SmugmugExtractor(Extractor):
category = "smugmug"
filename_fmt = ("{category}_{User[NickName]:?/_/}"
"{Image[UploadKey]}_{Image[ImageKey]}.{extension}")
+ cookiedomain = None
empty_user = {
"Uri": "",
"ResponseLevel": "Public",
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 1d37419..a1f2199 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2019 Mike Fährmann
+# Copyright 2016-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -12,7 +12,6 @@ from .common import Extractor, Message
from .. import text, oauth, extractor, exception
from datetime import datetime, timedelta
import re
-import time
def _original_inline_image(url):
@@ -45,6 +44,7 @@ class TumblrExtractor(Extractor):
directory_fmt = ("{category}", "{name}")
filename_fmt = "{category}_{blog_name}_{id}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
+ cookiedomain = None
def __init__(self, match):
Extractor.__init__(self, match)
@@ -407,27 +407,18 @@ class TumblrAPI(oauth.OAuth1API):
# daily rate limit
if response.headers.get("x-ratelimit-perday-remaining") == "0":
reset = response.headers.get("x-ratelimit-perday-reset")
+ t = (datetime.now() + timedelta(seconds=float(reset))).time()
+
+ self.log.error("Daily API rate limit exceeded")
raise exception.StopExtraction(
- "Daily API rate limit exceeded: aborting; "
- "rate limit will reset at %s", self._to_time(reset),
- )
+ "Aborting - Rate limit will reset at %s",
+ "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second))
# hourly rate limit
reset = response.headers.get("x-ratelimit-perhour-reset")
if reset:
- self.log.info(
- "Hourly API rate limit exceeded; waiting until "
- "%s for rate limit reset", self._to_time(reset),
- )
- time.sleep(int(reset) + 1)
+ self.log.info("Hourly API rate limit exceeded")
+ self.extractor.wait(seconds=reset, reason="rate limit reset")
return self._call(blog, endpoint, params)
raise exception.StopExtraction(data)
-
- @staticmethod
- def _to_time(reset):
- try:
- reset_time = datetime.now() + timedelta(seconds=int(reset))
- except (ValueError, TypeError):
- return "?"
- return reset_time.strftime("%H:%M:%S")
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 8ef966f..610e0ee 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -53,10 +53,12 @@ class TwitterExtractor(Extractor):
if self.videos == "ytdl":
data["extension"] = None
- url = "ytdl:{}/{}/status/{}".format(
- self.root, data["user"], data["tweet_id"])
+ url = "ytdl:{}/i/web/status/{}".format(
+ self.root, data["tweet_id"])
else:
url = self._video_from_tweet(data["tweet_id"])
+ if not url:
+ continue
ext = text.ext_from_url(url)
if ext == "m3u8":
url = "ytdl:" + url
@@ -155,6 +157,16 @@ class TwitterExtractor(Extractor):
cl, _, cr = content.rpartition("pic.twitter.com/")
data["content"] = cl if cl and len(cr) < 16 else content
+ if extr('<div class="QuoteTweet', '>'):
+ data["retweet_id"] = text.parse_int(extr('data-item-id="', '"'))
+ data["retweeter"] = data["user"]["name"]
+ data["author"] = {
+ "name" : extr('data-screen-name="', '"'),
+ "id" : text.parse_int(extr('data-user-id="' , '"')),
+ "nick" : text.unescape(extr(
+ 'QuoteTweet-fullname', '<').partition('>')[2]),
+ }
+
return data
def _video_from_tweet(self, tweet_id):
@@ -173,19 +185,28 @@ class TwitterExtractor(Extractor):
if self.logged_in:
headers["x-twitter-auth-type"] = "OAuth2Session"
else:
- token = self._guest_token(headers)
+ token = _guest_token(self, headers)
cookies = {"gt": token}
headers["x-guest-token"] = token
- data = self.request(url, cookies=cookies, headers=headers).json()
- return data["track"]["playbackUrl"]
+ response = self.request(
+ url, cookies=cookies, headers=headers, fatal=None)
+
+ if response.status_code == 429 or \
+ response.headers.get("x-rate-limit-remaining") == "0":
+ if self.logged_in:
+ reset = response.headers.get("x-rate-limit-reset")
+ self.wait(until=reset, reason="rate limit reset")
+ else:
+ _guest_token.invalidate()
+ return self._video_from_tweet(tweet_id)
- @memcache()
- def _guest_token(self, headers):
- return self.request(
- "https://api.twitter.com/1.1/guest/activate.json",
- method="POST", headers=headers,
- ).json().get("guest_token")
+ elif response.status_code >= 400:
+ self.log.warning("Unable to fetch video data for %s ('%s %s')",
+ tweet_id, response.status_code, response.reason)
+ return None
+
+ return response.json()["track"]["playbackUrl"]
def _tweets_from_api(self, url, max_position=None):
params = {
@@ -313,12 +334,17 @@ class TwitterTweetExtractor(TwitterExtractor):
# Reply to another tweet (#403)
("https://twitter.com/tyson_hesse/status/1103767554424598528", {
"options": (("videos", "ytdl"),),
- "pattern": r"ytdl:https://twitter.com/.+/1103767554424598528",
+ "pattern": r"ytdl:https://twitter.com/i/web.+/1103767554424598528",
}),
# /i/web/ URL
("https://twitter.com/i/web/status/1155074198240292865", {
"pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig",
}),
+ # quoted tweet (#526)
+ ("https://twitter.com/Meiyu_miu/status/1070693241413021696", {
+ "count": 4,
+ "keyword": "0c627af2b8cdccc7e0da8fd221155c4a4a3141a8",
+ }),
)
def __init__(self, match):
@@ -342,3 +368,11 @@ class TwitterTweetExtractor(TwitterExtractor):
end = page.index('class="js-tweet-stats-container')
beg = page.rindex('<div class="tweet ', 0, end)
return (page[beg:end],)
+
+
+@memcache()
+def _guest_token(extr, headers):
+ return extr.request(
+ "https://api.twitter.com/1.1/guest/activate.json",
+ method="POST", headers=headers,
+ ).json().get("guest_token")
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py
index ac289df..b614cab 100644
--- a/gallery_dl/extractor/wikiart.py
+++ b/gallery_dl/extractor/wikiart.py
@@ -69,8 +69,8 @@ class WikiartArtistExtractor(WikiartExtractor):
directory_fmt = ("{category}", "{artist[artistName]}")
pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)"
test = ("https://www.wikiart.org/en/thomas-cole", {
- "url": "f1eee8158f5b8b7380382ab730a8f53884715c8b",
- "keyword": "c61f5a4774b977106000e9554d19cfb9438a7032",
+ "url": "9049e52e897b9ae6586df4c2c4f827d0a19dafa3",
+ "keyword": "c3168b21a993707c41efb7674e8c90d53a79d483",
})
def __init__(self, match):
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 88b6a55..699f057 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -182,6 +182,7 @@ class DownloadJob(Job):
self.downloaders = {}
self.postprocessors = None
self.out = output.select()
+ self.visited = parent.visited if parent else set()
def handle_url(self, url, kwdict, fallback=None):
"""Download the resource specified in 'url'"""
@@ -261,6 +262,10 @@ class DownloadJob(Job):
pp.run_metadata(pathfmt)
def handle_queue(self, url, kwdict):
+ if url in self.visited:
+ return
+ self.visited.add(url)
+
if "_extractor" in kwdict:
extr = kwdict["_extractor"].from_url(url)
else:
@@ -422,11 +427,19 @@ class KeywordJob(Job):
self.print_kwdict(kwdict)
def handle_queue(self, url, kwdict):
- if not kwdict:
+ if not util.filter_dict(kwdict):
self.extractor.log.info(
- "This extractor delegates work to other extractors "
- "and does not provide any keywords on its own. Try "
- "'gallery-dl -K \"%s\"' instead.", url)
+ "This extractor only spawns other extractors "
+ "and does not provide any metadata on its own.")
+
+ if "_extractor" in kwdict:
+ self.extractor.log.info(
+ "Showing results for '%s' instead:\n", url)
+ extr = kwdict["_extractor"].from_url(url)
+ KeywordJob(extr, self).run()
+ else:
+ self.extractor.log.info(
+ "Try 'gallery-dl -K \"%s\"' instead.", url)
else:
print("Keywords for --chapter-filter:")
print("------------------------------")
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index bc26484..aa50dfd 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -10,6 +10,7 @@
from .common import PostProcessor
from .. import util
+import os
class MetadataPP(PostProcessor):
@@ -32,32 +33,46 @@ class MetadataPP(PostProcessor):
self.ascii = options.get("ascii", False)
ext = "json"
+ directory = options.get("directory")
+ if directory:
+ self._directory = self._directory_custom
+ sep = os.sep + (os.altsep or "")
+ self.metadir = directory.rstrip(sep) + os.sep
+
extfmt = options.get("extension-format")
if extfmt:
- self.path = self._path_format
+ self._filename = self._filename_custom
self.extfmt = util.Formatter(extfmt).format_map
else:
- self.path = self._path_append
self.extension = options.get("extension", ext)
if options.get("bypost"):
self.run_metadata, self.run = self.run, self.run_metadata
def run(self, pathfmt):
- with open(self.path(pathfmt), "w", encoding="utf-8") as file:
+ path = self._directory(pathfmt) + self._filename(pathfmt)
+ with open(path, "w", encoding="utf-8") as file:
self.write(file, pathfmt.kwdict)
- def _path_append(self, pathfmt):
- return "{}.{}".format(pathfmt.realpath, self.extension)
+ def _directory(self, pathfmt):
+ return pathfmt.realdirectory
+
+ def _directory_custom(self, pathfmt):
+ directory = os.path.join(pathfmt.realdirectory, self.metadir)
+ os.makedirs(directory, exist_ok=True)
+ return directory
+
+ def _filename(self, pathfmt):
+ return pathfmt.filename + "." + self.extension
- def _path_format(self, pathfmt):
+ def _filename_custom(self, pathfmt):
kwdict = pathfmt.kwdict
ext = kwdict["extension"]
kwdict["extension"] = pathfmt.extension
kwdict["extension"] = pathfmt.prefix + self.extfmt(kwdict)
- path = pathfmt.realdirectory + pathfmt.build_filename()
+ filename = pathfmt.build_filename()
kwdict["extension"] = ext
- return path
+ return filename
def _write_custom(self, file, kwdict):
file.write(self.contentfmt(kwdict))
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 48ae0be..f426829 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -740,8 +740,15 @@ class DownloadArchive():
con.isolation_level = None
self.close = con.close
self.cursor = con.cursor()
- self.cursor.execute("CREATE TABLE IF NOT EXISTS archive "
- "(entry PRIMARY KEY) WITHOUT ROWID")
+
+ try:
+ self.cursor.execute("CREATE TABLE IF NOT EXISTS archive "
+ "(entry PRIMARY KEY) WITHOUT ROWID")
+ except sqlite3.OperationalError:
+ # fallback for missing WITHOUT ROWID support (#553)
+ self.cursor.execute("CREATE TABLE IF NOT EXISTS archive "
+ "(entry PRIMARY KEY)")
+
self.keygen = (extractor.category + extractor.config(
"archive-format", extractor.archive_fmt)
).format_map
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 2ac7ceb..4b83107 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.12.1"
+__version__ = "1.12.2"
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 17f82c9..629b0d7 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -7,6 +7,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
+import os
import os.path
import zipfile
import tempfile
@@ -156,7 +157,6 @@ class MetadataTest(BasePostprocessorTest):
"_private" : "world",
})
- self.assertEqual(pp.path , pp._path_append)
self.assertEqual(pp.write , pp._write_json)
self.assertEqual(pp.ascii , True)
self.assertEqual(pp.indent , 2)
@@ -242,7 +242,7 @@ class MetadataTest(BasePostprocessorTest):
"extension-format": "json",
})
- self.assertEqual(pp.path, pp._path_format)
+ self.assertEqual(pp._filename, pp._filename_custom)
with patch("builtins.open", mock_open()) as m:
pp.prepare(self.pathfmt)
@@ -264,6 +264,31 @@ class MetadataTest(BasePostprocessorTest):
path = self.pathfmt.realdirectory + "file.2.EXT-data:tESt"
m.assert_called_once_with(path, "w", encoding="utf-8")
+ def test_metadata_directory(self):
+ pp = self._create({
+ "directory": "metadata",
+ })
+
+ with patch("builtins.open", mock_open()) as m:
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+
+ path = self.pathfmt.realdirectory + "metadata/file.ext.json"
+ m.assert_called_once_with(path, "w", encoding="utf-8")
+
+ def test_metadata_directory_2(self):
+ pp = self._create({
+ "directory" : "metadata////",
+ "extension-format": "json",
+ })
+
+ with patch("builtins.open", mock_open()) as m:
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+
+ path = self.pathfmt.realdirectory + "metadata/file.json"
+ m.assert_called_once_with(path, "w", encoding="utf-8")
+
@staticmethod
def _output(mock):
return "".join(