about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net> 2020-06-29 00:33:23 -0400
committer: Unit 193 <unit193@unit193.net> 2020-06-29 00:33:23 -0400
commit: d7db0f63f4b1f051a7ecf6d80cfacaae1626ce69 (patch)
tree: 64c9657783b7c1ec7db81ec1e41fedba3c5ad0b2
parent: 2865adf5de64ea6ca38f734cc61ef805c4bc27d2 (diff)
parent: 02dd2886783cd303cff6890a741152d013bb00ce (diff)
download: gallery-dl-d7db0f63f4b1f051a7ecf6d80cfacaae1626ce69.tar.bz2
gallery-dl-d7db0f63f4b1f051a7ecf6d80cfacaae1626ce69.tar.xz
gallery-dl-d7db0f63f4b1f051a7ecf6d80cfacaae1626ce69.tar.zst
Update upstream source from tag 'upstream/1.14.2'
Update to upstream version '1.14.2' with Debian dir 4efcdfd56b8d506548ba3400af16c1e34dea7260
-rw-r--r-- CHANGELOG.md 21
-rw-r--r-- PKG-INFO 8
-rw-r--r-- README.rst 6
-rw-r--r-- data/man/gallery-dl.1 2
-rw-r--r-- data/man/gallery-dl.conf.5 42
-rw-r--r-- docs/gallery-dl.conf 1
-rw-r--r-- gallery_dl.egg-info/PKG-INFO 8
-rw-r--r-- gallery_dl/config.py 32
-rw-r--r-- gallery_dl/extractor/artstation.py 2
-rw-r--r-- gallery_dl/extractor/aryion.py 66
-rw-r--r-- gallery_dl/extractor/common.py 21
-rw-r--r-- gallery_dl/extractor/foolslide.py 6
-rw-r--r-- gallery_dl/extractor/imgbb.py 4
-rw-r--r-- gallery_dl/extractor/imgur.py 4
-rw-r--r-- gallery_dl/extractor/instagram.py 4
-rw-r--r-- gallery_dl/extractor/kissmanga.py 16
-rw-r--r-- gallery_dl/extractor/mastodon.py 31
-rw-r--r-- gallery_dl/extractor/naver.py 1
-rw-r--r-- gallery_dl/extractor/pinterest.py 88
-rw-r--r-- gallery_dl/extractor/slickpic.py 3
-rw-r--r-- gallery_dl/extractor/speakerdeck.py 4
-rw-r--r-- gallery_dl/extractor/tsumino.py 2
-rw-r--r-- gallery_dl/extractor/tumblr.py 2
-rw-r--r-- gallery_dl/extractor/twitter.py 149
-rw-r--r-- gallery_dl/extractor/webtoons.py 1
-rw-r--r-- gallery_dl/extractor/weibo.py 96
-rw-r--r-- gallery_dl/job.py 21
-rw-r--r-- gallery_dl/text.py 11
-rw-r--r-- gallery_dl/util.py 21
-rw-r--r-- gallery_dl/version.py 2
-rw-r--r-- test/test_config.py 28
-rw-r--r-- test/test_results.py 3
-rw-r--r-- test/test_text.py 4
33 files changed, 487 insertions, 223 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 043d964..f84e423 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,26 @@
# Changelog
+## 1.14.2 - 2020-06-27
+### Additions
+- [artstation] add `date` metadata field ([#839](https://github.com/mikf/gallery-dl/issues/839))
+- [mastodon] add `date` metadata field ([#839](https://github.com/mikf/gallery-dl/issues/839))
+- [pinterest] add support for board sections ([#835](https://github.com/mikf/gallery-dl/issues/835))
+- [twitter] add extractor for liked tweets ([#837](https://github.com/mikf/gallery-dl/issues/837))
+- [twitter] add option to filter media from quoted tweets ([#854](https://github.com/mikf/gallery-dl/issues/854))
+- [weibo] add `date` metadata field to `status` objects ([#829](https://github.com/mikf/gallery-dl/issues/829))
+### Fixes
+- [aryion] fix user gallery extraction ([#832](https://github.com/mikf/gallery-dl/issues/832))
+- [imgur] build directory paths for each file ([#842](https://github.com/mikf/gallery-dl/issues/842))
+- [tumblr] prevent errors when using `reblogs=same-blog` ([#851](https://github.com/mikf/gallery-dl/issues/851))
+- [twitter] always provide an `author` metadata field ([#831](https://github.com/mikf/gallery-dl/issues/831), [#833](https://github.com/mikf/gallery-dl/issues/833))
+- [twitter] don't download video previews ([#833](https://github.com/mikf/gallery-dl/issues/833))
+- [twitter] improve handling of deleted tweets ([#838](https://github.com/mikf/gallery-dl/issues/838))
+- [twitter] fix search results ([#847](https://github.com/mikf/gallery-dl/issues/847))
+- [twitter] improve handling of quoted tweets ([#854](https://github.com/mikf/gallery-dl/issues/854))
+- fix config lookups when multiple locations are involved ([#843](https://github.com/mikf/gallery-dl/issues/843))
+- improve output of `-K/--list-keywords` for parent extractors ([#825](https://github.com/mikf/gallery-dl/issues/825))
+- call `flush()` after writing JSON in `DataJob()` ([#727](https://github.com/mikf/gallery-dl/issues/727))
+
## 1.14.1 - 2020-06-12
### Additions
- [furaffinity] add `artist_url` metadata field ([#821](https://github.com/mikf/gallery-dl/issues/821))
diff --git a/PKG-INFO b/PKG-INFO
index 51e514a..5322ef0 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.14.1
+Version: 1.14.2
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.1/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.1/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -302,7 +302,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.1.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.2.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/README.rst b/README.rst
index b66efb7..861d8a7 100644
--- a/README.rst
+++ b/README.rst
@@ -83,8 +83,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.1/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.1/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -291,7 +291,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.1.tar.gz
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.2.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 76a57d1..21055ca 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2020-06-12" "1.14.1" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2020-06-27" "1.14.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 88f8ebc..7060751 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2020-06-12" "1.14.1" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2020-06-27" "1.14.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -631,6 +631,22 @@ See \f[I]strptime\f[] for a list of formatting directives.
.IP "Description:" 4
Try to follow external URLs of embedded players.
+.SS extractor.aryion.recursive
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Controls the post extraction strategy.
+
+.br
+* \f[I]true\f[]: Start on users' main gallery pages and recursively
+descend into subfolders
+.br
+* \f[I]false\f[]: Get posts from "Latest Updates" pages
+
.SS extractor.blogger.videos
.IP "Type:" 6
\f[I]bool\f[]
@@ -1079,6 +1095,16 @@ port than the default.
.IP "Description:" 4
Download subalbums.
+.SS extractor.pinterest.sections
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Include pins from board sections.
+
.SS extractor.pixiv.user.avatar
.IP "Type:" 6
\f[I]bool\f[]
@@ -1375,6 +1401,16 @@ Possible types are \f[I]text\f[], \f[I]quote\f[], \f[I]link\f[], \f[I]answer\f[]
You can use \f[I]"all"\f[] instead of listing all types separately.
+.SS extractor.twitter.quoted
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Fetch media from quoted Tweets.
+
.SS extractor.twitter.replies
.IP "Type:" 6
\f[I]bool\f[]
@@ -1383,7 +1419,7 @@ You can use \f[I]"all"\f[] instead of listing all types separately.
\f[I]true\f[]
.IP "Description:" 4
-Extract media from replies to other Tweets.
+Fetch media from replies to other Tweets.
.SS extractor.twitter.retweets
.IP "Type:" 6
@@ -1393,7 +1429,7 @@ Extract media from replies to other Tweets.
\f[I]true\f[]
.IP "Description:" 4
-Extract media from Retweets.
+Fetch media from Retweets.
.SS extractor.twitter.twitpic
.IP "Type:" 6
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index ae4839d..aa54e1a 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -155,6 +155,7 @@
},
"twitter":
{
+ "quoted": true,
"replies": true,
"retweets": true,
"twitpic": false,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 3f6f077..0b01abc 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.14.1
+Version: 1.14.2
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.1/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.1/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -302,7 +302,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.1.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.2.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/gallery_dl/config.py b/gallery_dl/config.py
index 5303616..a3c71cd 100644
--- a/gallery_dl/config.py
+++ b/gallery_dl/config.py
@@ -108,6 +108,38 @@ def interpolate(path, key, default=None, *, conf=_config):
return default
+def interpolate_common(common, paths, key, default=None, *, conf=_config):
+ """Interpolate the value of 'key'
+ using multiple 'paths' along a 'common' ancestor
+ """
+ if key in conf:
+ return conf[key]
+
+ # follow the common path
+ try:
+ for p in common:
+ conf = conf[p]
+ if key in conf:
+ default = conf[key]
+ except Exception:
+ return default
+
+ # try all paths until a value is found
+ value = util.SENTINEL
+ for path in paths:
+ c = conf
+ try:
+ for p in path:
+ c = c[p]
+ if key in c:
+ value = c[key]
+ except Exception:
+ pass
+ if value is not util.SENTINEL:
+ return value
+ return default
+
+
def set(path, key, value, *, conf=_config):
"""Set the value of property 'key' for this session"""
for p in path:
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index c504dba..64a4bf4 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -66,6 +66,8 @@ class ArtstationExtractor(Extractor):
data["title"] = text.unescape(data["title"])
data["description"] = text.unescape(text.remove_html(
data["description"]))
+ data["date"] = text.parse_datetime(
+ data["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
assets = data["assets"]
del data["assets"]
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 7575de9..04bb146 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -26,9 +26,24 @@ class AryionExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
- self.offset = 0
+ self.recursive = True
- def posts(self, url):
+ def items(self):
+ for post_id in self.posts():
+ post = self._parse_post(post_id)
+ if post:
+ yield Message.Directory, post
+ yield Message.Url, post["url"], post
+ elif post is False and self.recursive:
+ base = self.root + "/g4/view/"
+ data = {"_extractor": AryionPostExtractor}
+ for post_id in self._pagination(base + post_id):
+ yield Message.Queue, base + post_id, data
+
+ def posts(self):
+ """Yield relevant post IDs"""
+
+ def _pagination(self, url):
while True:
page = self.request(url).text
yield from text.extract_iter(
@@ -39,11 +54,14 @@ class AryionExtractor(Extractor):
return
url = self.root + text.rextract(page, "href='", "'", pos)[0]
- def parse_post(self, post_id):
+ def _parse_post(self, post_id):
url = "{}/g4/data.php?id={}".format(self.root, post_id)
with self.request(url, method="HEAD", fatal=False) as response:
if response.status_code >= 400:
+ self.log.warning(
+ "Unable to fetch post %s ('%s %s')",
+ post_id, response.status_code, response.reason)
return None
headers = response.headers
@@ -106,9 +124,11 @@ class AryionExtractor(Extractor):
class AryionGalleryExtractor(AryionExtractor):
"""Extractor for a user's gallery on eka's portal"""
subcategory = "gallery"
+ categorytransfer = True
pattern = BASE_PATTERN + r"/(?:gallery/|user/|latest.php\?name=)([^/?&#]+)"
test = (
("https://aryion.com/g4/gallery/jameshoward", {
+ "options": (("recursive", False),),
"pattern": r"https://aryion\.com/g4/data\.php\?id=\d+$",
"range": "48-52",
"count": 5,
@@ -117,17 +137,24 @@ class AryionGalleryExtractor(AryionExtractor):
("https://aryion.com/g4/latest.php?name=jameshoward"),
)
+ def __init__(self, match):
+ AryionExtractor.__init__(self, match)
+ self.recursive = self.config("recursive", True)
+ self.offset = 0
+
def skip(self, num):
+ if self.recursive:
+ num = 0
self.offset += num
return num
- def items(self):
- url = "{}/g4/latest.php?name={}".format(self.root, self.user)
- for post_id in util.advance(self.posts(url), self.offset):
- post = self.parse_post(post_id)
- if post:
- yield Message.Directory, post
- yield Message.Url, post["url"], post
+ def posts(self):
+ if self.recursive:
+ url = "{}/g4/gallery/{}".format(self.root, self.user)
+ return self._pagination(url)
+ else:
+ url = "{}/g4/latest.php?name={}".format(self.root, self.user)
+ return util.advance(self._pagination(url), self.offset)
class AryionPostExtractor(AryionExtractor):
@@ -164,19 +191,6 @@ class AryionPostExtractor(AryionExtractor):
}),
)
- def items(self):
- post_id = self.user
- self.user = None
- post = self.parse_post(post_id)
-
- if post:
- yield Message.Directory, post
- yield Message.Url, post["url"], post
-
- elif post is False:
- folder_url = "{}/g4/view/{}".format(self.root, post_id)
- data = {"_extractor": AryionPostExtractor}
-
- for post_id in self.posts(folder_url):
- url = "{}/g4/view/{}".format(self.root, post_id)
- yield Message.Queue, url, data
+ def posts(self):
+ post_id, self.user = self.user, None
+ return (post_id,)
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index dd685df..bbbd8a6 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -328,14 +328,15 @@ class Extractor():
test = (test, None)
yield test
- def _dump_response(self, response):
+ def _dump_response(self, response, history=True):
"""Write the response content to a .dump file in the current directory.
The file name is derived from the response url,
replacing special characters with "_"
"""
- for resp in response.history:
- self._dump_response(resp)
+ if history:
+ for resp in response.history:
+ self._dump_response(resp, False)
if hasattr(Extractor, "_dump_index"):
Extractor._dump_index += 1
@@ -350,7 +351,8 @@ class Extractor():
try:
with open(fname + ".dump", 'wb') as fp:
- util.dump_response(response, fp)
+ util.dump_response(
+ response, fp, headers=(self._write_pages == "all"))
except Exception as e:
self.log.warning("Failed to dump HTTP request (%s: %s)",
e.__class__.__name__, e)
@@ -490,10 +492,13 @@ class SharedConfigMixin():
"""Enable sharing of config settings based on 'basecategory'"""
basecategory = ""
- def config(self, key, default=None, *, sentinel=util.SENTINEL):
- value = Extractor.config(self, key, sentinel)
- return value if value is not sentinel else config.interpolate(
- ("extractor", self.basecategory, self.subcategory), key, default)
+ def config(self, key, default=None):
+ return config.interpolate_common(
+ ("extractor",), (
+ (self.category, self.subcategory),
+ (self.basecategory, self.subcategory),
+ ), key, default,
+ )
def generate_extractors(extractor_data, symtable, classes):
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index 86f63ae..731f54b 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -225,9 +225,9 @@ EXTRACTORS = {
}),
),
"test-manga":
- ("https://sensescans.com/reader/series/hakkenden/", {
- "url": "3e0559029c21ca5af8a2082dd6de1567fcec4d83",
- "keyword": "4919f2bfed38e3a34dc984ec8d1dbd7a03044e23",
+ ("https://sensescans.com/reader/series/yotsubato/", {
+ "url": "ee4dca7c421bf15ac039200f8c0bcb0858153640",
+ "keyword": "f94961bd731bd878bbd4d48555bc3ace1d937364",
}),
},
"worldthree": {
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
index 8d2c937..3882a92 100644
--- a/gallery_dl/extractor/imgbb.py
+++ b/gallery_dl/extractor/imgbb.py
@@ -124,8 +124,8 @@ class ImgbbAlbumExtractor(ImgbbExtractor):
}),
("https://ibb.co/album/i5PggF?sort=title_asc", {
"range": "1-80",
- "url": "a2dfc58fe3348fa37e242082bd5a85eaa490d0a5",
- "keyword": "5bb79c82411c3770d673fac64a0a98fa28111c3b",
+ "url": "afdf5fc95d8e09d77e8f44312f3e9b843987bb5a",
+ "keyword": "f090e14d0e5f7868595082b2c95da1309c84872d",
}),
# no user data (#471)
("https://ibb.co/album/kYKpwF", {
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 44fa5f2..20b698b 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -210,6 +210,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
album = self.api.album(self.key)
album["date"] = text.parse_timestamp(album["datetime"])
images = album["images"]
+ count = len(images)
try:
del album["images"]
@@ -218,11 +219,12 @@ class ImgurAlbumExtractor(ImgurExtractor):
pass
yield Message.Version, 1
- yield Message.Directory, {"album": album, "count": len(images)}
for num, image in enumerate(images, 1):
url = self._prepare(image)
image["num"] = num
+ image["count"] = count
image["album"] = album
+ yield Message.Directory, image
yield Message.Url, url, image
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 3781711..bf6b10f 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -386,7 +386,7 @@ class InstagramImageExtractor(InstagramExtractor):
# GraphVideo
("https://www.instagram.com/p/Bqxp0VSBgJg/", {
- "pattern": r"/47129943_191645575115739_8539303288426725376_n\.mp4",
+ "pattern": r"/46840863_726311431074534_7805566102611403091_n\.mp4",
"keyword": {
"date": "dt:2018-11-29 19:23:58",
"description": str,
@@ -404,7 +404,7 @@ class InstagramImageExtractor(InstagramExtractor):
# GraphVideo (IGTV)
("https://www.instagram.com/tv/BkQjCfsBIzi/", {
- "pattern": r"/10000000_1760663964018792_716207142595461120_n\.mp4",
+ "pattern": r"/10000000_597132547321814_702169244961988209_n\.mp4",
"keyword": {
"date": "dt:2018-06-20 19:51:32",
"description": str,
diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py
index ade245b..348453d 100644
--- a/gallery_dl/extractor/kissmanga.py
+++ b/gallery_dl/extractor/kissmanga.py
@@ -9,10 +9,9 @@
"""Extract manga-chapters and entire manga from https://kissmanga.com/"""
from .common import ChapterExtractor, MangaExtractor, Extractor
-from .. import text, aes
+from .. import text, aes, exception
from ..cache import cache
import hashlib
-import time
import ast
import re
@@ -25,7 +24,18 @@ class RedirectMixin():
response = Extractor.request(self, url, **kwargs)
if not response.history or "/AreYouHuman" not in response.url:
return response
- time.sleep(2)
+ if self.config("captcha", "stop") == "wait":
+ self.log.warning(
+ "Redirect to \n%s\nVisit this URL in your browser, solve "
+ "the CAPTCHA, and press ENTER to continue", response.url)
+ try:
+ input()
+ except (EOFError, OSError):
+ pass
+ else:
+ raise exception.StopExtraction(
+ "Redirect to \n%s\nVisit this URL in your browser and "
+ "solve the CAPTCHA to continue", response.url)
class KissmangaBase(RedirectMixin):
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 002c8f7..fa1fecc 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -27,22 +27,25 @@ class MastodonExtractor(Extractor):
Extractor.__init__(self, match)
self.api = MastodonAPI(self)
- def config(self, key, default=None, *, sentinel=util.SENTINEL):
- value = Extractor.config(self, key, sentinel)
- return value if value is not sentinel else config.interpolate(
- ("extractor", "mastodon", self.instance, self.subcategory),
- key, default,
+ def config(self, key, default=None):
+ return config.interpolate_common(
+ ("extractor",), (
+ (self.category, self.subcategory),
+ (self.basecategory, self.instance, self.subcategory),
+ ), key, default,
)
def items(self):
yield Message.Version, 1
for status in self.statuses():
- attachments = self.prepare(status)
- yield Message.Directory, status
- for media in attachments:
- status["media"] = media
- url = media["url"]
- yield Message.Url, url, text.nameext_from_url(url, status)
+ attachments = status["media_attachments"]
+ if attachments:
+ self.prepare(status)
+ yield Message.Directory, status
+ for media in attachments:
+ status["media"] = media
+ url = media["url"]
+ yield Message.Url, url, text.nameext_from_url(url, status)
def statuses(self):
"""Return an iterable containing all relevant Status-objects"""
@@ -50,11 +53,11 @@ class MastodonExtractor(Extractor):
def prepare(self, status):
"""Prepare a status object"""
+ del status["media_attachments"]
status["instance"] = self.instance
status["tags"] = [tag["name"] for tag in status["tags"]]
- attachments = status["media_attachments"]
- del status["media_attachments"]
- return attachments
+ status["date"] = text.parse_datetime(
+ status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
class MastodonUserExtractor(MastodonExtractor):
diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py
index c980a38..413a58a 100644
--- a/gallery_dl/extractor/naver.py
+++ b/gallery_dl/extractor/naver.py
@@ -81,6 +81,7 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
class NaverBlogExtractor(NaverBase, Extractor):
"""Extractor for a user's blog on blog.naver.com"""
subcategory = "blog"
+ categorytransfer = True
pattern = (r"(?:https?://)?blog\.naver\.com/"
r"(?:PostList.nhn\?(?:[^&#]+&)*blogId=([^&#]+)|(\w+)/?$)")
test = (
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 24a0a55..3bbe06a 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -1,15 +1,16 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2019 Mike Fährmann
+# Copyright 2016-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://www.pinterest.com/"""
+"""Extractors for https://www.pinterest.com/"""
from .common import Extractor, Message
from .. import text, exception
+import itertools
import json
@@ -86,12 +87,17 @@ class PinterestBoardExtractor(PinterestExtractor):
subcategory = "board"
directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}")
archive_fmt = "{board[id]}_{id}"
- pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)(?!.*#related$)"
+ pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)/?$"
test = (
("https://www.pinterest.com/g1952849/test-/", {
"pattern": r"https://i\.pinimg\.com/originals/",
"count": 2,
}),
+ # board with sections (#835)
+ ("https://www.pinterest.com/g1952849/stuff/", {
+ "options": (("sections", True),),
+ "count": 5,
+ }),
("https://www.pinterest.com/g1952848/test/", {
"exception": exception.GalleryDLException,
}),
@@ -100,16 +106,51 @@ class PinterestBoardExtractor(PinterestExtractor):
def __init__(self, match):
PinterestExtractor.__init__(self, match)
self.user = text.unquote(match.group(1))
- self.board = text.unquote(match.group(2))
- self.board_id = 0
+ self.board_name = text.unquote(match.group(2))
+ self.board = None
def metadata(self):
- board = self.api.board(self.user, self.board)
- self.board_id = board["id"]
- return {"board": board}
+ self.board = self.api.board(self.user, self.board_name)
+ return {"board": self.board}
def pins(self):
- return self.api.board_pins(self.board_id)
+ board = self.board
+
+ if board["section_count"] and self.config("sections", True):
+ pins = [self.api.board_pins(board["id"])]
+ for section in self.api.board_sections(board["id"]):
+ pins.append(self.api.board_section_pins(section["id"]))
+ return itertools.chain.from_iterable(pins)
+ else:
+ return self.api.board_pins(board["id"])
+
+
+class PinterestSectionExtractor(PinterestExtractor):
+ """Extractor for board sections on pinterest.com"""
+ subcategory = "section"
+ directory_fmt = ("{category}", "{board[owner][username]}",
+ "{board[name]}", "{section[title]}")
+ archive_fmt = "{board[id]}_{id}"
+ pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)/([^/?#&]+)"
+ test = ("https://www.pinterest.com/g1952849/stuff/section", {
+ "count": 2,
+ })
+
+ def __init__(self, match):
+ PinterestExtractor.__init__(self, match)
+ self.user = text.unquote(match.group(1))
+ self.board_slug = text.unquote(match.group(2))
+ self.section_slug = text.unquote(match.group(3))
+ self.section = None
+
+ def metadata(self):
+ section = self.section = self.api.board_section(
+ self.user, self.board_slug, self.section_slug)
+ section.pop("preview_pins", None)
+ return {"board": section.pop("board"), "section": section}
+
+ def pins(self):
+ return self.api.board_section_pins(self.section["id"])
class PinterestRelatedPinExtractor(PinterestPinExtractor):
@@ -136,7 +177,7 @@ class PinterestRelatedBoardExtractor(PinterestBoardExtractor):
subcategory = "related-board"
directory_fmt = ("{category}", "{board[owner][username]}",
"{board[name]}", "related")
- pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+).*#related$"
+ pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)/?#related$"
test = ("https://www.pinterest.com/g1952849/test-/#related", {
"range": "31-70",
"count": 40,
@@ -144,7 +185,7 @@ class PinterestRelatedBoardExtractor(PinterestBoardExtractor):
})
def pins(self):
- return self.api.board_related(self.board_id)
+ return self.api.board_related(self.board["id"])
class PinterestPinitExtractor(PinterestExtractor):
@@ -188,9 +229,10 @@ class PinterestAPI():
"*/*, q=0.01",
"Accept-Language" : "en-US,en;q=0.5",
"X-Pinterest-AppState": "active",
- "X-APP-VERSION" : "cb1c7f9",
+ "X-APP-VERSION" : "b00dd49",
"X-Requested-With" : "XMLHttpRequest",
- "Origin" : BASE_URL + "/",
+ "Origin" : BASE_URL,
+ "Referer" : BASE_URL + "/",
}
def __init__(self, extractor):
@@ -206,9 +248,9 @@ class PinterestAPI():
options = {"pin": pin_id, "add_vase": True, "pins_only": True}
return self._pagination("RelatedPinFeed", options)
- def board(self, user, board):
+ def board(self, user, board_name):
"""Query information about a board"""
- options = {"slug": board, "username": user,
+ options = {"slug": board_name, "username": user,
"field_set_key": "detailed"}
return self._call("Board", options)["resource_response"]["data"]
@@ -217,6 +259,22 @@ class PinterestAPI():
options = {"board_id": board_id}
return self._pagination("BoardFeed", options)
+ def board_section(self, user, board_slug, section_slug):
+ """Yield a specific board section"""
+ options = {"board_slug": board_slug, "section_slug": section_slug,
+ "username": user}
+ return self._call("BoardSection", options)["resource_response"]["data"]
+
+ def board_sections(self, board_id):
+ """Yield all sections of a specific board"""
+ options = {"board_id": board_id}
+ return self._pagination("BoardSections", options)
+
+ def board_section_pins(self, section_id):
+ """Yield all pins from a board section"""
+ options = {"section_id": section_id}
+ return self._pagination("BoardSectionPins", options)
+
def board_related(self, board_id):
"""Yield related pins of a specific board"""
options = {"board_id": board_id, "add_vase": True}
diff --git a/gallery_dl/extractor/slickpic.py b/gallery_dl/extractor/slickpic.py
index 1063716..05ec117 100644
--- a/gallery_dl/extractor/slickpic.py
+++ b/gallery_dl/extractor/slickpic.py
@@ -42,7 +42,8 @@ class SlickpicAlbumExtractor(SlickpicExtractor):
("https://mattcrandall.slickpic.com/albums/LamborghiniMurcielago/", {
"range": "34",
"content": ("cec6630e659dc72db1ee1a9a6f3b525189261988",
- "6f81e1e74c6cd6db36844e7211eef8e7cd30055d"),
+ "6f81e1e74c6cd6db36844e7211eef8e7cd30055d",
+ "22e83645fc242bc3584eca7ec982c8a53a4d8a44"),
}),
)
diff --git a/gallery_dl/extractor/speakerdeck.py b/gallery_dl/extractor/speakerdeck.py
index 1a9691c..a3819c7 100644
--- a/gallery_dl/extractor/speakerdeck.py
+++ b/gallery_dl/extractor/speakerdeck.py
@@ -23,8 +23,10 @@ class SpeakerdeckPresentationExtractor(Extractor):
r"/([^/?&#]+)/([^/?&#]+)")
test = (
(("https://speakerdeck.com/speakerdeck/introduction-to-speakerdeck"), {
- "url": "e97d4a7d5c64267e921c13eb7946d7074794a0d2",
+ "pattern": r"https://files.speakerdeck.com/presentations/"
+ r"50021f75cf1db900020005e7/slide_\d+.jpg",
"content": "75c7abf0969b0bcab23e0da9712c95ee5113db3a",
+ "count": 6,
}),
)
diff --git a/gallery_dl/extractor/tsumino.py b/gallery_dl/extractor/tsumino.py
index 31dbdad..5809463 100644
--- a/gallery_dl/extractor/tsumino.py
+++ b/gallery_dl/extractor/tsumino.py
@@ -57,7 +57,7 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor):
"collection": "",
"artist" : ["Itou Life"],
"group" : ["Itou Life"],
- "parody" : ["Fate/Grand Order"],
+ "parody" : list,
"characters": list,
"tags" : list,
"type" : "Doujinshi",
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 3e3a5a0..70fead8 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -194,7 +194,7 @@ class TumblrExtractor(Extractor):
return not self.reblogs
def _skip_reblog_same_blog(self, post):
- return self.blog != post["reblogged_root_uuid"]
+ return self.blog != post.get("reblogged_root_uuid")
class TumblrUserExtractor(TumblrExtractor):
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 7cabb8c..1e985e3 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -31,6 +31,7 @@ class TwitterExtractor(Extractor):
self.retweets = self.config("retweets", True)
self.replies = self.config("replies", True)
self.twitpic = self.config("twitpic", False)
+ self.quoted = self.config("quoted", True)
self.videos = self.config("videos", True)
self._user_cache = {}
@@ -41,8 +42,9 @@ class TwitterExtractor(Extractor):
for tweet in self.tweets():
- if not self.retweets and "retweeted_status_id_str" in tweet or \
- not self.replies and "in_reply_to_user_id_str" in tweet:
+ if (not self.retweets and "retweeted_status_id_str" in tweet or
+ not self.replies and "in_reply_to_user_id_str" in tweet or
+ not self.quoted and "quoted" in tweet):
continue
if self.twitpic:
@@ -60,7 +62,7 @@ class TwitterExtractor(Extractor):
tdata["width"] = media["original_info"].get("width", 0)
tdata["height"] = media["original_info"].get("height", 0)
- if "video_info" in media and self.videos:
+ if "video_info" in media:
if self.videos == "ytdl":
url = "ytdl:{}/i/web/status/{}".format(
@@ -68,7 +70,7 @@ class TwitterExtractor(Extractor):
tdata["extension"] = None
yield Message.Url, url, tdata
- else:
+ elif self.videos:
video_info = media["video_info"]
variant = max(
video_info["variants"],
@@ -149,11 +151,10 @@ class TwitterExtractor(Extractor):
if "in_reply_to_screen_name" in tweet:
tdata["reply_to"] = tweet["in_reply_to_screen_name"]
- if "full_text_quoted" in tweet:
- tdata["content_quoted"] = tweet["full_text_quoted"]
-
if "author" in tweet:
tdata["author"] = self._transform_user(tweet["author"])
+ else:
+ tdata["author"] = tdata["user"]
return tdata
@@ -264,6 +265,27 @@ class TwitterMediaExtractor(TwitterExtractor):
return TwitterAPI(self).timeline_media(self.user)
+class TwitterLikesExtractor(TwitterExtractor):
+ """Extractor for liked tweets"""
+ subcategory = "likes"
+ pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
+ r"/(?!search)([^/?&#]+)/likes(?!\w)")
+ test = ("https://twitter.com/supernaturepics/likes",)
+
+ def tweets(self):
+ return TwitterAPI(self).timeline_favorites(self.user)
+
+
+class TwitterBookmarkExtractor(TwitterExtractor):
+ """Extractor for bookmarked tweets"""
+ subcategory = "bookmark"
+ pattern = r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/i/bookmarks()"
+ test = ("https://twitter.com/i/bookmarks",)
+
+ def tweets(self):
+ return TwitterAPI(self).timeline_bookmark()
+
+
class TwitterSearchExtractor(TwitterExtractor):
"""Extractor for all images from a search timeline"""
subcategory = "search"
@@ -279,7 +301,7 @@ class TwitterSearchExtractor(TwitterExtractor):
return {"search": text.unquote(self.user)}
def tweets(self):
- return TwitterAPI(self).search(self.user)
+ return TwitterAPI(self).search(text.unquote(self.user))
class TwitterTweetExtractor(TwitterExtractor):
@@ -298,7 +320,6 @@ class TwitterTweetExtractor(TwitterExtractor):
}),
# video
("https://twitter.com/perrypumas/status/1065692031626829824", {
- "options": (("videos", True),),
"pattern": r"https://video.twimg.com/ext_tw_video/.+\.mp4\?tag=5",
}),
# content with emoji, newlines, hashtags (#338)
@@ -310,23 +331,25 @@ class TwitterTweetExtractor(TwitterExtractor):
"It’s our \\(Mystery\\) Gift to you, Trainers! \n\n❓🎁➡️ "
)},
}),
- # Reply to another tweet (#403)
- ("https://twitter.com/tyson_hesse/status/1103767554424598528", {
- "options": (("videos", "ytdl"),),
- "pattern": r"ytdl:https://twitter.com/i/web.+/1103767554424598528",
+ # Reply to deleted tweet (#403, #838)
+ ("https://twitter.com/i/web/status/1170041925560258560", {
+ "pattern": r"https://pbs.twimg.com/media/EDzS7VrU0AAFL4_.jpg:orig",
}),
# 'replies' option (#705)
- ("https://twitter.com/tyson_hesse/status/1103767554424598528", {
+ ("https://twitter.com/i/web/status/1170041925560258560", {
"options": (("replies", False),),
"count": 0,
}),
- # /i/web/ URL
- ("https://twitter.com/i/web/status/1155074198240292865", {
- "pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig",
+ # quoted tweet (#526, #854)
+ ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
+ "pattern": r"https://pbs\.twimg\.com/media/Ea[KG].+\.jpg",
+ "count": 8,
}),
- # quoted tweet (#526)
- ("https://twitter.com/Pistachio/status/1222690391817932803", {
- "pattern": r"https://pbs\.twimg\.com/media/EPfMfDUU8AAnByO\.jpg",
+ # "quoted" option (#854)
+ ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
+ "options": (("quoted", False),),
+ "pattern": r"https://pbs\.twimg\.com/media/EaK.+\.jpg",
+ "count": 4,
}),
# TwitPic embeds (#579)
("https://twitter.com/i/web/status/112900228289540096", {
@@ -344,16 +367,6 @@ class TwitterTweetExtractor(TwitterExtractor):
return TwitterAPI(self).tweet(self.tweet_id)
-class TwitterBookmarkExtractor(TwitterExtractor):
- """Extractor for bookmarked tweets"""
- subcategory = "bookmark"
- pattern = r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/i/bookmarks()"
- test = ("https://twitter.com/i/bookmarks",)
-
- def tweets(self):
- return TwitterAPI(self).bookmarks()
-
-
class TwitterAPI():
def __init__(self, extractor):
@@ -409,16 +422,21 @@ class TwitterAPI():
self.headers["x-twitter-auth-type"] = "OAuth2Session"
else:
# guest token
- guest_token = _guest_token(self.extractor, self.headers)
+ guest_token = self._guest_token()
self.headers["x-guest-token"] = guest_token
cookies.set("gt", guest_token, domain=".twitter.com")
def tweet(self, tweet_id):
endpoint = "2/timeline/conversation/{}.json".format(tweet_id)
+ tweets = []
for tweet in self._pagination(endpoint):
if tweet["id_str"] == tweet_id:
- return (tweet,)
- return ()
+ tweets.append(tweet)
+ if "quoted_status_id_str" in tweet:
+ tweet_id = tweet["quoted_status_id_str"]
+ else:
+ break
+ return tweets
def timeline_profile(self, screen_name):
user = self.user_by_screen_name(screen_name)
@@ -430,17 +448,26 @@ class TwitterAPI():
endpoint = "2/timeline/media/{}.json".format(user["rest_id"])
return self._pagination(endpoint)
+ def timeline_favorites(self, screen_name):
+ user = self.user_by_screen_name(screen_name)
+ endpoint = "2/timeline/favorites/{}.json".format(user["rest_id"])
+ return self._pagination(endpoint)
+
+ def timeline_bookmark(self):
+ endpoint = "2/timeline/bookmark.json"
+ return self._pagination(endpoint)
+
def search(self, query):
endpoint = "2/search/adaptive.json"
params = self.params.copy()
- params["q"] = text.unquote(query)
+ params["q"] = query
+ params["tweet_search_mode"] = "live"
+ params["query_source"] = "typed_query"
+ params["pc"] = "1"
+ params["spelling_corrections"] = "1"
return self._pagination(
endpoint, params, "sq-I-t-", "sq-cursor-bottom")
- def bookmarks(self):
- endpoint = "2/timeline/bookmark.json"
- return self._pagination(endpoint)
-
def user_by_screen_name(self, screen_name):
endpoint = "graphql/-xfUfZsnR_zqjFd-IfrN5A/UserByScreenName"
params = {
@@ -449,10 +476,16 @@ class TwitterAPI():
}
return self._call(endpoint, params)["data"]["user"]
- def _call(self, endpoint, params):
+ @cache(maxage=3600)
+ def _guest_token(self):
+ endpoint = "1.1/guest/activate.json"
+ return self._call(endpoint, None, "POST")["guest_token"]
+
+ def _call(self, endpoint, params, method="GET"):
url = "https://api.twitter.com/" + endpoint
response = self.extractor.request(
- url, params=params, headers=self.headers, fatal=None)
+ url, method=method, params=params, headers=self.headers,
+ fatal=None)
if response.status_code < 400:
return response.json()
if response.status_code == 429:
@@ -479,28 +512,30 @@ class TwitterAPI():
for entry in instr[0]["addEntries"]["entries"]:
if entry["entryId"].startswith(entry_tweet):
- tid = entry["content"]["item"]["content"]["tweet"]["id"]
- if tid not in tweets:
+ try:
+ tweet = tweets[
+ entry["content"]["item"]["content"]["tweet"]["id"]]
+ except KeyError:
self.extractor.log.debug(
- "Skipping unavailable Tweet %s", tid)
+ "Skipping unavailable Tweet %s",
+ entry["entryId"][6:])
continue
- tweet = tweets[tid]
tweet["user"] = users[tweet["user_id_str"]]
- if "quoted_status_id_str" in tweet:
- quoted = tweets.get(tweet["quoted_status_id_str"])
- if quoted:
- tweet["full_text_quoted"] = quoted["full_text"]
- if "extended_entities" in quoted:
- tweet["extended_entities"] = \
- quoted["extended_entities"]
- elif "retweeted_status_id_str" in tweet:
+ if "retweeted_status_id_str" in tweet:
retweet = tweets.get(tweet["retweeted_status_id_str"])
if retweet:
tweet["author"] = users[retweet["user_id_str"]]
-
yield tweet
+ if "quoted_status_id_str" in tweet:
+ quoted = tweets.get(tweet["quoted_status_id_str"])
+ if quoted:
+ quoted["author"] = users[quoted["user_id_str"]]
+ quoted["user"] = tweet["user"]
+ quoted["quoted"] = True
+ yield quoted
+
elif entry["entryId"].startswith(entry_cursor):
cursor = entry["content"]["operation"]["cursor"]
if not cursor.get("stopOnEmptyResponse"):
@@ -515,11 +550,3 @@ class TwitterAPI():
if not cursor or not tweet:
return
params["cursor"] = cursor
-
-
-@cache(maxage=3600)
-def _guest_token(extr, headers):
- return extr.request(
- "https://api.twitter.com/1.1/guest/activate.json",
- method="POST", headers=headers,
- ).json().get("guest_token")
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index 3b992a2..d42730e 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -96,6 +96,7 @@ class WebtoonsEpisodeExtractor(WebtoonsExtractor):
class WebtoonsComicExtractor(WebtoonsExtractor):
"""Extractor for an entire comic on webtoons.com"""
subcategory = "comic"
+ categorytransfer = True
pattern = (BASE_PATTERN + r"/([^/?&#]+)/([^/?&#]+))"
r"/list(?:\?([^#]+))")
test = (
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index aa9bdae..d1ad388 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -10,6 +10,7 @@
from .common import Extractor, Message
from .. import text, exception
+import itertools
import json
@@ -30,53 +31,53 @@ class WeiboExtractor(Extractor):
for status in self.statuses():
- yield Message.Directory, status
- obj = status
- num = 1
-
- while True:
-
- if "pics" in obj:
- for image in obj["pics"]:
- pid = image["pid"]
- if "large" in image:
- image = image["large"]
- geo = image.get("geo") or {}
- data = text.nameext_from_url(image["url"], {
- "num" : num,
- "pid" : pid,
- "url" : image["url"],
- "width" : text.parse_int(geo.get("width")),
- "height": text.parse_int(geo.get("height")),
- "status": status,
- })
- yield Message.Url, image["url"], data
- num += 1
-
- if self.videos and "media_info" in obj.get("page_info", ()):
- info = obj["page_info"]["media_info"]
- url = info.get("stream_url_hd") or info.get("stream_url")
-
- if url:
- data = text.nameext_from_url(url, {
- "num" : num,
- "pid" : 0,
- "url" : url,
- "width" : 0,
- "height": 0,
- "status": status,
- })
- if data["extension"] == "m3u8":
- url = "ytdl:" + url
- data["extension"] = "mp4"
- data["_ytdl_extra"] = {"protocol": "m3u8_native"}
- yield Message.Url, url, data
- num += 1
-
- if self.retweets and "retweeted_status" in obj:
- obj = obj["retweeted_status"]
- else:
- break
+ files = self._files_from_status(status)
+ if self.retweets and "retweeted_status" in status:
+ files = itertools.chain(
+ files,
+ self._files_from_status(status["retweeted_status"]),
+ )
+
+ for num, file in enumerate(files, 1):
+ if num == 1:
+ status["date"] = text.parse_datetime(
+ status["created_at"], "%a %b %d %H:%M:%S %z %Y")
+ yield Message.Directory, status
+ file["status"] = status
+ file["num"] = num
+ yield Message.Url, file["url"], file
+
+ def _files_from_status(self, status):
+ images = status.pop("pics", ())
+ page_info = status.pop("page_info", ())
+
+ for image in images:
+ pid = image["pid"]
+ if "large" in image:
+ image = image["large"]
+ geo = image.get("geo") or {}
+ yield text.nameext_from_url(image["url"], {
+ "url" : image["url"],
+ "pid" : pid,
+ "width" : text.parse_int(geo.get("width")),
+ "height": text.parse_int(geo.get("height")),
+ })
+
+ if self.videos and "media_info" in page_info:
+ info = page_info["media_info"]
+ url = info.get("stream_url_hd") or info.get("stream_url")
+ if url:
+ data = text.nameext_from_url(url, {
+ "url" : url,
+ "pid" : 0,
+ "width" : 0,
+ "height": 0,
+ })
+ if data["extension"] == "m3u8":
+ data["extension"] = "mp4"
+ data["url"] = "ytdl:" + url
+ data["_ytdl_extra"] = {"protocol": "m3u8_native"}
+ yield data
def statuses(self):
"""Returns an iterable containing all relevant 'status' objects"""
@@ -124,6 +125,7 @@ class WeiboStatusExtractor(WeiboExtractor):
test = (
("https://m.weibo.cn/detail/4323047042991618", {
"pattern": r"https?://wx\d+.sinaimg.cn/large/\w+.jpg",
+ "keyword": {"status": {"date": "dt:2018-12-30 13:56:36"}},
}),
("https://m.weibo.cn/detail/4339748116375525", {
"pattern": r"https?://f.us.sinaimg.cn/\w+\.mp4\?label=mp4_hd",
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 130df58..923a4e6 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -454,15 +454,18 @@ class KeywordJob(Job):
self.print_kwdict(kwdict)
def handle_queue(self, url, kwdict):
+ extr = None
+ if "_extractor" in kwdict:
+ extr = kwdict["_extractor"].from_url(url)
+
if not util.filter_dict(kwdict):
self.extractor.log.info(
"This extractor only spawns other extractors "
"and does not provide any metadata on its own.")
- if "_extractor" in kwdict:
+ if extr:
self.extractor.log.info(
"Showing results for '%s' instead:\n", url)
- extr = kwdict["_extractor"].from_url(url)
KeywordJob(extr, self).run()
else:
self.extractor.log.info(
@@ -471,9 +474,9 @@ class KeywordJob(Job):
print("Keywords for --chapter-filter:")
print("------------------------------")
self.print_kwdict(kwdict)
- if self.extractor.categorytransfer:
+ if extr or self.extractor.categorytransfer:
print()
- KeywordJob(url, self).run()
+ KeywordJob(extr or url, self).run()
raise exception.StopExtraction()
@staticmethod
@@ -559,7 +562,12 @@ class DataJob(Job):
util.transform_dict(msg[-1], util.number_to_string)
# dump to 'file'
- util.dump_json(self.data, self.file, self.ascii, 2)
+ try:
+ util.dump_json(self.data, self.file, self.ascii, 2)
+ self.file.flush()
+ except Exception:
+ pass
+
return 0
def handle_url(self, url, kwdict):
@@ -576,6 +584,3 @@ class DataJob(Job):
def handle_queue(self, url, kwdict):
self.data.append((Message.Queue, url, self.filter(kwdict)))
-
- def handle_finalize(self):
- self.file.close()
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index 4dc0963..9a716f9 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -252,10 +252,13 @@ def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z", utcoffset=0):
o = d.utcoffset()
if o is not None:
# convert to naive UTC
- d = d.replace(tzinfo=None) - o
- elif utcoffset:
- # apply manual UTC offset
- d += datetime.timedelta(0, utcoffset * -3600)
+ d = d.replace(tzinfo=None, microsecond=0) - o
+ else:
+ if d.microsecond:
+ d = d.replace(microsecond=0)
+ if utcoffset:
+ # apply manual UTC offset
+ d += datetime.timedelta(0, utcoffset * -3600)
return d
except (TypeError, IndexError, KeyError):
return None
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index afd96b8..c8d73b6 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -120,13 +120,14 @@ def dump_json(obj, fp=sys.stdout, ensure_ascii=True, indent=4):
fp.write("\n")
-def dump_response(response, fp=sys.stdout,
- headers=True, content=True, hide_auth=True):
+def dump_response(response, fp, *,
+ headers=False, content=True, hide_auth=True):
"""Write the contents of 'response' into a file-like object"""
if headers:
request = response.request
req_headers = request.headers.copy()
+ res_headers = response.headers.copy()
outfmt = """\
{request.method} {request.url}
Status: {response.status_code} {response.reason}
@@ -145,11 +146,17 @@ Response Headers
atype, sep, _ = authorization.partition(" ")
req_headers["Authorization"] = atype + " ***" if sep else "***"
- cookies = req_headers.get("Cookie")
- if cookies:
+ cookie = req_headers.get("Cookie")
+ if cookie:
req_headers["Cookie"] = ";".join(
- cookie.partition("=")[0] + "=***"
- for cookie in cookies.split(";")
+ c.partition("=")[0] + "=***"
+ for c in cookie.split(";")
+ )
+
+ set_cookie = res_headers.get("Set-Cookie")
+ if set_cookie:
+ res_headers["Set-Cookie"] = re.sub(
+ r"(^|, )([^ =]+)=[^,;]*", r"\1\2=***", set_cookie,
)
fp.write(outfmt.format(
@@ -161,7 +168,7 @@ Response Headers
),
response_headers="\n".join(
name + ": " + value
- for name, value in response.headers.items()
+ for name, value in res_headers.items()
),
).encode())
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 8509e1e..3297d03 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.14.1"
+__version__ = "1.14.2"
diff --git a/test/test_config.py b/test/test_config.py
index 4171435..a9cefd4 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -68,6 +68,34 @@ class TestConfig(unittest.TestCase):
self.assertEqual(config.interpolate(("b",), "d", 1) , 2)
self.assertEqual(config.interpolate(("d",), "d", 1) , 2)
+ def test_interpolate_common(self):
+
+ def lookup():
+ return config.interpolate_common(
+ ("Z1", "Z2"), (
+ ("A1", "A2"),
+ ("B1",),
+ ("C1", "C2", "C3"),
+ ), "KEY", "DEFAULT",
+ )
+
+ def test(path, value, expected=None):
+ config.set(path, "KEY", value)
+ self.assertEqual(lookup(), expected or value)
+
+ self.assertEqual(lookup(), "DEFAULT")
+ test(("Z1",), 1)
+ test(("Z1", "Z2"), 2)
+ test(("Z1", "Z2", "C1"), 3)
+ test(("Z1", "Z2", "C1", "C2"), 4)
+ test(("Z1", "Z2", "C1", "C2", "C3"), 5)
+ test(("Z1", "Z2", "B1"), 6)
+ test(("Z1", "Z2", "A1"), 7)
+ test(("Z1", "Z2", "A1", "A2"), 8)
+ test(("Z1", "A1", "A2"), 999, 8)
+ test(("Z1", "Z2", "A1", "A2", "A3"), 999, 8)
+ test((), 9)
+
def test_set(self):
config.set(() , "c", [1, 2, 3])
config.set(("b",) , "c", [1, 2, 3])
diff --git a/test/test_results.py b/test/test_results.py
index 196d859..6a943aa 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -31,10 +31,9 @@ TRAVIS_SKIP = {
# temporary issues, etc.
BROKEN = {
+ "bobx",
"imagevenue",
"photobucket",
- "seiga",
- "twitter",
"worldthree",
}
diff --git a/test/test_text.py b/test/test_text.py
index aeb8096..34585d1 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -407,6 +407,10 @@ class TestText(unittest.TestCase):
datetime.datetime(2019, 5, 7, 12, 25, 2),
)
self.assertEqual(
+ f("2019-05-07T21:25:02.753+0900", "%Y-%m-%dT%H:%M:%S.%f%z"),
+ datetime.datetime(2019, 5, 7, 12, 25, 2),
+ )
+ self.assertEqual(
f("2019-05-07T21:25:02", "%Y-%m-%dT%H:%M:%S", utcoffset=9),
datetime.datetime(2019, 5, 7, 12, 25, 2),
)