diff options
author | Unit 193 <unit193@unit193.net> | 2020-06-29 00:33:23 -0400 |
---|---|---|
committer | Unit 193 <unit193@unit193.net> | 2020-06-29 00:33:23 -0400 |
commit | d7db0f63f4b1f051a7ecf6d80cfacaae1626ce69 (patch) | |
tree | 64c9657783b7c1ec7db81ec1e41fedba3c5ad0b2 | |
parent | 2865adf5de64ea6ca38f734cc61ef805c4bc27d2 (diff) | |
parent | 02dd2886783cd303cff6890a741152d013bb00ce (diff) | |
download | gallery-dl-d7db0f63f4b1f051a7ecf6d80cfacaae1626ce69.tar.bz2 gallery-dl-d7db0f63f4b1f051a7ecf6d80cfacaae1626ce69.tar.xz gallery-dl-d7db0f63f4b1f051a7ecf6d80cfacaae1626ce69.tar.zst |
Update upstream source from tag 'upstream/1.14.2'
Update to upstream version '1.14.2'
with Debian dir 4efcdfd56b8d506548ba3400af16c1e34dea7260
33 files changed, 487 insertions, 223 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 043d964..f84e423 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,26 @@ # Changelog +## 1.14.2 - 2020-06-27 +### Additions +- [artstation] add `date` metadata field ([#839](https://github.com/mikf/gallery-dl/issues/839)) +- [mastodon] add `date` metadata field ([#839](https://github.com/mikf/gallery-dl/issues/839)) +- [pinterest] add support for board sections ([#835](https://github.com/mikf/gallery-dl/issues/835)) +- [twitter] add extractor for liked tweets ([#837](https://github.com/mikf/gallery-dl/issues/837)) +- [twitter] add option to filter media from quoted tweets ([#854](https://github.com/mikf/gallery-dl/issues/854)) +- [weibo] add `date` metadata field to `status` objects ([#829](https://github.com/mikf/gallery-dl/issues/829)) +### Fixes +- [aryion] fix user gallery extraction ([#832](https://github.com/mikf/gallery-dl/issues/832)) +- [imgur] build directory paths for each file ([#842](https://github.com/mikf/gallery-dl/issues/842)) +- [tumblr] prevent errors when using `reblogs=same-blog` ([#851](https://github.com/mikf/gallery-dl/issues/851)) +- [twitter] always provide an `author` metadata field ([#831](https://github.com/mikf/gallery-dl/issues/831), [#833](https://github.com/mikf/gallery-dl/issues/833)) +- [twitter] don't download video previews ([#833](https://github.com/mikf/gallery-dl/issues/833)) +- [twitter] improve handling of deleted tweets ([#838](https://github.com/mikf/gallery-dl/issues/838)) +- [twitter] fix search results ([#847](https://github.com/mikf/gallery-dl/issues/847)) +- [twitter] improve handling of quoted tweets ([#854](https://github.com/mikf/gallery-dl/issues/854)) +- fix config lookups when multiple locations are involved ([#843](https://github.com/mikf/gallery-dl/issues/843)) +- improve output of `-K/--list-keywords` for parent extractors ([#825](https://github.com/mikf/gallery-dl/issues/825)) +- call `flush()` after writing JSON in `DataJob()` ([#727](https://github.com/mikf/gallery-dl/issues/727)) + ## 1.14.1 - 2020-06-12 ### Additions - [furaffinity] add `artist_url` metadata field ([#821](https://github.com/mikf/gallery-dl/issues/821)) @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.14.1 +Version: 1.14.2 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.1/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.1/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -302,7 +302,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.1.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.2.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -83,8 +83,8 @@ Download a standalone executable file, put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.1/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.1/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -291,7 +291,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.1.tar.gz +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.2.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 76a57d1..21055ca 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2020-06-12" "1.14.1" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2020-06-27" "1.14.2" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 88f8ebc..7060751 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2020-06-12" "1.14.1" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2020-06-27" "1.14.2" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -631,6 +631,22 @@ See \f[I]strptime\f[] for a list of formatting directives. .IP "Description:" 4 Try to follow external URLs of embedded players. +.SS extractor.aryion.recursive +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Controls the post extraction strategy. + +.br +* \f[I]true\f[]: Start on users' main gallery pages and recursively +descend into subfolders +.br +* \f[I]false\f[]: Get posts from "Latest Updates" pages + .SS extractor.blogger.videos .IP "Type:" 6 \f[I]bool\f[] @@ -1079,6 +1095,16 @@ port than the default. .IP "Description:" 4 Download subalbums. +.SS extractor.pinterest.sections +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Include pins from board sections. + .SS extractor.pixiv.user.avatar .IP "Type:" 6 \f[I]bool\f[] @@ -1375,6 +1401,16 @@ Possible types are \f[I]text\f[], \f[I]quote\f[], \f[I]link\f[], \f[I]answer\f[] You can use \f[I]"all"\f[] instead of listing all types separately. +.SS extractor.twitter.quoted +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Fetch media from quoted Tweets. + .SS extractor.twitter.replies .IP "Type:" 6 \f[I]bool\f[] @@ -1383,7 +1419,7 @@ You can use \f[I]"all"\f[] instead of listing all types separately. \f[I]true\f[] .IP "Description:" 4 -Extract media from replies to other Tweets. +Fetch media from replies to other Tweets. .SS extractor.twitter.retweets .IP "Type:" 6 @@ -1393,7 +1429,7 @@ Extract media from replies to other Tweets. \f[I]true\f[] .IP "Description:" 4 -Extract media from Retweets. +Fetch media from Retweets. .SS extractor.twitter.twitpic .IP "Type:" 6 diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index ae4839d..aa54e1a 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -155,6 +155,7 @@ }, "twitter": { + "quoted": true, "replies": true, "retweets": true, "twitpic": false, diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 3f6f077..0b01abc 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.14.1 +Version: 1.14.2 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.1/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.1/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -302,7 +302,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.1.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.2.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/gallery_dl/config.py b/gallery_dl/config.py index 5303616..a3c71cd 100644 --- a/gallery_dl/config.py +++ b/gallery_dl/config.py @@ -108,6 +108,38 @@ def interpolate(path, key, default=None, *, conf=_config): return default +def interpolate_common(common, paths, key, default=None, *, conf=_config): + """Interpolate the value of 'key' + using multiple 'paths' along a 'common' ancestor + """ + if key in conf: + return conf[key] + + # follow the common path + try: + for p in common: + conf = conf[p] + if key in conf: + default = conf[key] + except Exception: + return default + + # try all paths until a value is found + value = util.SENTINEL + for path in paths: + c = conf + try: + for p in path: + c = c[p] + if key in c: + value = c[key] + except Exception: + pass + if value is not util.SENTINEL: + return value + return default + + def set(path, key, value, *, conf=_config): """Set the value of property 'key' for this session""" for p in path: diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index c504dba..64a4bf4 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -66,6 +66,8 @@ class ArtstationExtractor(Extractor): data["title"] = text.unescape(data["title"]) data["description"] = text.unescape(text.remove_html( data["description"])) + data["date"] = text.parse_datetime( + data["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") assets = data["assets"] del data["assets"] diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py index 7575de9..04bb146 100644 --- a/gallery_dl/extractor/aryion.py +++ b/gallery_dl/extractor/aryion.py @@ -26,9 +26,24 @@ class AryionExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.user = match.group(1) - self.offset = 0 + self.recursive = True - def posts(self, url): + def items(self): + for post_id in self.posts(): + post = self._parse_post(post_id) + if post: + yield Message.Directory, post + yield Message.Url, post["url"], post + elif post is False and self.recursive: + base = self.root + "/g4/view/" + data = {"_extractor": AryionPostExtractor} + for post_id in self._pagination(base + post_id): + yield Message.Queue, base + post_id, data + + def posts(self): + """Yield relevant post IDs""" + + def _pagination(self, url): while True: page = self.request(url).text yield from text.extract_iter( @@ -39,11 +54,14 @@ class AryionExtractor(Extractor): return url = self.root + text.rextract(page, "href='", "'", pos)[0] - def parse_post(self, post_id): + def _parse_post(self, post_id): url = "{}/g4/data.php?id={}".format(self.root, post_id) with self.request(url, method="HEAD", fatal=False) as response: if response.status_code >= 400: + self.log.warning( + "Unable to fetch post %s ('%s %s')", + post_id, response.status_code, response.reason) return None headers = response.headers @@ -106,9 +124,11 @@ class AryionExtractor(Extractor): class AryionGalleryExtractor(AryionExtractor): """Extractor for a user's gallery on eka's portal""" subcategory = "gallery" + categorytransfer = True pattern = BASE_PATTERN + r"/(?:gallery/|user/|latest.php\?name=)([^/?&#]+)" test = ( ("https://aryion.com/g4/gallery/jameshoward", { + "options": (("recursive", False),), "pattern": r"https://aryion\.com/g4/data\.php\?id=\d+$", "range": "48-52", "count": 5, @@ -117,17 +137,24 @@ class AryionGalleryExtractor(AryionExtractor): ("https://aryion.com/g4/latest.php?name=jameshoward"), ) + def __init__(self, match): + AryionExtractor.__init__(self, match) + self.recursive = self.config("recursive", True) + self.offset = 0 + def skip(self, num): + if self.recursive: + num = 0 self.offset += num return num - def items(self): - url = "{}/g4/latest.php?name={}".format(self.root, self.user) - for post_id in util.advance(self.posts(url), self.offset): - post = self.parse_post(post_id) - if post: - yield Message.Directory, post - yield Message.Url, post["url"], post + def posts(self): + if self.recursive: + url = "{}/g4/gallery/{}".format(self.root, self.user) + return self._pagination(url) + else: + url = "{}/g4/latest.php?name={}".format(self.root, self.user) + return util.advance(self._pagination(url), self.offset) class AryionPostExtractor(AryionExtractor): @@ -164,19 +191,6 @@ class AryionPostExtractor(AryionExtractor): }), ) - def items(self): - post_id = self.user - self.user = None - post = self.parse_post(post_id) - - if post: - yield Message.Directory, post - yield Message.Url, post["url"], post - - elif post is False: - folder_url = "{}/g4/view/{}".format(self.root, post_id) - data = {"_extractor": AryionPostExtractor} - - for post_id in self.posts(folder_url): - url = "{}/g4/view/{}".format(self.root, post_id) - yield Message.Queue, url, data + def posts(self): + post_id, self.user = self.user, None + return (post_id,) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index dd685df..bbbd8a6 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -328,14 +328,15 @@ class Extractor(): test = (test, None) yield test - def _dump_response(self, response): + def _dump_response(self, response, history=True): """Write the response content to a .dump file in the current directory. The file name is derived from the response url, replacing special characters with "_" """ - for resp in response.history: - self._dump_response(resp) + if history: + for resp in response.history: + self._dump_response(resp, False) if hasattr(Extractor, "_dump_index"): Extractor._dump_index += 1 @@ -350,7 +351,8 @@ class Extractor(): try: with open(fname + ".dump", 'wb') as fp: - util.dump_response(response, fp) + util.dump_response( + response, fp, headers=(self._write_pages == "all")) except Exception as e: self.log.warning("Failed to dump HTTP request (%s: %s)", e.__class__.__name__, e) @@ -490,10 +492,13 @@ class SharedConfigMixin(): """Enable sharing of config settings based on 'basecategory'""" basecategory = "" - def config(self, key, default=None, *, sentinel=util.SENTINEL): - value = Extractor.config(self, key, sentinel) - return value if value is not sentinel else config.interpolate( - ("extractor", self.basecategory, self.subcategory), key, default) + def config(self, key, default=None): + return config.interpolate_common( + ("extractor",), ( + (self.category, self.subcategory), + (self.basecategory, self.subcategory), + ), key, default, + ) def generate_extractors(extractor_data, symtable, classes): diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py index 86f63ae..731f54b 100644 --- a/gallery_dl/extractor/foolslide.py +++ b/gallery_dl/extractor/foolslide.py @@ -225,9 +225,9 @@ EXTRACTORS = { }), ), "test-manga": - ("https://sensescans.com/reader/series/hakkenden/", { - "url": "3e0559029c21ca5af8a2082dd6de1567fcec4d83", - "keyword": "4919f2bfed38e3a34dc984ec8d1dbd7a03044e23", + ("https://sensescans.com/reader/series/yotsubato/", { + "url": "ee4dca7c421bf15ac039200f8c0bcb0858153640", + "keyword": "f94961bd731bd878bbd4d48555bc3ace1d937364", }), }, "worldthree": { diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py index 8d2c937..3882a92 100644 --- a/gallery_dl/extractor/imgbb.py +++ b/gallery_dl/extractor/imgbb.py @@ -124,8 +124,8 @@ class ImgbbAlbumExtractor(ImgbbExtractor): }), ("https://ibb.co/album/i5PggF?sort=title_asc", { "range": "1-80", - "url": "a2dfc58fe3348fa37e242082bd5a85eaa490d0a5", - "keyword": "5bb79c82411c3770d673fac64a0a98fa28111c3b", + "url": "afdf5fc95d8e09d77e8f44312f3e9b843987bb5a", + "keyword": "f090e14d0e5f7868595082b2c95da1309c84872d", }), # no user data (#471) ("https://ibb.co/album/kYKpwF", { diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 44fa5f2..20b698b 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -210,6 +210,7 @@ class ImgurAlbumExtractor(ImgurExtractor): album = self.api.album(self.key) album["date"] = text.parse_timestamp(album["datetime"]) images = album["images"] + count = len(images) try: del album["images"] @@ -218,11 +219,12 @@ class ImgurAlbumExtractor(ImgurExtractor): pass yield Message.Version, 1 - yield Message.Directory, {"album": album, "count": len(images)} for num, image in enumerate(images, 1): url = self._prepare(image) image["num"] = num + image["count"] = count image["album"] = album + yield Message.Directory, image yield Message.Url, url, image diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 3781711..bf6b10f 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -386,7 +386,7 @@ class InstagramImageExtractor(InstagramExtractor): # GraphVideo ("https://www.instagram.com/p/Bqxp0VSBgJg/", { - "pattern": r"/47129943_191645575115739_8539303288426725376_n\.mp4", + "pattern": r"/46840863_726311431074534_7805566102611403091_n\.mp4", "keyword": { "date": "dt:2018-11-29 19:23:58", "description": str, @@ -404,7 +404,7 @@ class InstagramImageExtractor(InstagramExtractor): # GraphVideo (IGTV) ("https://www.instagram.com/tv/BkQjCfsBIzi/", { - "pattern": r"/10000000_1760663964018792_716207142595461120_n\.mp4", + "pattern": r"/10000000_597132547321814_702169244961988209_n\.mp4", "keyword": { "date": "dt:2018-06-20 19:51:32", "description": str, diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py index ade245b..348453d 100644 --- a/gallery_dl/extractor/kissmanga.py +++ b/gallery_dl/extractor/kissmanga.py @@ -9,10 +9,9 @@ """Extract manga-chapters and entire manga from https://kissmanga.com/""" from .common import ChapterExtractor, MangaExtractor, Extractor -from .. import text, aes +from .. import text, aes, exception from ..cache import cache import hashlib -import time import ast import re @@ -25,7 +24,18 @@ class RedirectMixin(): response = Extractor.request(self, url, **kwargs) if not response.history or "/AreYouHuman" not in response.url: return response - time.sleep(2) + if self.config("captcha", "stop") == "wait": + self.log.warning( + "Redirect to \n%s\nVisit this URL in your browser, solve " + "the CAPTCHA, and press ENTER to continue", response.url) + try: + input() + except (EOFError, OSError): + pass + else: + raise exception.StopExtraction( + "Redirect to \n%s\nVisit this URL in your browser and " + "solve the CAPTCHA to continue", response.url) class KissmangaBase(RedirectMixin): diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py index 002c8f7..fa1fecc 100644 --- a/gallery_dl/extractor/mastodon.py +++ b/gallery_dl/extractor/mastodon.py @@ -27,22 +27,25 @@ class MastodonExtractor(Extractor): Extractor.__init__(self, match) self.api = MastodonAPI(self) - def config(self, key, default=None, *, sentinel=util.SENTINEL): - value = Extractor.config(self, key, sentinel) - return value if value is not sentinel else config.interpolate( - ("extractor", "mastodon", self.instance, self.subcategory), - key, default, + def config(self, key, default=None): + return config.interpolate_common( + ("extractor",), ( + (self.category, self.subcategory), + (self.basecategory, self.instance, self.subcategory), + ), key, default, ) def items(self): yield Message.Version, 1 for status in self.statuses(): - attachments = self.prepare(status) - yield Message.Directory, status - for media in attachments: - status["media"] = media - url = media["url"] - yield Message.Url, url, text.nameext_from_url(url, status) + attachments = status["media_attachments"] + if attachments: + self.prepare(status) + yield Message.Directory, status + for media in attachments: + status["media"] = media + url = media["url"] + yield Message.Url, url, text.nameext_from_url(url, status) def statuses(self): """Return an iterable containing all relevant Status-objects""" @@ -50,11 +53,11 @@ class MastodonExtractor(Extractor): def prepare(self, status): """Prepare a status object""" + del status["media_attachments"] status["instance"] = self.instance status["tags"] = [tag["name"] for tag in status["tags"]] - attachments = status["media_attachments"] - del status["media_attachments"] - return attachments + status["date"] = text.parse_datetime( + status["created_at"][:19], "%Y-%m-%dT%H:%M:%S") class MastodonUserExtractor(MastodonExtractor): diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py index c980a38..413a58a 100644 --- a/gallery_dl/extractor/naver.py +++ b/gallery_dl/extractor/naver.py @@ -81,6 +81,7 @@ class NaverPostExtractor(NaverBase, GalleryExtractor): class NaverBlogExtractor(NaverBase, Extractor): """Extractor for a user's blog on blog.naver.com""" subcategory = "blog" + categorytransfer = True pattern = (r"(?:https?://)?blog\.naver\.com/" r"(?:PostList.nhn\?(?:[^&#]+&)*blogId=([^&#]+)|(\w+)/?$)") test = ( diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 24a0a55..3bbe06a 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -1,15 +1,16 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2019 Mike Fährmann +# Copyright 2016-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://www.pinterest.com/""" +"""Extractors for https://www.pinterest.com/""" from .common import Extractor, Message from .. import text, exception +import itertools import json @@ -86,12 +87,17 @@ class PinterestBoardExtractor(PinterestExtractor): subcategory = "board" directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}") archive_fmt = "{board[id]}_{id}" - pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)(?!.*#related$)" + pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)/?$" test = ( ("https://www.pinterest.com/g1952849/test-/", { "pattern": r"https://i\.pinimg\.com/originals/", "count": 2, }), + # board with sections (#835) + ("https://www.pinterest.com/g1952849/stuff/", { + "options": (("sections", True),), + "count": 5, + }), ("https://www.pinterest.com/g1952848/test/", { "exception": exception.GalleryDLException, }), @@ -100,16 +106,51 @@ class PinterestBoardExtractor(PinterestExtractor): def __init__(self, match): PinterestExtractor.__init__(self, match) self.user = text.unquote(match.group(1)) - self.board = text.unquote(match.group(2)) - self.board_id = 0 + self.board_name = text.unquote(match.group(2)) + self.board = None def metadata(self): - board = self.api.board(self.user, self.board) - self.board_id = board["id"] - return {"board": board} + self.board = self.api.board(self.user, self.board_name) + return {"board": self.board} def pins(self): - return self.api.board_pins(self.board_id) + board = self.board + + if board["section_count"] and self.config("sections", True): + pins = [self.api.board_pins(board["id"])] + for section in self.api.board_sections(board["id"]): + pins.append(self.api.board_section_pins(section["id"])) + return itertools.chain.from_iterable(pins) + else: + return self.api.board_pins(board["id"]) + + +class PinterestSectionExtractor(PinterestExtractor): + """Extractor for board sections on pinterest.com""" + subcategory = "section" + directory_fmt = ("{category}", "{board[owner][username]}", + "{board[name]}", "{section[title]}") + archive_fmt = "{board[id]}_{id}" + pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)/([^/?#&]+)" + test = ("https://www.pinterest.com/g1952849/stuff/section", { + "count": 2, + }) + + def __init__(self, match): + PinterestExtractor.__init__(self, match) + self.user = text.unquote(match.group(1)) + self.board_slug = text.unquote(match.group(2)) + self.section_slug = text.unquote(match.group(3)) + self.section = None + + def metadata(self): + section = self.section = self.api.board_section( + self.user, self.board_slug, self.section_slug) + section.pop("preview_pins", None) + return {"board": section.pop("board"), "section": section} + + def pins(self): + return self.api.board_section_pins(self.section["id"]) class PinterestRelatedPinExtractor(PinterestPinExtractor): @@ -136,7 +177,7 @@ class PinterestRelatedBoardExtractor(PinterestBoardExtractor): subcategory = "related-board" directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}", "related") - pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+).*#related$" + pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)/?#related$" test = ("https://www.pinterest.com/g1952849/test-/#related", { "range": "31-70", "count": 40, @@ -144,7 +185,7 @@ class PinterestRelatedBoardExtractor(PinterestBoardExtractor): }) def pins(self): - return self.api.board_related(self.board_id) + return self.api.board_related(self.board["id"]) class PinterestPinitExtractor(PinterestExtractor): @@ -188,9 +229,10 @@ class PinterestAPI(): "*/*, q=0.01", "Accept-Language" : "en-US,en;q=0.5", "X-Pinterest-AppState": "active", - "X-APP-VERSION" : "cb1c7f9", + "X-APP-VERSION" : "b00dd49", "X-Requested-With" : "XMLHttpRequest", - "Origin" : BASE_URL + "/", + "Origin" : BASE_URL, + "Referer" : BASE_URL + "/", } def __init__(self, extractor): @@ -206,9 +248,9 @@ class PinterestAPI(): options = {"pin": pin_id, "add_vase": True, "pins_only": True} return self._pagination("RelatedPinFeed", options) - def board(self, user, board): + def board(self, user, board_name): """Query information about a board""" - options = {"slug": board, "username": user, + options = {"slug": board_name, "username": user, "field_set_key": "detailed"} return self._call("Board", options)["resource_response"]["data"] @@ -217,6 +259,22 @@ class PinterestAPI(): options = {"board_id": board_id} return self._pagination("BoardFeed", options) + def board_section(self, user, board_slug, section_slug): + """Yield a specific board section""" + options = {"board_slug": board_slug, "section_slug": section_slug, + "username": user} + return self._call("BoardSection", options)["resource_response"]["data"] + + def board_sections(self, board_id): + """Yield all sections of a specific board""" + options = {"board_id": board_id} + return self._pagination("BoardSections", options) + + def board_section_pins(self, section_id): + """Yield all pins from a board section""" + options = {"section_id": section_id} + return self._pagination("BoardSectionPins", options) + def board_related(self, board_id): """Yield related pins of a specific board""" options = {"board_id": board_id, "add_vase": True} diff --git a/gallery_dl/extractor/slickpic.py b/gallery_dl/extractor/slickpic.py index 1063716..05ec117 100644 --- a/gallery_dl/extractor/slickpic.py +++ b/gallery_dl/extractor/slickpic.py @@ -42,7 +42,8 @@ class SlickpicAlbumExtractor(SlickpicExtractor): ("https://mattcrandall.slickpic.com/albums/LamborghiniMurcielago/", { "range": "34", "content": ("cec6630e659dc72db1ee1a9a6f3b525189261988", - "6f81e1e74c6cd6db36844e7211eef8e7cd30055d"), + "6f81e1e74c6cd6db36844e7211eef8e7cd30055d", + "22e83645fc242bc3584eca7ec982c8a53a4d8a44"), }), ) diff --git a/gallery_dl/extractor/speakerdeck.py b/gallery_dl/extractor/speakerdeck.py index 1a9691c..a3819c7 100644 --- a/gallery_dl/extractor/speakerdeck.py +++ b/gallery_dl/extractor/speakerdeck.py @@ -23,8 +23,10 @@ class SpeakerdeckPresentationExtractor(Extractor): r"/([^/?&#]+)/([^/?&#]+)") test = ( (("https://speakerdeck.com/speakerdeck/introduction-to-speakerdeck"), { - "url": "e97d4a7d5c64267e921c13eb7946d7074794a0d2", + "pattern": r"https://files.speakerdeck.com/presentations/" + r"50021f75cf1db900020005e7/slide_\d+.jpg", "content": "75c7abf0969b0bcab23e0da9712c95ee5113db3a", + "count": 6, }), ) diff --git a/gallery_dl/extractor/tsumino.py b/gallery_dl/extractor/tsumino.py index 31dbdad..5809463 100644 --- a/gallery_dl/extractor/tsumino.py +++ b/gallery_dl/extractor/tsumino.py @@ -57,7 +57,7 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor): "collection": "", "artist" : ["Itou Life"], "group" : ["Itou Life"], - "parody" : ["Fate/Grand Order"], + "parody" : list, "characters": list, "tags" : list, "type" : "Doujinshi", diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 3e3a5a0..70fead8 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -194,7 +194,7 @@ class TumblrExtractor(Extractor): return not self.reblogs def _skip_reblog_same_blog(self, post): - return self.blog != post["reblogged_root_uuid"] + return self.blog != post.get("reblogged_root_uuid") class TumblrUserExtractor(TumblrExtractor): diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 7cabb8c..1e985e3 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -31,6 +31,7 @@ class TwitterExtractor(Extractor): self.retweets = self.config("retweets", True) self.replies = self.config("replies", True) self.twitpic = self.config("twitpic", False) + self.quoted = self.config("quoted", True) self.videos = self.config("videos", True) self._user_cache = {} @@ -41,8 +42,9 @@ class TwitterExtractor(Extractor): for tweet in self.tweets(): - if not self.retweets and "retweeted_status_id_str" in tweet or \ - not self.replies and "in_reply_to_user_id_str" in tweet: + if (not self.retweets and "retweeted_status_id_str" in tweet or + not self.replies and "in_reply_to_user_id_str" in tweet or + not self.quoted and "quoted" in tweet): continue if self.twitpic: @@ -60,7 +62,7 @@ class TwitterExtractor(Extractor): tdata["width"] = media["original_info"].get("width", 0) tdata["height"] = media["original_info"].get("height", 0) - if "video_info" in media and self.videos: + if "video_info" in media: if self.videos == "ytdl": url = "ytdl:{}/i/web/status/{}".format( @@ -68,7 +70,7 @@ class TwitterExtractor(Extractor): tdata["extension"] = None yield Message.Url, url, tdata - else: + elif self.videos: video_info = media["video_info"] variant = max( video_info["variants"], @@ -149,11 +151,10 @@ class TwitterExtractor(Extractor): if "in_reply_to_screen_name" in tweet: tdata["reply_to"] = tweet["in_reply_to_screen_name"] - if "full_text_quoted" in tweet: - tdata["content_quoted"] = tweet["full_text_quoted"] - if "author" in tweet: tdata["author"] = self._transform_user(tweet["author"]) + else: + tdata["author"] = tdata["user"] return tdata @@ -264,6 +265,27 @@ class TwitterMediaExtractor(TwitterExtractor): return TwitterAPI(self).timeline_media(self.user) +class TwitterLikesExtractor(TwitterExtractor): + """Extractor for liked tweets""" + subcategory = "likes" + pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" + r"/(?!search)([^/?&#]+)/likes(?!\w)") + test = ("https://twitter.com/supernaturepics/likes",) + + def tweets(self): + return TwitterAPI(self).timeline_favorites(self.user) + + +class TwitterBookmarkExtractor(TwitterExtractor): + """Extractor for bookmarked tweets""" + subcategory = "bookmark" + pattern = r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/i/bookmarks()" + test = ("https://twitter.com/i/bookmarks",) + + def tweets(self): + return TwitterAPI(self).timeline_bookmark() + + class TwitterSearchExtractor(TwitterExtractor): """Extractor for all images from a search timeline""" subcategory = "search" @@ -279,7 +301,7 @@ class TwitterSearchExtractor(TwitterExtractor): return {"search": text.unquote(self.user)} def tweets(self): - return TwitterAPI(self).search(self.user) + return TwitterAPI(self).search(text.unquote(self.user)) class TwitterTweetExtractor(TwitterExtractor): @@ -298,7 +320,6 @@ class TwitterTweetExtractor(TwitterExtractor): }), # video ("https://twitter.com/perrypumas/status/1065692031626829824", { - "options": (("videos", True),), "pattern": r"https://video.twimg.com/ext_tw_video/.+\.mp4\?tag=5", }), # content with emoji, newlines, hashtags (#338) @@ -310,23 +331,25 @@ class TwitterTweetExtractor(TwitterExtractor): "It’s our \\(Mystery\\) Gift to you, Trainers! \n\n❓🎁➡️ " )}, }), - # Reply to another tweet (#403) - ("https://twitter.com/tyson_hesse/status/1103767554424598528", { - "options": (("videos", "ytdl"),), - "pattern": r"ytdl:https://twitter.com/i/web.+/1103767554424598528", + # Reply to deleted tweet (#403, #838) + ("https://twitter.com/i/web/status/1170041925560258560", { + "pattern": r"https://pbs.twimg.com/media/EDzS7VrU0AAFL4_.jpg:orig", }), # 'replies' option (#705) - ("https://twitter.com/tyson_hesse/status/1103767554424598528", { + ("https://twitter.com/i/web/status/1170041925560258560", { "options": (("replies", False),), "count": 0, }), - # /i/web/ URL - ("https://twitter.com/i/web/status/1155074198240292865", { - "pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig", + # quoted tweet (#526, #854) + ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", { + "pattern": r"https://pbs\.twimg\.com/media/Ea[KG].+\.jpg", + "count": 8, }), - # quoted tweet (#526) - ("https://twitter.com/Pistachio/status/1222690391817932803", { - "pattern": r"https://pbs\.twimg\.com/media/EPfMfDUU8AAnByO\.jpg", + # "quoted" option (#854) + ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", { + "options": (("quoted", False),), + "pattern": r"https://pbs\.twimg\.com/media/EaK.+\.jpg", + "count": 4, }), # TwitPic embeds (#579) ("https://twitter.com/i/web/status/112900228289540096", { @@ -344,16 +367,6 @@ class TwitterTweetExtractor(TwitterExtractor): return TwitterAPI(self).tweet(self.tweet_id) -class TwitterBookmarkExtractor(TwitterExtractor): - """Extractor for bookmarked tweets""" - subcategory = "bookmark" - pattern = r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/i/bookmarks()" - test = ("https://twitter.com/i/bookmarks",) - - def tweets(self): - return TwitterAPI(self).bookmarks() - - class TwitterAPI(): def __init__(self, extractor): @@ -409,16 +422,21 @@ class TwitterAPI(): self.headers["x-twitter-auth-type"] = "OAuth2Session" else: # guest token - guest_token = _guest_token(self.extractor, self.headers) + guest_token = self._guest_token() self.headers["x-guest-token"] = guest_token cookies.set("gt", guest_token, domain=".twitter.com") def tweet(self, tweet_id): endpoint = "2/timeline/conversation/{}.json".format(tweet_id) + tweets = [] for tweet in self._pagination(endpoint): if tweet["id_str"] == tweet_id: - return (tweet,) - return () + tweets.append(tweet) + if "quoted_status_id_str" in tweet: + tweet_id = tweet["quoted_status_id_str"] + else: + break + return tweets def timeline_profile(self, screen_name): user = self.user_by_screen_name(screen_name) @@ -430,17 +448,26 @@ class TwitterAPI(): endpoint = "2/timeline/media/{}.json".format(user["rest_id"]) return self._pagination(endpoint) + def timeline_favorites(self, screen_name): + user = self.user_by_screen_name(screen_name) + endpoint = "2/timeline/favorites/{}.json".format(user["rest_id"]) + return self._pagination(endpoint) + + def timeline_bookmark(self): + endpoint = "2/timeline/bookmark.json" + return self._pagination(endpoint) + def search(self, query): endpoint = "2/search/adaptive.json" params = self.params.copy() - params["q"] = text.unquote(query) + params["q"] = query + params["tweet_search_mode"] = "live" + params["query_source"] = "typed_query" + params["pc"] = "1" + params["spelling_corrections"] = "1" return self._pagination( endpoint, params, "sq-I-t-", "sq-cursor-bottom") - def bookmarks(self): - endpoint = "2/timeline/bookmark.json" - return self._pagination(endpoint) - def user_by_screen_name(self, screen_name): endpoint = "graphql/-xfUfZsnR_zqjFd-IfrN5A/UserByScreenName" params = { @@ -449,10 +476,16 @@ class TwitterAPI(): } return self._call(endpoint, params)["data"]["user"] - def _call(self, endpoint, params): + @cache(maxage=3600) + def _guest_token(self): + endpoint = "1.1/guest/activate.json" + return self._call(endpoint, None, "POST")["guest_token"] + + def _call(self, endpoint, params, method="GET"): url = "https://api.twitter.com/" + endpoint response = self.extractor.request( - url, params=params, headers=self.headers, fatal=None) + url, method=method, params=params, headers=self.headers, + fatal=None) if response.status_code < 400: return response.json() if response.status_code == 429: @@ -479,28 +512,30 @@ class TwitterAPI(): for entry in instr[0]["addEntries"]["entries"]: if entry["entryId"].startswith(entry_tweet): - tid = entry["content"]["item"]["content"]["tweet"]["id"] - if tid not in tweets: + try: + tweet = tweets[ + entry["content"]["item"]["content"]["tweet"]["id"]] + except KeyError: self.extractor.log.debug( - "Skipping unavailable Tweet %s", tid) + "Skipping unavailable Tweet %s", + entry["entryId"][6:]) continue - tweet = tweets[tid] tweet["user"] = users[tweet["user_id_str"]] - if "quoted_status_id_str" in tweet: - quoted = tweets.get(tweet["quoted_status_id_str"]) - if quoted: - tweet["full_text_quoted"] = quoted["full_text"] - if "extended_entities" in quoted: - tweet["extended_entities"] = \ - quoted["extended_entities"] - elif "retweeted_status_id_str" in tweet: + if "retweeted_status_id_str" in tweet: retweet = tweets.get(tweet["retweeted_status_id_str"]) if retweet: tweet["author"] = users[retweet["user_id_str"]] - yield tweet + if "quoted_status_id_str" in tweet: + quoted = tweets.get(tweet["quoted_status_id_str"]) + if quoted: + quoted["author"] = users[quoted["user_id_str"]] + quoted["user"] = tweet["user"] + quoted["quoted"] = True + yield quoted + elif entry["entryId"].startswith(entry_cursor): cursor = entry["content"]["operation"]["cursor"] if not cursor.get("stopOnEmptyResponse"): @@ -515,11 +550,3 @@ class TwitterAPI(): if not cursor or not tweet: return params["cursor"] = cursor - - -@cache(maxage=3600) -def _guest_token(extr, headers): - return extr.request( - "https://api.twitter.com/1.1/guest/activate.json", - method="POST", headers=headers, - ).json().get("guest_token") diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py index 3b992a2..d42730e 100644 --- a/gallery_dl/extractor/webtoons.py +++ b/gallery_dl/extractor/webtoons.py @@ -96,6 +96,7 @@ class WebtoonsEpisodeExtractor(WebtoonsExtractor): class WebtoonsComicExtractor(WebtoonsExtractor): """Extractor for an entire comic on webtoons.com""" subcategory = "comic" + categorytransfer = True pattern = (BASE_PATTERN + r"/([^/?&#]+)/([^/?&#]+))" r"/list(?:\?([^#]+))") test = ( diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index aa9bdae..d1ad388 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -10,6 +10,7 @@ from .common import Extractor, Message from .. import text, exception +import itertools import json @@ -30,53 +31,53 @@ class WeiboExtractor(Extractor): for status in self.statuses(): - yield Message.Directory, status - obj = status - num = 1 - - while True: - - if "pics" in obj: - for image in obj["pics"]: - pid = image["pid"] - if "large" in image: - image = image["large"] - geo = image.get("geo") or {} - data = text.nameext_from_url(image["url"], { - "num" : num, - "pid" : pid, - "url" : image["url"], - "width" : text.parse_int(geo.get("width")), - "height": text.parse_int(geo.get("height")), - "status": status, - }) - yield Message.Url, image["url"], data - num += 1 - - if self.videos and "media_info" in obj.get("page_info", ()): - info = obj["page_info"]["media_info"] - url = info.get("stream_url_hd") or info.get("stream_url") - - if url: - data = text.nameext_from_url(url, { - "num" : num, - "pid" : 0, - "url" : url, - "width" : 0, - "height": 0, - "status": status, - }) - if data["extension"] == "m3u8": - url = "ytdl:" + url - data["extension"] = "mp4" - data["_ytdl_extra"] = {"protocol": "m3u8_native"} - yield Message.Url, url, data - num += 1 - - if self.retweets and "retweeted_status" in obj: - obj = obj["retweeted_status"] - else: - break + files = self._files_from_status(status) + if self.retweets and "retweeted_status" in status: + files = itertools.chain( + files, + self._files_from_status(status["retweeted_status"]), + ) + + for num, file in enumerate(files, 1): + if num == 1: + status["date"] = text.parse_datetime( + status["created_at"], "%a %b %d %H:%M:%S %z %Y") + yield Message.Directory, status + file["status"] = status + file["num"] = num + yield Message.Url, file["url"], file + + def _files_from_status(self, status): + images = status.pop("pics", ()) + page_info = status.pop("page_info", ()) + + for image in images: + pid = image["pid"] + if "large" in image: + image = image["large"] + geo = image.get("geo") or {} + yield text.nameext_from_url(image["url"], { + "url" : image["url"], + "pid" : pid, + "width" : text.parse_int(geo.get("width")), + "height": text.parse_int(geo.get("height")), + }) + + if self.videos and "media_info" in page_info: + info = page_info["media_info"] + url = info.get("stream_url_hd") or info.get("stream_url") + if url: + data = text.nameext_from_url(url, { + "url" : url, + "pid" : 0, + "width" : 0, + "height": 0, + }) + if data["extension"] == "m3u8": + data["extension"] = "mp4" + data["url"] = "ytdl:" + url + data["_ytdl_extra"] = {"protocol": "m3u8_native"} + yield data def statuses(self): """Returns an iterable containing all relevant 'status' objects""" @@ -124,6 +125,7 @@ class WeiboStatusExtractor(WeiboExtractor): test = ( ("https://m.weibo.cn/detail/4323047042991618", { "pattern": r"https?://wx\d+.sinaimg.cn/large/\w+.jpg", + "keyword": {"status": {"date": "dt:2018-12-30 13:56:36"}}, }), ("https://m.weibo.cn/detail/4339748116375525", { "pattern": r"https?://f.us.sinaimg.cn/\w+\.mp4\?label=mp4_hd", diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 130df58..923a4e6 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -454,15 +454,18 @@ class KeywordJob(Job): self.print_kwdict(kwdict) def handle_queue(self, url, kwdict): + extr = None + if "_extractor" in kwdict: + extr = kwdict["_extractor"].from_url(url) + if not util.filter_dict(kwdict): self.extractor.log.info( "This extractor only spawns other extractors " "and does not provide any metadata on its own.") - if "_extractor" in kwdict: + if extr: self.extractor.log.info( "Showing results for '%s' instead:\n", url) - extr = kwdict["_extractor"].from_url(url) KeywordJob(extr, self).run() else: self.extractor.log.info( @@ -471,9 +474,9 @@ class KeywordJob(Job): print("Keywords for --chapter-filter:") print("------------------------------") self.print_kwdict(kwdict) - if self.extractor.categorytransfer: + if extr or self.extractor.categorytransfer: print() - KeywordJob(url, self).run() + KeywordJob(extr or url, self).run() raise exception.StopExtraction() @staticmethod @@ -559,7 +562,12 @@ class DataJob(Job): util.transform_dict(msg[-1], util.number_to_string) # dump to 'file' - util.dump_json(self.data, self.file, self.ascii, 2) + try: + util.dump_json(self.data, self.file, self.ascii, 2) + self.file.flush() + except Exception: + pass + return 0 def handle_url(self, url, kwdict): @@ -576,6 +584,3 @@ class DataJob(Job): def handle_queue(self, url, kwdict): self.data.append((Message.Queue, url, self.filter(kwdict))) - - def handle_finalize(self): - self.file.close() diff --git a/gallery_dl/text.py b/gallery_dl/text.py index 4dc0963..9a716f9 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -252,10 +252,13 @@ def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z", utcoffset=0): o = d.utcoffset() if o is not None: # convert to naive UTC - d = d.replace(tzinfo=None) - o - elif utcoffset: - # apply manual UTC offset - d += datetime.timedelta(0, utcoffset * -3600) + d = d.replace(tzinfo=None, microsecond=0) - o + else: + if d.microsecond: + d = d.replace(microsecond=0) + if utcoffset: + # apply manual UTC offset + d += datetime.timedelta(0, utcoffset * -3600) return d except (TypeError, IndexError, KeyError): return None diff --git a/gallery_dl/util.py b/gallery_dl/util.py index afd96b8..c8d73b6 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -120,13 +120,14 @@ def dump_json(obj, fp=sys.stdout, ensure_ascii=True, indent=4): fp.write("\n") -def dump_response(response, fp=sys.stdout, - headers=True, content=True, hide_auth=True): +def dump_response(response, fp, *, + headers=False, content=True, hide_auth=True): """Write the contents of 'response' into a file-like object""" if headers: request = response.request req_headers = request.headers.copy() + res_headers = response.headers.copy() outfmt = """\ {request.method} {request.url} Status: {response.status_code} {response.reason} @@ -145,11 +146,17 @@ Response Headers atype, sep, _ = authorization.partition(" ") req_headers["Authorization"] = atype + " ***" if sep else "***" - cookies = req_headers.get("Cookie") - if cookies: + cookie = req_headers.get("Cookie") + if cookie: req_headers["Cookie"] = ";".join( - cookie.partition("=")[0] + "=***" - for cookie in cookies.split(";") + c.partition("=")[0] + "=***" + for c in cookie.split(";") + ) + + set_cookie = res_headers.get("Set-Cookie") + if set_cookie: + res_headers["Set-Cookie"] = re.sub( + r"(^|, )([^ =]+)=[^,;]*", r"\1\2=***", set_cookie, ) fp.write(outfmt.format( @@ -161,7 +168,7 @@ Response Headers ), response_headers="\n".join( name + ": " + value - for name, value in response.headers.items() + for name, value in res_headers.items() ), ).encode()) diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 8509e1e..3297d03 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.14.1" +__version__ = "1.14.2" diff --git a/test/test_config.py b/test/test_config.py index 4171435..a9cefd4 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -68,6 +68,34 @@ class TestConfig(unittest.TestCase): self.assertEqual(config.interpolate(("b",), "d", 1) , 2) self.assertEqual(config.interpolate(("d",), "d", 1) , 2) + def test_interpolate_common(self): + + def lookup(): + return config.interpolate_common( + ("Z1", "Z2"), ( + ("A1", "A2"), + ("B1",), + ("C1", "C2", "C3"), + ), "KEY", "DEFAULT", + ) + + def test(path, value, expected=None): + config.set(path, "KEY", value) + self.assertEqual(lookup(), expected or value) + + self.assertEqual(lookup(), "DEFAULT") + test(("Z1",), 1) + test(("Z1", "Z2"), 2) + test(("Z1", "Z2", "C1"), 3) + test(("Z1", "Z2", "C1", "C2"), 4) + test(("Z1", "Z2", "C1", "C2", "C3"), 5) + test(("Z1", "Z2", "B1"), 6) + test(("Z1", "Z2", "A1"), 7) + test(("Z1", "Z2", "A1", "A2"), 8) + test(("Z1", "A1", "A2"), 999, 8) + test(("Z1", "Z2", "A1", "A2", "A3"), 999, 8) + test((), 9) + def test_set(self): config.set(() , "c", [1, 2, 3]) config.set(("b",) , "c", [1, 2, 3]) diff --git a/test/test_results.py b/test/test_results.py index 196d859..6a943aa 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -31,10 +31,9 @@ TRAVIS_SKIP = { # temporary issues, etc. BROKEN = { + "bobx", "imagevenue", "photobucket", - "seiga", - "twitter", "worldthree", } diff --git a/test/test_text.py b/test/test_text.py index aeb8096..34585d1 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -407,6 +407,10 @@ class TestText(unittest.TestCase): datetime.datetime(2019, 5, 7, 12, 25, 2), ) self.assertEqual( + f("2019-05-07T21:25:02.753+0900", "%Y-%m-%dT%H:%M:%S.%f%z"), + datetime.datetime(2019, 5, 7, 12, 25, 2), + ) + self.assertEqual( f("2019-05-07T21:25:02", "%Y-%m-%dT%H:%M:%S", utcoffset=9), datetime.datetime(2019, 5, 7, 12, 25, 2), ) |