author    Unit 193 <unit193@unit193.net>    2023-03-31 07:24:57 -0400
committer Unit 193 <unit193@unit193.net>    2023-03-31 07:24:57 -0400
commit 09e426350409d45e7f7a8ff369f8d8aa9eec0fe4 (patch)
tree 8a8cd3e590675fe6ecb1e5c2b4ad9eecde3dde6d
parent 10987f08f8b6c510ba64f4b42d95ba67eec6e5b0 (diff)
New upstream version 1.25.1 (tag: upstream/1.25.1)
-rw-r--r--  CHANGELOG.md                          24
-rw-r--r--  PKG-INFO                               9
-rw-r--r--  README.rst                             7
-rw-r--r--  data/man/gallery-dl.1                  2
-rw-r--r--  data/man/gallery-dl.conf.5            26
-rw-r--r--  gallery_dl.egg-info/PKG-INFO           9
-rw-r--r--  gallery_dl/__init__.py                 2
-rw-r--r--  gallery_dl/extractor/gelbooru.py      66
-rw-r--r--  gallery_dl/extractor/hiperdex.py      50
-rw-r--r--  gallery_dl/extractor/naverwebtoon.py  89
-rw-r--r--  gallery_dl/extractor/nitter.py        27
-rw-r--r--  gallery_dl/extractor/twitter.py      214
-rw-r--r--  gallery_dl/extractor/weibo.py         44
-rw-r--r--  gallery_dl/formatter.py               33
-rw-r--r--  gallery_dl/job.py                     10
-rw-r--r--  gallery_dl/output.py                  11
-rw-r--r--  gallery_dl/postprocessor/metadata.py   4
-rw-r--r--  gallery_dl/version.py                  2
-rw-r--r--  test/test_formatter.py                36
-rw-r--r--  test/test_job.py                      61
-rw-r--r--  test/test_postprocessor.py            35
21 files changed, 580 insertions, 181 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5d805c2..d312557 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,32 @@
# Changelog
+## 1.25.1 - 2023-03-25
+### Additions
+- [nitter] support nitter.it ([#3819](https://github.com/mikf/gallery-dl/issues/3819))
+- [twitter] add `hashtag` extractor ([#3783](https://github.com/mikf/gallery-dl/issues/3783))
+- [twitter] support Tweet content with >280 characters
+- [formatter] support loading f-strings from template files ([#3800](https://github.com/mikf/gallery-dl/issues/3800))
+- [formatter] support filesystem paths for `\fM` modules ([#3399](https://github.com/mikf/gallery-dl/issues/3399))
+- [formatter] support putting keys in quotes (e.g. `user['name']`) ([#2559](https://github.com/mikf/gallery-dl/issues/2559))
+- [postprocessor:metadata] add `skip` option ([#3786](https://github.com/mikf/gallery-dl/issues/3786))
+### Fixes
+- [output] set `errors=replace` for output streams ([#3765](https://github.com/mikf/gallery-dl/issues/3765))
+- [gelbooru] extract favorites without needing cookies ([#3704](https://github.com/mikf/gallery-dl/issues/3704))
+- [gelbooru] fix and improve `--range` for pools
+- [hiperdex] fix extraction ([#3768](https://github.com/mikf/gallery-dl/issues/3768))
+- [naverwebtoon] fix extraction ([#3729](https://github.com/mikf/gallery-dl/issues/3729))
+- [nitter] fix extraction for instances without user banners
+- [twitter] update API query hashes and parameters
+- [weibo] support `mix_media_info` entries ([#3793](https://github.com/mikf/gallery-dl/issues/3793))
+- fix circular reference detection for `-K`
+### Changes
+- update `globals` instead of overwriting the default ([#3773](https://github.com/mikf/gallery-dl/issues/3773))
+
## 1.25.0 - 2023-03-11
### Changes
- [e621] split `e621` extractors from `danbooru` module ([#3425](https://github.com/mikf/gallery-dl/issues/3425))
- [deviantart] remove mature scraps warning ([#3691](https://github.com/mikf/gallery-dl/issues/3691))
-- [deviantart] use `/collections/all` endpoint for favorites ([#3666](https://github.com/mikf/gallery-dl/issues/3666) ,#3668)
+- [deviantart] use `/collections/all` endpoint for favorites ([#3666](https://github.com/mikf/gallery-dl/issues/3666), [#3668](https://github.com/mikf/gallery-dl/issues/3668))
- [newgrounds] update default image and audio archive IDs to prevent ID overlap ([#3681](https://github.com/mikf/gallery-dl/issues/3681))
- rename `--ignore-config` to `--config-ignore`
### Extractors
diff --git a/PKG-INFO b/PKG-INFO
index 43aacb4..1156e79 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.25.0
+Version: 1.25.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -106,9 +106,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.0/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.1/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.0/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.1/gallery-dl.bin>`__
Nightly Builds
@@ -322,7 +322,8 @@ This can be done via the
option in your configuration file by specifying
- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon
- | (e.g. `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/>`__ for Firefox)
+ | (e.g. `Get cookies.txt LOCALLY <https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc>`__ for Chrome,
+ `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/>`__ for Firefox)
- | a list of name-value pairs gathered from your browser's web developer tools
| (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__,
diff --git a/README.rst b/README.rst
index c980bce..e4fd1c6 100644
--- a/README.rst
+++ b/README.rst
@@ -69,9 +69,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.0/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.1/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.0/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.1/gallery-dl.bin>`__
Nightly Builds
@@ -285,7 +285,8 @@ This can be done via the
option in your configuration file by specifying
- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon
- | (e.g. `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/>`__ for Firefox)
+ | (e.g. `Get cookies.txt LOCALLY <https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc>`__ for Chrome,
+ `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/>`__ for Firefox)
- | a list of name-value pairs gathered from your browser's web developer tools
| (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__,
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 27d3a09..8b96657 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-03-11" "1.25.0" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-03-25" "1.25.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index a0fd629..fd32eb1 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-03-11" "1.25.0" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-03-25" "1.25.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -5051,6 +5051,17 @@ Include private fields,
i.e. fields whose name starts with an underscore.
+.SS metadata.skip
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Do not overwrite already existing files.
+
+
.SS metadata.archive
.IP "Type:" 6
\f[I]Path\f[]
@@ -5407,16 +5418,15 @@ or by \f[I]extractor.modules\f[].
.br
* "gdl-globals"
-.IP "Default:" 9
-The \f[I]GLOBALS\f[] dict in
-\f[I]util.py\f[]
-
.IP "Description:" 4
Path to or name of an
+.br
\f[I]importable\f[]
-Python module whose namespace gets used as an alternative
-\f[I]globals parameter\f[]
-for compiled Python expressions.
+Python module,
+whose namespace,
+.br
+in addition to the \f[I]GLOBALS\f[] dict in \f[I]util.py\f[],
+gets used as \f[I]globals parameter\f[] for compiled Python expressions.
.SS cache.file
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index d4e660a..f836313 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.25.0
+Version: 1.25.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -106,9 +106,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.0/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.1/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.0/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.1/gallery-dl.bin>`__
Nightly Builds
@@ -322,7 +322,8 @@ This can be done via the
option in your configuration file by specifying
- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon
- | (e.g. `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/>`__ for Firefox)
+ | (e.g. `Get cookies.txt LOCALLY <https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc>`__ for Chrome,
+ `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/>`__ for Firefox)
- | a list of name-value pairs gathered from your browser's web developer tools
| (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__,
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 116ca5d..a430f13 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -120,7 +120,7 @@ def main():
# eval globals
path = config.get((), "globals")
if path:
- util.GLOBALS = util.import_file(path).__dict__
+ util.GLOBALS.update(util.import_file(path).__dict__)
# loglevels
output.configure_logging(args.loglevel)
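
Updating util.GLOBALS in place keeps the built-in names available to compiled expressions while still layering the user's module on top. A minimal sketch of the behavioral difference (the dict contents here are illustrative, not the actual GLOBALS from util.py):

    # old: the user namespace replaced GLOBALS, dropping every default name
    # new: the user namespace is merged in, so defaults survive
    defaults = {"re": __import__("re")}        # stand-in for util.GLOBALS
    user_ns = {"shout": lambda s: s.upper()}   # stand-in for import_file(path).__dict__

    defaults.update(user_ns)                   # new behavior
    assert "re" in defaults and "shout" in defaults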
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 80b0ae1..e2173de 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -21,18 +21,21 @@ class GelbooruBase():
category = "gelbooru"
basecategory = "booru"
root = "https://gelbooru.com"
+ offset = 0
- def _api_request(self, params):
+ def _api_request(self, params, key="post"):
+ if "s" not in params:
+ params["s"] = "post"
params["api_key"] = self.api_key
params["user_id"] = self.user_id
- url = self.root + "/index.php?page=dapi&s=post&q=index&json=1"
+ url = self.root + "/index.php?page=dapi&q=index&json=1"
data = self.request(url, params=params).json()
- if "post" not in data:
+ if key not in data:
return ()
- posts = data["post"]
+ posts = data[key]
if not isinstance(posts, list):
return (posts,)
return posts
@@ -57,7 +60,7 @@ class GelbooruBase():
def _pagination_html(self, params):
url = self.root + "/index.php"
- params["pid"] = self.page_start * self.per_page
+ params["pid"] = self.offset
data = {}
while True:
@@ -103,6 +106,10 @@ class GelbooruBase():
"body" : extr(note, 'data-body="', '"')[0],
})
+ def _skip_offset(self, num):
+ self.offset += num
+ return num
+
class GelbooruTagExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02TagExtractor):
@@ -133,13 +140,14 @@ class GelbooruPoolExtractor(GelbooruBase,
}),
)
+ skip = GelbooruBase._skip_offset
+
def metadata(self):
url = self.root + "/index.php"
self._params = {
"page": "pool",
"s" : "show",
"id" : self.pool_id,
- "pid" : self.page_start,
}
page = self.request(url, params=self._params).text
@@ -158,8 +166,52 @@ class GelbooruPoolExtractor(GelbooruBase,
class GelbooruFavoriteExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02FavoriteExtractor):
+ """Extractor for gelbooru favorites"""
+ per_page = 100
pattern = BASE_PATTERN + r"page=favorites&s=view&id=(\d+)"
- test = ("https://gelbooru.com/index.php?page=favorites&s=view&id=12345",)
+ test = ("https://gelbooru.com/index.php?page=favorites&s=view&id=279415", {
+ "count": 3,
+ })
+
+ skip = GelbooruBase._skip_offset
+
+ def posts(self):
+ # get number of favorites
+ params = {
+ "s" : "favorite",
+ "id" : self.favorite_id,
+ "limit": "1",
+ }
+ count = self._api_request(params, "@attributes")[0]["count"]
+
+ if count <= self.offset:
+ return
+ pnum, last = divmod(count + 1, self.per_page)
+
+ if self.offset >= last:
+ self.offset -= last
+ diff, self.offset = divmod(self.offset, self.per_page)
+ pnum -= diff + 1
+ skip = self.offset
+
+ # paginate over them in reverse
+ params["pid"] = pnum
+ params["limit"] = self.per_page
+
+ while True:
+ favs = self._api_request(params, "favorite")
+
+ favs.reverse()
+ if skip:
+ favs = favs[skip:]
+ skip = 0
+
+ for fav in favs:
+ yield from self._api_request({"id": fav["favorite"]})
+
+ params["pid"] -= 1
+ if params["pid"] < 0:
+ return
class GelbooruPostExtractor(GelbooruBase,
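
The favorites extractor pages the API in reverse, so honoring a skip offset means translating a flat item offset into a starting page id plus an in-page skip. A standalone sketch of the divmod arithmetic used above (per_page=100 as in the extractor):

    def start_position(count, offset, per_page=100):
        """Map a flat skip offset to (starting pid, items to drop on that page)."""
        pnum, last = divmod(count + 1, per_page)  # 'last': size of the newest page
        if offset >= last:
            offset -= last
            diff, offset = divmod(offset, per_page)
            pnum -= diff + 1
        return pnum, offset

    print(start_position(count=250, offset=0))   # -> (2, 0)
    print(start_position(count=250, offset=75))  # -> (1, 24)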
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index d61c139..3aad88c 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://1sthiperdex.com/"""
+"""Extractors for https://hiperdex.com/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
@@ -20,7 +20,7 @@ BASE_PATTERN = (r"((?:https?://)?(?:www\.)?"
class HiperdexBase():
"""Base class for hiperdex extractors"""
category = "hiperdex"
- root = "https://1sthiperdex.com"
+ root = "https://hiperdex.com"
@memcache(keyarg=1)
def manga_data(self, manga, page=None):
@@ -31,7 +31,9 @@ class HiperdexBase():
return {
"manga" : text.unescape(extr(
- "<title>", "<").rpartition("&")[0].strip()),
+ "<title>", "<").rpartition(" - ")[0].strip()),
+ "url" : text.unescape(extr(
+ 'property="og:url" content="', '"')),
"score" : text.parse_float(extr(
'id="averagerate">', '<')),
"author" : text.remove_html(extr(
@@ -65,10 +67,10 @@ class HiperdexBase():
class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
- """Extractor for manga chapters from 1sthiperdex.com"""
+ """Extractor for manga chapters from hiperdex.com"""
pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))"
test = (
- ("https://1sthiperdex.com/manga/domestic-na-kanojo/154-5/", {
+ ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", {
"pattern": r"https://(1st)?hiperdex\d?.(com|net|info)"
r"/wp-content/uploads/WP-manga/data"
r"/manga_\w+/[0-9a-f]{32}/\d+\.webp",
@@ -86,7 +88,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
"type" : "Manga",
},
}),
- ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/"),
+ ("https://1sthiperdex.com/manga/domestic-na-kanojo/154-5/"),
("https://hiperdex2.com/manga/domestic-na-kanojo/154-5/"),
("https://hiperdex.net/manga/domestic-na-kanojo/154-5/"),
("https://hiperdex.info/manga/domestic-na-kanojo/154-5/"),
@@ -109,11 +111,11 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
- """Extractor for manga from 1sthiperdex.com"""
+ """Extractor for manga from hiperdex.com"""
chapterclass = HiperdexChapterExtractor
pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$"
test = (
- ("https://1sthiperdex.com/manga/youre-not-that-special/", {
+ ("https://hiperdex.com/manga/1603231576-youre-not-that-special/", {
"count": 51,
"pattern": HiperdexChapterExtractor.pattern,
"keyword": {
@@ -131,6 +133,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
},
}),
("https://hiperdex.com/manga/youre-not-that-special/"),
+ ("https://1sthiperdex.com/manga/youre-not-that-special/"),
("https://hiperdex2.com/manga/youre-not-that-special/"),
("https://hiperdex.net/manga/youre-not-that-special/"),
("https://hiperdex.info/manga/youre-not-that-special/"),
@@ -142,25 +145,24 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
MangaExtractor.__init__(self, match, self.root + path + "/")
def chapters(self, page):
- self.manga_data(self.manga, page)
- results = []
-
- shortlink = text.extr(page, "rel='shortlink' href='", "'")
- data = {
- "action" : "manga_get_reading_nav",
- "manga" : shortlink.rpartition("=")[2],
- "chapter" : "",
- "volume_id": "",
- "style" : "list",
- "type" : "manga",
+ data = self.manga_data(self.manga, page)
+ self.manga_url = url = data["url"]
+
+ url = self.manga_url + "ajax/chapters/"
+ headers = {
+ "Accept": "*/*",
+ "X-Requested-With": "XMLHttpRequest",
+ "Origin": self.root,
+ "Referer": self.manga_url,
}
- url = self.root + "/wp-admin/admin-ajax.php"
- page = self.request(url, method="POST", data=data).text
+ html = self.request(url, method="POST", headers=headers).text
- for url in text.extract_iter(page, 'data-redirect="', '"'):
- chapter = url.rpartition("/")[2]
+ results = []
+ for item in text.extract_iter(
+ html, '<li class="wp-manga-chapter', '</li>'):
+ url = text.extr(item, 'href="', '"')
+ chapter = url.rstrip("/").rpartition("/")[2]
results.append((url, self.chapter_data(chapter)))
-
return results
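
chapters() now loads the chapter list from the manga page's own ajax/chapters/ endpoint instead of going through wp-admin/admin-ajax.php. A hedged sketch of the equivalent request, using the requests library directly in place of the extractor's session (the manga URL is a made-up example):

    import requests

    manga_url = "https://hiperdex.com/manga/some-title/"  # hypothetical
    html = requests.post(
        manga_url + "ajax/chapters/",
        headers={
            "Accept": "*/*",
            "X-Requested-With": "XMLHttpRequest",
            "Origin": "https://hiperdex.com",
            "Referer": manga_url,
        },
    ).text
    # the response is an HTML fragment of <li class="wp-manga-chapter"> items
    # whose href attributes carry the chapter URLs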
diff --git a/gallery_dl/extractor/naverwebtoon.py b/gallery_dl/extractor/naverwebtoon.py
index fa91f76..d6292af 100644
--- a/gallery_dl/extractor/naverwebtoon.py
+++ b/gallery_dl/extractor/naverwebtoon.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright 2021 Seonghyeon Cho
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,7 +11,6 @@
from .common import GalleryExtractor, Extractor, Message
from .. import text
-import re
BASE_PATTERN = (r"(?:https?://)?comic\.naver\.com"
r"/(webtoon|challenge|bestChallenge)")
@@ -34,18 +33,44 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor):
"?titleId=26458&no=1&weekday=tue"), {
"url": "47a956ba8c7a837213d5985f50c569fcff986f75",
"content": "3806b6e8befbb1920048de9888dfce6220f69a60",
- "count": 14
+ "count": 14,
+ "keyword": {
+ "author": ["김규삼"],
+ "artist": ["김규삼"],
+ "comic": "N의등대-눈의등대",
+ "count": 14,
+ "episode": "1",
+ "extension": "jpg",
+ "num": int,
+ "tags": ["스릴러", "완결무료", "완결스릴러"],
+ "title": "n의 등대 - 눈의 등대 1화",
+ "title_id": "26458",
+ },
}),
(("https://comic.naver.com/challenge/detail"
"?titleId=765124&no=1"), {
- "pattern": r"https://image-comic\.pstatic\.net/nas"
+ "pattern": r"https://image-comic\.pstatic\.net"
r"/user_contents_data/challenge_comic/2021/01/19"
r"/342586/upload_7149856273586337846\.jpeg",
"count": 1,
+ "keyword": {
+ "author": ["kemi****"],
+ "artist": [],
+ "comic": "우니 모두의 이야기",
+ "count": 1,
+ "episode": "1",
+ "extension": "jpeg",
+ "filename": "upload_7149856273586337846",
+ "num": 1,
+ "tags": ["일상툰", "우니모두의이야기", "퇴사", "입사", "신입사원",
+ "사회초년생", "회사원", "20대"],
+ "title": "퇴사하다",
+ "title_id": "765124",
+ },
}),
(("https://comic.naver.com/bestChallenge/detail.nhn"
"?titleId=771467&no=3"), {
- "pattern": r"https://image-comic\.pstatic\.net/nas"
+ "pattern": r"https://image-comic\.pstatic\.net"
r"/user_contents_data/challenge_comic/2021/04/28"
r"/345534/upload_3617293622396203109\.jpeg",
"count": 1,
@@ -66,12 +91,14 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor):
return {
"title_id": self.title_id,
"episode" : self.episode,
- "title" : extr('property="og:title" content="', '"'),
- "comic" : extr('<h2>', '<span'),
- "authors" : extr('class="wrt_nm">', '</span>').strip().split("/"),
- "description": extr('<p class="txt">', '</p>'),
- "genre" : extr('<span class="genre">', '</span>'),
- "date" : extr('<dd class="date">', '</dd>'),
+ "comic" : extr("titleName: '", "'"),
+ "tags" : [t.strip() for t in text.extract_iter(
+ extr("tagList: [", "}],"), '"tagName":"', '"')],
+ "title" : extr('"subtitle":"', '"'),
+ "author" : [a.strip() for a in text.extract_iter(
+ extr('"writers":[', ']'), '"name":"', '"')],
+ "artist" : [a.strip() for a in text.extract_iter(
+ extr('"painters":[', ']'), '"name":"', '"')]
}
@staticmethod
@@ -87,7 +114,7 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor):
class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor):
subcategory = "comic"
categorytransfer = True
- pattern = (BASE_PATTERN + r"/list(?:\.nhn)?\?([^#]+)")
+ pattern = BASE_PATTERN + r"/list(?:\.nhn)?\?([^#]+)"
test = (
("https://comic.naver.com/webtoon/list?titleId=22073", {
"pattern": NaverwebtoonEpisodeExtractor.pattern,
@@ -109,28 +136,30 @@ class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor):
query = text.parse_query(query)
self.title_id = query.get("titleId")
self.page_no = text.parse_int(query.get("page"), 1)
+ self.sort = query.get("sort", "ASC")
def items(self):
- url = "{}/{}/list".format(self.root, self.path)
- params = {"titleId": self.title_id, "page": self.page_no}
- data = {"_extractor": NaverwebtoonEpisodeExtractor}
+ base = "{}/{}/detail?titleId={}&no=".format(
+ self.root, self.path, self.title_id)
+
+ url = self.root + "/api/article/list"
+ headers = {
+ "Accept": "application/json, text/plain, */*",
+ "Referer": self.root + "/",
+ }
+ params = {
+ "titleId": self.title_id,
+ "page" : self.page_no,
+ "sort" : self.sort,
+ }
while True:
- page = self.request(url, params=params).text
- data["page"] = self.page_no
+ data = self.request(url, headers=headers, params=params).json()
- for episode_url in self.get_episode_urls(page):
- yield Message.Queue, episode_url, data
+ for article in data["articleList"]:
+ article["_extractor"] = NaverwebtoonEpisodeExtractor
+ yield Message.Queue, base + str(article["no"]), article
- if 'class="next"' not in page:
+ params["page"] = data["pageInfo"]["nextPage"]
+ if not params["page"]:
return
- params["page"] += 1
-
- def get_episode_urls(self, page):
- """Extract and return all episode urls in page"""
- return [
- self.root + path
- for path in re.findall(
- r'<a href="(/(?:webtoon|challenge|bestChallenge)'
- r'/detail\?[^"]+)', page)
- ][::2]
diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py
index 9b69694..725788a 100644
--- a/gallery_dl/extractor/nitter.py
+++ b/gallery_dl/extractor/nitter.py
@@ -51,6 +51,11 @@ class NitterExtractor(BaseExtractor):
for url in text.extract_iter(
attachments, 'href="', '"'):
+ if "/i/broadcasts/" in url:
+ self.log.debug(
+ "Skipping unsupported broadcast '%s'", url)
+ continue
+
if "/enc/" in url:
name = binascii.a2b_base64(url.rpartition(
"/")[2]).decode().rpartition("/")[2]
@@ -123,7 +128,7 @@ class NitterExtractor(BaseExtractor):
"likes" : text.parse_int(extr(
'class="icon-heart', '</div>').rpartition(">")[2]),
"retweet" : 'class="retweet-header' in html,
- "quoted": False,
+ "quoted" : False,
}
def _tweet_from_quote(self, html):
@@ -140,18 +145,24 @@ class NitterExtractor(BaseExtractor):
"date" : text.parse_datetime(
extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"),
"tweet_id": link.rpartition("/")[2].partition("#")[0],
- "content": extr('class="quote-text', "</div").partition(">")[2],
+ "content" : extr('class="quote-text', "</div").partition(">")[2],
"_attach" : extr('class="attachments', '''
</div>'''),
"retweet" : False,
- "quoted": True,
+ "quoted" : True,
}
def _user_from_html(self, html):
extr = text.extract_from(html, html.index('class="profile-tabs'))
banner = extr('class="profile-banner"><a href="', '"')
+
+ try:
+ uid = banner.split("%2F")[4]
+ except Exception:
+ uid = 0
+
return {
- "id" : banner.split("%2F")[4] if banner else None,
+ "id" : uid,
"profile_banner" : self.root + banner if banner else "",
"profile_image" : self.root + extr(
'class="profile-card-avatar" href="', '"'),
@@ -229,6 +240,10 @@ BASE_PATTERN = NitterExtractor.update({
"root": "https://nitter.unixfox.eu",
"pattern": r"nitter\.unixfox\.eu",
},
+ "nitter.it": {
+ "root": "https://nitter.it",
+ "pattern": r"nitter\.it",
+ },
})
USER_PATTERN = BASE_PATTERN + r"/(i(?:/user/|d:)(\d+)|[^/?#]+)"
@@ -443,6 +458,10 @@ class NitterTweetExtractor(NitterExtractor):
"keyword": {"date": "dt:2022-02-13 20:10:00"},
"count": 1,
}),
+ # broadcast
+ ("https://nitter.it/POTUS/status/1639409307878928384", {
+ "count": 0,
+ })
)
def tweets(self):
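
The user ID is now recovered defensively from the profile banner URL, where it sits as the fifth %2F-separated segment; instances serving profiles without banners previously broke here. A small self-contained sketch of the extraction:

    def uid_from_banner(banner):
        """Return the user ID embedded in a nitter banner URL, or 0."""
        try:
            return banner.split("%2F")[4]
        except Exception:
            return 0

    banner = ("/pic/https%3A%2F%2Fpbs.twimg.com"
              "%2Fprofile_banners%2F783214%2F1646075315%2F1500x500")
    print(uid_from_banner(banner))  # -> 783214
    print(uid_from_banner(""))      # -> 0 (no banner)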
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 29b4ac3..89d96d7 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -248,11 +248,15 @@ class TwitterExtractor(Extractor):
author = tweet["user"]
author = self._transform_user(author)
+ if "note_tweet" in tweet:
+ note = tweet["note_tweet"]["note_tweet_results"]["result"]
+ else:
+ note = None
+
if "legacy" in tweet:
tweet = tweet["legacy"]
tget = tweet.get
- entities = tweet["entities"]
tdata = {
"tweet_id" : text.parse_int(tweet["id_str"]),
"retweet_id" : text.parse_int(
@@ -272,6 +276,8 @@ class TwitterExtractor(Extractor):
"retweet_count" : tget("retweet_count"),
}
+ entities = note["entity_set"] if note else tweet["entities"]
+
hashtags = entities.get("hashtags")
if hashtags:
tdata["hashtags"] = [t["text"] for t in hashtags]
@@ -284,7 +290,8 @@ class TwitterExtractor(Extractor):
"nick": u["name"],
} for u in mentions]
- content = text.unescape(tget("full_text") or tget("text") or "")
+ content = text.unescape(
+ note["text"] if note else tget("full_text") or tget("text") or "")
urls = entities.get("urls")
if urls:
for url in urls:
@@ -642,6 +649,21 @@ class TwitterSearchExtractor(TwitterExtractor):
return self.api.search_adaptive(query)
+class TwitterHashtagExtractor(TwitterExtractor):
+ """Extractor for Twitter hashtags"""
+ subcategory = "hashtag"
+ pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)"
+ test = ("https://twitter.com/hashtag/nature", {
+ "pattern": TwitterSearchExtractor.pattern,
+ "url": "3571c3a53b7647ea35517041fdc17f77ec5b2cb9",
+ })
+
+ def items(self):
+ url = "{}/search?q=%23{}".format(self.root, self.user)
+ data = {"_extractor": TwitterSearchExtractor}
+ yield Message.Queue, url, data
+
+
class TwitterEventExtractor(TwitterExtractor):
"""Extractor for Tweets from a Twitter Event"""
subcategory = "event"
@@ -803,6 +825,23 @@ class TwitterTweetExtractor(TwitterExtractor):
r"\?format=(jpg|png)&name=orig$",
"range": "1-2",
}),
+ # note tweet with long 'content'
+ ("https://twitter.com/i/web/status/1629193457112686592", {
+ "keyword": {
+ "content": """\
+BREAKING - DEADLY LIES: Independent researchers at Texas A&M University have \
+just contradicted federal government regulators, saying that toxic air \
+pollutants in East Palestine, Ohio, could pose long-term risks. \n\nThe \
+Washington Post writes, "Three weeks after the toxic train derailment in \
+Ohio, an analysis of Environmental Protection Agency data has found nine air \
+pollutants at levels that could raise long-term health concerns in and around \
+East Palestine, according to an independent analysis. \n\n\"The analysis by \
+Texas A&M University seems to contradict statements by state and federal \
+regulators that air near the crash site is completely safe, despite residents \
+complaining about rashes, breathing problems and other health effects." \
+Your reaction.""",
+ },
+ }),
)
def __init__(self, match):
@@ -951,6 +990,10 @@ class TwitterAPI():
self.extractor = extractor
self.root = "https://api.twitter.com"
+ self._nsfw_warning = True
+ self._syndication = self.extractor.syndication
+ self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
+
cookies = extractor.session.cookies
cookiedomain = extractor.cookiedomain
@@ -965,7 +1008,11 @@ class TwitterAPI():
auth_token = cookies.get("auth_token", domain=cookiedomain)
+ if not auth_token:
+ self.user_media = self.user_media_legacy
+
self.headers = {
+ "Accept": "*/*",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
"COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
"4FA33AGWWjCpTnA",
@@ -1019,73 +1066,132 @@ class TwitterAPI():
"collab_control,vibe",
}
self.variables = {
- "includePromotedContent": False,
- "withSuperFollowsUserFields": True,
- "withBirdwatchPivots": False,
"withDownvotePerspective": False,
"withReactionsMetadata": False,
"withReactionsPerspective": False,
- "withSuperFollowsTweetFields": True,
- "withClientEventToken": False,
- "withBirdwatchNotes": False,
- "withVoice": True,
- "withV2Timeline": False,
- "__fs_interactive_text": False,
- "__fs_dont_mention_me_view_api_enabled": False,
}
-
- self._nsfw_warning = True
- self._syndication = self.extractor.syndication
- self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
+ self.features = {
+ "responsive_web_twitter_blue_verified_badge_is_enabled": True,
+ "responsive_web_graphql_exclude_directive_enabled": True,
+ "verified_phone_label_enabled": False,
+ "responsive_web_graphql_skip_user_profile_"
+ "image_extensions_enabled": False,
+ "responsive_web_graphql_timeline_navigation_enabled": True,
+ }
+ self.features_pagination = {
+ "responsive_web_twitter_blue_verified_badge_is_enabled": True,
+ "responsive_web_graphql_exclude_directive_enabled": True,
+ "verified_phone_label_enabled": False,
+ "responsive_web_graphql_timeline_navigation_enabled": True,
+ "responsive_web_graphql_skip_user_profile_"
+ "image_extensions_enabled": False,
+ "tweetypie_unmention_optimization_enabled": True,
+ "vibe_api_enabled": True,
+ "responsive_web_edit_tweet_api_enabled": True,
+ "graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
+ "view_counts_everywhere_api_enabled": True,
+ "longform_notetweets_consumption_enabled": True,
+ "tweet_awards_web_tipping_enabled": False,
+ "freedom_of_speech_not_reach_fetch_enabled": False,
+ "standardized_nudges_misinfo": True,
+ "tweet_with_visibility_results_prefer_gql_"
+ "limited_actions_policy_enabled": False,
+ "interactive_text_enabled": True,
+ "responsive_web_text_conversations_enabled": False,
+ "longform_notetweets_richtext_consumption_enabled": False,
+ "responsive_web_enhance_cards_enabled": False,
+ }
def tweet_detail(self, tweet_id):
- endpoint = "/graphql/ItejhtHVxU7ksltgMmyaLA/TweetDetail"
+ endpoint = "/graphql/zXaXQgfyR4GxE21uwYQSyA/TweetDetail"
variables = {
"focalTweetId": tweet_id,
+ "referrer": "profile",
"with_rux_injections": False,
+ "includePromotedContent": True,
"withCommunity": True,
"withQuickPromoteEligibilityTweetFields": True,
"withBirdwatchNotes": False,
+ "withSuperFollowsUserFields": True,
+ "withSuperFollowsTweetFields": True,
+ "withVoice": True,
+ "withV2Timeline": True,
}
return self._pagination_tweets(
- endpoint, variables, ("threaded_conversation_with_injections",))
+ endpoint, variables, ("threaded_conversation_with_injections_v2",))
def user_tweets(self, screen_name):
- endpoint = "/graphql/WZT7sCTrLvSOaWOXLDsWbQ/UserTweets"
+ endpoint = "/graphql/9rys0A7w1EyqVd2ME0QCJg/UserTweets"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
+ "includePromotedContent": True,
"withQuickPromoteEligibilityTweetFields": True,
+ "withVoice": True,
+ "withV2Timeline": True,
}
return self._pagination_tweets(endpoint, variables)
def user_tweets_and_replies(self, screen_name):
- endpoint = "/graphql/t4wEKVulW4Mbv1P0kgxTEw/UserTweetsAndReplies"
+ endpoint = "/graphql/ehMCHF3Mkgjsfz_aImqOsg/UserTweetsAndReplies"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
+ "includePromotedContent": True,
"withCommunity": True,
+ "withVoice": True,
+ "withV2Timeline": True,
}
return self._pagination_tweets(endpoint, variables)
def user_media(self, screen_name):
- endpoint = "/graphql/nRybED9kRbN-TOWioHq1ng/UserMedia"
+ endpoint = "/graphql/MA_EP2a21zpzNWKRkaPBMg/UserMedia"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
+ "includePromotedContent": False,
+ "withClientEventToken": False,
+ "withBirdwatchNotes": False,
+ "withVoice": True,
+ "withV2Timeline": True,
}
return self._pagination_tweets(endpoint, variables)
+ def user_media_legacy(self, screen_name):
+ endpoint = "/graphql/nRybED9kRbN-TOWioHq1ng/UserMedia"
+ variables = {
+ "userId": self._user_id_by_screen_name(screen_name),
+ "count": 100,
+ "includePromotedContent": False,
+ "withSuperFollowsUserFields": True,
+ "withBirdwatchPivots": False,
+ "withSuperFollowsTweetFields": True,
+ "withClientEventToken": False,
+ "withBirdwatchNotes": False,
+ "withVoice": True,
+ "withV2Timeline": False,
+ "__fs_interactive_text": False,
+ "__fs_dont_mention_me_view_api_enabled": False,
+ }
+ return self._pagination_tweets(
+ endpoint, variables, ("user", "result", "timeline", "timeline"),
+ features=False)
+
def user_likes(self, screen_name):
- endpoint = "/graphql/9MSTt44HoGjVFSg_u3rHDw/Likes"
+ endpoint = "/graphql/XbHBYpgURwtklXj8NNxTDw/Likes"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
+ "includePromotedContent": False,
+ "withClientEventToken": False,
+ "withBirdwatchNotes": False,
+ "withVoice": True,
+ "withV2Timeline": True,
}
return self._pagination_tweets(endpoint, variables)
def user_bookmarks(self):
- endpoint = "/graphql/uKP9v_I31k0_VSBmlpq2Xg/Bookmarks"
+ endpoint = "/graphql/Xq0wQSWHlcfnXARLJGqTxg/Bookmarks"
variables = {
"count": 100,
}
@@ -1093,7 +1199,7 @@ class TwitterAPI():
endpoint, variables, ("bookmark_timeline", "timeline"), False)
def list_latest_tweets_timeline(self, list_id):
- endpoint = "/graphql/z3l-EHlx-fyg8OvGO4JN8A/ListLatestTweetsTimeline"
+ endpoint = "/graphql/FDI9EiIp54KxEOWGiv3B4A/ListLatestTweetsTimeline"
variables = {
"listId": list_id,
"count": 100,
@@ -1128,18 +1234,21 @@ class TwitterAPI():
["twitter_objects"]["live_events"][event_id])
def list_by_rest_id(self, list_id):
- endpoint = "/graphql/BWEhzAk7k8TwbU4lKH2dpw/ListByRestId"
- params = {"variables": self._json_dumps({
- "listId": list_id,
- "withSuperFollowsUserFields": True,
- })}
+ endpoint = "/graphql/KlGpwq5CAt9tCfHkV2mwYQ/ListByRestId"
+ params = {
+ "variables": self._json_dumps({
+ "listId": list_id,
+ "withSuperFollowsUserFields": True,
+ }),
+ "features": self._json_dumps(self.features),
+ }
try:
return self._call(endpoint, params)["data"]["list"]
except KeyError:
raise exception.NotFoundError("list")
def list_members(self, list_id):
- endpoint = "/graphql/snESM0DPs3c7M1SBm4rvVw/ListMembers"
+ endpoint = "/graphql/XsAJX17RLgLYU8GALIWg2g/ListMembers"
variables = {
"listId": list_id,
"count": 100,
@@ -1149,29 +1258,34 @@ class TwitterAPI():
endpoint, variables, ("list", "members_timeline", "timeline"))
def user_following(self, screen_name):
- endpoint = "/graphql/mIwX8GogcobVlRwlgpHNYA/Following"
+ endpoint = "/graphql/vTZwBbd_gz6aI8v6Wze21A/Following"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
+ "includePromotedContent": False,
}
return self._pagination_users(endpoint, variables)
def user_by_rest_id(self, rest_id):
- endpoint = "/graphql/I5nvpI91ljifos1Y3Lltyg/UserByRestId"
- params = {"variables": self._json_dumps({
- "userId": rest_id,
- "withSafetyModeUserFields": True,
- "withSuperFollowsUserFields": True,
- })}
+ endpoint = "/graphql/QPSxc9lxrmrwnBzYkJI8eA/UserByRestId"
+ params = {
+ "variables": self._json_dumps({
+ "userId": rest_id,
+ "withSafetyModeUserFields": True,
+ }),
+ "features": self._json_dumps(self.features),
+ }
return self._call(endpoint, params)["data"]["user"]["result"]
def user_by_screen_name(self, screen_name):
- endpoint = "/graphql/7mjxD3-C6BxitPMVQ6w0-Q/UserByScreenName"
- params = {"variables": self._json_dumps({
- "screen_name": screen_name,
- "withSafetyModeUserFields": True,
- "withSuperFollowsUserFields": True,
- })}
+ endpoint = "/graphql/nZjSkpOpSL5rWyIVdsKeLA/UserByScreenName"
+ params = {
+ "variables": self._json_dumps({
+ "screen_name": screen_name,
+ "withSafetyModeUserFields": True,
+ }),
+ "features": self._json_dumps(self.features),
+ }
return self._call(endpoint, params)["data"]["user"]["result"]
def _user_id_by_screen_name(self, screen_name):
@@ -1337,19 +1451,23 @@ class TwitterAPI():
params["cursor"] = cursor
def _pagination_tweets(self, endpoint, variables,
- path=None, stop_tweets=True):
+ path=None, stop_tweets=True, features=True):
extr = self.extractor
variables.update(self.variables)
original_retweets = (extr.retweets == "original")
pinned_tweet = extr.pinned
+ params = {"variables": None}
+ if features:
+ params["features"] = self._json_dumps(self.features_pagination)
+
while True:
- params = {"variables": self._json_dumps(variables)}
+ params["variables"] = self._json_dumps(variables)
data = self._call(endpoint, params)["data"]
try:
if path is None:
- instructions = (data["user"]["result"]["timeline"]
+ instructions = (data["user"]["result"]["timeline_v2"]
["timeline"]["instructions"])
else:
instructions = data
@@ -1487,10 +1605,12 @@ class TwitterAPI():
def _pagination_users(self, endpoint, variables, path=None):
variables.update(self.variables)
+ params = {"variables": None,
+ "features" : self._json_dumps(self.features_pagination)}
while True:
cursor = entry = stop = None
- params = {"variables": self._json_dumps(variables)}
+ params["variables"] = self._json_dumps(variables)
data = self._call(endpoint, params)["data"]
try:
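
Tweets longer than 280 characters arrive with their full text under note_tweet, so the tweet transformation above prefers the note's text and entity_set and falls back to the legacy fields otherwise. A condensed sketch of that selection over a hypothetical payload:

    def text_and_entities(tweet):
        """Prefer long-form 'note_tweet' data; fall back to legacy fields."""
        note = None
        if "note_tweet" in tweet:
            note = tweet["note_tweet"]["note_tweet_results"]["result"]
        legacy = tweet.get("legacy", tweet)
        entities = note["entity_set"] if note else legacy["entities"]
        content = note["text"] if note else (
            legacy.get("full_text") or legacy.get("text") or "")
        return content, entities

    tweet = {"legacy": {"entities": {}, "full_text": "short tweet"}}
    print(text_and_entities(tweet)[0])  # -> short tweet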
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 68bd136..388ee03 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -79,6 +79,18 @@ class WeiboExtractor(Extractor):
def _extract_status(self, status, files):
append = files.append
+ if "mix_media_info" in status:
+ for item in status["mix_media_info"]["items"]:
+ type = item.get("type")
+ if type == "video":
+ if self.videos:
+ append(self._extract_video(item["data"]["media_info"]))
+ elif type == "pic":
+ append(item["data"]["largest"].copy())
+ else:
+ self.log.warning("Unknown media type '%s'", type)
+ return
+
pic_ids = status.get("pic_ids")
if pic_ids:
pics = status["pic_infos"]
@@ -100,18 +112,20 @@ class WeiboExtractor(Extractor):
else:
append(pic["largest"].copy())
- if "page_info" in status and self.videos:
- try:
- media = max(status["page_info"]["media_info"]["playback_list"],
- key=lambda m: m["meta"]["quality_index"])
- except KeyError:
- pass
- except ValueError:
- info = status["page_info"]["media_info"]
- append({"url": (info.get("stream_url_hd") or
- info["stream_url"])})
- else:
- append(media["play_info"].copy())
+ if "page_info" in status:
+ info = status["page_info"]
+ if "media_info" in info and self.videos:
+ append(self._extract_video(info["media_info"]))
+
+ def _extract_video(self, info):
+ try:
+ media = max(info["playback_list"],
+ key=lambda m: m["meta"]["quality_index"])
+ except Exception:
+ return {"url": (info.get("stream_url_hd") or
+ info["stream_url"])}
+ else:
+ return media["play_info"].copy()
def _status_by_id(self, status_id):
url = "{}/ajax/statuses/show?id={}".format(self.root, status_id)
@@ -380,7 +394,7 @@ class WeiboStatusExtractor(WeiboExtractor):
}),
# missing 'playback_list' (#2792)
("https://weibo.com/2909128931/4409545658754086", {
- "count": 9,
+ "count": 10,
}),
# empty 'playback_list' (#3301)
("https://weibo.com/1501933722/4142890299009993", {
@@ -389,6 +403,10 @@ class WeiboStatusExtractor(WeiboExtractor):
r"=0&ps=1CwnkDw1GXwCQx.+&KID=unistore,video",
"count": 1,
}),
+ # mix_media_info (#3793)
+ ("https://weibo.com/2427303621/MxojLlLgQ", {
+ "count": 9,
+ }),
("https://m.weibo.cn/status/4339748116375525"),
("https://m.weibo.cn/5746766133/4339748116375525"),
)
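
Video selection is now factored into _extract_video(), which picks the playback entry with the highest quality_index and falls back to the direct stream URLs when playback_list is missing or empty. A standalone sketch of the same logic:

    def extract_video(info):
        """Return the best-quality video descriptor from a media_info dict."""
        try:
            media = max(info["playback_list"],
                        key=lambda m: m["meta"]["quality_index"])
        except Exception:  # no or empty 'playback_list'
            return {"url": info.get("stream_url_hd") or info["stream_url"]}
        return media["play_info"].copy()

    info = {"playback_list": [
        {"meta": {"quality_index": 2}, "play_info": {"url": "hd.mp4"}},
        {"meta": {"quality_index": 1}, "play_info": {"url": "sd.mp4"}},
    ]}
    print(extract_video(info))                     # -> {'url': 'hd.mp4'}
    print(extract_video({"stream_url": "s.mp4"}))  # fallback path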
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index 2c5bd11..fc36fa2 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -34,6 +34,8 @@ def parse(format_string, default=NONE, fmt=format):
if kind == "T":
cls = TemplateFormatter
+ elif kind == "TF":
+ cls = TemplateFStringFormatter
elif kind == "E":
cls = ExpressionFormatter
elif kind == "M":
@@ -197,15 +199,6 @@ class StringFormatter():
return lambda obj: fmt(conversion(obj))
-class TemplateFormatter(StringFormatter):
- """Read format_string from file"""
-
- def __init__(self, path, default=NONE, fmt=format):
- with open(util.expand_path(path)) as fp:
- format_string = fp.read()
- StringFormatter.__init__(self, format_string, default, fmt)
-
-
class ExpressionFormatter():
"""Generate text by evaluating a Python expression"""
@@ -218,7 +211,7 @@ class ModuleFormatter():
def __init__(self, function_spec, default=NONE, fmt=None):
module_name, _, function_name = function_spec.partition(":")
- module = __import__(module_name)
+ module = util.import_file(module_name)
self.format_map = getattr(module, function_name)
@@ -229,6 +222,24 @@ class FStringFormatter():
self.format_map = util.compile_expression('f"""' + fstring + '"""')
+class TemplateFormatter(StringFormatter):
+ """Read format_string from file"""
+
+ def __init__(self, path, default=NONE, fmt=format):
+ with open(util.expand_path(path)) as fp:
+ format_string = fp.read()
+ StringFormatter.__init__(self, format_string, default, fmt)
+
+
+class TemplateFStringFormatter(FStringFormatter):
+ """Read f-string from file"""
+
+ def __init__(self, path, default=NONE, fmt=format):
+ with open(util.expand_path(path)) as fp:
+ format_string = fp.read()
+ FStringFormatter.__init__(self, format_string, default, fmt)
+
+
def parse_field_name(field_name):
first, rest = _string.formatter_field_name_split(field_name)
funcs = []
@@ -245,6 +256,8 @@ def parse_field_name(field_name):
try:
if ":" in key:
key = _slice(key)
+ else:
+ key = key.strip("\"'")
except TypeError:
pass # key is an integer
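
Two formatter features land here: dictionary keys may now be quoted inside replacement fields, and the new \fTF prefix loads an f-string template from a file. A short usage sketch (the template path is an example):

    from gallery_dl import formatter

    # quoted and unquoted keys are equivalent
    fmt = formatter.parse("{user['name']}")
    print(fmt.format_map({"user": {"name": "test"}}))  # -> test

    # load an f-string from a template file
    # with open("/tmp/template", "w") as fp:
    #     fp.write("{user['name'].upper()}")
    # fmt = formatter.parse("\fTF /tmp/template")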
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index a64c040..ca5785d 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -633,13 +633,13 @@ class KeywordJob(Job):
def print_kwdict(self, kwdict, prefix="", markers=None):
"""Print key-value pairs in 'kwdict' with formatting"""
write = sys.stdout.write
- suffix = "]" if prefix else ""
+ suffix = "']" if prefix else ""
markerid = id(kwdict)
if markers is None:
markers = {markerid}
elif markerid in markers:
- write("{}\n <circular reference>\n".format(prefix[:-1]))
+ write("{}\n <circular reference>\n".format(prefix[:-2]))
return # ignore circular reference
else:
markers.add(markerid)
@@ -650,13 +650,13 @@ class KeywordJob(Job):
key = prefix + key + suffix
if isinstance(value, dict):
- self.print_kwdict(value, key + "[", markers)
+ self.print_kwdict(value, key + "['", markers)
elif isinstance(value, list):
if not value:
pass
elif isinstance(value[0], dict):
- self.print_kwdict(value[0], key + "[N][", markers)
+ self.print_kwdict(value[0], key + "[N]['", markers)
else:
fmt = (" {:>%s} {}\n" % len(str(len(value)))).format
write(key + "[N]\n")
@@ -667,6 +667,8 @@ class KeywordJob(Job):
# string or number
write("{}\n {}\n".format(key, value))
+ markers.remove(markerid)
+
class UrlJob(Job):
"""Print download urls"""
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index 1d53851..4f2ee26 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -270,16 +270,15 @@ else:
def configure_standard_streams():
for name in ("stdout", "stderr", "stdin"):
- options = config.get(("output",), name)
- if not options:
- continue
-
stream = getattr(sys, name, None)
if not stream:
continue
- if isinstance(options, str):
- options = {"encoding": options, "errors": "replace"}
+ options = config.get(("output",), name)
+ if not options:
+ options = {"errors": "replace"}
+ elif isinstance(options, str):
+ options = {"errors": "replace", "encoding": options}
elif not options.get("errors"):
options["errors"] = "replace"
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 9667a41..714f4fe 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -87,6 +87,7 @@ class MetadataPP(PostProcessor):
self.omode = options.get("open", omode)
self.encoding = options.get("encoding", "utf-8")
self.private = options.get("private", False)
+ self.skip = options.get("skip", False)
def run(self, pathfmt):
archive = self.archive
@@ -96,6 +97,9 @@ class MetadataPP(PostProcessor):
directory = self._directory(pathfmt)
path = directory + self._filename(pathfmt)
+ if self.skip and os.path.exists(path):
+ return
+
try:
with open(path, self.omode, encoding=self.encoding) as fp:
self.write(fp, pathfmt.kwdict)
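
The new skip option lets the metadata post processor leave already existing files untouched. A plausible configuration snippet enabling it, following the standard gallery-dl JSON config layout:

    {
        "extractor": {
            "postprocessors": [{
                "name": "metadata",
                "mode": "json",
                "skip": true
            }]
        }
    }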
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 494b7f5..93a9148 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.25.0"
+__version__ = "1.25.1"
diff --git a/test/test_formatter.py b/test/test_formatter.py
index 50e55a6..2258966 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -128,6 +128,11 @@ class TestFormatter(unittest.TestCase):
self._run_test("{l[0]}" , "a")
self._run_test("{a[6]}" , "w")
+ def test_dict_access(self):
+ self._run_test("{d[a]}" , "foo")
+ self._run_test("{d['a']}", "foo")
+ self._run_test('{d["a"]}', "foo")
+
def test_slicing(self):
v = self.kwdict["a"]
self._run_test("{a[1:10]}" , v[1:10])
@@ -348,6 +353,27 @@ class TestFormatter(unittest.TestCase):
self._run_test("\fF foo-'\"{a.upper()}\"'-bar",
"""foo-'"{}"'-bar""".format(self.kwdict["a"].upper()))
+ @unittest.skipIf(sys.hexversion < 0x3060000, "no fstring support")
+ def test_template_fstring(self):
+ with tempfile.TemporaryDirectory() as tmpdirname:
+ path1 = os.path.join(tmpdirname, "tpl1")
+ path2 = os.path.join(tmpdirname, "tpl2")
+
+ with open(path1, "w") as fp:
+ fp.write("{a}")
+ fmt1 = formatter.parse("\fTF " + path1)
+
+ with open(path2, "w") as fp:
+ fp.write("foo-'\"{a.upper()}\"'-bar")
+ fmt2 = formatter.parse("\fTF " + path2)
+
+ self.assertEqual(fmt1.format_map(self.kwdict), self.kwdict["a"])
+ self.assertEqual(fmt2.format_map(self.kwdict),
+ """foo-'"{}"'-bar""".format(self.kwdict["a"].upper()))
+
+ with self.assertRaises(OSError):
+ formatter.parse("\fTF /")
+
def test_module(self):
with tempfile.TemporaryDirectory() as tmpdirname:
path = os.path.join(tmpdirname, "testmod.py")
@@ -374,7 +400,7 @@ def noarg():
try:
fmt1 = formatter.parse("\fM testmod:gentext")
fmt2 = formatter.parse("\fM testmod:lengths")
- fmt3 = formatter.parse("\fM testmod:noarg")
+ fmt0 = formatter.parse("\fM testmod:noarg")
with self.assertRaises(AttributeError):
formatter.parse("\fM testmod:missing")
@@ -383,11 +409,17 @@ def noarg():
finally:
sys.path.pop(0)
+ fmt3 = formatter.parse("\fM " + path + ":gentext")
+ fmt4 = formatter.parse("\fM " + path + ":lengths")
+
self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name")
self.assertEqual(fmt2.format_map(self.kwdict), "89")
+ self.assertEqual(fmt3.format_map(self.kwdict), "'Title' by Name")
+ self.assertEqual(fmt4.format_map(self.kwdict), "89")
+
with self.assertRaises(TypeError):
- self.assertEqual(fmt3.format_map(self.kwdict), "")
+ self.assertEqual(fmt0.format_map(self.kwdict), "")
def _run_test(self, format_string, result, default=None, fmt=format):
fmt = formatter.parse(format_string, default, fmt)
diff --git a/test/test_job.py b/test/test_job.py
index 1bd9ccc..a6e093f 100644
--- a/test/test_job.py
+++ b/test/test_job.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -66,17 +66,36 @@ class TestKeywordJob(TestJob):
jobclass = job.KeywordJob
def test_default(self):
- extr = TestExtractor.from_url("test:")
+ self.maxDiff = None
+ extr = TestExtractor.from_url("test:self")
self.assertEqual(self._capture_stdout(extr), """\
Keywords for directory names:
-----------------------------
+author['id']
+ 123
+author['name']
+ test
+author['self']
+ <circular reference>
category
test_category
subcategory
test_subcategory
+user['id']
+ 123
+user['name']
+ test
+user['self']
+ <circular reference>
Keywords for filenames and --filter:
------------------------------------
+author['id']
+ 123
+author['name']
+ test
+author['self']
+ <circular reference>
category
test_category
extension
@@ -91,10 +110,12 @@ tags[N]
0 foo
1 bar
2 テスト
-user[id]
+user['id']
123
-user[name]
+user['name']
test
+user['self']
+ <circular reference>
""")
@@ -209,6 +230,7 @@ class TestDataJob(TestJob):
def test_default(self):
extr = TestExtractor.from_url("test:")
tjob = self.jobclass(extr, file=io.StringIO())
+ user = {"id": 123, "name": "test"}
tjob.run()
@@ -216,6 +238,8 @@ class TestDataJob(TestJob):
(Message.Directory, {
"category" : "test_category",
"subcategory": "test_subcategory",
+ "user" : user,
+ "author" : user,
}),
(Message.Url, "https://example.org/1.jpg", {
"category" : "test_category",
@@ -224,7 +248,8 @@ class TestDataJob(TestJob):
"extension" : "jpg",
"num" : 1,
"tags" : ["foo", "bar", "テスト"],
- "user" : {"id": 123, "name": "test"},
+ "user" : user,
+ "author" : user,
}),
(Message.Url, "https://example.org/2.jpg", {
"category" : "test_category",
@@ -233,7 +258,8 @@ class TestDataJob(TestJob):
"extension" : "jpg",
"num" : 2,
"tags" : ["foo", "bar", "テスト"],
- "user" : {"id": 123, "name": "test"},
+ "user" : user,
+ "author" : user,
}),
(Message.Url, "https://example.org/3.jpg", {
"category" : "test_category",
@@ -242,7 +268,8 @@ class TestDataJob(TestJob):
"extension" : "jpg",
"num" : 3,
"tags" : ["foo", "bar", "テスト"],
- "user" : {"id": 123, "name": "test"},
+ "user" : user,
+ "author" : user,
}),
])
@@ -316,7 +343,7 @@ class TestDataJob(TestJob):
config.set(("output",), "num-to-str", True)
with patch("gallery_dl.util.number_to_string") as nts:
tjob.run()
- self.assertEqual(len(nts.call_args_list), 52)
+ self.assertEqual(len(nts.call_args_list), 72)
tjob.run()
self.assertEqual(tjob.data[-1][0], Message.Url)
@@ -328,18 +355,30 @@ class TestExtractor(Extractor):
subcategory = "test_subcategory"
directory_fmt = ("{category}",)
filename_fmt = "test_{filename}.{extension}"
- pattern = r"test:(child)?$"
+ pattern = r"test:(child|self)?$"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user = {"id": 123, "name": "test"}
+ if match.group(1) == "self":
+ self.user["self"] = self.user
def items(self):
root = "https://example.org"
+ user = self.user
+
+ yield Message.Directory, {
+ "user": user,
+ "author": user,
+ }
- yield Message.Directory, {}
for i in range(1, 4):
url = "{}/{}.jpg".format(root, i)
yield Message.Url, url, text.nameext_from_url(url, {
"num" : i,
"tags": ["foo", "bar", "テスト"],
- "user": {"id": 123, "name": "test"},
+ "user": user,
+ "author": user,
"_fallback": ("{}/alt/{}.jpg".format(root, i),),
})
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 650bf59..c78d7b0 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -428,11 +428,46 @@ class MetadataTest(BasePostprocessorTest):
self.assertNotIn("baz", pdict["bar"])
self.assertEqual(kwdict["bar"], pdict["bar"])
+ # no errors for deleted/undefined fields
self._trigger()
self.assertNotIn("foo", pdict)
self.assertNotIn("baz", pdict["bar"])
self.assertEqual(kwdict["bar"], pdict["bar"])
+ def test_metadata_option_skip(self):
+ self._create({"skip": True})
+
+ with patch("builtins.open", mock_open()) as m, \
+ patch("os.path.exists") as e:
+ e.return_value = True
+ self._trigger()
+
+ self.assertTrue(e.called)
+ self.assertTrue(not m.called)
+ self.assertTrue(not len(self._output(m)))
+
+ with patch("builtins.open", mock_open()) as m, \
+ patch("os.path.exists") as e:
+ e.return_value = False
+ self._trigger()
+
+ self.assertTrue(e.called)
+ self.assertTrue(m.called)
+ self.assertGreater(len(self._output(m)), 0)
+
+ path = self.pathfmt.realdirectory + "file.ext.json"
+ m.assert_called_once_with(path, "w", encoding="utf-8")
+
+ def test_metadata_option_skip_false(self):
+ self._create({"skip": False})
+
+ with patch("builtins.open", mock_open()) as m, \
+ patch("os.path.exists") as e:
+ self._trigger()
+
+ self.assertTrue(not e.called)
+ self.assertTrue(m.called)
+
@staticmethod
def _output(mock):
return "".join(