author    Unit 193 <unit193@unit193.net>  2023-04-25 21:32:02 -0400
committer Unit 193 <unit193@unit193.net>  2023-04-25 21:32:02 -0400
commit    f98ab7aaca3c4acbd5a793267791749740330e9c (patch)
tree      72e3d3312a8ff2cdb24353b1d7be6fb8301f431c
parent    09e426350409d45e7f7a8ff369f8d8aa9eec0fe4 (diff)

New upstream version 1.25.2. (tag: upstream/1.25.2)
-rw-r--r--  CHANGELOG.md                          |  27
-rw-r--r--  PKG-INFO                              |   6
-rw-r--r--  README.rst                            |   4
-rw-r--r--  data/man/gallery-dl.1                 |   2
-rw-r--r--  data/man/gallery-dl.conf.5            |  18
-rw-r--r--  docs/gallery-dl-example.conf          |   4
-rw-r--r--  docs/gallery-dl.conf                  |   3
-rw-r--r--  gallery_dl.egg-info/PKG-INFO          |   6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt       |   1
-rw-r--r--  gallery_dl/downloader/http.py         |   4
-rw-r--r--  gallery_dl/extractor/__init__.py      |   1
-rw-r--r--  gallery_dl/extractor/blogger.py       |   2
-rw-r--r--  gallery_dl/extractor/bunkr.py         |  14
-rw-r--r--  gallery_dl/extractor/danbooru.py      |  26
-rw-r--r--  gallery_dl/extractor/deviantart.py    |  31
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py  |  26
-rw-r--r--  gallery_dl/extractor/generic.py       |  58
-rw-r--r--  gallery_dl/extractor/hentaifoundry.py |   6
-rw-r--r--  gallery_dl/extractor/hotleak.py       |   5
-rw-r--r--  gallery_dl/extractor/imagechest.py    |  15
-rw-r--r--  gallery_dl/extractor/mastodon.py      |  11
-rw-r--r--  gallery_dl/extractor/nitter.py        |   5
-rw-r--r--  gallery_dl/extractor/sexcom.py        |  17
-rw-r--r--  gallery_dl/extractor/shopify.py       |  13
-rw-r--r--  gallery_dl/extractor/twitter.py       | 124
-rw-r--r--  gallery_dl/extractor/urlshortener.py  |  69
-rw-r--r--  gallery_dl/version.py                 |   2
-rw-r--r--  test/test_downloader.py               |   4
28 files changed, 385 insertions(+), 119 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d312557..a67e3ab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,32 @@
# Changelog
+## 1.25.2 - 2023-04-15
+### Additions
+- [deviantart] add `public` option
+- [nitter] extract videos from `source` elements ([#3912](https://github.com/mikf/gallery-dl/issues/3912))
+- [twitter] add `date_liked` and `date_bookmarked` metadata for liked and bookmarked Tweets ([#3816](https://github.com/mikf/gallery-dl/issues/3816))
+- [urlshortener] add support for bit.ly & t.co ([#3841](https://github.com/mikf/gallery-dl/issues/3841))
+- [downloader:http] add MIME type and signature for `.heic` files ([#3915](https://github.com/mikf/gallery-dl/issues/3915))
+### Fixes
+- [blogger] update regex to get the highest resolution URLs ([#3863](https://github.com/mikf/gallery-dl/issues/3863), [#3870](https://github.com/mikf/gallery-dl/issues/3870))
+- [bunkr] update domain to `bunkr.la` ([#3813](https://github.com/mikf/gallery-dl/issues/3813), [#3877](https://github.com/mikf/gallery-dl/issues/3877))
+- [deviantart] keep using private access tokens when requesting download URLs ([#3845](https://github.com/mikf/gallery-dl/issues/3845), [#3857](https://github.com/mikf/gallery-dl/issues/3857), [#3896](https://github.com/mikf/gallery-dl/issues/3896))
+- [hentaifoundry] fix content filters ([#3887](https://github.com/mikf/gallery-dl/issues/3887))
+- [hotleak] fix downloading of creators whose name starts with a category name ([#3871](https://github.com/mikf/gallery-dl/issues/3871))
+- [imagechest] fix extraction ([#3914](https://github.com/mikf/gallery-dl/issues/3914))
+- [realbooru] fix extraction ([#2530](https://github.com/mikf/gallery-dl/issues/2530))
+- [sexcom] fix pagination ([#3906](https://github.com/mikf/gallery-dl/issues/3906))
+- [sexcom] fix HD video extraction
+- [shopify] fix `collection` extractor ([#3866](https://github.com/mikf/gallery-dl/issues/3866), [#3868](https://github.com/mikf/gallery-dl/issues/3868))
+- [twitter] update to bookmark timeline v2 ([#3859](https://github.com/mikf/gallery-dl/issues/3859), [#3854](https://github.com/mikf/gallery-dl/issues/3854))
+- [twitter] warn about "withheld" Tweets and users ([#3864](https://github.com/mikf/gallery-dl/issues/3864))
+### Improvements
+- [danbooru] reduce number of API requests when fetching extended `metadata`
+- [deviantart:search] detect login redirects ([#3860](https://github.com/mikf/gallery-dl/issues/3860))
+- [generic] write regular expressions without `x` flags
+- [mastodon] try to get account IDs without access token
+- [twitter] calculate `date` from Tweet IDs
+
## 1.25.1 - 2023-03-25
### Additions
- [nitter] support nitter.it ([#3819](https://github.com/mikf/gallery-dl/issues/3819))
diff --git a/PKG-INFO b/PKG-INFO
index 1156e79..cb01fca 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.25.1
+Version: 1.25.2
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -106,9 +106,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.1/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.2/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.1/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.2/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index e4fd1c6..8472d2d 100644
--- a/README.rst
+++ b/README.rst
@@ -69,9 +69,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.1/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.2/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.1/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.2/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 8b96657..8aa419d 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-03-25" "1.25.1" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-04-15" "1.25.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index fd32eb1..63d78f0 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-03-25" "1.25.1" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-04-15" "1.25.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1346,7 +1346,7 @@ It is possible to specify a custom list of metadata includes.
See \f[I]available_includes\f[]
for possible field names. \f[I]aibooru\f[] also supports \f[I]ai_metadata\f[].
-Note: This requires 1 additional HTTP request per post.
+Note: This requires 1 additional HTTP request per 200-post batch.
.SS extractor.[Danbooru].threshold
@@ -1602,6 +1602,20 @@ Controls when to stop paginating over API results.
* \f[I]"manual"\f[]: Disregard \f[I]has_more\f[] and only stop when a batch of results is empty.
+.SS extractor.deviantart.public
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Use a public access token for API requests.
+
+Disable this option to *force* using a private token for all requests
+when a \f[I]refresh token\f[] is provided.
+
+
.SS extractor.deviantart.refresh-token
.IP "Type:" 6
\f[I]string\f[]
diff --git a/docs/gallery-dl-example.conf b/docs/gallery-dl-example.conf
index ef7b3b5..da386dd 100644
--- a/docs/gallery-dl-example.conf
+++ b/docs/gallery-dl-example.conf
@@ -317,6 +317,10 @@
"archive": "~/gallery-dl/custom-archive-file-for-TBIB.db",
"filename": "{id}_{md5}.{extension}",
"sleep-request": [0, 1.2]
+ },
+
+ "urlshortener": {
+ "tinyurl": {"root": "https://tinyurl.com"}
}
},
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 7564e5b..09d9e80 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -74,6 +74,7 @@
{
"client-id": null,
"client-secret": null,
+ "refresh-token": null,
"auto-watch": false,
"auto-unwatch": false,
"comments": false,
@@ -86,6 +87,8 @@
"mature": true,
"metadata": false,
"original": true,
+ "pagination": "api",
+ "public": true,
"wait-min": 0
},
"e621":
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index f836313..25c9619 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.25.1
+Version: 1.25.2
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -106,9 +106,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.1/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.2/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.1/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.2/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 9827944..bb2ff51 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -199,6 +199,7 @@ gallery_dl/extractor/twibooru.py
gallery_dl/extractor/twitter.py
gallery_dl/extractor/unsplash.py
gallery_dl/extractor/uploadir.py
+gallery_dl/extractor/urlshortener.py
gallery_dl/extractor/vanillarock.py
gallery_dl/extractor/vichan.py
gallery_dl/extractor/vk.py
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index e977320..88e86e9 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -353,6 +353,8 @@ MIME_TYPES = {
"image/x-ms-bmp": "bmp",
"image/webp" : "webp",
"image/avif" : "avif",
+ "image/heic" : "heic",
+ "image/heif" : "heif",
"image/svg+xml" : "svg",
"image/ico" : "ico",
"image/icon" : "ico",
@@ -399,6 +401,8 @@ SIGNATURE_CHECKS = {
"webp": lambda s: (s[0:4] == b"RIFF" and
s[8:12] == b"WEBP"),
"avif": lambda s: s[4:11] == b"ftypavi" and s[11] in b"fs",
+ "heic": lambda s: (s[4:10] == b"ftyphe" and s[10:12] in (
+ b"ic", b"im", b"is", b"ix", b"vc", b"vm", b"vs")),
"svg" : lambda s: s[0:5] == b"<?xml",
"ico" : lambda s: s[0:4] == b"\x00\x00\x01\x00",
"cur" : lambda s: s[0:4] == b"\x00\x00\x02\x00",
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 3968d72..553a110 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -153,6 +153,7 @@ modules = [
"twitter",
"unsplash",
"uploadir",
+ "urlshortener",
"vanillarock",
"vichan",
"vk",
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 56010c2..eafc8af 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -40,7 +40,7 @@ class BloggerExtractor(Extractor):
blog["date"] = text.parse_datetime(blog["published"])
del blog["selfLink"]
- sub = re.compile(r"(/|=)(?:s\d+|w\d+-h\d+)(?=/|$)").sub
+ sub = re.compile(r"(/|=)(?:[sw]\d+|w\d+-h\d+)(?=/|$)").sub
findall_image = re.compile(
r'src="(https?://(?:'
r'blogger\.googleusercontent\.com/img|'
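The widened pattern above now strips bare "w400"-style width tokens in addition to "s1600" size and "w400-h300" size-pair tokens, which is what restores full-resolution URLs (#3863, #3870). A sketch of the substitution; the "s0" ("original size") replacement token is an assumption for illustration, since the replacement string itself lies outside this hunk:

    import re

    sub = re.compile(r"(/|=)(?:[sw]\d+|w\d+-h\d+)(?=/|$)").sub

    url = "https://blogger.googleusercontent.com/img/a/AVvX0example/w400-h300/photo.jpg"
    print(sub(r"\1s0", url))  # assumed replacement: request the original size
    # https://blogger.googleusercontent.com/img/a/AVvX0example/s0/photo.jpg
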
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 17d066d..7c66fb0 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -6,19 +6,19 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://bunkr.su/"""
+"""Extractors for https://bunkr.la/"""
from .lolisafe import LolisafeAlbumExtractor
from .. import text
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
- """Extractor for bunkr.su albums"""
+ """Extractor for bunkr.la albums"""
category = "bunkr"
- root = "https://bunkr.su"
- pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:[sr]u|is|to)/a/([^/?#]+)"
+ root = "https://bunkr.la"
+ pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:la|[sr]u|is|to)/a/([^/?#]+)"
test = (
- ("https://bunkr.su/a/Lktg9Keq", {
+ ("https://bunkr.la/a/Lktg9Keq", {
"pattern": r"https://cdn\.bunkr\.ru/test-テスト-\"&>-QjgneIQv\.png",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
"keyword": {
@@ -52,6 +52,10 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"num": int,
},
}),
+ ("https://bunkr.la/a/Lktg9Keq"),
+ ("https://bunkr.su/a/Lktg9Keq"),
+ ("https://bunkr.ru/a/Lktg9Keq"),
+ ("https://bunkr.is/a/Lktg9Keq"),
("https://bunkr.to/a/Lktg9Keq"),
)
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index f104556..326b53b 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -26,6 +26,7 @@ class DanbooruExtractor(BaseExtractor):
BaseExtractor.__init__(self, match)
self.ugoira = self.config("ugoira", False)
self.external = self.config("external", False)
+ self.includes = False
threshold = self.config("threshold")
if isinstance(threshold, int):
@@ -54,6 +55,7 @@ class DanbooruExtractor(BaseExtractor):
includes = ",".join(includes)
elif not isinstance(includes, str):
includes = "artist_commentary,children,notes,parent,uploader"
+ self.includes = includes + ",id"
data = self.metadata()
for post in self.posts():
@@ -77,11 +79,6 @@ class DanbooruExtractor(BaseExtractor):
url = post["large_file_url"]
post["extension"] = "webm"
- if includes:
- meta_url = "{}/posts/{}.json?only={}".format(
- self.root, post["id"], includes)
- post.update(self.request(meta_url).json())
-
if url[0] == "/":
url = self.root + url
@@ -104,6 +101,19 @@ class DanbooruExtractor(BaseExtractor):
posts = self.request(url, params=params).json()
if "posts" in posts:
posts = posts["posts"]
+
+ if self.includes and posts:
+ if not pages and "only" not in params:
+ params["page"] = "b{}".format(posts[0]["id"] + 1)
+ params["only"] = self.includes
+ data = {
+ meta["id"]: meta
+ for meta in self.request(url, params=params).json()
+ }
+ for post in posts:
+ post.update(data[post["id"]])
+ params["only"] = None
+
yield from posts
if len(posts) < self.threshold:
@@ -255,7 +265,11 @@ class DanbooruPostExtractor(DanbooruExtractor):
def posts(self):
url = "{}/posts/{}.json".format(self.root, self.post_id)
- return (self.request(url).json(),)
+ post = self.request(url).json()
+ if self.includes:
+ params = {"only": self.includes}
+ post.update(self.request(url, params=params).json())
+ return (post,)
class DanbooruPopularExtractor(DanbooruExtractor):
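The pagination hook above is what the changelog's "reduce number of API requests" entry refers to: instead of one /posts/{id}.json call per post, the extended metadata for a whole 200-post page is fetched in a single extra request and merged back by ID. A simplified standalone sketch (requests-based; the Danbooru host and tag are illustrative):

    import requests

    def posts_with_metadata(tags, includes="artist_commentary,notes,parent"):
        url = "https://danbooru.donmai.us/posts.json"
        params = {"tags": tags, "limit": 200}
        posts = requests.get(url, params=params).json()
        if posts:
            # "b{id}" pages select posts with an ID below the given value,
            # so id+1 of the newest post re-selects exactly the batch above.
            params["page"] = "b{}".format(posts[0]["id"] + 1)
            params["only"] = includes + ",id"
            meta = {m["id"]: m
                    for m in requests.get(url, params=params).json()}
            for post in posts:
                post.update(meta[post["id"]])
        return posts
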
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 37475df..f532a97 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -320,7 +320,7 @@ class DeviantartExtractor(Extractor):
yield url, folder
def _update_content_default(self, deviation, content):
- public = "premium_folder_data" not in deviation
+ public = False if "premium_folder_data" in deviation else None
data = self.api.deviation_download(deviation["deviationid"], public)
content.update(data)
@@ -1180,7 +1180,11 @@ class DeviantartSearchExtractor(DeviantartExtractor):
}
while True:
- page = self.request(url, params=params).text
+ response = self.request(url, params=params)
+
+ if response.history and "/users/login" in response.url:
+ raise exception.StopExtraction("HTTP redirect to login page")
+ page = response.text
items , pos = text.rextract(page, r'\"items\":[', ']')
cursor, pos = text.extract(page, r'\"cursor\":\"', '\\', pos)
@@ -1280,6 +1284,7 @@ class DeviantartOAuthAPI():
self.folders = extractor.config("folders", False)
self.metadata = extractor.extra or extractor.config("metadata", False)
self.strategy = extractor.config("pagination")
+ self.public = extractor.config("public", True)
self.client_id = extractor.config("client-id")
if self.client_id:
@@ -1385,7 +1390,7 @@ class DeviantartOAuthAPI():
"mature_content": self.mature}
return self._pagination_list(endpoint, params=params, key="thread")
- def deviation(self, deviation_id, public=True):
+ def deviation(self, deviation_id, public=None):
"""Query and return info about a single Deviation"""
endpoint = "/deviation/" + deviation_id
deviation = self._call(endpoint, public=public)
@@ -1395,7 +1400,7 @@ class DeviantartOAuthAPI():
self._folders((deviation,))
return deviation
- def deviation_content(self, deviation_id, public=True):
+ def deviation_content(self, deviation_id, public=None):
"""Get extended content of a single Deviation"""
endpoint = "/deviation/content"
params = {"deviationid": deviation_id}
@@ -1408,7 +1413,7 @@ class DeviantartOAuthAPI():
self.log.warning("Private Journal")
return content
- def deviation_download(self, deviation_id, public=True):
+ def deviation_download(self, deviation_id, public=None):
"""Get the original file download (if allowed)"""
endpoint = "/deviation/download/" + deviation_id
params = {"mature_content": self.mature}
@@ -1423,7 +1428,7 @@ class DeviantartOAuthAPI():
params = {"mature_content": self.mature}
return self._call(endpoint, params=params)["metadata"]
- def gallery(self, username, folder_id, offset=0, extend=True, public=True):
+ def gallery(self, username, folder_id, offset=0, extend=True, public=None):
"""Yield all Deviation-objects contained in a gallery folder"""
endpoint = "/gallery/" + folder_id
params = {"username": username, "offset": offset, "limit": 24,
@@ -1513,11 +1518,14 @@ class DeviantartOAuthAPI():
refresh_token_key, data["refresh_token"])
return "Bearer " + data["access_token"]
- def _call(self, endpoint, fatal=True, public=True, **kwargs):
+ def _call(self, endpoint, fatal=True, public=None, **kwargs):
"""Call an API endpoint"""
url = "https://www.deviantart.com/api/v1/oauth2" + endpoint
kwargs["fatal"] = None
+ if public is None:
+ public = self.public
+
while True:
if self.delay:
self.extractor.sleep(self.delay, "api")
@@ -1559,8 +1567,13 @@ class DeviantartOAuthAPI():
return data
def _pagination(self, endpoint, params,
- extend=True, public=True, unpack=False, key="results"):
+ extend=True, public=None, unpack=False, key="results"):
warn = True
+ if public is None:
+ public = self.public
+ elif not public:
+ self.public = False
+
while True:
data = self._call(endpoint, params=params, public=public)
if key not in data:
@@ -1575,7 +1588,7 @@ class DeviantartOAuthAPI():
if public and len(results) < params["limit"]:
if self.refresh_token_key:
self.log.debug("Switching to private access token")
- public = False
+ self.public = public = False
continue
elif data["has_more"] and warn:
warn = False
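Taken together, these hunks make public a tri-state: an explicit True/False from a caller wins, None defers to the new extractor.deviantart.public option, and a truncated listing permanently downgrades to the private token. A reduced sketch of that selection logic:

    class TokenPicker:
        """Mimics the public/private token selection introduced above."""

        def __init__(self, public_default=True):
            self.public = public_default   # extractor.deviantart.public

        def pick(self, public=None):
            if public is None:             # caller did not insist either way
                public = self.public
            return "public-token" if public else "private-token"

    api = TokenPicker(public_default=False)          # "public": false in config
    assert api.pick() == "private-token"             # forced private access
    assert api.pick(public=True) == "public-token"   # explicit override works
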
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index facd3db..958c4b5 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -30,7 +30,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
self.api_root = self.root
if self.category == "realbooru":
- self._file_url = self._file_url_realbooru
+ self.items = self._items_realbooru
self._tags = self._tags_realbooru
def _api_request(self, params):
@@ -129,6 +129,28 @@ class GelbooruV02Extractor(booru.BooruExtractor):
self.root, md5[0:2], md5[2:4], md5, url.rpartition(".")[2])
return url
+ def _items_realbooru(self):
+ from .common import Message
+ data = self.metadata()
+
+ for post in self.posts():
+ try:
+ html = self._html(post)
+ url = post["file_url"] = text.rextract(
+ html, 'href="', '"', html.index(">Original<"))[0]
+ except Exception:
+ self.log.debug("Unable to fetch download URL for post %s "
+ "(md5: %s)", post.get("id"), post.get("md5"))
+ continue
+
+ text.nameext_from_url(url, post)
+ post.update(data)
+ self._prepare(post)
+ self._tags(post, html)
+
+ yield Message.Directory, post
+ yield Message.Url, url, post
+
def _tags_realbooru(self, post, page):
tag_container = text.extr(page, 'id="tagLink"', '</div>')
tags = collections.defaultdict(list)
@@ -404,7 +426,7 @@ class GelbooruV02PostExtractor(GelbooruV02Extractor):
},
}),
("https://realbooru.com/index.php?page=post&s=view&id=668483", {
- "pattern": r"https://realbooru\.com/images/dc/b5"
+ "pattern": r"https://realbooru\.com//?images/dc/b5"
r"/dcb5c0ce9ec0bf74a6930608985f4719\.jpeg",
"content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
"options": (("tags", True),),
diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py
index 9999283..4ab26ae 100644
--- a/gallery_dl/extractor/generic.py
+++ b/gallery_dl/extractor/generic.py
@@ -1,16 +1,19 @@
# -*- coding: utf-8 -*-
-"""Extractor for images in a generic web page."""
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Generic information extractor"""
from .common import Extractor, Message
from .. import config, text
-import re
import os.path
+import re
class GenericExtractor(Extractor):
"""Extractor for images in a generic web page."""
-
category = "generic"
directory_fmt = ("{category}", "{pageurl}")
archive_fmt = "{imageurl}"
@@ -18,19 +21,19 @@ class GenericExtractor(Extractor):
# By default, the generic extractor is disabled
# and the "g(eneric):" prefix in url is required.
# If the extractor is enabled, make the prefix optional
- pattern = r"(?ix)(?P<generic>g(?:eneric)?:)"
+ pattern = r"(?i)(?P<generic>g(?:eneric)?:)"
if config.get(("extractor", "generic"), "enabled"):
pattern += r"?"
# The generic extractor pattern should match (almost) any valid url
# Based on: https://tools.ietf.org/html/rfc3986#appendix-B
- pattern += r"""
- (?P<scheme>https?://)? # optional http(s) scheme
- (?P<domain>[-\w\.]+) # required domain
- (?P<path>/[^?#]*)? # optional path
- (?:\?(?P<query>[^#]*))? # optional query
- (?:\#(?P<fragment>.*))? # optional fragment
- """
+ pattern += (
+ r"(?P<scheme>https?://)?" # optional http(s) scheme
+ r"(?P<domain>[-\w\.]+)" # required domain
+ r"(?P<path>/[^?#]*)?" # optional path
+ r"(?:\?(?P<query>[^#]*))?" # optional query
+ r"(?:\#(?P<fragment>.*))?" # optional fragment
+ )
test = (
("generic:https://www.nongnu.org/lzip/", {
@@ -49,19 +52,20 @@ class GenericExtractor(Extractor):
"count": 2,
"pattern": "^https://räksmörgås.josefsson.org/",
}),
+ ("g:https://en.wikipedia.org/Main_Page"),
+ ("g:https://example.org/path/to/file?que=1?&ry=2/#fragment"),
+ ("g:https://example.org/%27%3C%23/%23%3E%27.htm?key=%3C%26%3E"),
("generic:https://en.wikipedia.org/Main_Page"),
("generic:https://example.org/path/to/file?que=1?&ry=2/#fragment"),
("generic:https://example.org/%27%3C%23/%23%3E%27.htm?key=%3C%26%3E"),
)
def __init__(self, match):
- """Init."""
Extractor.__init__(self, match)
# Strip the "g(eneric):" prefix
# and inform about "forced" or "fallback" mode
if match.group('generic'):
- self.log.info("Forcing use of generic information extractor.")
self.url = match.group(0).partition(":")[2]
else:
self.log.info("Falling back on generic information extractor.")
@@ -93,7 +97,6 @@ class GenericExtractor(Extractor):
pass
images = enumerate(imgs, 1)
- yield Message.Version, 1
yield Message.Directory, data
for data["num"], (url, imgdata) in images:
@@ -158,11 +161,13 @@ class GenericExtractor(Extractor):
image urls; this pattern matches only the first url; remaining urls
will be matched by the "imageurl_pattern_ext" pattern below.
"""
- imageurl_pattern_src = r"""(?ix)
- <(?:img|video|source)\s.*? # <img>, <video> or <source>
- src(?:set)?=["']? # src or srcset attributes
- (?P<URL>[^"'\s>]+) # url
- """
+
+ imageurl_pattern_src = (
+ r"(?i)"
+ r"<(?:img|video|source)\s[^>]*" # <img>, <video> or <source>
+ r"src(?:set)?=[\"']?" # src or srcset attributes
+ r"(?P<URL>[^\"'\s>]+)" # url
+ )
"""
2: Look anywhere for urls containing common image/video extensions
@@ -176,12 +181,13 @@ class GenericExtractor(Extractor):
urls in html tags.
"""
- imageurl_pattern_ext = r"""(?ix)
- (?:[^?&#"'>\s]+) # anything until dot+extension
- \.(?:jpe?g|jpe|png|gif
- |web[mp]|mp4|mkv|og[gmv]|opus) # dot + image/video extensions
- (?:[^"'<>\s]*)? # optional query and fragment
- """
+ imageurl_pattern_ext = (
+ r"(?i)"
+ r"(?:[^?&#\"'>\s]+)" # anything until dot+extension
+ # dot + image/video extensions
+ r"\.(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus)"
+ r"(?:[^\"'<>\s]*)?" # optional query and fragment
+ )
imageurls_src = re.findall(imageurl_pattern_src, page)
imageurls_ext = re.findall(imageurl_pattern_ext, page)
@@ -221,7 +227,7 @@ class GenericExtractor(Extractor):
absimageurls.append(self.baseurl + '/' + u)
# Remove duplicates
- absimageurls = set(absimageurls)
+ absimageurls = dict.fromkeys(absimageurls)
# Create the image metadata dict and add imageurl to it
# (image filename and extension are added by items())
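Two notes on the generic.py changes: the regexes were rewritten without the x (verbose) flag but keep the same semantics, and the final dedup swapped set() for dict.fromkeys(). Both drop duplicates, but dict.fromkeys (insertion-ordered since Python 3.7) preserves first-seen order, so images keep their on-page ordering:

    urls = ["a.jpg", "b.jpg", "a.jpg", "c.jpg"]
    print(list(set(urls)))            # duplicates gone, order arbitrary
    print(list(dict.fromkeys(urls)))  # ['a.jpg', 'b.jpg', 'c.jpg']
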
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index 2dfc721..e01a4ed 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -123,6 +123,9 @@ class HentaifoundryExtractor(Extractor):
def _init_site_filters(self):
"""Set site-internal filters to show all images"""
+ if self.session.cookies.get("PHPSESSID", domain=self.cookiedomain):
+ return
+
url = self.root + "/?enterAgree=1"
self.request(url, method="HEAD")
@@ -153,7 +156,6 @@ class HentaifoundryExtractor(Extractor):
"rating_scat" : "1",
"rating_incest" : "1",
"rating_rape" : "1",
- "filter_media" : "A",
"filter_order" : "date_new",
"filter_type" : "0",
"YII_CSRF_TOKEN" : text.unquote(text.extr(
diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py
index 7c656be..30158b4 100644
--- a/gallery_dl/extractor/hotleak.py
+++ b/gallery_dl/extractor/hotleak.py
@@ -58,7 +58,7 @@ def decode_video_url(url):
class HotleakPostExtractor(HotleakExtractor):
"""Extractor for individual posts on hotleak"""
subcategory = "post"
- pattern = (BASE_PATTERN + r"/(?!hot|creators|videos|photos)"
+ pattern = (BASE_PATTERN + r"/(?!(?:hot|creators|videos|photos)(?:$|/))"
r"([^/]+)/(photo|video)/(\d+)")
test = (
("https://hotleak.vip/kaiyakawaii/photo/1617145", {
@@ -117,7 +117,8 @@ class HotleakPostExtractor(HotleakExtractor):
class HotleakCreatorExtractor(HotleakExtractor):
"""Extractor for all posts from a hotleak creator"""
subcategory = "creator"
- pattern = BASE_PATTERN + r"/(?!hot|creators|videos|photos)([^/?#]+)/?$"
+ pattern = (BASE_PATTERN + r"/(?!(?:hot|creators|videos|photos)(?:$|/))"
+ r"([^/?#]+)/?$")
test = (
("https://hotleak.vip/kaiyakawaii", {
"range": "1-200",
diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py
index 14aa16f..8b18d5e 100644
--- a/gallery_dl/extractor/imagechest.py
+++ b/gallery_dl/extractor/imagechest.py
@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Leonid "Bepis" Pavel
+# Copyright 2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from galleries at https://imgchest.com/"""
+"""Extractors for https://imgchest.com/"""
from .common import GalleryExtractor
from .. import text, exception
@@ -19,7 +20,14 @@ class ImagechestGalleryExtractor(GalleryExtractor):
pattern = r"(?:https?://)?(?:www\.)?imgchest\.com/p/([A-Za-z0-9]{11})"
test = (
("https://imgchest.com/p/3na7kr3by8d", {
- "url": "f095b4f78c051e5a94e7c663814d1e8d4c93c1f7",
+ "pattern": r"https://cdn\.imgchest\.com/files/\w+\.(jpg|png)",
+ "keyword": {
+ "count": 3,
+ "gallery_id": "3na7kr3by8d",
+ "num": int,
+ "title": "Wizardry - Video Game From The Mid 80's",
+ },
+ "url": "7328ca4ec2459378d725e3be19f661d2b045feda",
"content": "076959e65be30249a2c651fbe6090dc30ba85193",
"count": 3
}),
@@ -43,6 +51,5 @@ class ImagechestGalleryExtractor(GalleryExtractor):
def images(self, page):
return [
(url, None)
- for url in text.extract_iter(
- page, 'property="og:image" content="', '"')
+ for url in text.extract_iter(page, 'data-url="', '"')
]
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index e49d29a..e190c7e 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -227,6 +227,12 @@ class MastodonAPI():
if username.startswith("id:"):
return username[3:]
+ try:
+ return self.account_lookup(username)["id"]
+ except Exception:
+ # fall back to account search
+ pass
+
if "@" in username:
handle = "@" + username
else:
@@ -246,6 +252,11 @@ class MastodonAPI():
endpoint = "/v1/accounts/{}/following".format(account_id)
return self._pagination(endpoint, None)
+ def account_lookup(self, username):
+ endpoint = "/v1/accounts/lookup"
+ params = {"acct": username}
+ return self._call(endpoint, params).json()
+
def account_search(self, query, limit=40):
"""Search for accounts"""
endpoint = "/v1/accounts/search"
diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py
index 725788a..5f4ceea 100644
--- a/gallery_dl/extractor/nitter.py
+++ b/gallery_dl/extractor/nitter.py
@@ -93,6 +93,11 @@ class NitterExtractor(BaseExtractor):
"filename" : name.rpartition(".")[0],
"extension": "mp4",
})
+
+ for url in text.extract_iter(
+ attachments, '<source src="', '"'):
+ append(text.nameext_from_url(url, {"url": url}))
+
else:
files = ()
tweet["count"] = len(files)
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index 486bf92..c6588de 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -35,6 +35,7 @@ class SexcomExtractor(Extractor):
def _pagination(self, url):
while True:
extr = text.extract_from(self.request(url).text)
+ url = extr('<link rel="next" href="', '"')
while True:
href = extr('<a class="image_wrapper" href="', '"')
@@ -42,11 +43,9 @@ class SexcomExtractor(Extractor):
break
yield self.root + href
- pager = extr('id="pagenum"', '</div>')
- url = text.extr(pager, ' href="', '"')
if not url:
return
- url = text.urljoin(self.root, url)
+ url = text.urljoin(self.root, text.unescape(url))
def _parse_pin(self, url):
response = self.request(url, fatal=False)
@@ -71,9 +70,12 @@ class SexcomExtractor(Extractor):
info = extr("player.updateSrc(", ");")
if info:
- path = text.extr(info, "src: '", "'")
- data["filename"] = path.rpartition("/")[2]
- data["extension"] = "mp4"
+ try:
+ path, _ = text.rextract(
+ info, "src: '", "'", info.index("label: 'HD'"))
+ except ValueError:
+ path = text.extr(info, "src: '", "'")
+ text.nameext_from_url(path, data)
data["url"] = path
else:
iframe = extr('<iframe', '>')
@@ -132,7 +134,8 @@ class SexcomPinExtractor(SexcomExtractor):
}),
# video
("https://www.sex.com/pin/55748341/", {
- "pattern": "https://www.sex.com/video/stream/776229/hd",
+ "pattern": r"https://cdn\.sex\.com/videos/pinporn"
+ r"/2018/02/10/776229_hd\.mp4",
"content": "e1a5834869163e2c4d1ca2677f5b7b367cf8cfff",
}),
# pornhub embed
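The HD fix uses the same backwards-search idea as text.rextract: locate the "label: 'HD'" marker in the player configuration and take the src immediately preceding it, falling back to the first src when no HD variant exists. In plain Python (the config string is illustrative):

    def video_source(info):
        try:
            end = info.index("label: 'HD'")
        except ValueError:                       # no HD variant: first src
            start = info.index("src: '") + 6
        else:
            start = info.rindex("src: '", 0, end) + 6
        return info[start:info.index("'", start)]

    info = ("src: 'https://cdn.sex.com/videos/pinporn/2018/02/10/776229_sd.mp4',"
            " label: 'SD',"
            " src: 'https://cdn.sex.com/videos/pinporn/2018/02/10/776229_hd.mp4',"
            " label: 'HD'")
    print(video_source(info))   # the _hd.mp4 variant
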
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index 278ad14..f6e8bc0 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -119,15 +119,14 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
def products(self):
url = self.item_url + "/products.json"
+ params = {"page": 1}
- while url:
- response = self.request(url)
- yield from response.json()["products"]
-
- url = response.links.get("next")
- if not url:
+ while True:
+ data = self.request(url, params=params).json()["products"]
+ if not data:
return
- url = url["url"]
+ yield from data
+ params["page"] += 1
class ShopifyProductExtractor(ShopifyExtractor):
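The collection fix replaces Link-header pagination, which /products.json stopped providing, with plain ?page=N pagination that stops at the first empty batch. As a standalone sketch (the shop URL is illustrative):

    import requests

    def products(collection_url):
        params = {"page": 1}
        while True:
            data = requests.get(collection_url + "/products.json",
                                params=params).json()["products"]
            if not data:
                return
            yield from data
            params["page"] += 1

    # for product in products("https://shop.example.com/collections/all"): ...
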
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 89d96d7..2ccc7e5 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -105,6 +105,10 @@ class TwitterExtractor(Extractor):
continue
seen_tweets.add(data["id_str"])
+ if "withheld_scope" in data:
+ txt = data.get("full_text") or data.get("text") or ""
+ self.log.warning("'%s' (%s)", txt, data["id_str"])
+
files = []
if "extended_entities" in data:
self._extract_media(
@@ -256,19 +260,26 @@ class TwitterExtractor(Extractor):
if "legacy" in tweet:
tweet = tweet["legacy"]
+ tweet_id = int(tweet["id_str"])
+ if tweet_id >= 300000000000000:
+ date = text.parse_timestamp(
+ ((tweet_id >> 22) + 1288834974657) // 1000)
+ else:
+ date = text.parse_datetime(
+ tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
+
tget = tweet.get
tdata = {
- "tweet_id" : text.parse_int(tweet["id_str"]),
+ "tweet_id" : tweet_id,
"retweet_id" : text.parse_int(
tget("retweeted_status_id_str")),
"quote_id" : text.parse_int(
tget("quoted_by_id_str")),
"reply_id" : text.parse_int(
tget("in_reply_to_status_id_str")),
- "date" : text.parse_datetime(
- tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
- "user" : self._user or author,
+ "date" : date,
"author" : author,
+ "user" : self._user or author,
"lang" : tweet["lang"],
"favorite_count": tget("favorite_count"),
"quote_count" : tget("quote_count"),
@@ -321,8 +332,10 @@ class TwitterExtractor(Extractor):
user = self.api.user_by_screen_name(user["screen_name"])["legacy"]
uget = user.get
- entities = user["entities"]
+ if uget("withheld_scope"):
+ self.log.warning("'%s'", uget("description"))
+ entities = user["entities"]
self._user_cache[uid] = udata = {
"id" : text.parse_int(uid),
"name" : user["screen_name"],
@@ -398,10 +411,8 @@ class TwitterExtractor(Extractor):
except Exception:
yield tweet
- def _make_tweet(self, user, id_str, url, timestamp):
+ def _make_tweet(self, user, url, id_str):
return {
- "created_at": text.parse_timestamp(timestamp).strftime(
- "%a %b %d %H:%M:%S +0000 %Y"),
"id_str": id_str,
"lang": None,
"user": user,
@@ -564,6 +575,12 @@ class TwitterLikesExtractor(TwitterExtractor):
def tweets(self):
return self.api.user_likes(self.user)
+ def _transform_tweet(self, tweet):
+ tdata = TwitterExtractor._transform_tweet(self, tweet)
+ tdata["date_liked"] = text.parse_timestamp(
+ (int(tweet["sortIndex"]) >> 20) // 1000)
+ return tdata
+
class TwitterBookmarkExtractor(TwitterExtractor):
"""Extractor for bookmarked tweets"""
@@ -574,6 +591,12 @@ class TwitterBookmarkExtractor(TwitterExtractor):
def tweets(self):
return self.api.user_bookmarks()
+ def _transform_tweet(self, tweet):
+ tdata = TwitterExtractor._transform_tweet(self, tweet)
+ tdata["date_bookmarked"] = text.parse_timestamp(
+ (int(tweet["sortIndex"]) >> 20) // 1000)
+ return tdata
+
class TwitterListExtractor(TwitterExtractor):
"""Extractor for Twitter lists"""
@@ -593,7 +616,11 @@ class TwitterListMembersExtractor(TwitterExtractor):
"""Extractor for members of a Twitter list"""
subcategory = "list-members"
pattern = BASE_PATTERN + r"/i/lists/(\d+)/members"
- test = ("https://twitter.com/i/lists/784214683683127296/members",)
+ test = ("https://twitter.com/i/lists/784214683683127296/members", {
+ "pattern": TwitterTimelineExtractor.pattern,
+ "range": "1-40",
+ "count": 40,
+ })
def items(self):
self.login()
@@ -780,6 +807,16 @@ class TwitterTweetExtractor(TwitterExtractor):
("cards-blacklist", ("twitch.tv",))),
"count": 0,
}),
+ # retweet
+ ("https://twitter.com/jessica_3978/status/1296304589591810048", {
+ "options": (("retweets", True),),
+ "count": 2,
+ "keyword": {
+ "tweet_id" : 1296304589591810048,
+ "retweet_id": 1296296016002547713,
+ "date" : "dt:2020-08-20 04:34:32",
+ },
+ }),
# original retweets (#1026)
("https://twitter.com/jessica_3978/status/1296304589591810048", {
"options": (("retweets", "original"),),
@@ -915,9 +952,8 @@ class TwitterAvatarExtractor(TwitterExtractor):
url = url.replace("_normal.", ".")
id_str = url.rsplit("/", 2)[1]
- timestamp = ((int(id_str) >> 22) + 1288834974657) // 1000
- return (self._make_tweet(user, id_str, url, timestamp),)
+ return (self._make_tweet(user, url, id_str),)
class TwitterBackgroundExtractor(TwitterExtractor):
@@ -932,7 +968,7 @@ class TwitterBackgroundExtractor(TwitterExtractor):
"keyword": {
"date": "dt:2015-01-12 10:29:43",
"filename": "1421058583",
- "tweet_id": 0,
+ "tweet_id": 554586009367478272,
},
}),
("https://twitter.com/User16/header_photo", {
@@ -950,7 +986,8 @@ class TwitterBackgroundExtractor(TwitterExtractor):
except (KeyError, ValueError):
return ()
- return (self._make_tweet(user, None, url, timestamp),)
+ id_str = str((int(timestamp) * 1000 - 1288834974657) << 22)
+ return (self._make_tweet(user, url, id_str),)
class TwitterImageExtractor(Extractor):
@@ -1008,9 +1045,6 @@ class TwitterAPI():
auth_token = cookies.get("auth_token", domain=cookiedomain)
- if not auth_token:
- self.user_media = self.user_media_legacy
-
self.headers = {
"Accept": "*/*",
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
@@ -1071,6 +1105,7 @@ class TwitterAPI():
"withReactionsPerspective": False,
}
self.features = {
+ "blue_business_profile_image_shape_enabled": False,
"responsive_web_twitter_blue_verified_badge_is_enabled": True,
"responsive_web_graphql_exclude_directive_enabled": True,
"verified_phone_label_enabled": False,
@@ -1079,6 +1114,7 @@ class TwitterAPI():
"responsive_web_graphql_timeline_navigation_enabled": True,
}
self.features_pagination = {
+ "blue_business_profile_image_shape_enabled": False,
"responsive_web_twitter_blue_verified_badge_is_enabled": True,
"responsive_web_graphql_exclude_directive_enabled": True,
"verified_phone_label_enabled": False,
@@ -1103,7 +1139,7 @@ class TwitterAPI():
}
def tweet_detail(self, tweet_id):
- endpoint = "/graphql/zXaXQgfyR4GxE21uwYQSyA/TweetDetail"
+ endpoint = "/graphql/AV_lPTkN6Fc6LgerQpK8Zg/TweetDetail"
variables = {
"focalTweetId": tweet_id,
"referrer": "profile",
@@ -1121,7 +1157,7 @@ class TwitterAPI():
endpoint, variables, ("threaded_conversation_with_injections_v2",))
def user_tweets(self, screen_name):
- endpoint = "/graphql/9rys0A7w1EyqVd2ME0QCJg/UserTweets"
+ endpoint = "/graphql/BeHK76TOCY3P8nO-FWocjA/UserTweets"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1133,7 +1169,7 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_tweets_and_replies(self, screen_name):
- endpoint = "/graphql/ehMCHF3Mkgjsfz_aImqOsg/UserTweetsAndReplies"
+ endpoint = "/graphql/eZVlZu_1gwb6hMUDXBnZoQ/UserTweetsAndReplies"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1145,7 +1181,7 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_media(self, screen_name):
- endpoint = "/graphql/MA_EP2a21zpzNWKRkaPBMg/UserMedia"
+ endpoint = "/graphql/d_ONZLUHGCsErBCriRsLXg/UserMedia"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1178,7 +1214,7 @@ class TwitterAPI():
features=False)
def user_likes(self, screen_name):
- endpoint = "/graphql/XbHBYpgURwtklXj8NNxTDw/Likes"
+ endpoint = "/graphql/fN4-E0MjFJ9Cn7IYConL7g/Likes"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1191,15 +1227,18 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_bookmarks(self):
- endpoint = "/graphql/Xq0wQSWHlcfnXARLJGqTxg/Bookmarks"
+ endpoint = "/graphql/RV1g3b8n_SGOHwkqKYSCFw/Bookmarks"
variables = {
"count": 100,
}
+ features = self.features_pagination.copy()
+ features["graphql_timeline_v2_bookmark_timeline"] = True
return self._pagination_tweets(
- endpoint, variables, ("bookmark_timeline", "timeline"), False)
+ endpoint, variables, ("bookmark_timeline_v2", "timeline"), False,
+ features=features)
def list_latest_tweets_timeline(self, list_id):
- endpoint = "/graphql/FDI9EiIp54KxEOWGiv3B4A/ListLatestTweetsTimeline"
+ endpoint = "/graphql/5DAiJG3bD77SiWEs4xViBw/ListLatestTweetsTimeline"
variables = {
"listId": list_id,
"count": 100,
@@ -1234,7 +1273,7 @@ class TwitterAPI():
["twitter_objects"]["live_events"][event_id])
def list_by_rest_id(self, list_id):
- endpoint = "/graphql/KlGpwq5CAt9tCfHkV2mwYQ/ListByRestId"
+ endpoint = "/graphql/D0EoyrDcct2MEqC-LnPzFg/ListByRestId"
params = {
"variables": self._json_dumps({
"listId": list_id,
@@ -1248,7 +1287,7 @@ class TwitterAPI():
raise exception.NotFoundError("list")
def list_members(self, list_id):
- endpoint = "/graphql/XsAJX17RLgLYU8GALIWg2g/ListMembers"
+ endpoint = "/graphql/tzsIIbGUH9RyFCVmtO2W2w/ListMembers"
variables = {
"listId": list_id,
"count": 100,
@@ -1258,7 +1297,7 @@ class TwitterAPI():
endpoint, variables, ("list", "members_timeline", "timeline"))
def user_following(self, screen_name):
- endpoint = "/graphql/vTZwBbd_gz6aI8v6Wze21A/Following"
+ endpoint = "/graphql/FaBzCqZXuQCb4PhB0RHqHw/Following"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1267,7 +1306,7 @@ class TwitterAPI():
return self._pagination_users(endpoint, variables)
def user_by_rest_id(self, rest_id):
- endpoint = "/graphql/QPSxc9lxrmrwnBzYkJI8eA/UserByRestId"
+ endpoint = "/graphql/S2BkcAyFMG--jef2N6Dgzw/UserByRestId"
params = {
"variables": self._json_dumps({
"userId": rest_id,
@@ -1278,7 +1317,7 @@ class TwitterAPI():
return self._call(endpoint, params)["data"]["user"]["result"]
def user_by_screen_name(self, screen_name):
- endpoint = "/graphql/nZjSkpOpSL5rWyIVdsKeLA/UserByScreenName"
+ endpoint = "/graphql/k26ASEiniqy4eXMdknTSoQ/UserByScreenName"
params = {
"variables": self._json_dumps({
"screen_name": screen_name,
@@ -1451,15 +1490,17 @@ class TwitterAPI():
params["cursor"] = cursor
def _pagination_tweets(self, endpoint, variables,
- path=None, stop_tweets=True, features=True):
+ path=None, stop_tweets=True, features=None):
extr = self.extractor
variables.update(self.variables)
original_retweets = (extr.retweets == "original")
pinned_tweet = extr.pinned
params = {"variables": None}
+ if features is None:
+ features = self.features_pagination
if features:
- params["features"] = self._json_dumps(self.features_pagination)
+ params["features"] = self._json_dumps(features)
while True:
params["variables"] = self._json_dumps(variables)
@@ -1550,6 +1591,7 @@ class TwitterAPI():
if "tweet" in tweet:
tweet = tweet["tweet"]
legacy = tweet["legacy"]
+ tweet["sortIndex"] = entry.get("sortIndex")
except KeyError:
extr.log.debug(
"Skipping %s (deleted)",
@@ -1574,10 +1616,17 @@ class TwitterAPI():
retweet["rest_id"]
tweet["author"] = \
retweet["core"]["user_results"]["result"]
- if "extended_entities" in retweet["legacy"] and \
+
+ rtlegacy = retweet["legacy"]
+ if "extended_entities" in rtlegacy and \
"extended_entities" not in legacy:
legacy["extended_entities"] = \
- retweet["legacy"]["extended_entities"]
+ rtlegacy["extended_entities"]
+ if "withheld_scope" in rtlegacy and \
+ "withheld_scope" not in legacy:
+ legacy["withheld_scope"] = \
+ rtlegacy["withheld_scope"]
+ legacy["full_text"] = rtlegacy["full_text"]
except KeyError:
pass
@@ -1590,6 +1639,8 @@ class TwitterAPI():
tweet["core"]["user_results"]["result"]
["legacy"]["screen_name"])
quoted["legacy"]["quoted_by_id_str"] = tweet["rest_id"]
+ quoted["sortIndex"] = entry.get("sortIndex")
+
yield quoted
except KeyError:
extr.log.debug(
@@ -1679,9 +1730,10 @@ class TwitterAPI():
"in_reply_to_status_id_str" not in tweet:
tweet["conversation_id_str"] = tweet["id_str"]
- tweet["created_at"] = text.parse_datetime(
- tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime(
- "%a %b %d %H:%M:%S +0000 %Y")
+ if int(tweet_id) < 300000000000000:
+ tweet["created_at"] = text.parse_datetime(
+ tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime(
+ "%a %b %d %H:%M:%S +0000 %Y")
if "video" in tweet:
video = tweet["video"]
diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py
new file mode 100644
index 0000000..1a39b5b
--- /dev/null
+++ b/gallery_dl/extractor/urlshortener.py
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for general-purpose URL shorteners"""
+
+from .common import BaseExtractor, Message
+from .. import exception
+
+
+class UrlshortenerExtractor(BaseExtractor):
+ """Base class for URL shortener extractors"""
+ basecategory = "urlshortener"
+
+
+INSTANCES = {
+ "bitly": {
+ "root": "https://bit.ly",
+ "pattern": r"bit\.ly",
+ },
+ "tco": {
+ # t.co sends 'http-equiv="refresh"' (200) when using browser UA
+ "headers": {"User-Agent": None},
+ "root": "https://t.co",
+ "pattern": r"t\.co",
+ },
+}
+
+BASE_PATTERN = UrlshortenerExtractor.update(INSTANCES)
+
+
+class UrlshortenerLinkExtractor(UrlshortenerExtractor):
+ """Extractor for general-purpose URL shorteners"""
+ subcategory = "link"
+ pattern = BASE_PATTERN + r"/([^/?&#]+)"
+ test = (
+ ("https://bit.ly/3cWIUgq", {
+ "count": 1,
+ "pattern": "^https://gumroad.com/l/storm_b1",
+ }),
+ ("https://t.co/bCgBY8Iv5n", {
+ "count": 1,
+ "pattern": "^https://twitter.com/elonmusk/status/"
+ "1421395561324896257/photo/1",
+ }),
+ ("https://t.co/abcdefghij", {
+ "exception": exception.NotFoundError,
+ }),
+ )
+
+ def __init__(self, match):
+ UrlshortenerExtractor.__init__(self, match)
+ self.id = match.group(match.lastindex)
+
+ try:
+ self.headers = INSTANCES[self.category]["headers"]
+ except Exception:
+ self.headers = None
+
+ def items(self):
+ response = self.request(
+ "{}/{}".format(self.root, self.id), headers=self.headers,
+ method="HEAD", allow_redirects=False, notfound="URL")
+ try:
+ yield Message.Queue, response.headers["location"], {}
+ except KeyError:
+ raise exception.StopExtraction("Unable to resolve short URL")
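The whole extractor boils down to one HEAD request with redirects disabled, forwarding the Location header as a new queue URL; the User-Agent override exists because t.co answers browser user agents with an HTML meta-refresh page instead of a redirect. A standalone sketch of the resolution step:

    import requests

    def resolve(short_url, headers=None):
        response = requests.head(short_url, headers=headers,
                                 allow_redirects=False)
        if response.status_code == 404:
            raise ValueError("short URL not found")
        try:
            return response.headers["location"]
        except KeyError:
            raise ValueError("unable to resolve short URL")

    print(resolve("https://bit.ly/3cWIUgq"))
    # https://gumroad.com/l/storm_b1
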
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 93a9148..c40736a 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.25.1"
+__version__ = "1.25.2"
diff --git a/test/test_downloader.py b/test/test_downloader.py
index bbee0f4..c65be95 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -289,6 +289,10 @@ SAMPLES = {
("webp", b"RIFF????WEBP"),
("avif", b"????ftypavif"),
("avif", b"????ftypavis"),
+ ("heic", b"????ftypheic"),
+ ("heic", b"????ftypheim"),
+ ("heic", b"????ftypheis"),
+ ("heic", b"????ftypheix"),
("svg" , b"<?xml"),
("ico" , b"\x00\x00\x01\x00"),
("cur" , b"\x00\x00\x02\x00"),