Diffstat (limited to 'gallery_dl')
-rw-r--r--  gallery_dl/downloader/http.py           11
-rw-r--r--  gallery_dl/extractor/__init__.py         1
-rw-r--r--  gallery_dl/extractor/batoto.py          10
-rw-r--r--  gallery_dl/extractor/bluesky.py        458
-rw-r--r--  gallery_dl/extractor/bunkr.py           83
-rw-r--r--  gallery_dl/extractor/deviantart.py      61
-rw-r--r--  gallery_dl/extractor/fanbox.py          29
-rw-r--r--  gallery_dl/extractor/fapachi.py          5
-rw-r--r--  gallery_dl/extractor/flickr.py           6
-rw-r--r--  gallery_dl/extractor/foolfuuka.py        9
-rw-r--r--  gallery_dl/extractor/gofile.py           4
-rw-r--r--  gallery_dl/extractor/idolcomplex.py     12
-rw-r--r--  gallery_dl/extractor/imagechest.py      42
-rw-r--r--  gallery_dl/extractor/imagehosts.py      17
-rw-r--r--  gallery_dl/extractor/kemonoparty.py    104
-rw-r--r--  gallery_dl/extractor/luscious.py         6
-rw-r--r--  gallery_dl/extractor/naverwebtoon.py     9
-rw-r--r--  gallery_dl/extractor/pinterest.py        5
-rw-r--r--  gallery_dl/extractor/pornpics.py         2
-rw-r--r--  gallery_dl/extractor/sankaku.py         12
-rw-r--r--  gallery_dl/extractor/skeb.py            70
-rw-r--r--  gallery_dl/extractor/twitter.py        217
-rw-r--r--  gallery_dl/extractor/vsco.py            91
-rw-r--r--  gallery_dl/extractor/weibo.py            9
-rw-r--r--  gallery_dl/extractor/wikimedia.py       55
-rw-r--r--  gallery_dl/extractor/zerochan.py         4
-rw-r--r--  gallery_dl/path.py                       2
-rw-r--r--  gallery_dl/version.py                    2
28 files changed, 1016 insertions, 320 deletions
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index f493947..f1d2c4a 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -13,12 +13,7 @@ import mimetypes
from requests.exceptions import RequestException, ConnectionError, Timeout
from .common import DownloaderBase
from .. import text, util
-
from ssl import SSLError
-try:
- from OpenSSL.SSL import Error as OpenSSLError
-except ImportError:
- OpenSSLError = SSLError
class HttpDownloader(DownloaderBase):
@@ -249,7 +244,7 @@ class HttpDownloader(DownloaderBase):
file_header = next(
content if response.raw.chunked
else response.iter_content(16), b"")
- except (RequestException, SSLError, OpenSSLError) as exc:
+ except (RequestException, SSLError) as exc:
msg = str(exc)
print()
continue
@@ -283,7 +278,7 @@ class HttpDownloader(DownloaderBase):
self.out.start(pathfmt.path)
try:
self.receive(fp, content, size, offset)
- except (RequestException, SSLError, OpenSSLError) as exc:
+ except (RequestException, SSLError) as exc:
msg = str(exc)
print()
continue
@@ -310,7 +305,7 @@ class HttpDownloader(DownloaderBase):
try:
for _ in response.iter_content(self.chunk_size):
pass
- except (RequestException, SSLError, OpenSSLError) as exc:
+ except (RequestException, SSLError) as exc:
print()
self.log.debug(
"Unable to consume response body (%s: %s); "
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index d624736..a665249 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -29,6 +29,7 @@ modules = [
"bbc",
"behance",
"blogger",
+ "bluesky",
"bunkr",
"catbox",
"chevereto",
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index e82cd09..2adb142 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -40,10 +40,18 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
def metadata(self, page):
extr = text.extract_from(page)
- manga, info, _ = extr("<title>", "<").rsplit(" - ", 3)
+ try:
+ manga, info, _ = extr("<title>", "<").rsplit(" - ", 3)
+ except ValueError:
+ manga = info = None
+
manga_id = text.extr(
extr('rel="canonical" href="', '"'), "/title/", "/")
+ if not manga:
+ manga = extr('link-hover">', "<")
+ info = text.remove_html(extr('link-hover">', "</"))
+
match = re.match(
r"(?:Volume\s+(\d+) )?"
r"\w+\s+(\d+)(.*)", info)
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
new file mode 100644
index 0000000..8de0d7b
--- /dev/null
+++ b/gallery_dl/extractor/bluesky.py
@@ -0,0 +1,458 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://bsky.app/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+from ..cache import cache, memcache
+
+BASE_PATTERN = r"(?:https?://)?bsky\.app"
+USER_PATTERN = BASE_PATTERN + r"/profile/([^/?#]+)"
+
+
+class BlueskyExtractor(Extractor):
+ """Base class for bluesky extractors"""
+ category = "bluesky"
+ directory_fmt = ("{category}", "{author[handle]}")
+ filename_fmt = "{createdAt[:19]}_{post_id}_{num}.{extension}"
+ archive_fmt = "{filename}"
+ root = "https://bsky.app"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user = match.group(1)
+
+ def _init(self):
+ meta = self.config("metadata") or ()
+ if meta:
+ if isinstance(meta, str):
+ meta = meta.replace(" ", "").split(",")
+ elif not isinstance(meta, (list, tuple)):
+ meta = ("user", "facets")
+ self._metadata_user = ("user" in meta)
+ self._metadata_facets = ("facets" in meta)
+
+ self.api = BlueskyAPI(self)
+ self._user = None
+
+ def items(self):
+ for post in self.posts():
+ if "post" in post:
+ post = post["post"]
+ post.update(post["record"])
+ del post["record"]
+
+ images = ()
+ if "embed" in post:
+ media = post["embed"]
+ if "media" in media:
+ media = media["media"]
+ if "images" in media:
+ images = media["images"]
+
+ if self._metadata_facets:
+ if "facets" in post:
+ post["hashtags"] = tags = []
+ post["mentions"] = dids = []
+ post["uris"] = uris = []
+ for facet in post["facets"]:
+ features = facet["features"][0]
+ if "tag" in features:
+ tags.append(features["tag"])
+ elif "did" in features:
+ dids.append(features["did"])
+ elif "uri" in features:
+ uris.append(features["uri"])
+ else:
+ post["hashtags"] = post["mentions"] = post["uris"] = ()
+
+ if self._metadata_user:
+ post["user"] = self._user or post["author"]
+
+ post["post_id"] = post["uri"].rpartition("/")[2]
+ post["count"] = len(images)
+ post["date"] = text.parse_datetime(
+ post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
+
+ yield Message.Directory, post
+
+ if not images:
+ continue
+
+ base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
+ "?did={}&cid=".format(post["author"]["did"]))
+ post["num"] = 0
+
+ for file in images:
+ post["num"] += 1
+ post["description"] = file["alt"]
+
+ try:
+ aspect = file["aspectRatio"]
+ post["width"] = aspect["width"]
+ post["height"] = aspect["height"]
+ except KeyError:
+ post["width"] = post["height"] = 0
+
+ image = file["image"]
+ post["filename"] = link = image["ref"]["$link"]
+ post["extension"] = image["mimeType"].rpartition("/")[2]
+
+ yield Message.Url, base + link, post
+
+ def posts(self):
+ return ()
+
+ def _make_post(self, actor, kind):
+ did = self.api._did_from_actor(actor)
+ profile = self.api.get_profile(did)
+
+ if kind not in profile:
+ return ()
+ cid = profile[kind].rpartition("/")[2].partition("@")[0]
+
+ return ({
+ "post": {
+ "embed": {"images": [{
+ "alt": kind,
+ "image": {
+ "$type" : "blob",
+ "ref" : {"$link": cid},
+ "mimeType": "image/jpeg",
+ "size" : 0,
+ },
+ "aspectRatio": {
+ "width" : 1000,
+ "height": 1000,
+ },
+ }]},
+ "author" : profile,
+ "record" : (),
+ "createdAt": "",
+ "uri" : cid,
+ },
+ },)
+
+
+class BlueskyUserExtractor(BlueskyExtractor):
+ subcategory = "user"
+ pattern = USER_PATTERN + r"$"
+ example = "https://bsky.app/profile/HANDLE"
+
+ def initialize(self):
+ pass
+
+ def items(self):
+ base = "{}/profile/{}/".format(self.root, self.user)
+ return self._dispatch_extractors((
+ (BlueskyAvatarExtractor , base + "avatar"),
+ (BlueskyBackgroundExtractor, base + "banner"),
+ (BlueskyPostsExtractor , base + "posts"),
+ (BlueskyRepliesExtractor , base + "replies"),
+ (BlueskyMediaExtractor , base + "media"),
+ (BlueskyLikesExtractor , base + "likes"),
+ ), ("media",))
+
+
+class BlueskyPostsExtractor(BlueskyExtractor):
+ subcategory = "posts"
+ pattern = USER_PATTERN + r"/posts"
+ example = "https://bsky.app/profile/HANDLE/posts"
+
+ def posts(self):
+ return self.api.get_author_feed(self.user, "posts_and_author_threads")
+
+
+class BlueskyRepliesExtractor(BlueskyExtractor):
+ subcategory = "replies"
+ pattern = USER_PATTERN + r"/replies"
+ example = "https://bsky.app/profile/HANDLE/replies"
+
+ def posts(self):
+ return self.api.get_author_feed(self.user, "posts_with_replies")
+
+
+class BlueskyMediaExtractor(BlueskyExtractor):
+ subcategory = "media"
+ pattern = USER_PATTERN + r"/media"
+ example = "https://bsky.app/profile/HANDLE/media"
+
+ def posts(self):
+ return self.api.get_author_feed(self.user, "posts_with_media")
+
+
+class BlueskyLikesExtractor(BlueskyExtractor):
+ subcategory = "likes"
+ pattern = USER_PATTERN + r"/likes"
+ example = "https://bsky.app/profile/HANDLE/likes"
+
+ def posts(self):
+ return self.api.get_actor_likes(self.user)
+
+
+class BlueskyFeedExtractor(BlueskyExtractor):
+ subcategory = "feed"
+ pattern = USER_PATTERN + r"/feed/([^/?#]+)"
+ example = "https://bsky.app/profile/HANDLE/feed/NAME"
+
+ def __init__(self, match):
+ BlueskyExtractor.__init__(self, match)
+ self.feed = match.group(2)
+
+ def posts(self):
+ return self.api.get_feed(self.user, self.feed)
+
+
+class BlueskyListExtractor(BlueskyExtractor):
+ subcategory = "list"
+ pattern = USER_PATTERN + r"/lists/([^/?#]+)"
+ example = "https://bsky.app/profile/HANDLE/lists/ID"
+
+ def __init__(self, match):
+ BlueskyExtractor.__init__(self, match)
+ self.list = match.group(2)
+
+ def posts(self):
+ return self.api.get_list_feed(self.user, self.list)
+
+
+class BlueskyFollowingExtractor(BlueskyExtractor):
+ subcategory = "following"
+ pattern = USER_PATTERN + r"/follows"
+ example = "https://bsky.app/profile/HANDLE/follows"
+
+ def items(self):
+ for user in self.api.get_follows(self.user):
+ url = "https://bsky.app/profile/" + user["did"]
+ yield Message.Queue, url, user
+
+
+class BlueskyPostExtractor(BlueskyExtractor):
+ subcategory = "post"
+ pattern = USER_PATTERN + r"/post/([^/?#]+)"
+ example = "https://bsky.app/profile/HANDLE/post/ID"
+
+ def __init__(self, match):
+ BlueskyExtractor.__init__(self, match)
+ self.post_id = match.group(2)
+
+ def posts(self):
+ return self.api.get_post_thread(self.user, self.post_id)
+
+
+class BlueskyAvatarExtractor(BlueskyExtractor):
+ subcategory = "avatar"
+ filename_fmt = "avatar_{post_id}.{extension}"
+ pattern = USER_PATTERN + r"/avatar"
+ example = "https://bsky.app/profile/HANDLE/avatar"
+
+ def posts(self):
+ return self._make_post(self.user, "avatar")
+
+
+class BlueskyBackgroundExtractor(BlueskyExtractor):
+ subcategory = "background"
+ filename_fmt = "background_{post_id}.{extension}"
+ pattern = USER_PATTERN + r"/ba(?:nner|ckground)"
+ example = "https://bsky.app/profile/HANDLE/banner"
+
+ def posts(self):
+ return self._make_post(self.user, "banner")
+
+
+class BlueskySearchExtractor(BlueskyExtractor):
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"/search(?:/|\?q=)(.+)"
+ example = "https://bsky.app/search?q=QUERY"
+
+ def posts(self):
+ return self.api.search_posts(self.user)
+
+
+class BlueskyAPI():
+ """Interface for the Bluesky API
+
+ https://www.docs.bsky.app/docs/category/http-reference
+ """
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.log = extractor.log
+ self.headers = {"Accept": "application/json"}
+
+ self.username, self.password = extractor._get_auth_info()
+ if self.username:
+ self.root = "https://bsky.social"
+ else:
+ self.root = "https://api.bsky.app"
+ self.authenticate = util.noop
+
+ def get_actor_likes(self, actor):
+ endpoint = "app.bsky.feed.getActorLikes"
+ params = {
+ "actor": self._did_from_actor(actor),
+ "limit": "100",
+ }
+ return self._pagination(endpoint, params)
+
+ def get_author_feed(self, actor, filter="posts_and_author_threads"):
+ endpoint = "app.bsky.feed.getAuthorFeed"
+ params = {
+ "actor" : self._did_from_actor(actor),
+ "filter": filter,
+ "limit" : "100",
+ }
+ return self._pagination(endpoint, params)
+
+ def get_feed(self, actor, feed):
+ endpoint = "app.bsky.feed.getFeed"
+ params = {
+ "feed" : "at://{}/app.bsky.feed.generator/{}".format(
+ self._did_from_actor(actor), feed),
+ "limit": "100",
+ }
+ return self._pagination(endpoint, params)
+
+ def get_follows(self, actor):
+ endpoint = "app.bsky.graph.getFollows"
+ params = {
+ "actor": self._did_from_actor(actor),
+ "limit": "100",
+ }
+ return self._pagination(endpoint, params, "follows")
+
+ def get_list_feed(self, actor, list):
+ endpoint = "app.bsky.feed.getListFeed"
+ params = {
+ "list" : "at://{}/app.bsky.graph.list/{}".format(
+ self._did_from_actor(actor), list),
+ "limit": "100",
+ }
+ return self._pagination(endpoint, params)
+
+ def get_post_thread(self, actor, post_id):
+ endpoint = "app.bsky.feed.getPostThread"
+ params = {
+ "uri": "at://{}/app.bsky.feed.post/{}".format(
+ self._did_from_actor(actor), post_id),
+ "depth" : self.extractor.config("depth", "0"),
+ "parentHeight": "0",
+ }
+
+ thread = self._call(endpoint, params)["thread"]
+ if "replies" not in thread:
+ return (thread,)
+
+ index = 0
+ posts = [thread]
+ while index < len(posts):
+ post = posts[index]
+ if "replies" in post:
+ posts.extend(post["replies"])
+ index += 1
+ return posts
+
+ @memcache(keyarg=1)
+ def get_profile(self, did):
+ endpoint = "app.bsky.actor.getProfile"
+ params = {"actor": did}
+ return self._call(endpoint, params)
+
+ @memcache(keyarg=1)
+ def resolve_handle(self, handle):
+ endpoint = "com.atproto.identity.resolveHandle"
+ params = {"handle": handle}
+ return self._call(endpoint, params)["did"]
+
+ def search_posts(self, query):
+ endpoint = "app.bsky.feed.searchPosts"
+ params = {
+ "q" : query,
+ "limit": "100",
+ }
+ return self._pagination(endpoint, params, "posts")
+
+ def _did_from_actor(self, actor):
+ if actor.startswith("did:"):
+ did = actor
+ else:
+ did = self.resolve_handle(actor)
+
+ if self.extractor._metadata_user:
+ self.extractor._user = self.get_profile(did)
+
+ return did
+
+ def authenticate(self):
+ self.headers["Authorization"] = self._authenticate_impl(self.username)
+
+ @cache(maxage=3600, keyarg=1)
+ def _authenticate_impl(self, username):
+ refresh_token = _refresh_token_cache(username)
+
+ if refresh_token:
+ self.log.info("Refreshing access token for %s", username)
+ endpoint = "com.atproto.server.refreshSession"
+ headers = {"Authorization": "Bearer " + refresh_token}
+ data = None
+ else:
+ self.log.info("Logging in as %s", username)
+ endpoint = "com.atproto.server.createSession"
+ headers = None
+ data = {
+ "identifier": username,
+ "password" : self.password,
+ }
+
+ url = "{}/xrpc/{}".format(self.root, endpoint)
+ response = self.extractor.request(
+ url, method="POST", headers=headers, json=data, fatal=None)
+ data = response.json()
+
+ if response.status_code != 200:
+ self.log.debug("Server response: %s", data)
+ raise exception.AuthenticationError('"{}: {}"'.format(
+ data.get("error"), data.get("message")))
+
+ _refresh_token_cache.update(self.username, data["refreshJwt"])
+ return "Bearer " + data["accessJwt"]
+
+ def _call(self, endpoint, params):
+ url = "{}/xrpc/{}".format(self.root, endpoint)
+
+ while True:
+ self.authenticate()
+ response = self.extractor.request(
+ url, params=params, headers=self.headers, fatal=None)
+
+ if response.status_code < 400:
+ return response.json()
+ if response.status_code == 429:
+ self.extractor.wait(seconds=60)
+ continue
+
+ self.extractor.log.debug("Server response: %s", response.text)
+ raise exception.StopExtraction(
+ "API request failed (%s %s)",
+ response.status_code, response.reason)
+
+ def _pagination(self, endpoint, params, key="feed"):
+ while True:
+ data = self._call(endpoint, params)
+ yield from data[key]
+
+ cursor = data.get("cursor")
+ if not cursor:
+ return
+ params["cursor"] = cursor
+
+
+@cache(maxage=84*86400, keyarg=0)
+def _refresh_token_cache(username):
+ return None
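Note on the new bluesky module: file URLs are assembled from the author's DID plus a blob CID via com.atproto.sync.getBlob. A standalone sketch of that flow, assuming the public AppView for handle resolution and bsky.social as the PDS host (accounts hosted on other PDSes would need a different host):

    import requests

    APPVIEW = "https://api.bsky.app"

    def resolve_handle(handle):
        # com.atproto.identity.resolveHandle -> {"did": "did:plc:..."}
        url = APPVIEW + "/xrpc/com.atproto.identity.resolveHandle"
        return requests.get(url, params={"handle": handle}).json()["did"]

    def blob_url(did, cid):
        # same URL shape as in BlueskyExtractor.items() above
        return ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
                "?did={}&cid={}".format(did, cid))

    did = resolve_handle("bsky.app")
    print(blob_url(did, "bafkrei..."))  # CID placeholder
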
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index e7fc14b..1a0e47d 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -6,32 +6,39 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://bunkrr.ru/"""
+"""Extractors for https://bunkr.sk/"""
from .lolisafe import LolisafeAlbumExtractor
from .. import text
-from urllib.parse import urlsplit, urlunsplit
-BASE_PATTERN = r"(?:https?://)?(?:app\.)?bunkr+\.(?:[rs]u|la|is|to)"
+BASE_PATTERN = (
+ r"(?:https?://)?(?:app\.)?(bunkr+"
+ r"\.(?:s[kiu]|ru|la|is|to|ac|black|cat|media|red|site|ws))"
+)
-MEDIA_DOMAIN_OVERRIDES = {
- "cdn9.bunkr.ru" : "c9.bunkr.ru",
- "cdn12.bunkr.ru": "media-files12.bunkr.la",
- "cdn-pizza.bunkr.ru": "pizza.bunkr.ru",
+LEGACY_DOMAINS = {
+ "bunkr.ru",
+ "bunkrr.ru",
+ "bunkr.su",
+ "bunkrr.su",
+ "bunkr.la",
+ "bunkr.is",
+ "bunkr.to",
}
-CDN_HOSTED_EXTENSIONS = (
- ".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts", ".wmv",
- ".zip", ".rar", ".7z",
-)
-
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
- """Extractor for bunkrr.ru albums"""
+ """Extractor for bunkr.sk albums"""
category = "bunkr"
- root = "https://bunkrr.ru"
+ root = "https://bunkr.sk"
pattern = BASE_PATTERN + r"/a/([^/?#]+)"
- example = "https://bunkrr.ru/a/ID"
+ example = "https://bunkr.sk/a/ID"
+
+ def __init__(self, match):
+ LolisafeAlbumExtractor.__init__(self, match)
+ domain = match.group(match.lastindex-1)
+ if domain not in LEGACY_DOMAINS:
+ self.root = "https://" + domain
def fetch_album(self, album_id):
# album metadata
@@ -53,46 +60,32 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
def _extract_files(self, urls):
for url in urls:
- if url.startswith("/"):
- try:
- url = self._extract_file(text.unescape(url))
- except Exception as exc:
- self.log.error("%s: %s", exc.__class__.__name__, exc)
- continue
-
- else:
- if url.lower().endswith(CDN_HOSTED_EXTENSIONS):
- scheme, domain, path, query, fragment = urlsplit(url)
- if domain in MEDIA_DOMAIN_OVERRIDES:
- domain = MEDIA_DOMAIN_OVERRIDES[domain]
- else:
- domain = domain.replace("cdn", "media-files", 1)
- url = urlunsplit((scheme, domain, path, query, fragment))
-
+ try:
+ url = self._extract_file(text.unescape(url))
+ except Exception as exc:
+ self.log.error("%s: %s", exc.__class__.__name__, exc)
+ continue
yield {"file": text.unescape(url)}
- def _extract_file(self, path):
- page = self.request(self.root + path).text
- if path[1] == "v":
- url = text.extr(page, '<source src="', '"')
- else:
- url = text.extr(page, '<img src="', '"')
- if not url:
- url = text.rextract(
- page, ' href="', '"', page.rindex("Download"))[0]
- return url
+ def _extract_file(self, url):
+ page = self.request(url).text
+ return (
+ text.extr(page, '<source src="', '"') or
+ text.extr(page, '<img src="', '"') or
+ text.rextract(page, ' href="', '"', page.rindex("Download"))[0]
+ )
class BunkrMediaExtractor(BunkrAlbumExtractor):
- """Extractor for bunkrr.ru media links"""
+ """Extractor for bunkr.sk media links"""
subcategory = "media"
directory_fmt = ("{category}",)
- pattern = BASE_PATTERN + r"/[vid]/([^/?#]+)"
- example = "https://bunkrr.ru/v/FILENAME"
+ pattern = BASE_PATTERN + r"(/[vid]/[^/?#]+)"
+ example = "https://bunkr.sk/v/FILENAME"
def fetch_album(self, album_id):
try:
- url = self._extract_file(urlsplit(self.url).path)
+ url = self._extract_file(self.root + self.album_id)
except Exception as exc:
self.log.error("%s: %s", exc.__class__.__name__, exc)
return (), {}
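Note on bunkr.py: the widened BASE_PATTERN captures the requested domain, and __init__ keeps it as root unless it is one of the dead LEGACY_DOMAINS. The normalization in isolation (choose_root is a hypothetical name):

    LEGACY_DOMAINS = {
        "bunkr.ru", "bunkrr.ru", "bunkr.su", "bunkrr.su",
        "bunkr.la", "bunkr.is", "bunkr.to",
    }

    def choose_root(domain, default="https://bunkr.sk"):
        # keep live domains as-is; send dead ones to the current canonical host
        if domain in LEGACY_DOMAINS:
            return default
        return "https://" + domain

    print(choose_root("bunkr.ru"))     # https://bunkr.sk
    print(choose_root("bunkr.black"))  # https://bunkr.black
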
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index bcfbe73..0cf4f88 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -62,7 +62,12 @@ class DeviantartExtractor(Extractor):
self.unwatch = None
if self.quality:
- self.quality = ",q_{}".format(self.quality)
+ if self.quality == "png":
+ self.quality = "-fullview.png?"
+ self.quality_sub = re.compile(r"-fullview\.[a-z0-9]+\?").sub
+ else:
+ self.quality = ",q_{}".format(self.quality)
+ self.quality_sub = re.compile(r",q_\d+").sub
if self.original != "image":
self._update_content = self._update_content_default
@@ -119,6 +124,12 @@ class DeviantartExtractor(Extractor):
"Skipping %s (deleted)", deviation["deviationid"])
continue
+ tier_access = deviation.get("tier_access")
+ if tier_access == "locked":
+ self.log.debug(
+ "Skipping %s (access locked)", deviation["deviationid"])
+ continue
+
if "premium_folder_data" in deviation:
data = self._fetch_premium(deviation)
if not data:
@@ -129,26 +140,7 @@ class DeviantartExtractor(Extractor):
yield Message.Directory, deviation
if "content" in deviation:
- content = deviation["content"]
-
- if self.original and deviation["is_downloadable"]:
- self._update_content(deviation, content)
- elif self.jwt:
- self._update_token(deviation, content)
- elif content["src"].startswith("https://images-wixmp-"):
- if self.intermediary and deviation["index"] <= 790677560:
- # https://github.com/r888888888/danbooru/issues/4069
- intermediary, count = re.subn(
- r"(/f/[^/]+/[^/]+)/v\d+/.*",
- r"/intermediary\1", content["src"], 1)
- if count:
- deviation["is_original"] = False
- deviation["_fallback"] = (content["src"],)
- content["src"] = intermediary
- if self.quality:
- content["src"] = re.sub(
- r",q_\d+", self.quality, content["src"], 1)
-
+ content = self._extract_content(deviation)
yield self.commit(deviation, content)
elif deviation["is_downloadable"]:
@@ -333,6 +325,33 @@ class DeviantartExtractor(Extractor):
deviation["extension"] = "txt"
return Message.Url, txt, deviation
+ def _extract_content(self, deviation):
+ content = deviation["content"]
+
+ if self.original and deviation["is_downloadable"]:
+ self._update_content(deviation, content)
+ return content
+
+ if self.jwt:
+ self._update_token(deviation, content)
+ return content
+
+ if content["src"].startswith("https://images-wixmp-"):
+ if self.intermediary and deviation["index"] <= 790677560:
+ # https://github.com/r888888888/danbooru/issues/4069
+ intermediary, count = re.subn(
+ r"(/f/[^/]+/[^/]+)/v\d+/.*",
+ r"/intermediary\1", content["src"], 1)
+ if count:
+ deviation["is_original"] = False
+ deviation["_fallback"] = (content["src"],)
+ content["src"] = intermediary
+ if self.quality:
+ content["src"] = self.quality_sub(
+ self.quality, content["src"], 1)
+
+ return content
+
@staticmethod
def _find_folder(folders, name, uuid):
if uuid.isdecimal():
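Note on deviantart.py: pairing a precompiled pattern with its replacement string lets quality = "png" rewrite the whole -fullview.<ext>? segment, while numeric qualities keep rewriting ,q_NN. The substitution in isolation (the URL is made up):

    import re

    quality_png = (re.compile(r"-fullview\.[a-z0-9]+\?").sub, "-fullview.png?")
    quality_num = (re.compile(r",q_\d+").sub, ",q_100")

    src = "https://images-wixmp.example/f/abc/img-fullview.jpg?token=x"
    sub, repl = quality_png
    print(sub(repl, src, 1))  # .../img-fullview.png?token=x
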
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index 61a3928..2223403 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -11,7 +11,8 @@ from .. import text
from ..cache import memcache
import re
-BASE_PATTERN = (
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?fanbox\.cc"
+USER_PATTERN = (
r"(?:https?://)?(?:"
r"(?!www\.)([\w-]+)\.fanbox\.cc|"
r"(?:www\.)?fanbox\.cc/@([\w-]+))"
@@ -290,7 +291,7 @@ class FanboxExtractor(Extractor):
class FanboxCreatorExtractor(FanboxExtractor):
"""Extractor for a Fanbox creator's works"""
subcategory = "creator"
- pattern = BASE_PATTERN + r"(?:/posts)?/?$"
+ pattern = USER_PATTERN + r"(?:/posts)?/?$"
example = "https://USER.fanbox.cc/"
def __init__(self, match):
@@ -305,7 +306,7 @@ class FanboxCreatorExtractor(FanboxExtractor):
class FanboxPostExtractor(FanboxExtractor):
"""Extractor for media from a single Fanbox post"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/posts/(\d+)"
+ pattern = USER_PATTERN + r"/posts/(\d+)"
example = "https://USER.fanbox.cc/posts/12345"
def __init__(self, match):
@@ -316,6 +317,28 @@ class FanboxPostExtractor(FanboxExtractor):
return (self._get_post_data(self.post_id),)
+class FanboxHomeExtractor(FanboxExtractor):
+ """Extractor for your Fanbox home feed"""
+ subcategory = "home"
+ pattern = BASE_PATTERN + r"/?$"
+ example = "https://fanbox.cc/"
+
+ def posts(self):
+ url = "https://api.fanbox.cc/post.listHome?limit=10"
+ return self._pagination(url)
+
+
+class FanboxSupportingExtractor(FanboxExtractor):
+ """Extractor for your supported Fanbox users feed"""
+ subcategory = "supporting"
+ pattern = BASE_PATTERN + r"/home/supporting"
+ example = "https://fanbox.cc/home/supporting"
+
+ def posts(self):
+ url = "https://api.fanbox.cc/post.listSupporting?limit=10"
+ return self._pagination(url)
+
+
class FanboxRedirectExtractor(Extractor):
"""Extractor for pixiv redirects to fanbox.cc"""
category = "fanbox"
diff --git a/gallery_dl/extractor/fapachi.py b/gallery_dl/extractor/fapachi.py
index 6e81519..80478ca 100644
--- a/gallery_dl/extractor/fapachi.py
+++ b/gallery_dl/extractor/fapachi.py
@@ -58,8 +58,9 @@ class FapachiUserExtractor(Extractor):
page = self.request("{}/{}/page/{}".format(
self.root, self.user, self.num)).text
for post in text.extract_iter(page, 'model-media-prew">', ">"):
- url = self.root + text.extr(post, '<a href="', '"')
- yield Message.Queue, url, data
+ path = text.extr(post, '<a href="', '"')
+ if path:
+ yield Message.Queue, self.root + path, data
if '">Next page</a>' not in page:
return
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index ea32765..f7dc3cc 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -386,7 +386,11 @@ class FlickrAPI(oauth.OAuth1API):
params["nojsoncallback"] = "1"
if self.api_key:
params["api_key"] = self.api_key
- data = self.request(self.API_URL, params=params).json()
+ response = self.request(self.API_URL, params=params)
+ try:
+ data = response.json()
+ except ValueError:
+ data = {"code": -1, "message": response.content}
if "code" in data:
msg = data.get("message")
self.log.debug("Server response: %s", data)
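Note on flickr.py: guarding response.json() with a ValueError fallback turns non-JSON bodies into an ordinary API-error dict instead of an unhandled exception. The pattern in isolation:

    def safe_json(response):
        try:
            return response.json()
        except ValueError:  # body was not JSON, e.g. an HTML error page
            return {"code": -1, "message": response.content}
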
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index cedac0c..715abcb 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -24,6 +24,8 @@ class FoolfuukaExtractor(BaseExtractor):
BaseExtractor.__init__(self, match)
if self.category == "b4k":
self.remote = self._remote_direct
+ elif self.category == "archivedmoe":
+ self.referer = False
def items(self):
yield Message.Directory, self.metadata()
@@ -53,9 +55,12 @@ class FoolfuukaExtractor(BaseExtractor):
def remote(self, media):
"""Resolve a remote media link"""
- needle = '<meta http-equiv="Refresh" content="0; url='
page = self.request(media["remote_media_link"]).text
- return text.extr(page, needle, '"')
+ url = text.extr(page, 'http-equiv="Refresh" content="0; url=', '"')
+ if url.endswith(".webm") and \
+ url.startswith("https://thebarchive.com/"):
+ return url[:-1]
+ return url
@staticmethod
def _remote_direct(media):
diff --git a/gallery_dl/extractor/gofile.py b/gallery_dl/extractor/gofile.py
index 3928792..289f91c 100644
--- a/gallery_dl/extractor/gofile.py
+++ b/gallery_dl/extractor/gofile.py
@@ -73,7 +73,7 @@ class GofileFolderExtractor(Extractor):
def _get_website_token(self):
self.log.debug("Fetching website token")
page = self.request(self.root + "/dist/js/alljs.js").text
- return text.extr(page, 'fetchData.websiteToken = "', '"')
+ return text.extr(page, 'fetchData.wt = "', '"')
def _get_content(self, content_id, password=None):
if password is not None:
@@ -81,7 +81,7 @@ class GofileFolderExtractor(Extractor):
return self._api_request("getContent", {
"contentId" : content_id,
"token" : self.api_token,
- "websiteToken": self.website_token,
+ "wt" : self.website_token,
"password" : password,
})
diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py
index f70a948..c249a3e 100644
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -35,7 +35,7 @@ class IdolcomplexExtractor(SankakuExtractor):
def _init(self):
self.find_pids = re.compile(
- r" href=[\"#]/\w\w/posts/([0-9a-f]+)"
+ r" href=[\"#]/\w\w/posts/(\w+)"
).findall
self.find_tags = re.compile(
r'tag-type-([^"]+)">\s*<a [^>]*?href="/[^?]*\?tags=([^"]+)'
@@ -101,7 +101,7 @@ class IdolcomplexExtractor(SankakuExtractor):
page = self.request(url, retries=10).text
extr = text.extract_from(page)
- tags = extr("<title>", " | ")
+ pid_alnum = extr('/posts/', '"')
vavg = extr('itemprop="ratingValue">', "<")
vcnt = extr('itemprop="reviewCount">', "<")
pid = extr(">Post ID:", "<")
@@ -121,8 +121,8 @@ class IdolcomplexExtractor(SankakuExtractor):
data = {
"id" : text.parse_int(pid),
+ "id_alnum" : pid_alnum,
"md5" : file_url.rpartition("/")[2].partition(".")[0],
- "tags" : text.unescape(tags),
"vote_average": text.parse_float(vavg),
"vote_count" : text.parse_int(vcnt),
"created_at" : created,
@@ -206,8 +206,8 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
if not next_url:
return
- next_params = text.parse_query(text.unescape(text.unescape(
- next_url).lstrip("?/")))
+ next_params = text.parse_query(text.unquote(text.unescape(
+ text.unescape(next_url).lstrip("?/"))))
if "next" in next_params:
# stop if the same "next" value occurs twice in a row (#265)
@@ -258,7 +258,7 @@ class IdolcomplexPostExtractor(IdolcomplexExtractor):
"""Extractor for single images from idol.sankakucomplex.com"""
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/posts?/(?:show/)?([0-9a-f]+)"
+ pattern = BASE_PATTERN + r"/posts?/(?:show/)?(\w+)"
example = "https://idol.sankakucomplex.com/posts/0123456789abcdef"
def __init__(self, match):
diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py
index 9199d12..115fff3 100644
--- a/gallery_dl/extractor/imagechest.py
+++ b/gallery_dl/extractor/imagechest.py
@@ -9,15 +9,17 @@
"""Extractors for https://imgchest.com/"""
-from .common import GalleryExtractor
+from .common import GalleryExtractor, Extractor, Message
from .. import text, exception
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?imgchest\.com"
+
class ImagechestGalleryExtractor(GalleryExtractor):
"""Extractor for image galleries from imgchest.com"""
category = "imagechest"
root = "https://imgchest.com"
- pattern = r"(?:https?://)?(?:www\.)?imgchest\.com/p/([A-Za-z0-9]{11})"
+ pattern = BASE_PATTERN + r"/p/([A-Za-z0-9]{11})"
example = "https://imgchest.com/p/abcdefghijk"
def __init__(self, match):
@@ -83,6 +85,42 @@ class ImagechestGalleryExtractor(GalleryExtractor):
]
+class ImagechestUserExtractor(Extractor):
+ """Extractor for imgchest.com user profiles"""
+ category = "imagechest"
+ subcategory = "user"
+ root = "https://imgchest.com"
+ pattern = BASE_PATTERN + r"/u/([^/?#]+)"
+ example = "https://imgchest.com/u/USER"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user = match.group(1)
+
+ def items(self):
+ url = self.root + "/api/posts"
+ params = {
+ "page" : 1,
+ "sort" : "new",
+ "tag" : "",
+ "q" : "",
+ "username": text.unquote(self.user),
+ "nsfw" : "true",
+ }
+
+ while True:
+ try:
+ data = self.request(url, params=params).json()["data"]
+ except (TypeError, KeyError):
+ return
+
+ for gallery in data:
+ gallery["_extractor"] = ImagechestGalleryExtractor
+ yield Message.Queue, gallery["link"], gallery
+
+ params["page"] += 1
+
+
class ImagechestAPI():
"""Interface for the Image Chest API
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 94019bd..5f1e0f4 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -183,6 +183,23 @@ class ImagetwistImageExtractor(ImagehostImageExtractor):
return url, filename
+class ImagetwistGalleryExtractor(ImagehostImageExtractor):
+ """Extractor for galleries from imagetwist.com"""
+ category = "imagetwist"
+ subcategory = "gallery"
+ pattern = (r"(?:https?://)?((?:www\.|phun\.)?"
+ r"image(?:twist|haha)\.com/(p/[^/?#]+/\d+))")
+ example = "https://imagetwist.com/p/USER/12345/NAME"
+
+ def items(self):
+ data = {"_extractor": ImagetwistImageExtractor}
+ root = self.page_url[:self.page_url.find("/", 8)]
+ page = self.request(self.page_url).text
+ gallery = text.extr(page, 'class="gallerys', "</div")
+ for path in text.extract_iter(gallery, ' href="', '"'):
+ yield Message.Queue, root + path, data
+
+
class ImgspiceImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imgspice.com"""
category = "imgspice"
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 10228b5..fd5a73a 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://kemono.party/"""
+"""Extractors for https://kemono.su/"""
from .common import Extractor, Message
from .. import text, util, exception
@@ -23,11 +23,11 @@ HASH_PATTERN = r"/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})"
class KemonopartyExtractor(Extractor):
"""Base class for kemonoparty extractors"""
category = "kemonoparty"
- root = "https://kemono.party"
+ root = "https://kemono.su"
directory_fmt = ("{category}", "{service}", "{user}")
filename_fmt = "{id}_{title[:180]}_{num:>02}_{filename[:180]}.{extension}"
archive_fmt = "{service}_{user}_{id}_{num}"
- cookies_domain = ".kemono.party"
+ cookies_domain = ".kemono.su"
def __init__(self, match):
domain = match.group(1)
@@ -39,6 +39,8 @@ class KemonopartyExtractor(Extractor):
def _init(self):
self.revisions = self.config("revisions")
+ if self.revisions:
+ self.revisions_unique = (self.revisions == "unique")
self._prepare_ddosguard_cookies()
self._find_inline = re.compile(
r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+'
@@ -162,7 +164,7 @@ class KemonopartyExtractor(Extractor):
return post["attachments"]
def _inline(self, post):
- for path in self._find_inline(post["content"] or ""):
+ for path in self._find_inline(post.get("content") or ""):
yield {"path": path, "name": path, "type": "inline"}
def _build_file_generators(self, filetypes):
@@ -222,8 +224,37 @@ class KemonopartyExtractor(Extractor):
self.root, server)
return self.request(url).json()
- @memcache(keyarg=1)
- def _post_revisions(self, url):
+ def _revisions_post(self, post, url):
+ post["revision_id"] = 0
+
+ try:
+ revs = self.request(url + "/revisions").json()
+ except exception.HttpError:
+ post["revision_hash"] = self._revision_hash(post)
+ post["revision_index"] = 1
+ return (post,)
+ revs.insert(0, post)
+
+ for rev in revs:
+ rev["revision_hash"] = self._revision_hash(rev)
+
+ if self.revisions_unique:
+ uniq = []
+ last = None
+ for rev in revs:
+ if last != rev["revision_hash"]:
+ last = rev["revision_hash"]
+ uniq.append(rev)
+ revs = uniq
+
+ idx = len(revs)
+ for rev in revs:
+ rev["revision_index"] = idx
+ idx -= 1
+
+ return revs
+
+ def _revisions_all(self, url):
revs = self.request(url + "/revisions").json()
idx = len(revs)
@@ -240,7 +271,9 @@ class KemonopartyExtractor(Extractor):
rev.pop("added", None)
rev.pop("next", None)
rev.pop("prev", None)
+ rev["file"] = rev["file"].copy()
rev["file"].pop("name", None)
+ rev["attachments"] = [a.copy() for a in rev["attachments"]]
for a in rev["attachments"]:
a.pop("name", None)
return util.sha1(self._json_dumps(rev))
@@ -252,10 +285,10 @@ def _validate(response):
class KemonopartyUserExtractor(KemonopartyExtractor):
- """Extractor for all posts from a kemono.party user listing"""
+ """Extractor for all posts from a kemono.su user listing"""
subcategory = "user"
pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|[?#])"
- example = "https://kemono.party/SERVICE/user/12345"
+ example = "https://kemono.su/SERVICE/user/12345"
def __init__(self, match):
_, _, service, user_id, self.query = match.groups()
@@ -275,18 +308,9 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
if self.revisions:
for post in posts:
- post["revision_hash"] = self._revision_hash(post)
- post["revision_id"] = 0
- post_url = "{}/post/{}".format(self.api_url, post["id"])
- try:
- revs = self._post_revisions(post_url)
- except exception.HttpError:
- post["revision_index"] = 1
- yield post
- else:
- post["revision_index"] = len(revs) + 1
- yield post
- yield from revs
+ post_url = "{}/api/v1/{}/user/{}/post/{}".format(
+ self.root, post["service"], post["user"], post["id"])
+ yield from self._revisions_post(post, post_url)
else:
yield from posts
@@ -295,11 +319,25 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
params["o"] += 50
+class KemonopartyPostsExtractor(KemonopartyExtractor):
+ """Extractor for kemono.su post listings"""
+ subcategory = "posts"
+ pattern = BASE_PATTERN + r"/posts(?:/?\?([^#]+))?"
+ example = "https://kemono.su/posts"
+
+ def __init__(self, match):
+ KemonopartyExtractor.__init__(self, match)
+ self.query = match.group(3)
+ self.api_url = self.root + "/api/v1/posts"
+
+ posts = KemonopartyUserExtractor.posts
+
+
class KemonopartyPostExtractor(KemonopartyExtractor):
- """Extractor for a single kemono.party post"""
+ """Extractor for a single kemono.su post"""
subcategory = "post"
pattern = USER_PATTERN + r"/post/([^/?#]+)(/revisions?(?:/(\d*))?)?"
- example = "https://kemono.party/SERVICE/user/12345/post/12345"
+ example = "https://kemono.su/SERVICE/user/12345/post/12345"
def __init__(self, match):
_, _, service, user_id, post_id, self.revision, self.revision_id = \
@@ -314,18 +352,10 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
if not self.revision:
post = self.request(self.api_url).json()
if self.revisions:
- post["revision_hash"] = self._revision_hash(post)
- post["revision_id"] = 0
- try:
- revs = self._post_revisions(self.api_url)
- except exception.HttpError:
- post["revision_index"] = 1
- else:
- post["revision_index"] = len(revs) + 1
- return itertools.chain((post,), revs)
+ return self._revisions_post(post, self.api_url)
return (post,)
- revs = self._post_revisions(self.api_url)
+ revs = self._revisions_all(self.api_url)
if not self.revision_id:
return revs
@@ -337,14 +367,14 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
class KemonopartyDiscordExtractor(KemonopartyExtractor):
- """Extractor for kemono.party discord servers"""
+ """Extractor for kemono.su discord servers"""
subcategory = "discord"
directory_fmt = ("{category}", "discord", "{server}",
"{channel_name|channel}")
filename_fmt = "{id}_{num:>02}_{filename}.{extension}"
archive_fmt = "discord_{server}_{id}_{num}"
pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)"
- example = "https://kemono.party/discord/server/12345#CHANNEL"
+ example = "https://kemono.su/discord/server/12345#CHANNEL"
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
@@ -430,7 +460,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
subcategory = "discord-server"
pattern = BASE_PATTERN + r"/discord/server/(\d+)$"
- example = "https://kemono.party/discord/server/12345"
+ example = "https://kemono.su/discord/server/12345"
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
@@ -445,10 +475,10 @@ class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
class KemonopartyFavoriteExtractor(KemonopartyExtractor):
- """Extractor for kemono.party favorites"""
+ """Extractor for kemono.su favorites"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/favorites(?:/?\?([^#]+))?"
- example = "https://kemono.party/favorites"
+ example = "https://kemono.su/favorites"
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
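Note on kemonoparty.py: _revisions_post() hashes each revision over its stable fields, and revisions = "unique" then drops consecutive revisions with identical hashes. The dedup step in isolation:

    def unique_revisions(revs, key="revision_hash"):
        uniq = []
        last = None
        for rev in revs:
            if last != rev[key]:
                last = rev[key]
                uniq.append(rev)
        return uniq

    revs = [{"revision_hash": h} for h in "aabba"]
    print([r["revision_hash"] for r in unique_revisions(revs)])  # ['a', 'b', 'a']
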
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index c3c44d2..8e73964 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -63,7 +63,11 @@ class LusciousAlbumExtractor(LusciousExtractor):
image["num"] = num
image["album"] = album
- image["thumbnail"] = image.pop("thumbnails")[0]["url"]
+ try:
+ image["thumbnail"] = image.pop("thumbnails")[0]["url"]
+ except LookupError:
+ image["thumbnail"] = ""
+
image["tags"] = [item["text"] for item in image["tags"]]
image["date"] = text.parse_timestamp(image["created"])
image["id"] = text.parse_int(image["id"])
diff --git a/gallery_dl/extractor/naverwebtoon.py b/gallery_dl/extractor/naverwebtoon.py
index 72ee5b0..4137f5d 100644
--- a/gallery_dl/extractor/naverwebtoon.py
+++ b/gallery_dl/extractor/naverwebtoon.py
@@ -46,7 +46,7 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor):
"episode" : self.episode,
"comic" : extr('titleName: "', '"'),
"tags" : [t.strip() for t in text.extract_iter(
- extr("tagList: [", "}],"), '"tagName":"', '"')],
+ extr("tagList: [", "],"), '"tagName":"', '"')],
"title" : extr('"subtitle":"', '"'),
"author" : [a.strip() for a in text.extract_iter(
extr('"writers":[', ']'), '"name":"', '"')],
@@ -79,9 +79,6 @@ class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor):
self.sort = query.get("sort", "ASC")
def items(self):
- base = "{}/{}/detail?titleId={}&no=".format(
- self.root, self.path, self.title_id)
-
url = self.root + "/api/article/list"
headers = {
"Accept": "application/json, text/plain, */*",
@@ -95,6 +92,10 @@ class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor):
while True:
data = self.request(url, headers=headers, params=params).json()
+ path = data["webtoonLevelCode"].lower().replace("_c", "C", 1)
+ base = "{}/{}/detail?titleId={}&no=".format(
+ self.root, path, data["titleId"])
+
for article in data["articleList"]:
article["_extractor"] = NaverwebtoonEpisodeExtractor
yield Message.Queue, base + str(article["no"]), article
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index c46a587..8c04ed5 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -149,8 +149,7 @@ class PinterestBoardExtractor(PinterestExtractor):
pins = self.api.board_pins(board["id"])
if board["section_count"] and self.config("sections", True):
- base = "{}/{}/{}/id:".format(
- self.root, board["owner"]["username"], board["name"])
+ base = "{}{}id:".format(self.root, board["url"])
data = {"_extractor": PinterestSectionExtractor}
sections = [(base + section["id"], data)
for section in self.api.board_sections(board["id"])]
@@ -220,7 +219,7 @@ class PinterestSectionExtractor(PinterestExtractor):
"{board[name]}", "{section[title]}")
archive_fmt = "{board[id]}_{id}"
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/([^/?#]+)/([^/?#]+)"
- example = "https://www.pinterest.com/USER/BOARD/SEcTION"
+ example = "https://www.pinterest.com/USER/BOARD/SECTION"
def __init__(self, match):
PinterestExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/pornpics.py b/gallery_dl/extractor/pornpics.py
index 4a6f031..83f3064 100644
--- a/gallery_dl/extractor/pornpics.py
+++ b/gallery_dl/extractor/pornpics.py
@@ -76,7 +76,7 @@ class PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor):
"gallery_id": text.parse_int(self.gallery_id),
"slug" : extr("/galleries/", "/").rpartition("-")[0],
"title" : text.unescape(extr("<h1>", "<")),
- "channel" : extr('>Channel:', '</a>').rpartition(">")[2],
+ "channel" : text.split_html(extr(">Channel:&nbsp;", '</div>')),
"models" : text.split_html(extr(
">Models:", '<span class="suggest')),
"categories": text.split_html(extr(
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index b3b7a9c..caf3e16 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -179,12 +179,16 @@ class SankakuAPI():
def __init__(self, extractor):
self.extractor = extractor
self.headers = {
- "Accept" : "application/vnd.sankaku.api+json;v=2",
- "Platform": "web-app",
- "Origin" : extractor.root,
+ "Accept" : "application/vnd.sankaku.api+json;v=2",
+ "Platform" : "web-app",
+ "Api-Version": None,
+ "Origin" : extractor.root,
}
- self.username, self.password = self.extractor._get_auth_info()
+ if extractor.config("id-format") in ("alnum", "alphanumeric"):
+ self.headers["Api-Version"] = "2"
+
+ self.username, self.password = extractor._get_auth_info()
if not self.username:
self.authenticate = util.noop
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 55a0db0..0b29ed0 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -33,10 +33,14 @@ class SkebExtractor(Extractor):
response, post = self._get_post_data(user_name, post_num)
if metadata:
post.update(metadata)
+
+ files = self._get_files_from_post(response)
+ post["count"] = len(files)
yield Message.Directory, post
- for data in self._get_urls_from_post(response, post):
- url = data["file_url"]
- yield Message.Url, url, text.nameext_from_url(url, data)
+ for post["num"], file in enumerate(files, 1):
+ post.update(file)
+ url = file["file_url"]
+ yield Message.Url, url, text.nameext_from_url(url, post)
def posts(self):
"""Return post number"""
@@ -105,40 +109,48 @@ class SkebExtractor(Extractor):
}
return resp, post
- def _get_urls_from_post(self, resp, post):
+ def _get_files_from_post(self, resp):
+ files = []
+
if self.thumbnails and "og_image_url" in resp:
- post["content_category"] = "thumb"
- post["file_id"] = "thumb"
- post["_file_id"] = str(resp["id"]) + "t"
- post["file_url"] = resp["og_image_url"]
- yield post
+ files.append({
+ "content_category": "thumb",
+ "file_id" : "thumb",
+ "_file_id": str(resp["id"]) + "t",
+ "file_url": resp["og_image_url"],
+ })
if self.article and "article_image_url" in resp:
url = resp["article_image_url"]
if url:
- post["content_category"] = "article"
- post["file_id"] = "article"
- post["_file_id"] = str(resp["id"]) + "a"
- post["file_url"] = url
- yield post
+ files.append({
+ "content_category": "article",
+ "file_id" : "article",
+ "_file_id": str(resp["id"]) + "a",
+ "file_url": url,
+ })
for preview in resp["previews"]:
- post["content_category"] = "preview"
- post["file_id"] = post["_file_id"] = preview["id"]
- post["file_url"] = preview["url"]
info = preview["information"]
- post["original"] = {
- "width" : info["width"],
- "height" : info["height"],
- "byte_size" : info["byte_size"],
- "duration" : info["duration"],
- "frame_rate": info["frame_rate"],
- "software" : info["software"],
- "extension" : info["extension"],
- "is_movie" : info["is_movie"],
- "transcoder": info["transcoder"],
- }
- yield post
+ files.append({
+ "content_category": "preview",
+ "file_id" : preview["id"],
+ "_file_id": preview["id"],
+ "file_url": preview["url"],
+ "original": {
+ "width" : info["width"],
+ "height" : info["height"],
+ "byte_size" : info["byte_size"],
+ "duration" : info["duration"],
+ "frame_rate": info["frame_rate"],
+ "software" : info["software"],
+ "extension" : info["extension"],
+ "is_movie" : info["is_movie"],
+ "transcoder": info["transcoder"],
+ },
+ })
+
+ return files
class SkebPostExtractor(SkebExtractor):
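Note on skeb.py: collecting all files up front lets the directory event carry an accurate "count" before any file is yielded, and enumerate(files, 1) numbers them. A hypothetical minimal event stream illustrating the ordering:

    def iter_events(post, files):
        post["count"] = len(files)  # known before the directory event
        yield ("directory", dict(post))
        for post["num"], file in enumerate(files, 1):
            post.update(file)
            yield ("file", dict(post))
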
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index cf759e0..ad5bfc6 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -693,6 +693,28 @@ class TwitterHashtagExtractor(TwitterExtractor):
yield Message.Queue, url, data
+class TwitterCommunityExtractor(TwitterExtractor):
+ """Extractor for a Twitter community"""
+ subcategory = "community"
+ pattern = BASE_PATTERN + r"/i/communities/(\d+)"
+ example = "https://twitter.com/i/communities/12345"
+
+ def tweets(self):
+ if self.textonly:
+ return self.api.community_tweets_timeline(self.user)
+ return self.api.community_media_timeline(self.user)
+
+
+class TwitterCommunitiesExtractor(TwitterExtractor):
+ """Extractor for followed Twitter communities"""
+ subcategory = "communities"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/communities/?$"
+ example = "https://twitter.com/i/communities"
+
+ def tweets(self):
+ return self.api.communities_main_page_timeline(self.user)
+
+
class TwitterEventExtractor(TwitterExtractor):
"""Extractor for Tweets from a Twitter Event"""
subcategory = "event"
@@ -881,15 +903,19 @@ class TwitterAPI():
self.headers = {
"Accept": "*/*",
- "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
- "COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
- "4FA33AGWWjCpTnA",
+ "Referer": "https://twitter.com/",
+ "content-type": "application/json",
"x-guest-token": None,
"x-twitter-auth-type": "OAuth2Session" if auth_token else None,
+ "x-csrf-token": csrf_token,
"x-twitter-client-language": "en",
"x-twitter-active-user": "yes",
- "x-csrf-token": csrf_token,
- "Referer": "https://twitter.com/",
+ "Sec-Fetch-Dest": "empty",
+ "Sec-Fetch-Mode": "cors",
+ "Sec-Fetch-Site": "same-origin",
+ "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
+ "COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
+ "4FA33AGWWjCpTnA",
}
self.params = {
"include_profile_interstitial_type": "1",
@@ -933,78 +959,54 @@ class TwitterAPI():
"collab_control,vibe",
}
self.features = {
- "hidden_profile_likes_enabled": False,
+ "hidden_profile_likes_enabled": True,
+ "hidden_profile_subscriptions_enabled": True,
"responsive_web_graphql_exclude_directive_enabled": True,
"verified_phone_label_enabled": False,
- "subscriptions_verification_info_verified_since_enabled": True,
"highlights_tweets_tab_ui_enabled": True,
+ "responsive_web_twitter_article_notes_tab_enabled": True,
"creator_subscriptions_tweet_preview_api_enabled": True,
"responsive_web_graphql_"
"skip_user_profile_image_extensions_enabled": False,
"responsive_web_graphql_timeline_navigation_enabled": True,
}
self.features_pagination = {
- "rweb_lists_timeline_redesign_enabled": True,
"responsive_web_graphql_exclude_directive_enabled": True,
"verified_phone_label_enabled": False,
"creator_subscriptions_tweet_preview_api_enabled": True,
"responsive_web_graphql_timeline_navigation_enabled": True,
"responsive_web_graphql_skip_user_profile_"
"image_extensions_enabled": False,
+ "c9s_tweet_anatomy_moderator_badge_enabled": True,
"tweetypie_unmention_optimization_enabled": True,
"responsive_web_edit_tweet_api_enabled": True,
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
"view_counts_everywhere_api_enabled": True,
"longform_notetweets_consumption_enabled": True,
+ "responsive_web_twitter_article_tweet_consumption_enabled": True,
"tweet_awards_web_tipping_enabled": False,
"freedom_of_speech_not_reach_fetch_enabled": True,
"standardized_nudges_misinfo": True,
"tweet_with_visibility_results_prefer_gql_"
- "limited_actions_policy_enabled": False,
- "interactive_text_enabled": True,
- "responsive_web_text_conversations_enabled": False,
+ "limited_actions_policy_enabled": True,
+ "rweb_video_timestamps_enabled": True,
"longform_notetweets_rich_text_read_enabled": True,
- "longform_notetweets_inline_media_enabled": False,
+ "longform_notetweets_inline_media_enabled": True,
+ "responsive_web_media_download_video_enabled": True,
"responsive_web_enhance_cards_enabled": False,
}
def tweet_result_by_rest_id(self, tweet_id):
- endpoint = "/graphql/2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId"
+ endpoint = "/graphql/MWY3AO9_I3rcP_L2A4FR4A/TweetResultByRestId"
+ variables = {
+ "tweetId": tweet_id,
+ "withCommunity": False,
+ "includePromotedContent": False,
+ "withVoice": False,
+ }
params = {
- "variables": self._json_dumps({
- "tweetId": tweet_id,
- "withCommunity": False,
- "includePromotedContent": False,
- "withVoice": False,
- }),
- "features": self._json_dumps({
- "creator_subscriptions_tweet_preview_api_enabled": True,
- "tweetypie_unmention_optimization_enabled": True,
- "responsive_web_edit_tweet_api_enabled": True,
- "graphql_is_translatable_rweb_tweet_is_translatable_enabled":
- True,
- "view_counts_everywhere_api_enabled": True,
- "longform_notetweets_consumption_enabled": True,
- "responsive_web_twitter_article_tweet_consumption_enabled":
- False,
- "tweet_awards_web_tipping_enabled": False,
- "freedom_of_speech_not_reach_fetch_enabled": True,
- "standardized_nudges_misinfo": True,
- "tweet_with_visibility_results_prefer_gql_"
- "limited_actions_policy_enabled": True,
- "longform_notetweets_rich_text_read_enabled": True,
- "longform_notetweets_inline_media_enabled": True,
- "responsive_web_graphql_exclude_directive_enabled": True,
- "verified_phone_label_enabled": False,
- "responsive_web_media_download_video_enabled": False,
- "responsive_web_graphql_skip_user_profile_"
- "image_extensions_enabled": False,
- "responsive_web_graphql_timeline_navigation_enabled": True,
- "responsive_web_enhance_cards_enabled": False,
- }),
- "fieldToggles": self._json_dumps({
- "withArticleRichContentState": False,
- }),
+ "variables": self._json_dumps(variables),
+ "features" : self._json_dumps(self.features_pagination),
}
tweet = self._call(endpoint, params)["data"]["tweetResult"]["result"]
if "tweet" in tweet:
@@ -1021,7 +1023,7 @@ class TwitterAPI():
return tweet
def tweet_detail(self, tweet_id):
- endpoint = "/graphql/JlLZj42Ltr2qwjasw-l5lQ/TweetDetail"
+ endpoint = "/graphql/B9_KmbkLhXt6jRwGjJrweg/TweetDetail"
variables = {
"focalTweetId": tweet_id,
"referrer": "profile",
@@ -1037,7 +1039,7 @@ class TwitterAPI():
endpoint, variables, ("threaded_conversation_with_injections_v2",))
def user_tweets(self, screen_name):
- endpoint = "/graphql/-AY51QoFpVf-w7TxjQ6lpw/UserTweets"
+ endpoint = "/graphql/5ICa5d9-AitXZrIA3H-4MQ/UserTweets"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1049,7 +1051,7 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_tweets_and_replies(self, screen_name):
- endpoint = "/graphql/urrCZMyyIh1FkSFi2cdPUA/UserTweetsAndReplies"
+ endpoint = "/graphql/UtLStR_BnYUGD7Q453UXQg/UserTweetsAndReplies"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1061,7 +1063,7 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_media(self, screen_name):
- endpoint = "/graphql/lo965xQZdN2-eSM1Jc-W_A/UserMedia"
+ endpoint = "/graphql/tO4LMUYAZbR4T0SqQ85aAw/UserMedia"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1073,28 +1075,8 @@ class TwitterAPI():
}
return self._pagination_tweets(endpoint, variables)
- def user_media_legacy(self, screen_name):
- endpoint = "/graphql/nRybED9kRbN-TOWioHq1ng/UserMedia"
- variables = {
- "userId": self._user_id_by_screen_name(screen_name),
- "count": 100,
- "includePromotedContent": False,
- "withSuperFollowsUserFields": True,
- "withBirdwatchPivots": False,
- "withSuperFollowsTweetFields": True,
- "withClientEventToken": False,
- "withBirdwatchNotes": False,
- "withVoice": True,
- "withV2Timeline": False,
- "__fs_interactive_text": False,
- "__fs_dont_mention_me_view_api_enabled": False,
- }
- return self._pagination_tweets(
- endpoint, variables, ("user", "result", "timeline", "timeline"),
- features=False)
-
def user_likes(self, screen_name):
- endpoint = "/graphql/6JET1d0iHsIzW0Zjs3OOwQ/Likes"
+ endpoint = "/graphql/9s8V6sUI8fZLDiN-REkAxA/Likes"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1107,9 +1089,10 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_bookmarks(self):
- endpoint = "/graphql/YNtYqNuki6_oiVwx0uP8mQ/Bookmarks"
+ endpoint = "/graphql/cQxQgX8MJYjWwC0dxpyfYg/Bookmarks"
variables = {
"count": 100,
+ "includePromotedContent": False,
}
features = self.features_pagination.copy()
features["graphql_timeline_v2_bookmark_timeline"] = True
@@ -1118,7 +1101,7 @@ class TwitterAPI():
features=features)
def list_latest_tweets_timeline(self, list_id):
- endpoint = "/graphql/ZBbXrl37E6za5ml-DIpmgg/ListLatestTweetsTimeline"
+ endpoint = "/graphql/HjsWc-nwwHKYwHenbHm-tw/ListLatestTweetsTimeline"
variables = {
"listId": list_id,
"count": 100,
@@ -1127,22 +1110,54 @@ class TwitterAPI():
endpoint, variables, ("list", "tweets_timeline", "timeline"))
def search_timeline(self, query):
- endpoint = "/graphql/7jT5GT59P8IFjgxwqnEdQw/SearchTimeline"
+ endpoint = "/graphql/fZK7JipRHWtiZsTodhsTfQ/SearchTimeline"
variables = {
"rawQuery": query,
- "count": 20,
+ "count": 100,
+ "querySource": "",
"product": "Latest",
- "withDownvotePerspective": False,
- "withReactionsMetadata": False,
- "withReactionsPerspective": False,
}
- features = self.features_pagination.copy()
- features["blue_business_profile_image_shape_enabled"] = False
- features["vibe_api_enabled"] = True
+
return self._pagination_tweets(
endpoint, variables,
- ("search_by_raw_query", "search_timeline", "timeline"),
- features=features)
+ ("search_by_raw_query", "search_timeline", "timeline"))
+
+ def community_tweets_timeline(self, community_id):
+ endpoint = "/graphql/7B2AdxSuC-Er8qUr3Plm_w/CommunityTweetsTimeline"
+ variables = {
+ "communityId": community_id,
+ "count": 100,
+ "displayLocation": "Community",
+ "rankingMode": "Recency",
+ "withCommunity": True,
+ }
+ return self._pagination_tweets(
+ endpoint, variables,
+ ("communityResults", "result", "ranked_community_timeline",
+ "timeline"))
+
+ def community_media_timeline(self, community_id):
+ endpoint = "/graphql/qAGUldfcIoMv5KyAyVLYog/CommunityMediaTimeline"
+ variables = {
+ "communityId": community_id,
+ "count": 100,
+ "withCommunity": True,
+ }
+ return self._pagination_tweets(
+ endpoint, variables,
+ ("communityResults", "result", "community_media_timeline",
+ "timeline"))
+
+ def communities_main_page_timeline(self, screen_name):
+ endpoint = ("/graphql/GtOhw2mstITBepTRppL6Uw"
+ "/CommunitiesMainPageTimeline")
+ variables = {
+ "count": 100,
+ "withCommunity": True,
+ }
+ return self._pagination_tweets(
+ endpoint, variables,
+ ("viewer", "communities_timeline", "timeline"))

    def live_event_timeline(self, event_id):
endpoint = "/2/live_event/timeline/{}.json".format(event_id)
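
The key-path tuples passed to _pagination_tweets, e.g. ("communityResults",
"result", "ranked_community_timeline", "timeline"), name where the timeline
object sits inside the nested GraphQL response. A minimal sketch of how such
a tuple can be resolved; resolve_path and the sample payload are
illustrative, not gallery-dl's actual helper:

    from functools import reduce

    def resolve_path(obj, path):
        """Walk nested dict keys: resolve_path(d, ("a", "b")) == d["a"]["b"]."""
        return reduce(lambda node, key: node[key], path, obj)

    # abbreviated, made-up response shape
    data = {"communityResults": {"result": {
        "ranked_community_timeline": {"timeline": {"instructions": []}}}}}

    print(resolve_path(data, ("communityResults", "result",
                              "ranked_community_timeline", "timeline")))
    # {'instructions': []}
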
@@ -1160,21 +1175,8 @@ class TwitterAPI():
return (self._call(endpoint, params)
["twitter_objects"]["live_events"][event_id])

-    def list_by_rest_id(self, list_id):
- endpoint = "/graphql/AmCdeFUvlrKAO96yHr-GCg/ListByRestId"
- params = {
- "variables": self._json_dumps({
- "listId": list_id,
- }),
- "features": self._json_dumps(self.features),
- }
- try:
- return self._call(endpoint, params)["data"]["list"]
- except KeyError:
- raise exception.NotFoundError("list")
-
def list_members(self, list_id):
- endpoint = "/graphql/a_ZQomd3MMk1crWkeiQBPg/ListMembers"
+ endpoint = "/graphql/BQp2IEYkgxuSxqbTAr1e1g/ListMembers"
variables = {
"listId": list_id,
"count": 100,
@@ -1184,7 +1186,7 @@ class TwitterAPI():
endpoint, variables, ("list", "members_timeline", "timeline"))

    def user_following(self, screen_name):
- endpoint = "/graphql/JPZiqKjET7_M1r5Tlr8pyA/Following"
+ endpoint = "/graphql/PAnE9toEjRfE-4tozRcsfw/Following"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -1194,9 +1196,8 @@ class TwitterAPI():
@memcache(keyarg=1)
def user_by_rest_id(self, rest_id):
- endpoint = "/graphql/1YAM811Q8Ry4XyPpJclURQ/UserByRestId"
- features = self.features.copy()
- features["blue_business_profile_image_shape_enabled"] = True
+ endpoint = "/graphql/tD8zKvQzwY3kdx5yz6YmOw/UserByRestId"
+ features = self.features
params = {
"variables": self._json_dumps({
"userId": rest_id,
@@ -1208,13 +1209,18 @@ class TwitterAPI():
@memcache(keyarg=1)
def user_by_screen_name(self, screen_name):
- endpoint = "/graphql/XA6F1nJELYg65hxOC2Ekmg/UserByScreenName"
+ endpoint = "/graphql/k5XapwcSikNsEsILW5FvgA/UserByScreenName"
+ features = self.features.copy()
+ features["subscriptions_verification_info_"
+ "is_identity_verified_enabled"] = True
+ features["subscriptions_verification_info_"
+ "verified_since_enabled"] = True
params = {
"variables": self._json_dumps({
"screen_name": screen_name,
"withSafetyModeUserFields": True,
}),
- "features": self._json_dumps(self.features),
+ "features": self._json_dumps(features),
}
return self._call(endpoint, params)["data"]["user"]["result"]
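
The split dictionary keys above rely on Python's implicit concatenation of
adjacent string literals: each flag is one long key, merely wrapped to keep
the line length down. A quick check:

    # adjacent string literals are joined at compile time
    key = ("subscriptions_verification_info_"
           "is_identity_verified_enabled")
    assert key == "subscriptions_verification_info_is_identity_verified_enabled"
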
@@ -1486,7 +1492,8 @@ class TwitterAPI():
if esw("tweet-"):
tweets.append(entry)
- elif esw("profile-grid-"):
+ elif esw(("profile-grid-",
+ "communities-grid-")):
if "content" in entry:
tweets.extend(entry["content"]["items"])
else:
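
The widened filter works because esw is presumably the entry ID's bound
str.startswith method, and startswith also accepts a tuple of prefixes.
Illustration with a made-up entry ID:

    esw = "communities-grid-0-tweet-123".startswith   # hypothetical entry ID
    print(esw("tweet-"))                                # False
    print(esw(("profile-grid-", "communities-grid-")))  # True
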
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 14e3c7b..41141c6 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -11,8 +11,8 @@
from .common import Extractor, Message
from .. import text, util
-
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co/([^/]+)"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co"
+USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)"


class VscoExtractor(Extractor):
@@ -115,7 +115,7 @@ class VscoExtractor(Extractor):
class VscoUserExtractor(VscoExtractor):
"""Extractor for images from a user on vsco.co"""
subcategory = "user"
- pattern = BASE_PATTERN + r"(?:/gallery|/images(?:/\d+)?)?/?(?:$|[?#])"
+ pattern = USER_PATTERN + r"(?:/gallery|/images(?:/\d+)?)?/?(?:$|[?#])"
example = "https://vsco.co/USER/gallery"

    def images(self):
@@ -139,8 +139,8 @@ class VscoCollectionExtractor(VscoExtractor):
subcategory = "collection"
directory_fmt = ("{category}", "{user}", "collection")
archive_fmt = "c_{user}_{id}"
- pattern = BASE_PATTERN + r"/collection/"
- example = "https://vsco.co/USER/collection/12345"
+ pattern = USER_PATTERN + r"/collection"
+ example = "https://vsco.co/USER/collection/1"

    def images(self):
url = "{}/{}/collection/1".format(self.root, self.user)
@@ -159,10 +159,89 @@ class VscoCollectionExtractor(VscoExtractor):
))


+class VscoSpaceExtractor(VscoExtractor):
+ """Extractor for a vsco.co space"""
+ subcategory = "space"
+ directory_fmt = ("{category}", "space", "{user}")
+ archive_fmt = "s_{user}_{id}"
+ pattern = BASE_PATTERN + r"/spaces/([^/?#]+)"
+ example = "https://vsco.co/spaces/a1b2c3d4e5f"
+
+ def images(self):
+ url = "{}/spaces/{}".format(self.root, self.user)
+ data = self._extract_preload_state(url)
+
+ tkn = data["users"]["currentUser"]["tkn"]
+ sid = self.user
+
+ posts = data["entities"]["posts"]
+ images = data["entities"]["postImages"]
+ for post in posts.values():
+ post["image"] = images[post["image"]]
+
+ space = data["spaces"]["byId"][sid]
+ space["postsList"] = [posts[pid] for pid in space["postsList"]]
+
+ url = "{}/grpc/spaces/{}/posts".format(self.root, sid)
+ params = {}
+ return self._pagination(url, params, tkn, space)
+
+ def _pagination(self, url, params, token, data):
+ headers = {
+ "Accept" : "application/json",
+ "Referer" : "{}/spaces/{}".format(self.root, self.user),
+ "Content-Type" : "application/json",
+ "Authorization": "Bearer " + token,
+ }
+
+ while True:
+ for post in data["postsList"]:
+ post = self._transform_media(post["image"])
+ post["upload_date"] = post["upload_date"]["sec"] * 1000
+ yield post
+
+ cursor = data["cursor"]
+ if cursor.get("atEnd"):
+ return
+ params["cursor"] = cursor["postcursorcontext"]["postId"]
+
+ data = self.request(url, params=params, headers=headers).json()
+
+
+class VscoSpacesExtractor(VscoExtractor):
+ """Extractor for a vsco.co user's spaces"""
+ subcategory = "spaces"
+ pattern = USER_PATTERN + r"/spaces"
+ example = "https://vsco.co/USER/spaces"
+
+ def items(self):
+ url = "{}/{}/spaces".format(self.root, self.user)
+ data = self._extract_preload_state(url)
+
+ tkn = data["users"]["currentUser"]["tkn"]
+ uid = data["sites"]["siteByUsername"][self.user]["site"]["userId"]
+
+ headers = {
+ "Accept" : "application/json",
+ "Referer" : url,
+ "Content-Type" : "application/json",
+ "Authorization": "Bearer " + tkn,
+ }
+        # this response would need pagination for users with many spaces
+ url = "{}/grpc/spaces/user/{}".format(self.root, uid)
+ data = self.request(url, headers=headers).json()
+
+ for space in data["spacesWithRoleList"]:
+ space = space["space"]
+ url = "{}/spaces/{}".format(self.root, space["id"])
+ space["_extractor"] = VscoSpaceExtractor
+ yield Message.Queue, url, space
+
+
class VscoImageExtractor(VscoExtractor):
"""Extractor for individual images on vsco.co"""
subcategory = "image"
- pattern = BASE_PATTERN + r"/media/([0-9a-fA-F]+)"
+ pattern = USER_PATTERN + r"/media/([0-9a-fA-F]+)"
example = "https://vsco.co/USER/media/0123456789abcdef"

    def __init__(self, match):
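
Both space extractors lean on _extract_preload_state(), which this diff does
not show. A hedged sketch of what such a helper could look like, assuming
VSCO embeds its state as a window.__PRELOADED_STATE__ assignment inside a
<script> tag; the marker string and markup are assumptions:

    import json

    def extract_preload_state(page):
        """Parse the JSON blob assigned to window.__PRELOADED_STATE__."""
        marker = "window.__PRELOADED_STATE__ = "     # assumed marker
        start = page.index(marker) + len(marker)
        end = page.index("</script>", start)
        return json.loads(page[start:end].strip().rstrip(";"))
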
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 3bd0648..5b45148 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -33,6 +33,8 @@ class WeiboExtractor(Extractor):
self.retweets = self.config("retweets", True)
self.videos = self.config("videos", True)
self.livephoto = self.config("livephoto", True)
+ self.gifs = self.config("gifs", True)
+ self.gifs_video = (self.gifs == "video")

        cookies = _cookie_cache()
if cookies is not None:
@@ -106,8 +108,11 @@ class WeiboExtractor(Extractor):
pic = pics[pic_id]
pic_type = pic.get("type")
- if pic_type == "gif" and self.videos:
- append({"url": pic["video"]})
+ if pic_type == "gif" and self.gifs:
+ if self.gifs_video:
+ append({"url": pic["video"]})
+ else:
+ append(pic["largest"].copy())
elif pic_type == "livephoto" and self.livephoto:
append(pic["largest"].copy())
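
The new option decouples GIF handling from the "videos" setting: "gifs" may
be false (skip GIFs), "video" (download the MP4 rendition, the previous
behaviour), or true (take the largest image file, the new default). A
simplified dispatch mirroring the hunk above; pick_gif_variant and the
sample pic dict are illustrative only:

    def pick_gif_variant(pic, gifs=True):
        """Pick a download candidate for a 'gif' pic entry."""
        if not gifs:                     # "gifs": false   -> skip entirely
            return None
        if gifs == "video":              # "gifs": "video" -> old behaviour
            return {"url": pic["video"]}
        return dict(pic["largest"])      # "gifs": true    -> image file

    pic = {"type": "gif",
           "video": "https://example.org/clip.mp4",        # made-up URLs
           "largest": {"url": "https://example.org/pic.gif"}}
    print(pick_gif_variant(pic, "video"))
    # {'url': 'https://example.org/clip.mp4'}
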
diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py
index 1eafc29..c93f33f 100644
--- a/gallery_dl/extractor/wikimedia.py
+++ b/gallery_dl/extractor/wikimedia.py
@@ -25,15 +25,14 @@ class WikimediaExtractor(BaseExtractor):
BaseExtractor.__init__(self, match)
path = match.group(match.lastindex)
- if self.category == "fandom":
+ if self.category == "wikimedia":
+ self.category = self.root.split(".")[-2]
+ elif self.category == "fandom":
self.category = \
"fandom-" + self.root.partition(".")[0].rpartition("/")[2]

        if path.startswith("wiki/"):
path = path[5:]
- self.api_path = "/w/api.php"
- else:
- self.api_path = "/api.php"

        pre, sep, _ = path.partition(":")
prefix = pre.lower() if sep else None
@@ -66,7 +65,7 @@ class WikimediaExtractor(BaseExtractor):
else:
self.api_url = api_path
else:
- self.api_url = self.root + self.api_path
+ self.api_url = self.root + "/api.php"

    def items(self):
for info in self._pagination(self.params):
@@ -122,55 +121,47 @@ class WikimediaExtractor(BaseExtractor):
BASE_PATTERN = WikimediaExtractor.update({
- "wikipedia": {
- "root": None,
- "pattern": r"[a-z]{2,}\.wikipedia\.org",
- },
- "wiktionary": {
- "root": None,
- "pattern": r"[a-z]{2,}\.wiktionary\.org",
- },
- "wikiquote": {
- "root": None,
- "pattern": r"[a-z]{2,}\.wikiquote\.org",
- },
- "wikibooks": {
+ "wikimedia": {
"root": None,
- "pattern": r"[a-z]{2,}\.wikibooks\.org",
- },
- "wikisource": {
- "root": None,
- "pattern": r"[a-z]{2,}\.wikisource\.org",
- },
- "wikinews": {
- "root": None,
- "pattern": r"[a-z]{2,}\.wikinews\.org",
- },
- "wikiversity": {
- "root": None,
- "pattern": r"[a-z]{2,}\.wikiversity\.org",
+ "pattern": r"[a-z]{2,}\."
+ r"wik(?:i(?:pedia|quote|books|source|news|versity|data"
+ r"|voyage)|tionary)"
+ r"\.org",
+ "api-path": "/w/api.php",
},
"wikispecies": {
"root": "https://species.wikimedia.org",
"pattern": r"species\.wikimedia\.org",
+ "api-path": "/w/api.php",
},
"wikimediacommons": {
"root": "https://commons.wikimedia.org",
"pattern": r"commons\.wikimedia\.org",
+ "api-path": "/w/api.php",
},
"mediawiki": {
"root": "https://www.mediawiki.org",
"pattern": r"(?:www\.)?mediawiki\.org",
+ "api-path": "/w/api.php",
},
"fandom": {
"root": None,
"pattern": r"[\w-]+\.fandom\.com",
- "api-path": "/api.php",
},
"mariowiki": {
"root": "https://www.mariowiki.com",
"pattern": r"(?:www\.)?mariowiki\.com",
},
+ "bulbapedia": {
+ "root": "https://bulbapedia.bulbagarden.net",
+ "pattern": r"(?:bulbapedia|archives)\.bulbagarden\.net",
+ "api-path": "/w/api.php",
+ },
+ "pidgiwiki": {
+ "root": "https://www.pidgi.net",
+ "pattern": r"(?:www\.)?pidgi\.net",
+ "api-path": "/wiki/api.php",
+ },
})
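
Two consequences of this rework: every language-specific Wikimedia project
now matches the single "wikimedia" entry, whose final category is derived
from the hostname, and the API path comes from the per-site "api-path"
setting, falling back to "/api.php". The hostname derivation, worked through
for an assumed root:

    root = "https://en.wikipedia.org"   # assumed example
    # root.split(".") -> ["https://en", "wikipedia", "org"]
    print(root.split(".")[-2])          # wikipedia
    print(root + "/w/api.php")          # https://en.wikipedia.org/w/api.php
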
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index 1307399..6ee96e6 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -146,9 +146,9 @@ class ZerochanTagExtractor(ZerochanExtractor):
yield {
"id" : extr('href="/', '"'),
"name" : extr('alt="', '"'),
- "width" : extr('title="', 'x'),
+ "width" : extr('title="', '&#10005;'),
"height": extr('', ' '),
- "size" : extr('', 'B'),
+ "size" : extr('', 'b'),
"file_url": "https://static." + extr(
'<a href="https://static.', '"'),
}
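
The new delimiters track changed zerochan markup, where the title attribute
separates dimensions with the &#10005; (✕) entity and writes the size unit
with a lowercase b. A sketch of the sequential extraction against a made-up
title value:

    from gallery_dl import text

    sample = '<img alt="Example" title="600&#10005;800 245kb" src="#">'
    extr = text.extract_from(sample)        # stateful extr(begin, end)
    width  = extr('title="', '&#10005;')    # '600'
    height = extr('', ' ')                  # '800'
    size   = extr('', 'b')                  # '245k'
    print(width, height, size)
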
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 71927a5..1616bbd 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -231,6 +231,8 @@ class PathFormat():
self.temppath = self.realpath = self.realpath[:-1]
elif not self.temppath:
self.build_path()
+ except exception.GalleryDLException:
+ raise
except Exception:
self.path = self.directory + "?"
self.realpath = self.temppath = self.realdirectory + "?"
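
Re-raising GalleryDLException ahead of the catch-all keeps gallery-dl's own
errors visible instead of silently degrading the path to "?". The pattern in
isolation, with a stand-in exception class:

    class GalleryDLException(Exception):
        """Stand-in for gallery_dl.exception.GalleryDLException."""

    def build(exc):
        try:
            raise exc
        except GalleryDLException:
            raise                    # our own errors propagate unchanged
        except Exception:
            return "fallback-?"      # everything else degrades gracefully

    print(build(ValueError("oops")))             # fallback-?
    try:
        build(GalleryDLException("path error"))
    except GalleryDLException as e:
        print("propagated:", e)                  # propagated: path error
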
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index f99beaa..e89ab9c 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.26.7"
+__version__ = "1.26.8"