author    Unit 193 <unit193@unit193.net>  2021-10-05 23:30:05 -0400
committer Unit 193 <unit193@unit193.net>  2021-10-05 23:30:05 -0400
commit    34ba2951b8c523713425c98addb9256ea05c946f (patch)
tree      6ec7e96d0c6e6f6e94b6b97ecd8c0a414ceef93d /gallery_dl/extractor
parent    3f5483df9075ae526f4c54f4cbe80edeabf6d4cc (diff)

New upstream version 1.19.0 (tag: upstream/1.19.0)
Diffstat (limited to 'gallery_dl/extractor')
-rw-r--r--  gallery_dl/extractor/__init__.py        |  1
-rw-r--r--  gallery_dl/extractor/artstation.py      |  3
-rw-r--r--  gallery_dl/extractor/aryion.py          | 57
-rw-r--r--  gallery_dl/extractor/common.py          | 44
-rw-r--r--  gallery_dl/extractor/cyberdrop.py       |  2
-rw-r--r--  gallery_dl/extractor/desktopography.py  | 95
-rw-r--r--  gallery_dl/extractor/deviantart.py      | 63
-rw-r--r--  gallery_dl/extractor/erome.py           |  7
-rw-r--r--  gallery_dl/extractor/fantia.py          |  2
-rw-r--r--  gallery_dl/extractor/foolslide.py       |  4
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py    | 52
-rw-r--r--  gallery_dl/extractor/gfycat.py          | 17
-rw-r--r--  gallery_dl/extractor/hiperdex.py        | 10
-rw-r--r--  gallery_dl/extractor/imagehosts.py      | 20
-rw-r--r--  gallery_dl/extractor/instagram.py       |  3
-rw-r--r--  gallery_dl/extractor/kemonoparty.py     | 54
-rw-r--r--  gallery_dl/extractor/mangadex.py        | 38
-rw-r--r--  gallery_dl/extractor/mastodon.py        | 52
-rw-r--r--  gallery_dl/extractor/nozomi.py          |  4
-rw-r--r--  gallery_dl/extractor/oauth.py           |  2
-rw-r--r--  gallery_dl/extractor/reddit.py          | 16
-rw-r--r--  gallery_dl/extractor/redgifs.py         |  2
-rw-r--r--  gallery_dl/extractor/twitter.py         | 37
23 files changed, 460 insertions, 125 deletions
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index f68ea9f..c512548 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -28,6 +28,7 @@ modules = [
"comicvine",
"cyberdrop",
"danbooru",
+ "desktopography",
"deviantart",
"dynastyscans",
"e621",
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index f2ad0ab..f687ff8 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -158,7 +158,8 @@ class ArtstationUserExtractor(ArtstationExtractor):
def projects(self):
url = "{}/users/{}/projects.json".format(self.root, self.user)
- return self._pagination(url)
+ params = {"album_id": "all"}
+ return self._pagination(url, params)
class ArtstationAlbumExtractor(ArtstationExtractor):
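The artstation.py change above asks the projects endpoint for every album at once. A minimal standalone sketch of the request loop it feeds into; the "page" parameter and the "data" response key are assumptions inferred from the extractor's pagination code, not documented API behavior:

    # Sketch only: endpoint shape beyond "album_id" is assumed, not confirmed.
    import requests

    def artstation_projects(user):
        url = "https://www.artstation.com/users/{}/projects.json".format(user)
        params = {"album_id": "all", "page": 1}
        while True:
            data = requests.get(url, params=params).json().get("data")
            if not data:
                return
            yield from data          # one project dict per entry
            params["page"] += 1
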
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 0d0ad70..06ec571 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -29,7 +29,6 @@ class AryionExtractor(Extractor):
Extractor.__init__(self, match)
self.user = match.group(1)
self.recursive = True
- self._needle = "class='gallery-item' id='"
def login(self):
if self._check_cookies(self.cookienames):
@@ -56,25 +55,50 @@ class AryionExtractor(Extractor):
def items(self):
self.login()
+ data = self.metadata()
for post_id in self.posts():
post = self._parse_post(post_id)
if post:
+ if data:
+ post.update(data)
yield Message.Directory, post
yield Message.Url, post["url"], post
elif post is False and self.recursive:
base = self.root + "/g4/view/"
data = {"_extractor": AryionPostExtractor}
- for post_id in self._pagination(base + post_id):
+ for post_id in self._pagination_params(base + post_id):
yield Message.Queue, base + post_id, data
def posts(self):
"""Yield relevant post IDs"""
- def _pagination(self, url):
+ def metadata(self):
+ """Return general metadata"""
+
+ def _pagination_params(self, url, params=None):
+ if params is None:
+ params = {"p": 1}
+ else:
+ params["p"] = text.parse_int(params.get("p"), 1)
+
+ while True:
+ page = self.request(url, params=params).text
+
+ cnt = 0
+ for post_id in text.extract_iter(
+ page, "class='gallery-item' id='", "'"):
+ cnt += 1
+ yield post_id
+
+ if cnt < 40:
+ return
+ params["p"] += 1
+
+ def _pagination_next(self, url):
while True:
page = self.request(url).text
- yield from text.extract_iter(page, self._needle, "'")
+ yield from text.extract_iter(page, "thumb' href='/g4/view/", "'")
pos = page.find("Next &gt;&gt;")
if pos < 0:
@@ -180,11 +204,30 @@ class AryionGalleryExtractor(AryionExtractor):
def posts(self):
if self.recursive:
url = "{}/g4/gallery/{}".format(self.root, self.user)
- return self._pagination(url)
+ return self._pagination_params(url)
else:
- self._needle = "thumb' href='/g4/view/"
url = "{}/g4/latest.php?name={}".format(self.root, self.user)
- return util.advance(self._pagination(url), self.offset)
+ return util.advance(self._pagination_next(url), self.offset)
+
+
+class AryionTagExtractor(AryionExtractor):
+ """Extractor for tag searches on eka's portal"""
+ subcategory = "tag"
+ directory_fmt = ("{category}", "tags", "{search_tags}")
+ archive_fmt = "t_{search_tags}_{id}"
+ pattern = BASE_PATTERN + r"/tags\.php\?([^#]+)"
+ test = ("https://aryion.com/g4/tags.php?tag=star+wars&p=19", {
+ "count": ">= 5",
+ })
+
+ def metadata(self):
+ self.params = text.parse_query(self.user)
+ self.user = None
+ return {"search_tags": self.params.get("tag")}
+
+ def posts(self):
+ url = self.root + "/g4/tags.php"
+ return self._pagination_params(url, self.params)
class AryionPostExtractor(AryionExtractor):
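The new _pagination_params helper pages through gallery and tag listings with a "p" query parameter and stops as soon as a page yields fewer than 40 gallery items. A standalone sketch of that pattern, using requests and re in place of gallery_dl's session and text helpers; the marker string and the page size of 40 come straight from the diff:

    # Minimal sketch of the "p"-parameter pagination introduced above.
    import re
    import requests

    def aryion_pagination(url, params=None):
        params = params or {}
        params["p"] = int(params.get("p") or 1)
        while True:
            page = requests.get(url, params=params).text
            post_ids = re.findall(r"class='gallery-item' id='([^']*)'", page)
            yield from post_ids
            if len(post_ids) < 40:   # short page means last page
                return
            params["p"] += 1
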
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index d9f69ab..4f42477 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -42,6 +42,7 @@ class Extractor():
def __init__(self, match):
self.log = logging.getLogger(self.category)
self.url = match.string
+ self.finalize = None
if self.basecategory:
self.config = self._config_shared
@@ -53,13 +54,13 @@ class Extractor():
self._retries = self.config("retries", 4)
self._timeout = self.config("timeout", 30)
self._verify = self.config("verify", True)
- self.request_interval = self.config(
- "sleep-request", self.request_interval)
+ self._interval = util.build_duration_func(
+ self.config("sleep-request", self.request_interval),
+ self.request_interval_min,
+ )
if self._retries < 0:
self._retries = float("inf")
- if self.request_interval < self.request_interval_min:
- self.request_interval = self.request_interval_min
self._init_session()
self._init_cookies()
@@ -102,15 +103,19 @@ class Extractor():
def request(self, url, *, method="GET", session=None, retries=None,
encoding=None, fatal=True, notfound=None, **kwargs):
- tries = 1
- retries = self._retries if retries is None else retries
- session = self.session if session is None else session
- kwargs.setdefault("timeout", self._timeout)
- kwargs.setdefault("verify", self._verify)
+ if retries is None:
+ retries = self._retries
+ if session is None:
+ session = self.session
+ if "timeout" not in kwargs:
+ kwargs["timeout"] = self._timeout
+ if "verify" not in kwargs:
+ kwargs["verify"] = self._verify
response = None
+ tries = 1
- if self.request_interval:
- seconds = (self.request_interval -
+ if self._interval:
+ seconds = (self._interval() -
(time.time() - Extractor.request_timestamp))
if seconds > 0.0:
self.log.debug("Sleeping for %.5s seconds", seconds)
@@ -442,16 +447,23 @@ class GalleryExtractor(Extractor):
imgs = self.images(page)
if "count" in data:
- images = zip(
- range(1, data["count"]+1),
- imgs,
- )
+ if self.config("page-reverse"):
+ images = util.enumerate_reversed(imgs, 1, data["count"])
+ else:
+ images = zip(
+ range(1, data["count"]+1),
+ imgs,
+ )
else:
+ enum = enumerate
try:
data["count"] = len(imgs)
except TypeError:
pass
- images = enumerate(imgs, 1)
+ else:
+ if self.config("page-reverse"):
+ enum = util.enumerate_reversed
+ images = enum(imgs, 1)
yield Message.Directory, data
for data[self.enum], (url, imgdata) in images:
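Two util helpers appear here that the diff does not define: util.build_duration_func turns the sleep-request setting into a callable, and, judging by the instagram.py change further down (request_interval = (6.0, 12.0)), it accepts either a fixed number of seconds or a (min, max) range; util.enumerate_reversed numbers pages in descending order for the new page-reverse option. A guess at the duration builder's semantics, not the real gallery_dl.util code:

    # Assumed behaviour of util.build_duration_func: return a callable that
    # produces a sleep duration, fixed or drawn from a (min, max) range,
    # never below the given lower bound. This is a sketch, not the upstream
    # implementation.
    import random

    def build_duration_func(duration, min_duration=0.0):
        if not duration:
            return None                          # "if self._interval:" stays falsy
        if isinstance(duration, (tuple, list)):
            lower, upper = duration
            return lambda: max(random.uniform(lower, upper), min_duration)
        return lambda: max(duration, min_duration)
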
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index e354cb7..2004921 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -15,7 +15,7 @@ class CyberdropAlbumExtractor(Extractor):
category = "cyberdrop"
subcategory = "album"
root = "https://cyberdrop.me"
- directory_fmt = ("{category}", "{album_id} {album_name}")
+ directory_fmt = ("{category}", "{album_name} ({album_id})")
archive_fmt = "{album_id}_{id}"
pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)"
test = ("https://cyberdrop.me/a/keKRjm4t", {
diff --git a/gallery_dl/extractor/desktopography.py b/gallery_dl/extractor/desktopography.py
new file mode 100644
index 0000000..363341a
--- /dev/null
+++ b/gallery_dl/extractor/desktopography.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://desktopography.net/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?desktopography\.net"
+
+
+class DesktopographyExtractor(Extractor):
+ """Base class for desktopography extractors"""
+ category = "desktopography"
+ archive_fmt = "{filename}"
+ root = "https://desktopography.net"
+
+
+class DesktopographySiteExtractor(DesktopographyExtractor):
+ """Extractor for all desktopography exhibitions """
+ subcategory = "site"
+ pattern = BASE_PATTERN + r"/$"
+ test = ("https://desktopography.net/",)
+
+ def items(self):
+ page = self.request(self.root).text
+ data = {"_extractor": DesktopographyExhibitionExtractor}
+
+ for exhibition_year in text.extract_iter(
+ page,
+ '<a href="https://desktopography.net/exhibition-',
+ '/">'):
+
+ url = self.root + "/exhibition-" + exhibition_year + "/"
+ yield Message.Queue, url, data
+
+
+class DesktopographyExhibitionExtractor(DesktopographyExtractor):
+ """Extractor for a yearly desktopography exhibition"""
+ subcategory = "exhibition"
+ pattern = BASE_PATTERN + r"/exhibition-([^/?#]+)/"
+ test = ("https://desktopography.net/exhibition-2020/",)
+
+ def __init__(self, match):
+ DesktopographyExtractor.__init__(self, match)
+ self.year = match.group(1)
+
+ def items(self):
+ url = "{}/exhibition-{}/".format(self.root, self.year)
+ base_entry_url = "https://desktopography.net/portfolios/"
+ page = self.request(url).text
+
+ data = {
+ "_extractor": DesktopographyEntryExtractor,
+ "year": self.year,
+ }
+
+ for entry_url in text.extract_iter(
+ page,
+ '<a class="overlay-background" href="' + base_entry_url,
+ '">'):
+
+ url = base_entry_url + entry_url
+ yield Message.Queue, url, data
+
+
+class DesktopographyEntryExtractor(DesktopographyExtractor):
+ """Extractor for all resolutions of a desktopography wallpaper"""
+ subcategory = "entry"
+ pattern = BASE_PATTERN + r"/portfolios/([\w-]+)"
+ test = ("https://desktopography.net/portfolios/new-era/",)
+
+ def __init__(self, match):
+ DesktopographyExtractor.__init__(self, match)
+ self.entry = match.group(1)
+
+ def items(self):
+ url = "{}/portfolios/{}".format(self.root, self.entry)
+ page = self.request(url).text
+
+ entry_data = {"entry": self.entry}
+ yield Message.Directory, entry_data
+
+ for image_data in text.extract_iter(
+ page,
+ '<a target="_blank" href="https://desktopography.net',
+ '">'):
+
+ path, _, filename = image_data.partition(
+ '" class="wallpaper-button" download="')
+ text.nameext_from_url(filename, entry_data)
+ yield Message.Url, self.root + path, entry_data
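Each wallpaper entry page lists one anchor per resolution, carrying both the download path and the filename. A standalone sketch of the parse performed by DesktopographyEntryExtractor.items(), with the HTML markers copied from the new file and the sample URL taken from its test; the attribute order inside the anchor is assumed to match what the extractor expects:

    # Sketch of the entry-page parse, using re instead of gallery_dl.text.
    import re
    import requests

    page = requests.get("https://desktopography.net/portfolios/new-era/").text
    pattern = (r'<a target="_blank" href="https://desktopography\.net([^"]+)"'
               r' class="wallpaper-button" download="([^"]+)">')
    for path, filename in re.findall(pattern, page):
        print("https://desktopography.net" + path, "->", filename)
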
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index b4ac742..7dac770 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -46,6 +46,13 @@ class DeviantartExtractor(Extractor):
self.group = False
self.api = None
+ unwatch = self.config("auto-unwatch")
+ if unwatch:
+ self.unwatch = []
+ self.finalize = self._unwatch_premium
+ else:
+ self.unwatch = None
+
if self.quality:
self.quality = ",q_{}".format(self.quality)
@@ -318,44 +325,48 @@ class DeviantartExtractor(Extractor):
except KeyError:
pass
- # check accessibility
- if self.api.refresh_token_key:
- dev = self.api.deviation(deviation["deviationid"], False)
- has_access = dev["premium_folder_data"]["has_access"]
- username = dev["author"]["username"]
- folder = dev["premium_folder_data"]
-
- if not has_access and folder["type"] == "watchers" and \
- self.config("auto-watch"):
- if self.api.user_friends_watch(username):
- has_access = True
- self.log.info(
- "Watching %s for premium folder access", username)
- else:
- self.log.warning(
- "Error when trying to watch %s. "
- "Try again with a new refresh-token", username)
- else:
+ if not self.api.refresh_token_key:
self.log.warning(
"Unable to access premium content (no refresh-token)")
self._fetch_premium = lambda _: None
return None
+ dev = self.api.deviation(deviation["deviationid"], False)
+ folder = dev["premium_folder_data"]
+ username = dev["author"]["username"]
+ has_access = folder["has_access"]
+
+ if not has_access and folder["type"] == "watchers" and \
+ self.config("auto-watch"):
+ if self.unwatch is not None:
+ self.unwatch.append(username)
+ if self.api.user_friends_watch(username):
+ has_access = True
+ self.log.info(
+ "Watching %s for premium folder access", username)
+ else:
+ self.log.warning(
+ "Error when trying to watch %s. "
+ "Try again with a new refresh-token", username)
+
if has_access:
self.log.info("Fetching premium folder data")
else:
self.log.warning("Unable to access premium content (type: %s)",
folder["type"])
- self._fetch_premium = lambda _: None
- return None
- # fill cache
cache = self._premium_cache
for dev in self.api.gallery(
username, folder["gallery_id"], public=False):
- cache[dev["deviationid"]] = dev
+ cache[dev["deviationid"]] = dev if has_access else None
+
return cache[deviation["deviationid"]]
+ def _unwatch_premium(self):
+ for username in self.unwatch:
+ self.log.info("Unwatching %s", username)
+ self.api.user_friends_unwatch(username)
+
class DeviantartUserExtractor(DeviantartExtractor):
"""Extractor for an artist's user profile"""
@@ -823,7 +834,7 @@ class DeviantartWatchPostsExtractor(DeviantartExtractor):
class DeviantartDeviationExtractor(DeviantartExtractor):
"""Extractor for single deviations"""
subcategory = "deviation"
- archive_fmt = "{index}.{extension}"
+ archive_fmt = "g_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)"
test = (
(("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
@@ -1153,13 +1164,15 @@ class DeviantartOAuthAPI():
"mature_content" : self.mature,
}
return self._call(
- endpoint, method="POST", data=data, public=False, fatal=False)
+ endpoint, method="POST", data=data, public=False, fatal=False,
+ ).get("success")
def user_friends_unwatch(self, username):
"""Unwatch a user"""
endpoint = "user/friends/unwatch/" + username
return self._call(
- endpoint, method="POST", public=False, fatal=False)
+ endpoint, method="POST", public=False, fatal=False,
+ ).get("success")
def authenticate(self, refresh_token_key):
"""Authenticate the application by requesting an access token"""
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index d4fd826..992db97 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -46,9 +46,10 @@ class EromeExtractor(Extractor):
user, pos = text.extract(
page, 'href="https://www.erome.com/', '"', pos)
data = {
- "album_id": album_id,
- "title" : text.unescape(title),
- "user" : text.unquote(user),
+ "album_id" : album_id,
+ "title" : text.unescape(title),
+ "user" : text.unquote(user),
+ "_http_headers": {"Referer": url},
}
yield Message.Directory, data
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index 9df2bef..62f7429 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -66,6 +66,8 @@ class FantiaExtractor(Extractor):
"comment": resp["comment"],
"rating": resp["rating"],
"posted_at": resp["posted_at"],
+ "date": text.parse_datetime(
+ resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"),
"fanclub_id": resp["fanclub"]["id"],
"fanclub_user_id": resp["fanclub"]["user"]["id"],
"fanclub_user_name": resp["fanclub"]["user"]["name"],
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index a1470dc..c09eb96 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -88,7 +88,9 @@ class FoolslideChapterExtractor(FoolslideExtractor):
data["chapter_id"] = text.parse_int(imgs[0]["chapter_id"])
yield Message.Directory, data
- for data["page"], image in enumerate(imgs, 1):
+ enum = util.enumerate_reversed if self.config(
+ "page-reverse") else enumerate
+ for data["page"], image in enum(imgs, 1):
try:
url = image["url"]
del image["url"]
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 1b877b3..e09e190 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -176,6 +176,58 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
yield post.attrib
+class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
+ subcategory = "favorite"
+ directory_fmt = ("{category}", "favorites", "{favorite_id}")
+ archive_fmt = "f_{favorite_id}_{id}"
+ per_page = 50
+ pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
+ test = (
+ ("https://rule34.xxx/index.php?page=favorites&s=view&id=1030218", {
+ "count": 3,
+ }),
+ ("https://safebooru.org/index.php?page=favorites&s=view&id=17567", {
+ "count": 2,
+ }),
+ ("https://realbooru.com/index.php?page=favorites&s=view&id=274", {
+ "count": 4,
+ }),
+ ("https://tbib.org/index.php?page=favorites&s=view&id=7881", {
+ "count": 3,
+ }),
+ )
+
+ def __init__(self, match):
+ GelbooruV02Extractor.__init__(self, match)
+ self.favorite_id = match.group(match.lastindex)
+
+ def metadata(self):
+ return {"favorite_id": text.parse_int(self.favorite_id)}
+
+ def posts(self):
+ url = self.root + "/index.php"
+ params = {
+ "page": "favorites",
+ "s" : "view",
+ "id" : self.favorite_id,
+ "pid" : self.page_start * self.per_page,
+ }
+
+ data = {}
+ while True:
+ num_ids = 0
+ page = self.request(url, params=params).text
+
+ for data["id"] in text.extract_iter(page, '" id="p', '"'):
+ num_ids += 1
+ for post in self._api_request(data):
+ yield post.attrib
+
+ if num_ids < self.per_page:
+ return
+ params["pid"] += self.per_page
+
+
class GelbooruV02PostExtractor(GelbooruV02Extractor):
subcategory = "post"
archive_fmt = "{id}"
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index 6d31f7d..2757852 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -10,7 +10,6 @@
from .common import Extractor, Message
from .. import text, exception
-from ..cache import cache
class GfycatExtractor(Extractor):
@@ -155,7 +154,6 @@ class GfycatImageExtractor(GfycatExtractor):
class GfycatAPI():
API_ROOT = "https://api.gfycat.com"
- ACCESS_KEY = "Anr96uuqt9EdamSCwK4txKPjMsf2M95Rfa5FLLhPFucu8H5HTzeutyAa"
def __init__(self, extractor):
self.extractor = extractor
@@ -175,23 +173,8 @@ class GfycatAPI():
params = {"search_text": query, "count": 150}
return self._pagination(endpoint, params)
- @cache(keyarg=1, maxage=3600)
- def _authenticate_impl(self, category):
- if category == "redgifs":
- url = "https://api.redgifs.com/v1/oauth/webtoken"
- else:
- url = "https://weblogin." + category + ".com/oauth/webtoken"
- data = {"access_key": self.ACCESS_KEY}
- headers = {"Referer": self.extractor.root + "/",
- "Origin" : self.extractor.root}
- response = self.extractor.request(
- url, method="POST", headers=headers, json=data)
- return "Bearer " + response.json()["access_token"]
-
def _call(self, endpoint, params=None):
url = self.API_ROOT + endpoint
- self.headers["Authorization"] = self._authenticate_impl(
- self.extractor.category)
return self.extractor.request(
url, params=params, headers=self.headers).json()
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index a40d631..201ffdd 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -141,13 +141,17 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
shortlink = text.extract(page, "rel='shortlink' href='", "'")[0]
data = {
- "action": "manga_get_chapters",
- "manga" : shortlink.rpartition("=")[2],
+ "action" : "manga_get_reading_nav",
+ "manga" : shortlink.rpartition("=")[2],
+ "chapter" : "",
+ "volume_id": "",
+ "style" : "list",
+ "type" : "manga",
}
url = self.root + "/wp-admin/admin-ajax.php"
page = self.request(url, method="POST", data=data).text
- for url in text.extract_iter(page, 'href="', '"', 320):
+ for url in text.extract_iter(page, 'data-redirect="', '"'):
chapter = url.rpartition("/")[2]
results.append((url, self.chapter_data(chapter)))
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 13996d0..d699f07 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -279,3 +279,23 @@ class ImgclickImageExtractor(ImagehostImageExtractor):
url , pos = text.extract(page, '<br><img src="', '"')
filename, pos = text.extract(page, 'alt="', '"', pos)
return url, filename
+
+
+class FappicImageExtractor(ImagehostImageExtractor):
+ """Extractor for single images from fappic.com"""
+ category = "fappic"
+ pattern = r"(?:https?://)?((?:www\.)?fappic\.com/(\w+)/[^/?#]+)"
+ test = ("https://www.fappic.com/98wxqcklyh8k/test.png", {
+ "pattern": r"https://img\d+\.fappic\.com/img/\w+/test\.png",
+ "keyword": "433b1d310b0ff12ad8a71ac7b9d8ba3f8cd1e898",
+ "content": "0c8768055e4e20e7c7259608b67799171b691140",
+ })
+
+ def get_info(self, page):
+ url , pos = text.extract(page, '<a href="/?click"><img src="', '"')
+ filename, pos = text.extract(page, 'alt="', '"', pos)
+
+ if filename.startswith("Porn-Picture-"):
+ filename = filename[13:]
+
+ return url, filename
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 3590e17..983ae37 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -29,7 +29,7 @@ class InstagramExtractor(Extractor):
root = "https://www.instagram.com"
cookiedomain = ".instagram.com"
cookienames = ("sessionid",)
- request_interval = 8.0
+ request_interval = (6.0, 12.0)
def __init__(self, match):
Extractor.__init__(self, match)
@@ -679,7 +679,6 @@ class InstagramStoriesExtractor(InstagramExtractor):
("https://www.instagram.com/stories/instagram/"),
("https://www.instagram.com/stories/highlights/18042509488170095/"),
)
- request_interval = 1.0
def __init__(self, match):
self.highlight_id, self.user = match.groups()
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index a911d35..c5f5ae7 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -9,7 +9,8 @@
"""Extractors for https://kemono.party/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
+from ..cache import cache
import itertools
import re
@@ -70,11 +71,32 @@ class KemonopartyExtractor(Extractor):
post["type"] = file["type"]
url = file["path"]
if url[0] == "/":
- url = self.root + url
+ url = self.root + "/data" + url
+ elif url.startswith("https://kemono.party"):
+ url = self.root + "/data" + url[20:]
text.nameext_from_url(file["name"], post)
yield Message.Url, url, post
+ def login(self):
+ username, password = self._get_auth_info()
+ if username:
+ self._update_cookies(self._login_impl(username, password))
+
+ @cache(maxage=28*24*3600, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = self.root + "/account/login"
+ data = {"username": username, "password": password}
+
+ response = self.request(url, method="POST", data=data)
+ if response.url.endswith("/account/login") and \
+ "Username or password is incorrect" in response.text:
+ raise exception.AuthenticationError()
+
+ return {c.name: c.value for c in response.history[0].cookies}
+
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
@@ -119,7 +141,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
pattern = BASE_PATTERN + r"/post/([^/?#]+)"
test = (
("https://kemono.party/fanbox/user/6993449/post/506575", {
- "pattern": r"https://kemono\.party/files/fanbox"
+ "pattern": r"https://kemono\.party/data/files/fanbox"
r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
"keyword": {
"added": "Wed, 06 May 2020 20:28:02 GMT",
@@ -142,12 +164,12 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
}),
# inline image (#1286)
("https://kemono.party/fanbox/user/7356311/post/802343", {
- "pattern": r"https://kemono\.party/inline/fanbox"
+ "pattern": r"https://kemono\.party/data/inline/fanbox"
r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg",
}),
# kemono.party -> data.kemono.party
("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
- "pattern": r"https://kemono\.party/(file|attachment)s"
+ "pattern": r"https://kemono\.party/data/(file|attachment)s"
r"/gumroad/trylsc/IURjT/",
}),
# username (#1548, #1652)
@@ -173,3 +195,25 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
def posts(self):
posts = self.request(self.api_url).json()
return (posts[0],) if len(posts) > 1 else posts
+
+
+class KemonopartyFavoriteExtractor(KemonopartyExtractor):
+ """Extractor for kemono.party favorites"""
+ subcategory = "favorite"
+ pattern = r"(?:https?://)?kemono\.party/favorites"
+ test = ("https://kemono.party/favorites", {
+ "pattern": KemonopartyUserExtractor.pattern,
+ "url": "f4b5b796979bcba824af84206578c79101c7f0e1",
+ "count": 3,
+ })
+
+ def items(self):
+ self._prepare_ddosguard_cookies()
+ self.login()
+
+ users = self.request(self.root + "/api/favorites").json()
+ for user in users:
+ user["_extractor"] = KemonopartyUserExtractor
+ url = "{}/{}/user/{}".format(
+ self.root, user["service"], user["id"])
+ yield Message.Queue, url, user
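The login added above POSTs the credentials, detects failure by landing back on the login form, and harvests the session cookies from the first response in the redirect chain, since a successful login answers with a redirect. The same flow with plain requests, form field names as in the diff:

    # Sketch of the kemono.party login flow; cookie names are whatever the
    # site sets, so none are hard-coded here.
    import requests

    def kemonoparty_login(username, password):
        url = "https://kemono.party/account/login"
        data = {"username": username, "password": password}
        response = requests.post(url, data=data)
        if response.url.endswith("/account/login") and \
                "Username or password is incorrect" in response.text:
            raise ValueError("authentication failed")
        # cookies were set on the redirect response, not the final page
        return {c.name: c.value for c in response.history[0].cookies}
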
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 53ae76a..634a92d 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -37,7 +37,7 @@ class MangadexExtractor(Extractor):
def items(self):
for chapter in self.chapters():
- uuid = chapter["data"]["id"]
+ uuid = chapter["id"]
data = self._transform(chapter)
data["_extractor"] = MangadexChapterExtractor
self._cache[uuid] = (chapter, data)
@@ -51,8 +51,8 @@ class MangadexExtractor(Extractor):
for item in manga["relationships"]:
relationships[item["type"]].append(item["id"])
- cattributes = chapter["data"]["attributes"]
- mattributes = manga["data"]["attributes"]
+ cattributes = chapter["attributes"]
+ mattributes = manga["attributes"]
lang = cattributes["translatedLanguage"].partition("-")[0]
if cattributes["chapter"]:
@@ -63,12 +63,12 @@ class MangadexExtractor(Extractor):
data = {
"manga" : (mattributes["title"].get("en") or
next(iter(mattributes["title"].values()))),
- "manga_id": manga["data"]["id"],
+ "manga_id": manga["id"],
"title" : cattributes["title"],
"volume" : text.parse_int(cattributes["volume"]),
"chapter" : text.parse_int(chnum),
"chapter_minor": sep + minor,
- "chapter_id": chapter["data"]["id"],
+ "chapter_id": chapter["id"],
"date" : text.parse_datetime(cattributes["publishAt"]),
"lang" : lang,
"language": util.code_to_language(lang),
@@ -77,13 +77,13 @@ class MangadexExtractor(Extractor):
if self.config("metadata"):
data["artist"] = [
- self.api.author(uuid)["data"]["attributes"]["name"]
+ self.api.author(uuid)["attributes"]["name"]
for uuid in relationships["artist"]]
data["author"] = [
- self.api.author(uuid)["data"]["attributes"]["name"]
+ self.api.author(uuid)["attributes"]["name"]
for uuid in relationships["author"]]
data["group"] = [
- self.api.group(uuid)["data"]["attributes"]["name"]
+ self.api.group(uuid)["attributes"]["name"]
for uuid in relationships["scanlation_group"]]
return data
@@ -118,11 +118,14 @@ class MangadexChapterExtractor(MangadexExtractor):
data = self._transform(chapter)
yield Message.Directory, data
- cattributes = chapter["data"]["attributes"]
+ cattributes = chapter["attributes"]
data["_http_headers"] = self._headers
base = "{}/data/{}/".format(
self.api.athome_server(self.uuid)["baseUrl"], cattributes["hash"])
- for data["page"], page in enumerate(cattributes["data"], 1):
+
+ enum = util.enumerate_reversed if self.config(
+ "page-reverse") else enumerate
+ for data["page"], page in enum(cattributes["data"], 1):
text.nameext_from_url(page, data)
yield Message.Url, base + page, data
@@ -153,6 +156,9 @@ class MangadexMangaExtractor(MangadexExtractor):
("https://mangadex.org/title/7c1e2742-a086-4fd3-a3be-701fd6cf0be9", {
"count": 1,
}),
+ ("https://mangadex.org/title/584ef094-b2ab-40ce-962c-bce341fb9d10", {
+ "count": ">= 20",
+ })
)
def chapters(self):
@@ -189,18 +195,18 @@ class MangadexAPI():
@memcache(keyarg=1)
def author(self, uuid):
- return self._call("/author/" + uuid)
+ return self._call("/author/" + uuid)["data"]
def chapter(self, uuid):
- return self._call("/chapter/" + uuid)
+ return self._call("/chapter/" + uuid)["data"]
@memcache(keyarg=1)
def group(self, uuid):
- return self._call("/group/" + uuid)
+ return self._call("/group/" + uuid)["data"]
@memcache(keyarg=1)
def manga(self, uuid):
- return self._call("/manga/" + uuid)
+ return self._call("/manga/" + uuid)["data"]
def manga_feed(self, uuid):
config = self.extractor.config
@@ -209,6 +215,8 @@ class MangadexAPI():
"order[volume]" : order,
"order[chapter]" : order,
"translatedLanguage[]": config("lang"),
+ "contentRating[]" : [
+ "safe", "suggestive", "erotica", "pornographic"],
}
return self._pagination("/manga/" + uuid + "/feed", params)
@@ -271,7 +279,7 @@ class MangadexAPI():
while True:
data = self._call(endpoint, params)
- yield from data["results"]
+ yield from data["data"]
params["offset"] = data["offset"] + data["limit"]
if params["offset"] >= data["total"]:
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index ff0bfc3..cd7cabb 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -87,7 +87,7 @@ BASE_PATTERN = MastodonExtractor.update(INSTANCES)
class MastodonUserExtractor(MastodonExtractor):
"""Extractor for all images of an account/user"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/@([^/?#]+)(?:/media)?/?$"
+ pattern = BASE_PATTERN + r"/(?:@|users/)([^/?#]+)(?:/media)?/?$"
test = (
("https://mastodon.social/@jk", {
"pattern": r"https://files.mastodon.social/media_attachments"
@@ -100,26 +100,44 @@ class MastodonUserExtractor(MastodonExtractor):
"count": 60,
}),
("https://baraag.net/@pumpkinnsfw"),
+ ("https://mastodon.social/@id:10843"),
+ ("https://mastodon.social/users/id:10843"),
+ ("https://mastodon.social/users/jk"),
)
def statuses(self):
api = MastodonAPI(self)
- username = self.item
- handle = "@{}@{}".format(username, self.instance)
- for account in api.account_search(handle, 1):
- if account["username"] == username:
- break
- else:
- raise exception.NotFoundError("account")
-
return api.account_statuses(
- account["id"],
+ api.account_id_by_username(self.item),
only_media=not self.config("text-posts", False),
exclude_replies=not self.replies,
)
+class MastodonFollowingExtractor(MastodonExtractor):
+ """Extractor for followed mastodon users"""
+ subcategory = "following"
+ pattern = BASE_PATTERN + r"/users/([^/?#]+)/following"
+ test = (
+ ("https://mastodon.social/users/0x4f/following", {
+ "extractor": False,
+ "count": ">= 20",
+ }),
+ ("https://mastodon.social/users/id:10843/following"),
+ ("https://pawoo.net/users/yoru_nine/following"),
+ ("https://baraag.net/users/pumpkinnsfw/following"),
+ )
+
+ def items(self):
+ api = MastodonAPI(self)
+ account_id = api.account_id_by_username(self.item)
+
+ for account in api.account_following(account_id):
+ account["_extractor"] = MastodonUserExtractor
+ yield Message.Queue, account["url"], account
+
+
class MastodonStatusExtractor(MastodonExtractor):
"""Extractor for images from a status"""
subcategory = "status"
@@ -165,6 +183,20 @@ class MastodonAPI():
self.headers = {"Authorization": "Bearer " + access_token}
+ def account_id_by_username(self, username):
+ if username.startswith("id:"):
+ return username[3:]
+
+ handle = "@{}@{}".format(username, self.extractor.instance)
+ for account in self.account_search(handle, 1):
+ if account["username"] == username:
+ return account["id"]
+ raise exception.NotFoundError("account")
+
+ def account_following(self, account_id):
+ endpoint = "/v1/accounts/{}/following".format(account_id)
+ return self._pagination(endpoint, None)
+
def account_search(self, query, limit=40):
"""Search for accounts"""
endpoint = "/v1/accounts/search"
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index 44411c8..4dc880f 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -168,7 +168,7 @@ class NozomiTagExtractor(NozomiExtractor):
def __init__(self, match):
NozomiExtractor.__init__(self, match)
tags, self.pnum = match.groups()
- self.tags = text.unquote(tags).lower()
+ self.tags = text.unquote(tags)
self.nozomi = "/nozomi/{}.nozomi".format(self.tags)
def metadata(self):
@@ -187,7 +187,7 @@ class NozomiSearchExtractor(NozomiExtractor):
def __init__(self, match):
NozomiExtractor.__init__(self, match)
- self.tags = text.unquote(match.group(1)).lower().split()
+ self.tags = text.unquote(match.group(1)).split()
def metadata(self):
return {"search_tags": self.tags}
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 4dc1e43..6812f35 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -415,7 +415,7 @@ class OAuthPixiv(OAuthBase):
print("""
1) Open your browser's Developer Tools (F12) and switch to the Network tab
2) Login
-4) Select the last network monitor entry ('callback?state=...')
+3) Select the last network monitor entry ('callback?state=...')
4) Copy its 'code' query parameter, paste it below, and press Enter
""")
code = input("code: ")
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 8953edd..43c7e50 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -139,7 +139,7 @@ class RedditSubredditExtractor(RedditExtractor):
"""Extractor for URLs from subreddits on reddit.com"""
subcategory = "subreddit"
pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/r/"
- r"([^/?#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?(?:$|#)")
+ r"([^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?(?:$|#)")
test = (
("https://www.reddit.com/r/lavaporn/", {
"range": "1-20",
@@ -152,9 +152,11 @@ class RedditSubredditExtractor(RedditExtractor):
)
def __init__(self, match):
+ self.subreddit, sub, params = match.groups()
+ self.params = text.parse_query(params)
+ if sub:
+ self.subcategory += "-" + sub
RedditExtractor.__init__(self, match)
- self.subreddit = match.group(1)
- self.params = text.parse_query(match.group(2))
def submissions(self):
return self.api.submissions_subreddit(self.subreddit, self.params)
@@ -164,7 +166,7 @@ class RedditUserExtractor(RedditExtractor):
"""Extractor for URLs from posts by a reddit user"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/u(?:ser)?/"
- r"([^/?#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?")
+ r"([^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?")
test = (
("https://www.reddit.com/user/username/", {
"count": ">= 2",
@@ -175,9 +177,11 @@ class RedditUserExtractor(RedditExtractor):
)
def __init__(self, match):
+ self.user, sub, params = match.groups()
+ self.params = text.parse_query(params)
+ if sub:
+ self.subcategory += "-" + sub
RedditExtractor.__init__(self, match)
- self.user = match.group(1)
- self.params = text.parse_query(match.group(2))
def submissions(self):
return self.api.submissions_user(self.user, self.params)
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 576564c..e078bef 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -72,5 +72,3 @@ class RedgifsImageExtractor(RedgifsExtractor):
class RedgifsAPI(GfycatAPI):
API_ROOT = "https://api.redgifs.com"
- ACCESS_KEY = ("dBLwVuGn9eq4dtXLs8WSfpjcYFY7bPQe"
- "AqGPSFgqeW5B9uzj2cMVhF63pTFF4Rg9")
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 2dfcb55..4a3f6cd 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -50,7 +50,7 @@ class TwitterExtractor(Extractor):
if not self.retweets and "retweeted_status_id_str" in tweet:
self.log.debug("Skipping %s (retweet)", tweet["id_str"])
continue
- if not self.quoted and "quoted" in tweet:
+ if not self.quoted and "quoted_by_id_str" in tweet:
self.log.debug("Skipping %s (quoted tweet)", tweet["id_str"])
continue
if "in_reply_to_user_id_str" in tweet and (
@@ -139,8 +139,10 @@ class TwitterExtractor(Extractor):
for size in ("original", "x_large", "large", "small"):
key = prefix + size
if key in bvals:
- files.append(bvals[key]["image_value"])
- return
+ value = bvals[key].get("image_value")
+ if value and "url" in value:
+ files.append(value)
+ return
elif self.videos:
url = "ytdl:{}/i/web/status/{}".format(self.root, tweet["id_str"])
files.append({"url": url})
@@ -199,6 +201,8 @@ class TwitterExtractor(Extractor):
if "in_reply_to_screen_name" in tweet:
tdata["reply_to"] = tweet["in_reply_to_screen_name"]
+ if "quoted_by_id_str" in tweet:
+ tdata["quote_by"] = text.parse_int(tweet["quoted_by_id_str"])
if "author" in tweet:
tdata["author"] = self._transform_user(tweet["author"])
@@ -316,7 +320,7 @@ class TwitterExtractor(Extractor):
class TwitterTimelineExtractor(TwitterExtractor):
- """Extractor for all images from a user's timeline"""
+ """Extractor for Tweets from a user's timeline"""
subcategory = "timeline"
pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
@@ -341,8 +345,25 @@ class TwitterTimelineExtractor(TwitterExtractor):
return TwitterAPI(self).timeline_profile(self.user)
+class TwitterRepliesExtractor(TwitterExtractor):
+ """Extractor for Tweets from a user's timeline including replies"""
+ subcategory = "replies"
+ pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/with_replies(?!\w)"
+ test = (
+ ("https://twitter.com/supernaturepics/with_replies", {
+ "range": "1-40",
+ "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
+ }),
+ ("https://mobile.twitter.com/supernaturepics/with_replies#t"),
+ ("https://www.twitter.com/id:2976459548/with_replies"),
+ )
+
+ def tweets(self):
+ return TwitterAPI(self).timeline_profile(self.user, replies=True)
+
+
class TwitterMediaExtractor(TwitterExtractor):
- """Extractor for all images from a user's Media Tweets"""
+ """Extractor for Tweets from a user's Media timeline"""
subcategory = "media"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/media(?!\w)"
test = (
@@ -652,11 +673,11 @@ class TwitterAPI():
endpoint = "/2/timeline/conversation/{}.json".format(conversation_id)
return self._pagination(endpoint)
- def timeline_profile(self, screen_name):
+ def timeline_profile(self, screen_name, replies=False):
user_id = self._user_id_by_screen_name(screen_name)
endpoint = "/2/timeline/profile/{}.json".format(user_id)
params = self.params.copy()
- params["include_tweet_replies"] = "false"
+ params["include_tweet_replies"] = "true" if replies else "false"
return self._pagination(endpoint, params)
def timeline_media(self, screen_name):
@@ -886,7 +907,7 @@ class TwitterAPI():
quoted = quoted.copy()
quoted["author"] = users[quoted["user_id_str"]]
quoted["user"] = tweet["user"]
- quoted["quoted"] = True
+ quoted["quoted_by_id_str"] = tweet["id_str"]
yield quoted
# update cursor value
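
Quoted Tweets are now tagged with the quoting Tweet's ID instead of a bare "quoted" flag; the skip check near the top of this file and the new quote_by metadata field both key off that marker. A toy illustration with dummy IDs:

    # Dummy values; only the key names come from the diff.
    tweet = {"id_str": "100"}                       # the quoting Tweet
    quoted = {"id_str": "99", "user_id_str": "1"}   # the Tweet being quoted
    quoted["quoted_by_id_str"] = tweet["id_str"]    # set during pagination
    if "quoted_by_id_str" in quoted:                # what "quoted": false skips
        quote_by = int(quoted["quoted_by_id_str"])  # exposed as "quote_by"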