author     2021-10-05 23:30:05 -0400
committer  2021-10-05 23:30:05 -0400
commit     34ba2951b8c523713425c98addb9256ea05c946f (patch)
tree       6ec7e96d0c6e6f6e94b6b97ecd8c0a414ceef93d /gallery_dl/extractor
parent     3f5483df9075ae526f4c54f4cbe80edeabf6d4cc (diff)
New upstream version 1.19.0 (tag: upstream/1.19.0)
Diffstat (limited to 'gallery_dl/extractor')
23 files changed, 460 insertions, 125 deletions
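
Several hunks below call helpers from gallery_dl/util.py that are not part of this diff. The first, util.build_duration_func(), is what lets common.py accept either a fixed sleep-request value or a (min, max) range such as Instagram's new request_interval = (6.0, 12.0). The sketch below is an assumption about its semantics inferred from the call sites, not the shipped implementation; the name `minimum` stands in for whatever the real parameter is called:

    import random

    def build_duration_func(duration, minimum=0.0):
        # Sketch only: map a falsy value to None (no sleeping), a (min, max)
        # pair to a randomized delay, and a plain number to a fixed delay,
        # never going below `minimum` (request_interval_min at the call site).
        if not duration:
            return None
        if isinstance(duration, (list, tuple)):        # e.g. (6.0, 12.0)
            lower, upper = duration
            return lambda: max(random.uniform(lower, upper), minimum)
        return lambda: max(duration, minimum)          # plain number

Under that reading, each request() draws a fresh delay via self._interval() instead of sleeping a fixed interval, which matches the common.py hunk replacing self.request_interval with self._interval().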
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index f68ea9f..c512548 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -28,6 +28,7 @@ modules = [
     "comicvine",
     "cyberdrop",
     "danbooru",
+    "desktopography",
     "deviantart",
     "dynastyscans",
     "e621",
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index f2ad0ab..f687ff8 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -158,7 +158,8 @@ class ArtstationUserExtractor(ArtstationExtractor):
 
     def projects(self):
         url = "{}/users/{}/projects.json".format(self.root, self.user)
-        return self._pagination(url)
+        params = {"album_id": "all"}
+        return self._pagination(url, params)
 
 
 class ArtstationAlbumExtractor(ArtstationExtractor):
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 0d0ad70..06ec571 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -29,7 +29,6 @@ class AryionExtractor(Extractor):
         Extractor.__init__(self, match)
         self.user = match.group(1)
         self.recursive = True
-        self._needle = "class='gallery-item' id='"
 
     def login(self):
         if self._check_cookies(self.cookienames):
@@ -56,25 +55,50 @@ class AryionExtractor(Extractor):
 
     def items(self):
         self.login()
+        data = self.metadata()
 
         for post_id in self.posts():
             post = self._parse_post(post_id)
             if post:
+                if data:
+                    post.update(data)
                 yield Message.Directory, post
                 yield Message.Url, post["url"], post
             elif post is False and self.recursive:
                 base = self.root + "/g4/view/"
                 data = {"_extractor": AryionPostExtractor}
-                for post_id in self._pagination(base + post_id):
+                for post_id in self._pagination_params(base + post_id):
                     yield Message.Queue, base + post_id, data
 
     def posts(self):
         """Yield relevant post IDs"""
 
-    def _pagination(self, url):
+    def metadata(self):
+        """Return general metadata"""
+
+    def _pagination_params(self, url, params=None):
+        if params is None:
+            params = {"p": 1}
+        else:
+            params["p"] = text.parse_int(params.get("p"), 1)
+
+        while True:
+            page = self.request(url, params=params).text
+
+            cnt = 0
+            for post_id in text.extract_iter(
+                    page, "class='gallery-item' id='", "'"):
+                cnt += 1
+                yield post_id
+
+            if cnt < 40:
+                return
+            params["p"] += 1
+
+    def _pagination_next(self, url):
         while True:
             page = self.request(url).text
-            yield from text.extract_iter(page, self._needle, "'")
+            yield from text.extract_iter(page, "thumb' href='/g4/view/", "'")
 
             pos = page.find("Next >>")
             if pos < 0:
@@ -180,11 +204,30 @@ class AryionGalleryExtractor(AryionExtractor):
 
     def posts(self):
         if self.recursive:
             url = "{}/g4/gallery/{}".format(self.root, self.user)
-            return self._pagination(url)
+            return self._pagination_params(url)
         else:
-            self._needle = "thumb' href='/g4/view/"
             url = "{}/g4/latest.php?name={}".format(self.root, self.user)
-            return util.advance(self._pagination(url), self.offset)
+            return util.advance(self._pagination_next(url), self.offset)
+
+
+class AryionTagExtractor(AryionExtractor):
+    """Extractor for tag searches on eka's portal"""
+    subcategory = "tag"
+    directory_fmt = ("{category}", "tags", "{search_tags}")
+    archive_fmt = "t_{search_tags}_{id}"
+    pattern = BASE_PATTERN + r"/tags\.php\?([^#]+)"
+    test = ("https://aryion.com/g4/tags.php?tag=star+wars&p=19", {
+        "count": ">= 5",
+    })
+
+    def metadata(self):
+        self.params = text.parse_query(self.user)
+        self.user = None
+        return {"search_tags": self.params.get("tag")}
+
+    def posts(self):
+        url = self.root + "/g4/tags.php"
+        return self._pagination_params(url, self.params)
 
 
 class AryionPostExtractor(AryionExtractor):
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index d9f69ab..4f42477 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -42,6 +42,7 @@ class Extractor():
     def __init__(self, match):
         self.log = logging.getLogger(self.category)
         self.url = match.string
+        self.finalize = None
 
         if self.basecategory:
             self.config = self._config_shared
@@ -53,13 +54,13 @@ class Extractor():
         self._retries = self.config("retries", 4)
         self._timeout = self.config("timeout", 30)
         self._verify = self.config("verify", True)
-        self.request_interval = self.config(
-            "sleep-request", self.request_interval)
+        self._interval = util.build_duration_func(
+            self.config("sleep-request", self.request_interval),
+            self.request_interval_min,
+        )
 
         if self._retries < 0:
             self._retries = float("inf")
-        if self.request_interval < self.request_interval_min:
-            self.request_interval = self.request_interval_min
 
         self._init_session()
         self._init_cookies()
@@ -102,15 +103,19 @@ class Extractor():
 
     def request(self, url, *, method="GET", session=None, retries=None,
                 encoding=None, fatal=True, notfound=None, **kwargs):
-        tries = 1
-        retries = self._retries if retries is None else retries
-        session = self.session if session is None else session
-        kwargs.setdefault("timeout", self._timeout)
-        kwargs.setdefault("verify", self._verify)
+        if retries is None:
+            retries = self._retries
+        if session is None:
+            session = self.session
+        if "timeout" not in kwargs:
+            kwargs["timeout"] = self._timeout
+        if "verify" not in kwargs:
+            kwargs["verify"] = self._verify
         response = None
+        tries = 1
 
-        if self.request_interval:
-            seconds = (self.request_interval -
+        if self._interval:
+            seconds = (self._interval() -
                        (time.time() - Extractor.request_timestamp))
             if seconds > 0.0:
                 self.log.debug("Sleeping for %.5s seconds", seconds)
@@ -442,16 +447,23 @@ class GalleryExtractor(Extractor):
         imgs = self.images(page)
 
         if "count" in data:
-            images = zip(
-                range(1, data["count"]+1),
-                imgs,
-            )
+            if self.config("page-reverse"):
+                images = util.enumerate_reversed(imgs, 1, data["count"])
+            else:
+                images = zip(
+                    range(1, data["count"]+1),
+                    imgs,
+                )
         else:
+            enum = enumerate
             try:
                 data["count"] = len(imgs)
             except TypeError:
                 pass
-            images = enumerate(imgs, 1)
+            else:
+                if self.config("page-reverse"):
+                    enum = util.enumerate_reversed
+            images = enum(imgs, 1)
 
         yield Message.Directory, data
         for data[self.enum], (url, imgdata) in images:
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index e354cb7..2004921 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -15,7 +15,7 @@ class CyberdropAlbumExtractor(Extractor):
     category = "cyberdrop"
     subcategory = "album"
     root = "https://cyberdrop.me"
-    directory_fmt = ("{category}", "{album_id} {album_name}")
+    directory_fmt = ("{category}", "{album_name} ({album_id})")
     archive_fmt = "{album_id}_{id}"
     pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)"
     test = ("https://cyberdrop.me/a/keKRjm4t", {
diff --git a/gallery_dl/extractor/desktopography.py b/gallery_dl/extractor/desktopography.py
new file mode 100644
index 0000000..363341a
--- /dev/null
+++ b/gallery_dl/extractor/desktopography.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://desktopography.net/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?desktopography\.net"
+
+
+class DesktopographyExtractor(Extractor):
+    """Base class for desktopography extractors"""
+    category = "desktopography"
+    archive_fmt = "{filename}"
+    root = "https://desktopography.net"
+
+
+class DesktopographySiteExtractor(DesktopographyExtractor):
+    """Extractor for all desktopography exhibitions"""
+    subcategory = "site"
+    pattern = BASE_PATTERN + r"/$"
+    test = ("https://desktopography.net/",)
+
+    def items(self):
+        page = self.request(self.root).text
+        data = {"_extractor": DesktopographyExhibitionExtractor}
+
+        for exhibition_year in text.extract_iter(
+                page,
+                '<a href="https://desktopography.net/exhibition-',
+                '/">'):
+
+            url = self.root + "/exhibition-" + exhibition_year + "/"
+            yield Message.Queue, url, data
+
+
+class DesktopographyExhibitionExtractor(DesktopographyExtractor):
+    """Extractor for a yearly desktopography exhibition"""
+    subcategory = "exhibition"
+    pattern = BASE_PATTERN + r"/exhibition-([^/?#]+)/"
+    test = ("https://desktopography.net/exhibition-2020/",)
+
+    def __init__(self, match):
+        DesktopographyExtractor.__init__(self, match)
+        self.year = match.group(1)
+
+    def items(self):
+        url = "{}/exhibition-{}/".format(self.root, self.year)
+        base_entry_url = "https://desktopography.net/portfolios/"
+        page = self.request(url).text
+
+        data = {
+            "_extractor": DesktopographyEntryExtractor,
+            "year": self.year,
+        }
+
+        for entry_url in text.extract_iter(
+                page,
+                '<a class="overlay-background" href="' + base_entry_url,
+                '">'):
+
+            url = base_entry_url + entry_url
+            yield Message.Queue, url, data
+
+
+class DesktopographyEntryExtractor(DesktopographyExtractor):
+    """Extractor for all resolutions of a desktopography wallpaper"""
+    subcategory = "entry"
+    pattern = BASE_PATTERN + r"/portfolios/([\w-]+)"
+    test = ("https://desktopography.net/portfolios/new-era/",)
+
+    def __init__(self, match):
+        DesktopographyExtractor.__init__(self, match)
+        self.entry = match.group(1)
+
+    def items(self):
+        url = "{}/portfolios/{}".format(self.root, self.entry)
+        page = self.request(url).text
+
+        entry_data = {"entry": self.entry}
+        yield Message.Directory, entry_data
+
+        for image_data in text.extract_iter(
+                page,
+                '<a target="_blank" href="https://desktopography.net',
+                '">'):
+
+            path, _, filename = image_data.partition(
+                '" class="wallpaper-button" download="')
+            text.nameext_from_url(filename, entry_data)
+            yield Message.Url, self.root + path, entry_data
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index b4ac742..7dac770 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -46,6 +46,13 @@ class DeviantartExtractor(Extractor):
         self.group = False
         self.api = None
 
+        unwatch = self.config("auto-unwatch")
+        if unwatch:
+            self.unwatch = []
+            self.finalize = self._unwatch_premium
+        else:
+            self.unwatch = None
+
         if self.quality:
             self.quality = ",q_{}".format(self.quality)
 
@@ -318,44 +325,48 @@ class DeviantartExtractor(Extractor):
         except KeyError:
             pass
 
-        # check accessibility
-        if self.api.refresh_token_key:
-            dev = self.api.deviation(deviation["deviationid"], False)
-            has_access = dev["premium_folder_data"]["has_access"]
-            username = dev["author"]["username"]
-            folder = dev["premium_folder_data"]
-
-            if not has_access and folder["type"] == "watchers" and \
-                    self.config("auto-watch"):
-                if self.api.user_friends_watch(username):
-                    has_access = True
-                    self.log.info(
-                        "Watching %s for premium folder access", username)
-                else:
-                    self.log.warning(
-                        "Error when trying to watch %s. "
-                        "Try again with a new refresh-token", username)
-        else:
+        if not self.api.refresh_token_key:
             self.log.warning(
                 "Unable to access premium content (no refresh-token)")
             self._fetch_premium = lambda _: None
             return None
 
+        dev = self.api.deviation(deviation["deviationid"], False)
+        folder = dev["premium_folder_data"]
+        username = dev["author"]["username"]
+        has_access = folder["has_access"]
+
+        if not has_access and folder["type"] == "watchers" and \
+                self.config("auto-watch"):
+            if self.unwatch is not None:
+                self.unwatch.append(username)
+            if self.api.user_friends_watch(username):
+                has_access = True
+                self.log.info(
+                    "Watching %s for premium folder access", username)
+            else:
+                self.log.warning(
+                    "Error when trying to watch %s. "
+                    "Try again with a new refresh-token", username)
+
         if has_access:
             self.log.info("Fetching premium folder data")
         else:
             self.log.warning("Unable to access premium content (type: %s)",
                              folder["type"])
-            self._fetch_premium = lambda _: None
-            return None
-
         # fill cache
         cache = self._premium_cache
         for dev in self.api.gallery(
                 username, folder["gallery_id"], public=False):
-            cache[dev["deviationid"]] = dev
+            cache[dev["deviationid"]] = dev if has_access else None
+
         return cache[deviation["deviationid"]]
 
+    def _unwatch_premium(self):
+        for username in self.unwatch:
+            self.log.info("Unwatching %s", username)
+            self.api.user_friends_unwatch(username)
+
 
 class DeviantartUserExtractor(DeviantartExtractor):
     """Extractor for an artist's user profile"""
@@ -823,7 +834,7 @@ class DeviantartWatchPostsExtractor(DeviantartExtractor):
 class DeviantartDeviationExtractor(DeviantartExtractor):
     """Extractor for single deviations"""
     subcategory = "deviation"
-    archive_fmt = "{index}.{extension}"
+    archive_fmt = "g_{_username}_{index}.{extension}"
     pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)"
     test = (
         (("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
@@ -1153,13 +1164,15 @@ class DeviantartOAuthAPI():
             "mature_content" : self.mature,
         }
         return self._call(
-            endpoint, method="POST", data=data, public=False, fatal=False)
+            endpoint, method="POST", data=data, public=False, fatal=False,
+        ).get("success")
 
     def user_friends_unwatch(self, username):
         """Unwatch a user"""
         endpoint = "user/friends/unwatch/" + username
         return self._call(
-            endpoint, method="POST", public=False, fatal=False)
+            endpoint, method="POST", public=False, fatal=False,
+        ).get("success")
 
     def authenticate(self, refresh_token_key):
         """Authenticate the application by requesting an access token"""
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index d4fd826..992db97 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -46,9 +46,10 @@ class EromeExtractor(Extractor):
             user, pos = text.extract(
                 page, 'href="https://www.erome.com/', '"', pos)
             data = {
-                "album_id": album_id,
-                "title"   : text.unescape(title),
-                "user"    : text.unquote(user),
+                "album_id"     : album_id,
+                "title"        : text.unescape(title),
+                "user"         : text.unquote(user),
+                "_http_headers": {"Referer": url},
             }
 
             yield Message.Directory, data
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index 9df2bef..62f7429 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -66,6 +66,8 @@ class FantiaExtractor(Extractor):
             "comment": resp["comment"],
             "rating": resp["rating"],
             "posted_at": resp["posted_at"],
+            "date": text.parse_datetime(
+                resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"),
             "fanclub_id": resp["fanclub"]["id"],
             "fanclub_user_id": resp["fanclub"]["user"]["id"],
             "fanclub_user_name": resp["fanclub"]["user"]["name"],
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index a1470dc..c09eb96 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -88,7 +88,9 @@ class FoolslideChapterExtractor(FoolslideExtractor):
         data["chapter_id"] = text.parse_int(imgs[0]["chapter_id"])
 
         yield Message.Directory, data
-        for data["page"], image in enumerate(imgs, 1):
+        enum = util.enumerate_reversed if self.config(
+            "page-reverse") else enumerate
+        for data["page"], image in enum(imgs, 1):
             try:
                 url = image["url"]
                 del image["url"]
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 1b877b3..e09e190 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -176,6 +176,58 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
             yield post.attrib
 
 
+class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
+    subcategory = "favorite"
+    directory_fmt = ("{category}", "favorites", "{favorite_id}")
+    archive_fmt = "f_{favorite_id}_{id}"
+    per_page = 50
+    pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
+    test = (
+        ("https://rule34.xxx/index.php?page=favorites&s=view&id=1030218", {
+            "count": 3,
+        }),
+        ("https://safebooru.org/index.php?page=favorites&s=view&id=17567", {
+            "count": 2,
+        }),
+        ("https://realbooru.com/index.php?page=favorites&s=view&id=274", {
+            "count": 4,
+        }),
+        ("https://tbib.org/index.php?page=favorites&s=view&id=7881", {
+            "count": 3,
+        }),
+    )
+
+    def __init__(self, match):
+        GelbooruV02Extractor.__init__(self, match)
+        self.favorite_id = match.group(match.lastindex)
+
+    def metadata(self):
+        return {"favorite_id": text.parse_int(self.favorite_id)}
+
+    def posts(self):
+        url = self.root + "/index.php"
+        params = {
+            "page": "favorites",
+            "s"   : "view",
+            "id"  : self.favorite_id,
+            "pid" : self.page_start * self.per_page,
+        }
+
+        data = {}
+        while True:
+            num_ids = 0
+            page = self.request(url, params=params).text
+
+            for data["id"] in text.extract_iter(page, '" id="p', '"'):
+                num_ids += 1
+                for post in self._api_request(data):
+                    yield post.attrib
+
+            if num_ids < self.per_page:
+                return
+            params["pid"] += self.per_page
+
+
 class GelbooruV02PostExtractor(GelbooruV02Extractor):
     subcategory = "post"
     archive_fmt = "{id}"
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index 6d31f7d..2757852 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -10,7 +10,6 @@
 
 from .common import Extractor, Message
 from .. import text, exception
-from ..cache import cache
 
 
 class GfycatExtractor(Extractor):
@@ -155,7 +154,6 @@ class GfycatImageExtractor(GfycatExtractor):
 
 class GfycatAPI():
     API_ROOT = "https://api.gfycat.com"
-    ACCESS_KEY = "Anr96uuqt9EdamSCwK4txKPjMsf2M95Rfa5FLLhPFucu8H5HTzeutyAa"
 
     def __init__(self, extractor):
         self.extractor = extractor
@@ -175,23 +173,8 @@ class GfycatAPI():
         params = {"search_text": query, "count": 150}
         return self._pagination(endpoint, params)
 
-    @cache(keyarg=1, maxage=3600)
-    def _authenticate_impl(self, category):
-        if category == "redgifs":
-            url = "https://api.redgifs.com/v1/oauth/webtoken"
-        else:
-            url = "https://weblogin." + category + ".com/oauth/webtoken"
-        data = {"access_key": self.ACCESS_KEY}
-        headers = {"Referer": self.extractor.root + "/",
-                   "Origin" : self.extractor.root}
-        response = self.extractor.request(
-            url, method="POST", headers=headers, json=data)
-        return "Bearer " + response.json()["access_token"]
-
     def _call(self, endpoint, params=None):
         url = self.API_ROOT + endpoint
-        self.headers["Authorization"] = self._authenticate_impl(
-            self.extractor.category)
         return self.extractor.request(
             url, params=params, headers=self.headers).json()
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index a40d631..201ffdd 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -141,13 +141,17 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
 
         shortlink = text.extract(page, "rel='shortlink' href='", "'")[0]
 
         data = {
-            "action": "manga_get_chapters",
-            "manga" : shortlink.rpartition("=")[2],
+            "action"   : "manga_get_reading_nav",
+            "manga"    : shortlink.rpartition("=")[2],
+            "chapter"  : "",
+            "volume_id": "",
+            "style"    : "list",
+            "type"     : "manga",
         }
         url = self.root + "/wp-admin/admin-ajax.php"
         page = self.request(url, method="POST", data=data).text
 
-        for url in text.extract_iter(page, 'href="', '"', 320):
+        for url in text.extract_iter(page, 'data-redirect="', '"'):
             chapter = url.rpartition("/")[2]
             results.append((url, self.chapter_data(chapter)))
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 13996d0..d699f07 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -279,3 +279,23 @@ class ImgclickImageExtractor(ImagehostImageExtractor):
         url     , pos = text.extract(page, '<br><img src="', '"')
         filename, pos = text.extract(page, 'alt="', '"', pos)
         return url, filename
+
+
+class FappicImageExtractor(ImagehostImageExtractor):
+    """Extractor for single images from fappic.com"""
+    category = "fappic"
+    pattern = r"(?:https?://)?((?:www\.)?fappic\.com/(\w+)/[^/?#]+)"
+    test = ("https://www.fappic.com/98wxqcklyh8k/test.png", {
+        "pattern": r"https://img\d+\.fappic\.com/img/\w+/test\.png",
+        "keyword": "433b1d310b0ff12ad8a71ac7b9d8ba3f8cd1e898",
+        "content": "0c8768055e4e20e7c7259608b67799171b691140",
+    })
+
+    def get_info(self, page):
+        url     , pos = text.extract(page, '<a href="/?click"><img src="', '"')
+        filename, pos = text.extract(page, 'alt="', '"', pos)
+
+        if filename.startswith("Porn-Picture-"):
+            filename = filename[13:]
+
+        return url, filename
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 3590e17..983ae37 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -29,7 +29,7 @@ class InstagramExtractor(Extractor):
     root = "https://www.instagram.com"
     cookiedomain = ".instagram.com"
     cookienames = ("sessionid",)
-    request_interval = 8.0
+    request_interval = (6.0, 12.0)
 
     def __init__(self, match):
         Extractor.__init__(self, match)
@@ -679,7 +679,6 @@ class InstagramStoriesExtractor(InstagramExtractor):
         ("https://www.instagram.com/stories/instagram/"),
         ("https://www.instagram.com/stories/highlights/18042509488170095/"),
     )
-    request_interval = 1.0
 
     def __init__(self, match):
         self.highlight_id, self.user = match.groups()
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index a911d35..c5f5ae7 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -9,7 +9,8 @@
 """Extractors for https://kemono.party/"""
 
 from .common import Extractor, Message
-from .. import text
+from .. import text, exception
+from ..cache import cache
 import itertools
 import re
 
@@ -70,11 +71,32 @@ class KemonopartyExtractor(Extractor):
             post["type"] = file["type"]
             url = file["path"]
             if url[0] == "/":
-                url = self.root + url
+                url = self.root + "/data" + url
+            elif url.startswith("https://kemono.party"):
+                url = self.root + "/data" + url[20:]
             text.nameext_from_url(file["name"], post)
             yield Message.Url, url, post
 
+    def login(self):
+        username, password = self._get_auth_info()
+        if username:
+            self._update_cookies(self._login_impl(username, password))
+
+    @cache(maxage=28*24*3600, keyarg=1)
+    def _login_impl(self, username, password):
+        self.log.info("Logging in as %s", username)
+
+        url = self.root + "/account/login"
+        data = {"username": username, "password": password}
+
+        response = self.request(url, method="POST", data=data)
+        if response.url.endswith("/account/login") and \
+                "Username or password is incorrect" in response.text:
+            raise exception.AuthenticationError()
+
+        return {c.name: c.value for c in response.history[0].cookies}
+
 
 class KemonopartyUserExtractor(KemonopartyExtractor):
     """Extractor for all posts from a kemono.party user listing"""
@@ -119,7 +141,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
     pattern = BASE_PATTERN + r"/post/([^/?#]+)"
     test = (
         ("https://kemono.party/fanbox/user/6993449/post/506575", {
-            "pattern": r"https://kemono\.party/files/fanbox"
+            "pattern": r"https://kemono\.party/data/files/fanbox"
                        r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
             "keyword": {
                 "added": "Wed, 06 May 2020 20:28:02 GMT",
@@ -142,12 +164,12 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
         }),
         # inline image (#1286)
         ("https://kemono.party/fanbox/user/7356311/post/802343", {
-            "pattern": r"https://kemono\.party/inline/fanbox"
+            "pattern": r"https://kemono\.party/data/inline/fanbox"
                        r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg",
         }),
         # kemono.party -> data.kemono.party
         ("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
-            "pattern": r"https://kemono\.party/(file|attachment)s"
+            "pattern": r"https://kemono\.party/data/(file|attachment)s"
                        r"/gumroad/trylsc/IURjT/",
         }),
         # username (#1548, #1652)
@@ -173,3 +195,25 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
     def posts(self):
         posts = self.request(self.api_url).json()
         return (posts[0],) if len(posts) > 1 else posts
+
+
+class KemonopartyFavoriteExtractor(KemonopartyExtractor):
+    """Extractor for kemono.party favorites"""
+    subcategory = "favorite"
+    pattern = r"(?:https?://)?kemono\.party/favorites"
+    test = ("https://kemono.party/favorites", {
+        "pattern": KemonopartyUserExtractor.pattern,
+        "url": "f4b5b796979bcba824af84206578c79101c7f0e1",
+        "count": 3,
+    })
+
+    def items(self):
+        self._prepare_ddosguard_cookies()
+        self.login()
+
+        users = self.request(self.root + "/api/favorites").json()
+        for user in users:
+            user["_extractor"] = KemonopartyUserExtractor
+            url = "{}/{}/user/{}".format(
+                self.root, user["service"], user["id"])
+            yield Message.Queue, url, user
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 53ae76a..634a92d 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -37,7 +37,7 @@ class MangadexExtractor(Extractor):
 
     def items(self):
         for chapter in self.chapters():
-            uuid = chapter["data"]["id"]
+            uuid = chapter["id"]
             data = self._transform(chapter)
             data["_extractor"] = MangadexChapterExtractor
             self._cache[uuid] = (chapter, data)
@@ -51,8 +51,8 @@ class MangadexExtractor(Extractor):
         for item in manga["relationships"]:
             relationships[item["type"]].append(item["id"])
 
-        cattributes = chapter["data"]["attributes"]
-        mattributes = manga["data"]["attributes"]
+        cattributes = chapter["attributes"]
+        mattributes = manga["attributes"]
         lang = cattributes["translatedLanguage"].partition("-")[0]
 
         if cattributes["chapter"]:
@@ -63,12 +63,12 @@ class MangadexExtractor(Extractor):
         data = {
             "manga"   : (mattributes["title"].get("en") or
                          next(iter(mattributes["title"].values()))),
-            "manga_id": manga["data"]["id"],
+            "manga_id": manga["id"],
             "title"   : cattributes["title"],
             "volume"  : text.parse_int(cattributes["volume"]),
             "chapter" : text.parse_int(chnum),
             "chapter_minor": sep + minor,
-            "chapter_id": chapter["data"]["id"],
+            "chapter_id": chapter["id"],
             "date"    : text.parse_datetime(cattributes["publishAt"]),
             "lang"    : lang,
             "language": util.code_to_language(lang),
@@ -77,13 +77,13 @@ class MangadexExtractor(Extractor):
 
         if self.config("metadata"):
             data["artist"] = [
-                self.api.author(uuid)["data"]["attributes"]["name"]
+                self.api.author(uuid)["attributes"]["name"]
                 for uuid in relationships["artist"]]
             data["author"] = [
-                self.api.author(uuid)["data"]["attributes"]["name"]
+                self.api.author(uuid)["attributes"]["name"]
                 for uuid in relationships["author"]]
             data["group"] = [
-                self.api.group(uuid)["data"]["attributes"]["name"]
+                self.api.group(uuid)["attributes"]["name"]
                 for uuid in relationships["scanlation_group"]]
 
         return data
@@ -118,11 +118,14 @@ class MangadexChapterExtractor(MangadexExtractor):
         data = self._transform(chapter)
         yield Message.Directory, data
 
-        cattributes = chapter["data"]["attributes"]
+        cattributes = chapter["attributes"]
         data["_http_headers"] = self._headers
         base = "{}/data/{}/".format(
             self.api.athome_server(self.uuid)["baseUrl"], cattributes["hash"])
-        for data["page"], page in enumerate(cattributes["data"], 1):
+
+        enum = util.enumerate_reversed if self.config(
+            "page-reverse") else enumerate
+        for data["page"], page in enum(cattributes["data"], 1):
             text.nameext_from_url(page, data)
             yield Message.Url, base + page, data
 
@@ -153,6 +156,9 @@ class MangadexMangaExtractor(MangadexExtractor):
         ("https://mangadex.org/title/7c1e2742-a086-4fd3-a3be-701fd6cf0be9", {
             "count": 1,
         }),
+        ("https://mangadex.org/title/584ef094-b2ab-40ce-962c-bce341fb9d10", {
+            "count": ">= 20",
+        })
     )
 
     def chapters(self):
@@ -189,18 +195,18 @@ class MangadexAPI():
 
     @memcache(keyarg=1)
     def author(self, uuid):
-        return self._call("/author/" + uuid)
+        return self._call("/author/" + uuid)["data"]
 
     def chapter(self, uuid):
-        return self._call("/chapter/" + uuid)
+        return self._call("/chapter/" + uuid)["data"]
 
     @memcache(keyarg=1)
     def group(self, uuid):
-        return self._call("/group/" + uuid)
+        return self._call("/group/" + uuid)["data"]
 
     @memcache(keyarg=1)
     def manga(self, uuid):
-        return self._call("/manga/" + uuid)
+        return self._call("/manga/" + uuid)["data"]
 
     def manga_feed(self, uuid):
         config = self.extractor.config
@@ -209,6 +215,8 @@ class MangadexAPI():
             "order[volume]"       : order,
             "order[chapter]"      : order,
             "translatedLanguage[]": config("lang"),
+            "contentRating[]"     : [
+                "safe", "suggestive", "erotica", "pornographic"],
         }
         return self._pagination("/manga/" + uuid + "/feed", params)
 
@@ -271,7 +279,7 @@ class MangadexAPI():
 
         while True:
             data = self._call(endpoint, params)
-            yield from data["results"]
+            yield from data["data"]
 
             params["offset"] = data["offset"] + data["limit"]
             if params["offset"] >= data["total"]:
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index ff0bfc3..cd7cabb 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -87,7 +87,7 @@ BASE_PATTERN = MastodonExtractor.update(INSTANCES)
 class MastodonUserExtractor(MastodonExtractor):
     """Extractor for all images of an account/user"""
     subcategory = "user"
-    pattern = BASE_PATTERN + r"/@([^/?#]+)(?:/media)?/?$"
+    pattern = BASE_PATTERN + r"/(?:@|users/)([^/?#]+)(?:/media)?/?$"
     test = (
         ("https://mastodon.social/@jk", {
             "pattern": r"https://files.mastodon.social/media_attachments"
@@ -100,26 +100,44 @@ class MastodonUserExtractor(MastodonExtractor):
             "count": 60,
         }),
         ("https://baraag.net/@pumpkinnsfw"),
+        ("https://mastodon.social/@id:10843"),
+        ("https://mastodon.social/users/id:10843"),
+        ("https://mastodon.social/users/jk"),
     )
 
     def statuses(self):
         api = MastodonAPI(self)
-        username = self.item
-        handle = "@{}@{}".format(username, self.instance)
-        for account in api.account_search(handle, 1):
-            if account["username"] == username:
-                break
-        else:
-            raise exception.NotFoundError("account")
-
         return api.account_statuses(
-            account["id"],
+            api.account_id_by_username(self.item),
             only_media=not self.config("text-posts", False),
             exclude_replies=not self.replies,
         )
 
 
+class MastodonFollowingExtractor(MastodonExtractor):
+    """Extractor for followed mastodon users"""
+    subcategory = "following"
+    pattern = BASE_PATTERN + r"/users/([^/?#]+)/following"
+    test = (
+        ("https://mastodon.social/users/0x4f/following", {
+            "extractor": False,
+            "count": ">= 20",
+        }),
+        ("https://mastodon.social/users/id:10843/following"),
+        ("https://pawoo.net/users/yoru_nine/following"),
+        ("https://baraag.net/users/pumpkinnsfw/following"),
+    )
+
+    def items(self):
+        api = MastodonAPI(self)
+        account_id = api.account_id_by_username(self.item)
+
+        for account in api.account_following(account_id):
+            account["_extractor"] = MastodonUserExtractor
+            yield Message.Queue, account["url"], account
+
+
 class MastodonStatusExtractor(MastodonExtractor):
     """Extractor for images from a status"""
     subcategory = "status"
@@ -165,6 +183,20 @@ class MastodonAPI():
 
         self.headers = {"Authorization": "Bearer " + access_token}
 
+    def account_id_by_username(self, username):
+        if username.startswith("id:"):
+            return username[3:]
+
+        handle = "@{}@{}".format(username, self.extractor.instance)
+        for account in self.account_search(handle, 1):
+            if account["username"] == username:
+                return account["id"]
+        raise exception.NotFoundError("account")
+
+    def account_following(self, account_id):
+        endpoint = "/v1/accounts/{}/following".format(account_id)
+        return self._pagination(endpoint, None)
+
     def account_search(self, query, limit=40):
         """Search for accounts"""
         endpoint = "/v1/accounts/search"
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index 44411c8..4dc880f 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -168,7 +168,7 @@ class NozomiTagExtractor(NozomiExtractor):
     def __init__(self, match):
         NozomiExtractor.__init__(self, match)
         tags, self.pnum = match.groups()
-        self.tags = text.unquote(tags).lower()
+        self.tags = text.unquote(tags)
         self.nozomi = "/nozomi/{}.nozomi".format(self.tags)
 
     def metadata(self):
@@ -187,7 +187,7 @@ class NozomiSearchExtractor(NozomiExtractor):
 
     def __init__(self, match):
         NozomiExtractor.__init__(self, match)
-        self.tags = text.unquote(match.group(1)).lower().split()
+        self.tags = text.unquote(match.group(1)).split()
 
     def metadata(self):
         return {"search_tags": self.tags}
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 4dc1e43..6812f35 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -415,7 +415,7 @@ class OAuthPixiv(OAuthBase):
         print("""
1) Open your browser's Developer Tools (F12) and switch to the Network tab
2) Login
-4) Select the last network monitor entry ('callback?state=...')
+3) Select the last network monitor entry ('callback?state=...')
4) Copy its 'code' query parameter, paste it below, and press Enter
""")
         code = input("code: ")
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 8953edd..43c7e50 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -139,7 +139,7 @@ class RedditSubredditExtractor(RedditExtractor):
     """Extractor for URLs from subreddits on reddit.com"""
     subcategory = "subreddit"
     pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/r/"
-               r"([^/?#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?(?:$|#)")
+               r"([^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?(?:$|#)")
     test = (
         ("https://www.reddit.com/r/lavaporn/", {
             "range": "1-20",
@@ -152,9 +152,11 @@ class RedditSubredditExtractor(RedditExtractor):
     )
 
     def __init__(self, match):
+        self.subreddit, sub, params = match.groups()
+        self.params = text.parse_query(params)
+        if sub:
+            self.subcategory += "-" + sub
         RedditExtractor.__init__(self, match)
-        self.subreddit = match.group(1)
-        self.params = text.parse_query(match.group(2))
 
     def submissions(self):
         return self.api.submissions_subreddit(self.subreddit, self.params)
@@ -164,7 +166,7 @@ class RedditUserExtractor(RedditExtractor):
     """Extractor for URLs from posts by a reddit user"""
     subcategory = "user"
     pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/u(?:ser)?/"
-               r"([^/?#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?")
+               r"([^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?")
     test = (
         ("https://www.reddit.com/user/username/", {
             "count": ">= 2",
@@ -175,9 +177,11 @@ class RedditUserExtractor(RedditExtractor):
     )
 
     def __init__(self, match):
+        self.user, sub, params = match.groups()
+        self.params = text.parse_query(params)
+        if sub:
+            self.subcategory += "-" + sub
         RedditExtractor.__init__(self, match)
-        self.user = match.group(1)
-        self.params = text.parse_query(match.group(2))
 
     def submissions(self):
         return self.api.submissions_user(self.user, self.params)
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 576564c..e078bef 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -72,5 +72,3 @@ class RedgifsImageExtractor(RedgifsExtractor):
 
 class RedgifsAPI(GfycatAPI):
     API_ROOT = "https://api.redgifs.com"
-    ACCESS_KEY = ("dBLwVuGn9eq4dtXLs8WSfpjcYFY7bPQe"
-                  "AqGPSFgqeW5B9uzj2cMVhF63pTFF4Rg9")
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 2dfcb55..4a3f6cd 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -50,7 +50,7 @@ class TwitterExtractor(Extractor):
             if not self.retweets and "retweeted_status_id_str" in tweet:
                 self.log.debug("Skipping %s (retweet)", tweet["id_str"])
                 continue
-            if not self.quoted and "quoted" in tweet:
+            if not self.quoted and "quoted_by_id_str" in tweet:
                 self.log.debug("Skipping %s (quoted tweet)", tweet["id_str"])
                 continue
             if "in_reply_to_user_id_str" in tweet and (
@@ -139,8 +139,10 @@ class TwitterExtractor(Extractor):
                 for size in ("original", "x_large", "large", "small"):
                     key = prefix + size
                     if key in bvals:
-                        files.append(bvals[key]["image_value"])
-                        return
+                        value = bvals[key].get("image_value")
+                        if value and "url" in value:
+                            files.append(value)
+                            return
         elif self.videos:
             url = "ytdl:{}/i/web/status/{}".format(self.root, tweet["id_str"])
             files.append({"url": url})
@@ -199,6 +201,8 @@ class TwitterExtractor(Extractor):
 
         if "in_reply_to_screen_name" in tweet:
             tdata["reply_to"] = tweet["in_reply_to_screen_name"]
+        if "quoted_by_id_str" in tweet:
+            tdata["quote_by"] = text.parse_int(tweet["quoted_by_id_str"])
 
         if "author" in tweet:
             tdata["author"] = self._transform_user(tweet["author"])
@@ -316,7 +320,7 @@ class TwitterExtractor(Extractor):
 
 
 class TwitterTimelineExtractor(TwitterExtractor):
-    """Extractor for all images from a user's timeline"""
+    """Extractor for Tweets from a user's timeline"""
     subcategory = "timeline"
     pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
                r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
@@ -341,8 +345,25 @@ class TwitterTimelineExtractor(TwitterExtractor):
         return TwitterAPI(self).timeline_profile(self.user)
 
 
+class TwitterRepliesExtractor(TwitterExtractor):
+    """Extractor for Tweets from a user's timeline including replies"""
+    subcategory = "replies"
+    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/with_replies(?!\w)"
+    test = (
+        ("https://twitter.com/supernaturepics/with_replies", {
+            "range": "1-40",
+            "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
+        }),
+        ("https://mobile.twitter.com/supernaturepics/with_replies#t"),
+        ("https://www.twitter.com/id:2976459548/with_replies"),
+    )
+
+    def tweets(self):
+        return TwitterAPI(self).timeline_profile(self.user, replies=True)
+
+
 class TwitterMediaExtractor(TwitterExtractor):
-    """Extractor for all images from a user's Media Tweets"""
+    """Extractor for Tweets from a user's Media timeline"""
     subcategory = "media"
     pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/media(?!\w)"
     test = (
@@ -652,11 +673,11 @@ class TwitterAPI():
         endpoint = "/2/timeline/conversation/{}.json".format(conversation_id)
         return self._pagination(endpoint)
 
-    def timeline_profile(self, screen_name):
+    def timeline_profile(self, screen_name, replies=False):
         user_id = self._user_id_by_screen_name(screen_name)
         endpoint = "/2/timeline/profile/{}.json".format(user_id)
         params = self.params.copy()
-        params["include_tweet_replies"] = "false"
+        params["include_tweet_replies"] = "true" if replies else "false"
         return self._pagination(endpoint, params)
 
     def timeline_media(self, screen_name):
@@ -886,7 +907,7 @@ class TwitterAPI():
                 quoted = quoted.copy()
                 quoted["author"] = users[quoted["user_id_str"]]
                 quoted["user"] = tweet["user"]
-                quoted["quoted"] = True
+                quoted["quoted_by_id_str"] = tweet["id_str"]
                 yield quoted
 
             # update cursor value
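
The other unshown helper, util.enumerate_reversed(), backs the new "page-reverse" handling in the common.py, foolslide.py, and mangadex.py hunks above. Judging from its call sites — enum(imgs, 1) and util.enumerate_reversed(imgs, 1, data["count"]) — it should yield items last-to-first while each item keeps its forward page number. The following is a hedged sketch of those assumed semantics, not the code shipped in gallery_dl/util.py:

    def enumerate_reversed(iterable, start=0, length=None):
        # Sketch only: like enumerate(), but walk the items backwards while
        # keeping each item's original index, so the last page is yielded
        # first yet is still numbered `length - 1 + start`. The explicit
        # `length` parameter covers iterables without len(), e.g. generators.
        if length is None:
            length = len(iterable)
        return zip(range(length - 1 + start, start - 1, -1),
                   reversed(list(iterable)))

For imgs = [a, b, c] and start=1 this yields (3, c), (2, b), (1, a): files are downloaded in reverse order but keep their correct {page} metadata, which is exactly what the archive and filename formats in the hunks above rely on.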
