From a26df18796ff4e506b16bf32fcec9336233b9e2e Mon Sep 17 00:00:00 2001
From: Unit 193
Date: Tue, 28 Jan 2025 19:12:09 -0500
Subject: New upstream version 1.28.5.

---
 gallery_dl/extractor/4archive.py     |   2 +-
 gallery_dl/extractor/__init__.py     |   4 +-
 gallery_dl/extractor/adultempire.py  |   3 +
 gallery_dl/extractor/architizer.py   |  10 +-
 gallery_dl/extractor/artstation.py   |  12 +-
 gallery_dl/extractor/batoto.py       |   1 +
 gallery_dl/extractor/bunkr.py        |  10 +-
 gallery_dl/extractor/cohost.py       | 250 -----------------------------------
 gallery_dl/extractor/danbooru.py     |   2 +-
 gallery_dl/extractor/deviantart.py   |   2 +-
 gallery_dl/extractor/e621.py         |  19 ++-
 gallery_dl/extractor/facebook.py     |  35 ++---
 gallery_dl/extractor/fanleaks.py     |  87 ------------
 gallery_dl/extractor/fapachi.py      |   3 +-
 gallery_dl/extractor/hiperdex.py     |  12 +-
 gallery_dl/extractor/imagehosts.py   |  28 ++++
 gallery_dl/extractor/issuu.py        |  32 +++--
 gallery_dl/extractor/kemonoparty.py  |  51 ++++---
 gallery_dl/extractor/khinsider.py    |  26 +++-
 gallery_dl/extractor/komikcast.py    |  11 +-
 gallery_dl/extractor/lofter.py       |   8 ++
 gallery_dl/extractor/lolisafe.py     |   9 +-
 gallery_dl/extractor/mangafox.py     |   6 +-
 gallery_dl/extractor/mangahere.py    |   6 +-
 gallery_dl/extractor/mangaread.py    |   6 +-
 gallery_dl/extractor/nekohouse.py    | 122 +++++++++++++++++
 gallery_dl/extractor/pixiv.py        |  42 ++++--
 gallery_dl/extractor/pornpics.py     |  22 ++-
 gallery_dl/extractor/rule34xyz.py    |   8 +-
 gallery_dl/extractor/saint.py        |   1 +
 gallery_dl/extractor/shimmie2.py     |   4 -
 gallery_dl/extractor/szurubooru.py   |   8 +-
 gallery_dl/extractor/toyhouse.py     |  10 +-
 gallery_dl/extractor/twitter.py      |  44 ++++--
 gallery_dl/extractor/urlgalleries.py |  13 +-
 gallery_dl/extractor/vsco.py         |   3 +-
 gallery_dl/extractor/webtoons.py     |   4 +-
 gallery_dl/extractor/weebcentral.py  |   6 +-
 gallery_dl/extractor/xfolio.py       | 146 ++++++++++++++++++++
 gallery_dl/extractor/xhamster.py     |  74 +++++------
 40 files changed, 606 insertions(+), 536 deletions(-)
 delete mode 100644 gallery_dl/extractor/cohost.py
 delete mode 100644 gallery_dl/extractor/fanleaks.py
 create mode 100644 gallery_dl/extractor/nekohouse.py
 create mode 100644 gallery_dl/extractor/xfolio.py
(limited to 'gallery_dl/extractor')

diff --git a/gallery_dl/extractor/4archive.py b/gallery_dl/extractor/4archive.py
index 948a605..d198369 100644
--- a/gallery_dl/extractor/4archive.py
+++ b/gallery_dl/extractor/4archive.py
@@ -64,7 +64,7 @@ class _4archiveThreadExtractor(Extractor):
         data = {
             "name": extr('class="name">', "<"),
             "date": text.parse_datetime(
-                extr('class="dateTime postNum">', "<").strip(),
+                extr('class="dateTime postNum" >', "<").strip(),
                 "%Y-%m-%d %H:%M:%S"),
             "no"  : text.parse_int(extr('href="#p', '"')),
         }
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index b582c99..fc8d7b2 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -39,7 +39,6 @@ modules = [
     "chevereto",
     "cien",
     "civitai",
-    "cohost",
     "comicvine",
     "cyberdrop",
     "danbooru",
@@ -52,7 +51,6 @@ modules = [
     "exhentai",
     "facebook",
     "fanbox",
-    "fanleaks",
     "fantia",
     "fapello",
     "fapachi",
@@ -116,6 +114,7 @@ modules = [
     "myportfolio",
     "naver",
     "naverwebtoon",
+    "nekohouse",
    "newgrounds",
     "nhentai",
     "nijie",
@@ -196,6 +195,7 @@ modules = [
     "wikiart",
     "wikifeet",
     "wikimedia",
+    "xfolio",
     "xhamster",
     "xvideos",
     "yiffverse",
diff --git a/gallery_dl/extractor/adultempire.py b/gallery_dl/extractor/adultempire.py
index 1617414..c891b17 100644
--- a/gallery_dl/extractor/adultempire.py
+++ b/gallery_dl/extractor/adultempire.py
@@ -24,6 +24,9 @@ class AdultempireGalleryExtractor(GalleryExtractor):
         GalleryExtractor.__init__(self, match)
         self.gallery_id = match.group(2)
 
+    def _init(self):
+        self.cookies.set("ageConfirmed", "true", domain="www.adultempire.com")
+
     def metadata(self, page):
         extr = text.extract_from(page, page.index('
'))
         return {
diff --git a/gallery_dl/extractor/architizer.py b/gallery_dl/extractor/architizer.py
index 8064e78..0268224 100644
--- a/gallery_dl/extractor/architizer.py
+++ b/gallery_dl/extractor/architizer.py
@@ -32,10 +32,10 @@ class ArchitizerProjectExtractor(GalleryExtractor):
         extr('id="Pages"', "")
 
         return {
-            "title"    : extr('data-name="', '"'),
-            "slug"     : extr('data-slug="', '"'),
-            "gid"      : extr('data-gid="', '"').rpartition(".")[2],
-            "firm"     : extr('data-firm-leaders-str="', '"'),
+            "title"    : extr("data-name='", "'"),
+            "slug"     : extr("data-slug='", "'"),
+            "gid"      : extr("data-gid='", "'").rpartition(".")[2],
+            "firm"     : extr("data-firm-leaders-str='", "'"),
             "location" : extr("
", "<").strip(),
             "type"     : text.unescape(text.remove_html(extr(
                 '
Type
', '", " | Bunkr<")) if not file_url: webpage_url = text.unescape(text.rextract( @@ -166,6 +169,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): return { "file" : text.unescape(file_url), + "name" : text.unescape(file_name), "_http_headers" : {"Referer": response.url}, "_http_validate": self._validate, } diff --git a/gallery_dl/extractor/cohost.py b/gallery_dl/extractor/cohost.py deleted file mode 100644 index 6a43224..0000000 --- a/gallery_dl/extractor/cohost.py +++ /dev/null @@ -1,250 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2024 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://cohost.org/""" - -from .common import Extractor, Message -from .. import text, util - -BASE_PATTERN = r"(?:https?://)?(?:www\.)?cohost\.org" - - -class CohostExtractor(Extractor): - """Base class for cohost extractors""" - category = "cohost" - root = "https://cohost.org" - directory_fmt = ("{category}", "{postingProject[handle]}") - filename_fmt = ("{postId}{headline:?_//[b:200]}{num:?_//}.{extension}") - archive_fmt = "{postId}_{num}" - - def _init(self): - self.replies = self.config("replies", True) - self.pinned = self.config("pinned", False) - self.shares = self.config("shares", False) - self.asks = self.config("asks", True) - - self.avatar = self.config("avatar", False) - if self.avatar: - self._urls_avatar = {None, ""} - - self.background = self.config("background", False) - if self.background: - self._urls_background = {None, ""} - - def items(self): - for post in self.posts(): - reason = post.get("limitedVisibilityReason") - if reason and reason != "none": - if reason == "log-in-first": - reason = ("This page's posts are visible only to users " - "who are logged in.") - self.log.warning('%s: "%s"', post["postId"], reason) - - files = self._extract_files(post) - post["count"] = len(files) - post["date"] = text.parse_datetime( - post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ") - - yield Message.Directory, post - - project = post["postingProject"] - if self.avatar: - url = project.get("avatarURL") - if url not in self._urls_avatar: - self._urls_avatar.add(url) - p = post.copy() - p["postId"] = p["kind"] = "avatar" - p["headline"] = p["num"] = "" - yield Message.Url, url, text.nameext_from_url(url, p) - - if self.background: - url = project.get("headerURL") - if url not in self._urls_background: - self._urls_background.add(url) - p = post.copy() - p["postId"] = p["kind"] = "background" - p["headline"] = p["num"] = "" - yield Message.Url, url, text.nameext_from_url(url, p) - - for post["num"], file in enumerate(files, 1): - url = file["fileURL"] - post.update(file) - text.nameext_from_url(url, post) - yield Message.Url, url, post - - def posts(self): - return () - - def _request_api(self, endpoint, input): - url = "{}/api/v1/trpc/{}".format(self.root, endpoint) - params = {"batch": "1", "input": util.json_dumps({"0": input})} - headers = {"content-type": "application/json"} - - data = self.request(url, params=params, headers=headers).json() - return data[0]["result"]["data"] - - def _extract_files(self, post): - files = [] - - self._extract_blocks(post, files) - if self.shares and post.get("shareTree"): - for share in post["shareTree"]: - self._extract_blocks(share, files, share) - del post["shareTree"] - - return files - - def _extract_blocks(self, post, files, shared=None): - post["content"] = content = [] - - for 
block in post.pop("blocks") or (): - try: - type = block["type"] - if type == "attachment": - file = block["attachment"].copy() - file["shared"] = shared - files.append(file) - elif type == "attachment-row": - for att in block["attachments"]: - file = att["attachment"].copy() - file["shared"] = shared - files.append(file) - elif type == "markdown": - content.append(block["markdown"]["content"]) - elif type == "ask": - post["ask"] = block["ask"] - else: - self.log.debug("%s: Unsupported block type '%s'", - post["postId"], type) - except Exception as exc: - self.log.debug("%s: %s", exc.__class__.__name__, exc) - - -class CohostUserExtractor(CohostExtractor): - """Extractor for media from a cohost user""" - subcategory = "user" - pattern = BASE_PATTERN + r"/([^/?#]+)/?(?:$|\?|#)" - example = "https://cohost.org/USER" - - def posts(self): - empty = 0 - params = { - "projectHandle": self.groups[0], - "page": 0, - "options": { - "pinnedPostsAtTop" : True if self.pinned else False, - "hideReplies" : not self.replies, - "hideShares" : not self.shares, - "hideAsks" : not self.asks, - "viewingOnProjectPage": True, - }, - } - - while True: - data = self._request_api("posts.profilePosts", params) - - posts = data["posts"] - if posts: - empty = 0 - yield from posts - else: - empty += 1 - - pagination = data["pagination"] - if not pagination.get("morePagesForward"): - return - if empty >= 3: - return self.log.debug("Empty API results") - params["page"] = pagination["nextPage"] - - -class CohostPostExtractor(CohostExtractor): - """Extractor for media from a single cohost post""" - subcategory = "post" - pattern = BASE_PATTERN + r"/([^/?#]+)/post/(\d+)" - example = "https://cohost.org/USER/post/12345" - - def posts(self): - endpoint = "posts.singlePost" - params = { - "handle": self.groups[0], - "postId": int(self.groups[1]), - } - - data = self._request_api(endpoint, params) - post = data["post"] - - try: - post["comments"] = data["comments"][self.groups[1]] - except LookupError: - post["comments"] = () - - return (post,) - - -class CohostTagExtractor(CohostExtractor): - """Extractor for tagged posts""" - subcategory = "tag" - pattern = BASE_PATTERN + r"/([^/?#]+)/tagged/([^/?#]+)(?:\?([^#]+))?" 
- example = "https://cohost.org/USER/tagged/TAG" - - def posts(self): - user, tag, query = self.groups - url = "{}/{}/tagged/{}".format(self.root, user, tag) - params = text.parse_query(query) - post_feed_key = ("tagged-post-feed" if user == "rc" else - "project-tagged-post-feed") - - while True: - page = self.request(url, params=params).text - data = util.json_loads(text.extr( - page, 'id="__COHOST_LOADER_STATE__">', '')) - - try: - feed = data[post_feed_key] - except KeyError: - feed = data.popitem()[1] - - yield from feed["posts"] - - pagination = feed["paginationMode"] - if not pagination.get("morePagesForward"): - return - params["refTimestamp"] = pagination["refTimestamp"] - params["skipPosts"] = \ - pagination["currentSkip"] + pagination["idealPageStride"] - - -class CohostLikesExtractor(CohostExtractor): - """Extractor for liked posts""" - subcategory = "likes" - pattern = BASE_PATTERN + r"/rc/liked-posts" - example = "https://cohost.org/rc/liked-posts" - - def posts(self): - url = "{}/rc/liked-posts".format(self.root) - params = {} - - while True: - page = self.request(url, params=params).text - data = util.json_loads(text.extr( - page, 'id="__COHOST_LOADER_STATE__">', '')) - - try: - feed = data["liked-posts-feed"] - except KeyError: - feed = data.popitem()[1] - - yield from feed["posts"] - - pagination = feed["paginationMode"] - if not pagination.get("morePagesForward"): - return - params["refTimestamp"] = pagination["refTimestamp"] - params["skipPosts"] = \ - pagination["currentSkip"] + pagination["idealPageStride"] diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 37b6747..d0a9397 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -32,7 +32,7 @@ class DanbooruExtractor(BaseExtractor): if isinstance(threshold, int): self.threshold = 1 if threshold < 1 else threshold else: - self.threshold = self.per_page + self.threshold = self.per_page - 20 username, api_key = self._get_auth_info() if username: diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 8172f62..59b2d6d 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -822,7 +822,7 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\ username, folder["gallery_id"], public=False): cache[dev["deviationid"]] = dev if has_access else None - return cache[deviation["deviationid"]] + return cache.get(deviation["deviationid"]) def _unwatch_premium(self): for username in self.unwatch: diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py index 33e6ba8..eddcb12 100644 --- a/gallery_dl/extractor/e621.py +++ b/gallery_dl/extractor/e621.py @@ -8,7 +8,7 @@ """Extractors for https://e621.net/ and other e621 instances""" -from .common import Message +from .common import Extractor, Message from . import danbooru from ..cache import memcache from .. 
import text, util @@ -156,3 +156,20 @@ class E621FavoriteExtractor(E621Extractor): def posts(self): return self._pagination("/favorites.json", self.query) + + +class E621FrontendExtractor(Extractor): + """Extractor for alternative e621 frontends""" + basecategory = "E621" + category = "e621" + subcategory = "frontend" + pattern = r"(?:https?://)?e621\.(?:cc/\?tags|anthro\.fr/\?q)=([^&#]*)" + example = "https://e621.cc/?tags=TAG" + + def initialize(self): + pass + + def items(self): + url = "https://e621.net/posts?tags=" + self.groups[0] + data = {"_extractor": E621TagExtractor} + yield Message.Queue, url, data diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py index 2f3fdbf..1ec6adc 100644 --- a/gallery_dl/extractor/facebook.py +++ b/gallery_dl/extractor/facebook.py @@ -238,8 +238,9 @@ class FacebookExtractor(Extractor): return res - def extract_set(self, first_photo_id, set_id): - all_photo_ids = [first_photo_id] + def extract_set(self, set_data): + set_id = set_data["set_id"] + all_photo_ids = [set_data["first_photo_id"]] retries = 0 i = 0 @@ -252,7 +253,6 @@ class FacebookExtractor(Extractor): photo_page = self.photo_page_request_wrapper(photo_url).text photo = self.parse_photo_page(photo_page) - photo["set_id"] = set_id photo["num"] = i + 1 if self.author_followups: @@ -281,9 +281,11 @@ class FacebookExtractor(Extractor): retries = 0 else: retries = 0 + photo.update(set_data) + yield Message.Directory, photo yield Message.Url, photo["url"], photo - if photo["next_photo_id"] == "": + if not photo["next_photo_id"]: self.log.debug( "Can't find next image in the set. " "Extraction is over." @@ -322,15 +324,11 @@ class FacebookSetExtractor(FacebookExtractor): set_url = self.set_url_fmt.format(set_id=set_id) set_page = self.request(set_url).text + set_data = self.parse_set_page(set_page) + if self.groups[2]: + set_data["first_photo_id"] = self.groups[2] - directory = self.parse_set_page(set_page) - - yield Message.Directory, directory - - yield from self.extract_set( - self.groups[2] or directory["first_photo_id"], - directory["set_id"] - ) + return self.extract_set(set_data) class FacebookPhotoExtractor(FacebookExtractor): @@ -436,13 +434,8 @@ class FacebookProfileExtractor(FacebookExtractor): if set_id: set_url = self.set_url_fmt.format(set_id=set_id) set_page = self.request(set_url).text + set_data = self.parse_set_page(set_page) + return self.extract_set(set_data) - directory = self.parse_set_page(set_page) - - yield Message.Directory, directory - - yield from self.extract_set( - directory["first_photo_id"], directory["set_id"] - ) - else: - self.log.debug("Profile photos set ID not found.") + self.log.debug("Profile photos set ID not found.") + return iter(()) diff --git a/gallery_dl/extractor/fanleaks.py b/gallery_dl/extractor/fanleaks.py deleted file mode 100644 index 886e893..0000000 --- a/gallery_dl/extractor/fanleaks.py +++ /dev/null @@ -1,87 +0,0 @@ -# -*- coding: utf-8 -*- - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://fanleaks.club/""" - -from .common import Extractor, Message -from .. 
import text - - -class FanleaksExtractor(Extractor): - """Base class for Fanleaks extractors""" - category = "fanleaks" - directory_fmt = ("{category}", "{model}") - filename_fmt = "{model_id}_{id}.{extension}" - archive_fmt = "{model_id}_{id}" - root = "https://fanleaks.club" - - def __init__(self, match): - Extractor.__init__(self, match) - self.model_id = match.group(1) - - def extract_post(self, url): - extr = text.extract_from(self.request(url, notfound="post").text) - data = { - "model_id": self.model_id, - "model" : text.unescape(extr('text-lg">', "")), - "id" : text.parse_int(self.id), - "type" : extr('type="', '"')[:5] or "photo", - } - url = extr('src="', '"') - yield Message.Directory, data - yield Message.Url, url, text.nameext_from_url(url, data) - - -class FanleaksPostExtractor(FanleaksExtractor): - """Extractor for individual posts on fanleaks.club""" - subcategory = "post" - pattern = r"(?:https?://)?(?:www\.)?fanleaks\.club/([^/?#]+)/(\d+)" - example = "https://fanleaks.club/MODEL/12345" - - def __init__(self, match): - FanleaksExtractor.__init__(self, match) - self.id = match.group(2) - - def items(self): - url = "{}/{}/{}".format(self.root, self.model_id, self.id) - return self.extract_post(url) - - -class FanleaksModelExtractor(FanleaksExtractor): - """Extractor for all posts from a fanleaks model""" - subcategory = "model" - pattern = (r"(?:https?://)?(?:www\.)?fanleaks\.club" - r"/(?!latest/?$)([^/?#]+)/?$") - example = "https://fanleaks.club/MODEL" - - def items(self): - page_num = 1 - page = self.request( - self.root + "/" + self.model_id, notfound="model").text - data = { - "model_id": self.model_id, - "model" : text.unescape(text.extr(page, 'mt-4">', "
")),
-            "type"    : "photo",
-        }
-        page_url = text.extr(page, "url: '", "'")
-        while True:
-            page = self.request("{}{}".format(page_url, page_num)).text
-            if not page:
-                return
-
-            for item in text.extract_iter(page, '"):
-                self.id = id = text.extr(item, "/", '"')
-                if "/icon-play.svg" in item:
-                    url = "{}/{}/{}".format(self.root, self.model_id, id)
-                    yield from self.extract_post(url)
-                    continue
-
-                data["id"] = text.parse_int(id)
-                url = text.extr(item, 'src="', '"').replace(
-                    "/thumbs/", "/", 1)
-                yield Message.Directory, data
-                yield Message.Url, url, text.nameext_from_url(url, data)
-            page_num += 1
diff --git a/gallery_dl/extractor/fapachi.py b/gallery_dl/extractor/fapachi.py
index 80478ca..43627e2 100644
--- a/gallery_dl/extractor/fapachi.py
+++ b/gallery_dl/extractor/fapachi.py
@@ -33,7 +33,8 @@ class FapachiPostExtractor(Extractor):
         }
         page = self.request("{}/{}/media/{}".format(
             self.root, self.user, self.id)).text
-        url = self.root + text.extr(page, 'd-block" src="', '"')
+        url = self.root + text.extract(
+            page, 'data-src="', '"', page.index('class="media-img'))[0]
 
         yield Message.Directory, data
         yield Message.Url, url, text.nameext_from_url(url, data)
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index c939a3c..f15aab7 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -6,7 +6,7 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extractors for https://hipertoon.com/"""
+"""Extractors for https://hiperdex.com/"""
 
 from .common import ChapterExtractor, MangaExtractor
 from .. import text
@@ -20,7 +20,7 @@ BASE_PATTERN = (r"((?:https?://)?(?:www\.)?"
 class HiperdexBase():
     """Base class for hiperdex extractors"""
     category = "hiperdex"
-    root = "https://hipertoon.com"
+    root = "https://hiperdex.com"
 
     @memcache(keyarg=1)
     def manga_data(self, manga, page=None):
@@ -49,7 +49,7 @@ class HiperdexBase():
             "status" : extr(
                 'class="summary-content">', '<').strip(),
             "description": text.remove_html(text.unescape(extr(
-                "Summary ", "
"))),
+                '
', "
"))),
             "language": "English",
             "lang"    : "en",
         }
@@ -69,7 +69,7 @@
 class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
     """Extractor for hiperdex manga chapters"""
     pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+)/([^/?#]+))"
-    example = "https://hipertoon.com/manga/MANGA/CHAPTER/"
+    example = "https://hiperdex.com/manga/MANGA/CHAPTER/"
 
     def __init__(self, match):
         root, path, self.manga, self.chapter = match.groups()
@@ -91,7 +91,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
     """Extractor for hiperdex manga"""
     chapterclass = HiperdexChapterExtractor
     pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+))/?$"
-    example = "https://hipertoon.com/manga/MANGA/"
+    example = "https://hiperdex.com/manga/MANGA/"
 
     def __init__(self, match):
         root, path, self.manga = match.groups()
@@ -127,7 +127,7 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
     chapterclass = HiperdexMangaExtractor
     reverse = False
     pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/(?:[^/?#]+))"
-    example = "https://hipertoon.com/manga-artist/NAME/"
+    example = "https://hiperdex.com/manga-artist/NAME/"
 
     def __init__(self, match):
         self.root = text.ensure_http_scheme(match.group(1))
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 5f1e0f4..d6b36cb 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -286,6 +286,34 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor):
         return url, url
 
 
+class TurboimagehostGalleryExtractor(ImagehostImageExtractor):
+    """Extractor for image galleries from turboimagehost.com"""
+    category = "turboimagehost"
+    subcategory = "gallery"
+    pattern = (r"(?:https?://)?((?:www\.)?turboimagehost\.com"
+               r"/album/(\d+)/([^/?#]*))")
+    example = "https://www.turboimagehost.com/album/12345/GALLERY_NAME"
+
+    def items(self):
+        data = {"_extractor": TurboimagehostImageExtractor}
+        params = {"p": 1}
+
+        while True:
+            page = self.request(self.page_url, params=params).text
+
+            if params["p"] == 1 and \
+                    "Requested gallery don`t exist on our website." in page:
+                raise exception.NotFoundError("gallery")
+
+            thumb_url = None
+            for thumb_url in text.extract_iter(page, '">
1 else base
+        try:
+            html = self.request(url).text
+            data = util.json_loads(text.unescape(text.extr(
+                html, '").rstrip("\n\r;"))
@@ -105,12 +105,8 @@ class XhamsterUserExtractor(XhamsterExtractor):
     pattern = BASE_PATTERN + r"/users/([^/?#]+)(?:/photos)?/?(?:$|[?#])"
     example = "https://xhamster.com/users/USER/photos"
 
-    def __init__(self, match):
-        XhamsterExtractor.__init__(self, match)
-        self.user = match.group(2)
-
     def items(self):
-        url = "{}/users/{}/photos".format(self.root, self.user)
+        url = "{}/users/{}/photos".format(self.root, self.groups[1])
         data = {"_extractor": XhamsterGalleryExtractor}
 
         while url:
-- 
cgit v1.2.3