diff options
Diffstat (limited to 'gallery_dl/extractor/hentaifoundry.py')
| -rw-r--r-- | gallery_dl/extractor/hentaifoundry.py | 37 |
1 files changed, 22 insertions, 15 deletions
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index a08f7bb..882183b 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -43,7 +43,7 @@ class HentaifoundryExtractor(Extractor): for post_url in util.advance(self.posts(), self.start_post): image = self._parse_post(post_url) image.update(data) - yield Message.Directory, image + yield Message.Directory, "", image yield Message.Url, image["src"], image def skip(self, num): @@ -86,7 +86,8 @@ class HentaifoundryExtractor(Extractor): .replace("\r\n", "\n")), "ratings" : [text.unescape(r) for r in text.extract_iter(extr( "class='ratings_box'", "</div>"), "title='", "'")], - "date" : text.parse_datetime(extr("datetime='", "'")), + "categories" : self._extract_categories(extr), + "date" : self.parse_datetime_iso(extr("datetime='", "'")), "views" : text.parse_int(extr(">Views</span>", "<")), "score" : text.parse_int(extr(">Vote Score</span>", "<")), "media" : text.unescape(extr(">Media</span>", "<").strip()), @@ -126,7 +127,7 @@ class HentaifoundryExtractor(Extractor): "title" : text.unescape(extr( "<div class='titlebar'>", "</a>").rpartition(">")[2]), "author" : text.unescape(extr('alt="', '"')), - "date" : text.parse_datetime(extr( + "date" : self.parse_datetime(extr( ">Updated<", "</span>").rpartition(">")[2], "%B %d, %Y"), "status" : extr("class='indent'>", "<"), } @@ -141,11 +142,17 @@ class HentaifoundryExtractor(Extractor): path = extr('class="pdfLink" href="', '"') data["src"] = self.root + path data["index"] = text.parse_int(path.rsplit("/", 2)[1]) + data["categories"] = self._extract_categories(extr) data["ratings"] = [text.unescape(r) for r in text.extract_iter(extr( "class='ratings_box'", "</div>"), "title='", "'")] return text.nameext_from_url(data["src"], data) + def _extract_categories(self, extr): + return [text.unescape(text.extr(c, ">", "<")) + for c in extr('class="categoryBreadcrumbs">', "</span>") + .split("»")] + def _request_check(self, url, **kwargs): self.request = self._request_original @@ -207,7 +214,7 @@ class HentaifoundryExtractor(Extractor): class HentaifoundryUserExtractor(Dispatch, HentaifoundryExtractor): """Extractor for a hentaifoundry user profile""" - pattern = BASE_PATTERN + r"/user/([^/?#]+)/profile" + pattern = rf"{BASE_PATTERN}/user/([^/?#]+)/profile" example = "https://www.hentai-foundry.com/user/USER/profile" def items(self): @@ -228,7 +235,7 @@ class HentaifoundryUserExtractor(Dispatch, HentaifoundryExtractor): class HentaifoundryPicturesExtractor(HentaifoundryExtractor): """Extractor for all pictures of a hentaifoundry user""" subcategory = "pictures" - pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)(?:/page/(\d+))?/?$" + pattern = rf"{BASE_PATTERN}/pictures/user/([^/?#]+)(?:/page/(\d+))?/?$" example = "https://www.hentai-foundry.com/pictures/user/USER" def __init__(self, match): @@ -240,7 +247,7 @@ class HentaifoundryScrapsExtractor(HentaifoundryExtractor): """Extractor for scraps of a hentaifoundry user""" subcategory = "scraps" directory_fmt = ("{category}", "{user}", "Scraps") - pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)/scraps" + pattern = rf"{BASE_PATTERN}/pictures/user/([^/?#]+)/scraps" example = "https://www.hentai-foundry.com/pictures/user/USER/scraps" def __init__(self, match): @@ -253,7 +260,7 @@ class HentaifoundryFavoriteExtractor(HentaifoundryExtractor): subcategory = "favorite" directory_fmt = ("{category}", "{user}", "Favorites") archive_fmt = "f_{user}_{index}" - pattern = BASE_PATTERN + r"/user/([^/?#]+)/faves/pictures" + pattern = rf"{BASE_PATTERN}/user/([^/?#]+)/faves/pictures" example = "https://www.hentai-foundry.com/user/USER/faves/pictures" def __init__(self, match): @@ -266,7 +273,7 @@ class HentaifoundryTagExtractor(HentaifoundryExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{index}" - pattern = BASE_PATTERN + r"/pictures/tagged/([^/?#]+)" + pattern = rf"{BASE_PATTERN}/pictures/tagged/([^/?#]+)" example = "https://www.hentai-foundry.com/pictures/tagged/TAG" def __init__(self, match): @@ -282,7 +289,7 @@ class HentaifoundryRecentExtractor(HentaifoundryExtractor): subcategory = "recent" directory_fmt = ("{category}", "Recent Pictures", "{date}") archive_fmt = "r_{index}" - pattern = BASE_PATTERN + r"/pictures/recent/(\d\d\d\d-\d\d-\d\d)" + pattern = rf"{BASE_PATTERN}/pictures/recent/(\d\d\d\d-\d\d-\d\d)" example = "https://www.hentai-foundry.com/pictures/recent/1970-01-01" def __init__(self, match): @@ -298,7 +305,7 @@ class HentaifoundryPopularExtractor(HentaifoundryExtractor): subcategory = "popular" directory_fmt = ("{category}", "Popular Pictures") archive_fmt = "p_{index}" - pattern = BASE_PATTERN + r"/pictures/popular()" + pattern = rf"{BASE_PATTERN}/pictures/popular()" example = "https://www.hentai-foundry.com/pictures/popular" def __init__(self, match): @@ -324,7 +331,7 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor): f"/{self.index}/?enterAgree=1") image = self._parse_post(post_url) image["user"] = self.user - yield Message.Directory, image + yield Message.Directory, "", image yield Message.Url, image["src"], image @@ -332,14 +339,14 @@ class HentaifoundryStoriesExtractor(HentaifoundryExtractor): """Extractor for stories of a hentaifoundry user""" subcategory = "stories" archive_fmt = "s_{index}" - pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)(?:/page/(\d+))?/?$" + pattern = rf"{BASE_PATTERN}/stories/user/([^/?#]+)(?:/page/(\d+))?/?$" example = "https://www.hentai-foundry.com/stories/user/USER" def items(self): self._init_site_filters() for story_html in util.advance(self.stories(), self.start_post): story = self._parse_story(story_html) - yield Message.Directory, story + yield Message.Directory, "", story yield Message.Url, story["src"], story def stories(self): @@ -351,7 +358,7 @@ class HentaifoundryStoryExtractor(HentaifoundryExtractor): """Extractor for a hentaifoundry story""" subcategory = "story" archive_fmt = "s_{index}" - pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)/(\d+)" + pattern = rf"{BASE_PATTERN}/stories/user/([^/?#]+)/(\d+)" example = "https://www.hentai-foundry.com/stories/user/USER/12345/TITLE" skip = Extractor.skip @@ -364,5 +371,5 @@ class HentaifoundryStoryExtractor(HentaifoundryExtractor): story_url = (f"{self.root}/stories/user/{self.user}" f"/{self.index}/x?enterAgree=1") story = self._parse_story(self.request(story_url).text) - yield Message.Directory, story + yield Message.Directory, "", story yield Message.Url, story["src"], story |
