about summary refs log tree commit diff stats
path: root/gallery_dl/extractor/hentaifoundry.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/hentaifoundry.py')
-rw-r--r-- gallery_dl/extractor/hentaifoundry.py 37
1 files changed, 22 insertions, 15 deletions
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index a08f7bb..882183b 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -43,7 +43,7 @@ class HentaifoundryExtractor(Extractor):
for post_url in util.advance(self.posts(), self.start_post):
image = self._parse_post(post_url)
image.update(data)
- yield Message.Directory, image
+ yield Message.Directory, "", image
yield Message.Url, image["src"], image
def skip(self, num):
@@ -86,7 +86,8 @@ class HentaifoundryExtractor(Extractor):
.replace("\r\n", "\n")),
"ratings" : [text.unescape(r) for r in text.extract_iter(extr(
"class='ratings_box'", "</div>"), "title='", "'")],
- "date" : text.parse_datetime(extr("datetime='", "'")),
+ "categories" : self._extract_categories(extr),
+ "date" : self.parse_datetime_iso(extr("datetime='", "'")),
"views" : text.parse_int(extr(">Views</span>", "<")),
"score" : text.parse_int(extr(">Vote Score</span>", "<")),
"media" : text.unescape(extr(">Media</span>", "<").strip()),
@@ -126,7 +127,7 @@ class HentaifoundryExtractor(Extractor):
"title" : text.unescape(extr(
"<div class='titlebar'>", "</a>").rpartition(">")[2]),
"author" : text.unescape(extr('alt="', '"')),
- "date" : text.parse_datetime(extr(
+ "date" : self.parse_datetime(extr(
">Updated<", "</span>").rpartition(">")[2], "%B %d, %Y"),
"status" : extr("class='indent'>", "<"),
}
@@ -141,11 +142,17 @@ class HentaifoundryExtractor(Extractor):
path = extr('class="pdfLink" href="', '"')
data["src"] = self.root + path
data["index"] = text.parse_int(path.rsplit("/", 2)[1])
+ data["categories"] = self._extract_categories(extr)
data["ratings"] = [text.unescape(r) for r in text.extract_iter(extr(
"class='ratings_box'", "</div>"), "title='", "'")]
return text.nameext_from_url(data["src"], data)
+ def _extract_categories(self, extr):
+ return [text.unescape(text.extr(c, ">", "<"))
+ for c in extr('class="categoryBreadcrumbs">', "</span>")
+ .split("&raquo;")]
+
def _request_check(self, url, **kwargs):
self.request = self._request_original
@@ -207,7 +214,7 @@ class HentaifoundryExtractor(Extractor):
class HentaifoundryUserExtractor(Dispatch, HentaifoundryExtractor):
"""Extractor for a hentaifoundry user profile"""
- pattern = BASE_PATTERN + r"/user/([^/?#]+)/profile"
+ pattern = rf"{BASE_PATTERN}/user/([^/?#]+)/profile"
example = "https://www.hentai-foundry.com/user/USER/profile"
def items(self):
@@ -228,7 +235,7 @@ class HentaifoundryUserExtractor(Dispatch, HentaifoundryExtractor):
class HentaifoundryPicturesExtractor(HentaifoundryExtractor):
"""Extractor for all pictures of a hentaifoundry user"""
subcategory = "pictures"
- pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)(?:/page/(\d+))?/?$"
+ pattern = rf"{BASE_PATTERN}/pictures/user/([^/?#]+)(?:/page/(\d+))?/?$"
example = "https://www.hentai-foundry.com/pictures/user/USER"
def __init__(self, match):
@@ -240,7 +247,7 @@ class HentaifoundryScrapsExtractor(HentaifoundryExtractor):
"""Extractor for scraps of a hentaifoundry user"""
subcategory = "scraps"
directory_fmt = ("{category}", "{user}", "Scraps")
- pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)/scraps"
+ pattern = rf"{BASE_PATTERN}/pictures/user/([^/?#]+)/scraps"
example = "https://www.hentai-foundry.com/pictures/user/USER/scraps"
def __init__(self, match):
@@ -253,7 +260,7 @@ class HentaifoundryFavoriteExtractor(HentaifoundryExtractor):
subcategory = "favorite"
directory_fmt = ("{category}", "{user}", "Favorites")
archive_fmt = "f_{user}_{index}"
- pattern = BASE_PATTERN + r"/user/([^/?#]+)/faves/pictures"
+ pattern = rf"{BASE_PATTERN}/user/([^/?#]+)/faves/pictures"
example = "https://www.hentai-foundry.com/user/USER/faves/pictures"
def __init__(self, match):
@@ -266,7 +273,7 @@ class HentaifoundryTagExtractor(HentaifoundryExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{index}"
- pattern = BASE_PATTERN + r"/pictures/tagged/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/pictures/tagged/([^/?#]+)"
example = "https://www.hentai-foundry.com/pictures/tagged/TAG"
def __init__(self, match):
@@ -282,7 +289,7 @@ class HentaifoundryRecentExtractor(HentaifoundryExtractor):
subcategory = "recent"
directory_fmt = ("{category}", "Recent Pictures", "{date}")
archive_fmt = "r_{index}"
- pattern = BASE_PATTERN + r"/pictures/recent/(\d\d\d\d-\d\d-\d\d)"
+ pattern = rf"{BASE_PATTERN}/pictures/recent/(\d\d\d\d-\d\d-\d\d)"
example = "https://www.hentai-foundry.com/pictures/recent/1970-01-01"
def __init__(self, match):
@@ -298,7 +305,7 @@ class HentaifoundryPopularExtractor(HentaifoundryExtractor):
subcategory = "popular"
directory_fmt = ("{category}", "Popular Pictures")
archive_fmt = "p_{index}"
- pattern = BASE_PATTERN + r"/pictures/popular()"
+ pattern = rf"{BASE_PATTERN}/pictures/popular()"
example = "https://www.hentai-foundry.com/pictures/popular"
def __init__(self, match):
@@ -324,7 +331,7 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
f"/{self.index}/?enterAgree=1")
image = self._parse_post(post_url)
image["user"] = self.user
- yield Message.Directory, image
+ yield Message.Directory, "", image
yield Message.Url, image["src"], image
@@ -332,14 +339,14 @@ class HentaifoundryStoriesExtractor(HentaifoundryExtractor):
"""Extractor for stories of a hentaifoundry user"""
subcategory = "stories"
archive_fmt = "s_{index}"
- pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)(?:/page/(\d+))?/?$"
+ pattern = rf"{BASE_PATTERN}/stories/user/([^/?#]+)(?:/page/(\d+))?/?$"
example = "https://www.hentai-foundry.com/stories/user/USER"
def items(self):
self._init_site_filters()
for story_html in util.advance(self.stories(), self.start_post):
story = self._parse_story(story_html)
- yield Message.Directory, story
+ yield Message.Directory, "", story
yield Message.Url, story["src"], story
def stories(self):
@@ -351,7 +358,7 @@ class HentaifoundryStoryExtractor(HentaifoundryExtractor):
"""Extractor for a hentaifoundry story"""
subcategory = "story"
archive_fmt = "s_{index}"
- pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/stories/user/([^/?#]+)/(\d+)"
example = "https://www.hentai-foundry.com/stories/user/USER/12345/TITLE"
skip = Extractor.skip
@@ -364,5 +371,5 @@ class HentaifoundryStoryExtractor(HentaifoundryExtractor):
story_url = (f"{self.root}/stories/user/{self.user}"
f"/{self.index}/x?enterAgree=1")
story = self._parse_story(self.request(story_url).text)
- yield Message.Directory, story
+ yield Message.Directory, "", story
yield Message.Url, story["src"], story