summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/hentaifox.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net> 2021-04-13 19:33:47 -0400
committer: Unit 193 <unit193@unit193.net> 2021-04-13 19:33:47 -0400
commit: d27dcd4646242d6da8436f14c7b37ce864355858 (patch)
tree: c5c86ca7435010b6b13933217a1921430cf95dc4 /gallery_dl/extractor/hentaifox.py
parent: 3201d77a148367d739862b4f07868a76eaeb7cb1 (diff)
New upstream version 1.17.2. [upstream/1.17.2]
Diffstat (limited to 'gallery_dl/extractor/hentaifox.py')
-rw-r--r-- gallery_dl/extractor/hentaifox.py | 104
1 files changed, 64 insertions, 40 deletions
diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py
index 093f3fe..a5bebdd 100644
--- a/gallery_dl/extractor/hentaifox.py
+++ b/gallery_dl/extractor/hentaifox.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,6 +10,7 @@
from .common import GalleryExtractor, Extractor, Message
from .. import text
+import json
class HentaifoxBase():
@@ -21,61 +22,84 @@ class HentaifoxBase():
class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
"""Extractor for image galleries on hentaifox.com"""
pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com(/gallery/(\d+))"
- test = ("https://hentaifox.com/gallery/56622/", {
- "pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg",
- "keyword": "b7ff141331d0c7fc711ab28d45dfbb013a83d8e9",
- "count": 24,
- })
+ test = (
+ ("https://hentaifox.com/gallery/56622/", {
+ "pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg",
+ "keyword": "bcd6b67284f378e5cc30b89b761140e3e60fcd92",
+ "count": 24,
+ }),
+ # 'split_tag' element (#1378)
+ ("https://hentaifox.com/gallery/630/", {
+ "keyword": {
+ "artist": ["beti", "betty", "magi", "mimikaki"],
+ "characters": [
+ "aerith gainsborough",
+ "tifa lockhart",
+ "yuffie kisaragi"
+ ],
+ "count": 32,
+ "gallery_id": 630,
+ "group": ["cu-little2"],
+ "parody": ["darkstalkers | vampire", "final fantasy vii"],
+ "tags": ["femdom", "fingering", "masturbation", "yuri"],
+ "title": "Cu-Little Bakanya~",
+ "type": "doujinshi",
+ },
+ }),
+ )
def __init__(self, match):
GalleryExtractor.__init__(self, match)
self.gallery_id = match.group(2)
- def metadata(self, page, split=text.split_html):
+ @staticmethod
+ def _split(txt):
+ return [
+ text.remove_html(tag.partition(">")[2], "", "")
+ for tag in text.extract_iter(
+ txt, "class='tag_btn", "<span class='t_badge")
+ ]
+
+ def metadata(self, page):
extr = text.extract_from(page)
+ split = self._split
return {
"gallery_id": text.parse_int(self.gallery_id),
"title" : text.unescape(extr("<h1>", "</h1>")),
- "parody" : split(extr(">Parodies:" , "</ul>"))[::2],
- "characters": split(extr(">Characters:", "</ul>"))[::2],
- "tags" : split(extr(">Tags:" , "</ul>"))[::2],
- "artist" : split(extr(">Artists:" , "</ul>"))[::2],
- "group" : split(extr(">Groups:" , "</ul>"))[::2],
+ "parody" : split(extr(">Parodies:" , "</ul>")),
+ "characters": split(extr(">Characters:", "</ul>")),
+ "tags" : split(extr(">Tags:" , "</ul>")),
+ "artist" : split(extr(">Artists:" , "</ul>")),
+ "group" : split(extr(">Groups:" , "</ul>")),
"type" : text.remove_html(extr(">Category:", "<span")),
"language" : "English",
"lang" : "en",
}
def images(self, page):
- pos = page.find('id="load_all"')
- if pos >= 0:
- extr = text.extract
- load_id = extr(page, 'id="load_id" value="', '"', pos)[0]
- load_dir = extr(page, 'id="load_dir" value="', '"', pos)[0]
- load_pages = extr(page, 'id="load_pages" value="', '"', pos)[0]
-
- url = self.root + "/includes/thumbs_loader.php"
- data = {
- "u_id" : self.gallery_id,
- "g_id" : load_id,
- "img_dir" : load_dir,
- "visible_pages": "0",
- "total_pages" : load_pages,
- "type" : "2",
- }
- headers = {
- "Origin": self.root,
- "Referer": self.gallery_url,
- "X-Requested-With": "XMLHttpRequest",
- }
- page = self.request(
- url, method="POST", headers=headers, data=data).text
-
- return [
- (url.replace("t.", "."), None)
- for url in text.extract_iter(page, 'data-src="', '"')
- ]
+ cover, pos = text.extract(page, '<img src="', '"')
+ data , pos = text.extract(page, "$.parseJSON('", "');", pos)
+ path = "/".join(cover.split("/")[3:-1])
+
+ result = []
+ append = result.append
+ extmap = {"j": "jpg", "p": "png", "g": "gif"}
+ urlfmt = ("/" + path + "/{}.{}").format
+
+ server1 = "https://i.hentaifox.com"
+ server2 = "https://i2.hentaifox.com"
+
+ for num, image in json.loads(data).items():
+ ext, width, height = image.split(",")
+ path = urlfmt(num, extmap[ext])
+ append((server1 + path, {
+ "width" : width,
+ "height" : height,
+ "_fallback": (server2 + path,),
+ }))
+
+ return result
class HentaifoxSearchExtractor(HentaifoxBase, Extractor):