diff options
Diffstat (limited to 'gallery_dl/extractor/paheal.py')
| -rw-r--r-- | gallery_dl/extractor/paheal.py | 67 |
1 files changed, 46 insertions, 21 deletions
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py index f0a50c8..1fa571c 100644 --- a/gallery_dl/extractor/paheal.py +++ b/gallery_dl/extractor/paheal.py @@ -55,8 +55,8 @@ class PahealExtractor(Extractor): "class='username' href='/user/", "'")), "date" : text.parse_datetime( extr("datetime='", "'"), "%Y-%m-%dT%H:%M:%S%z"), - "source" : text.extract( - extr(">Source Link<", "</td>"), "href='", "'")[0], + "source" : text.unescape(text.extr( + extr(">Source Link<", "</td>"), "href='", "'")), } dimensions, size, ext = extr("Info</th><td>", ">").split(" // ") @@ -74,10 +74,34 @@ class PahealTagExtractor(PahealExtractor): directory_fmt = ("{category}", "{search_tags}") pattern = (r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net" r"/post/list/([^/?#]+)") - test = ("https://rule34.paheal.net/post/list/Ayane_Suzuki/1", { - "pattern": r"https://[^.]+\.paheal\.net/_images/\w+/\d+%20-%20", - "count": ">= 15" - }) + test = ( + ("https://rule34.paheal.net/post/list/Ayane_Suzuki/1", { + "pattern": r"https://[^.]+\.paheal\.net/_images/\w+/\d+%20-%20", + "count": ">= 15" + }), + ("https://rule34.paheal.net/post/list/Ayane_Suzuki/1", { + "range": "1", + "options": (("metadata", True),), + "keyword": { + "date": "dt:2018-01-07 07:04:05", + "duration": 0.0, + "extension": "jpg", + "filename": "2446128 - Ayane_Suzuki Idolmaster " + "idolmaster_dearly_stars Zanzi", + "height": 768, + "id": 2446128, + "md5": "b0ceda9d860df1d15b60293a7eb465c1", + "search_tags": "Ayane_Suzuki", + "size": 205312, + "source": "https://www.pixiv.net/member_illust.php" + "?mode=medium&illust_id=19957280", + "tags": "Ayane_Suzuki Idolmaster " + "idolmaster_dearly_stars Zanzi", + "uploader": "XXXname", + "width": 1024, + }, + }), + ) per_page = 70 def __init__(self, match): @@ -96,8 +120,9 @@ class PahealTagExtractor(PahealExtractor): url = "{}/post/list/{}/{}".format(self.root, self.tags, pnum) page = self.request(url).text + pos = page.find("id='image-list'") for post in text.extract_iter( - page, '<img id="thumb_', 'Only</a>'): + page, "<img id='thumb_", "Only</a>", pos): yield self._extract_data(post) if ">Next<" not in page: @@ -106,10 +131,10 @@ class PahealTagExtractor(PahealExtractor): @staticmethod def _extract_data(post): - pid , pos = text.extract(post, '', '"') - data, pos = text.extract(post, 'title="', '"', pos) - md5 , pos = text.extract(post, '/_thumbs/', '/', pos) - url , pos = text.extract(post, '<a href="', '"', pos) + pid , pos = text.extract(post, "", "'") + data, pos = text.extract(post, "title='", "'", pos) + md5 , pos = text.extract(post, "/_thumbs/", "/", pos) + url , pos = text.extract(post, "<a href='", "'", pos) tags, data, date = data.split("\n") dimensions, size, ext = data.split(" // ") @@ -126,7 +151,7 @@ class PahealTagExtractor(PahealExtractor): } def _extract_data_ex(self, post): - pid = post[:post.index('"')] + pid = post[:post.index("'")] return self._extract_post(pid) @@ -139,19 +164,19 @@ class PahealPostExtractor(PahealExtractor): ("https://rule34.paheal.net/post/view/481609", { "pattern": r"https://tulip\.paheal\.net/_images" r"/bbdc1c33410c2cdce7556c7990be26b7/481609%20-%20" - r"Azumanga_Daioh%20Osaka%20Vuvuzela%20inanimate\.jpg", + r"Azumanga_Daioh%20inanimate%20Osaka%20Vuvuzela\.jpg", "content": "7b924bcf150b352ac75c9d281d061e174c851a11", "keyword": { "date": "dt:2010-06-17 15:40:23", "extension": "jpg", "file_url": "re:https://tulip.paheal.net/_images/bbdc1c33410c", - "filename": "481609 - Azumanga_Daioh Osaka Vuvuzela inanimate", + "filename": "481609 - Azumanga_Daioh inanimate Osaka Vuvuzela", "height": 660, "id": 481609, "md5": "bbdc1c33410c2cdce7556c7990be26b7", "size": 157389, - "source": None, - "tags": "Azumanga_Daioh Osaka Vuvuzela inanimate", + "source": "", + "tags": "Azumanga_Daioh inanimate Osaka Vuvuzela", "uploader": "CaptainButtface", "width": 614, }, @@ -163,7 +188,7 @@ class PahealPostExtractor(PahealExtractor): "md5": "b39edfe455a0381110c710d6ed2ef57d", "size": 758989, "source": "http://www.furaffinity.net/view/4057821/", - "tags": "Vuvuzela inanimate thelost-dragon", + "tags": "inanimate thelost-dragon Vuvuzela", "uploader": "leacheate_soup", "width": 1200, }, @@ -171,8 +196,8 @@ class PahealPostExtractor(PahealExtractor): # video ("https://rule34.paheal.net/post/view/3864982", { "pattern": r"https://[\w]+\.paheal\.net/_images/7629fc0ff77e32637d" - r"de5bf4f992b2cb/3864982%20-%20Metal_Gear%20Metal_Gear_" - r"Solid_V%20Quiet%20Vg_erotica%20animated%20webm\.webm", + r"de5bf4f992b2cb/3864982%20-%20animated%20Metal_Gear%20" + r"Metal_Gear_Solid_V%20Quiet%20Vg_erotica%20webm\.webm", "keyword": { "date": "dt:2020-09-06 01:59:03", "duration": 30.0, @@ -183,8 +208,8 @@ class PahealPostExtractor(PahealExtractor): "size": 18454938, "source": "https://twitter.com/VG_Worklog" "/status/1302407696294055936", - "tags": "Metal_Gear Metal_Gear_Solid_V Quiet " - "Vg_erotica animated webm", + "tags": "animated Metal_Gear Metal_Gear_Solid_V " + "Quiet Vg_erotica webm", "uploader": "justausername", "width": 1768, }, |
