summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/yaplog.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/yaplog.py')
-rw-r--r-- gallery_dl/extractor/yaplog.py | 41
1 files changed, 30 insertions, 11 deletions
diff --git a/gallery_dl/extractor/yaplog.py b/gallery_dl/extractor/yaplog.py
index b3c5501..b07ba4b 100644
--- a/gallery_dl/extractor/yaplog.py
+++ b/gallery_dl/extractor/yaplog.py
@@ -12,6 +12,9 @@ from .common import Extractor, Message, AsynchronousMixin
from .. import text, util
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?yaplog\.jp/([\w-]+)"
+
+
class YaplogExtractor(AsynchronousMixin, Extractor):
"""Base class for yaplog extractors"""
category = "yaplog"
@@ -31,11 +34,15 @@ class YaplogExtractor(AsynchronousMixin, Extractor):
for num, url in enumerate(urls, 1):
page = self.request(url).text if num > 1 else url
iurl = text.extract(page, '<img src="', '"')[0]
- iid, _, ext = iurl.rpartition("/")[2].rpartition(".")
+ if iurl[0] == "/":
+ iurl = text.urljoin(self.root, iurl)
+ name, _, ext = iurl.rpartition("/")[2].rpartition(".")
+ iid = name.rpartition("_")[0] or name
image = {
"url" : iurl,
"num" : num,
- "id" : text.parse_int(iid.partition("_")[0]),
+ "id" : text.parse_int(iid, iid),
+ "filename" : name,
"extension": ext,
"post" : post,
}
@@ -52,7 +59,8 @@ class YaplogExtractor(AsynchronousMixin, Extractor):
prev , pos = text.extract(page, 'class="last"><a href="', '"', pos)
urls = list(text.extract_iter(page, '<li><a href="', '"', pos))
- urls[0] = page # cache HTML of first page
+ if urls:
+ urls[0] = page # cache HTML of first page
if len(urls) == 24 and text.extract(page, '(1/', ')')[0] != '24':
# there are a maximum of 24 image entries in an /image/ page
@@ -69,14 +77,14 @@ class YaplogExtractor(AsynchronousMixin, Extractor):
"id" : text.parse_int(pid),
"title": text.unescape(title[:-3]),
"user" : self.user,
- "date" : date,
+ "date" : text.parse_datetime(date, "%B %d [%a], %Y, %H:%M"),
}
class YaplogBlogExtractor(YaplogExtractor):
"""Extractor for a user's blog on yaplog.jp"""
subcategory = "blog"
- pattern = r"(?:https?://)?(?:www\.)?yaplog\.jp/(\w+)/?(?:$|[?&#])"
+ pattern = BASE_PATTERN + r"/?(?:$|[?&#])"
test = ("https://yaplog.jp/omitakashi3", {
"pattern": r"https://img.yaplog.jp/img/18/pc/o/m/i/omitakashi3/0/",
"count": ">= 2",
@@ -92,12 +100,23 @@ class YaplogBlogExtractor(YaplogExtractor):
class YaplogPostExtractor(YaplogExtractor):
"""Extractor for images from a blog post on yaplog.jp"""
subcategory = "post"
- pattern = (r"(?:https?://)?(?:www\.)?yaplog\.jp"
- r"/(\w+)/(?:archive|image)/(\d+)")
- test = ("https://yaplog.jp/imamiami0726/image/1299", {
- "url": "896cae20fa718735a57e723c48544e830ff31345",
- "keyword": "f8d8781e61c4c38238a7622d6df6c905f864e5d3",
- })
+ pattern = BASE_PATTERN + r"/(?:archive|image)/(\d+)"
+ test = (
+ ("https://yaplog.jp/imamiami0726/image/1299", {
+ "url": "896cae20fa718735a57e723c48544e830ff31345",
+ "keyword": "22df8ad6cb534514c6bb2ff000381d156769a620",
+ }),
+ # complete image URLs (#443)
+ ("https://yaplog.jp/msjane/archive/246", {
+ "pattern": r"https://yaplog.jp/cv/msjane/img/246/img\d+_t.jpg"
+ }),
+ # empty post (#443)
+ ("https://yaplog.jp/f_l_a_s_c_o/image/872", {
+ "count": 0,
+ }),
+ # blog names with '-' (#443)
+ ("https://yaplog.jp/a-pierrot-o/image/3946/22779"),
+ )
def __init__(self, match):
YaplogExtractor.__init__(self, match)