diff options
| author | 2019-10-11 20:28:32 -0400 | |
|---|---|---|
| committer | 2019-10-11 20:28:32 -0400 | |
| commit | 40f5fe6edef268632d3bc484e85e5b37bad67bff (patch) | |
| tree | 98817850b65f1d2877bd4ed63a3908f37d794f8d /gallery_dl/extractor/yaplog.py | |
| parent | 639d9ea4a667733aadc3ff83a1df2cc9f0add3a9 (diff) | |
New upstream version 1.10.6 [upstream/1.10.6]
Diffstat (limited to 'gallery_dl/extractor/yaplog.py')
| -rw-r--r-- | gallery_dl/extractor/yaplog.py | 41 |
1 file changed, 30 insertions, 11 deletions
diff --git a/gallery_dl/extractor/yaplog.py b/gallery_dl/extractor/yaplog.py index b3c5501..b07ba4b 100644 --- a/gallery_dl/extractor/yaplog.py +++ b/gallery_dl/extractor/yaplog.py @@ -12,6 +12,9 @@ from .common import Extractor, Message, AsynchronousMixin from .. import text, util +BASE_PATTERN = r"(?:https?://)?(?:www\.)?yaplog\.jp/([\w-]+)" + + class YaplogExtractor(AsynchronousMixin, Extractor): """Base class for yaplog extractors""" category = "yaplog" @@ -31,11 +34,15 @@ class YaplogExtractor(AsynchronousMixin, Extractor): for num, url in enumerate(urls, 1): page = self.request(url).text if num > 1 else url iurl = text.extract(page, '<img src="', '"')[0] - iid, _, ext = iurl.rpartition("/")[2].rpartition(".") + if iurl[0] == "/": + iurl = text.urljoin(self.root, iurl) + name, _, ext = iurl.rpartition("/")[2].rpartition(".") + iid = name.rpartition("_")[0] or name image = { "url" : iurl, "num" : num, - "id" : text.parse_int(iid.partition("_")[0]), + "id" : text.parse_int(iid, iid), + "filename" : name, "extension": ext, "post" : post, } @@ -52,7 +59,8 @@ class YaplogExtractor(AsynchronousMixin, Extractor): prev , pos = text.extract(page, 'class="last"><a href="', '"', pos) urls = list(text.extract_iter(page, '<li><a href="', '"', pos)) - urls[0] = page # cache HTML of first page + if urls: + urls[0] = page # cache HTML of first page if len(urls) == 24 and text.extract(page, '(1/', ')')[0] != '24': # there are a maximum of 24 image entries in an /image/ page @@ -69,14 +77,14 @@ class YaplogExtractor(AsynchronousMixin, Extractor): "id" : text.parse_int(pid), "title": text.unescape(title[:-3]), "user" : self.user, - "date" : date, + "date" : text.parse_datetime(date, "%B %d [%a], %Y, %H:%M"), } class YaplogBlogExtractor(YaplogExtractor): """Extractor for a user's blog on yaplog.jp""" subcategory = "blog" - pattern = r"(?:https?://)?(?:www\.)?yaplog\.jp/(\w+)/?(?:$|[?&#])" + pattern = BASE_PATTERN + r"/?(?:$|[?&#])" test = ("https://yaplog.jp/omitakashi3", { 
"pattern": r"https://img.yaplog.jp/img/18/pc/o/m/i/omitakashi3/0/", "count": ">= 2", @@ -92,12 +100,23 @@ class YaplogBlogExtractor(YaplogExtractor): class YaplogPostExtractor(YaplogExtractor): """Extractor for images from a blog post on yaplog.jp""" subcategory = "post" - pattern = (r"(?:https?://)?(?:www\.)?yaplog\.jp" - r"/(\w+)/(?:archive|image)/(\d+)") - test = ("https://yaplog.jp/imamiami0726/image/1299", { - "url": "896cae20fa718735a57e723c48544e830ff31345", - "keyword": "f8d8781e61c4c38238a7622d6df6c905f864e5d3", - }) + pattern = BASE_PATTERN + r"/(?:archive|image)/(\d+)" + test = ( + ("https://yaplog.jp/imamiami0726/image/1299", { + "url": "896cae20fa718735a57e723c48544e830ff31345", + "keyword": "22df8ad6cb534514c6bb2ff000381d156769a620", + }), + # complete image URLs (#443) + ("https://yaplog.jp/msjane/archive/246", { + "pattern": r"https://yaplog.jp/cv/msjane/img/246/img\d+_t.jpg" + }), + # empty post (#443) + ("https://yaplog.jp/f_l_a_s_c_o/image/872", { + "count": 0, + }), + # blog names with '-' (#443) + ("https://yaplog.jp/a-pierrot-o/image/3946/22779"), + ) def __init__(self, match): YaplogExtractor.__init__(self, match) |
