New upstream version 1.10.6 (upstream/1.10.6)

author: Unit 193 <unit193@ubuntu.com> 2019-10-11 20:28:32 -0400
committer: Unit 193 <unit193@ubuntu.com> 2019-10-11 20:28:32 -0400
commit: 40f5fe6edef268632d3bc484e85e5b37bad67bff (patch)
tree: 98817850b65f1d2877bd4ed63a3908f37d794f8d /gallery_dl/extractor/yaplog.py
parent: 639d9ea4a667733aadc3ff83a1df2cc9f0add3a9 (diff)
1 files changed, 30 insertions, 11 deletions
diff --git a/gallery_dl/extractor/yaplog.py b/gallery_dl/extractor/yaplog.py
index b3c5501..b07ba4b 100644
--- a/gallery_dl/extractor/yaplog.py
+++ b/gallery_dl/extractor/yaplog.py
@@ -12,6 +12,9 @@ from .common import Extractor, Message, AsynchronousMixin
 from .. import text, util
 
 
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?yaplog\.jp/([\w-]+)"
+
+
 class YaplogExtractor(AsynchronousMixin, Extractor):
     """Base class for yaplog extractors"""
     category = "yaplog"
@@ -31,11 +34,15 @@ class YaplogExtractor(AsynchronousMixin, Extractor):
             for num, url in enumerate(urls, 1):
                 page = self.request(url).text if num > 1 else url
                 iurl = text.extract(page, '<img src="', '"')[0]
-                iid, _, ext = iurl.rpartition("/")[2].rpartition(".")
+                if iurl[0] == "/":
+                    iurl = text.urljoin(self.root, iurl)
+                name, _, ext = iurl.rpartition("/")[2].rpartition(".")
+                iid = name.rpartition("_")[0] or name
                 image = {
                     "url"      : iurl,
                     "num"      : num,
-                    "id"       : text.parse_int(iid.partition("_")[0]),
+                    "id"       : text.parse_int(iid, iid),
+                    "filename" : name,
                     "extension": ext,
                     "post"     : post,
                 }
@@ -52,7 +59,8 @@ class YaplogExtractor(AsynchronousMixin, Extractor):
         prev , pos = text.extract(page, 'class="last"><a href="', '"', pos)
 
         urls = list(text.extract_iter(page, '<li><a href="', '"', pos))
-        urls[0] = page  # cache HTML of first page
+        if urls:
+            urls[0] = page  # cache HTML of first page
 
         if len(urls) == 24 and text.extract(page, '(1/', ')')[0] != '24':
             # there are a maximum of 24 image entries in an /image/ page
@@ -69,14 +77,14 @@ class YaplogExtractor(AsynchronousMixin, Extractor):
             "id"   : text.parse_int(pid),
             "title": text.unescape(title[:-3]),
             "user" : self.user,
-            "date" : date,
+            "date" : text.parse_datetime(date, "%B %d [%a], %Y, %H:%M"),
         }
 
 
 class YaplogBlogExtractor(YaplogExtractor):
     """Extractor for a user's blog on yaplog.jp"""
     subcategory = "blog"
-    pattern = r"(?:https?://)?(?:www\.)?yaplog\.jp/(\w+)/?(?:$|[?&#])"
+    pattern = BASE_PATTERN + r"/?(?:$|[?&#])"
     test = ("https://yaplog.jp/omitakashi3", {
         "pattern": r"https://img.yaplog.jp/img/18/pc/o/m/i/omitakashi3/0/",
         "count": ">= 2",
@@ -92,12 +100,23 @@ class YaplogBlogExtractor(YaplogExtractor):
 class YaplogPostExtractor(YaplogExtractor):
     """Extractor for images from a blog post on yaplog.jp"""
     subcategory = "post"
-    pattern = (r"(?:https?://)?(?:www\.)?yaplog\.jp"
-               r"/(\w+)/(?:archive|image)/(\d+)")
-    test = ("https://yaplog.jp/imamiami0726/image/1299", {
-        "url": "896cae20fa718735a57e723c48544e830ff31345",
-        "keyword": "f8d8781e61c4c38238a7622d6df6c905f864e5d3",
-    })
+    pattern = BASE_PATTERN + r"/(?:archive|image)/(\d+)"
+    test = (
+        ("https://yaplog.jp/imamiami0726/image/1299", {
+            "url": "896cae20fa718735a57e723c48544e830ff31345",
+            "keyword": "22df8ad6cb534514c6bb2ff000381d156769a620",
+        }),
+        # complete image URLs (#443)
+        ("https://yaplog.jp/msjane/archive/246", {
+            "pattern": r"https://yaplog.jp/cv/msjane/img/246/img\d+_t.jpg"
+        }),
+        # empty post (#443)
+        ("https://yaplog.jp/f_l_a_s_c_o/image/872", {
+            "count": 0,
+        }),
+        # blog names with '-' (#443)
+        ("https://yaplog.jp/a-pierrot-o/image/3946/22779"),
+    )
 
     def __init__(self, match):
         YaplogExtractor.__init__(self, match)
author	Unit 193 <unit193@ubuntu.com>	2019-10-11 20:28:32 -0400
committer	Unit 193 <unit193@ubuntu.com>	2019-10-11 20:28:32 -0400
commit	40f5fe6edef268632d3bc484e85e5b37bad67bff (patch)
tree	98817850b65f1d2877bd4ed63a3908f37d794f8d /gallery_dl/extractor/yaplog.py
parent	639d9ea4a667733aadc3ff83a1df2cc9f0add3a9 (diff)