diff options
Diffstat (limited to 'gallery_dl/extractor/naver.py')
| -rw-r--r-- | gallery_dl/extractor/naver.py | 28 |
1 files changed, 17 insertions, 11 deletions
diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py index 55faf9e..d3150e6 100644 --- a/gallery_dl/extractor/naver.py +++ b/gallery_dl/extractor/naver.py @@ -26,7 +26,8 @@ class NaverPostExtractor(NaverBase, GalleryExtractor): "{post[date]:%Y-%m-%d} {post[title]}") archive_fmt = "{blog[id]}_{post[num]}_{num}" pattern = (r"(?:https?://)?blog\.naver\.com/" - r"(?:PostView\.nhn\?blogId=(\w+)&logNo=(\d+)|(\w+)/(\d+)/?$)") + r"(?:PostView\.n(?:aver|hn)\?blogId=(\w+)&logNo=(\d+)|" + r"(\w+)/(\d+)/?$)") example = "https://blog.naver.com/BLOGID/12345" def __init__(self, match): @@ -46,8 +47,10 @@ class NaverPostExtractor(NaverBase, GalleryExtractor): extr = text.extract_from(page) data = { "post": { - "title" : extr('"og:title" content="', '"'), - "description": extr('"og:description" content="', '"'), + "title" : text.unescape(extr( + '"og:title" content="', '"')), + "description": text.unescape(extr( + '"og:description" content="', '"')).replace(" ", " "), "num" : text.parse_int(self.post_id), }, "blog": { @@ -62,10 +65,13 @@ class NaverPostExtractor(NaverBase, GalleryExtractor): return data def images(self, page): - return [ - (url.replace("://post", "://blog", 1).partition("?")[0], None) - for url in text.extract_iter(page, 'data-lazy-src="', '"') - ] + results = [] + for url in text.extract_iter(page, 'data-lazy-src="', '"'): + url = url.replace("://post", "://blog", 1).partition("?")[0] + if "\ufffd" in text.unquote(url): + url = text.unquote(url, encoding="EUC-KR") + results.append((url, None)) + return results class NaverBlogExtractor(NaverBase, Extractor): @@ -73,7 +79,8 @@ class NaverBlogExtractor(NaverBase, Extractor): subcategory = "blog" categorytransfer = True pattern = (r"(?:https?://)?blog\.naver\.com/" - r"(?:PostList.nhn\?(?:[^&#]+&)*blogId=([^&#]+)|(\w+)/?$)") + r"(?:PostList\.n(?:aver|hn)\?(?:[^&#]+&)*blogId=([^&#]+)|" + r"(\w+)/?$)") example = "https://blog.naver.com/BLOGID" def __init__(self, match): @@ -81,12 +88,11 @@ class NaverBlogExtractor(NaverBase, Extractor): self.blog_id = match.group(1) or match.group(2) def items(self): - # fetch first post number url = "{}/PostList.nhn?blogId={}".format(self.root, self.blog_id) - post_num = text.extract( + post_num = text.extr( self.request(url).text, 'gnFirstLogNo = "', '"', - )[0] + ) # setup params for API calls url = "{}/PostViewBottomTitleListAsync.nhn".format(self.root) |
