summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/naver.py
diff options
context:
space:
mode:
author Libravatar Unit 193 <unit193@unit193.net> 2024-03-25 02:57:44 -0400
committer Libravatar Unit 193 <unit193@unit193.net> 2024-03-25 02:57:44 -0400
commit 6e662211019a89caec44de8a57c675872b0b5498 (patch)
tree 5d9d5a2b7efc3a24dd6074e99b253b639fe5af1d /gallery_dl/extractor/naver.py
parent 01166fa52707cc282467427cf0e65c1b8983c4be (diff)
New upstream version 1.26.9. upstream/1.26.9
Diffstat (limited to 'gallery_dl/extractor/naver.py')
-rw-r--r-- gallery_dl/extractor/naver.py 28
1 files changed, 17 insertions, 11 deletions
diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py
index 55faf9e..d3150e6 100644
--- a/gallery_dl/extractor/naver.py
+++ b/gallery_dl/extractor/naver.py
@@ -26,7 +26,8 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
"{post[date]:%Y-%m-%d} {post[title]}")
archive_fmt = "{blog[id]}_{post[num]}_{num}"
pattern = (r"(?:https?://)?blog\.naver\.com/"
- r"(?:PostView\.nhn\?blogId=(\w+)&logNo=(\d+)|(\w+)/(\d+)/?$)")
+ r"(?:PostView\.n(?:aver|hn)\?blogId=(\w+)&logNo=(\d+)|"
+ r"(\w+)/(\d+)/?$)")
example = "https://blog.naver.com/BLOGID/12345"
def __init__(self, match):
@@ -46,8 +47,10 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
extr = text.extract_from(page)
data = {
"post": {
- "title" : extr('"og:title" content="', '"'),
- "description": extr('"og:description" content="', '"'),
+ "title" : text.unescape(extr(
+ '"og:title" content="', '"')),
+ "description": text.unescape(extr(
+ '"og:description" content="', '"')).replace("&nbsp;", " "),
"num" : text.parse_int(self.post_id),
},
"blog": {
@@ -62,10 +65,13 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
return data
def images(self, page):
- return [
- (url.replace("://post", "://blog", 1).partition("?")[0], None)
- for url in text.extract_iter(page, 'data-lazy-src="', '"')
- ]
+ results = []
+ for url in text.extract_iter(page, 'data-lazy-src="', '"'):
+ url = url.replace("://post", "://blog", 1).partition("?")[0]
+ if "\ufffd" in text.unquote(url):
+ url = text.unquote(url, encoding="EUC-KR")
+ results.append((url, None))
+ return results
class NaverBlogExtractor(NaverBase, Extractor):
@@ -73,7 +79,8 @@ class NaverBlogExtractor(NaverBase, Extractor):
subcategory = "blog"
categorytransfer = True
pattern = (r"(?:https?://)?blog\.naver\.com/"
- r"(?:PostList.nhn\?(?:[^&#]+&)*blogId=([^&#]+)|(\w+)/?$)")
+ r"(?:PostList\.n(?:aver|hn)\?(?:[^&#]+&)*blogId=([^&#]+)|"
+ r"(\w+)/?$)")
example = "https://blog.naver.com/BLOGID"
def __init__(self, match):
@@ -81,12 +88,11 @@ class NaverBlogExtractor(NaverBase, Extractor):
self.blog_id = match.group(1) or match.group(2)
def items(self):
-
# fetch first post number
url = "{}/PostList.nhn?blogId={}".format(self.root, self.blog_id)
- post_num = text.extract(
+ post_num = text.extr(
self.request(url).text, 'gnFirstLogNo = "', '"',
- )[0]
+ )
# setup params for API calls
url = "{}/PostViewBottomTitleListAsync.nhn".format(self.root)