1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://chzzk.naver.com/"""
from .common import Extractor, Message
from .. import text, util
class NaverChzzkExtractor(Extractor):
    """Common base for the chzzk.naver.com extractors

    Subclasses provide comments(), an iterable of mappings that each hold
    a "comment" and a "user" entry; items() converts those mappings into
    Directory and Url messages.
    """
    category = "naver-chzzk"
    filename_fmt = "{uid}_{id}_{num}.{extension}"
    directory_fmt = ("{category}", "{user[userNickname]}")
    archive_fmt = "{uid}_{id}_{num}"

    def request_api(self, uid, id=None, params=None):
        """Query the comment API and return the "content" of its response

        uid    -- identifier inserted after '/CHANNEL_POST/id/' in the URL
        id     -- optional comment identifier appended to the URL path;
                  an empty segment is used when it is falsy
        params -- optional query parameters forwarded to request_json()
        """
        endpoint = (
            f"https://apis.naver.com/nng_main/nng_comment_api/v1/type"
            f"/CHANNEL_POST/id/{uid}/comments/{id or ''}"
        )
        response = self.request_json(endpoint, params=params)
        return response["content"]

    def items(self):
        """Yield one Directory message per comment, then one Url message
        for each of its attached files
        """
        for entry in self.comments():
            post = entry["comment"]
            # a falsy "attaches" value is treated as "no files"
            attachments = post.pop("attaches") or ()

            post["id"] = post["commentId"]
            post["uid"] = post["objectId"]
            post["user"] = entry["user"]
            post["count"] = len(attachments)
            post["date"] = self.parse_datetime(
                post["createdDate"], "%Y%m%d%H%M%S")
            yield Message.Directory, "", post

            for index, attachment in enumerate(attachments, 1):
                # 1-based position of the file within its comment
                post["num"] = index

                extra_json = attachment.get("extraJson")
                if extra_json:
                    # merge the fields of the embedded JSON document
                    # into the file's own metadata
                    attachment.update(util.json_loads(extra_json))

                attachment["date"] = self.parse_datetime_iso(
                    attachment["createdDate"])
                attachment["date_updated"] = self.parse_datetime_iso(
                    attachment["updatedDate"])
                post["file"] = attachment

                file_url = attachment["attachValue"]
                yield Message.Url, file_url, text.nameext_from_url(
                    file_url, post)
class NaverChzzkCommentExtractor(NaverChzzkExtractor):
    """Extractor for a single comment from chzzk.naver.com"""
    subcategory = "comment"
    pattern = r"(?:https?://)?chzzk\.naver\.com/(\w+)/community/detail/(\d+)"
    example = "https://chzzk.naver.com/0123456789abcdef/community/detail/12345"

    def comments(self):
        """Return a one-element tuple holding the requested comment

        The element carries the "comment" and "user" entries of the API
        response for the two groups captured by 'pattern'.
        """
        uid, comment_id = self.groups
        content = self.request_api(uid, comment_id)
        entry = {
            "comment": content["comment"],
            "user": content["user"],
        }
        return (entry,)
class NaverChzzkCommunityExtractor(NaverChzzkExtractor):
    """Extractor for the comments listed on a chzzk.naver.com community"""
    subcategory = "community"
    pattern = r"(?:https?://)?chzzk\.naver\.com/(\w+)/community"
    example = "https://chzzk.naver.com/0123456789abcdef/community"
    request_interval = (0.5, 1.5)

    def comments(self):
        """Yield comment entries page by page

        Paging starts at the configured "offset" option and continues
        until the API response reports no next page.
        """
        uid = self.match[1]
        params = {
            "limit": 10,
            "offset": text.parse_int(self.config("offset")),
            "pagingType": "PAGE",
        }

        more_pages = True
        while more_pages:
            batch = self.request_api(uid, params=params)["comments"]
            yield from batch["data"]

            more_pages = bool(batch["page"]["next"])
            if more_pages:
                # advance by one page worth of comments
                params["offset"] += params["limit"]
|