diff options
| author | 2021-08-04 02:14:44 -0400 | |
|---|---|---|
| committer | 2021-08-04 02:14:44 -0400 | |
| commit | 873d9a628e9412a79bdc64cd962470749de3425b (patch) | |
| tree | 8cd421ef79a9fa784147fa888543216f0872357b /gallery_dl/extractor/vk.py | |
| parent | 32de2b06db501c7de81678bce8e3e0c3e63d340c (diff) | |
New upstream version 1.18.2 (tag: upstream/1.18.2)
Diffstat (limited to 'gallery_dl/extractor/vk.py')
| -rw-r--r-- | gallery_dl/extractor/vk.py | 65 |
1 files changed, 47 insertions, 18 deletions
```diff
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 1ce1140..2178641 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -17,43 +17,60 @@ class VkPhotosExtractor(Extractor):
     """Extractor for photos from a vk user"""
     category = "vk"
     subcategory = "photos"
-    directory_fmt = ("{category}", "{user[id]}")
+    directory_fmt = ("{category}", "{user[name]|user[id]}")
     filename_fmt = "{id}.{extension}"
     archive_fmt = "{id}"
     root = "https://vk.com"
     request_interval = 1.0
-    pattern = r"(?:https://)?(?:www\.|m\.)?vk\.com/(?:albums|photos|id)(\d+)"
+    pattern = (r"(?:https://)?(?:www\.|m\.)?vk\.com/(?:"
+               r"(?:albums|photos|id)(-?\d+)|([^/?#]+))")
     test = (
         ("https://vk.com/id398982326", {
             "pattern": r"https://sun\d+-\d+\.userapi\.com/c\d+/v\d+"
                        r"/[0-9a-f]+/[\w-]+\.jpg",
             "count": ">= 35",
+            "keywords": {
+                "id": r"re:\d+",
+                "user": {
+                    "id": "398982326",
+                    "info": "Мы за Движуху! – m1ni SounD #4 [EROmusic]",
+                    "name": "",
+                    "nick": "Dobrov Kurva",
+                },
+            },
+        }),
+        ("https://vk.com/cosplayinrussia", {
+            "range": "75-100",
+            "keywords": {
+                "id": r"re:\d+",
+                "user": {
+                    "id"  : "-165740836",
+                    "info": "Предложка открыта, кидайте ваши косплейчики. При "
+                            "правильном оформлении они будут опубликованы",
+                    "name": "cosplayinrussia",
+                    "nick": "Косплей | Cosplay 18+",
+                },
+            },
         }),
         ("https://m.vk.com/albums398982326"),
         ("https://www.vk.com/id398982326?profile=1"),
+        ("https://vk.com/albums-165740836"),
     )
 
     def __init__(self, match):
         Extractor.__init__(self, match)
-        self.user_id = match.group(1)
+        self.user_id, self.user_name = match.groups()
 
     def items(self):
-        user_id = self.user_id
-
-        if self.config("metadata"):
-            url = "{}/id{}".format(self.root, user_id)
-            extr = text.extract_from(self.request(url).text)
-            data = {"user": {
-                "id"  : user_id,
-                "nick": text.unescape(extr(
-                    "<title>", " | VK<")),
-                "name": text.unescape(extr(
-                    '<h1 class="page_name">', "<")).replace(" ", " "),
-                "info": text.unescape(text.remove_html(extr(
-                    '<span class="current_text">', '</span')))
-            }}
+        if self.user_id:
+            user_id = self.user_id
+            prefix = "public" if user_id[0] == "-" else "id"
+            url = "{}/{}{}".format(self.root, prefix, user_id.lstrip("-"))
+            data = self._extract_profile(url)
         else:
-            data = {"user": {"id": user_id}}
+            url = "{}/{}".format(self.root, self.user_name)
+            data = self._extract_profile(url)
+            user_id = data["user"]["id"]
 
         photos_url = "{}/photos{}".format(self.root, user_id)
         headers = {
@@ -86,3 +103,15 @@ class VkPhotosExtractor(Extractor):
             if cnt <= 40 or offset == params["offset"]:
                 return
             params["offset"] = offset
+
+    def _extract_profile(self, url):
+        extr = text.extract_from(self.request(url).text)
+        return {"user": {
+            "name": text.unescape(extr(
+                'rel="canonical" href="https://vk.com/', '"')),
+            "nick": text.unescape(extr(
+                '<h1 class="page_name">', "<")).replace(" ", " "),
+            "info": text.unescape(text.remove_html(extr(
+                '<span class="current_text">', '</span'))),
+            "id"  : extr('<a href="/albums', '"'),
+        }}
```
