summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/vk.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/vk.py')
-rw-r--r--gallery_dl/extractor/vk.py88
1 files changed, 88 insertions, 0 deletions
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
new file mode 100644
index 0000000..1ce1140
--- /dev/null
+++ b/gallery_dl/extractor/vk.py
@@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://vk.com/"""
+
+from .common import Extractor, Message
+from .. import text
+import re
+
+
+class VkPhotosExtractor(Extractor):
+ """Extractor for photos from a vk user"""
+ category = "vk"
+ subcategory = "photos"
+ directory_fmt = ("{category}", "{user[id]}")
+ filename_fmt = "{id}.{extension}"
+ archive_fmt = "{id}"
+ root = "https://vk.com"
+ request_interval = 1.0
+ pattern = r"(?:https://)?(?:www\.|m\.)?vk\.com/(?:albums|photos|id)(\d+)"
+ test = (
+ ("https://vk.com/id398982326", {
+ "pattern": r"https://sun\d+-\d+\.userapi\.com/c\d+/v\d+"
+ r"/[0-9a-f]+/[\w-]+\.jpg",
+ "count": ">= 35",
+ }),
+ ("https://m.vk.com/albums398982326"),
+ ("https://www.vk.com/id398982326?profile=1"),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user_id = match.group(1)
+
+ def items(self):
+ user_id = self.user_id
+
+ if self.config("metadata"):
+ url = "{}/id{}".format(self.root, user_id)
+ extr = text.extract_from(self.request(url).text)
+ data = {"user": {
+ "id" : user_id,
+ "nick": text.unescape(extr(
+ "<title>", " | VK<")),
+ "name": text.unescape(extr(
+ '<h1 class="page_name">', "<")).replace(" ", " "),
+ "info": text.unescape(text.remove_html(extr(
+ '<span class="current_text">', '</span')))
+ }}
+ else:
+ data = {"user": {"id": user_id}}
+
+ photos_url = "{}/photos{}".format(self.root, user_id)
+ headers = {
+ "X-Requested-With": "XMLHttpRequest",
+ "Origin" : self.root,
+ "Referer" : photos_url,
+ }
+ params = {
+ "al" : "1",
+ "al_ad" : "0",
+ "offset": 0,
+ "part" : "1",
+ }
+
+ yield Message.Directory, data
+ sub = re.compile(r"/imp[fg]/").sub
+ needle = 'data-id="{}_'.format(user_id)
+
+ while True:
+ offset, html = self.request(
+ photos_url, method="POST", headers=headers, data=params
+ ).json()["payload"][1]
+
+ for cnt, photo in enumerate(text.extract_iter(html, needle, ')')):
+ data["id"] = photo[:photo.find('"')]
+ url = photo[photo.rindex("(")+1:]
+ url = sub("/", url.partition("?")[0])
+ yield Message.Url, url, text.nameext_from_url(url, data)
+
+ if cnt <= 40 or offset == params["offset"]:
+ return
+ params["offset"] = offset