summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/vk.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/vk.py')
-rw-r--r--gallery_dl/extractor/vk.py82
1 files changed, 50 insertions, 32 deletions
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index ea034a7..0f323e1 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2023 Mike Fährmann
+# Copyright 2021-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://vk.com/"""
from .common import Extractor, Message
-from .. import text, exception
-import re
+from .. import text, util, exception
BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
@@ -27,12 +26,17 @@ class VkExtractor(Extractor):
def _init(self):
self.offset = text.parse_int(self.config("offset"))
+ def finalize(self):
+ if self.offset:
+ self.log.info("Use '-o offset=%s' to continue downloading "
+ "from the current position", self.offset)
+
def skip(self, num):
self.offset += num
return num
def items(self):
- sub = re.compile(r"/imp[fg]/").sub
+ subn = util.re(r"/imp[fg]/").subn
sizes = "wzyxrqpo"
data = self.metadata()
@@ -54,9 +58,12 @@ class VkExtractor(Extractor):
self.log.warning("no photo URL found (%s)", photo.get("id"))
continue
- photo["url"] = sub("/", url.partition("?")[0])
- # photo["url"] = url
- photo["_fallback"] = (url,)
+ url_sub, count = subn("/", url.partition("?")[0])
+ if count:
+ photo["_fallback"] = (url,)
+ photo["url"] = url = url_sub
+ else:
+ photo["url"] = url
try:
_, photo["width"], photo["height"] = photo[size]
@@ -67,8 +74,8 @@ class VkExtractor(Extractor):
photo["id"] = photo["id"].rpartition("_")[2]
photo.update(data)
- text.nameext_from_url(photo["url"], photo)
- yield Message.Url, photo["url"], photo
+ text.nameext_from_url(url, photo)
+ yield Message.Url, url, photo
def _pagination(self, photos_id):
url = self.root + "/al_photos.php"
@@ -86,10 +93,13 @@ class VkExtractor(Extractor):
}
while True:
- payload = self.request(
- url, method="POST", headers=headers, data=data,
- ).json()["payload"][1]
+ response = self.request(
+ url, method="POST", headers=headers, data=data)
+ if response.history and "/challenge.html" in response.url:
+ raise exception.AbortExtraction(
+ f"HTTP redirect to 'challenge' page:\n{response.url}")
+ payload = response.json()["payload"][1]
if len(payload) < 4:
self.log.debug(payload)
raise exception.AuthorizationError(
@@ -98,18 +108,19 @@ class VkExtractor(Extractor):
total = payload[1]
photos = payload[3]
- data["offset"] += len(photos)
- if data["offset"] >= total:
+ offset_next = self.offset + len(photos)
+ if offset_next >= total:
# the last chunk of photos also contains the first few photos
# again if 'total' is not a multiple of 10
- extra = total - data["offset"]
- if extra:
+ if extra := total - offset_next:
del photos[extra:]
yield from photos
+ self.offset = 0
return
yield from photos
+ data["offset"] = self.offset = offset_next
class VkPhotosExtractor(VkExtractor):
@@ -131,26 +142,34 @@ class VkPhotosExtractor(VkExtractor):
if self.user_id:
user_id = self.user_id
prefix = "public" if user_id[0] == "-" else "id"
- url = "{}/{}{}".format(self.root, prefix, user_id.lstrip("-"))
+ url = f"{self.root}/{prefix}{user_id.lstrip('-')}"
data = self._extract_profile(url)
else:
- url = "{}/{}".format(self.root, self.user_name)
+ url = f"{self.root}/{self.user_name}"
data = self._extract_profile(url)
self.user_id = data["user"]["id"]
return data
def _extract_profile(self, url):
- extr = text.extract_from(self.request(url).text)
- return {"user": {
- "name": text.unescape(extr(
- 'rel="canonical" href="https://vk.com/', '"')),
+ page = self.request(url).text
+ extr = text.extract_from(page)
+
+ user = {
+ "id" : extr('property="og:url" content="https://vk.com/id', '"'),
"nick": text.unescape(extr(
- '<h1 class="page_name">', "<")).replace(" ", " "),
- "info": text.unescape(text.remove_html(extr(
- '<span class="current_text">', '</span'))),
- "id" : (extr('<a href="/albums', '"') or
- extr('data-from-id="', '"')),
- }}
+ "<title>", " | VK</title>")),
+ "info": text.unescape(extr(
+ ',"activity":"', '","')).replace("\\/", "/"),
+ "name": extr('href="https://m.vk.com/', '"'),
+ }
+
+ if user["id"]:
+ user["group"] = False
+ else:
+ user["group"] = True
+ user["id"] = extr('data-from-id="', '"')
+
+ return {"user": user}
class VkAlbumExtractor(VkExtractor):
@@ -165,8 +184,7 @@ class VkAlbumExtractor(VkExtractor):
self.user_id, self.album_id = match.groups()
def photos(self):
- return self._pagination("album{}_{}".format(
- self.user_id, self.album_id))
+ return self._pagination(f"album{self.user_id}_{self.album_id}")
def metadata(self):
return {
@@ -184,10 +202,10 @@ class VkTaggedExtractor(VkExtractor):
def __init__(self, match):
VkExtractor.__init__(self, match)
- self.user_id = match.group(1)
+ self.user_id = match[1]
def photos(self):
- return self._pagination("tag{}".format(self.user_id))
+ return self._pagination(f"tag{self.user_id}")
def metadata(self):
return {"user": {"id": self.user_id}}