about summary refs log tree commit diff stats
path: root/gallery_dl/extractor/blogger.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/blogger.py')
-rw-r--r-- gallery_dl/extractor/blogger.py 58
1 file changed, 26 insertions, 32 deletions
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index ef117da..796d9d1 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2023 Mike Fährmann
+# Copyright 2019-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,7 +10,12 @@
from .common import BaseExtractor, Message
from .. import text, util
-import re
+
+
+def original(url):
+ return (util.re(r"(/|=)(?:[sw]\d+|w\d+-h\d+)(?=/|$)")
+ .sub(r"\1s0", url)
+ .replace("http:", "https:", 1))
class BloggerExtractor(BaseExtractor):
@@ -33,13 +38,12 @@ class BloggerExtractor(BaseExtractor):
blog["date"] = text.parse_datetime(blog["published"])
del blog["selfLink"]
- sub = re.compile(r"(/|=)(?:[sw]\d+|w\d+-h\d+)(?=/|$)").sub
- findall_image = re.compile(
+ findall_image = util.re(
r'src="(https?://(?:'
r'blogger\.googleusercontent\.com/img|'
r'lh\d+(?:-\w+)?\.googleusercontent\.com|'
r'\d+\.bp\.blogspot\.com)/[^"]+)').findall
- findall_video = re.compile(
+ findall_video = util.re(
r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall
metadata = self.metadata()
@@ -48,7 +52,7 @@ class BloggerExtractor(BaseExtractor):
files = findall_image(content)
for idx, url in enumerate(files):
- files[idx] = sub(r"\1s0", url).replace("http:", "https:", 1)
+ files[idx] = original(url)
if self.videos and 'id="BLOG_video-' in content:
page = self.request(post["url"]).text
@@ -98,12 +102,8 @@ class BloggerPostExtractor(BloggerExtractor):
pattern = BASE_PATTERN + r"(/\d\d\d\d/\d\d/[^/?#]+\.html)"
example = "https://BLOG.blogspot.com/1970/01/TITLE.html"
- def __init__(self, match):
- BloggerExtractor.__init__(self, match)
- self.path = match.group(match.lastindex)
-
def posts(self, blog):
- return (self.api.post_by_path(blog["id"], self.path),)
+ return (self.api.post_by_path(blog["id"], self.groups[-1]),)
class BloggerBlogExtractor(BloggerExtractor):
@@ -122,16 +122,13 @@ class BloggerSearchExtractor(BloggerExtractor):
pattern = BASE_PATTERN + r"/search/?\?q=([^&#]+)"
example = "https://BLOG.blogspot.com/search?q=QUERY"
- def __init__(self, match):
- BloggerExtractor.__init__(self, match)
- self.query = text.unquote(match.group(match.lastindex))
+ def metadata(self):
+ self.query = query = text.unquote(self.groups[-1])
+ return {"query": query}
def posts(self, blog):
return self.api.blog_search(blog["id"], self.query)
- def metadata(self):
- return {"query": self.query}
-
class BloggerLabelExtractor(BloggerExtractor):
"""Extractor for Blogger posts by label"""
@@ -139,21 +136,18 @@ class BloggerLabelExtractor(BloggerExtractor):
pattern = BASE_PATTERN + r"/search/label/([^/?#]+)"
example = "https://BLOG.blogspot.com/search/label/LABEL"
- def __init__(self, match):
- BloggerExtractor.__init__(self, match)
- self.label = text.unquote(match.group(match.lastindex))
+ def metadata(self):
+ self.label = label = text.unquote(self.groups[-1])
+ return {"label": label}
def posts(self, blog):
return self.api.blog_posts(blog["id"], self.label)
- def metadata(self):
- return {"label": self.label}
-
class BloggerAPI():
- """Minimal interface for the Blogger v3 API
+ """Minimal interface for the Blogger API v3
- Ref: https://developers.google.com/blogger
+ https://developers.google.com/blogger
"""
API_KEY = "AIzaSyCN9ax34oMMyM07g_M-5pjeDp_312eITK8"
@@ -162,27 +156,27 @@ class BloggerAPI():
self.api_key = extractor.config("api-key") or self.API_KEY
def blog_by_url(self, url):
- return self._call("blogs/byurl", {"url": url}, "blog")
+ return self._call("/blogs/byurl", {"url": url}, "blog")
def blog_posts(self, blog_id, label=None):
- endpoint = "blogs/{}/posts".format(blog_id)
+ endpoint = f"/blogs/{blog_id}/posts"
params = {"labels": label}
return self._pagination(endpoint, params)
def blog_search(self, blog_id, query):
- endpoint = "blogs/{}/posts/search".format(blog_id)
+ endpoint = f"/blogs/{blog_id}/posts/search"
params = {"q": query}
return self._pagination(endpoint, params)
def post_by_path(self, blog_id, path):
- endpoint = "blogs/{}/posts/bypath".format(blog_id)
+ endpoint = f"/blogs/{blog_id}/posts/bypath"
return self._call(endpoint, {"path": path}, "post")
def _call(self, endpoint, params, notfound=None):
- url = "https://www.googleapis.com/blogger/v3/" + endpoint
+ url = "https://www.googleapis.com/blogger/v3" + endpoint
params["key"] = self.api_key
- return self.extractor.request(
- url, params=params, notfound=notfound).json()
+ return self.extractor.request_json(
+ url, params=params, notfound=notfound)
def _pagination(self, endpoint, params):
while True: