summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/blogger.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/blogger.py')
-rw-r--r-- gallery_dl/extractor/blogger.py 59
1 files changed, 40 insertions, 19 deletions
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index e0885d2..232f3ea 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -48,6 +48,7 @@ class BloggerExtractor(Extractor):
r'\d+\.bp\.blogspot\.com)/[^"]+)').findall
findall_video = re.compile(
r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall
+ metadata = self.metadata()
for post in self.posts(blog):
content = post["content"]
@@ -74,18 +75,21 @@ class BloggerExtractor(Extractor):
del post["selfLink"]
del post["blog"]
- yield Message.Directory, {"blog": blog, "post": post}
- for num, url in enumerate(files, 1):
- yield Message.Url, url, text.nameext_from_url(url, {
- "blog": blog,
- "post": post,
- "url" : url,
- "num" : num,
- })
+ data = {"blog": blog, "post": post}
+ if metadata:
+ data.update(metadata)
+ yield Message.Directory, data
+
+ for data["num"], url in enumerate(files, 1):
+ data["url"] = url
+ yield Message.Url, url, text.nameext_from_url(url, data)
def posts(self, blog):
"""Return an iterable with all relevant post objects"""
+ def metadata(self):
+ """Return additional metadata"""
+
class BloggerPostExtractor(BloggerExtractor):
"""Extractor for a single blog post"""
@@ -173,31 +177,48 @@ class BloggerBlogExtractor(BloggerExtractor):
class BloggerSearchExtractor(BloggerExtractor):
- """Extractor for search resuls and labels"""
+ """Extractor for Blogger search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/search(?:/?\?q=([^/?#]+)|/label/([^/?#]+))"
+ pattern = BASE_PATTERN + r"/search/?\?q=([^&#]+)"
test = (
("https://julianbphotography.blogspot.com/search?q=400mm", {
- "count": "< 10"
+ "count": "< 10",
+ "keyword": {"query": "400mm"},
}),
+ )
+
+ def __init__(self, match):
+ BloggerExtractor.__init__(self, match)
+ self.query = text.unquote(match.group(3))
+
+ def posts(self, blog):
+ return self.api.blog_search(blog["id"], self.query)
+
+ def metadata(self):
+ return {"query": self.query}
+
+
+class BloggerLabelExtractor(BloggerExtractor):
+ """Extractor for Blogger posts by label"""
+ subcategory = "label"
+ pattern = BASE_PATTERN + r"/search/label/([^/?#]+)"
+ test = (
("https://dmmagazine.blogspot.com/search/label/D%26D", {
"range": "1-25",
"count": 25,
+ "keyword": {"label": "D&D"},
}),
)
def __init__(self, match):
BloggerExtractor.__init__(self, match)
- query = match.group(3)
- if query:
- self.query, self.label = query, None
- else:
- self.query, self.label = None, match.group(4)
+ self.label = text.unquote(match.group(3))
def posts(self, blog):
- if self.query:
- return self.api.blog_search(blog["id"], text.unquote(self.query))
- return self.api.blog_posts(blog["id"], text.unquote(self.label))
+ return self.api.blog_posts(blog["id"], self.label)
+
+ def metadata(self):
+ return {"label": self.label}
class BloggerAPI():