diff options
Diffstat (limited to 'gallery_dl/extractor/blogger.py')
| -rw-r--r-- | gallery_dl/extractor/blogger.py | 59 |
1 files changed, 40 insertions, 19 deletions
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py index e0885d2..232f3ea 100644 --- a/gallery_dl/extractor/blogger.py +++ b/gallery_dl/extractor/blogger.py @@ -48,6 +48,7 @@ class BloggerExtractor(Extractor): r'\d+\.bp\.blogspot\.com)/[^"]+)').findall findall_video = re.compile( r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall + metadata = self.metadata() for post in self.posts(blog): content = post["content"] @@ -74,18 +75,21 @@ class BloggerExtractor(Extractor): del post["selfLink"] del post["blog"] - yield Message.Directory, {"blog": blog, "post": post} - for num, url in enumerate(files, 1): - yield Message.Url, url, text.nameext_from_url(url, { - "blog": blog, - "post": post, - "url" : url, - "num" : num, - }) + data = {"blog": blog, "post": post} + if metadata: + data.update(metadata) + yield Message.Directory, data + + for data["num"], url in enumerate(files, 1): + data["url"] = url + yield Message.Url, url, text.nameext_from_url(url, data) def posts(self, blog): """Return an iterable with all relevant post objects""" + def metadata(self): + """Return additional metadata""" + class BloggerPostExtractor(BloggerExtractor): """Extractor for a single blog post""" @@ -173,31 +177,48 @@ class BloggerBlogExtractor(BloggerExtractor): class BloggerSearchExtractor(BloggerExtractor): - """Extractor for search resuls and labels""" + """Extractor for Blogger search resuls""" subcategory = "search" - pattern = BASE_PATTERN + r"/search(?:/?\?q=([^/?#]+)|/label/([^/?#]+))" + pattern = BASE_PATTERN + r"/search/?\?q=([^&#]+)" test = ( ("https://julianbphotography.blogspot.com/search?q=400mm", { - "count": "< 10" + "count": "< 10", + "keyword": {"query": "400mm"}, }), + ) + + def __init__(self, match): + BloggerExtractor.__init__(self, match) + self.query = text.unquote(match.group(3)) + + def posts(self, blog): + return self.api.blog_search(blog["id"], self.query) + + def metadata(self): + return {"query": self.query} + + +class BloggerLabelExtractor(BloggerExtractor): + """Extractor for Blogger posts by label""" + subcategory = "label" + pattern = BASE_PATTERN + r"/search/label/([^/?#]+)" + test = ( ("https://dmmagazine.blogspot.com/search/label/D%26D", { "range": "1-25", "count": 25, + "keyword": {"label": "D&D"}, }), ) def __init__(self, match): BloggerExtractor.__init__(self, match) - query = match.group(3) - if query: - self.query, self.label = query, None - else: - self.query, self.label = None, match.group(4) + self.label = text.unquote(match.group(3)) def posts(self, blog): - if self.query: - return self.api.blog_search(blog["id"], text.unquote(self.query)) - return self.api.blog_posts(blog["id"], text.unquote(self.label)) + return self.api.blog_posts(blog["id"], self.label) + + def metadata(self): + return {"label": self.label} class BloggerAPI(): |
