summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/telegraph.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net> 2022-04-09 00:15:19 -0400
committer: Unit 193 <unit193@unit193.net> 2022-04-09 00:15:19 -0400
commit: 2fe1dfed848fc26b7419e3bfe91a62e686960429 (patch)
tree: 901cb64e2a1748df2bb8c7abc60ff6d72ae4bc27 /gallery_dl/extractor/telegraph.py
parent: c2e774d3f5a4499b8beb5a12ab46a0099b16b1e7 (diff)
New upstream version 1.21.1. [upstream/1.21.1]
Diffstat (limited to 'gallery_dl/extractor/telegraph.py')
-rw-r--r-- gallery_dl/extractor/telegraph.py 95
1 files changed, 95 insertions, 0 deletions
diff --git a/gallery_dl/extractor/telegraph.py b/gallery_dl/extractor/telegraph.py
new file mode 100644
index 0000000..8e9bf2c
--- /dev/null
+++ b/gallery_dl/extractor/telegraph.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractor for https://telegra.ph/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class TelegraphGalleryExtractor(GalleryExtractor):
+ """Extractor for articles from telegra.ph"""
+
+ category = "telegraph"
+ root = "https://telegra.ph"
+ directory_fmt = ("{category}", "{slug}")
+ filename_fmt = "{num_formatted}_{filename}.{extension}"
+ archive_fmt = "{slug}_{num}"
+ pattern = r"(?:https?://)(?:www\.)??telegra\.ph(/[^/?#]+)"
+ test = (
+ ("https://telegra.ph/Telegraph-Test-03-28", {
+ "pattern": r"https://telegra\.ph/file/[0-9a-f]+\.png",
+ "keyword": {
+ "author": "mikf",
+ "caption": r"re:test|",
+ "count": 2,
+ "date": "dt:2022-03-28 16:01:36",
+ "description": "Just a test",
+ "post_url": "https://telegra.ph/Telegraph-Test-03-28",
+ "slug": "Telegraph-Test-03-28",
+ "title": "Telegra.ph Test",
+ },
+ }),
+ ("https://telegra.ph/森-03-28", {
+ "pattern": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg",
+ "count": 1,
+ "keyword": {
+ "author": "&",
+ "caption": "kokiri",
+ "count": 1,
+ "date": "dt:2022-03-28 16:31:26",
+ "description": "コキリの森",
+ "extension": "jpg",
+ "filename": "3ea79d23b0dd0889f215a",
+ "num": 1,
+ "num_formatted": "1",
+ "post_url": "https://telegra.ph/森-03-28",
+ "slug": "森-03-28",
+ "title": '"森"',
+ "url": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg",
+ },
+ }),
+ )
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ data = {
+ "title": text.unescape(extr(
+ 'property="og:title" content="', '"')),
+ "description": text.unescape(extr(
+ 'property="og:description" content="', '"')),
+ "date": text.parse_datetime(extr(
+ 'property="article:published_time" content="', '"'),
+ "%Y-%m-%dT%H:%M:%S%z"),
+ "author": text.unescape(extr(
+ 'property="article:author" content="', '"')),
+ "post_url": text.unescape(extr(
+ 'rel="canonical" href="', '"')),
+ }
+ data["slug"] = data["post_url"][19:]
+ return data
+
+ def images(self, page):
+ figures = tuple(text.extract_iter(page, "<figure>", "</figure>"))
+ num_zeroes = len(str(len(figures)))
+ num = 0
+
+ result = []
+ for figure in figures:
+ src, pos = text.extract(figure, 'src="', '"')
+ if src.startswith("/embed/"):
+ continue
+ caption, pos = text.extract(figure, "<figcaption>", "<", pos)
+ url = self.root + src
+ num += 1
+
+ result.append((url, {
+ "url" : url,
+ "caption" : text.unescape(caption),
+ "num" : num,
+ "num_formatted": str(num).zfill(num_zeroes),
+ }))
+ return result