New upstream version 1.21.1. (upstream/1.21.1)

author: Unit 193 <unit193@unit193.net> 2022-04-09 00:15:19 -0400
committer: Unit 193 <unit193@unit193.net> 2022-04-09 00:15:19 -0400
commit: 2fe1dfed848fc26b7419e3bfe91a62e686960429 (patch)
tree: 901cb64e2a1748df2bb8c7abc60ff6d72ae4bc27 /gallery_dl/extractor/telegraph.py
parent: c2e774d3f5a4499b8beb5a12ab46a0099b16b1e7 (diff)
1 files changed, 95 insertions, 0 deletions
diff --git a/gallery_dl/extractor/telegraph.py b/gallery_dl/extractor/telegraph.py
new file mode 100644
index 0000000..8e9bf2c
--- /dev/null
+++ b/gallery_dl/extractor/telegraph.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractor for https://telegra.ph/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class TelegraphGalleryExtractor(GalleryExtractor):
+    """Extractor for articles from telegra.ph"""
+    category = "telegraph"
+    root = "https://telegra.ph"
+    directory_fmt = ("{category}", "{slug}")
+    filename_fmt = "{num_formatted}_{filename}.{extension}"
+    archive_fmt = "{slug}_{num}"
+    pattern = r"(?:https?://)(?:www\.)??telegra\.ph(/[^/?#]+)"
+    test = (
+        ("https://telegra.ph/Telegraph-Test-03-28", {
+            "pattern": r"https://telegra\.ph/file/[0-9a-f]+\.png",
+            "keyword": {
+                "author": "mikf",
+                "caption": r"re:test|",
+                "count": 2,
+                "date": "dt:2022-03-28 16:01:36",
+                "description": "Just a test",
+                "post_url": "https://telegra.ph/Telegraph-Test-03-28",
+                "slug": "Telegraph-Test-03-28",
+                "title": "Telegra.ph Test",
+            },
+        }),
+        ("https://telegra.ph/森-03-28", {
+            "pattern": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg",
+            "count": 1,
+            "keyword": {
+                "author": "&",
+                "caption": "kokiri",
+                "count": 1,
+                "date": "dt:2022-03-28 16:31:26",
+                "description": "コキリの森",
+                "extension": "jpg",
+                "filename": "3ea79d23b0dd0889f215a",
+                "num": 1,
+                "num_formatted": "1",
+                "post_url": "https://telegra.ph/森-03-28",
+                "slug": "森-03-28",
+                "title": '"森"',
+                "url": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg",
+            },
+        }),
+    )
+
+    def metadata(self, page):
+        """Return a dict of article metadata parsed from the HTML 'page'"""
+        extr = text.extract_from(page)  # NOTE: keep lookups in page order
+        data = {
+            "title": text.unescape(extr(
+                'property="og:title" content="', '"')),
+            "description": text.unescape(extr(
+                'property="og:description" content="', '"')),
+            "date": text.parse_datetime(extr(
+                'property="article:published_time" content="', '"'),
+                "%Y-%m-%dT%H:%M:%S%z"),
+            "author": text.unescape(extr(
+                'property="article:author" content="', '"')),
+            "post_url": text.unescape(extr('rel="canonical" href="', '"')),
+        }
+        # slug is the canonical URL minus the leading "<root>/"
+        data["slug"] = data["post_url"][len(self.root) + 1:]
+        return data
+
+    def images(self, page):
+        """Return a list of (url, metadata) tuples, one per image figure"""
+        figures = tuple(text.extract_iter(page, "<figure>", "</figure>"))
+        num_zeroes = len(str(len(figures)))
+        num = 0
+        result = []
+        for figure in figures:
+            src, pos = text.extract(figure, 'src="', '"')
+            if not src or src.startswith("/embed/"):
+                continue  # no src, or /embed/ (presumably not an image)
+            caption, pos = text.extract(figure, "<figcaption>", "<", pos)
+            # only site-relative paths need the root prepended
+            url = self.root + src if src.startswith("/") else src
+            num += 1
+            result.append((url, {
+                "url"          : url,
+                "caption"      : text.unescape(caption or ""),
+                "num"          : num,
+                "num_formatted": str(num).zfill(num_zeroes),
+            }))
+        return result
author	Unit 193 <unit193@unit193.net>	2022-04-09 00:15:19 -0400
committer	Unit 193 <unit193@unit193.net>	2022-04-09 00:15:19 -0400
commit	2fe1dfed848fc26b7419e3bfe91a62e686960429 (patch)
tree	901cb64e2a1748df2bb8c7abc60ff6d72ae4bc27 /gallery_dl/extractor/telegraph.py
parent	c2e774d3f5a4499b8beb5a12ab46a0099b16b1e7 (diff)