gallery_dl/extractor/desktopography.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95

# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://desktopography.net/"""

from .common import Extractor, Message
from .. import text

# URL prefix shared by the 'pattern' attribute of every extractor class
# in this module; each class appends its own path expression to it.
BASE_PATTERN = (
    r"(?:https?://)?"        # optional http:// or https:// scheme
    r"desktopography\.net"   # literal host name
)


class DesktopographyExtractor(Extractor):
    """Common base class for the desktopography extractors"""
    # Site origin; the subclasses build their request URLs from it.
    root = "https://desktopography.net"
    category = "desktopography"
    # Format string that references only the "filename" metadata field.
    # NOTE(review): presumably consumed by the Extractor base class to
    # build download-archive IDs -- that code is not visible from here.
    archive_fmt = "{filename}"


class DesktopographySiteExtractor(DesktopographyExtractor):
    """Extractor for all exhibitions listed on desktopography.net"""
    subcategory = "site"
    pattern = BASE_PATTERN + r"/$"
    example = "https://desktopography.net/"

    def items(self):
        """Yield one Queue message per exhibition link on the root page"""
        front_page = self.request(self.root).text
        queue_data = {"_extractor": DesktopographyExhibitionExtractor}

        # Markup surrounding the part of each link that follows
        # "exhibition-"; that part is re-inserted into the queued URL.
        link_start = '<a href="https://desktopography.net/exhibition-'
        link_end = '/">'

        for year in text.extract_iter(front_page, link_start, link_end):
            yield (
                Message.Queue,
                f"{self.root}/exhibition-{year}/",
                queue_data,
            )


class DesktopographyExhibitionExtractor(DesktopographyExtractor):
    """Extractor for one yearly exhibition on desktopography"""
    subcategory = "exhibition"
    pattern = BASE_PATTERN + r"/exhibition-([^/?#]+)/"
    example = "https://desktopography.net/exhibition-2020/"

    def __init__(self, match):
        super().__init__(match)
        # Capture group 1 of 'pattern': the text after "exhibition-"
        self.year = match[1]

    def items(self):
        """Yield one Queue message per portfolio link on the exhibition page"""
        portfolio_prefix = "https://desktopography.net/portfolios/"
        listing = self.request(f"{self.root}/exhibition-{self.year}/").text

        # The same dict is handed out with every queued entry URL.
        queue_data = {
            "_extractor": DesktopographyEntryExtractor,
            "year": self.year,
        }

        # Only anchors carrying the "overlay-background" class and pointing
        # below the portfolio prefix are picked up; the extracted value is
        # whatever follows that prefix inside the href attribute.
        link_start = '<a class="overlay-background" href="' + portfolio_prefix
        for slug in text.extract_iter(listing, link_start, '">'):
            yield Message.Queue, portfolio_prefix + slug, queue_data


class DesktopographyEntryExtractor(DesktopographyExtractor):
    """Extractor for every resolution of one desktopography wallpaper"""
    subcategory = "entry"
    pattern = BASE_PATTERN + r"/portfolios/([\w-]+)"
    example = "https://desktopography.net/portfolios/NAME/"

    def __init__(self, match):
        super().__init__(match)
        # Capture group 1 of 'pattern': the portfolio name
        self.entry = match[1]

    def items(self):
        """Yield a Directory message, then one Url message per download link"""
        page = self.request(f"{self.root}/portfolios/{self.entry}").text

        metadata = {"entry": self.entry}
        yield Message.Directory, metadata

        # Each extracted chunk spans from the site-relative path inside
        # href="..." up to the end of the download="..." attribute value.
        link_start = '<a target="_blank" href="https://desktopography.net'
        attr_split = '" class="wallpaper-button" download="'

        for link in text.extract_iter(page, link_start, '">'):
            # Left of the separator: URL path; right of it: file name.
            path, _, filename = link.partition(attr_split)
            # The return value is ignored, so this relies on
            # nameext_from_url() updating 'metadata' in place; the same
            # dict object is reused for every yielded message.
            text.nameext_from_url(filename, metadata)
            yield Message.Url, self.root + path, metadata