summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/cien.py
blob: a9ccab5ab3cb916e121822583af8cca902fe3c46 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# -*- coding: utf-8 -*-

# Copyright 2024 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://ci-en.net/"""

from .common import Extractor, Message
from .. import text, util

BASE_PATTERN = r"(?:https?://)?ci-en\.(?:net|dlsite\.com)"


class CienExtractor(Extractor):
    category = "cien"
    root = "https://ci-en.net"

    def __init__(self, match):
        self.root = text.root_from_url(match.group(0))
        Extractor.__init__(self, match)

    def _pagination_articles(self, url, params):
        data = {"extractor": CienArticleExtractor}
        params["page"] = text.parse_int(params.get("page"), 1)

        while True:
            page = self.request(url, params=params).text

            for card in text.extract_iter(
                    page, ' class="c-cardCase-item', '</div>'):
                article_url = text.extr(card, ' href="', '"')
                yield Message.Queue, article_url, data

            if ' rel="next"' not in page:
                return
            params["page"] += 1


class CienArticleExtractor(CienExtractor):
    subcategory = "article"
    pattern = BASE_PATTERN + r"/creator/(\d+)/article/(\d+)"
    example = "https://ci-en.net/creator/123/article/12345"

    def items(self):
        url = "{}/creator/{}/article/{}".format(
            self.root, self.groups[0], self.groups[1])
        page = self.request(url, notfound="article").text
        return
        yield 1


class CienCreatorExtractor(CienExtractor):
    subcategory = "creator"
    pattern = BASE_PATTERN + r"/creator/(\d+)(?:/article(?:\?([^#]+))?)?/?$"
    example = "https://ci-en.net/creator/123"

    def items(self):
        url = "{}/creator/{}/article".format(self.root, self.groups[0])
        params = text.parse_query(self.groups[1])
        params["mode"] = "list"
        return self._pagination_articles(url, params)


class CienRecentExtractor(CienExtractor):
    subcategory = "recent"
    pattern = BASE_PATTERN + r"/mypage/recent(?:\?([^#]+))?"
    example = "https://ci-en.net/mypage/recent"

    def items(self):
        url = self.root + "/mypage/recent"
        params = text.parse_query(self.groups[0])
        return self._pagination_articles(url, params)


class CienFollowingExtractor(CienExtractor):
    subcategory = "following"
    pattern = BASE_PATTERN + r"/mypage/subscription(/following)?"
    example = "https://ci-en.net/mypage/subscription"

    def items(self):
        url = self.root + "/mypage/recent"
        params = text.parse_query(self.groups[0])
        return self._pagination_articles(url, params)