summary refs log tree commit diff stats
path: root/gallery_dl/extractor/idolcomplex.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/idolcomplex.py')
-rw-r--r-- gallery_dl/extractor/idolcomplex.py 17
1 files changed, 10 insertions, 7 deletions
diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py
index b9e2c3d..f70a948 100644
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -34,8 +34,11 @@ class IdolcomplexExtractor(SankakuExtractor):
self.start_post = 0
def _init(self):
+ self.find_pids = re.compile(
+ r" href=[\"#]/\w\w/posts/([0-9a-f]+)"
+ ).findall
self.find_tags = re.compile(
- r'tag-type-([^"]+)">\s*<div [^>]+>\s*<a href="/\?tags=([^"]+)'
+ r'tag-type-([^"]+)">\s*<a [^>]*?href="/[^?]*\?tags=([^"]+)'
).findall
def items(self):
@@ -149,8 +152,8 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)"
- example = "https://idol.sankakucomplex.com/?tags=TAGS"
+ pattern = BASE_PATTERN + r"/(?:posts/?)?\?([^#]*)"
+ example = "https://idol.sankakucomplex.com/en/posts?tags=TAGS"
per_page = 20
def __init__(self, match):
@@ -196,7 +199,8 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
page = self.request(self.root, params=params, retries=10).text
pos = ((page.find('id="more-popular-posts-link"') + 1) or
(page.find('<span class="thumb') + 1))
- yield from text.extract_iter(page, ' href="/posts/', '"', pos)
+
+ yield from self.find_pids(page, pos)
next_url = text.extract(page, 'next-page-url="', '"', pos)[0]
if not next_url:
@@ -218,7 +222,7 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
- pattern = r"(?:https?://)?idol\.sankakucomplex\.com/pools?/show/(\d+)"
+ pattern = BASE_PATTERN + r"/pools?/show/(\d+)"
example = "https://idol.sankakucomplex.com/pools/show/12345"
per_page = 24
@@ -242,8 +246,7 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
while True:
page = self.request(url, params=params, retries=10).text
pos = page.find('id="pool-show"') + 1
- post_ids = list(text.extract_iter(
- page, ' href="/posts/', '"', pos))
+ post_ids = self.find_pids(page, pos)
yield from post_ids
if len(post_ids) < self.per_page: