about summary refs log tree commit diff stats
path: root/gallery_dl/extractor/poringa.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/poringa.py')
-rw-r--r-- gallery_dl/extractor/poringa.py 138
1 files changed, 138 insertions, 0 deletions
diff --git a/gallery_dl/extractor/poringa.py b/gallery_dl/extractor/poringa.py
new file mode 100644
index 0000000..0149d06
--- /dev/null
+++ b/gallery_dl/extractor/poringa.py
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for http://www.poringa.net/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+from ..cache import cache
+import itertools
+
+# Common URL prefix of all extractor patterns in this module:
+# optional scheme, optional "www." subdomain, then the poringa.net domain.
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?poringa\.net"
+
+
+class PoringaExtractor(Extractor):
+    """Base class for poringa extractors"""
+    category = "poringa"
+    directory_fmt = ("{category}", "{user}", "{post_id}")
+    filename_fmt = "{post_id}_{title}_{num:>03}_{filename}.{extension}"
+    archive_fmt = "{post_id}_{num}"
+    root = "http://www.poringa.net"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        # first capture group of the matching subclass pattern
+        self.item = match.group(1)
+        # True until request() has loaded the cached cookies once
+        self.__cookies = True
+
+    def items(self):
+        """Yield a Directory message and one Url message per image
+        for every post ID returned by posts()
+
+        Posts that cannot be fetched or that redirect to the login page
+        are skipped with a warning.
+        """
+        for post_id in self.posts():
+            url = "{}/posts/imagenes/{}".format(self.root, post_id)
+
+            try:
+                response = self.request(url)
+            except exception.HttpError as exc:
+                self.log.warning(
+                    "Unable to fetch posts for '%s' (%s)", post_id, exc)
+                continue
+
+            # a final URL on the login page is treated as a private post
+            if "/registro-login?" in response.url:
+                self.log.warning("Private post '%s'", post_id)
+                continue
+
+            page = response.text
+            title, pos = text.extract(
+                page, 'property="og:title" content="', '"')
+
+            try:
+                pos = page.index('<div class="main-info', pos)
+                user, pos = text.extract(
+                    page, 'href="http://www.poringa.net/', '"', pos)
+            except ValueError:
+                user = None
+
+            # fall back to a generic name when no author link was found
+            if not user:
+                user = "poringa"
+
+            data = {
+                "post_id"      : post_id,
+                # guard against a page without og:title
+                # (assumes text.extract() yields a falsy value then)
+                "title"        : text.unescape(title or ""),
+                "user"         : text.unquote(user),
+                "_http_headers": {"Referer": url},
+            }
+
+            main_post = text.extr(
+                page, 'property="dc:content" role="main">', '</div>')
+            urls = list(text.extract_iter(
+                main_post, '<img class="imagen" border="0" src="', '"'))
+            data["count"] = len(urls)
+
+            yield Message.Directory, data
+            # 'image_url' keeps the post URL in 'url' from being shadowed
+            for data["num"], image_url in enumerate(urls, 1):
+                yield Message.Url, image_url, text.nameext_from_url(
+                    image_url, data)
+
+    def posts(self):
+        """Return an iterable of post IDs; overridden by subclasses"""
+        return ()
+
+    def request(self, url, **kwargs):
+        """Send a request and retry while the 'Please wait' page is served
+
+        Cached cookies are loaded before the first request and cookies
+        set by any response are written back to the cache.
+
+        Raises exception.HttpError if every attempt returned the
+        'Please wait a few moments' page, instead of returning None.
+        """
+        if self.__cookies:
+            self.__cookies = False
+            self.cookies_update(_cookie_cache())
+
+        for _ in range(5):
+            response = Extractor.request(self, url, **kwargs)
+            if response.cookies:
+                _cookie_cache.update("", response.cookies)
+            # the marker is only searched for in the first 600 bytes
+            if response.content.find(
+                    b"<title>Please wait a few moments</title>", 0, 600) < 0:
+                return response
+            self.sleep(5.0, "check")
+
+        # all attempts were answered with the waiting page
+        raise exception.HttpError(
+            "'Please wait a few moments' check did not pass", response)
+
+    def _pagination(self, url, params):
+        """Yield post IDs from consecutive result pages of 'url'
+
+        Sets params["p"] to the current page number, starting at 1.
+        IDs are de-duplicated per page, not across pages.
+        """
+        for params["p"] in itertools.count(1):
+            page = self.request(url, params=params).text
+
+            posts_ids = PoringaPostExtractor.pattern.findall(page)
+            # drop duplicates while keeping first-seen order
+            posts_ids = list(dict.fromkeys(posts_ids))
+            yield from posts_ids
+
+            # fewer than 19 IDs on a page is taken as the last page
+            if len(posts_ids) < 19:
+                return
+
+
+class PoringaPostExtractor(PoringaExtractor):
+    """Extractor for a single image post on poringa.net"""
+    subcategory = "post"
+    pattern = BASE_PATTERN + r"/posts/imagenes/(\d+)"
+    example = "http://www.poringa.net/posts/imagenes/12345/TITLE.html"
+
+    def posts(self):
+        # the numeric ID captured from the URL is the only post
+        post_id = self.item
+        return (post_id,)
+
+
+class PoringaUserExtractor(PoringaExtractor):
+    """Extractor for posts found by searching for a user name"""
+    subcategory = "user"
+    pattern = BASE_PATTERN + r"/(\w+)$"
+    example = "http://www.poringa.net/USER"
+
+    def posts(self):
+        # the user name from the URL is used as the search query
+        search_url = "{}/buscar/".format(self.root)
+        return self._pagination(search_url, {"q": self.item})
+
+
+class PoringaSearchExtractor(PoringaExtractor):
+    """Extractor for search results on poringa.net"""
+    subcategory = "search"
+    pattern = BASE_PATTERN + r"/buscar/\?&?q=([^&#]+)"
+    example = "http://www.poringa.net/buscar/?q=QUERY"
+
+    def posts(self):
+        url = self.root + "/buscar/"
+        # self.item is copied verbatim from the URL's query string and is
+        # therefore still URL-encoded. Decode it ('+' means space there)
+        # so it is not percent-encoded a second time when it gets sent
+        # as a request parameter.
+        query = text.unquote(self.item.replace("+", " "))
+        params = {"q": query}
+        return self._pagination(url, params)
+
+
+@cache()
+def _cookie_cache():
+    """Return the value used when no cookies are stored: an empty tuple
+
+    NOTE(review): stored cookies are presumably returned by the 'cache'
+    decorator instead of this body -- confirm against ..cache.
+    """
+    return tuple()