summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/nijie.py
diff options
context:
space:
mode:
author: [Libravatar] Unit 193 <unit193@ubuntu.com> 2019-10-01 19:12:47 -0400
committer: [Libravatar] Unit 193 <unit193@ubuntu.com> 2019-10-01 19:12:47 -0400
commit: 639d9ea4a667733aadc3ff83a1df2cc9f0add3a9 (patch)
tree: 5761b58d6fc3e8bbb99b39b8e4417673bccb0b86 /gallery_dl/extractor/nijie.py
parent: c09a9f00dd83017d486cd77650347bc2a397ad55 (diff)
New upstream version 1.10.5 [upstream/1.10.5]
Diffstat (limited to 'gallery_dl/extractor/nijie.py')
-rw-r--r-- gallery_dl/extractor/nijie.py 171
1 files changed, 98 insertions, 73 deletions
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 4c48d73..fdfad87 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -13,12 +13,15 @@ from .. import text, exception
from ..cache import cache
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?nijie\.info"
+
+
class NijieExtractor(AsynchronousMixin, Extractor):
"""Base class for nijie extractors"""
category = "nijie"
directory_fmt = ("{category}", "{user_id}")
- filename_fmt = "{category}_{artist_id}_{image_id}_p{index:>02}.{extension}"
- archive_fmt = "{image_id}_{index}"
+ filename_fmt = "{category}_{artist_id}_{image_id}_p{num:>02}.{extension}"
+ archive_fmt = "{image_id}_{num}"
cookiedomain = "nijie.info"
cookienames = ("nemail", "nlogin")
root = "https://nijie.info"
@@ -27,61 +30,66 @@ class NijieExtractor(AsynchronousMixin, Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.user_id = match.group(1)
+ self.user_id = text.parse_int(match.group(1))
+ self.user_name = None
self.session.headers["Referer"] = self.root + "/"
def items(self):
self.login()
- data = self.get_job_metadata()
-
yield Message.Version, 1
- yield Message.Directory, data
- for image_id in self.get_image_ids():
- for image_url, image_data in self.get_image_data(image_id):
- image_data.update(data)
- if not image_data["extension"]:
- image_data["extension"] = "jpg"
- yield Message.Url, image_url, image_data
+ for image_id in self.image_ids():
+
+ response = self.request(self.view_url + image_id, fatal=False)
+ if response.status_code >= 400:
+ continue
+ page = response.text
+
+ data = self._extract_data(page)
+ data["image_id"] = text.parse_int(image_id)
+ yield Message.Directory, data
- def get_job_metadata(self):
- """Collect metadata for extractor-job"""
- return {"user_id": text.parse_int(self.user_id)}
+ for image in self._extract_images(page):
+ image.update(data)
+ if not image["extension"]:
+ image["extension"] = "jpg"
+ yield Message.Url, image["url"], image
- def get_image_ids(self):
+ def image_ids(self):
"""Collect all relevant image-ids"""
- def get_image_data(self, image_id):
- """Get URL and metadata for images specified by 'image_id'"""
- page = self.request(self.view_url + image_id).text
- return self.extract_image_data(page, image_id)
-
- def extract_image_data(self, page, image_id):
- """Get URL and metadata for images from 'page'"""
- title, pos = text.extract(
- page, '<meta property="og:title" content="', '"')
- description, pos = text.extract(
- page, '<meta property="og:description" content="', '"', pos)
- artist_id, pos = text.extract(
- page, '"sameAs": "https://nijie.info/members.php?id=', '"', pos)
- images = list(text.extract_iter(
- page, '<a href="./view_popup.php', '</a>', pos))
-
- title = title.rpartition("|")[0].strip()
- image_id = text.parse_int(image_id)
- artist_id = text.parse_int(artist_id)
-
- for index, image in enumerate(images):
+ @staticmethod
+ def _extract_data(page):
+ """Extract image metadata from 'page'"""
+ extr = text.extract_from(page)
+ keywords = text.unescape(extr(
+ 'name="keywords" content="', '" />')).split(",")
+ data = {
+ "title" : keywords[0].strip(),
+ "description": text.unescape(extr(
+ '"description": "', '"').replace("&amp;", "&")),
+ "date" : text.parse_datetime(extr(
+ '"datePublished": "', '"')[:-4] + "+0900",
+ "%a %d %b %Y %I:%M:%S %p%z"),
+ "artist_id" : text.parse_int(extr(
+ '"sameAs": "https://nijie.info/members.php?id=', '"')),
+ "artist_name": keywords[1],
+ "tags" : keywords[2:-1],
+ }
+ data["user_id"] = data["artist_id"]
+ data["user_name"] = data["artist_name"]
+ return data
+
+ @staticmethod
+ def _extract_images(page):
+ """Extract image URLs from 'page'"""
+ images = text.extract_iter(page, '<a href="./view_popup.php', '</a>')
+ for num, image in enumerate(images):
url = "https:" + text.extract(image, 'src="', '"')[0]
- url = url.replace("/__rs_l120x120/", "/", 1)
-
- yield url, text.nameext_from_url(url, {
- "index": index,
- "count": len(images),
- "title": title,
- "description": description,
- "image_id": image_id,
- "artist_id": artist_id,
+ url = url.replace("/__rs_l120x120/", "/")
+ yield text.nameext_from_url(url, {
+ "num": num,
+ "url": url,
})
def login(self):
@@ -107,6 +115,10 @@ class NijieExtractor(AsynchronousMixin, Extractor):
while True:
page = self.request(url, params=params, notfound="artist").text
+
+ if not self.user_name:
+ self.user_name = text.unescape(text.extract(
+ page, '<br />', '<')[0] or "")
yield from text.extract_iter(page, 'illust_id="', '"')
if '<a rel="next"' not in page:
@@ -117,12 +129,25 @@ class NijieExtractor(AsynchronousMixin, Extractor):
class NijieUserExtractor(NijieExtractor):
"""Extractor for works of a nijie-user"""
subcategory = "user"
- pattern = (r"(?:https?://)?(?:www\.)?nijie\.info"
- r"/members(?:_illust)?\.php\?id=(\d+)")
+ pattern = BASE_PATTERN + r"/members(?:_illust)?\.php\?id=(\d+)"
test = (
("https://nijie.info/members_illust.php?id=44", {
"url": "66c4ff94c6e77c0765dd88f2d8c663055fda573e",
- "keyword": "d629c69e3172db1d7e026145e8eb640ac31ac16a",
+ "keyword": {
+ "artist_id": 44,
+ "artist_name": "ED",
+ "date": "type:datetime",
+ "description": str,
+ "extension": "jpg",
+ "filename": str,
+ "image_id": int,
+ "num": int,
+ "tags": list,
+ "title": str,
+ "url": r"re:https://pic.nijie.net/\d+/nijie_picture/.*jpg$",
+ "user_id": 44,
+ "user_name": "ED",
+ },
}),
("https://nijie.info/members_illust.php?id=43", {
"exception": exception.NotFoundError,
@@ -130,20 +155,23 @@ class NijieUserExtractor(NijieExtractor):
("https://nijie.info/members.php?id=44"),
)
- def get_image_ids(self):
+ def image_ids(self):
return self._pagination("members_illust")
class NijieDoujinExtractor(NijieExtractor):
"""Extractor for doujin entries of a nijie-user"""
subcategory = "doujin"
- pattern = (r"(?:https?://)?(?:www\.)?nijie\.info/"
- r"members_dojin\.php\?id=(\d+)")
+ pattern = BASE_PATTERN + r"/members_dojin\.php\?id=(\d+)"
test = ("https://nijie.info/members_dojin.php?id=6782", {
"count": ">= 18",
+ "keyword": {
+ "user_id" : 6782,
+ "user_name": "ジョニー@アビオン村",
+ },
})
- def get_image_ids(self):
+ def image_ids(self):
return self._pagination("members_dojin")
@@ -151,30 +179,38 @@ class NijieFavoriteExtractor(NijieExtractor):
"""Extractor for all favorites/bookmarks of a nijie-user"""
subcategory = "favorite"
directory_fmt = ("{category}", "bookmarks", "{user_id}")
- archive_fmt = "f_{user_id}_{image_id}_{index}"
- pattern = (r"(?:https?://)?(?:www\.)?nijie\.info"
- r"/user_like_illust_view\.php\?id=(\d+)")
+ archive_fmt = "f_{user_id}_{image_id}_{num}"
+ pattern = BASE_PATTERN + r"/user_like_illust_view\.php\?id=(\d+)"
test = ("https://nijie.info/user_like_illust_view.php?id=44", {
"count": ">= 16",
+ "keyword": {
+ "user_id" : 44,
+ "user_name": "ED",
+ },
})
- def get_image_ids(self):
+ def image_ids(self):
return self._pagination("user_like_illust_view")
+ def _extract_data(self, page):
+ data = NijieExtractor._extract_data(page)
+ data["user_id"] = self.user_id
+ data["user_name"] = self.user_name
+ return data
+
class NijieImageExtractor(NijieExtractor):
"""Extractor for a work/image from nijie.info"""
subcategory = "image"
- pattern = (r"(?:https?://)?(?:www\.)?nijie\.info"
- r"/view(?:_popup)?\.php\?id=(\d+)")
+ pattern = BASE_PATTERN + r"/view(?:_popup)?\.php\?id=(\d+)"
test = (
("https://nijie.info/view.php?id=70720", {
"url": "5497f897311397dafa188521258624346a0af2a3",
- "keyword": "408393d010307c76d52cbd0a4368d6d357805aea",
+ "keyword": "fd12bca6f4402a0c996315d28c65f7914ad70c51",
"content": "d85e3ea896ed5e4da0bca2390ad310a4df716ca6",
}),
("https://nijie.info/view.php?id=70724", {
- "exception": exception.NotFoundError,
+ "count": 0,
}),
("https://nijie.info/view_popup.php?id=70720"),
)
@@ -182,17 +218,6 @@ class NijieImageExtractor(NijieExtractor):
def __init__(self, match):
NijieExtractor.__init__(self, match)
self.image_id = match.group(1)
- self.page = ""
- def get_job_metadata(self):
- self.page = self.request(
- self.view_url + self.image_id, notfound="image").text
- self.user_id = text.extract(
- self.page, '"sameAs": "https://nijie.info/members.php?id=', '"')[0]
- return NijieExtractor.get_job_metadata(self)
-
- def get_image_ids(self):
+ def image_ids(self):
return (self.image_id,)
-
- def get_image_data(self, _):
- return self.extract_image_data(self.page, self.image_id)