', '<'),
"uploader" : extr('
', '
'),
- "date" : text.parse_datetime(extr(
- '>Posted:
', ' | '), "%Y-%m-%d %H:%M"),
+ "date" : self.parse_datetime_iso(extr(
+ '>Posted:
', ' | ')),
"parent" : extr(
'>Parent:
%s)",
item["id"], item["feeRequired"], fee_max)
- continue
-
- try:
- url = "https://api.fanbox.cc/post.info?postId=" + item["id"]
- body = self.request_json(url, headers=self.headers)["body"]
- content_body, post = self._extract_post(body)
- except Exception as exc:
- self.log.warning("Skipping post %s (%s: %s)",
- item["id"], exc.__class__.__name__, exc)
- continue
-
- yield Message.Directory, post
+ else:
+ try:
+ url = ("https://api.fanbox.cc/post.info?postId=" +
+ item["id"])
+ item = self.request_json(url, headers=self.headers)["body"]
+ except Exception as exc:
+ self.log.warning("Skipping post %s (%s: %s)",
+ item["id"], exc.__class__.__name__, exc)
+
+ content_body, post = self._extract_post(item)
+ yield Message.Directory, "", post
yield from self._get_urls_from_post(content_body, post)
def posts(self):
@@ -128,15 +127,19 @@ class FanboxExtractor(Extractor):
if file.get("extension", "").lower() in exts
]
- post["date"] = text.parse_datetime(post["publishedDatetime"])
+ try:
+ post["date"] = self.parse_datetime_iso(post["publishedDatetime"])
+ except Exception:
+ post["date"] = None
post["text"] = content_body.get("text") if content_body else None
post["isCoverImage"] = False
- if self._meta_user:
- post["user"] = self._get_user_data(post["creatorId"])
- if self._meta_plan:
+ cid = post.get("creatorId")
+ if self._meta_user and cid is not None:
+ post["user"] = self._get_user_data(cid)
+ if self._meta_plan and cid is not None:
plans = self._get_plan_data(post["creatorId"])
- fee = post["feeRequired"]
+ fee = post.get("feeRequired") or 0
try:
post["plan"] = plans[fee]
except KeyError:
@@ -147,7 +150,7 @@ class FanboxExtractor(Extractor):
plan["fee"] = fee
post["plan"] = plans[fee] = plan
if self._meta_comments:
- if post["commentCount"]:
+ if post.get("commentCount"):
post["comments"] = list(self._get_comment_data(post["id"]))
else:
post["commentd"] = ()
@@ -216,7 +219,7 @@ class FanboxExtractor(Extractor):
def _get_urls_from_post(self, content_body, post):
num = 0
if cover_image := post.get("coverImageUrl"):
- cover_image = util.re("/c/[0-9a-z_]+").sub("", cover_image)
+ cover_image = text.re("/c/[0-9a-z_]+").sub("", cover_image)
final_post = post.copy()
final_post["isCoverImage"] = True
final_post["fileUrl"] = cover_image
@@ -352,7 +355,7 @@ class FanboxExtractor(Extractor):
class FanboxCreatorExtractor(FanboxExtractor):
"""Extractor for a Fanbox creator's works"""
subcategory = "creator"
- pattern = USER_PATTERN + r"(?:/posts)?/?$"
+ pattern = rf"{USER_PATTERN}(?:/posts)?/?$"
example = "https://USER.fanbox.cc/"
def posts(self):
@@ -362,15 +365,26 @@ class FanboxCreatorExtractor(FanboxExtractor):
def _pagination_creator(self, url):
urls = self.request_json(url, headers=self.headers)["body"]
+ if offset := self.config("offset"):
+ quotient, remainder = divmod(offset, 10)
+ if quotient:
+ urls = urls[quotient:]
+ else:
+ remainder = None
+
for url in urls:
url = text.ensure_http_scheme(url)
- yield from self.request_json(url, headers=self.headers)["body"]
+ posts = self.request_json(url, headers=self.headers)["body"]
+ if remainder:
+ posts = posts[remainder:]
+ remainder = None
+ yield from posts
class FanboxPostExtractor(FanboxExtractor):
"""Extractor for media from a single Fanbox post"""
subcategory = "post"
- pattern = USER_PATTERN + r"/posts/(\d+)"
+ pattern = rf"{USER_PATTERN}/posts/(\d+)"
example = "https://USER.fanbox.cc/posts/12345"
def posts(self):
@@ -380,7 +394,7 @@ class FanboxPostExtractor(FanboxExtractor):
class FanboxHomeExtractor(FanboxExtractor):
"""Extractor for your Fanbox home feed"""
subcategory = "home"
- pattern = BASE_PATTERN + r"/?$"
+ pattern = rf"{BASE_PATTERN}/?$"
example = "https://fanbox.cc/"
def posts(self):
@@ -391,7 +405,7 @@ class FanboxHomeExtractor(FanboxExtractor):
class FanboxSupportingExtractor(FanboxExtractor):
"""Extractor for your supported Fanbox users feed"""
subcategory = "supporting"
- pattern = BASE_PATTERN + r"/home/supporting"
+ pattern = rf"{BASE_PATTERN}/home/supporting"
example = "https://fanbox.cc/home/supporting"
def posts(self):
@@ -403,6 +417,7 @@ class FanboxRedirectExtractor(Extractor):
"""Extractor for pixiv redirects to fanbox.cc"""
category = "fanbox"
subcategory = "redirect"
+ cookies_domain = None
pattern = r"(?:https?://)?(?:www\.)?pixiv\.net/fanbox/creator/(\d+)"
example = "https://www.pixiv.net/fanbox/creator/12345"
diff --git a/gallery_dl/extractor/fansly.py b/gallery_dl/extractor/fansly.py
index 7138599..ba60b15 100644
--- a/gallery_dl/extractor/fansly.py
+++ b/gallery_dl/extractor/fansly.py
@@ -35,9 +35,9 @@ class FanslyExtractor(Extractor):
for post in self.posts():
files = self._extract_files(post)
post["count"] = len(files)
- post["date"] = text.parse_timestamp(post["createdAt"])
+ post["date"] = self.parse_timestamp(post["createdAt"])
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], file in enumerate(files, 1):
post.update(file)
url = file["url"]
@@ -61,7 +61,8 @@ class FanslyExtractor(Extractor):
yield from self.posts_wall(account, wall)
def _extract_files(self, post):
- files = []
+ if "attachments" not in post:
+ return ()
if "_extra" in post:
extra = post.pop("_extra", ())
@@ -75,11 +76,12 @@ class FanslyExtractor(Extractor):
if mid in media
)
+ files = []
for attachment in post.pop("attachments"):
try:
self._extract_attachment(files, post, attachment)
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
self.log.error(
"%s/%s, Failed to extract media (%s: %s)",
post["id"], attachment.get("id"),
@@ -117,8 +119,8 @@ class FanslyExtractor(Extractor):
file = {
**variant,
"format": variant["type"],
- "date": text.parse_timestamp(media["createdAt"]),
- "date_updated": text.parse_timestamp(media["updatedAt"]),
+ "date": self.parse_timestamp(media["createdAt"]),
+ "date_updated": self.parse_timestamp(media["updatedAt"]),
}
if "metadata" in location:
@@ -331,12 +333,20 @@ class FanslyAPI():
posts = response["posts"]
for post in posts:
- post["account"] = accounts[post.pop("accountId")]
+ try:
+ post["account"] = accounts[post.pop("accountId")]
+ except KeyError:
+ pass
extra = None
attachments = []
for attachment in post["attachments"]:
- cid = attachment["contentId"]
+ try:
+ cid = attachment["contentId"]
+ except KeyError:
+ attachments.append(attachment)
+ continue
+
if cid in media:
attachments.append(media[cid])
elif cid in bundles:
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index e32a86b..d13ec13 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -48,7 +48,7 @@ class FantiaExtractor(Extractor):
for content in contents:
files = self._process_content(post, content)
- yield Message.Directory, post
+ yield Message.Directory, "", post
if content["visible_status"] != "visible":
self.log.warning(
@@ -101,7 +101,7 @@ class FantiaExtractor(Extractor):
"comment": resp["comment"],
"rating": resp["rating"],
"posted_at": resp["posted_at"],
- "date": text.parse_datetime(
+ "date": self.parse_datetime(
resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"),
"fanclub_id": resp["fanclub"]["id"],
"fanclub_user_id": resp["fanclub"]["user"]["id"],
diff --git a/gallery_dl/extractor/fapachi.py b/gallery_dl/extractor/fapachi.py
index 7ff71b0..a18ce31 100644
--- a/gallery_dl/extractor/fapachi.py
+++ b/gallery_dl/extractor/fapachi.py
@@ -34,7 +34,7 @@ class FapachiPostExtractor(Extractor):
page = self.request(f"{self.root}/{self.user}/media/{self.id}").text
url = self.root + text.extract(
page, 'data-src="', '"', page.index('class="media-img'))[0]
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, url, text.nameext_from_url(url, data)
diff --git a/gallery_dl/extractor/fapello.py b/gallery_dl/extractor/fapello.py
index b961cbe..afef942 100644
--- a/gallery_dl/extractor/fapello.py
+++ b/gallery_dl/extractor/fapello.py
@@ -20,7 +20,7 @@ class FapelloPostExtractor(Extractor):
directory_fmt = ("{category}", "{model}")
filename_fmt = "{model}_{id}.{extension}"
archive_fmt = "{type}_{model}_{id}"
- pattern = BASE_PATTERN + r"/(?!search/|popular_videos/)([^/?#]+)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/(?!search/|popular_videos/)([^/?#]+)/(\d+)"
example = "https://fapello.com/MODEL/12345/"
def __init__(self, match):
@@ -44,7 +44,7 @@ class FapelloPostExtractor(Extractor):
}
url = text.extr(page, 'src="', '"').replace(
".md", "").replace(".th", "")
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, url, text.nameext_from_url(url, data)
@@ -52,9 +52,9 @@ class FapelloModelExtractor(Extractor):
"""Extractor for all posts from a fapello model"""
category = "fapello"
subcategory = "model"
- pattern = (BASE_PATTERN + r"/(?!top-(?:likes|followers)|popular_videos"
- r"|videos|trending|search/?$)"
- r"([^/?#]+)/?$")
+ pattern = (rf"{BASE_PATTERN}/(?!top-(?:likes|followers)|popular_videos"
+ rf"|videos|trending|search/?$)"
+ rf"([^/?#]+)/?$")
example = "https://fapello.com/model/"
def __init__(self, match):
@@ -85,9 +85,9 @@ class FapelloPathExtractor(Extractor):
"""Extractor for models and posts from fapello.com paths"""
category = "fapello"
subcategory = "path"
- pattern = (BASE_PATTERN +
- r"/(?!search/?$)(top-(?:likes|followers)|videos|trending"
- r"|popular_videos/[^/?#]+)/?$")
+ pattern = (rf"{BASE_PATTERN}/(?!search/?$)"
+ rf"(top-(?:likes|followers)|videos|trending"
+ rf"|popular_videos/[^/?#]+)/?$")
example = "https://fapello.com/trending/"
def __init__(self, match):
diff --git a/gallery_dl/extractor/fikfap.py b/gallery_dl/extractor/fikfap.py
new file mode 100644
index 0000000..75071c5
--- /dev/null
+++ b/gallery_dl/extractor/fikfap.py
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://fikfap.com/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?fikfap\.com"
+
+
+class FikfapExtractor(Extractor):
+ """Base class for fikfap extractors"""
+ category = "fikfap"
+ root = "https://fikfap.com"
+ root_api = "https://api.fikfap.com"
+ directory_fmt = ("{category}", "{author[username]}")
+ filename_fmt = "{postId} {label[:240]}.{extension}"
+ archive_fmt = "{postId}"
+
+ def items(self):
+ headers = {
+ "Referer" : self.root + "/",
+ "Origin" : self.root,
+ "Sec-Fetch-Dest": "empty",
+ "Sec-Fetch-Mode": "cors",
+ "Sec-Fetch-Site": "cross-site",
+ }
+
+ for post in self.posts():
+ if url := post.get("videoFileOriginalUrl"):
+ post["extension"] = text.ext_from_url(url)
+ elif url := post.get("videoStreamUrl"):
+ url = "ytdl:" + url
+ post["extension"] = "mp4"
+ post["_ytdl_manifest"] = "hls"
+ post["_ytdl_manifest_headers"] = headers
+ else:
+ self.log.warning("%s: No video available", post["postId"])
+ continue
+
+ post["date"] = self.parse_datetime_iso(post["createdAt"])
+ post["date_updated"] = self.parse_datetime_iso(post["updatedAt"])
+ post["tags"] = [t["label"] for t in post["hashtags"]]
+ post["filename"] = post["label"]
+
+ yield Message.Directory, "", post
+ yield Message.Url, url, post
+
+ def request_api(self, url, params):
+ return self.request_json(url, params=params, headers={
+ "Referer" : self.root + "/",
+ "Authorization-Anonymous": "2527cc30-c3c5-41be-b8bb-104b6ea7a206",
+ "IsLoggedIn" : "false",
+ "IsPWA" : "false",
+ "Origin" : self.root,
+ "Sec-Fetch-Dest": "empty",
+ "Sec-Fetch-Mode": "cors",
+ "Sec-Fetch-Site": "same-site",
+ })
+
+
+class FikfapPostExtractor(FikfapExtractor):
+ subcategory = "post"
+ pattern = rf"{BASE_PATTERN}/user/(\w+)/post/(\d+)"
+ example = "https://fikfap.com/user/USER/post/12345"
+
+ def posts(self):
+ user, pid = self.groups
+
+ url = f"{self.root_api}/profile/username/{user}/posts"
+ params = {"amount" : "1", "startId": pid}
+ posts = self.request_api(url, params)
+
+ pid = int(pid)
+ for post in posts:
+ if post["postId"] == pid:
+ return (post,)
+ raise exception.NotFoundError("post")
+
+
+class FikfapUserExtractor(FikfapExtractor):
+ subcategory = "user"
+ pattern = rf"{BASE_PATTERN}/user/(\w+)"
+ example = "https://fikfap.com/user/USER"
+
+ def posts(self):
+ user = self.groups[0]
+
+ url = f"{self.root_api}/profile/username/{user}/posts"
+ params = {"amount": "21"}
+
+ while True:
+ data = self.request_api(url, params)
+
+ yield from data
+
+ if len(data) < 21:
+ return
+ params["afterId"] = data[-1]["postId"]
diff --git a/gallery_dl/extractor/fitnakedgirls.py b/gallery_dl/extractor/fitnakedgirls.py
new file mode 100644
index 0000000..d252ec4
--- /dev/null
+++ b/gallery_dl/extractor/fitnakedgirls.py
@@ -0,0 +1,208 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://fitnakedgirls.com/"""
+
+from .common import GalleryExtractor, Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?fitnakedgirls\.com"
+
+
+class FitnakedgirlsExtractor(Extractor):
+ """Base class for fitnakedgirls extractors"""
+ category = "fitnakedgirls"
+ root = "https://fitnakedgirls.com"
+
+ def items(self):
+ data = {"_extractor": FitnakedgirlsGalleryExtractor}
+ for url in self.galleries():
+ yield Message.Queue, url, data
+
+ def _pagination(self, base):
+ url = base
+ pnum = 1
+
+ while True:
+ page = self.request(url).text
+
+ for post in text.extract_iter(
+ page, 'class="entry-body', ""):
+ yield text.extr(post, 'href="', '"')
+
+ pnum += 1
+ url = f"{base}page/{pnum}/"
+ if f'href="{url}"' not in page:
+ return
+
+ def _extract_title(self, extr, sep=" - "):
+ title = text.unescape(extr("", "<"))
+ if sep in title:
+ title = title.rpartition(sep)[0]
+ return title.strip()
+
+
+class FitnakedgirlsGalleryExtractor(GalleryExtractor, FitnakedgirlsExtractor):
+ """Extractor for fitnakedgirls galleries"""
+ directory_fmt = ("{category}", "{title}")
+ filename_fmt = "{filename}.{extension}"
+ archive_fmt = "{gallery_id}_{filename}"
+ pattern = rf"{BASE_PATTERN}/photos/gallery/([\w-]+)/?$"
+ example = "https://fitnakedgirls.com/photos/gallery/MODEL-nude/"
+
+ def __init__(self, match):
+ url = f"{self.root}/photos/gallery/{match[1]}/"
+ GalleryExtractor.__init__(self, match, url)
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ title = self._extract_title(extr)
+
+ # Strip common patterns to get cleaner model name
+ for pattern in (" Nudes", " Nude", " nudes", " nude"):
+ if pattern in title:
+ title = title.partition(pattern)[0]
+ break
+
+ return {
+ "gallery_id" : text.parse_int(extr('data-post-id="', '"')),
+ "gallery_slug": self.groups[0],
+ "model": title,
+ "title": title,
+ "date" : self.parse_datetime_iso(extr(
+ 'article:published_time" content="', '"')),
+ }
+
+ def images(self, page):
+ results = []
+
+ content = text.extr(
+ page, 'itemprop="articleBody"', '') or page
+
+ # Extract videos from wp-block-video figures
+ for figure in text.extract_iter(
+ content, '', ''):
+ if src := text.extr(figure, 'src="', '"'):
+ if "/wp-content/uploads/" in src:
+ results.append((src, None))
+
+ # Extract images from wp-block-image figures (newer template)
+ for figure in text.extract_iter(
+ content, '"):
+ if "size-large" in img:
+ if src := text.extr(img, 'data-src="', '"'):
+ if "/wp-content/uploads/" in src:
+ results.append((src, None))
+
+ return results
+
+
+class FitnakedgirlsCategoryExtractor(FitnakedgirlsExtractor):
+ """Extractor for fitnakedgirls category pages"""
+ subcategory = "category"
+ pattern = rf"{BASE_PATTERN}/photos/gallery/category/([\w-]+)"
+ example = "https://fitnakedgirls.com/photos/gallery/category/CATEGORY/"
+
+ def galleries(self):
+ base = f"{self.root}/photos/gallery/category/{self.groups[0]}/"
+ return self._pagination(base)
+
+
+class FitnakedgirlsTagExtractor(FitnakedgirlsExtractor):
+ """Extractor for fitnakedgirls tag pages"""
+ subcategory = "tag"
+ pattern = rf"{BASE_PATTERN}/photos/gallery/tag/([\w-]+)"
+ example = "https://fitnakedgirls.com/photos/gallery/tag/TAG/"
+
+ def galleries(self):
+ base = f"{self.root}/photos/gallery/tag/{self.groups[0]}/"
+ return self._pagination(base)
+
+
+class FitnakedgirlsVideoExtractor(FitnakedgirlsExtractor):
+ """Extractor for fitnakedgirls video posts"""
+ subcategory = "video"
+ directory_fmt = ("{category}", "{title}")
+ filename_fmt = "{filename}.{extension}"
+ archive_fmt = "{video_id}_{filename}"
+ pattern = rf"{BASE_PATTERN}/videos/(\d+)/(\d+)/([\w-]+)"
+ example = "https://fitnakedgirls.com/videos/2025/08/VIDEO-TITLE/"
+
+ def items(self):
+ year, month, slug = self.groups
+ url = f"{self.root}/videos/{year}/{month}/{slug}/"
+ page = self.request(url).text
+
+ extr = text.extract_from(page)
+ data = {
+ "slug" : slug,
+ "title" : self._extract_title(extr, " | "),
+ "video_id": text.parse_int(extr('data-post-id="', '"')),
+ "date" : self.parse_datetime_iso(
+ extr('article:published_time" content="', '"')),
+ }
+
+ yield Message.Directory, "", data
+
+ content = text.extr(
+ page, 'itemprop="articleBody"', '') or page
+ for video in text.extract_iter(content, " | "), "href='", "'")),
}
@@ -133,7 +132,7 @@ class PahealTagExtractor(PahealExtractor):
"duration" : text.parse_float(duration[:-1]),
"tags" : text.unescape(tags),
"size" : text.parse_bytes(size[:-1]),
- "date" : text.parse_datetime(date, "%B %d, %Y; %H:%M"),
+ "date" : self.parse_datetime(date, "%B %d, %Y; %H:%M"),
"filename" : f"{pid} - {tags}",
"extension": ext,
}
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index cf1a6d6..12dfd48 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -9,7 +9,7 @@
"""Extractors for https://www.patreon.com/"""
from .common import Extractor, Message
-from .. import text, util, exception
+from .. import text, util, dt, exception
from ..cache import memcache
import collections
import itertools
@@ -46,20 +46,21 @@ class PatreonExtractor(Extractor):
for post in self.posts():
- yield Message.Directory, post
+ yield Message.Directory, "", post
if not post.get("current_user_can_view", True):
self.log.warning("Not allowed to view post %s", post["id"])
continue
post["num"] = 0
hashes = set()
- for kind, url, name in itertools.chain.from_iterable(
+ for kind, file, url, name in itertools.chain.from_iterable(
g(post) for g in generators):
fhash = self._filehash(url)
if fhash not in hashes or not fhash:
hashes.add(fhash)
post["hash"] = fhash
post["type"] = kind
+ post["file"] = file
post["num"] += 1
text.nameext_from_url(name, post)
if text.ext_from_url(url) == "m3u8":
@@ -86,7 +87,7 @@ class PatreonExtractor(Extractor):
name = url
else:
name = self._filename(url) or url
- return (("postfile", url, name),)
+ return (("postfile", postfile, url, name),)
return ()
def _images(self, post):
@@ -94,7 +95,7 @@ class PatreonExtractor(Extractor):
for image in images:
if url := self._images_url(image):
name = image.get("file_name") or self._filename(url) or url
- yield "image", url, name
+ yield "image", image, url, name
def _images_url(self, image):
return image.get("download_url")
@@ -109,24 +110,24 @@ class PatreonExtractor(Extractor):
if image := post.get("image"):
if url := image.get("large_url"):
name = image.get("file_name") or self._filename(url) or url
- return (("image_large", url, name),)
+ return (("image_large", image, url, name),)
return ()
def _attachments(self, post):
for attachment in post.get("attachments") or ():
if url := self.request_location(attachment["url"], fatal=False):
- yield "attachment", url, attachment["name"]
+ yield "attachment", attachment, url, attachment["name"]
for attachment in post.get("attachments_media") or ():
if url := attachment.get("download_url"):
- yield "attachment", url, attachment["file_name"]
+ yield "attachment", attachment, url, attachment["file_name"]
def _content(self, post):
if content := post.get("content"):
for img in text.extract_iter(
content, '
![]()
= rhs # noqa E731
elif sort == "date_asc":
date_key = "start_date"
- date_off = timedelta(days=-1)
+ date_off = dt.timedelta(days=-1)
date_cmp = lambda lhs, rhs: lhs <= rhs # noqa E731
else:
date_key = None
@@ -1357,8 +1360,8 @@ class PixivAppAPI():
if date_key and text.parse_int(params.get("offset")) >= 5000:
date_last = data["illusts"][-1]["create_date"]
- date_val = (text.parse_datetime(
- date_last) + date_off).strftime("%Y-%m-%d")
+ date_val = (dt.parse_iso(date_last) + date_off).strftime(
+ "%Y-%m-%d")
self.log.info("Reached 'offset' >= 5000; "
"Updating '%s' to '%s'", date_key, date_val)
params[date_key] = date_val
diff --git a/gallery_dl/extractor/pixnet.py b/gallery_dl/extractor/pixnet.py
index 75c06bb..2feab95 100644
--- a/gallery_dl/extractor/pixnet.py
+++ b/gallery_dl/extractor/pixnet.py
@@ -65,7 +65,7 @@ class PixnetImageExtractor(PixnetExtractor):
subcategory = "image"
filename_fmt = "{id}.{extension}"
directory_fmt = ("{category}", "{blog}")
- pattern = BASE_PATTERN + r"/album/photo/(\d+)"
+ pattern = rf"{BASE_PATTERN}/album/photo/(\d+)"
example = "https://USER.pixnet.net/album/photo/12345"
def items(self):
@@ -83,7 +83,7 @@ class PixnetImageExtractor(PixnetExtractor):
data["blog"] = self.blog
data["user"] = data.pop("author_name")
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, data["url"], data
@@ -92,7 +92,7 @@ class PixnetSetExtractor(PixnetExtractor):
subcategory = "set"
directory_fmt = ("{category}", "{blog}",
"{folder_id} {folder_title}", "{set_id} {set_title}")
- pattern = BASE_PATTERN + r"/album/set/(\d+)"
+ pattern = rf"{BASE_PATTERN}/album/set/(\d+)"
example = "https://USER.pixnet.net/album/set/12345"
def items(self):
@@ -100,7 +100,7 @@ class PixnetSetExtractor(PixnetExtractor):
page = self.request(url, encoding="utf-8").text
data = self.metadata(page)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for num, info in enumerate(self._pagination(page), 1):
url, pos = text.extract(info, ' href="', '"')
src, pos = text.extract(info, ' src="', '"', pos)
@@ -137,7 +137,7 @@ class PixnetFolderExtractor(PixnetExtractor):
"""Extractor for all sets in a pixnet folder"""
subcategory = "folder"
url_fmt = "{}/album/folder/{}"
- pattern = BASE_PATTERN + r"/album/folder/(\d+)"
+ pattern = rf"{BASE_PATTERN}/album/folder/(\d+)"
example = "https://USER.pixnet.net/album/folder/12345"
@@ -145,5 +145,5 @@ class PixnetUserExtractor(PixnetExtractor):
"""Extractor for all sets and folders of a pixnet user"""
subcategory = "user"
url_fmt = "{}{}/album/list"
- pattern = BASE_PATTERN + r"()(?:/blog|/album(?:/list)?)?/?(?:$|[?#])"
+ pattern = rf"{BASE_PATTERN}()(?:/blog|/album(?:/list)?)?/?(?:$|[?#])"
example = "https://USER.pixnet.net/"
diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py
index 37b9b10..76ca59f 100644
--- a/gallery_dl/extractor/plurk.py
+++ b/gallery_dl/extractor/plurk.py
@@ -9,8 +9,7 @@
"""Extractors for https://www.plurk.com/"""
from .common import Extractor, Message
-from .. import text, util, exception
-import datetime
+from .. import text, util, dt, exception
class PlurkExtractor(Extractor):
@@ -62,7 +61,7 @@ class PlurkExtractor(Extractor):
if not data:
raise exception.NotFoundError("user")
return util.json_loads(
- util.re(r"new Date\(([^)]+)\)").sub(r"\1", data))
+ text.re(r"new Date\(([^)]+)\)").sub(r"\1", data))
class PlurkTimelineExtractor(PlurkExtractor):
@@ -88,12 +87,10 @@ class PlurkTimelineExtractor(PlurkExtractor):
while plurks:
yield from plurks
- offset = datetime.datetime.strptime(
- plurks[-1]["posted"], "%a, %d %b %Y %H:%M:%S %Z")
+ offset = dt.parse(plurks[-1]["posted"], "%a, %d %b %Y %H:%M:%S %Z")
data["offset"] = offset.strftime("%Y-%m-%dT%H:%M:%S.000Z")
- response = self.request(
- url, method="POST", headers=headers, data=data)
- plurks = response.json()["plurks"]
+ plurks = self.request_json(
+ url, method="POST", headers=headers, data=data)["plurks"]
class PlurkPostExtractor(PlurkExtractor):
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index 32ca528..c3aaaba 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -81,7 +81,7 @@ class PoipikuExtractor(Extractor):
"PasswordIcon", ">"):
post["password"] = True
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], url in enumerate(extract_files(
post, thumb, extr), 1):
yield Message.Url, url, text.nameext_from_url(url, post)
diff --git a/gallery_dl/extractor/poringa.py b/gallery_dl/extractor/poringa.py
index da17eae..832bedf 100644
--- a/gallery_dl/extractor/poringa.py
+++ b/gallery_dl/extractor/poringa.py
@@ -68,7 +68,7 @@ class PoringaExtractor(Extractor):
main_post, '

', '<'),
"user" : text.remove_html(extr("Created by:", "
")),
}
- yield Message.Directory, gif
+ yield Message.Directory, "", gif
yield Message.Url, gif["url"], text.nameext_from_url(gif["url"], gif)
class PornhubUserExtractor(Dispatch, PornhubExtractor):
"""Extractor for a pornhub user"""
- pattern = BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)/?$"
+ pattern = rf"{BASE_PATTERN}/((?:users|model|pornstar)/[^/?#]+)/?$"
example = "https://www.pornhub.com/model/USER"
def items(self):
@@ -178,7 +177,7 @@ class PornhubUserExtractor(Dispatch, PornhubExtractor):
class PornhubPhotosExtractor(PornhubExtractor):
"""Extractor for all galleries of a pornhub user"""
subcategory = "photos"
- pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
+ pattern = (rf"{BASE_PATTERN}/((?:users|model|pornstar)/[^/?#]+)"
"/(photos(?:/[^/?#]+)?)")
example = "https://www.pornhub.com/model/USER/photos"
@@ -199,7 +198,7 @@ class PornhubPhotosExtractor(PornhubExtractor):
class PornhubGifsExtractor(PornhubExtractor):
"""Extractor for a pornhub user's gifs"""
subcategory = "gifs"
- pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
+ pattern = (rf"{BASE_PATTERN}/((?:users|model|pornstar)/[^/?#]+)"
"/(gifs(?:/[^/?#]+)?)")
example = "https://www.pornhub.com/model/USER/gifs"
diff --git a/gallery_dl/extractor/pornpics.py b/gallery_dl/extractor/pornpics.py
index 34a0111..9c926e8 100644
--- a/gallery_dl/extractor/pornpics.py
+++ b/gallery_dl/extractor/pornpics.py
@@ -58,7 +58,7 @@ class PornpicsExtractor(Extractor):
class PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor):
"""Extractor for pornpics galleries"""
- pattern = BASE_PATTERN + r"/galleries/((?:[^/?#]+-)?(\d+))"
+ pattern = rf"{BASE_PATTERN}/galleries/((?:[^/?#]+-)?(\d+))"
example = "https://www.pornpics.com/galleries/TITLE-12345/"
def __init__(self, match):
@@ -94,7 +94,7 @@ class PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor):
class PornpicsTagExtractor(PornpicsExtractor):
"""Extractor for galleries from pornpics tag searches"""
subcategory = "tag"
- pattern = BASE_PATTERN + r"/tags/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/tags/([^/?#]+)"
example = "https://www.pornpics.com/tags/TAGS/"
def galleries(self):
@@ -105,7 +105,7 @@ class PornpicsTagExtractor(PornpicsExtractor):
class PornpicsSearchExtractor(PornpicsExtractor):
"""Extractor for galleries from pornpics search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/(?:\?q=|pornstars/|channels/)([^/]+)"
+ pattern = rf"{BASE_PATTERN}/(?:\?q=|pornstars/|channels/)([^/]+)"
example = "https://www.pornpics.com/?q=QUERY"
def galleries(self):
@@ -116,3 +116,35 @@ class PornpicsSearchExtractor(PornpicsExtractor):
"offset": 0,
}
return self._pagination(url, params)
+
+
+class PornpicsListingExtractor(PornpicsExtractor):
+ """Extractor for galleries from pornpics listing pages
+
+ These pages (popular, recent, etc.) don't support JSON pagination
+ and use single quotes in HTML, unlike category pages.
+ """
+ subcategory = "listing"
+ pattern = (rf"{BASE_PATTERN}"
+ rf"/(popular|recent|rating|likes|views|comments)/?$")
+ example = "https://www.pornpics.com/popular/"
+
+ def galleries(self):
+ url = f"{self.root}/{self.groups[0]}/"
+ page = self.request(url).text
+ return [
+ {"g_url": href}
+ for href in text.extract_iter(
+ page, "class='rel-link' href='", "'")
+ ]
+
+
+class PornpicsCategoryExtractor(PornpicsExtractor):
+ """Extractor for galleries from pornpics categories"""
+ subcategory = "category"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/?$"
+ example = "https://www.pornpics.com/ass/"
+
+ def galleries(self):
+ url = f"{self.root}/{self.groups[0]}/"
+ return self._pagination(url)
diff --git a/gallery_dl/extractor/pornstarstube.py b/gallery_dl/extractor/pornstarstube.py
new file mode 100644
index 0000000..82519a0
--- /dev/null
+++ b/gallery_dl/extractor/pornstarstube.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://pornstars.tube/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class PornstarstubeGalleryExtractor(GalleryExtractor):
+ """Extractor for image galleries from pornstars.tube"""
+ category = "pornstarstube"
+ root = "https://pornstars.tube"
+ pattern = (r"(?:https?://)?(?:www\.)?pornstars\.tube"
+ r"/albums/(\d+)(?:/([\w-]+))?")
+ example = "https://pornstars.tube/albums/12345/SLUG/"
+
+ def __init__(self, match):
+ url = f"{self.root}/albums/{match[1]}/{match[2] or 'a'}/"
+ GalleryExtractor.__init__(self, match, url)
+
+ def metadata(self, page):
+ gid, slug = self.groups
+ return {
+ "gallery_id": text.parse_int(gid),
+ "slug" : slug or "",
+ "title" : text.unescape(text.extr(
+ page, "