# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://cfake.com/"""
from .common import Extractor, Message
from .. import text
BASE_PATTERN = r"(?:https?://)?(?:www\.)?cfake\.com"
class CfakeExtractor(Extractor):
"""Base class for cfake extractors"""
category = "cfake"
root = "https://cfake.com"
directory_fmt = ("{category}", "{type}", "{type_name} ({type_id})")
filename_fmt = "{category}_{type_name}_{id}.{extension}"
archive_fmt = "{id}"
def items(self):
type, type_name, type_id, sub_id, pnum = self.groups
if type.endswith("ies"):
type = type[:-3] + "y"
kwdict = self.kwdict
kwdict["type"] = type
kwdict["type_id"] = text.parse_int(type_id)
kwdict["type_name"] = text.unquote(type_name).replace("_", " ")
kwdict["sub_id"] = text.parse_int(sub_id)
kwdict["page"] = pnum = text.parse_int(pnum, 1)
yield Message.Directory, "", {}
base = f"{self.root}/images/{type}/{type_name}/{type_id}"
if sub_id:
base = f"{base}/{sub_id}"
while True:
url = base if pnum < 2 else f"{base}/p{pnum}"
page = self.request(url).text
# Extract and yield images
num = 0
for image in self._extract_images(page):
num += 1
image["num"] = num + (pnum - 1) * 50
url = image["url"]
yield Message.Url, url, text.nameext_from_url(url, image)
# Check for next page
if not num or not (pnum := self._check_pagination(page)):
return
kwdict["page"] = pnum
def _extract_images(self, page):
"""Extract image URLs and metadata from a gallery page"""
for item in text.extract_iter(
page, '', '')
# Extract rating
rating_text = text.extr(item, 'class="current-rating"', '')
rating = text.extr(rating_text, 'width:', 'px')
# Convert thumbnail path to full image path
# show_param is like "2025/filename.jpg"
image_url = f"{self.root}/medias/photos/{show_param}"
yield {
"url": image_url,
"id": text.parse_int(picture_id) if picture_id else 0,
"name": text.unescape(name_param) if name_param else "",
"date": date,
"rating": rating,
}
def _check_pagination(self, page):
"""Check if there are more pages and return next page number"""
# Look for current page indicator
# Format: id="num_page_current" >1
current_section = text.extr(
page, 'id="num_page_current"', '')
if not current_section:
return None
# Extract current page number from the link text
current_page_str = text.extr(current_section, '">', '')
if not current_page_str:
return None
current_page = text.parse_int(current_page_str)
if not current_page:
return None
next_page = current_page + 1
# Check if next page link exists anywhere in the page
# Look for href="/images/.../pN" pattern
if f'/p{next_page}"' in page or f'/p{next_page} ' in page:
return next_page
return None
class CfakeCelebrityExtractor(CfakeExtractor):
"""Extractor for celebrity image galleries from cfake.com"""
subcategory = "celebrity"
pattern = (BASE_PATTERN + r"/images/(celebrity)"
r"/([^/?#]+)/(\d+)()(?:/p(\d+))?")
example = "https://cfake.com/images/celebrity/NAME/123"
class CfakeCategoryExtractor(CfakeExtractor):
"""Extractor for category image galleries from cfake.com"""
subcategory = "category"
pattern = (BASE_PATTERN + r"/images/(categories)"
r"/([^/?#]+)/(\d+)()(?:/p(\d+))?")
example = "https://cfake.com/images/categories/NAME/123"
class CfakeCreatedExtractor(CfakeExtractor):
"""Extractor for 'created' image galleries from cfake.com"""
subcategory = "created"
pattern = (BASE_PATTERN + r"/images/(created)"
r"/([^/?#]+)/(\d+)/(\d+)(?:/p(\d+))?")
example = "https://cfake.com/images/created/NAME/12345/123"
class CfakeCountryExtractor(CfakeExtractor):
"""Extractor for country image galleries from cfake.com"""
subcategory = "country"
pattern = (BASE_PATTERN + r"/images/(country)"
r"/([^/?#]+)/(\d+)/(\d+)(?:/p(\d+))?")
example = "https://cfake.com/images/country/NAME/12345/123"