path: root/gallery_dl/extractor
author     Unit 193 <unit193@unit193.net>  2023-10-03 18:31:58 -0400
committer  Unit 193 <unit193@unit193.net>  2023-10-03 18:31:58 -0400
commit     b8758ecd073910ce3220b2e68399147b425c37b8 (patch)
tree       d6aee20213508c8f425cbacb3d714367eca904c5 /gallery_dl/extractor
parent     e2f67519f8c1750a71aab3dc56b8345fff21bac5 (diff)
New upstream version 1.26.0. (upstream/1.26.0)
Diffstat (limited to 'gallery_dl/extractor')
-rw-r--r--  gallery_dl/extractor/2chan.py | 24
-rw-r--r--  gallery_dl/extractor/2chen.py | 30
-rw-r--r--  gallery_dl/extractor/35photo.py | 44
-rw-r--r--  gallery_dl/extractor/3dbooru.py | 39
-rw-r--r--  gallery_dl/extractor/4chan.py | 19
-rw-r--r--  gallery_dl/extractor/4chanarchives.py | 26
-rw-r--r--  gallery_dl/extractor/500px.py | 79
-rw-r--r--  gallery_dl/extractor/8chan.py | 64
-rw-r--r--  gallery_dl/extractor/8muses.py | 46
-rw-r--r--  gallery_dl/extractor/__init__.py | 1
-rw-r--r--  gallery_dl/extractor/adultempire.py | 14
-rw-r--r--  gallery_dl/extractor/architizer.py | 39
-rw-r--r--  gallery_dl/extractor/artstation.py | 93
-rw-r--r--  gallery_dl/extractor/aryion.py | 71
-rw-r--r--  gallery_dl/extractor/bbc.py | 25
-rw-r--r--  gallery_dl/extractor/behance.py | 362
-rw-r--r--  gallery_dl/extractor/blogger.py | 87
-rw-r--r--  gallery_dl/extractor/bunkr.py | 114
-rw-r--r--  gallery_dl/extractor/catbox.py | 27
-rw-r--r--  gallery_dl/extractor/comicvine.py | 16
-rw-r--r--  gallery_dl/extractor/common.py | 196
-rw-r--r--  gallery_dl/extractor/cyberdrop.py | 27
-rw-r--r--  gallery_dl/extractor/danbooru.py | 81
-rw-r--r--  gallery_dl/extractor/desktopography.py | 6
-rw-r--r--  gallery_dl/extractor/deviantart.py | 621
-rw-r--r--  gallery_dl/extractor/directlink.py | 33
-rw-r--r--  gallery_dl/extractor/dynastyscans.py | 34
-rw-r--r--  gallery_dl/extractor/e621.py | 120
-rw-r--r--  gallery_dl/extractor/erome.py | 36
-rw-r--r--  gallery_dl/extractor/exhentai.py | 157
-rw-r--r--  gallery_dl/extractor/fallenangels.py | 30
-rw-r--r--  gallery_dl/extractor/fanbox.py | 80
-rw-r--r--  gallery_dl/extractor/fanleaks.py | 50
-rw-r--r--  gallery_dl/extractor/fantia.py | 92
-rw-r--r--  gallery_dl/extractor/fapachi.py | 27
-rw-r--r--  gallery_dl/extractor/fapello.py | 53
-rw-r--r--  gallery_dl/extractor/flickr.py | 91
-rw-r--r--  gallery_dl/extractor/foolfuuka.py | 75
-rw-r--r--  gallery_dl/extractor/foolslide.py | 26
-rw-r--r--  gallery_dl/extractor/furaffinity.py | 133
-rw-r--r--  gallery_dl/extractor/fuskator.py | 27
-rw-r--r--  gallery_dl/extractor/gelbooru.py | 90
-rw-r--r--  gallery_dl/extractor/gelbooru_v01.py | 70
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py | 170
-rw-r--r--  gallery_dl/extractor/generic.py | 26
-rw-r--r--  gallery_dl/extractor/gfycat.py | 305
-rw-r--r--  gallery_dl/extractor/gofile.py | 48
-rw-r--r--  gallery_dl/extractor/hbrowse.py | 11
-rw-r--r--  gallery_dl/extractor/hentai2read.py | 54
-rw-r--r--  gallery_dl/extractor/hentaicosplays.py | 35
-rw-r--r--  gallery_dl/extractor/hentaifoundry.py | 102
-rw-r--r--  gallery_dl/extractor/hentaifox.py | 52
-rw-r--r--  gallery_dl/extractor/hentaihand.py | 32
-rw-r--r--  gallery_dl/extractor/hentaihere.py | 48
-rw-r--r--  gallery_dl/extractor/hiperdex.py | 65
-rw-r--r--  gallery_dl/extractor/hitomi.py | 63
-rw-r--r--  gallery_dl/extractor/hotleak.py | 66
-rw-r--r--  gallery_dl/extractor/idolcomplex.py | 53
-rw-r--r--  gallery_dl/extractor/imagebam.py | 48
-rw-r--r--  gallery_dl/extractor/imagechest.py | 33
-rw-r--r--  gallery_dl/extractor/imagefap.py | 113
-rw-r--r--  gallery_dl/extractor/imagehosts.py | 189
-rw-r--r--  gallery_dl/extractor/imgbb.py | 47
-rw-r--r--  gallery_dl/extractor/imgbox.py | 30
-rw-r--r--  gallery_dl/extractor/imgth.py | 21
-rw-r--r--  gallery_dl/extractor/imgur.py | 198
-rw-r--r--  gallery_dl/extractor/inkbunny.py | 116
-rw-r--r--  gallery_dl/extractor/instagram.py | 256
-rw-r--r--  gallery_dl/extractor/issuu.py | 35
-rw-r--r--  gallery_dl/extractor/itaku.py | 71
-rw-r--r--  gallery_dl/extractor/itchio.py | 25
-rw-r--r--  gallery_dl/extractor/jpgfish.py | 58
-rw-r--r--  gallery_dl/extractor/jschan.py | 17
-rw-r--r--  gallery_dl/extractor/kabeuchi.py | 13
-rw-r--r--  gallery_dl/extractor/keenspot.py | 25
-rw-r--r--  gallery_dl/extractor/kemonoparty.py | 168
-rw-r--r--  gallery_dl/extractor/khinsider.py | 25
-rw-r--r--  gallery_dl/extractor/komikcast.py | 23
-rw-r--r--  gallery_dl/extractor/lensdump.py | 35
-rw-r--r--  gallery_dl/extractor/lexica.py | 32
-rw-r--r--  gallery_dl/extractor/lightroom.py | 19
-rw-r--r--  gallery_dl/extractor/livedoor.py | 44
-rw-r--r--  gallery_dl/extractor/lolisafe.py | 17
-rw-r--r--  gallery_dl/extractor/luscious.py | 84
-rw-r--r--  gallery_dl/extractor/lynxchan.py | 36
-rw-r--r--  gallery_dl/extractor/mangadex.py | 68
-rw-r--r--  gallery_dl/extractor/mangafox.py | 43
-rw-r--r--  gallery_dl/extractor/mangahere.py | 41
-rw-r--r--  gallery_dl/extractor/mangakakalot.py | 24
-rw-r--r--  gallery_dl/extractor/manganelo.py | 38
-rw-r--r--  gallery_dl/extractor/mangapark.py | 70
-rw-r--r--  gallery_dl/extractor/mangaread.py | 96
-rw-r--r--  gallery_dl/extractor/mangasee.py | 93
-rw-r--r--  gallery_dl/extractor/mangoxo.py | 36
-rw-r--r--  gallery_dl/extractor/mastodon.py | 64
-rw-r--r--  gallery_dl/extractor/mememuseum.py | 120
-rw-r--r--  gallery_dl/extractor/misskey.py | 54
-rw-r--r--  gallery_dl/extractor/moebooru.py | 127
-rw-r--r--  gallery_dl/extractor/myhentaigallery.py | 23
-rw-r--r--  gallery_dl/extractor/myportfolio.py | 22
-rw-r--r--  gallery_dl/extractor/naver.py | 27
-rw-r--r--  gallery_dl/extractor/naverwebtoon.py | 65
-rw-r--r--  gallery_dl/extractor/newgrounds.py | 220
-rw-r--r--  gallery_dl/extractor/nhentai.py | 46
-rw-r--r--  gallery_dl/extractor/nijie.py | 165
-rw-r--r--  gallery_dl/extractor/nitter.py | 187
-rw-r--r--  gallery_dl/extractor/nozomi.py | 69
-rw-r--r--  gallery_dl/extractor/nsfwalbum.py | 13
-rw-r--r--  gallery_dl/extractor/nudecollect.py | 63
-rw-r--r--  gallery_dl/extractor/oauth.py | 9
-rw-r--r--  gallery_dl/extractor/paheal.py | 88
-rw-r--r--  gallery_dl/extractor/patreon.py | 59
-rw-r--r--  gallery_dl/extractor/philomena.py | 112
-rw-r--r--  gallery_dl/extractor/photobucket.py | 48
-rw-r--r--  gallery_dl/extractor/photovogue.py | 33
-rw-r--r--  gallery_dl/extractor/picarto.py | 9
-rw-r--r--  gallery_dl/extractor/piczel.py | 34
-rw-r--r--  gallery_dl/extractor/pillowfort.py | 87
-rw-r--r--  gallery_dl/extractor/pinterest.py | 96
-rw-r--r--  gallery_dl/extractor/pixiv.py | 293
-rw-r--r--  gallery_dl/extractor/pixnet.py | 42
-rw-r--r--  gallery_dl/extractor/plurk.py | 16
-rw-r--r--  gallery_dl/extractor/poipiku.py | 59
-rw-r--r--  gallery_dl/extractor/pornhub.py | 170
-rw-r--r--  gallery_dl/extractor/pornpics.py | 59
-rw-r--r--  gallery_dl/extractor/pururin.py | 96
-rw-r--r--  gallery_dl/extractor/reactor.py | 95
-rw-r--r--  gallery_dl/extractor/readcomiconline.py | 23
-rw-r--r--  gallery_dl/extractor/recursive.py | 37
-rw-r--r--  gallery_dl/extractor/reddit.py | 163
-rw-r--r--  gallery_dl/extractor/redgifs.py | 109
-rw-r--r--  gallery_dl/extractor/rule34us.py | 39
-rw-r--r--  gallery_dl/extractor/sankaku.py | 97
-rw-r--r--  gallery_dl/extractor/sankakucomplex.py | 38
-rw-r--r--  gallery_dl/extractor/seiga.py | 54
-rw-r--r--  gallery_dl/extractor/senmanga.py | 55
-rw-r--r--  gallery_dl/extractor/sexcom.py | 64
-rw-r--r--  gallery_dl/extractor/shimmie2.py | 220
-rw-r--r--  gallery_dl/extractor/shopify.py | 48
-rw-r--r--  gallery_dl/extractor/simplyhentai.py | 48
-rw-r--r--  gallery_dl/extractor/skeb.py | 72
-rw-r--r--  gallery_dl/extractor/slickpic.py | 28
-rw-r--r--  gallery_dl/extractor/slideshare.py | 43
-rw-r--r--  gallery_dl/extractor/smugmug.py | 58
-rw-r--r--  gallery_dl/extractor/soundgasm.py | 38
-rw-r--r--  gallery_dl/extractor/speakerdeck.py | 14
-rw-r--r--  gallery_dl/extractor/subscribestar.py | 84
-rw-r--r--  gallery_dl/extractor/szurubooru.py | 27
-rw-r--r--  gallery_dl/extractor/tapas.py | 86
-rw-r--r--  gallery_dl/extractor/tcbscans.py | 44
-rw-r--r--  gallery_dl/extractor/telegraph.py | 67
-rw-r--r--  gallery_dl/extractor/test.py | 8
-rw-r--r--  gallery_dl/extractor/toyhouse.py | 57
-rw-r--r--  gallery_dl/extractor/tsumino.py | 56
-rw-r--r--  gallery_dl/extractor/tumblr.py | 147
-rw-r--r--  gallery_dl/extractor/tumblrgallery.py | 13
-rw-r--r--  gallery_dl/extractor/twibooru.py | 75
-rw-r--r--  gallery_dl/extractor/twitter.py | 464
-rw-r--r--  gallery_dl/extractor/unsplash.py | 98
-rw-r--r--  gallery_dl/extractor/uploadir.py | 37
-rw-r--r--  gallery_dl/extractor/urlshortener.py | 16
-rw-r--r--  gallery_dl/extractor/vanillarock.py | 20
-rw-r--r--  gallery_dl/extractor/vichan.py | 53
-rw-r--r--  gallery_dl/extractor/vipergirls.py | 46
-rw-r--r--  gallery_dl/extractor/vk.py | 57
-rw-r--r--  gallery_dl/extractor/vsco.py | 41
-rw-r--r--  gallery_dl/extractor/wallhaven.py | 70
-rw-r--r--  gallery_dl/extractor/wallpapercave.py | 4
-rw-r--r--  gallery_dl/extractor/warosu.py | 14
-rw-r--r--  gallery_dl/extractor/weasyl.py | 59
-rw-r--r--  gallery_dl/extractor/webmshare.py | 33
-rw-r--r--  gallery_dl/extractor/webtoons.py | 55
-rw-r--r--  gallery_dl/extractor/weibo.py | 119
-rw-r--r--  gallery_dl/extractor/wikiart.py | 27
-rw-r--r--  gallery_dl/extractor/wikifeet.py | 57
-rw-r--r--  gallery_dl/extractor/xhamster.py | 60
-rw-r--r--  gallery_dl/extractor/xvideos.py | 35
-rw-r--r--  gallery_dl/extractor/ytdl.py | 6
-rw-r--r--  gallery_dl/extractor/zerochan.py | 74
179 files changed, 2183 insertions, 10636 deletions
diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py
index 92ea6ca..337ba48 100644
--- a/gallery_dl/extractor/2chan.py
+++ b/gallery_dl/extractor/2chan.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2022 Mike Fährmann
+# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -20,26 +20,8 @@ class _2chanThreadExtractor(Extractor):
filename_fmt = "{tim}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
url_fmt = "https://{server}.2chan.net/{board}/src/{filename}"
- pattern = r"(?:https?://)?([\w-]+)\.2chan\.net/([^/]+)/res/(\d+)"
- test = ("https://dec.2chan.net/70/res/14565.htm", {
- "pattern": r"https://dec\.2chan\.net/70/src/\d{13}\.jpg",
- "count": ">= 3",
- "keyword": {
- "board": "70",
- "board_name": "新板提案",
- "com": str,
- "fsize": r"re:\d+",
- "name": "名無し",
- "no": r"re:1[45]\d\d\d",
- "now": r"re:22/../..\(.\)..:..:..",
- "post": "無題",
- "server": "dec",
- "thread": "14565",
- "tim": r"re:^\d{13}$",
- "time": r"re:^\d{10}$",
- "title": "ヒロアカ板"
- },
- })
+ pattern = r"(?:https?://)?([\w-]+)\.2chan\.net/([^/?#]+)/res/(\d+)"
+ example = "https://dec.2chan.net/12/res/12345.htm"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/2chen.py b/gallery_dl/extractor/2chen.py
index f142690..0c97889 100644
--- a/gallery_dl/extractor/2chen.py
+++ b/gallery_dl/extractor/2chen.py
@@ -21,26 +21,7 @@ class _2chenThreadExtractor(Extractor):
filename_fmt = "{time} {filename}.{extension}"
archive_fmt = "{board}_{thread}_{hash}_{time}"
pattern = BASE_PATTERN + r"/([^/?#]+)/(\d+)"
- test = (
- ("https://sturdychan.help/tv/268929", {
- "pattern": r"https://sturdychan\.help/assets/images"
- r"/src/\w{40}\.\w+$",
- "count": ">= 179",
- "keyword": {
- "board": "tv",
- "date": "type:datetime",
- "hash": r"re:[0-9a-f]{40}",
- "name": "Anonymous",
- "no": r"re:\d+",
- "thread": "268929",
- "time": int,
- "title": "「/ttg/ #118: 🇧🇷 edition」",
- "url": str,
- },
- }),
- ("https://2chen.club/tv/1"),
- ("https://2chen.moe/jp/303786"),
- )
+ example = "https://sturdychan.help/a/12345/"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -101,14 +82,7 @@ class _2chenBoardExtractor(Extractor):
subcategory = "board"
root = "https://sturdychan.help"
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/catalog|/?$)"
- test = (
- ("https://sturdychan.help/co/", {
- "pattern": _2chenThreadExtractor.pattern
- }),
- ("https://2chen.moe/co"),
- ("https://2chen.club/tv"),
- ("https://2chen.moe/co/catalog"),
- )
+ example = "https://sturdychan.help/a/"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/35photo.py b/gallery_dl/extractor/35photo.py
index f86691d..773116e 100644
--- a/gallery_dl/extractor/35photo.py
+++ b/gallery_dl/extractor/35photo.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -101,20 +101,7 @@ class _35photoUserExtractor(_35photoExtractor):
subcategory = "user"
pattern = (r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro"
r"/(?!photo_|genre_|tags/|rating/)([^/?#]+)")
- test = (
- ("https://35photo.pro/liya", {
- "pattern": r"https://([a-z][0-9]\.)?35photo\.pro"
- r"/photos_(main|series)/.*\.jpg",
- "count": 9,
- }),
- ("https://35photo.pro/suhoveev", {
- # last photo ID (1267028) isn't given as 'photo-id="<id>"
- # there are only 23 photos without the last one
- "count": ">= 33",
- }),
- ("https://en.35photo.pro/liya"),
- ("https://ru.35photo.pro/liya"),
- )
+ example = "https://35photo.pro/USER"
def __init__(self, match):
_35photoExtractor.__init__(self, match)
@@ -143,11 +130,7 @@ class _35photoTagExtractor(_35photoExtractor):
directory_fmt = ("{category}", "Tags", "{search_tag}")
archive_fmt = "t{search_tag}_{id}_{num}"
pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/tags/([^/?#]+)"
- test = ("https://35photo.pro/tags/landscape/", {
- "range": "1-25",
- "count": 25,
- "archive": False,
- })
+ example = "https://35photo.pro/tags/TAG/"
def __init__(self, match):
_35photoExtractor.__init__(self, match)
@@ -180,7 +163,7 @@ class _35photoGenreExtractor(_35photoExtractor):
directory_fmt = ("{category}", "Genre", "{genre}")
archive_fmt = "g{genre_id}_{id}_{num}"
pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/genre_(\d+)(/new/)?"
- test = ("https://35photo.pro/genre_109/",)
+ example = "https://35photo.pro/genre_12345/"
def __init__(self, match):
_35photoExtractor.__init__(self, match)
@@ -212,24 +195,7 @@ class _35photoImageExtractor(_35photoExtractor):
"""Extractor for individual images from 35photo.pro"""
subcategory = "image"
pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/photo_(\d+)"
- test = ("https://35photo.pro/photo_753340/", {
- "count": 1,
- "keyword": {
- "url" : r"re:https://35photo\.pro/photos_main/.*\.jpg",
- "id" : 753340,
- "title" : "Winter walk",
- "description": str,
- "tags" : list,
- "views" : int,
- "favorites" : int,
- "score" : int,
- "type" : 0,
- "date" : "15 авг, 2014",
- "user" : "liya",
- "user_id" : 20415,
- "user_name" : "Liya Mirzaeva",
- },
- })
+ example = "https://35photo.pro/photo_12345/"
def __init__(self, match):
_35photoExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/3dbooru.py b/gallery_dl/extractor/3dbooru.py
index e0066cb..c1bc9e5 100644
--- a/gallery_dl/extractor/3dbooru.py
+++ b/gallery_dl/extractor/3dbooru.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -17,22 +17,17 @@ class _3dbooruBase():
basecategory = "booru"
root = "http://behoimi.org"
- def __init__(self, match):
- super().__init__(match)
- self.session.headers.update({
- "Referer": "http://behoimi.org/post/show/",
- "Accept-Encoding": "identity",
- })
+ def _init(self):
+ headers = self.session.headers
+ headers["Referer"] = "http://behoimi.org/post/show/"
+ headers["Accept-Encoding"] = "identity"
class _3dbooruTagExtractor(_3dbooruBase, moebooru.MoebooruTagExtractor):
"""Extractor for images from behoimi.org based on search-tags"""
pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org/post"
r"(?:/(?:index)?)?\?tags=(?P<tags>[^&#]+)")
- test = ("http://behoimi.org/post?tags=himekawa_azuru+dress", {
- "url": "ecb30c6aaaf8a6ff8f55255737a9840832a483c1",
- "content": "11cbda40c287e026c1ce4ca430810f761f2d0b2a",
- })
+ example = "http://behoimi.org/post?tags=TAG"
def posts(self):
params = {"tags": self.tags}
@@ -42,10 +37,7 @@ class _3dbooruTagExtractor(_3dbooruBase, moebooru.MoebooruTagExtractor):
class _3dbooruPoolExtractor(_3dbooruBase, moebooru.MoebooruPoolExtractor):
"""Extractor for image-pools from behoimi.org"""
pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(?P<pool>\d+)"
- test = ("http://behoimi.org/pool/show/27", {
- "url": "da75d2d1475449d5ef0c266cb612683b110a30f2",
- "content": "fd5b37c5c6c2de4b4d6f1facffdefa1e28176554",
- })
+ example = "http://behoimi.org/pool/show/12345"
def posts(self):
params = {"tags": "pool:" + self.pool_id}
@@ -55,17 +47,7 @@ class _3dbooruPoolExtractor(_3dbooruBase, moebooru.MoebooruPoolExtractor):
class _3dbooruPostExtractor(_3dbooruBase, moebooru.MoebooruPostExtractor):
"""Extractor for single images from behoimi.org"""
pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(?P<post>\d+)"
- test = ("http://behoimi.org/post/show/140852", {
- "url": "ce874ea26f01d6c94795f3cc3aaaaa9bc325f2f6",
- "content": "26549d55b82aa9a6c1686b96af8bfcfa50805cd4",
- "options": (("tags", True),),
- "keyword": {
- "tags_character": "furude_rika",
- "tags_copyright": "higurashi_no_naku_koro_ni",
- "tags_model": "himekawa_azuru",
- "tags_general": str,
- },
- })
+ example = "http://behoimi.org/post/show/12345"
def posts(self):
params = {"tags": "id:" + self.post_id}
@@ -78,7 +60,4 @@ class _3dbooruPopularExtractor(
pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org"
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
r"(?:\?(?P<query>[^#]*))?")
- test = ("http://behoimi.org/post/popular_by_month?month=2&year=2013", {
- "pattern": r"http://behoimi\.org/data/../../[0-9a-f]{32}\.jpg",
- "count": 20,
- })
+ example = "http://behoimi.org/post/popular_by_month"
diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py
index bf9615d..2db6042 100644
--- a/gallery_dl/extractor/4chan.py
+++ b/gallery_dl/extractor/4chan.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,17 +21,7 @@ class _4chanThreadExtractor(Extractor):
archive_fmt = "{board}_{thread}_{tim}"
pattern = (r"(?:https?://)?boards\.4chan(?:nel)?\.org"
r"/([^/]+)/thread/(\d+)")
- test = (
- ("https://boards.4chan.org/tg/thread/15396072/", {
- "url": "39082ad166161966d7ba8e37f2173a824eb540f0",
- "keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a",
- "content": "20b7b51afa51c9c31a0020a0737b889532c8d7ec",
- }),
- ("https://boards.4channel.org/tg/thread/15396072/", {
- "url": "39082ad166161966d7ba8e37f2173a824eb540f0",
- "keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a",
- }),
- )
+ example = "https://boards.4channel.org/a/thread/12345/"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -65,10 +55,7 @@ class _4chanBoardExtractor(Extractor):
category = "4chan"
subcategory = "board"
pattern = r"(?:https?://)?boards\.4chan(?:nel)?\.org/([^/?#]+)/\d*$"
- test = ("https://boards.4channel.org/po/", {
- "pattern": _4chanThreadExtractor.pattern,
- "count": ">= 100",
- })
+ example = "https://boards.4channel.org/a/"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/4chanarchives.py b/gallery_dl/extractor/4chanarchives.py
index 041e6a3..f018d3e 100644
--- a/gallery_dl/extractor/4chanarchives.py
+++ b/gallery_dl/extractor/4chanarchives.py
@@ -21,21 +21,7 @@ class _4chanarchivesThreadExtractor(Extractor):
filename_fmt = "{no}-{filename}.{extension}"
archive_fmt = "{board}_{thread}_{no}"
pattern = r"(?:https?://)?4chanarchives\.com/board/([^/?#]+)/thread/(\d+)"
- test = (
- ("https://4chanarchives.com/board/c/thread/2707110", {
- "pattern": r"https://i\.imgur\.com/(0wLGseE|qbByWDc)\.jpg",
- "count": 2,
- "keyword": {
- "board": "c",
- "com": str,
- "name": "Anonymous",
- "no": int,
- "thread": "2707110",
- "time": r"re:2016-07-1\d \d\d:\d\d:\d\d",
- "title": "Ren Kagami from 'Oyako Neburi'",
- },
- }),
- )
+ example = "https://4chanarchives.com/board/a/thread/12345/"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -106,15 +92,7 @@ class _4chanarchivesBoardExtractor(Extractor):
subcategory = "board"
root = "https://4chanarchives.com"
pattern = r"(?:https?://)?4chanarchives\.com/board/([^/?#]+)(?:/(\d+))?/?$"
- test = (
- ("https://4chanarchives.com/board/c/", {
- "pattern": _4chanarchivesThreadExtractor.pattern,
- "range": "1-40",
- "count": 40,
- }),
- ("https://4chanarchives.com/board/c"),
- ("https://4chanarchives.com/board/c/10"),
- )
+ example = "https://4chanarchives.com/board/a/"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index 1213194..41cc0de 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -21,11 +21,7 @@ class _500pxExtractor(Extractor):
filename_fmt = "{id}_{name}.{extension}"
archive_fmt = "{id}"
root = "https://500px.com"
- cookiedomain = ".500px.com"
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.session.headers["Referer"] = self.root + "/"
+ cookies_domain = ".500px.com"
def items(self):
data = self.metadata()
@@ -73,7 +69,7 @@ class _500pxExtractor(Extractor):
def _request_api(self, url, params):
headers = {
"Origin": self.root,
- "x-csrf-token": self.session.cookies.get(
+ "x-csrf-token": self.cookies.get(
"x-csrf-token", domain=".500px.com"),
}
return self.request(url, headers=headers, params=params).json()
@@ -81,7 +77,7 @@ class _500pxExtractor(Extractor):
def _request_graphql(self, opname, variables):
url = "https://api.500px.com/graphql"
headers = {
- "x-csrf-token": self.session.cookies.get(
+ "x-csrf-token": self.cookies.get(
"x-csrf-token", domain=".500px.com"),
}
data = {
@@ -97,15 +93,7 @@ class _500pxUserExtractor(_500pxExtractor):
"""Extractor for photos from a user's photostream on 500px.com"""
subcategory = "user"
pattern = BASE_PATTERN + r"/(?!photo/|liked)(?:p/)?([^/?#]+)/?(?:$|[?#])"
- test = (
- ("https://500px.com/p/light_expression_photography", {
- "pattern": r"https?://drscdn.500px.org/photo/\d+/m%3D4096/v2",
- "range": "1-99",
- "count": 99,
- }),
- ("https://500px.com/light_expression_photography"),
- ("https://web.500px.com/light_expression_photography"),
- )
+ example = "https://500px.com/USER"
def __init__(self, match):
_500pxExtractor.__init__(self, match)
@@ -135,17 +123,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
directory_fmt = ("{category}", "{user[username]}", "{gallery[name]}")
pattern = (BASE_PATTERN + r"/(?!photo/)(?:p/)?"
r"([^/?#]+)/galleries/([^/?#]+)")
- test = (
- ("https://500px.com/p/fashvamp/galleries/lera", {
- "url": "002dc81dee5b4a655f0e31ad8349e8903b296df6",
- "count": 3,
- "keyword": {
- "gallery": dict,
- "user": dict,
- },
- }),
- ("https://500px.com/fashvamp/galleries/lera"),
- )
+ example = "https://500px.com/USER/galleries/GALLERY"
def __init__(self, match):
_500pxExtractor.__init__(self, match)
@@ -201,7 +179,7 @@ class _500pxFavoriteExtractor(_500pxExtractor):
"""Extractor for favorite 500px photos"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/liked/?$"
- test = ("https://500px.com/liked",)
+ example = "https://500px.com/liked"
def photos(self):
variables = {"pageSize": 20}
@@ -225,50 +203,7 @@ class _500pxImageExtractor(_500pxExtractor):
"""Extractor for individual images from 500px.com"""
subcategory = "image"
pattern = BASE_PATTERN + r"/photo/(\d+)"
- test = ("https://500px.com/photo/222049255/queen-of-coasts", {
- "url": "fbdf7df39325cae02f5688e9f92935b0e7113315",
- "count": 1,
- "keyword": {
- "camera": "Canon EOS 600D",
- "camera_info": dict,
- "comments": list,
- "comments_count": int,
- "created_at": "2017-08-01T08:40:05+00:00",
- "description": str,
- "editored_by": None,
- "editors_choice": False,
- "extension": "jpg",
- "feature": "popular",
- "feature_date": "2017-08-01T09:58:28+00:00",
- "focal_length": "208",
- "height": 3111,
- "id": 222049255,
- "image_format": "jpg",
- "image_url": list,
- "images": list,
- "iso": "100",
- "lens": "EF-S55-250mm f/4-5.6 IS II",
- "lens_info": dict,
- "liked": None,
- "location": None,
- "location_details": dict,
- "name": "Queen Of Coasts",
- "nsfw": False,
- "privacy": False,
- "profile": True,
- "rating": float,
- "status": 1,
- "tags": list,
- "taken_at": "2017-05-04T17:36:51+00:00",
- "times_viewed": int,
- "url": "/photo/222049255/Queen-Of-Coasts-by-Alice-Nabieva",
- "user": dict,
- "user_id": 12847235,
- "votes_count": int,
- "watermark": True,
- "width": 4637,
- },
- })
+ example = "https://500px.com/photo/12345/TITLE"
def __init__(self, match):
_500pxExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
index 0e128c3..fc16f43 100644
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -27,7 +27,7 @@ class _8chanExtractor(Extractor):
Extractor.__init__(self, match)
@memcache()
- def _prepare_cookies(self):
+ def cookies_prepare(self):
# fetch captcha cookies
# (necessary to download without getting interrupted)
now = datetime.utcnow()
@@ -39,14 +39,14 @@ class _8chanExtractor(Extractor):
# - remove 'expires' timestamp
# - move 'captchaexpiration' value forward by 1 month)
domain = self.root.rpartition("/")[2]
- for cookie in self.session.cookies:
+ for cookie in self.cookies:
if cookie.domain.endswith(domain):
cookie.expires = None
if cookie.name == "captchaexpiration":
cookie.value = (now + timedelta(30, 300)).strftime(
"%a, %d %b %Y %H:%M:%S GMT")
- return self.session.cookies
+ return self.cookies
class _8chanThreadExtractor(_8chanExtractor):
@@ -57,48 +57,7 @@ class _8chanThreadExtractor(_8chanExtractor):
filename_fmt = "{postId}{num:?-//} {filename[:200]}.{extension}"
archive_fmt = "{boardUri}_{postId}_{num}"
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
- test = (
- ("https://8chan.moe/vhs/res/4.html", {
- "pattern": r"https://8chan\.moe/\.media/[0-9a-f]{64}\.\w+$",
- "count": 14,
- "keyword": {
- "archived": False,
- "autoSage": False,
- "boardDescription": "Film and Cinema",
- "boardMarkdown": None,
- "boardName": "Movies",
- "boardUri": "vhs",
- "creation": r"re:\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{3}Z",
- "cyclic": False,
- "email": None,
- "id": "re:^[0-9a-f]{6}$",
- "locked": False,
- "markdown": str,
- "maxFileCount": 5,
- "maxFileSize": "32.00 MB",
- "maxMessageLength": 8001,
- "message": str,
- "mime": str,
- "name": "Anonymous",
- "num": int,
- "originalName": str,
- "path": r"re:/.media/[0-9a-f]{64}\.\w+$",
- "pinned": False,
- "postId": int,
- "signedRole": None,
- "size": int,
- "threadId": 4,
- "thumb": r"re:/.media/t_[0-9a-f]{64}$",
- "uniquePosters": 9,
- "usesCustomCss": True,
- "usesCustomJs": False,
- "?wsPort": 8880,
- "?wssPort": 2087,
- },
- }),
- ("https://8chan.se/vhs/res/4.html"),
- ("https://8chan.cc/vhs/res/4.html"),
- )
+ example = "https://8chan.moe/a/res/12345.html"
def __init__(self, match):
_8chanExtractor.__init__(self, match)
@@ -113,7 +72,7 @@ class _8chanThreadExtractor(_8chanExtractor):
thread["_http_headers"] = {"Referer": url + "html"}
try:
- self.session.cookies = self._prepare_cookies()
+ self.cookies = self.cookies_prepare()
except Exception as exc:
self.log.debug("Failed to fetch captcha cookies: %s: %s",
exc.__class__.__name__, exc, exc_info=True)
@@ -137,20 +96,11 @@ class _8chanBoardExtractor(_8chanExtractor):
"""Extractor for 8chan boards"""
subcategory = "board"
pattern = BASE_PATTERN + r"/([^/?#]+)/(?:(\d+)\.html)?$"
- test = (
- ("https://8chan.moe/vhs/"),
- ("https://8chan.moe/vhs/2.html", {
- "pattern": _8chanThreadExtractor.pattern,
- "count": 23,
- }),
- ("https://8chan.se/vhs/"),
- ("https://8chan.cc/vhs/"),
- )
+ example = "https://8chan.moe/a/"
def __init__(self, match):
_8chanExtractor.__init__(self, match)
_, self.board, self.page = match.groups()
- self.session.headers["Referer"] = self.root + "/"
def items(self):
page = text.parse_int(self.page, 1)
diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py
index 584c6d2..f88a0c6 100644
--- a/gallery_dl/extractor/8muses.py
+++ b/gallery_dl/extractor/8muses.py
@@ -22,51 +22,7 @@ class _8musesAlbumExtractor(Extractor):
root = "https://comics.8muses.com"
pattern = (r"(?:https?://)?(?:comics\.|www\.)?8muses\.com"
r"(/comics/album/[^?#]+)(\?[^#]+)?")
- test = (
- ("https://comics.8muses.com/comics/album/Fakku-Comics/mogg/Liar", {
- "url": "6286ac33087c236c5a7e51f8a9d4e4d5548212d4",
- "pattern": r"https://comics.8muses.com/image/fl/[\w-]+",
- "keyword": {
- "url" : str,
- "hash" : str,
- "page" : int,
- "count": 6,
- "album": {
- "id" : 10467,
- "title" : "Liar",
- "path" : "Fakku Comics/mogg/Liar",
- "parts" : ["Fakku Comics", "mogg", "Liar"],
- "private": False,
- "url" : "https://comics.8muses.com/comics"
- "/album/Fakku-Comics/mogg/Liar",
- "parent" : 10464,
- "views" : int,
- "likes" : int,
- "date" : "dt:2018-07-10 00:00:00",
- },
- },
- }),
- ("https://www.8muses.com/comics/album/Fakku-Comics/santa", {
- "count": ">= 3",
- "pattern": pattern,
- "keyword": {
- "url" : str,
- "name" : str,
- "private": False,
- },
- }),
- # custom sorting
- ("https://www.8muses.com/comics/album/Fakku-Comics/11?sort=az", {
- "count": ">= 70",
- "keyword": {"name": r"re:^[R-Zr-z]"},
- }),
- # non-ASCII characters
- (("https://comics.8muses.com/comics/album/Various-Authors/Chessire88"
- "/From-Trainers-to-Pokmons"), {
- "count": 2,
- "keyword": {"name": "re:From Trainers to Pokémons"},
- }),
- )
+ example = "https://comics.8muses.com/comics/album/PATH/TITLE"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index fa56bfb..3abe74b 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -49,7 +49,6 @@ modules = [
"gelbooru",
"gelbooru_v01",
"gelbooru_v02",
- "gfycat",
"gofile",
"hbrowse",
"hentai2read",
diff --git a/gallery_dl/extractor/adultempire.py b/gallery_dl/extractor/adultempire.py
index 8160e48..1617414 100644
--- a/gallery_dl/extractor/adultempire.py
+++ b/gallery_dl/extractor/adultempire.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -18,17 +18,7 @@ class AdultempireGalleryExtractor(GalleryExtractor):
root = "https://www.adultempire.com"
pattern = (r"(?:https?://)?(?:www\.)?adult(?:dvd)?empire\.com"
r"(/(\d+)/gallery\.html)")
- test = (
- ("https://www.adultempire.com/5998/gallery.html", {
- "range": "1",
- "keyword": "5b3266e69801db0d78c22181da23bc102886e027",
- "content": "5c6beb31e5e3cdc90ee5910d5c30f9aaec977b9e",
- }),
- ("https://www.adultdvdempire.com/5683/gallery.html", {
- "url": "b12cd1a65cae8019d837505adb4d6a2c1ed4d70d",
- "keyword": "8d448d79c4ac5f5b10a3019d5b5129ddb43655e5",
- }),
- )
+ example = "https://www.adultempire.com/12345/gallery.html"
def __init__(self, match):
GalleryExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/architizer.py b/gallery_dl/extractor/architizer.py
index dbc197e..8064e78 100644
--- a/gallery_dl/extractor/architizer.py
+++ b/gallery_dl/extractor/architizer.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,25 +21,7 @@ class ArchitizerProjectExtractor(GalleryExtractor):
filename_fmt = "{filename}.{extension}"
archive_fmt = "{gid}_{num}"
pattern = r"(?:https?://)?architizer\.com/projects/([^/?#]+)"
- test = ("https://architizer.com/projects/house-lo/", {
- "pattern": r"https://architizer-prod\.imgix\.net/media/mediadata"
- r"/uploads/.+\.jpg$",
- "keyword": {
- "count": 27,
- "description": str,
- "firm": "Atelier Lina Bellovicova",
- "gid": "225496",
- "location": "Czechia",
- "num": int,
- "size": "1000 sqft - 3000 sqft",
- "slug": "house-lo",
- "status": "Built",
- "subcategory": "project",
- "title": "House LO",
- "type": "Residential › Private House",
- "year": "2020",
- },
- })
+ example = "https://architizer.com/projects/NAME/"
def __init__(self, match):
url = "{}/projects/{}/".format(self.root, match.group(1))
@@ -47,11 +29,13 @@ class ArchitizerProjectExtractor(GalleryExtractor):
def metadata(self, page):
extr = text.extract_from(page)
+ extr('id="Pages"', "")
+
return {
- "title" : extr("data-name='", "'"),
- "slug" : extr("data-slug='", "'"),
- "gid" : extr("data-gid='", "'").rpartition(".")[2],
- "firm" : extr("data-firm-leaders-str='", "'"),
+ "title" : extr('data-name="', '"'),
+ "slug" : extr('data-slug="', '"'),
+ "gid" : extr('data-gid="', '"').rpartition(".")[2],
+ "firm" : extr('data-firm-leaders-str="', '"'),
"location" : extr("<h2>", "<").strip(),
"type" : text.unescape(text.remove_html(extr(
'<div class="title">Type</div>', '<br'))),
@@ -70,7 +54,7 @@ class ArchitizerProjectExtractor(GalleryExtractor):
return [
(url, None)
for url in text.extract_iter(
- page, "property='og:image:secure_url' content='", "?")
+ page, 'property="og:image:secure_url" content="', "?")
]
@@ -80,10 +64,7 @@ class ArchitizerFirmExtractor(Extractor):
subcategory = "firm"
root = "https://architizer.com"
pattern = r"(?:https?://)?architizer\.com/firms/([^/?#]+)"
- test = ("https://architizer.com/firms/olson-kundig/", {
- "pattern": ArchitizerProjectExtractor.pattern,
- "count": ">= 90",
- })
+ example = "https://architizer.com/firms/NAME/"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index a3a7c1e..b58b3d3 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2022 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -27,12 +27,12 @@ class ArtstationExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1) or match.group(2)
- self.external = self.config("external", False)
def items(self):
data = self.metadata()
projects = self.projects()
+ external = self.config("external", False)
max_posts = self.config("max-posts")
if max_posts:
projects = itertools.islice(projects, max_posts)
@@ -45,7 +45,7 @@ class ArtstationExtractor(Extractor):
asset["num"] = num
yield Message.Directory, asset
- if adict["has_embedded_player"] and self.external:
+ if adict["has_embedded_player"] and external:
player = adict["player_embedded"]
url = (text.extr(player, 'src="', '"') or
text.extr(player, "src='", "'"))
@@ -117,7 +117,6 @@ class ArtstationExtractor(Extractor):
headers = {
"Accept" : "application/json, text/plain, */*",
"Origin" : self.root,
- "Referer": self.root + "/",
}
if json:
@@ -147,7 +146,6 @@ class ArtstationExtractor(Extractor):
headers = {
"Accept" : "*/*",
"Origin" : self.root,
- "Referer": self.root + "/",
}
return self.request(
url, method="POST", headers=headers, json={},
@@ -178,17 +176,7 @@ class ArtstationUserExtractor(ArtstationExtractor):
pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?#]+)(?:/albums/all)?"
r"|((?!www)\w+)\.artstation\.com(?:/projects)?)/?$")
- test = (
- ("https://www.artstation.com/sungchoi/", {
- "pattern": r"https://\w+\.artstation\.com/p/assets/images"
- r"/images/\d+/\d+/\d+/(4k|large|medium|small)/[^/]+",
- "range": "1-10",
- "count": ">= 10",
- }),
- ("https://www.artstation.com/sungchoi/albums/all/"),
- ("https://sungchoi.artstation.com/"),
- ("https://sungchoi.artstation.com/projects/"),
- )
+ example = "https://www.artstation.com/USER"
def projects(self):
url = "{}/users/{}/projects.json".format(self.root, self.user)
@@ -205,15 +193,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?#]+)"
r"|((?!www)\w+)\.artstation\.com)/albums/(\d+)")
- test = (
- ("https://www.artstation.com/huimeiye/albums/770899", {
- "count": 2,
- }),
- ("https://www.artstation.com/huimeiye/albums/770898", {
- "exception": exception.NotFoundError,
- }),
- ("https://huimeiye.artstation.com/albums/770899"),
- )
+ example = "https://www.artstation.com/USER/albums/12345"
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
@@ -247,17 +227,7 @@ class ArtstationLikesExtractor(ArtstationExtractor):
archive_fmt = "f_{userinfo[id]}_{asset[id]}"
pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?#]+)/likes/?")
- test = (
- ("https://www.artstation.com/mikf/likes", {
- "pattern": r"https://\w+\.artstation\.com/p/assets/images"
- r"/images/\d+/\d+/\d+/(4k|large|medium|small)/[^/]+",
- "count": 6,
- }),
- # no likes
- ("https://www.artstation.com/sungchoi/likes", {
- "count": 0,
- }),
- )
+ example = "https://www.artstation.com/USER/likes"
def projects(self):
url = "{}/users/{}/likes.json".format(self.root, self.user)
@@ -274,14 +244,7 @@ class ArtstationChallengeExtractor(ArtstationExtractor):
pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
r"/contests/[^/?#]+/challenges/(\d+)"
r"/?(?:\?sorting=([a-z]+))?")
- test = (
- ("https://www.artstation.com/contests/thu-2017/challenges/20"),
- (("https://www.artstation.com/contests/beyond-human"
- "/challenges/23?sorting=winners"), {
- "range": "1-30",
- "count": 30,
- }),
- )
+ example = "https://www.artstation.com/contests/NAME/challenges/12345"
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
@@ -327,10 +290,7 @@ class ArtstationSearchExtractor(ArtstationExtractor):
archive_fmt = "s_{search[query]}_{asset[id]}"
pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
r"/search/?\?([^#]+)")
- test = ("https://www.artstation.com/search?query=ancient&sort_by=rank", {
- "range": "1-20",
- "count": 20,
- })
+ example = "https://www.artstation.com/search?query=QUERY"
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
@@ -377,10 +337,7 @@ class ArtstationArtworkExtractor(ArtstationExtractor):
archive_fmt = "A_{asset[id]}"
pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
r"/artwork/?\?([^#]+)")
- test = ("https://www.artstation.com/artwork?sorting=latest", {
- "range": "1-20",
- "count": 20,
- })
+ example = "https://www.artstation.com/artwork?sorting=SORT"
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
@@ -400,32 +357,7 @@ class ArtstationImageExtractor(ArtstationExtractor):
pattern = (r"(?:https?://)?(?:"
r"(?:\w+\.)?artstation\.com/(?:artwork|projects|search)"
r"|artstn\.co/p)/(\w+)")
- test = (
- ("https://www.artstation.com/artwork/LQVJr", {
- "pattern": r"https?://\w+\.artstation\.com/p/assets"
- r"/images/images/008/760/279/4k/.+",
- "content": "7b113871465fdc09d127adfdc2767d51cf45a7e9",
- # SHA1 hash without _no_cache()
- # "content": "44b80f9af36d40efc5a2668cdd11d36d6793bae9",
- }),
- # multiple images per project
- ("https://www.artstation.com/artwork/Db3dy", {
- "count": 4,
- }),
- # embedded youtube video
- ("https://www.artstation.com/artwork/g4WPK", {
- "range": "2",
- "options": (("external", True),),
- "pattern": "ytdl:https://www.youtube.com/embed/JNFfJtwwrU0",
- }),
- # 404 (#3016)
- ("https://www.artstation.com/artwork/3q3mXB", {
- "count": 0,
- }),
- # alternate URL patterns
- ("https://sungchoi.artstation.com/projects/LQVJr"),
- ("https://artstn.co/p/LQVJr"),
- )
+ example = "https://www.artstation.com/artwork/abcde"
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
@@ -453,10 +385,7 @@ class ArtstationFollowingExtractor(ArtstationExtractor):
subcategory = "following"
pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?#]+)/following")
- test = ("https://www.artstation.com/sungchoi/following", {
- "pattern": ArtstationUserExtractor.pattern,
- "count": ">= 50",
- })
+ example = "https://www.artstation.com/USER/following"
def items(self):
url = "{}/users/{}/following.json".format(self.root, self.user)
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 6f01572..576bc83 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020-2022 Mike Fährmann
+# Copyright 2020-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -23,8 +23,8 @@ class AryionExtractor(Extractor):
directory_fmt = ("{category}", "{user!l}", "{path:J - }")
filename_fmt = "{id} {title}.{extension}"
archive_fmt = "{id}"
- cookiedomain = ".aryion.com"
- cookienames = ("phpbb3_rl7a3_sid",)
+ cookies_domain = ".aryion.com"
+ cookies_names = ("phpbb3_rl7a3_sid",)
root = "https://aryion.com"
def __init__(self, match):
@@ -33,11 +33,12 @@ class AryionExtractor(Extractor):
self.recursive = True
def login(self):
- if self._check_cookies(self.cookienames):
+ if self.cookies_check(self.cookies_names):
return
+
username, password = self._get_auth_info()
if username:
- self._update_cookies(self._login_impl(username, password))
+ self.cookies_update(self._login_impl(username, password))
@cache(maxage=14*24*3600, keyarg=1)
def _login_impl(self, username, password):
@@ -53,7 +54,7 @@ class AryionExtractor(Extractor):
response = self.request(url, method="POST", data=data)
if b"You have been successfully logged in." not in response.content:
raise exception.AuthenticationError()
- return {c: response.cookies[c] for c in self.cookienames}
+ return {c: response.cookies[c] for c in self.cookies_names}
def items(self):
self.login()
@@ -175,22 +176,15 @@ class AryionGalleryExtractor(AryionExtractor):
subcategory = "gallery"
categorytransfer = True
pattern = BASE_PATTERN + r"/(?:gallery/|user/|latest.php\?name=)([^/?#]+)"
- test = (
- ("https://aryion.com/g4/gallery/jameshoward", {
- "options": (("recursive", False),),
- "pattern": r"https://aryion\.com/g4/data\.php\?id=\d+$",
- "range": "48-52",
- "count": 5,
- }),
- ("https://aryion.com/g4/user/jameshoward"),
- ("https://aryion.com/g4/latest.php?name=jameshoward"),
- )
+ example = "https://aryion.com/g4/gallery/USER"
def __init__(self, match):
AryionExtractor.__init__(self, match)
- self.recursive = self.config("recursive", True)
self.offset = 0
+ def _init(self):
+ self.recursive = self.config("recursive", True)
+
def skip(self, num):
if self.recursive:
return 0
@@ -212,13 +206,13 @@ class AryionTagExtractor(AryionExtractor):
directory_fmt = ("{category}", "tags", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = BASE_PATTERN + r"/tags\.php\?([^#]+)"
- test = ("https://aryion.com/g4/tags.php?tag=star+wars&p=19", {
- "count": ">= 5",
- })
+ example = "https://aryion.com/g4/tags.php?tag=TAG"
- def metadata(self):
+ def _init(self):
self.params = text.parse_query(self.user)
self.user = None
+
+ def metadata(self):
return {"search_tags": self.params.get("tag")}
def posts(self):
@@ -230,40 +224,7 @@ class AryionPostExtractor(AryionExtractor):
"""Extractor for individual posts on eka's portal"""
subcategory = "post"
pattern = BASE_PATTERN + r"/view/(\d+)"
- test = (
- ("https://aryion.com/g4/view/510079", {
- "url": "f233286fa5558c07ae500f7f2d5cb0799881450e",
- "keyword": {
- "artist" : "jameshoward",
- "user" : "jameshoward",
- "filename" : "jameshoward-510079-subscribestar_150",
- "extension": "jpg",
- "id" : 510079,
- "width" : 1665,
- "height" : 1619,
- "size" : 784239,
- "title" : "I'm on subscribestar now too!",
- "description": r"re:Doesn't hurt to have a backup, right\?",
- "tags" : ["Non-Vore", "subscribestar"],
- "date" : "dt:2019-02-16 19:30:34",
- "path" : [],
- "views" : int,
- "favorites": int,
- "comments" : int,
- "_mtime" : "Sat, 16 Feb 2019 19:30:34 GMT",
- },
- }),
- # x-folder (#694)
- ("https://aryion.com/g4/view/588928", {
- "pattern": pattern,
- "count": ">= 8",
- }),
- # x-comic-folder (#945)
- ("https://aryion.com/g4/view/537379", {
- "pattern": pattern,
- "count": 2,
- }),
- )
+ example = "https://aryion.com/g4/view/12345"
def posts(self):
post_id, self.user = self.user, None
diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py
index 638fedc..54aaac4 100644
--- a/gallery_dl/extractor/bbc.py
+++ b/gallery_dl/extractor/bbc.py
@@ -23,18 +23,7 @@ class BbcGalleryExtractor(GalleryExtractor):
filename_fmt = "{num:>02}.{extension}"
archive_fmt = "{programme}_{num}"
pattern = BASE_PATTERN + r"[^/?#]+(?!/galleries)(?:/[^/?#]+)?)$"
- test = (
- ("https://www.bbc.co.uk/programmes/p084qtzs/p085g9kg", {
- "pattern": r"https://ichef\.bbci\.co\.uk"
- r"/images/ic/1920xn/\w+\.jpg",
- "count": 37,
- "keyword": {
- "programme": "p084qtzs",
- "path": ["BBC One", "Doctor Who", "The Timeless Children"],
- },
- }),
- ("https://www.bbc.co.uk/programmes/p084qtzs"),
- )
+ example = "https://www.bbc.co.uk/programmes/PATH"
def metadata(self, page):
data = util.json_loads(text.extr(
@@ -72,17 +61,7 @@ class BbcProgrammeExtractor(Extractor):
subcategory = "programme"
root = "https://www.bbc.co.uk"
pattern = BASE_PATTERN + r"[^/?#]+/galleries)(?:/?\?page=(\d+))?"
- test = (
- ("https://www.bbc.co.uk/programmes/b006q2x0/galleries", {
- "pattern": BbcGalleryExtractor.pattern,
- "range": "1-50",
- "count": ">= 50",
- }),
- ("https://www.bbc.co.uk/programmes/b006q2x0/galleries?page=40", {
- "pattern": BbcGalleryExtractor.pattern,
- "count": ">= 100",
- }),
- )
+ example = "https://www.bbc.co.uk/programmes/ID/galleries"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index d8cc51d..fc5f9ef 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -9,7 +9,7 @@
"""Extractors for https://www.behance.net/"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text, util, exception
class BehanceExtractor(Extractor):
@@ -18,6 +18,12 @@ class BehanceExtractor(Extractor):
root = "https://www.behance.net"
request_interval = (2.0, 4.0)
+ def _init(self):
+ self._bcp = self.cookies.get("bcp", domain="www.behance.net")
+ if not self._bcp:
+ self._bcp = "4c34489d-914c-46cd-b44c-dfd0e661136d"
+ self.cookies.set("bcp", self._bcp, domain="www.behance.net")
+
def items(self):
for gallery in self.galleries():
gallery["_extractor"] = BehanceGalleryExtractor
@@ -26,14 +32,29 @@ class BehanceExtractor(Extractor):
def galleries(self):
"""Return all relevant gallery URLs"""
- @staticmethod
- def _update(data):
+ def _request_graphql(self, endpoint, variables):
+ url = self.root + "/v3/graphql"
+ headers = {
+ "Origin": self.root,
+ "X-BCP" : self._bcp,
+ "X-Requested-With": "XMLHttpRequest",
+ }
+ data = {
+ "query" : GRAPHQL_QUERIES[endpoint],
+ "variables": variables,
+ }
+
+ return self.request(url, method="POST", headers=headers,
+ json=data).json()["data"]
+
+ def _update(self, data):
# compress data to simple lists
if data["fields"] and isinstance(data["fields"][0], dict):
data["fields"] = [
field.get("name") or field.get("label")
for field in data["fields"]
]
+
data["owners"] = [
owner.get("display_name") or owner.get("displayName")
for owner in data["owners"]
@@ -44,6 +65,9 @@ class BehanceExtractor(Extractor):
tags = [tag["title"] for tag in tags]
data["tags"] = tags
+ data["date"] = text.parse_timestamp(
+ data.get("publishedOn") or data.get("conceived_on") or 0)
+
# backwards compatibility
data["gallery_id"] = data["id"]
data["title"] = data["name"]
@@ -59,38 +83,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
filename_fmt = "{category}_{id}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
pattern = r"(?:https?://)?(?:www\.)?behance\.net/gallery/(\d+)"
- test = (
- ("https://www.behance.net/gallery/17386197/A-Short-Story", {
- "count": 2,
- "url": "ab79bd3bef8d3ae48e6ac74fd995c1dfaec1b7d2",
- "keyword": {
- "id": 17386197,
- "name": 're:"Hi". A short story about the important things ',
- "owners": ["Place Studio", "Julio César Velazquez"],
- "fields": ["Animation", "Character Design", "Directing"],
- "tags": list,
- "module": dict,
- },
- }),
- ("https://www.behance.net/gallery/21324767/Nevada-City", {
- "count": 6,
- "url": "0258fe194fe7d828d6f2c7f6086a9a0a4140db1d",
- "keyword": {"owners": ["Alex Strohl"]},
- }),
- # 'media_collection' modules
- ("https://www.behance.net/gallery/88276087/Audi-R8-RWD", {
- "count": 20,
- "url": "6bebff0d37f85349f9ad28bd8b76fd66627c1e2f",
- "pattern": r"https://mir-s3-cdn-cf\.behance\.net/project_modules"
- r"/source/[0-9a-f]+.[0-9a-f]+\.jpg"
- }),
- # 'video' modules (#1282)
- ("https://www.behance.net/gallery/101185577/COLCCI", {
- "pattern": r"https://cdn-prod-ccv\.adobe\.com/\w+"
- r"/rend/\w+_720\.mp4\?",
- "count": 3,
- }),
- )
+ example = "https://www.behance.net/gallery/12345/TITLE"
def __init__(self, match):
BehanceExtractor.__init__(self, match)
@@ -111,10 +104,6 @@ class BehanceGalleryExtractor(BehanceExtractor):
"""Collect gallery info dict"""
url = "{}/gallery/{}/a".format(self.root, self.gallery_id)
cookies = {
- "_evidon_consent_cookie":
- '{"consent_date":"2019-01-31T09:41:15.132Z"}',
- "bcp": "4c34489d-914c-46cd-b44c-dfd0e661136d",
- "gk_suid": "66981391",
"gki": '{"feature_project_view":false,'
'"feature_discover_login_prompt":false,'
'"feature_project_login_prompt":false}',
@@ -128,6 +117,18 @@ class BehanceGalleryExtractor(BehanceExtractor):
def get_images(self, data):
"""Extract image results from an API response"""
+ if not data["modules"]:
+ access = data.get("matureAccess")
+ if access == "logged-out":
+ raise exception.AuthorizationError(
+ "Mature content galleries require logged-in cookies")
+ if access == "restricted-safe":
+ raise exception.AuthorizationError(
+ "Mature content blocked in account settings")
+ if access and access != "allowed":
+ raise exception.AuthorizationError()
+ return ()
+
result = []
append = result.append
@@ -139,7 +140,13 @@ class BehanceGalleryExtractor(BehanceExtractor):
append((url, module))
elif mtype == "VideoModule":
- renditions = module["videoData"]["renditions"]
+ try:
+ renditions = module["videoData"]["renditions"]
+ except Exception:
+ self.log.warning("No download URLs for video %s",
+ module.get("id") or "???")
+ continue
+
try:
url = [
r["url"] for r in renditions
@@ -148,6 +155,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
except Exception as exc:
self.log.debug("%s: %s", exc.__class__.__name__, exc)
url = "ytdl:" + renditions[-1]["url"]
+
append((url, module))
elif mtype == "MediaCollectionModule":
@@ -172,27 +180,27 @@ class BehanceUserExtractor(BehanceExtractor):
subcategory = "user"
categorytransfer = True
pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?#]+)/?$"
- test = ("https://www.behance.net/alexstrohl", {
- "count": ">= 8",
- "pattern": BehanceGalleryExtractor.pattern,
- })
+ example = "https://www.behance.net/USER"
def __init__(self, match):
BehanceExtractor.__init__(self, match)
self.user = match.group(1)
def galleries(self):
- url = "{}/{}/projects".format(self.root, self.user)
- params = {"offset": 0}
- headers = {"X-Requested-With": "XMLHttpRequest"}
+ endpoint = "GetProfileProjects"
+ variables = {
+ "username": self.user,
+ "after" : "MAo=", # "0" in base64
+ }
while True:
- data = self.request(url, params=params, headers=headers).json()
- work = data["profile"]["activeSection"]["work"]
- yield from work["projects"]
- if not work["hasMore"]:
+ data = self._request_graphql(endpoint, variables)
+ items = data["user"]["profileProjects"]
+ yield from items["nodes"]
+
+ if not items["pageInfo"]["hasNextPage"]:
return
- params["offset"] += len(work["projects"])
+ variables["after"] = items["pageInfo"]["endCursor"]
class BehanceCollectionExtractor(BehanceExtractor):
@@ -200,31 +208,193 @@ class BehanceCollectionExtractor(BehanceExtractor):
subcategory = "collection"
categorytransfer = True
pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"
- test = ("https://www.behance.net/collection/71340149/inspiration", {
- "count": ">= 145",
- "pattern": BehanceGalleryExtractor.pattern,
- })
+ example = "https://www.behance.net/collection/12345/TITLE"
def __init__(self, match):
BehanceExtractor.__init__(self, match)
self.collection_id = match.group(1)
def galleries(self):
- url = self.root + "/v3/graphql"
- headers = {
- "Origin" : self.root,
- "Referer": self.root + "/collection/" + self.collection_id,
- "X-BCP" : "4c34489d-914c-46cd-b44c-dfd0e661136d",
- "X-NewRelic-ID" : "VgUFVldbGwsFU1BRDwUBVw==",
- "X-Requested-With": "XMLHttpRequest",
+ endpoint = "GetMoodboardItemsAndRecommendations"
+ variables = {
+ "afterItem": "MAo=", # "0" in base64
+ "firstItem": 40,
+ "id" : int(self.collection_id),
+ "shouldGetItems" : True,
+ "shouldGetMoodboardFields": False,
+ "shouldGetRecommendations": False,
}
- cookies = {
- "bcp" : "4c34489d-914c-46cd-b44c-dfd0e661136d",
- "gk_suid": "66981391",
- "ilo0" : "true",
+
+ while True:
+ data = self._request_graphql(endpoint, variables)
+ items = data["moodboard"]["items"]
+
+ for node in items["nodes"]:
+ yield node["entity"]
+
+ if not items["pageInfo"]["hasNextPage"]:
+ return
+ variables["afterItem"] = items["pageInfo"]["endCursor"]
+
+
+GRAPHQL_QUERIES = {
+ "GetProfileProjects": """\
+query GetProfileProjects($username: String, $after: String) {
+ user(username: $username) {
+ profileProjects(first: 12, after: $after) {
+ pageInfo {
+ endCursor
+ hasNextPage
+ }
+ nodes {
+ __typename
+ adminFlags {
+ mature_lock
+ privacy_lock
+ dmca_lock
+ flagged_lock
+ privacy_violation_lock
+ trademark_lock
+ spam_lock
+ eu_ip_lock
+ }
+ colors {
+ r
+ g
+ b
+ }
+ covers {
+ size_202 {
+ url
+ }
+ size_404 {
+ url
+ }
+ size_808 {
+ url
+ }
+ }
+ features {
+ url
+ name
+ featuredOn
+ ribbon {
+ image
+ image2x
+ image3x
+ }
}
+ fields {
+ id
+ label
+ slug
+ url
+ }
+ hasMatureContent
+ id
+ isFeatured
+ isHiddenFromWorkTab
+ isMatureReviewSubmitted
+ isOwner
+ isFounder
+ isPinnedToSubscriptionOverview
+ isPrivate
+ linkedAssets {
+ ...sourceLinkFields
+ }
+ linkedAssetsCount
+ sourceFiles {
+ ...sourceFileFields
+ }
+ matureAccess
+ modifiedOn
+ name
+ owners {
+ ...OwnerFields
+ images {
+ size_50 {
+ url
+ }
+ }
+ }
+ premium
+ publishedOn
+ stats {
+ appreciations {
+ all
+ }
+ views {
+ all
+ }
+ comments {
+ all
+ }
+ }
+ slug
+ tools {
+ id
+ title
+ category
+ categoryLabel
+ categoryId
+ approved
+ url
+ backgroundColor
+ }
+ url
+ }
+ }
+ }
+}
+
+fragment sourceFileFields on SourceFile {
+ __typename
+ sourceFileId
+ projectId
+ userId
+ title
+ assetId
+ renditionUrl
+ mimeType
+ size
+ category
+ licenseType
+ unitAmount
+ currency
+ tier
+ hidden
+ extension
+ hasUserPurchased
+}
+
+fragment sourceLinkFields on LinkedAsset {
+ __typename
+ name
+ premium
+ url
+ category
+ licenseType
+}
+
+fragment OwnerFields on User {
+ displayName
+ hasPremiumAccess
+ id
+ isFollowing
+ isProfileOwner
+ location
+ locationUrl
+ url
+ username
+ availabilityInfo {
+ availabilityTimeline
+ isAvailableFullTime
+ isAvailableFreelance
+ }
+}
+""",
- query = """
+ "GetMoodboardItemsAndRecommendations": """\
query GetMoodboardItemsAndRecommendations(
$id: Int!
$firstItem: Int!
@@ -269,13 +439,7 @@ fragment moodboardFields on Moodboard {
url
isOwner
owners {
- id
- displayName
- url
- firstName
- location
- locationUrl
- isFollowing
+ ...OwnerFields
images {
size_50 {
url
@@ -300,6 +464,7 @@ fragment moodboardFields on Moodboard {
}
fragment projectFields on Project {
+ __typename
id
isOwner
publishedOn
@@ -328,13 +493,7 @@ fragment projectFields on Project {
b
}
owners {
- url
- displayName
- id
- location
- locationUrl
- isProfileOwner
- isFollowing
+ ...OwnerFields
images {
size_50 {
url
@@ -468,26 +627,23 @@ fragment nodesFields on MoodboardItem {
}
}
}
-"""
- variables = {
- "afterItem": "MAo=",
- "firstItem": 40,
- "id" : int(self.collection_id),
- "shouldGetItems" : True,
- "shouldGetMoodboardFields": False,
- "shouldGetRecommendations": False,
- }
- data = {"query": query, "variables": variables}
-
- while True:
- items = self.request(
- url, method="POST", headers=headers,
- cookies=cookies, json=data,
- ).json()["data"]["moodboard"]["items"]
- for node in items["nodes"]:
- yield node["entity"]
+fragment OwnerFields on User {
+ displayName
+ hasPremiumAccess
+ id
+ isFollowing
+ isProfileOwner
+ location
+ locationUrl
+ url
+ username
+ availabilityInfo {
+ availabilityTimeline
+ isAvailableFullTime
+ isAvailableFreelance
+ }
+}
+""",
- if not items["pageInfo"]["hasNextPage"]:
- return
- variables["afterItem"] = items["pageInfo"]["endCursor"]
+}
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 3ceada8..d75c349 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -28,12 +28,13 @@ class BloggerExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.videos = self.config("videos", True)
self.blog = match.group(1) or match.group(2)
+
+ def _init(self):
self.api = BloggerAPI(self)
+ self.videos = self.config("videos", True)
def items(self):
-
blog = self.api.blog_by_url("http://" + self.blog)
blog["pages"] = blog["pages"]["totalItems"]
blog["posts"] = blog["posts"]["totalItems"]
@@ -94,59 +95,8 @@ class BloggerExtractor(Extractor):
class BloggerPostExtractor(BloggerExtractor):
"""Extractor for a single blog post"""
subcategory = "post"
- pattern = BASE_PATTERN + r"(/\d{4}/\d\d/[^/?#]+\.html)"
- test = (
- ("https://julianbphotography.blogspot.com/2010/12/moon-rise.html", {
- "url": "9928429fb62f712eb4de80f53625eccecc614aae",
- "pattern": r"https://3.bp.blogspot.com/.*/s0/Icy-Moonrise-.*.jpg",
- "keyword": {
- "blog": {
- "date" : "dt:2010-11-21 18:19:42",
- "description": "",
- "id" : "5623928067739466034",
- "kind" : "blogger#blog",
- "locale" : dict,
- "name" : "Julian Bunker Photography",
- "pages" : int,
- "posts" : int,
- "published" : "2010-11-21T10:19:42-08:00",
- "updated" : str,
- "url" : "http://julianbphotography.blogspot.com/",
- },
- "post": {
- "author" : "Julian Bunker",
- "content" : str,
- "date" : "dt:2010-12-26 01:08:00",
- "etag" : str,
- "id" : "6955139236418998998",
- "kind" : "blogger#post",
- "published" : "2010-12-25T17:08:00-08:00",
- "replies" : "0",
- "title" : "Moon Rise",
- "updated" : "2011-12-06T05:21:24-08:00",
- "url" : "re:.+/2010/12/moon-rise.html$",
- },
- "num": int,
- "url": str,
- },
- }),
- ("blogger:http://www.julianbunker.com/2010/12/moon-rise.html"),
- # video (#587)
- (("http://cfnmscenesinmovies.blogspot.com/2011/11/"
- "cfnm-scene-jenna-fischer-in-office.html"), {
- "pattern": r"https://.+\.googlevideo\.com/videoplayback",
- }),
- # image URLs with width/height (#1061)
- # ("https://aaaninja.blogspot.com/2020/08/altera-boob-press-2.html", {
- # "pattern": r"https://1.bp.blogspot.com/.+/s0/altera_.+png",
- # }),
- # new image domain (#2204)
- (("https://randomthingsthroughmyletterbox.blogspot.com/2022/01"
- "/bitter-flowers-by-gunnar-staalesen-blog.html"), {
- "pattern": r"https://blogger.googleusercontent.com/img/a/.+=s0$",
- "count": 8,
- }),
- )
+ pattern = BASE_PATTERN + r"(/\d\d\d\d/\d\d/[^/?#]+\.html)"
+ example = "https://BLOG.blogspot.com/1970/01/TITLE.html"
def __init__(self, match):
BloggerExtractor.__init__(self, match)
@@ -160,17 +110,7 @@ class BloggerBlogExtractor(BloggerExtractor):
"""Extractor for an entire Blogger blog"""
subcategory = "blog"
pattern = BASE_PATTERN + r"/?$"
- test = (
- ("https://julianbphotography.blogspot.com/", {
- "range": "1-25",
- "count": 25,
- "pattern": r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
- }),
- ("blogger:https://www.kefblog.com.ng/", {
- "range": "1-25",
- "count": 25,
- }),
- )
+ example = "https://BLOG.blogspot.com/"
def posts(self, blog):
return self.api.blog_posts(blog["id"])
@@ -180,12 +120,7 @@ class BloggerSearchExtractor(BloggerExtractor):
"""Extractor for Blogger search resuls"""
subcategory = "search"
pattern = BASE_PATTERN + r"/search/?\?q=([^&#]+)"
- test = (
- ("https://julianbphotography.blogspot.com/search?q=400mm", {
- "count": "< 10",
- "keyword": {"query": "400mm"},
- }),
- )
+ example = "https://BLOG.blogspot.com/search?q=QUERY"
def __init__(self, match):
BloggerExtractor.__init__(self, match)
@@ -202,13 +137,7 @@ class BloggerLabelExtractor(BloggerExtractor):
"""Extractor for Blogger posts by label"""
subcategory = "label"
pattern = BASE_PATTERN + r"/search/label/([^/?#]+)"
- test = (
- ("https://dmmagazine.blogspot.com/search/label/D%26D", {
- "range": "1-25",
- "count": 25,
- "keyword": {"label": "D&D"},
- }),
- )
+ example = "https://BLOG.blogspot.com/search/label/LABEL"
def __init__(self, match):
BloggerExtractor.__init__(self, match)
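
Two changes recur throughout this release and are both visible in the blogger hunks: per-extractor setup that needs config or network access moves from __init__ into a new _init() hook (run later by Extractor.initialize(), see the common.py hunks below), and the inline test tuples give way to a single example placeholder URL. A schematic, purely hypothetical extractor showing the resulting shape:

    from gallery_dl.extractor.common import Extractor

    class ExampleAPI:                               # stand-in for BloggerAPI etc.
        def __init__(self, extractor):
            self.extractor = extractor

    class ExampleBlogPostExtractor(Extractor):      # hypothetical, for illustration
        category = "exampleblog"
        subcategory = "post"
        pattern = r"(?:https?://)?blog\.example\.org/(\d+)"
        example = "https://blog.example.org/12345"  # replaces the old 'test' tuples

        def __init__(self, match):
            Extractor.__init__(self, match)
            self.post_id = match.group(1)           # cheap parsing stays here

        def _init(self):
            # deferred: runs once when the extractor is first iterated
            self.api = ExampleAPI(self)
            self.videos = self.config("videos", True)
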
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 35b2752..5509f5a 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -10,6 +10,18 @@
from .lolisafe import LolisafeAlbumExtractor
from .. import text
+from urllib.parse import urlsplit, urlunsplit
+
+MEDIA_DOMAIN_OVERRIDES = {
+ "cdn9.bunkr.ru" : "c9.bunkr.ru",
+ "cdn12.bunkr.ru": "media-files12.bunkr.la",
+ "cdn-pizza.bunkr.ru": "pizza.bunkr.ru",
+}
+
+CDN_HOSTED_EXTENSIONS = (
+ ".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts", ".wmv",
+ ".zip", ".rar", ".7z",
+)
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
@@ -17,53 +29,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
category = "bunkr"
root = "https://bunkrr.su"
pattern = r"(?:https?://)?(?:app\.)?bunkr+\.(?:la|[sr]u|is|to)/a/([^/?#]+)"
- test = (
- ("https://bunkrr.su/a/Lktg9Keq", {
- "pattern": r"https://cdn\.bunkr\.ru/test-テスト-\"&>-QjgneIQv\.png",
- "content": "0c8768055e4e20e7c7259608b67799171b691140",
- "keyword": {
- "album_id": "Lktg9Keq",
- "album_name": 'test テスト "&>',
- "count": 1,
- "filename": 'test-テスト-"&>-QjgneIQv',
- "id": "QjgneIQv",
- "name": 'test-テスト-"&>',
- "num": int,
- },
- }),
- # mp4 (#2239)
- ("https://app.bunkr.ru/a/ptRHaCn2", {
- "pattern": r"https://media-files\.bunkr\.ru/_-RnHoW69L\.mp4",
- "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
- }),
- # cdn4
- ("https://bunkr.is/a/iXTTc1o2", {
- "pattern": r"https://(cdn|media-files)4\.bunkr\.ru/",
- "content": "da29aae371b7adc8c5ef8e6991b66b69823791e8",
- "keyword": {
- "album_id": "iXTTc1o2",
- "album_name": "test2",
- "album_size": "691.1 KB",
- "count": 2,
- "description": "072022",
- "filename": "re:video-wFO9FtxG|image-sZrQUeOx",
- "id": "re:wFO9FtxG|sZrQUeOx",
- "name": "re:video|image",
- "num": int,
- },
- }),
- # cdn12 .ru TLD (#4147)
- ("https://bunkrr.su/a/j1G29CnD", {
- "pattern": r"https://(cdn12.bunkr.ru|media-files12.bunkr.la)/\w+",
- "count": 8,
- }),
- ("https://bunkrr.su/a/Lktg9Keq"),
- ("https://bunkr.la/a/Lktg9Keq"),
- ("https://bunkr.su/a/Lktg9Keq"),
- ("https://bunkr.ru/a/Lktg9Keq"),
- ("https://bunkr.is/a/Lktg9Keq"),
- ("https://bunkr.to/a/Lktg9Keq"),
- )
+ example = "https://bunkrr.su/a/ID"
def fetch_album(self, album_id):
# album metadata
@@ -72,37 +38,37 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
page, "<h1", "</div>").partition(">")[2])
count, _, size = info[1].split(None, 2)
- # files
- cdn = None
- files = []
- append = files.append
- headers = {"Referer": self.root + "/"}
-
pos = page.index('class="grid-images')
- for url in text.extract_iter(page, '<a href="', '"', pos):
- if url.startswith("/"):
- if not cdn:
- # fetch cdn root from download page
- durl = "{}/d/{}".format(self.root, url[3:])
- cdn = text.extr(self.request(
- durl).text, 'link.href = "', '"')
- cdn = cdn[:cdn.index("/", 8)]
- url = cdn + url[2:]
-
- url = text.unescape(url)
- if url.endswith((".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts",
- ".zip", ".rar", ".7z")):
- if url.startswith("https://cdn12."):
- url = ("https://media-files12.bunkr.la" +
- url[url.find("/", 14):])
- else:
- url = url.replace("://cdn", "://media-files", 1)
- append({"file": url, "_http_headers": headers})
+ urls = list(text.extract_iter(page, '<a href="', '"', pos))
- return files, {
+ return self._extract_files(urls), {
"album_id" : self.album_id,
"album_name" : text.unescape(info[0]),
"album_size" : size[1:-1],
"description": text.unescape(info[2]) if len(info) > 2 else "",
- "count" : len(files),
+ "count" : len(urls),
}
+
+ def _extract_files(self, urls):
+ for url in urls:
+ if url.startswith("/"):
+ try:
+ page = self.request(self.root + text.unescape(url)).text
+ if url[1] == "v":
+ url = text.extr(page, '<source src="', '"')
+ else:
+ url = text.extr(page, '<img src="', '"')
+ except Exception as exc:
+ self.log.error("%s: %s", exc.__class__.__name__, exc)
+ continue
+
+ else:
+ if url.lower().endswith(CDN_HOSTED_EXTENSIONS):
+ scheme, domain, path, query, fragment = urlsplit(url)
+ if domain in MEDIA_DOMAIN_OVERRIDES:
+ domain = MEDIA_DOMAIN_OVERRIDES[domain]
+ else:
+ domain = domain.replace("cdn", "media-files", 1)
+ url = urlunsplit((scheme, domain, path, query, fragment))
+
+ yield {"file": text.unescape(url)}
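
The rewritten logic splits into a generator: relative links get resolved through their file page, while direct CDN links ending in one of CDN_HOSTED_EXTENSIONS have their host rewritten. A standalone sketch of just that host rewrite (the function name is mine; the mapping values are copied from the hunk above):

    from urllib.parse import urlsplit, urlunsplit

    MEDIA_DOMAIN_OVERRIDES = {
        "cdn9.bunkr.ru"     : "c9.bunkr.ru",
        "cdn12.bunkr.ru"    : "media-files12.bunkr.la",
        "cdn-pizza.bunkr.ru": "pizza.bunkr.ru",
    }

    def rewrite_cdn_host(url):
        """Map a bunkr CDN URL onto the host that actually serves the media."""
        scheme, domain, path, query, fragment = urlsplit(url)
        if domain in MEDIA_DOMAIN_OVERRIDES:
            domain = MEDIA_DOMAIN_OVERRIDES[domain]
        else:
            domain = domain.replace("cdn", "media-files", 1)
        return urlunsplit((scheme, domain, path, query, fragment))

    # rewrite_cdn_host("https://cdn12.bunkr.ru/abc.mp4")
    #   -> "https://media-files12.bunkr.la/abc.mp4"
    # rewrite_cdn_host("https://cdn4.bunkr.ru/abc.zip")
    #   -> "https://media-files4.bunkr.ru/abc.zip"
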
diff --git a/gallery_dl/extractor/catbox.py b/gallery_dl/extractor/catbox.py
index 7a21d2a..6c81f53 100644
--- a/gallery_dl/extractor/catbox.py
+++ b/gallery_dl/extractor/catbox.py
@@ -21,22 +21,7 @@ class CatboxAlbumExtractor(GalleryExtractor):
directory_fmt = ("{category}", "{album_name} ({album_id})")
archive_fmt = "{album_id}_{filename}"
pattern = r"(?:https?://)?(?:www\.)?catbox\.moe(/c/[^/?#]+)"
- test = (
- ("https://catbox.moe/c/1igcbe", {
- "url": "35866a88c29462814f103bc22ec031eaeb380f8a",
- "content": "70ddb9de3872e2d17cc27e48e6bf395e5c8c0b32",
- "pattern": r"https://files\.catbox\.moe/\w+\.\w{3}$",
- "count": 3,
- "keyword": {
- "album_id": "1igcbe",
- "album_name": "test",
- "date": "dt:2022-08-18 00:00:00",
- "description": "album test &>",
- },
- }),
- ("https://www.catbox.moe/c/cd90s1"),
- ("https://catbox.moe/c/w7tm47#"),
- )
+ example = "https://catbox.moe/c/ID"
def metadata(self, page):
extr = text.extract_from(page)
@@ -62,15 +47,7 @@ class CatboxFileExtractor(Extractor):
subcategory = "file"
archive_fmt = "{filename}"
pattern = r"(?:https?://)?(?:files|litter|de)\.catbox\.moe/([^/?#]+)"
- test = (
- ("https://files.catbox.moe/8ih3y7.png", {
- "pattern": r"^https://files\.catbox\.moe/8ih3y7\.png$",
- "content": "0c8768055e4e20e7c7259608b67799171b691140",
- "count": 1,
- }),
- ("https://litter.catbox.moe/t8v3n9.png"),
- ("https://de.catbox.moe/bjdmz1.jpg"),
- )
+ example = "https://files.catbox.moe/NAME.EXT"
def items(self):
url = text.ensure_http_scheme(self.url)
diff --git a/gallery_dl/extractor/comicvine.py b/gallery_dl/extractor/comicvine.py
index 3a57886..d076795 100644
--- a/gallery_dl/extractor/comicvine.py
+++ b/gallery_dl/extractor/comicvine.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -25,19 +25,7 @@ class ComicvineTagExtractor(BooruExtractor):
archive_fmt = "{id}"
pattern = (r"(?:https?://)?comicvine\.gamespot\.com"
r"(/([^/?#]+)/(\d+-\d+)/images/.*)")
- test = (
- ("https://comicvine.gamespot.com/jock/4040-5653/images/", {
- "pattern": r"https://comicvine\.gamespot\.com/a/uploads"
- r"/original/\d+/\d+/\d+-.+\.(jpe?g|png)",
- "count": ">= 140",
- }),
- (("https://comicvine.gamespot.com/batman/4005-1699"
- "/images/?tag=Fan%20Art%20%26%20Cosplay"), {
- "pattern": r"https://comicvine\.gamespot\.com/a/uploads"
- r"/original/\d+/\d+/\d+-.+",
- "count": ">= 450",
- }),
- )
+ example = "https://comicvine.gamespot.com/TAG/123-45/images/"
def __init__(self, match):
BooruExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 5c9b157..0d67df7 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -32,45 +32,21 @@ class Extractor():
directory_fmt = ("{category}",)
filename_fmt = "{filename}.{extension}"
archive_fmt = ""
- cookiedomain = ""
- browser = None
root = ""
- test = None
- finalize = None
+ cookies_domain = ""
+ referer = True
+ tls12 = True
+ browser = None
request_interval = 0.0
request_interval_min = 0.0
request_timestamp = 0.0
- tls12 = True
def __init__(self, match):
self.log = logging.getLogger(self.category)
self.url = match.string
-
- if self.basecategory:
- self.config = self._config_shared
- self.config_accumulate = self._config_shared_accumulate
self._cfgpath = ("extractor", self.category, self.subcategory)
self._parentdir = ""
- self._write_pages = self.config("write-pages", False)
- self._retry_codes = self.config("retry-codes")
- self._retries = self.config("retries", 4)
- self._timeout = self.config("timeout", 30)
- self._verify = self.config("verify", True)
- self._proxies = util.build_proxy_map(self.config("proxy"), self.log)
- self._interval = util.build_duration_func(
- self.config("sleep-request", self.request_interval),
- self.request_interval_min,
- )
-
- if self._retries < 0:
- self._retries = float("inf")
- if not self._retry_codes:
- self._retry_codes = ()
-
- self._init_session()
- self._init_cookies()
-
@classmethod
def from_url(cls, url):
if isinstance(cls.pattern, str):
@@ -79,8 +55,19 @@ class Extractor():
return cls(match) if match else None
def __iter__(self):
+ self.initialize()
return self.items()
+ def initialize(self):
+ self._init_options()
+ self._init_session()
+ self._init_cookies()
+ self._init()
+ self.initialize = util.noop
+
+ def finalize(self):
+ pass
+
def items(self):
yield Message.Version, 1
@@ -109,16 +96,22 @@ class Extractor():
return config.accumulate(self._cfgpath, key)
def _config_shared(self, key, default=None):
- return config.interpolate_common(("extractor",), (
- (self.category, self.subcategory),
- (self.basecategory, self.subcategory),
- ), key, default)
+ return config.interpolate_common(
+ ("extractor",), self._cfgpath, key, default)
def _config_shared_accumulate(self, key):
- values = config.accumulate(self._cfgpath, key)
- conf = config.get(("extractor",), self.basecategory)
- if conf:
- values[:0] = config.accumulate((self.subcategory,), key, conf=conf)
+ first = True
+ extr = ("extractor",)
+
+ for path in self._cfgpath:
+ if first:
+ first = False
+ values = config.accumulate(extr + path, key)
+ else:
+ conf = config.get(extr, path[0])
+ if conf:
+ values[:0] = config.accumulate(
+ (self.subcategory,), key, conf=conf)
return values
def request(self, url, method="GET", session=None,
@@ -245,6 +238,26 @@ class Extractor():
return username, password
+ def _init(self):
+ pass
+
+ def _init_options(self):
+ self._write_pages = self.config("write-pages", False)
+ self._retry_codes = self.config("retry-codes")
+ self._retries = self.config("retries", 4)
+ self._timeout = self.config("timeout", 30)
+ self._verify = self.config("verify", True)
+ self._proxies = util.build_proxy_map(self.config("proxy"), self.log)
+ self._interval = util.build_duration_func(
+ self.config("sleep-request", self.request_interval),
+ self.request_interval_min,
+ )
+
+ if self._retries < 0:
+ self._retries = float("inf")
+ if not self._retry_codes:
+ self._retry_codes = ()
+
def _init_session(self):
self.session = session = requests.Session()
headers = session.headers
@@ -286,7 +299,7 @@ class Extractor():
useragent = self.config("user-agent")
if useragent is None:
useragent = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
- "rv:115.0) Gecko/20100101 Firefox/115.0")
+ "rv:109.0) Gecko/20100101 Firefox/115.0")
elif useragent == "browser":
useragent = _browser_useragent()
headers["User-Agent"] = useragent
@@ -298,6 +311,13 @@ class Extractor():
else:
headers["Accept-Encoding"] = "gzip, deflate"
+ referer = self.config("referer", self.referer)
+ if referer:
+ if isinstance(referer, str):
+ headers["Referer"] = referer
+ elif self.root:
+ headers["Referer"] = self.root + "/"
+
custom_headers = self.config("headers")
if custom_headers:
headers.update(custom_headers)
@@ -330,26 +350,26 @@ class Extractor():
def _init_cookies(self):
"""Populate the session's cookiejar"""
- self._cookiefile = None
- self._cookiejar = self.session.cookies
- if self.cookiedomain is None:
+ self.cookies = self.session.cookies
+ self.cookies_file = None
+ if self.cookies_domain is None:
return
cookies = self.config("cookies")
if cookies:
if isinstance(cookies, dict):
- self._update_cookies_dict(cookies, self.cookiedomain)
+ self.cookies_update_dict(cookies, self.cookies_domain)
elif isinstance(cookies, str):
- cookiefile = util.expand_path(cookies)
+ path = util.expand_path(cookies)
try:
- with open(cookiefile) as fp:
- util.cookiestxt_load(fp, self._cookiejar)
+ with open(path) as fp:
+ util.cookiestxt_load(fp, self.cookies)
except Exception as exc:
self.log.warning("cookies: %s", exc)
else:
self.log.debug("Loading cookies from '%s'", cookies)
- self._cookiefile = cookiefile
+ self.cookies_file = path
elif isinstance(cookies, (list, tuple)):
key = tuple(cookies)
@@ -357,7 +377,7 @@ class Extractor():
if cookiejar is None:
from ..cookies import load_cookies
- cookiejar = self._cookiejar.__class__()
+ cookiejar = self.cookies.__class__()
try:
load_cookies(cookiejar, cookies)
except Exception as exc:
@@ -367,9 +387,9 @@ class Extractor():
else:
self.log.debug("Using cached cookies from %s", key)
- setcookie = self._cookiejar.set_cookie
+ set_cookie = self.cookies.set_cookie
for cookie in cookiejar:
- setcookie(cookie)
+ set_cookie(cookie)
else:
self.log.warning(
@@ -377,8 +397,8 @@ class Extractor():
"option, got '%s' (%s)",
cookies.__class__.__name__, cookies)
- def _store_cookies(self):
- """Store the session's cookiejar in a cookies.txt file"""
+ def cookies_store(self):
+ """Store the session's cookies in a cookies.txt file"""
export = self.config("cookies-update", True)
if not export:
return
@@ -386,47 +406,47 @@ class Extractor():
if isinstance(export, str):
path = util.expand_path(export)
else:
- path = self._cookiefile
+ path = self.cookies_file
if not path:
return
try:
with open(path, "w") as fp:
- util.cookiestxt_store(fp, self._cookiejar)
+ util.cookiestxt_store(fp, self.cookies)
except OSError as exc:
self.log.warning("cookies: %s", exc)
- def _update_cookies(self, cookies, domain=""):
+ def cookies_update(self, cookies, domain=""):
"""Update the session's cookiejar with 'cookies'"""
if isinstance(cookies, dict):
- self._update_cookies_dict(cookies, domain or self.cookiedomain)
+ self.cookies_update_dict(cookies, domain or self.cookies_domain)
else:
- setcookie = self._cookiejar.set_cookie
+ set_cookie = self.cookies.set_cookie
try:
cookies = iter(cookies)
except TypeError:
- setcookie(cookies)
+ set_cookie(cookies)
else:
for cookie in cookies:
- setcookie(cookie)
+ set_cookie(cookie)
- def _update_cookies_dict(self, cookiedict, domain):
+ def cookies_update_dict(self, cookiedict, domain):
"""Update cookiejar with name-value pairs from a dict"""
- setcookie = self._cookiejar.set
+ set_cookie = self.cookies.set
for name, value in cookiedict.items():
- setcookie(name, value, domain=domain)
+ set_cookie(name, value, domain=domain)
- def _check_cookies(self, cookienames, domain=None):
- """Check if all 'cookienames' are in the session's cookiejar"""
- if not self._cookiejar:
+ def cookies_check(self, cookies_names, domain=None):
+ """Check if all 'cookies_names' are in the session's cookiejar"""
+ if not self.cookies:
return False
if domain is None:
- domain = self.cookiedomain
- names = set(cookienames)
+ domain = self.cookies_domain
+ names = set(cookies_names)
now = time.time()
- for cookie in self._cookiejar:
+ for cookie in self.cookies:
if cookie.name in names and (
not domain or cookie.domain == domain):
@@ -450,9 +470,16 @@ class Extractor():
return False
def _prepare_ddosguard_cookies(self):
- if not self._cookiejar.get("__ddg2", domain=self.cookiedomain):
- self._cookiejar.set(
- "__ddg2", util.generate_token(), domain=self.cookiedomain)
+ if not self.cookies.get("__ddg2", domain=self.cookies_domain):
+ self.cookies.set(
+ "__ddg2", util.generate_token(), domain=self.cookies_domain)
+
+ def _cache(self, func, maxage, keyarg=None):
+ # return cache.DatabaseCacheDecorator(func, maxage, keyarg)
+ return cache.DatabaseCacheDecorator(func, keyarg, maxage)
+
+ def _cache_memory(self, func, maxage=None, keyarg=None):
+ return cache.Memcache()
def _get_date_min_max(self, dmin=None, dmax=None):
"""Retrieve and parse 'date-min' and 'date-max' config values"""
@@ -489,19 +516,8 @@ class Extractor():
return iter(result)
@classmethod
- def _get_tests(cls):
- """Yield an extractor's test cases as (URL, RESULTS) tuples"""
- tests = cls.test
- if not tests:
- return
-
- if len(tests) == 2 and (not tests[1] or isinstance(tests[1], dict)):
- tests = (tests,)
-
- for test in tests:
- if isinstance(test, str):
- test = (test, None)
- yield test
+ def _dump(cls, obj):
+ util.dump_json(obj, ensure_ascii=False, indent=2)
def _dump_response(self, response, history=True):
"""Write the response content to a .dump file in the current directory.
@@ -654,6 +670,8 @@ class AsynchronousMixin():
"""Run info extraction in a separate thread"""
def __iter__(self):
+ self.initialize()
+
messages = queue.Queue(5)
thread = threading.Thread(
target=self.async_items,
@@ -805,8 +823,8 @@ _browser_cookies = {}
HTTP_HEADERS = {
"firefox": (
- ("User-Agent", "Mozilla/5.0 ({}; rv:115.0) "
- "Gecko/20100101 Firefox/115.0"),
+ ("User-Agent", "Mozilla/5.0 ({}; "
+ "rv:109.0) Gecko/20100101 Firefox/115.0"),
("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,"
"image/avif,image/webp,*/*;q=0.8"),
("Accept-Language", "en-US,en;q=0.5"),
@@ -897,13 +915,3 @@ if action:
except Exception:
pass
del action
-
-# Undo automatic pyOpenSSL injection by requests
-pyopenssl = config.get((), "pyopenssl", False)
-if not pyopenssl:
- try:
- from requests.packages.urllib3.contrib import pyopenssl # noqa
- pyopenssl.extract_from_urllib3()
- except ImportError:
- pass
-del pyopenssl
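
The largest structural change here is the lazy lifecycle: __iter__ now calls initialize(), which runs the _init_options/_init_session/_init_cookies/_init helpers once and then replaces itself with util.noop, and a Referer header defaulting to root + "/" is set unless the new referer option overrides it. A minimal sketch of that self-replacing initializer pattern, outside gallery-dl (names are illustrative only):

    def _noop():
        pass

    class LazyExtractor:
        """Illustrative only: expensive setup runs on first iteration."""
        root = "https://example.org"

        def __init__(self, url):
            self.url = url                               # cheap bookkeeping only

        def initialize(self):
            # session, cookies and options would be built here, exactly once
            self.headers = {"Referer": self.root + "/"}  # default Referer, as above
            self.initialize = _noop                      # later calls become no-ops

        def __iter__(self):
            self.initialize()
            return self.items()

        def items(self):
            yield {"url": self.url, "headers": self.headers}

    # ex = LazyExtractor("https://example.org/gallery/1")
    # next(iter(ex))   # triggers initialize() once; iterating again skips setup
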
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index 7a79eca..59fd1e5 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -14,32 +14,7 @@ class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
category = "cyberdrop"
root = "https://cyberdrop.me"
pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.(?:me|to)/a/([^/?#]+)"
- test = (
- # images
- ("https://cyberdrop.me/a/keKRjm4t", {
- "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.(jpg|png|webp)$",
- "keyword": {
- "album_id": "keKRjm4t",
- "album_name": "Fate (SFW)",
- "album_size": 150069254,
- "count": 62,
- "date": "dt:2020-06-18 13:14:20",
- "description": "",
- "id": r"re:\w{8}",
- },
- }),
- # videos
- ("https://cyberdrop.to/a/l8gIAXVD", {
- "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.mp4$",
- "count": 31,
- "keyword": {
- "album_id": "l8gIAXVD",
- "album_name": "Achelois17 videos",
- "album_size": 652037121,
- "date": "dt:2020-06-16 15:40:44",
- },
- }),
- )
+ example = "https://cyberdrop.me/a/ID"
def fetch_album(self, album_id):
url = self.root + "/a/" + self.album_id
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 5cfbf5c..56d81e5 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -22,8 +22,7 @@ class DanbooruExtractor(BaseExtractor):
per_page = 200
request_interval = 1.0
- def __init__(self, match):
- BaseExtractor.__init__(self, match)
+ def _init(self):
self.ugoira = self.config("ugoira", False)
self.external = self.config("external", False)
self.includes = False
@@ -151,7 +150,8 @@ class DanbooruExtractor(BaseExtractor):
BASE_PATTERN = DanbooruExtractor.update({
"danbooru": {
"root": None,
- "pattern": r"(?:danbooru|hijiribe|sonohara|safebooru)\.donmai\.us",
+ "pattern": r"(?:(?:danbooru|hijiribe|sonohara|safebooru)\.donmai\.us"
+ r"|donmai\.moe)",
},
"atfbooru": {
"root": "https://booru.allthefallen.moe",
@@ -159,7 +159,11 @@ BASE_PATTERN = DanbooruExtractor.update({
},
"aibooru": {
"root": None,
- "pattern": r"(?:safe.)?aibooru\.online",
+ "pattern": r"(?:safe\.)?aibooru\.online",
+ },
+ "booruvar": {
+ "root": "https://booru.borvar.art",
+ "pattern": r"booru\.borvar\.art",
},
})
@@ -170,33 +174,7 @@ class DanbooruTagExtractor(DanbooruExtractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = BASE_PATTERN + r"/posts\?(?:[^&#]*&)*tags=([^&#]*)"
- test = (
- ("https://danbooru.donmai.us/posts?tags=bonocho", {
- "content": "b196fb9f1668109d7774a0a82efea3ffdda07746",
- }),
- # test page transitions
- ("https://danbooru.donmai.us/posts?tags=mushishi", {
- "count": ">= 300",
- }),
- # 'external' option (#1747)
- ("https://danbooru.donmai.us/posts?tags=pixiv_id%3A1476533", {
- "options": (("external", True),),
- "pattern": r"https://i\.pximg\.net/img-original/img"
- r"/2008/08/28/02/35/48/1476533_p0\.jpg",
- }),
- ("https://booru.allthefallen.moe/posts?tags=yume_shokunin", {
- "count": 12,
- }),
- ("https://aibooru.online/posts?tags=center_frills&z=1", {
- "pattern": r"https://cdn\.aibooru\.online/original"
- r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w+",
- "count": ">= 3",
- }),
- ("https://hijiribe.donmai.us/posts?tags=bonocho"),
- ("https://sonohara.donmai.us/posts?tags=bonocho"),
- ("https://safebooru.donmai.us/posts?tags=bonocho"),
- ("https://safe.aibooru.online/posts?tags=center_frills"),
- )
+ example = "https://danbooru.donmai.us/posts?tags=TAG"
def __init__(self, match):
DanbooruExtractor.__init__(self, match)
@@ -230,17 +208,7 @@ class DanbooruPoolExtractor(DanbooruExtractor):
directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name]}")
archive_fmt = "p_{pool[id]}_{id}"
pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
- test = (
- ("https://danbooru.donmai.us/pools/7659", {
- "content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99",
- }),
- ("https://booru.allthefallen.moe/pools/9", {
- "url": "902549ffcdb00fe033c3f63e12bc3cb95c5fd8d5",
- "count": 6,
- }),
- ("https://aibooru.online/pools/1"),
- ("https://danbooru.donmai.us/pool/show/7659"),
- )
+ example = "https://danbooru.donmai.us/pools/12345"
def __init__(self, match):
DanbooruExtractor.__init__(self, match)
@@ -263,23 +231,7 @@ class DanbooruPostExtractor(DanbooruExtractor):
subcategory = "post"
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
- test = (
- ("https://danbooru.donmai.us/posts/294929", {
- "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
- "keyword": {"date": "dt:2008-08-12 04:46:05"},
- }),
- ("https://danbooru.donmai.us/posts/3613024", {
- "pattern": r"https?://.+\.zip$",
- "options": (("ugoira", True),)
- }),
- ("https://booru.allthefallen.moe/posts/22", {
- "content": "21dda68e1d7e0a554078e62923f537d8e895cac8",
- }),
- ("https://aibooru.online/posts/1", {
- "content": "54d548743cd67799a62c77cbae97cfa0fec1b7e9",
- }),
- ("https://danbooru.donmai.us/post/show/294929"),
- )
+ example = "https://danbooru.donmai.us/posts/12345"
def __init__(self, match):
DanbooruExtractor.__init__(self, match)
@@ -300,16 +252,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
directory_fmt = ("{category}", "popular", "{scale}", "{date}")
archive_fmt = "P_{scale[0]}_{date}_{id}"
pattern = BASE_PATTERN + r"/(?:explore/posts/)?popular(?:\?([^#]*))?"
- test = (
- ("https://danbooru.donmai.us/explore/posts/popular"),
- (("https://danbooru.donmai.us/explore/posts/popular"
- "?date=2013-06-06&scale=week"), {
- "range": "1-120",
- "count": 120,
- }),
- ("https://booru.allthefallen.moe/explore/posts/popular"),
- ("https://aibooru.online/explore/posts/popular"),
- )
+ example = "https://danbooru.donmai.us/explore/posts/popular"
def __init__(self, match):
DanbooruExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/desktopography.py b/gallery_dl/extractor/desktopography.py
index 363341a..35bb299 100644
--- a/gallery_dl/extractor/desktopography.py
+++ b/gallery_dl/extractor/desktopography.py
@@ -23,7 +23,7 @@ class DesktopographySiteExtractor(DesktopographyExtractor):
"""Extractor for all desktopography exhibitions """
subcategory = "site"
pattern = BASE_PATTERN + r"/$"
- test = ("https://desktopography.net/",)
+ example = "https://desktopography.net/"
def items(self):
page = self.request(self.root).text
@@ -42,7 +42,7 @@ class DesktopographyExhibitionExtractor(DesktopographyExtractor):
"""Extractor for a yearly desktopography exhibition"""
subcategory = "exhibition"
pattern = BASE_PATTERN + r"/exhibition-([^/?#]+)/"
- test = ("https://desktopography.net/exhibition-2020/",)
+ example = "https://desktopography.net/exhibition-2020/"
def __init__(self, match):
DesktopographyExtractor.__init__(self, match)
@@ -71,7 +71,7 @@ class DesktopographyEntryExtractor(DesktopographyExtractor):
"""Extractor for all resolutions of a desktopography wallpaper"""
subcategory = "entry"
pattern = BASE_PATTERN + r"/portfolios/([\w-]+)"
- test = ("https://desktopography.net/portfolios/new-era/",)
+ example = "https://desktopography.net/portfolios/NAME/"
def __init__(self, match):
DesktopographyExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 18d9867..9421096 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -32,20 +32,26 @@ class DeviantartExtractor(Extractor):
root = "https://www.deviantart.com"
directory_fmt = ("{category}", "{username}")
filename_fmt = "{category}_{index}_{title}.{extension}"
- cookiedomain = None
- cookienames = ("auth", "auth_secure", "userinfo")
+ cookies_domain = None
+ cookies_names = ("auth", "auth_secure", "userinfo")
_last_request = 0
def __init__(self, match):
Extractor.__init__(self, match)
+ self.user = match.group(1) or match.group(2)
+ self.offset = 0
+
+ def _init(self):
+ self.jwt = self.config("jwt", True)
self.flat = self.config("flat", True)
self.extra = self.config("extra", False)
+ self.quality = self.config("quality", "100")
self.original = self.config("original", True)
self.comments = self.config("comments", False)
- self.user = match.group(1) or match.group(2)
+
+ self.api = DeviantartOAuthAPI(self)
self.group = False
- self.offset = 0
- self.api = None
+ self._premium_cache = {}
unwatch = self.config("auto-unwatch")
if unwatch:
@@ -54,33 +60,37 @@ class DeviantartExtractor(Extractor):
else:
self.unwatch = None
+ if self.quality:
+ self.quality = ",q_{}".format(self.quality)
+
if self.original != "image":
self._update_content = self._update_content_default
else:
self._update_content = self._update_content_image
self.original = True
- self._premium_cache = {}
- self.commit_journal = {
- "html": self._commit_journal_html,
- "text": self._commit_journal_text,
- }.get(self.config("journals", "html"))
+ journals = self.config("journals", "html")
+ if journals == "html":
+ self.commit_journal = self._commit_journal_html
+ elif journals == "text":
+ self.commit_journal = self._commit_journal_text
+ else:
+ self.commit_journal = None
def skip(self, num):
self.offset += num
return num
def login(self):
- if not self._check_cookies(self.cookienames):
- username, password = self._get_auth_info()
- if not username:
- return False
- self._update_cookies(_login_impl(self, username, password))
- return True
+ if self.cookies_check(self.cookies_names):
+ return True
- def items(self):
- self.api = DeviantartOAuthAPI(self)
+ username, password = self._get_auth_info()
+ if username:
+ self.cookies_update(_login_impl(self, username, password))
+ return True
+ def items(self):
if self.user and self.config("group", True):
profile = self.api.user_profile(self.user)
self.group = not profile
@@ -117,21 +127,36 @@ class DeviantartExtractor(Extractor):
if self.original and deviation["is_downloadable"]:
self._update_content(deviation, content)
- else:
+ elif self.jwt:
self._update_token(deviation, content)
+ elif content["src"].startswith("https://images-wixmp-"):
+ if deviation["index"] <= 790677560:
+ # https://github.com/r888888888/danbooru/issues/4069
+ intermediary, count = re.subn(
+ r"(/f/[^/]+/[^/]+)/v\d+/.*",
+ r"/intermediary\1", content["src"], 1)
+ if count:
+ deviation["_fallback"] = (content["src"],)
+ content["src"] = intermediary
+ if self.quality:
+ content["src"] = re.sub(
+ r",q_\d+", self.quality, content["src"], 1)
yield self.commit(deviation, content)
elif deviation["is_downloadable"]:
content = self.api.deviation_download(deviation["deviationid"])
+ deviation["is_original"] = True
yield self.commit(deviation, content)
if "videos" in deviation and deviation["videos"]:
video = max(deviation["videos"],
key=lambda x: text.parse_int(x["quality"][:-1]))
+ deviation["is_original"] = False
yield self.commit(deviation, video)
if "flash" in deviation:
+ deviation["is_original"] = True
yield self.commit(deviation, deviation["flash"])
if self.commit_journal:
@@ -145,6 +170,7 @@ class DeviantartExtractor(Extractor):
if journal:
if self.extra:
deviation["_journal"] = journal["html"]
+ deviation["is_original"] = True
yield self.commit_journal(deviation, journal)
if not self.extra:
@@ -222,6 +248,8 @@ class DeviantartExtractor(Extractor):
target["filename"] = deviation["filename"]
deviation["target"] = target
deviation["extension"] = target["extension"] = text.ext_from_url(name)
+ if "is_original" not in deviation:
+ deviation["is_original"] = ("/v1/" not in url)
return Message.Url, url, deviation
def _commit_journal_html(self, deviation, journal):
@@ -320,9 +348,14 @@ class DeviantartExtractor(Extractor):
yield url, folder
def _update_content_default(self, deviation, content):
- public = False if "premium_folder_data" in deviation else None
+ if "premium_folder_data" in deviation or deviation.get("is_mature"):
+ public = False
+ else:
+ public = None
+
data = self.api.deviation_download(deviation["deviationid"], public)
content.update(data)
+ deviation["is_original"] = True
def _update_content_image(self, deviation, content):
data = self.api.deviation_download(deviation["deviationid"])
@@ -330,6 +363,7 @@ class DeviantartExtractor(Extractor):
mtype = mimetypes.guess_type(url, False)[0]
if mtype and mtype.startswith("image/"):
content.update(data)
+ deviation["is_original"] = True
def _update_token(self, deviation, content):
"""Replace JWT to be able to remove width/height limits
@@ -341,6 +375,9 @@ class DeviantartExtractor(Extractor):
if not sep:
return
+ # 'images-wixmp' returns 401 errors, but just 'wixmp' still works
+ url = url.replace("//images-wixmp", "//wixmp", 1)
+
# header = b'{"typ":"JWT","alg":"none"}'
payload = (
b'{"sub":"urn:app:","iss":"urn:app:","obj":[[{"path":"/f/' +
@@ -349,11 +386,12 @@ class DeviantartExtractor(Extractor):
)
deviation["_fallback"] = (content["src"],)
+ deviation["is_original"] = True
content["src"] = (
"{}?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.{}.".format(
url,
# base64 of 'header' is precomputed as 'eyJ0eX...'
- # binascii.a2b_base64(header).rstrip(b"=\n").decode(),
+ # binascii.b2a_base64(header).rstrip(b"=\n").decode(),
binascii.b2a_base64(payload).rstrip(b"=\n").decode())
)
@@ -435,18 +473,12 @@ class DeviantartUserExtractor(DeviantartExtractor):
"""Extractor for an artist's user profile"""
subcategory = "user"
pattern = BASE_PATTERN + r"/?$"
- test = (
- ("https://www.deviantart.com/shimoda7", {
- "pattern": r"/shimoda7/gallery$",
- }),
- ("https://www.deviantart.com/shimoda7", {
- "options": (("include", "all"),),
- "pattern": r"/shimoda7/"
- r"(gallery(/scraps)?|posts(/statuses)?|favourites)$",
- "count": 5,
- }),
- ("https://shimoda7.deviantart.com/"),
- )
+ example = "https://www.deviantart.com/USER"
+
+ def initialize(self):
+ pass
+
+ skip = Extractor.skip
def items(self):
base = "{}/{}/".format(self.root, self.user)
@@ -467,84 +499,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
subcategory = "gallery"
archive_fmt = "g_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$"
- test = (
- ("https://www.deviantart.com/shimoda7/gallery/", {
- "pattern": r"https://(images-)?wixmp-[^.]+\.wixmp\.com"
- r"/f/.+/.+\.(jpg|png)\?token=.+",
- "count": ">= 30",
- "keyword": {
- "allows_comments": bool,
- "author": {
- "type": "regular",
- "usericon": str,
- "userid": "9AE51FC7-0278-806C-3FFF-F4961ABF9E2B",
- "username": "shimoda7",
- },
- "category_path": str,
- "content": {
- "filesize": int,
- "height": int,
- "src": str,
- "transparency": bool,
- "width": int,
- },
- "da_category": str,
- "date": "type:datetime",
- "deviationid": str,
- "?download_filesize": int,
- "extension": str,
- "index": int,
- "is_deleted": bool,
- "is_downloadable": bool,
- "is_favourited": bool,
- "is_mature": bool,
- "preview": {
- "height": int,
- "src": str,
- "transparency": bool,
- "width": int,
- },
- "published_time": int,
- "stats": {
- "comments": int,
- "favourites": int,
- },
- "target": dict,
- "thumbs": list,
- "title": str,
- "url": r"re:https://www.deviantart.com/shimoda7/art/[^/]+-\d+",
- "username": "shimoda7",
- },
- }),
- # group
- ("https://www.deviantart.com/yakuzafc/gallery", {
- "pattern": r"https://www.deviantart.com/yakuzafc/gallery"
- r"/\w{8}-\w{4}-\w{4}-\w{4}-\w{12}/",
- "count": ">= 15",
- }),
- # 'folders' option (#276)
- ("https://www.deviantart.com/justatest235723/gallery", {
- "count": 3,
- "options": (("metadata", 1), ("folders", 1), ("original", 0)),
- "keyword": {
- "description": str,
- "folders": list,
- "is_watching": bool,
- "license": str,
- "tags": list,
- },
- }),
- ("https://www.deviantart.com/shimoda8/gallery/", {
- "exception": exception.NotFoundError,
- }),
-
- ("https://www.deviantart.com/shimoda7/gallery"),
- ("https://www.deviantart.com/shimoda7/gallery/all"),
- ("https://www.deviantart.com/shimoda7/gallery/?catpath=/"),
- ("https://shimoda7.deviantart.com/gallery/"),
- ("https://shimoda7.deviantart.com/gallery/all/"),
- ("https://shimoda7.deviantart.com/gallery/?catpath=/"),
- )
+ example = "https://www.deviantart.com/USER/gallery/"
def deviations(self):
if self.flat and not self.group:
@@ -559,32 +514,7 @@ class DeviantartFolderExtractor(DeviantartExtractor):
directory_fmt = ("{category}", "{username}", "{folder[title]}")
archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/([^/?#]+)"
- test = (
- # user
- ("https://www.deviantart.com/shimoda7/gallery/722019/Miscellaneous", {
- "count": 5,
- "options": (("original", False),),
- }),
- # group
- ("https://www.deviantart.com/yakuzafc/gallery/37412168/Crafts", {
- "count": ">= 4",
- "options": (("original", False),),
- }),
- # uuid
- (("https://www.deviantart.com/shimoda7/gallery"
- "/B38E3C6A-2029-6B45-757B-3C8D3422AD1A/misc"), {
- "count": 5,
- "options": (("original", False),),
- }),
- # name starts with '_', special characters (#1451)
- (("https://www.deviantart.com/justatest235723"
- "/gallery/69302698/-test-b-c-d-e-f-"), {
- "count": 1,
- "options": (("original", False),),
- }),
- ("https://shimoda7.deviantart.com/gallery/722019/Miscellaneous"),
- ("https://yakuzafc.deviantart.com/gallery/37412168/Crafts"),
- )
+ example = "https://www.deviantart.com/USER/gallery/12345/TITLE"
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
@@ -613,33 +543,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
subcategory = "stash"
archive_fmt = "{index}.{extension}"
pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)"
- test = (
- ("https://sta.sh/022c83odnaxc", {
- "pattern": r"https://wixmp-[^.]+\.wixmp\.com"
- r"/f/.+/.+\.png\?token=.+",
- "content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f",
- "count": 1,
- }),
- # multiple stash items
- ("https://sta.sh/21jf51j7pzl2", {
- "options": (("original", False),),
- "count": 4,
- }),
- # downloadable, but no "content" field (#307)
- ("https://sta.sh/024t4coz16mi", {
- "pattern": r"https://wixmp-[^.]+\.wixmp\.com"
- r"/f/.+/.+\.rar\?token=.+",
- "count": 1,
- }),
- # mixed folders and images (#659)
- ("https://sta.sh/215twi387vfj", {
- "options": (("original", False),),
- "count": 4,
- }),
- ("https://sta.sh/abcdefghijkl", {
- "count": 0,
- }),
- )
+ example = "https://sta.sh/abcde"
skip = Extractor.skip
@@ -684,20 +588,7 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
directory_fmt = ("{category}", "{username}", "Favourites")
archive_fmt = "f_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/favourites(?:/all|/?\?catpath=)?/?$"
- test = (
- ("https://www.deviantart.com/h3813067/favourites/", {
- "options": (("metadata", True), ("flat", False)), # issue #271
- "count": 1,
- }),
- ("https://www.deviantart.com/h3813067/favourites/", {
- "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
- }),
- ("https://www.deviantart.com/h3813067/favourites/all"),
- ("https://www.deviantart.com/h3813067/favourites/?catpath=/"),
- ("https://h3813067.deviantart.com/favourites/"),
- ("https://h3813067.deviantart.com/favourites/all"),
- ("https://h3813067.deviantart.com/favourites/?catpath=/"),
- )
+ example = "https://www.deviantart.com/USER/favourites/"
def deviations(self):
if self.flat:
@@ -714,20 +605,7 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
"{collection[title]}")
archive_fmt = "C_{collection[uuid]}_{index}.{extension}"
pattern = BASE_PATTERN + r"/favourites/([^/?#]+)/([^/?#]+)"
- test = (
- (("https://www.deviantart.com/pencilshadings/favourites"
- "/70595441/3D-Favorites"), {
- "count": ">= 15",
- "options": (("original", False),),
- }),
- (("https://www.deviantart.com/pencilshadings/favourites"
- "/F050486B-CB62-3C66-87FB-1105A7F6379F/3D Favorites"), {
- "count": ">= 15",
- "options": (("original", False),),
- }),
- ("https://pencilshadings.deviantart.com"
- "/favourites/70595441/3D-Favorites"),
- )
+ example = "https://www.deviantart.com/USER/favourites/12345/TITLE"
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
@@ -758,24 +636,7 @@ class DeviantartJournalExtractor(DeviantartExtractor):
directory_fmt = ("{category}", "{username}", "Journal")
archive_fmt = "j_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$"
- test = (
- ("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
- "url": "38db2a0d3a587a7e0f9dba7ff7d274610ebefe44",
- }),
- ("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
- "url": "b2a8e74d275664b1a4acee0fca0a6fd33298571e",
- "options": (("journals", "text"),),
- }),
- ("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
- "count": 0,
- "options": (("journals", "none"),),
- }),
- ("https://www.deviantart.com/shimoda7/posts/"),
- ("https://www.deviantart.com/shimoda7/journal/"),
- ("https://www.deviantart.com/shimoda7/journal/?catpath=/"),
- ("https://shimoda7.deviantart.com/journal/"),
- ("https://shimoda7.deviantart.com/journal/?catpath=/"),
- )
+ example = "https://www.deviantart.com/USER/posts/journals/"
def deviations(self):
return self.api.browse_user_journals(self.user, self.offset)
@@ -788,45 +649,7 @@ class DeviantartStatusExtractor(DeviantartExtractor):
filename_fmt = "{category}_{index}_{title}_{date}.{extension}"
archive_fmt = "S_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/posts/statuses"
- test = (
- ("https://www.deviantart.com/t1na/posts/statuses", {
- "count": 0,
- }),
- ("https://www.deviantart.com/justgalym/posts/statuses", {
- "count": 4,
- "url": "bf4c44c0c60ff2648a880f4c3723464ad3e7d074",
- }),
- # shared deviation
- ("https://www.deviantart.com/justgalym/posts/statuses", {
- "options": (("journals", "none"),),
- "count": 1,
- "pattern": r"https://images-wixmp-\w+\.wixmp\.com/f"
- r"/[^/]+/[^.]+\.jpg\?token=",
- }),
- # shared sta.sh item
- ("https://www.deviantart.com/vanillaghosties/posts/statuses", {
- "options": (("journals", "none"), ("original", False)),
- "range": "5-",
- "count": 1,
- "keyword": {
- "index" : int,
- "index_base36": "re:^[0-9a-z]+$",
- "url" : "re:^https://sta.sh",
- },
- }),
- # "deleted" deviations in 'items'
- ("https://www.deviantart.com/AndrejSKalin/posts/statuses", {
- "options": (("journals", "none"), ("original", 0),
- ("image-filter", "deviationid[:8] == '147C8B03'")),
- "count": 2,
- "archive": False,
- "keyword": {"deviationid": "147C8B03-7D34-AE93-9241-FA3C6DBBC655"}
- }),
- ("https://www.deviantart.com/justgalym/posts/statuses", {
- "options": (("journals", "text"),),
- "url": "c8744f7f733a3029116607b826321233c5ca452d",
- }),
- )
+ example = "https://www.deviantart.com/USER/posts/statuses/"
def deviations(self):
for status in self.api.user_statuses(self.user, self.offset):
@@ -890,19 +713,7 @@ class DeviantartPopularExtractor(DeviantartExtractor):
r"(?:deviations/?)?\?order=(popular-[^/?#]+)"
r"|((?:[\w-]+/)*)(popular-[^/?#]+)"
r")/?(?:\?([^#]*))?")
- test = (
- ("https://www.deviantart.com/?order=popular-all-time", {
- "options": (("original", False),),
- "range": "1-30",
- "count": 30,
- }),
- ("https://www.deviantart.com/popular-24-hours/?q=tree+house", {
- "options": (("original", False),),
- "range": "1-30",
- "count": 30,
- }),
- ("https://www.deviantart.com/artisan/popular-all-time/?q=tree"),
- )
+ example = "https://www.deviantart.com/popular-24-hours/"
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
@@ -947,11 +758,7 @@ class DeviantartTagExtractor(DeviantartExtractor):
directory_fmt = ("{category}", "Tags", "{search_tags}")
archive_fmt = "T_{search_tags}_{index}.{extension}"
pattern = r"(?:https?://)?www\.deviantart\.com/tag/([^/?#]+)"
- test = ("https://www.deviantart.com/tag/nature", {
- "options": (("original", False),),
- "range": "1-30",
- "count": 30,
- })
+ example = "https://www.deviantart.com/tag/TAG"
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
@@ -970,10 +777,7 @@ class DeviantartWatchExtractor(DeviantartExtractor):
subcategory = "watch"
pattern = (r"(?:https?://)?(?:www\.)?deviantart\.com"
r"/(?:watch/deviations|notifications/watch)()()")
- test = (
- ("https://www.deviantart.com/watch/deviations"),
- ("https://www.deviantart.com/notifications/watch"),
- )
+ example = "https://www.deviantart.com/watch/deviations"
def deviations(self):
return self.api.browse_deviantsyouwatch()
@@ -983,7 +787,7 @@ class DeviantartWatchPostsExtractor(DeviantartExtractor):
"""Extractor for Posts from watched users"""
subcategory = "watch-posts"
pattern = r"(?:https?://)?(?:www\.)?deviantart\.com/watch/posts()()"
- test = ("https://www.deviantart.com/watch/posts",)
+ example = "https://www.deviantart.com/watch/posts"
def deviations(self):
return self.api.browse_posts_deviantsyouwatch()
@@ -1001,100 +805,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
r"(?:view/|deviation/|view(?:-full)?\.php/*\?(?:[^#]+&)?id=)"
r"(\d+)" # bare deviation ID without slug
r"|(?:https?://)?fav\.me/d([0-9a-z]+)") # base36
- test = (
- (("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
- "options": (("original", 0),),
- "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
- }),
- ("https://www.deviantart.com/zzz/art/zzz-1234567890", {
- "exception": exception.NotFoundError,
- }),
- (("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), {
- "options": (("comments", True),),
- "keyword": {"comments": list},
- "pattern": r"https://wixmp-[^.]+\.wixmp\.com"
- r"/f/.+/.+\.jpg\?token=.+",
- }),
- # wixmp URL rewrite
- (("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), {
- "pattern": (r"https://images-wixmp-\w+\.wixmp\.com/f"
- r"/[^/]+/[^.]+\.jpg\?token="),
- }),
- # GIF (#242)
- (("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), {
- "pattern": r"https://wixmp-\w+\.wixmp\.com/f/03fd2413-efe9-4e5c-"
- r"8734-2b72605b3fbb/dcxbsnb-1bbf0b38-42af-4070-8878-"
- r"f30961955bec\.gif\?token=ey...",
- }),
- # Flash animation with GIF preview (#1731)
- ("https://www.deviantart.com/yuumei/art/Flash-Comic-214724929", {
- "pattern": r"https://wixmp-[^.]+\.wixmp\.com"
- r"/f/.+/.+\.swf\?token=.+",
- "keyword": {
- "filename": "flash_comic_tutorial_by_yuumei-d3juatd",
- "extension": "swf",
- },
- }),
- # sta.sh URLs from description (#302)
- (("https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498"), {
- "options": (("extra", 1), ("original", 0)),
- "pattern": DeviantartStashExtractor.pattern,
- "range": "2-",
- "count": 4,
- }),
- # sta.sh URL from deviation["text_content"]["body"]["features"]
- (("https://www.deviantart.com"
- "/cimar-wildehopps/art/Honorary-Vixen-859809305"), {
- "options": (("extra", 1),),
- "pattern": ("text:<!DOCTYPE html>\n|" +
- DeviantartStashExtractor.pattern),
- "count": 2,
- }),
- # journal
- ("https://www.deviantart.com/shimoda7/journal/ARTility-583755752", {
- "url": "d34b2c9f873423e665a1b8ced20fcb75951694a3",
- "pattern": "text:<!DOCTYPE html>\n",
- }),
- # journal-like post with isJournal == False (#419)
- ("https://www.deviantart.com/gliitchlord/art/brashstrokes-812942668", {
- "url": "e2e0044bd255304412179b6118536dbd9bb3bb0e",
- "pattern": "text:<!DOCTYPE html>\n",
- }),
- # /view/ URLs
- ("https://deviantart.com/view/904858796/", {
- "content": "8770ec40ad1c1d60f6b602b16301d124f612948f",
- }),
- ("http://www.deviantart.com/view/890672057", {
- "content": "1497e13d925caeb13a250cd666b779a640209236",
- }),
- ("https://www.deviantart.com/view/706871727", {
- "content": "3f62ae0c2fca2294ac28e41888ea06bb37c22c65",
- }),
- ("https://www.deviantart.com/view/1", {
- "exception": exception.NotFoundError,
- }),
- # /deviation/ (#3558)
- ("https://www.deviantart.com/deviation/817215762"),
- # fav.me (#3558)
- ("https://fav.me/ddijrpu", {
- "count": 1,
- }),
- ("https://fav.me/dddd", {
- "exception": exception.NotFoundError,
- }),
- # old-style URLs
- ("https://shimoda7.deviantart.com"
- "/art/For-the-sake-of-a-memory-10073852"),
- ("https://myria-moon.deviantart.com"
- "/art/Aime-Moi-part-en-vadrouille-261986576"),
- ("https://zzz.deviantart.com/art/zzz-1234567890"),
- # old /view/ URLs from the Wayback Machine
- ("https://www.deviantart.com/view.php?id=14864502"),
- ("http://www.deviantart.com/view-full.php?id=100842"),
-
- ("https://www.fxdeviantart.com/zzz/art/zzz-1234567890"),
- ("https://www.fxdeviantart.com/view/1234567890"),
- )
+ example = "https://www.deviantart.com/UsER/art/TITLE-12345"
skip = Extractor.skip
@@ -1105,11 +816,14 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
match.group(4) or match.group(5) or id_from_base36(match.group(6))
def deviations(self):
- url = "{}/{}/{}/{}".format(
- self.root, self.user or "u", self.type or "art", self.deviation_id)
+ if self.user:
+ url = "{}/{}/{}/{}".format(
+ self.root, self.user, self.type or "art", self.deviation_id)
+ else:
+ url = "{}/view/{}/".format(self.root, self.deviation_id)
- uuid = text.extract(self._limited_request(url).text,
- '"deviationUuid\\":\\"', '\\')[0]
+ uuid = text.extr(self._limited_request(url).text,
+ '"deviationUuid\\":\\"', '\\')
if not uuid:
raise exception.NotFoundError("deviation")
return (self.api.deviation(uuid),)
@@ -1120,15 +834,9 @@ class DeviantartScrapsExtractor(DeviantartExtractor):
subcategory = "scraps"
directory_fmt = ("{category}", "{username}", "Scraps")
archive_fmt = "s_{_username}_{index}.{extension}"
- cookiedomain = ".deviantart.com"
+ cookies_domain = ".deviantart.com"
pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
- test = (
- ("https://www.deviantart.com/shimoda7/gallery/scraps", {
- "count": 12,
- }),
- ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps"),
- ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"),
- )
+ example = "https://www.deviantart.com/USER/gallery/scraps"
def deviations(self):
self.login()
@@ -1143,14 +851,10 @@ class DeviantartSearchExtractor(DeviantartExtractor):
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search_tags}")
archive_fmt = "Q_{search_tags}_{index}.{extension}"
- cookiedomain = ".deviantart.com"
+ cookies_domain = ".deviantart.com"
pattern = (r"(?:https?://)?www\.deviantart\.com"
r"/search(?:/deviations)?/?\?([^#]+)")
- test = (
- ("https://www.deviantart.com/search?q=tree"),
- ("https://www.deviantart.com/search/deviations?order=popular-1-week"),
- )
-
+ example = "https://www.deviantart.com/search?q=QUERY"
skip = Extractor.skip
def __init__(self, match):
@@ -1173,11 +877,6 @@ class DeviantartSearchExtractor(DeviantartExtractor):
def _search_html(self, params):
url = self.root + "/search"
- deviation = {
- "deviationId": None,
- "author": {"username": "u"},
- "isJournal": False,
- }
while True:
response = self.request(url, params=params)
@@ -1186,13 +885,15 @@ class DeviantartSearchExtractor(DeviantartExtractor):
raise exception.StopExtraction("HTTP redirect to login page")
page = response.text
- items , pos = text.rextract(page, r'\"items\":[', ']')
- cursor, pos = text.extract(page, r'\"cursor\":\"', '\\', pos)
-
- for deviation_id in items.split(","):
- deviation["deviationId"] = deviation_id
- yield deviation
+ for dev in DeviantartDeviationExtractor.pattern.findall(
+ page)[2::3]:
+ yield {
+ "deviationId": dev[3],
+ "author": {"username": dev[0]},
+ "isJournal": dev[2] == "journal",
+ }
+ cursor = text.extr(page, r'\"cursor\":\"', '\\',)
if not cursor:
return
params["cursor"] = cursor
@@ -1202,15 +903,9 @@ class DeviantartGallerySearchExtractor(DeviantartExtractor):
"""Extractor for deviantart gallery searches"""
subcategory = "gallery-search"
archive_fmt = "g_{_username}_{index}.{extension}"
- cookiedomain = ".deviantart.com"
+ cookies_domain = ".deviantart.com"
pattern = BASE_PATTERN + r"/gallery/?\?(q=[^#]+)"
- test = (
- ("https://www.deviantart.com/shimoda7/gallery?q=memory", {
- "options": (("original", 0),),
- "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
- }),
- ("https://www.deviantart.com/shimoda7/gallery?q=memory&sort=popular"),
- )
+ example = "https://www.deviantart.com/USER/gallery?q=QUERY"
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
@@ -1220,14 +915,12 @@ class DeviantartGallerySearchExtractor(DeviantartExtractor):
self.login()
eclipse_api = DeviantartEclipseAPI(self)
- info = eclipse_api.user_info(self.user)
-
query = text.parse_query(self.query)
self.search = query["q"]
return self._eclipse_to_oauth(
eclipse_api, eclipse_api.galleries_search(
- info["user"]["userId"],
+ self.user,
self.search,
self.offset,
query.get("sort", "most-recent"),
@@ -1242,11 +935,7 @@ class DeviantartFollowingExtractor(DeviantartExtractor):
"""Extractor for user's watched users"""
subcategory = "following"
pattern = BASE_PATTERN + "/about#watching$"
- test = ("https://www.deviantart.com/shimoda7/about#watching", {
- "pattern": DeviantartUserExtractor.pattern,
- "range": "1-50",
- "count": 50,
- })
+ example = "https://www.deviantart.com/USER/about#watching"
def items(self):
eclipse_api = DeviantartEclipseAPI(self)
@@ -1393,7 +1082,12 @@ class DeviantartOAuthAPI():
def deviation(self, deviation_id, public=None):
"""Query and return info about a single Deviation"""
endpoint = "/deviation/" + deviation_id
+
deviation = self._call(endpoint, public=public)
+ if deviation.get("is_mature") and public is None and \
+ self.refresh_token_key:
+ deviation = self._call(endpoint, public=False)
+
if self.metadata:
self._metadata((deviation,))
if self.folders:
@@ -1549,8 +1243,12 @@ class DeviantartOAuthAPI():
return data
if not fatal and status != 429:
return None
- if data.get("error_description") == "User not found.":
+
+ error = data.get("error_description")
+ if error == "User not found.":
raise exception.NotFoundError("user or group")
+ if error == "Deviation not downloadable.":
+ raise exception.AuthorizationError()
self.log.debug(response.text)
msg = "API responded with {} {}".format(
@@ -1574,6 +1272,17 @@ class DeviantartOAuthAPI():
self.log.error(msg)
return data
+ def _switch_tokens(self, results, params):
+ if len(results) < params["limit"]:
+ return True
+
+ if not self.extractor.jwt:
+ for item in results:
+ if item.get("is_mature"):
+ return True
+
+ return False
+
def _pagination(self, endpoint, params,
extend=True, public=None, unpack=False, key="results"):
warn = True
@@ -1592,7 +1301,7 @@ class DeviantartOAuthAPI():
results = [item["journal"] for item in results
if "journal" in item]
if extend:
- if public and len(results) < params["limit"]:
+ if public and self._switch_tokens(results, params):
if self.refresh_token_key:
self.log.debug("Switching to private access token")
public = False
@@ -1600,9 +1309,10 @@ class DeviantartOAuthAPI():
elif data["has_more"] and warn:
warn = False
self.log.warning(
- "Private deviations detected! Run 'gallery-dl "
- "oauth:deviantart' and follow the instructions to "
- "be able to access them.")
+ "Private or mature deviations detected! "
+ "Run 'gallery-dl oauth:deviantart' and follow the "
+ "instructions to be able to access them.")
+
# "statusid" cannot be used instead
if results and "deviationid" in results[0]:
if self.metadata:
@@ -1711,70 +1421,70 @@ class DeviantartEclipseAPI():
self.request = self.extractor._limited_request
self.csrf_token = None
- def deviation_extended_fetch(self, deviation_id, user=None, kind=None):
- endpoint = "/da-browse/shared_api/deviation/extended_fetch"
+ def deviation_extended_fetch(self, deviation_id, user, kind=None):
+ endpoint = "/_puppy/dadeviation/init"
params = {
- "deviationid" : deviation_id,
- "username" : user,
- "type" : kind,
- "include_session": "false",
+ "deviationid" : deviation_id,
+ "username" : user,
+ "type" : kind,
+ "include_session" : "false",
+ "expand" : "deviation.related",
+ "da_minor_version": "20230710",
}
return self._call(endpoint, params)
- def gallery_scraps(self, user, offset=None):
- endpoint = "/da-user-profile/api/gallery/contents"
+ def gallery_scraps(self, user, offset=0):
+ endpoint = "/_puppy/dashared/gallection/contents"
params = {
"username" : user,
+ "type" : "gallery",
"offset" : offset,
"limit" : 24,
"scraps_folder": "true",
}
return self._pagination(endpoint, params)
- def galleries_search(self, user_id, query,
- offset=None, order="most-recent"):
- endpoint = "/shared_api/galleries/search"
+ def galleries_search(self, user, query, offset=0, order="most-recent"):
+ endpoint = "/_puppy/dashared/gallection/search"
params = {
- "userid": user_id,
- "order" : order,
- "q" : query,
- "offset": offset,
- "limit" : 24,
+ "username": user,
+ "type" : "gallery",
+ "order" : order,
+ "q" : query,
+ "offset" : offset,
+ "limit" : 24,
}
return self._pagination(endpoint, params)
def search_deviations(self, params):
- endpoint = "/da-browse/api/networkbar/search/deviations"
+ endpoint = "/_puppy/dabrowse/search/deviations"
return self._pagination(endpoint, params, key="deviations")
def user_info(self, user, expand=False):
- endpoint = "/shared_api/user/info"
+ endpoint = "/_puppy/dauserprofile/init/about"
params = {"username": user}
- if expand:
- params["expand"] = "user.stats,user.profile,user.watch"
return self._call(endpoint, params)
- def user_watching(self, user, offset=None):
- endpoint = "/da-user-profile/api/module/watching"
+ def user_watching(self, user, offset=0):
+ gruserid, moduleid = self._ids_watching(user)
+
+ endpoint = "/_puppy/gruser/module/watching"
params = {
- "username": user,
- "moduleid": self._module_id_watching(user),
- "offset" : offset,
- "limit" : 24,
+ "gruserid" : gruserid,
+ "gruser_typeid": "4",
+ "username" : user,
+ "moduleid" : moduleid,
+ "offset" : offset,
+ "limit" : 24,
}
return self._pagination(endpoint, params)
def _call(self, endpoint, params):
- url = "https://www.deviantart.com/_napi" + endpoint
- headers = {"Referer": "https://www.deviantart.com/"}
+ url = "https://www.deviantart.com" + endpoint
params["csrf_token"] = self.csrf_token or self._fetch_csrf_token()
- response = self.request(
- url, params=params, headers=headers, fatal=None)
+ response = self.request(url, params=params, fatal=None)
- if response.status_code == 404:
- raise exception.StopExtraction(
- "Your account must use the Eclipse interface.")
try:
return response.json()
except Exception:
@@ -1812,14 +1522,19 @@ class DeviantartEclipseAPI():
else:
params["offset"] = int(params["offset"]) + len(results)
- def _module_id_watching(self, user):
+ def _ids_watching(self, user):
url = "{}/{}/about".format(self.extractor.root, user)
page = self.request(url).text
- pos = page.find('\\"type\\":\\"watching\\"')
+
+ gruserid, pos = text.extract(page, ' data-userid="', '"')
+
+ pos = page.find('\\"type\\":\\"watching\\"', pos)
if pos < 0:
raise exception.NotFoundError("module")
+ moduleid = text.rextract(page, '\\"id\\":', ',', pos)[0].strip('" ')
+
self._fetch_csrf_token(page)
- return text.rextract(page, '\\"id\\":', ',', pos)[0].strip('" ')
+ return gruserid, moduleid
def _fetch_csrf_token(self, page=None):
if page is None:
@@ -1866,7 +1581,7 @@ def _login_impl(extr, username, password):
return {
cookie.name: cookie.value
- for cookie in extr.session.cookies
+ for cookie in extr.cookies
}
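
The `_switch_tokens` helper added in the deviantart hunks above centralizes when `_pagination` falls back to the private (refresh-token) access token: either the page came back short of the requested limit, or no JWT-signed URLs are in use and the page contains mature items. A minimal standalone sketch of that decision follows; the names are simplified and are not part of gallery-dl's actual API.

    # Sketch only: mirrors the decision encoded by _switch_tokens() above.
    def should_switch_to_private(results, limit, using_jwt):
        """Return True if this API page warrants retrying with the private token."""
        # A short page suggests items were withheld from the public token.
        if len(results) < limit:
            return True
        # Without JWT-signed URLs, mature items also need the private token.
        if not using_jwt:
            return any(item.get("is_mature") for item in results)
        return False

    if __name__ == "__main__":
        page = [{"is_mature": False}, {"is_mature": True}]
        print(should_switch_to_private(page, limit=24, using_jwt=False))  # True
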
diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py
index e85eb8d..26f2184 100644
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2022 Mike Fährmann
+# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -20,36 +20,7 @@ class DirectlinkExtractor(Extractor):
pattern = (r"(?i)https?://(?P<domain>[^/?#]+)/(?P<path>[^?#]+\."
r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"
r"(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?$")
- test = (
- (("https://en.wikipedia.org/static/images/project-logos/enwiki.png"), {
- "url": "18c5d00077332e98e53be9fed2ee4be66154b88d",
- "keyword": "105770a3f4393618ab7b811b731b22663b5d3794",
- }),
- # empty path
- (("https://example.org/file.webm"), {
- "url": "2d807ed7059d1b532f1bb71dc24b510b80ff943f",
- "keyword": "29dad729c40fb09349f83edafa498dba1297464a",
- }),
- # more complex example
- ("https://example.org/path/to/file.webm?que=1?&ry=2/#fragment", {
- "url": "6fb1061390f8aada3db01cb24b51797c7ee42b31",
- "keyword": "3d7abc31d45ba324e59bc599c3b4862452d5f29c",
- }),
- # percent-encoded characters
- ("https://example.org/%27%3C%23/%23%3E%27.jpg?key=%3C%26%3E", {
- "url": "2627e8140727fdf743f86fe18f69f99a052c9718",
- "keyword": "831790fddda081bdddd14f96985ab02dc5b5341f",
- }),
- # upper case file extension (#296)
- ("https://post-phinf.pstatic.net/MjAxOTA1MjlfMTQ4/MDAxNTU5MTI2NjcyNTkw"
- ".JUzkGb4V6dj9DXjLclrOoqR64uDxHFUO5KDriRdKpGwg.88mCtd4iT1NHlpVKSCaUpP"
- "mZPiDgT8hmQdQ5K_gYyu0g.JPEG/2.JPG"),
- # internationalized domain name
- ("https://räksmörgås.josefsson.org/raksmorgas.jpg", {
- "url": "a65667f670b194afbd1e3ea5e7a78938d36747da",
- "keyword": "fd5037fe86eebd4764e176cbaf318caec0f700be",
- }),
- )
+ example = "https://en.wikipedia.org/static/images/project-logos/enwiki.png"
def __init__(self, match):
Extractor.__init__(self, match)
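
The directlink change above is the pattern repeated across every extractor in this release: the embedded `test` tuples are removed and each extractor instead carries a single `example` URL with uppercase placeholders (USER, ID, QUERY, TAG). A hedged sketch of the resulting class shape, using a made-up extractor purely for illustration:

    # Illustrative skeleton only; not a real gallery-dl extractor.
    import re

    class ExampleLinkExtractor:
        category = "example"
        subcategory = "link"
        pattern = r"(?:https?://)?example\.org/(\w+)"
        example = "https://example.org/ID"  # one representative URL, placeholders in caps

        def __init__(self, url):
            self.item = re.match(self.pattern, url).group(1)

    print(ExampleLinkExtractor("https://example.org/abc123").item)  # abc123
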
diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py
index 59e8c90..733d0d8 100644
--- a/gallery_dl/extractor/dynastyscans.py
+++ b/gallery_dl/extractor/dynastyscans.py
@@ -43,18 +43,7 @@ class DynastyscansBase():
class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
"""Extractor for manga-chapters from dynasty-scans.com"""
pattern = BASE_PATTERN + r"(/chapters/[^/?#]+)"
- test = (
- (("http://dynasty-scans.com/chapters/"
- "hitoribocchi_no_oo_seikatsu_ch33"), {
- "url": "dce64e8c504118f1ab4135c00245ea12413896cb",
- "keyword": "b67599703c27316a2fe4f11c3232130a1904e032",
- }),
- (("http://dynasty-scans.com/chapters/"
- "new_game_the_spinoff_special_13"), {
- "url": "dbe5bbb74da2edcfb1832895a484e2a40bc8b538",
- "keyword": "6b674eb3a274999153f6be044973b195008ced2f",
- }),
- )
+ example = "https://dynasty-scans.com/chapters/NAME"
def metadata(self, page):
extr = text.extract_from(page)
@@ -93,10 +82,7 @@ class DynastyscansMangaExtractor(DynastyscansBase, MangaExtractor):
chapterclass = DynastyscansChapterExtractor
reverse = False
pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
- test = ("https://dynasty-scans.com/series/hitoribocchi_no_oo_seikatsu", {
- "pattern": DynastyscansChapterExtractor.pattern,
- "count": ">= 100",
- })
+ example = "https://dynasty-scans.com/series/NAME"
def chapters(self, page):
return [
@@ -112,16 +98,7 @@ class DynastyscansSearchExtractor(DynastyscansBase, Extractor):
filename_fmt = "{image_id}.{extension}"
archive_fmt = "i_{image_id}"
pattern = BASE_PATTERN + r"/images/?(?:\?([^#]+))?$"
- test = (
- ("https://dynasty-scans.com/images?with[]=4930&with[]=5211", {
- "url": "22cf0fb64e12b29e79b0a3d26666086a48f9916a",
- "keyword": "11cbc555a15528d25567977b8808e10369c4c3ee",
- }),
- ("https://dynasty-scans.com/images", {
- "range": "1",
- "count": 1,
- }),
- )
+ example = "https://dynasty-scans.com/images?QUERY"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -150,10 +127,7 @@ class DynastyscansImageExtractor(DynastyscansSearchExtractor):
"""Extractor for individual images on dynasty-scans.com"""
subcategory = "image"
pattern = BASE_PATTERN + r"/images/(\d+)"
- test = ("https://dynasty-scans.com/images/1245", {
- "url": "15e54bd94148a07ed037f387d046c27befa043b2",
- "keyword": "0d8976c2d6fbc9ed6aa712642631b96e456dc37f",
- })
+ example = "https://dynasty-scans.com/images/12345"
def images(self):
return (self.query,)
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
index d4f6cd4..af963bc 100644
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -74,45 +74,23 @@ BASE_PATTERN = E621Extractor.update({
"root": "https://e926.net",
"pattern": r"e926\.net",
},
+ "e6ai": {
+ "root": "https://e6ai.net",
+ "pattern": r"e6ai\.net",
+ },
})
class E621TagExtractor(E621Extractor, danbooru.DanbooruTagExtractor):
"""Extractor for e621 posts from tag searches"""
pattern = BASE_PATTERN + r"/posts?(?:\?.*?tags=|/index/\d+/)([^&#]+)"
- test = (
- ("https://e621.net/posts?tags=anry", {
- "url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
- "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
- }),
- ("https://e621.net/post/index/1/anry"),
- ("https://e621.net/post?tags=anry"),
-
- ("https://e926.net/posts?tags=anry", {
- "url": "12198b275c62ffe2de67cca676c8e64de80c425d",
- "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
- }),
- ("https://e926.net/post/index/1/anry"),
- ("https://e926.net/post?tags=anry"),
- )
+ example = "https://e621.net/posts?tags=TAG"
class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor):
"""Extractor for e621 pools"""
pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
- test = (
- ("https://e621.net/pools/73", {
- "url": "1bd09a72715286a79eea3b7f09f51b3493eb579a",
- "content": "91abe5d5334425d9787811d7f06d34c77974cd22",
- }),
- ("https://e621.net/pool/show/73"),
-
- ("https://e926.net/pools/73", {
- "url": "6936f1b6a18c5c25bee7cad700088dbc2503481b",
- "content": "91abe5d5334425d9787811d7f06d34c77974cd22",
- }),
- ("https://e926.net/pool/show/73"),
- )
+ example = "https://e621.net/pools/12345"
def posts(self):
self.log.info("Fetching posts of pool %s", self.pool_id)
@@ -138,61 +116,7 @@ class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor):
class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
"""Extractor for single e621 posts"""
pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
- test = (
- ("https://e621.net/posts/535", {
- "url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
- "content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
- "keyword": {"date": "dt:2007-02-17 19:02:32"},
- }),
- ("https://e621.net/posts/3181052", {
- "options": (("metadata", "notes,pools"),),
- "pattern": r"https://static\d\.e621\.net/data/c6/8c"
- r"/c68cca0643890b615f75fb2719589bff\.png",
- "keyword": {
- "notes": [
- {
- "body": "Little Legends 2",
- "created_at": "2022-05-16T13:58:38.877-04:00",
- "creator_id": 517450,
- "creator_name": "EeveeCuddler69",
- "height": 475,
- "id": 321296,
- "is_active": True,
- "post_id": 3181052,
- "updated_at": "2022-05-16T13:59:02.050-04:00",
- "version": 3,
- "width": 809,
- "x": 83,
- "y": 117,
- },
- ],
- "pools": [
- {
- "category": "series",
- "created_at": "2022-02-17T00:29:22.669-05:00",
- "creator_id": 1077440,
- "creator_name": "Yeetus90",
- "description": "* \"Little Legends\":/pools/27971\r\n"
- "* Little Legends 2\r\n"
- "* \"Little Legends 3\":/pools/27481",
- "id": 27492,
- "is_active": False,
- "name": "Little Legends 2",
- "post_count": 39,
- "post_ids": list,
- "updated_at": "2022-03-27T06:30:03.382-04:00"
- },
- ],
- },
- }),
- ("https://e621.net/post/show/535"),
-
- ("https://e926.net/posts/535", {
- "url": "17aec8ebd8fab098d321adcb62a2db59dab1f4bf",
- "content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
- }),
- ("https://e926.net/post/show/535"),
- )
+ example = "https://e621.net/posts/12345"
def posts(self):
url = "{}/posts/{}.json".format(self.root, self.post_id)
@@ -202,21 +126,7 @@ class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
class E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor):
"""Extractor for popular images from e621"""
pattern = BASE_PATTERN + r"/explore/posts/popular(?:\?([^#]*))?"
- test = (
- ("https://e621.net/explore/posts/popular"),
- (("https://e621.net/explore/posts/popular"
- "?date=2019-06-01&scale=month"), {
- "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
- "count": ">= 70",
- }),
-
- ("https://e926.net/explore/posts/popular"),
- (("https://e926.net/explore/posts/popular"
- "?date=2019-06-01&scale=month"), {
- "pattern": r"https://static\d.e926.net/data/../../[0-9a-f]+",
- "count": ">= 70",
- }),
- )
+ example = "https://e621.net/explore/posts/popular"
def posts(self):
return self._pagination("/popular.json", self.params)
@@ -228,19 +138,7 @@ class E621FavoriteExtractor(E621Extractor):
directory_fmt = ("{category}", "Favorites", "{user_id}")
archive_fmt = "f_{user_id}_{id}"
pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
- test = (
- ("https://e621.net/favorites"),
- ("https://e621.net/favorites?page=2&user_id=53275", {
- "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
- "count": "> 260",
- }),
-
- ("https://e926.net/favorites"),
- ("https://e926.net/favorites?page=2&user_id=53275", {
- "pattern": r"https://static\d.e926.net/data/../../[0-9a-f]+",
- "count": "> 260",
- }),
- )
+ example = "https://e621.net/favorites"
def __init__(self, match):
E621Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index 709bc57..2aed678 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -65,7 +65,7 @@ class EromeExtractor(Extractor):
def request(self, url, **kwargs):
if self.__cookies:
self.__cookies = False
- self.session.cookies.update(_cookie_cache())
+ self.cookies.update(_cookie_cache())
for _ in range(5):
response = Extractor.request(self, url, **kwargs)
@@ -91,29 +91,7 @@ class EromeAlbumExtractor(EromeExtractor):
"""Extractor for albums on erome.com"""
subcategory = "album"
pattern = BASE_PATTERN + r"/a/(\w+)"
- test = (
- ("https://www.erome.com/a/NQgdlWvk", {
- "pattern": r"https://v\d+\.erome\.com/\d+"
- r"/NQgdlWvk/j7jlzmYB_480p\.mp4",
- "count": 1,
- "keyword": {
- "album_id": "NQgdlWvk",
- "num": 1,
- "title": "porn",
- "user": "yYgWBZw8o8qsMzM",
- },
- }),
- ("https://www.erome.com/a/TdbZ4ogi", {
- "pattern": r"https://s\d+\.erome\.com/\d+/TdbZ4ogi/\w+",
- "count": 6,
- "keyword": {
- "album_id": "TdbZ4ogi",
- "num": int,
- "title": "82e78cfbb461ad87198f927fcb1fda9a1efac9ff.",
- "user": "yYgWBZw8o8qsMzM",
- },
- }),
- )
+ example = "https://www.erome.com/a/ID"
def albums(self):
return (self.item,)
@@ -122,10 +100,7 @@ class EromeAlbumExtractor(EromeExtractor):
class EromeUserExtractor(EromeExtractor):
subcategory = "user"
pattern = BASE_PATTERN + r"/(?!a/|search\?)([^/?#]+)"
- test = ("https://www.erome.com/yYgWBZw8o8qsMzM", {
- "range": "1-25",
- "count": 25,
- })
+ example = "https://www.erome.com/USER"
def albums(self):
url = "{}/{}".format(self.root, self.item)
@@ -135,10 +110,7 @@ class EromeUserExtractor(EromeExtractor):
class EromeSearchExtractor(EromeExtractor):
subcategory = "search"
pattern = BASE_PATTERN + r"/search\?q=([^&#]+)"
- test = ("https://www.erome.com/search?q=cute", {
- "range": "1-25",
- "count": 25,
- })
+ example = "https://www.erome.com/search?q=QUERY"
def albums(self):
url = self.root + "/search"
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 9cd7ae4..44bfe7d 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -23,25 +23,28 @@ class ExhentaiExtractor(Extractor):
directory_fmt = ("{category}", "{gid} {title[:247]}")
filename_fmt = "{gid}_{num:>04}_{image_token}_{filename}.{extension}"
archive_fmt = "{gid}_{num}"
- cookienames = ("ipb_member_id", "ipb_pass_hash")
- cookiedomain = ".exhentai.org"
+ cookies_domain = ".exhentai.org"
+ cookies_names = ("ipb_member_id", "ipb_pass_hash")
root = "https://exhentai.org"
request_interval = 5.0
LIMIT = False
def __init__(self, match):
- # allow calling 'self.config()' before 'Extractor.__init__()'
- self._cfgpath = ("extractor", self.category, self.subcategory)
+ Extractor.__init__(self, match)
+ self.version = match.group(1)
- version = match.group(1)
+ def initialize(self):
domain = self.config("domain", "auto")
if domain == "auto":
- domain = ("ex" if version == "ex" else "e-") + "hentai.org"
+ domain = ("ex" if self.version == "ex" else "e-") + "hentai.org"
self.root = "https://" + domain
- self.cookiedomain = "." + domain
+ self.cookies_domain = "." + domain
- Extractor.__init__(self, match)
+ Extractor.initialize(self)
+
+ if self.version != "ex":
+ self.cookies.set("nw", "1", domain=self.cookies_domain)
self.original = self.config("original", True)
limits = self.config("limits", False)
@@ -51,10 +54,6 @@ class ExhentaiExtractor(Extractor):
else:
self.limits = False
- self.session.headers["Referer"] = self.root + "/"
- if version != "ex":
- self.session.cookies.set("nw", "1", domain=self.cookiedomain)
-
def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs)
if response.history and response.headers.get("Content-Length") == "0":
@@ -66,17 +65,20 @@ class ExhentaiExtractor(Extractor):
"""Login and set necessary cookies"""
if self.LIMIT:
raise exception.StopExtraction("Image limit reached!")
- if self._check_cookies(self.cookienames):
+
+ if self.cookies_check(self.cookies_names):
return
+
username, password = self._get_auth_info()
if username:
- self._update_cookies(self._login_impl(username, password))
- else:
- self.log.info("no username given; using e-hentai.org")
- self.root = "https://e-hentai.org"
- self.original = False
- self.limits = False
- self.session.cookies["nw"] = "1"
+ return self.cookies_update(self._login_impl(username, password))
+
+ self.log.info("no username given; using e-hentai.org")
+ self.root = "https://e-hentai.org"
+ self.cookies_domain = ".e-hentai.org"
+ self.cookies.set("nw", "1", domain=self.cookies_domain)
+ self.original = False
+ self.limits = False
@cache(maxage=90*24*3600, keyarg=1)
def _login_impl(self, username, password):
@@ -97,7 +99,7 @@ class ExhentaiExtractor(Extractor):
response = self.request(url, method="POST", headers=headers, data=data)
if b"You are now logged in as:" not in response.content:
raise exception.AuthenticationError()
- return {c: response.cookies[c] for c in self.cookienames}
+ return {c: response.cookies[c] for c in self.cookies_names}
class ExhentaiGalleryExtractor(ExhentaiExtractor):
@@ -106,61 +108,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
pattern = (BASE_PATTERN +
r"(?:/g/(\d+)/([\da-f]{10})"
r"|/s/([\da-f]{10})/(\d+)-(\d+))")
- test = (
- ("https://exhentai.org/g/1200119/d55c44d3d0/", {
- "options": (("original", False),),
- "keyword": {
- "cost": int,
- "date": "dt:2018-03-18 20:14:00",
- "eh_category": "Non-H",
- "expunged": False,
- "favorites": r"re:^[12]\d$",
- "filecount": "4",
- "filesize": 1488978,
- "gid": 1200119,
- "height": int,
- "image_token": "re:[0-9a-f]{10}",
- "lang": "ja",
- "language": "Japanese",
- "parent": "",
- "rating": r"re:\d\.\d+",
- "size": int,
- "tags": [
- "parody:komi-san wa komyushou desu.",
- "character:shouko komi",
- "group:seventh lowlife",
- "other:sample",
- ],
- "thumb": "https://exhentai.org/t/ce/0a/ce0a5bcb583229a9b07c0f8"
- "3bcb1630ab1350640-624622-736-1036-jpg_250.jpg",
- "title": "C93 [Seventh_Lowlife] Komi-san ha Tokidoki Daitan de"
- "su (Komi-san wa Komyushou desu) [Sample]",
- "title_jpn": "(C93) [Comiketjack (わ!)] 古見さんは、時々大胆"
- "です。 (古見さんは、コミュ症です。) [見本]",
- "token": "d55c44d3d0",
- "torrentcount": "0",
- "uploader": "klorpa",
- "width": int,
- },
- "content": ("2c68cff8a7ca540a78c36fdbf5fbae0260484f87",
- "e9891a4c017ed0bb734cd1efba5cd03f594d31ff"),
- }),
- ("https://exhentai.org/g/960461/4f0e369d82/", {
- "exception": exception.NotFoundError,
- }),
- ("http://exhentai.org/g/962698/7f02358e00/", {
- "exception": exception.AuthorizationError,
- }),
- ("https://exhentai.org/s/f68367b4c8/1200119-3", {
- "options": (("original", False),),
- "count": 2,
- }),
- ("https://e-hentai.org/s/f68367b4c8/1200119-3", {
- "options": (("original", False),),
- "count": 2,
- }),
- ("https://g.e-hentai.org/g/1200119/d55c44d3d0/"),
- )
+ example = "https://e-hentai.org/g/12345/67890abcde/"
def __init__(self, match):
ExhentaiExtractor.__init__(self, match)
@@ -171,10 +119,25 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self.image_token = match.group(4)
self.image_num = text.parse_int(match.group(6), 1)
+ def _init(self):
source = self.config("source")
if source == "hitomi":
self.items = self._items_hitomi
+ def favorite(self, slot="0"):
+ url = self.root + "/gallerypopups.php"
+ params = {
+ "gid": self.gallery_id,
+ "t" : self.gallery_token,
+ "act": "addfav",
+ }
+ data = {
+ "favcat" : slot,
+ "apply" : "Apply Changes",
+ "update" : "1",
+ }
+ self.request(url, method="POST", params=params, data=data)
+
def items(self):
self.login()
@@ -219,6 +182,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
data["_http_validate"] = None
yield Message.Url, url, data
+ fav = self.config("fav")
+ if fav is not None:
+ self.favorite(fav)
+
def _items_hitomi(self):
if self.config("metadata", False):
data = self.metadata_from_api()
@@ -390,8 +357,9 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
url = "https://e-hentai.org/home.php"
cookies = {
cookie.name: cookie.value
- for cookie in self.session.cookies
- if cookie.domain == self.cookiedomain and cookie.name != "igneous"
+ for cookie in self.cookies
+ if cookie.domain == self.cookies_domain and
+ cookie.name != "igneous"
}
page = self.request(url, cookies=cookies).text
@@ -458,26 +426,10 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
"""Extractor for exhentai search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/(?:\?([^#]*)|tag/([^/?#]+))"
- test = (
- ("https://e-hentai.org/?f_search=touhou"),
- ("https://exhentai.org/?f_cats=767&f_search=touhou"),
- ("https://exhentai.org/tag/parody:touhou+project"),
- (("https://exhentai.org/?f_doujinshi=0&f_manga=0&f_artistcg=0"
- "&f_gamecg=0&f_western=0&f_non-h=1&f_imageset=0&f_cosplay=0"
- "&f_asianporn=0&f_misc=0&f_search=touhou&f_apply=Apply+Filter"), {
- "pattern": ExhentaiGalleryExtractor.pattern,
- "range": "1-30",
- "count": 30,
- "keyword": {
- "gallery_id": int,
- "gallery_token": r"re:^[0-9a-f]{10}$"
- },
- }),
- )
+ example = "https://e-hentai.org/?f_search=QUERY"
def __init__(self, match):
ExhentaiExtractor.__init__(self, match)
- self.search_url = self.root
_, query, tag = match.groups()
if tag:
@@ -492,6 +444,9 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
if "next" not in self.params:
self.params["page"] = text.parse_int(self.params.get("page"))
+ def _init(self):
+ self.search_url = self.root
+
def items(self):
self.login()
data = {"_extractor": ExhentaiGalleryExtractor}
@@ -528,15 +483,7 @@ class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
"""Extractor for favorited exhentai galleries"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/favorites\.php(?:\?([^#]*)())?"
- test = (
- ("https://e-hentai.org/favorites.php", {
- "count": 1,
- "pattern": r"https?://e-hentai\.org/g/1200119/d55c44d3d0"
- }),
- ("https://exhentai.org/favorites.php?favcat=1&f_search=touhou"
- "&f_apply=Search+Favorites"),
- )
+ example = "https://e-hentai.org/favorites.php"
- def __init__(self, match):
- ExhentaiSearchExtractor.__init__(self, match)
+ def _init(self):
self.search_url = self.root + "/favorites.php"
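
Earlier in this file's diff, ExhentaiGalleryExtractor gains a `favorite()` method that is invoked after all images are yielded whenever the new `fav` option is set; it POSTs to `gallerypopups.php` with the gallery id/token and an `addfav` action. A rough sketch of that request shape outside gallery-dl, with placeholder gallery values and whatever session cookies the caller already holds:

    # Sketch of the "add to favorites" request; fields are taken from favorite() above.
    import requests

    def add_to_favorites(root, gallery_id, gallery_token, slot="0", cookies=None):
        params = {
            "gid": gallery_id,
            "t":   gallery_token,
            "act": "addfav",
        }
        data = {
            "favcat": slot,            # the 'fav' option value is passed through here
            "apply":  "Apply Changes",
            "update": "1",
        }
        return requests.post(root + "/gallerypopups.php",
                             params=params, data=data, cookies=cookies)
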
diff --git a/gallery_dl/extractor/fallenangels.py b/gallery_dl/extractor/fallenangels.py
index 0503dcf..650a707 100644
--- a/gallery_dl/extractor/fallenangels.py
+++ b/gallery_dl/extractor/fallenangels.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2019 Mike Fährmann
+# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -13,24 +13,11 @@ from .. import text, util
class FallenangelsChapterExtractor(ChapterExtractor):
- """Extractor for manga-chapters from fascans.com"""
+ """Extractor for manga chapters from fascans.com"""
category = "fallenangels"
pattern = (r"(?:https?://)?(manga|truyen)\.fascans\.com"
r"/manga/([^/?#]+)/([^/?#]+)")
- test = (
- ("https://manga.fascans.com/manga/chronos-ruler/20/1", {
- "url": "4604a7914566cc2da0ff789aa178e2d1c8c241e3",
- "keyword": "2dfcc50020e32cd207be88e2a8fac0933e36bdfb",
- }),
- ("http://truyen.fascans.com/manga/hungry-marie/8", {
- "url": "1f923d9cb337d5e7bbf4323719881794a951c6ae",
- "keyword": "2bdb7334c0e3eceb9946ffd3132df679b4a94f6a",
- }),
- ("http://manga.fascans.com/manga/rakudai-kishi-no-eiyuutan/19.5", {
- "url": "273f6863966c83ea79ad5846a2866e08067d3f0e",
- "keyword": "d1065685bfe0054c4ff2a0f20acb089de4cec253",
- }),
- )
+ example = "https://manga.fascans.com/manga/NAME/CHAPTER/"
def __init__(self, match):
self.version, self.manga, self.chapter = match.groups()
@@ -66,16 +53,7 @@ class FallenangelsMangaExtractor(MangaExtractor):
chapterclass = FallenangelsChapterExtractor
category = "fallenangels"
pattern = r"(?:https?://)?((manga|truyen)\.fascans\.com/manga/[^/]+)/?$"
- test = (
- ("https://manga.fascans.com/manga/chronos-ruler", {
- "url": "eea07dd50f5bc4903aa09e2cc3e45c7241c9a9c2",
- "keyword": "c414249525d4c74ad83498b3c59a813557e59d7e",
- }),
- ("https://truyen.fascans.com/manga/rakudai-kishi-no-eiyuutan", {
- "url": "51a731a6b82d5eb7a335fbae6b02d06aeb2ab07b",
- "keyword": "2d2a2a5d9ea5925eb9a47bb13d848967f3af086c",
- }),
- )
+ example = "https://manga.fascans.com/manga/NAME"
def __init__(self, match):
url = "https://" + match.group(1)
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index 373529f..4572bea 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -6,10 +6,9 @@
"""Extractors for https://www.fanbox.cc/"""
-import re
from .common import Extractor, Message
from .. import text
-
+import re
BASE_PATTERN = (
r"(?:https?://)?(?:"
@@ -27,17 +26,15 @@ class FanboxExtractor(Extractor):
archive_fmt = "{id}_{num}"
_warning = True
- def __init__(self, match):
- Extractor.__init__(self, match)
+ def _init(self):
self.embeds = self.config("embeds", True)
- def items(self):
-
if self._warning:
- if not self._check_cookies(("FANBOXSESSID",)):
+ if not self.cookies_check(("FANBOXSESSID",)):
self.log.warning("no 'FANBOXSESSID' cookie set")
FanboxExtractor._warning = False
+ def items(self):
for content_body, post in self.posts():
yield Message.Directory, post
yield from self._get_urls_from_post(content_body, post)
@@ -245,20 +242,7 @@ class FanboxCreatorExtractor(FanboxExtractor):
"""Extractor for a Fanbox creator's works"""
subcategory = "creator"
pattern = BASE_PATTERN + r"(?:/posts)?/?$"
- test = (
- ("https://xub.fanbox.cc", {
- "range": "1-15",
- "count": ">= 15",
- "keyword": {
- "creatorId" : "xub",
- "tags" : list,
- "title" : str,
- },
- }),
- ("https://xub.fanbox.cc/posts"),
- ("https://www.fanbox.cc/@xub/"),
- ("https://www.fanbox.cc/@xub/posts"),
- )
+ example = "https://USER.fanbox.cc/"
def __init__(self, match):
FanboxExtractor.__init__(self, match)
@@ -273,55 +257,7 @@ class FanboxPostExtractor(FanboxExtractor):
"""Extractor for media from a single Fanbox post"""
subcategory = "post"
pattern = BASE_PATTERN + r"/posts/(\d+)"
- test = (
- ("https://www.fanbox.cc/@xub/posts/1910054", {
- "count": 3,
- "keyword": {
- "title": "えま★おうがすと",
- "tags": list,
- "hasAdultContent": True,
- "isCoverImage": False
- },
- }),
- # entry post type, image embedded in html of the post
- ("https://nekoworks.fanbox.cc/posts/915", {
- "count": 2,
- "keyword": {
- "title": "【SAYORI FAN CLUB】お届け内容",
- "tags": list,
- "html": str,
- "hasAdultContent": True
- },
- }),
- # article post type, imageMap, 2 twitter embeds, fanbox embed
- ("https://steelwire.fanbox.cc/posts/285502", {
- "options": (("embeds", True),),
- "count": 10,
- "keyword": {
- "title": "イラスト+SS|義足の炭鉱少年が義足を見せてくれるだけ 【全体公開版】",
- "tags": list,
- "articleBody": dict,
- "hasAdultContent": True
- },
- }),
- # 'content' metadata (#3020)
- ("https://www.fanbox.cc/@official-en/posts/4326303", {
- "keyword": {
- "content": r"re:(?s)^Greetings from FANBOX.\n \nAs of Monday, "
- r"September 5th, 2022, we are happy to announce "
- r"the start of the FANBOX hashtag event "
- r"#MySetupTour ! \nAbout the event\nTo join this "
- r"event .+ \nPlease check this page for further "
- r"details regarding the Privacy & Terms.\n"
- r"https://fanbox.pixiv.help/.+/10184952456601\n\n\n"
- r"Thank you for your continued support of FANBOX.$",
- },
- }),
- # imageMap file order (#2718)
- ("https://mochirong.fanbox.cc/posts/3746116", {
- "url": "c92ddd06f2efc4a5fe30ec67e21544f79a5c4062",
- }),
- )
+ example = "https://USER.fanbox.cc/posts/12345"
def __init__(self, match):
FanboxExtractor.__init__(self, match)
@@ -336,9 +272,7 @@ class FanboxRedirectExtractor(Extractor):
category = "fanbox"
subcategory = "redirect"
pattern = r"(?:https?://)?(?:www\.)?pixiv\.net/fanbox/creator/(\d+)"
- test = ("https://www.pixiv.net/fanbox/creator/52336352", {
- "pattern": FanboxCreatorExtractor.pattern,
- })
+ example = "https://www.pixiv.net/fanbox/creator/12345"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/fanleaks.py b/gallery_dl/extractor/fanleaks.py
index 466bb8c..886e893 100644
--- a/gallery_dl/extractor/fanleaks.py
+++ b/gallery_dl/extractor/fanleaks.py
@@ -7,7 +7,7 @@
"""Extractors for https://fanleaks.club/"""
from .common import Extractor, Message
-from .. import text, exception
+from .. import text
class FanleaksExtractor(Extractor):
@@ -36,34 +36,10 @@ class FanleaksExtractor(Extractor):
class FanleaksPostExtractor(FanleaksExtractor):
- """Extractor for individual posts on fanleak.club"""
+ """Extractor for individual posts on fanleaks.club"""
subcategory = "post"
pattern = r"(?:https?://)?(?:www\.)?fanleaks\.club/([^/?#]+)/(\d+)"
- test = (
- ("https://fanleaks.club/selti/880", {
- "pattern": (r"https://fanleaks\.club//models"
- r"/selti/images/selti_0880\.jpg"),
- "keyword": {
- "model_id": "selti",
- "model" : "Selti",
- "id" : 880,
- "type" : "photo",
- },
- }),
- ("https://fanleaks.club/daisy-keech/1038", {
- "pattern": (r"https://fanleaks\.club//models"
- r"/daisy-keech/videos/daisy-keech_1038\.mp4"),
- "keyword": {
- "model_id": "daisy-keech",
- "model" : "Daisy Keech",
- "id" : 1038,
- "type" : "video",
- },
- }),
- ("https://fanleaks.club/hannahowo/000", {
- "exception": exception.NotFoundError,
- }),
- )
+ example = "https://fanleaks.club/MODEL/12345"
def __init__(self, match):
FanleaksExtractor.__init__(self, match)
@@ -79,22 +55,7 @@ class FanleaksModelExtractor(FanleaksExtractor):
subcategory = "model"
pattern = (r"(?:https?://)?(?:www\.)?fanleaks\.club"
r"/(?!latest/?$)([^/?#]+)/?$")
- test = (
- ("https://fanleaks.club/hannahowo", {
- "pattern": (r"https://fanleaks\.club//models"
- r"/hannahowo/(images|videos)/hannahowo_\d+\.\w+"),
- "range" : "1-100",
- "count" : 100,
- }),
- ("https://fanleaks.club/belle-delphine", {
- "pattern": (r"https://fanleaks\.club//models"
- r"/belle-delphine/(images|videos)"
- r"/belle-delphine_\d+\.\w+"),
- "range" : "1-100",
- "count" : 100,
- }),
- ("https://fanleaks.club/daisy-keech"),
- )
+ example = "https://fanleaks.club/MODEL"
def items(self):
page_num = 1
@@ -102,8 +63,7 @@ class FanleaksModelExtractor(FanleaksExtractor):
self.root + "/" + self.model_id, notfound="model").text
data = {
"model_id": self.model_id,
- "model" : text.unescape(
- text.extr(page, 'mt-4">', "</h1>")),
+ "model" : text.unescape(text.extr(page, 'mt-4">', "</h1>")),
"type" : "photo",
}
page_url = text.extr(page, "url: '", "'")
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index f92b904..f1d51e2 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -19,13 +19,12 @@ class FantiaExtractor(Extractor):
archive_fmt = "{post_id}_{file_id}"
_warning = True
- def items(self):
+ def _init(self):
self.headers = {
"Accept" : "application/json, text/plain, */*",
- "Referer": self.root,
"X-Requested-With": "XMLHttpRequest",
}
- _empty_plan = {
+ self._empty_plan = {
"id" : 0,
"price": 0,
"limit": 0,
@@ -33,22 +32,18 @@ class FantiaExtractor(Extractor):
"description": "",
"thumb": self.root + "/images/fallback/plan/thumb_default.png",
}
-
if self._warning:
- if not self._check_cookies(("_session_id",)):
+ if not self.cookies_check(("_session_id",)):
self.log.warning("no '_session_id' cookie set")
FantiaExtractor._warning = False
+ def items(self):
for post_id in self.posts():
post = self._get_post_data(post_id)
post["num"] = 0
for content in self._get_post_contents(post):
- post["content_category"] = content["category"]
- post["content_title"] = content["title"]
- post["content_filename"] = content.get("filename", "")
- post["content_id"] = content["id"]
- post["plan"] = content["plan"] or _empty_plan
+ files = self._process_content(post, content)
yield Message.Directory, post
if content["visible_status"] != "visible":
@@ -57,23 +52,21 @@ class FantiaExtractor(Extractor):
"%s#post-content-id-%s", content["visible_status"],
post["post_url"], content["id"])
- for url in self._get_content_urls(post, content):
- text.nameext_from_url(
- post["content_filename"] or url, post)
- post["file_url"] = url
+ for file in files:
+ post.update(file)
post["num"] += 1
- yield Message.Url, url, post
+ text.nameext_from_url(
+ post["content_filename"] or file["file_url"], post)
+ yield Message.Url, file["file_url"], post
def posts(self):
"""Return post IDs"""
def _pagination(self, url):
params = {"page": 1}
- headers = self.headers.copy()
- del headers["X-Requested-With"]
while True:
- page = self.request(url, params=params, headers=headers).text
+ page = self.request(url, params=params).text
self._csrf_token(page)
post_id = None
@@ -132,59 +125,52 @@ class FantiaExtractor(Extractor):
return contents
- def _get_content_urls(self, post, content):
- """Extract individual URL data from the response"""
- if "comment" in content:
- post["content_comment"] = content["comment"]
+ def _process_content(self, post, content):
+ post["content_category"] = content["category"]
+ post["content_title"] = content["title"]
+ post["content_filename"] = content.get("filename") or ""
+ post["content_id"] = content["id"]
+ post["content_comment"] = content.get("comment") or ""
+ post["plan"] = content["plan"] or self._empty_plan
+
+ files = []
if "post_content_photos" in content:
for photo in content["post_content_photos"]:
- post["file_id"] = photo["id"]
- yield photo["url"]["original"]
+ files.append({"file_id" : photo["id"],
+ "file_url": photo["url"]["original"]})
if "download_uri" in content:
- post["file_id"] = content["id"]
url = content["download_uri"]
if url[0] == "/":
url = self.root + url
- yield url
+ files.append({"file_id" : content["id"],
+ "file_url": url})
if content["category"] == "blog" and "comment" in content:
comment_json = util.json_loads(content["comment"])
- ops = comment_json.get("ops") or ()
- # collect blogpost text first
blog_text = ""
- for op in ops:
+ for op in comment_json.get("ops") or ():
insert = op.get("insert")
if isinstance(insert, str):
blog_text += insert
+ elif isinstance(insert, dict) and "fantiaImage" in insert:
+ img = insert["fantiaImage"]
+ files.append({"file_id" : img["id"],
+ "file_url": self.root + img["original_url"]})
post["blogpost_text"] = blog_text
+ else:
+ post["blogpost_text"] = ""
- # collect images
- for op in ops:
- insert = op.get("insert")
- if isinstance(insert, dict) and "fantiaImage" in insert:
- img = insert["fantiaImage"]
- post["file_id"] = img["id"]
- yield self.root + img["original_url"]
+ return files
class FantiaCreatorExtractor(FantiaExtractor):
"""Extractor for a Fantia creator's works"""
subcategory = "creator"
pattern = r"(?:https?://)?(?:www\.)?fantia\.jp/fanclubs/(\d+)"
- test = (
- ("https://fantia.jp/fanclubs/6939", {
- "range": "1-25",
- "count": ">= 25",
- "keyword": {
- "fanclub_user_id" : 52152,
- "tags" : list,
- "title" : str,
- },
- }),
- )
+ example = "https://fantia.jp/fanclubs/12345"
def __init__(self, match):
FantiaExtractor.__init__(self, match)
@@ -199,17 +185,7 @@ class FantiaPostExtractor(FantiaExtractor):
"""Extractor for media from a single Fantia post"""
subcategory = "post"
pattern = r"(?:https?://)?(?:www\.)?fantia\.jp/posts/(\d+)"
- test = (
- ("https://fantia.jp/posts/508363", {
- "count": 6,
- "keyword": {
- "post_title": "zunda逆バニーでおしりコッショリ",
- "tags": list,
- "rating": "adult",
- "post_id": 508363
- },
- }),
- )
+ example = "https://fantia.jp/posts/12345"
def __init__(self, match):
FantiaExtractor.__init__(self, match)
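
In the fantia diff above, `_get_content_urls()` is replaced by `_process_content()`, which copies the content metadata onto the post and returns a flat list of file dicts (photos, download URIs, and images embedded in blog comments) that `items()` then merges into the post one by one. An illustrative sketch of that data flow, with made-up IDs and URLs:

    # Simplified shape of what _process_content() now returns.
    files = [
        {"file_id": 111, "file_url": "https://fantia.jp/example/photo_original.jpg"},
        {"file_id": 222, "file_url": "https://fantia.jp/posts/12345/download/222"},
        {"file_id": 333, "file_url": "https://fantia.jp/example/blog_image.png"},
    ]

    post = {"post_id": 12345, "num": 0}
    for file in files:
        post.update(file)          # items() merges each dict into the post metadata
        post["num"] += 1
        print(post["num"], post["file_url"])
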
diff --git a/gallery_dl/extractor/fapachi.py b/gallery_dl/extractor/fapachi.py
index ee6d15a..6e81519 100644
--- a/gallery_dl/extractor/fapachi.py
+++ b/gallery_dl/extractor/fapachi.py
@@ -14,25 +14,13 @@ class FapachiPostExtractor(Extractor):
"""Extractor for individual posts on fapachi.com"""
category = "fapachi"
subcategory = "post"
+ root = "https://fapachi.com"
directory_fmt = ("{category}", "{user}")
filename_fmt = "{user}_{id}.{extension}"
archive_fmt = "{user}_{id}"
pattern = (r"(?:https?://)?(?:www\.)?fapachi\.com"
r"/(?!search/)([^/?#]+)/media/(\d+)")
- root = "https://fapachi.com"
- test = (
- # NSFW
- ("https://fapachi.com/sonson/media/0082", {
- "pattern": (r"https://fapachi\.com/models/s/o/"
- r"sonson/1/full/sonson_0082\.jpeg"),
- "keyword": {
- "user": "sonson",
- "id" : "0082",
- },
- }),
- # NSFW
- ("https://fapachi.com/ferxiita/media/0159"),
- )
+ example = "https://fapachi.com/MODEL/media/12345"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -54,17 +42,10 @@ class FapachiUserExtractor(Extractor):
"""Extractor for all posts from a fapachi user"""
category = "fapachi"
subcategory = "user"
+ root = "https://fapachi.com"
pattern = (r"(?:https?://)?(?:www\.)?fapachi\.com"
r"/(?!search(?:/|$))([^/?#]+)(?:/page/(\d+))?$")
- root = "https://fapachi.com"
- test = (
- ("https://fapachi.com/sonson", {
- "pattern": FapachiPostExtractor.pattern,
- "range" : "1-50",
- "count" : 50,
- }),
- ("https://fapachi.com/ferxiita/page/3"),
- )
+ example = "https://fapachi.com/MODEL"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/fapello.py b/gallery_dl/extractor/fapello.py
index d6fcb4b..d4524e0 100644
--- a/gallery_dl/extractor/fapello.py
+++ b/gallery_dl/extractor/fapello.py
@@ -19,32 +19,7 @@ class FapelloPostExtractor(Extractor):
archive_fmt = "{type}_{model}_{id}"
pattern = (r"(?:https?://)?(?:www\.)?fapello\.com"
r"/(?!search/|popular_videos/)([^/?#]+)/(\d+)")
- test = (
- ("https://fapello.com/carrykey/530/", {
- "pattern": (r"https://fapello\.com/content/c/a"
- r"/carrykey/1000/carrykey_0530\.jpg"),
- "keyword": {
- "model": "carrykey",
- "id" : 530,
- "type" : "photo",
- "thumbnail": "",
- },
- }),
- ("https://fapello.com/vladislava-661/693/", {
- "pattern": (r"https://cdn\.fapello\.com/content/v/l"
- r"/vladislava-661/1000/vladislava-661_0693\.mp4"),
- "keyword": {
- "model": "vladislava-661",
- "id" : 693,
- "type" : "video",
- "thumbnail": ("https://fapello.com/content/v/l"
- "/vladislava-661/1000/vladislava-661_0693.jpg"),
- },
- }),
- ("https://fapello.com/carrykey/000/", {
- "exception": exception.NotFoundError,
- }),
- )
+ example = "https://fapello.com/MODEL/12345/"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -77,14 +52,7 @@ class FapelloModelExtractor(Extractor):
r"/(?!top-(?:likes|followers)|popular_videos"
r"|videos|trending|search/?$)"
r"([^/?#]+)/?$")
- test = (
- ("https://fapello.com/hyoon/", {
- "pattern": FapelloPostExtractor.pattern,
- "range" : "1-50",
- "count" : 50,
- }),
- ("https://fapello.com/kobaebeefboo/"),
- )
+ example = "https://fapello.com/model/"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -112,22 +80,7 @@ class FapelloPathExtractor(Extractor):
pattern = (r"(?:https?://)?(?:www\.)?fapello\.com"
r"/(?!search/?$)(top-(?:likes|followers)|videos|trending"
r"|popular_videos/[^/?#]+)/?$")
- test = (
- ("https://fapello.com/top-likes/", {
- "pattern": FapelloModelExtractor.pattern,
- "range" : "1-10",
- "count" : 10,
- }),
- ("https://fapello.com/videos/", {
- "pattern": FapelloPostExtractor.pattern,
- "range" : "1-10",
- "count" : 10,
- }),
- ("https://fapello.com/top-followers/"),
- ("https://fapello.com/trending/"),
- ("https://fapello.com/popular_videos/twelve_hours/"),
- ("https://fapello.com/popular_videos/week/"),
- )
+ example = "https://fapello.com/trending/"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index d44ff3c..ea32765 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2022 Mike Fährmann
+# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -20,12 +20,16 @@ class FlickrExtractor(Extractor):
filename_fmt = "{category}_{id}.{extension}"
directory_fmt = ("{category}", "{user[username]}")
archive_fmt = "{id}"
- cookiedomain = None
+ cookies_domain = None
+ request_interval = (1.0, 2.0)
+ request_interval_min = 0.2
def __init__(self, match):
Extractor.__init__(self, match)
- self.api = FlickrAPI(self)
self.item_id = match.group(1)
+
+ def _init(self):
+ self.api = FlickrAPI(self)
self.user = None
def items(self):
@@ -60,42 +64,7 @@ class FlickrImageExtractor(FlickrExtractor):
r"(?:(?:www\.|secure\.|m\.)?flickr\.com/photos/[^/?#]+/"
r"|[\w-]+\.static\.?flickr\.com/(?:\d+/)+)(\d+)"
r"|flic\.kr/p/([A-Za-z1-9]+))")
- test = (
- ("https://www.flickr.com/photos/departingyyz/16089302239", {
- "pattern": pattern,
- "content": ("3133006c6d657fe54cf7d4c46b82abbcb0efaf9f",
- "0821a28ee46386e85b02b67cf2720063440a228c"),
- "keyword": {
- "comments": int,
- "description": str,
- "extension": "jpg",
- "filename": "16089302239_de18cd8017_b",
- "id": 16089302239,
- "height": 683,
- "label": "Large",
- "media": "photo",
- "url": str,
- "views": int,
- "width": 1024,
- },
- }),
- ("https://secure.flickr.com/photos/departingyyz/16089302239"),
- ("https://m.flickr.com/photos/departingyyz/16089302239"),
- ("https://flickr.com/photos/departingyyz/16089302239"),
-
- ("https://www.flickr.com/photos/145617051@N08/46733161535", {
- "count": 1,
- "keyword": {"media": "video"},
- }),
- ("http://c2.staticflickr.com/2/1475/24531000464_9a7503ae68_b.jpg", {
- "pattern": pattern}),
- ("https://farm2.static.flickr.com/1035/1188352415_cb139831d0.jpg", {
- "pattern": pattern}),
- ("https://flic.kr/p/FPVo9U", {
- "pattern": pattern}),
- ("https://www.flickr.com/photos/zzz/16089302238", {
- "exception": exception.NotFoundError}),
- )
+ example = "https://www.flickr.com/photos/USER/12345"
def __init__(self, match):
FlickrExtractor.__init__(self, match)
@@ -141,18 +110,7 @@ class FlickrAlbumExtractor(FlickrExtractor):
"Albums", "{album[id]} {album[title]}")
archive_fmt = "a_{album[id]}_{id}"
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/(?:album|set)s(?:/(\d+))?"
- test = (
- (("https://www.flickr.com/photos/shona_s/albums/72157633471741607"), {
- "pattern": FlickrImageExtractor.pattern,
- "count": 6,
- }),
- ("https://www.flickr.com/photos/shona_s/albums", {
- "pattern": pattern,
- "count": 2,
- }),
- ("https://secure.flickr.com/photos/shona_s/albums"),
- ("https://m.flickr.com/photos/shona_s/albums"),
- )
+ example = "https://www.flickr.com/photos/USER/albums/12345"
def __init__(self, match):
FlickrExtractor.__init__(self, match)
@@ -190,11 +148,7 @@ class FlickrGalleryExtractor(FlickrExtractor):
"Galleries", "{gallery[gallery_id]} {gallery[title]}")
archive_fmt = "g_{gallery[id]}_{id}"
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/galleries/(\d+)"
- test = (("https://www.flickr.com/photos/flickr/"
- "galleries/72157681572514792/"), {
- "pattern": FlickrImageExtractor.pattern,
- "count": ">= 10",
- })
+ example = "https://www.flickr.com/photos/USER/galleries/12345/"
def __init__(self, match):
FlickrExtractor.__init__(self, match)
@@ -215,10 +169,7 @@ class FlickrGroupExtractor(FlickrExtractor):
directory_fmt = ("{category}", "Groups", "{group[groupname]}")
archive_fmt = "G_{group[nsid]}_{id}"
pattern = BASE_PATTERN + r"/groups/([^/?#]+)"
- test = ("https://www.flickr.com/groups/bird_headshots/", {
- "pattern": FlickrImageExtractor.pattern,
- "count": "> 150",
- })
+ example = "https://www.flickr.com/groups/NAME/"
def metadata(self):
self.group = self.api.urls_lookupGroup(self.item_id)
@@ -233,10 +184,7 @@ class FlickrUserExtractor(FlickrExtractor):
subcategory = "user"
archive_fmt = "u_{user[nsid]}_{id}"
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/?$"
- test = ("https://www.flickr.com/photos/shona_s/", {
- "pattern": FlickrImageExtractor.pattern,
- "count": 28,
- })
+ example = "https://www.flickr.com/photos/USER/"
def photos(self):
return self.api.people_getPhotos(self.user["nsid"])
@@ -248,10 +196,7 @@ class FlickrFavoriteExtractor(FlickrExtractor):
directory_fmt = ("{category}", "{user[username]}", "Favorites")
archive_fmt = "f_{user[nsid]}_{id}"
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/favorites"
- test = ("https://www.flickr.com/photos/shona_s/favorites", {
- "pattern": FlickrImageExtractor.pattern,
- "count": 4,
- })
+ example = "https://www.flickr.com/photos/USER/favorites"
def photos(self):
return self.api.favorites_getList(self.user["nsid"])
@@ -263,11 +208,7 @@ class FlickrSearchExtractor(FlickrExtractor):
directory_fmt = ("{category}", "Search", "{search[text]}")
archive_fmt = "s_{search}_{id}"
pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
- test = (
- ("https://flickr.com/search/?text=mountain"),
- ("https://flickr.com/search/?text=tree%20cloud%20house"
- "&color_codes=4&styles=minimalism"),
- )
+ example = "https://flickr.com/search/?text=QUERY"
def __init__(self, match):
FlickrExtractor.__init__(self, match)
@@ -289,8 +230,8 @@ class FlickrAPI(oauth.OAuth1API):
"""
API_URL = "https://api.flickr.com/services/rest/"
- API_KEY = "ac4fd7aa98585b9eee1ba761c209de68"
- API_SECRET = "3adb0f568dc68393"
+ API_KEY = "f8f78d1a40debf471f0b22fa2d00525f"
+ API_SECRET = "4f9dae1113e45556"
FORMATS = [
("o" , "Original" , None),
("6k", "X-Large 6K" , 6144),
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 76fb69e..93ac541 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -22,7 +22,6 @@ class FoolfuukaExtractor(BaseExtractor):
def __init__(self, match):
BaseExtractor.__init__(self, match)
- self.session.headers["Referer"] = self.root
if self.category == "b4k":
self.remote = self._remote_direct
@@ -109,43 +108,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
directory_fmt = ("{category}", "{board[shortname]}",
"{thread_num} {title|comment[:50]}")
pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)"
- test = (
- ("https://archive.4plebs.org/tg/thread/54059290", {
- "url": "fd823f17b5001442b941fddcd9ec91bafedfbc79",
- }),
- ("https://archived.moe/gd/thread/309639/", {
- "url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8",
- "content": "c27e2a7be3bc989b5dd859f7789cc854db3f5573",
- }),
- ("https://archived.moe/a/thread/159767162/", {
- "url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
- }),
- ("https://archiveofsins.com/h/thread/4668813/", {
- "url": "f612d287087e10a228ef69517cf811539db9a102",
- "content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
- }),
- ("https://arch.b4k.co/meta/thread/196/", {
- "url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
- }),
- ("https://desuarchive.org/a/thread/159542679/", {
- "url": "e7d624aded15a069194e38dc731ec23217a422fb",
- }),
- ("https://boards.fireden.net/sci/thread/11264294/", {
- "url": "61cab625c95584a12a30049d054931d64f8d20aa",
- }),
- ("https://archive.palanq.win/c/thread/4209598/", {
- "url": "1f9b5570d228f1f2991c827a6631030bc0e5933c",
- }),
- ("https://rbt.asia/g/thread/61487650/", {
- "url": "fadd274b25150a1bdf03a40c58db320fa3b617c4",
- }),
- ("https://archive.rebeccablacktech.com/g/thread/61487650/", {
- "url": "fadd274b25150a1bdf03a40c58db320fa3b617c4",
- }),
- ("https://thebarchive.com/b/thread/739772332/", {
- "url": "e8b18001307d130d67db31740ce57c8561b5d80c",
- }),
- )
+ example = "https://archived.moe/a/thread/12345/"
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
@@ -173,17 +136,7 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
"""Base extractor for FoolFuuka based boards/archives"""
subcategory = "board"
pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$"
- test = (
- ("https://archive.4plebs.org/tg/"),
- ("https://archived.moe/gd/"),
- ("https://archiveofsins.com/h/"),
- ("https://arch.b4k.co/meta/"),
- ("https://desuarchive.org/a/"),
- ("https://boards.fireden.net/sci/"),
- ("https://archive.palanq.win/c/"),
- ("https://rbt.asia/g/"),
- ("https://thebarchive.com/b/"),
- )
+ example = "https://archived.moe/a/"
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
@@ -215,18 +168,8 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
subcategory = "search"
directory_fmt = ("{category}", "search", "{search}")
pattern = BASE_PATTERN + r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
+ example = "https://archived.moe/_/search/text/QUERY/"
request_interval = 1.0
- test = (
- ("https://archive.4plebs.org/_/search/text/test/"),
- ("https://archived.moe/_/search/text/test/"),
- ("https://archiveofsins.com/_/search/text/test/"),
- ("https://archiveofsins.com/_/search/text/test/"),
- ("https://desuarchive.org/_/search/text/test/"),
- ("https://boards.fireden.net/_/search/text/test/"),
- ("https://archive.palanq.win/_/search/text/test/"),
- ("https://rbt.asia/_/search/text/test/"),
- ("https://thebarchive.com/_/search/text/test/"),
- )
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
@@ -281,17 +224,7 @@ class FoolfuukaGalleryExtractor(FoolfuukaExtractor):
subcategory = "gallery"
directory_fmt = ("{category}", "{board}", "gallery")
pattern = BASE_PATTERN + r"/([^/?#]+)/gallery(?:/(\d+))?"
- test = (
- ("https://archive.4plebs.org/tg/gallery/1"),
- ("https://archived.moe/gd/gallery/2"),
- ("https://archiveofsins.com/h/gallery/3"),
- ("https://arch.b4k.co/meta/gallery/"),
- ("https://desuarchive.org/a/gallery/5"),
- ("https://boards.fireden.net/sci/gallery/6"),
- ("https://archive.palanq.win/c/gallery"),
- ("https://rbt.asia/g/gallery/8"),
- ("https://thebarchive.com/b/gallery/9"),
- )
+ example = "https://archived.moe/a/gallery"
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index 57d37b7..b0699b0 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -53,13 +53,7 @@ class FoolslideChapterExtractor(FoolslideExtractor):
"{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
- test = (
- (("https://read.powermanga.org"
- "/read/one_piece_digital_colour_comics/en/0/75/"), {
- "url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
- "keyword": "a60c42f2634b7387899299d411ff494ed0ad6dbe",
- }),
- )
+ example = "https://read.powermanga.org/read/MANGA/en/0/123/"
def items(self):
page = self.request(self.gallery_url).text
@@ -103,23 +97,7 @@ class FoolslideMangaExtractor(FoolslideExtractor):
subcategory = "manga"
categorytransfer = True
pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
- test = (
- (("https://read.powermanga.org"
- "/series/one_piece_digital_colour_comics/"), {
- "count": ">= 1",
- "keyword": {
- "chapter": int,
- "chapter_minor": str,
- "chapter_string": str,
- "group": "PowerManga",
- "lang": "en",
- "language": "English",
- "manga": "One Piece Digital Colour Comics",
- "title": str,
- "volume": int,
- },
- }),
- )
+ example = "https://read.powermanga.org/series/MANGA/"
def items(self):
page = self.request(self.gallery_url).text
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index ec9cd94..56721d0 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -20,7 +20,8 @@ class FuraffinityExtractor(Extractor):
directory_fmt = ("{category}", "{user!l}")
filename_fmt = "{id}{title:? //}.{extension}"
archive_fmt = "{id}"
- cookiedomain = ".furaffinity.net"
+ cookies_domain = ".furaffinity.net"
+ cookies_names = ("a", "b")
root = "https://www.furaffinity.net"
_warning = True
@@ -29,6 +30,9 @@ class FuraffinityExtractor(Extractor):
self.user = match.group(1)
self.offset = 0
+ def _init(self):
+ self.external = self.config("external", False)
+
if self.config("descriptions") == "html":
self._process_description = str.strip
@@ -38,14 +42,12 @@ class FuraffinityExtractor(Extractor):
else:
self._new_layout = None
- def items(self):
-
if self._warning:
- if not self._check_cookies(("a", "b")):
+ if not self.cookies_check(self.cookies_names):
self.log.warning("no 'a' and 'b' session cookies set")
FuraffinityExtractor._warning = False
- external = self.config("external", False)
+ def items(self):
metadata = self.metadata()
for post_id in util.advance(self.posts(), self.offset):
post = self._parse_post(post_id)
@@ -55,7 +57,7 @@ class FuraffinityExtractor(Extractor):
yield Message.Directory, post
yield Message.Url, post["url"], post
- if external:
+ if self.external:
for url in text.extract_iter(
post["_description"], 'href="http', '"'):
yield Message.Queue, "http" + url, post
@@ -217,12 +219,7 @@ class FuraffinityGalleryExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's gallery"""
subcategory = "gallery"
pattern = BASE_PATTERN + r"/gallery/([^/?#]+)"
- test = ("https://www.furaffinity.net/gallery/mirlinthloth/", {
- "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
- r"/art/mirlinthloth/\d+/\d+.\w+\.\w+",
- "range": "45-50",
- "count": 6,
- })
+ example = "https://www.furaffinity.net/gallery/USER/"
def posts(self):
return self._pagination("gallery")
@@ -233,11 +230,7 @@ class FuraffinityScrapsExtractor(FuraffinityExtractor):
subcategory = "scraps"
directory_fmt = ("{category}", "{user!l}", "Scraps")
pattern = BASE_PATTERN + r"/scraps/([^/?#]+)"
- test = ("https://www.furaffinity.net/scraps/mirlinthloth/", {
- "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
- r"/art/[^/]+(/stories)?/\d+/\d+.\w+.",
- "count": ">= 3",
- })
+ example = "https://www.furaffinity.net/scraps/USER/"
def posts(self):
return self._pagination("scraps")
@@ -248,13 +241,7 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
subcategory = "favorite"
directory_fmt = ("{category}", "{user!l}", "Favorites")
pattern = BASE_PATTERN + r"/favorites/([^/?#]+)"
- test = ("https://www.furaffinity.net/favorites/mirlinthloth/", {
- "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
- r"/art/[^/]+/\d+/\d+.\w+\.\w+",
- "keyword": {"favorite_id": int},
- "range": "45-50",
- "count": 6,
- })
+ example = "https://www.furaffinity.net/favorites/USER/"
def posts(self):
return self._pagination_favorites()
@@ -271,19 +258,7 @@ class FuraffinitySearchExtractor(FuraffinityExtractor):
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
pattern = BASE_PATTERN + r"/search(?:/([^/?#]+))?/?[?&]([^#]+)"
- test = (
- ("https://www.furaffinity.net/search/?q=cute", {
- "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
- r"/art/[^/]+/\d+/\d+.\w+\.\w+",
- "range": "45-50",
- "count": 6,
- }),
- # first page of search results (#2402)
- ("https://www.furaffinity.net/search/?q=leaf&range=1day", {
- "range": "1-3",
- "count": 3,
- }),
- )
+ example = "https://www.furaffinity.net/search/?q=QUERY"
def __init__(self, match):
FuraffinityExtractor.__init__(self, match)
@@ -302,65 +277,7 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
"""Extractor for individual posts on furaffinity"""
subcategory = "post"
pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)"
- test = (
- ("https://www.furaffinity.net/view/21835115/", {
- "pattern": r"https://d\d*\.f(uraffinity|acdn)\.net/(download/)?art"
- r"/mirlinthloth/music/1488278723/1480267446.mirlinthlot"
- r"h_dj_fennmink_-_bude_s_4_ever\.mp3",
- "keyword": {
- "artist" : "mirlinthloth",
- "artist_url" : "mirlinthloth",
- "date" : "dt:2016-11-27 17:24:06",
- "description": "A Song made playing the game Cosmic DJ.",
- "extension" : "mp3",
- "filename" : r"re:\d+\.\w+_dj_fennmink_-_bude_s_4_ever",
- "id" : 21835115,
- "tags" : list,
- "title" : "Bude's 4 Ever",
- "url" : r"re:https://d\d?\.f(uraffinity|acdn)\.net/art",
- "user" : "mirlinthloth",
- "views" : int,
- "favorites" : int,
- "comments" : int,
- "rating" : "General",
- "fa_category": "Music",
- "theme" : "All",
- "species" : "Unspecified / Any",
- "gender" : "Any",
- "width" : 120,
- "height" : 120,
- },
- }),
- # 'external' option (#1492)
- ("https://www.furaffinity.net/view/42166511/", {
- "options": (("external", True),),
- "pattern": r"https://d\d*\.f(uraffinity|acdn)\.net/"
- r"|http://www\.postybirb\.com",
- "count": 2,
- }),
- # no tags (#2277)
- ("https://www.furaffinity.net/view/45331225/", {
- "keyword": {
- "artist": "Kota_Remminders",
- "artist_url": "kotaremminders",
- "date": "dt:2022-01-03 17:49:33",
- "fa_category": "Adoptables",
- "filename": "1641232173.kotaremminders_chidopts1",
- "gender": "Any",
- "height": 905,
- "id": 45331225,
- "rating": "General",
- "species": "Unspecified / Any",
- "tags": [],
- "theme": "All",
- "title": "REMINDER",
- "width": 1280,
- },
- }),
- ("https://furaffinity.net/view/21835115/"),
- ("https://sfw.furaffinity.net/view/21835115/"),
- ("https://www.furaffinity.net/full/21835115/"),
- )
+ example = "https://www.furaffinity.net/view/12345/"
def posts(self):
post_id = self.user
@@ -371,18 +288,14 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
class FuraffinityUserExtractor(FuraffinityExtractor):
"""Extractor for furaffinity user profiles"""
subcategory = "user"
- cookiedomain = None
+ cookies_domain = None
pattern = BASE_PATTERN + r"/user/([^/?#]+)"
- test = (
- ("https://www.furaffinity.net/user/mirlinthloth/", {
- "pattern": r"/gallery/mirlinthloth/$",
- }),
- ("https://www.furaffinity.net/user/mirlinthloth/", {
- "options": (("include", "all"),),
- "pattern": r"/(gallery|scraps|favorites)/mirlinthloth/$",
- "count": 3,
- }),
- )
+ example = "https://www.furaffinity.net/user/USER/"
+
+ def initialize(self):
+ pass
+
+ skip = Extractor.skip
def items(self):
base = "{}/{{}}/{}/".format(self.root, self.user)
@@ -397,11 +310,7 @@ class FuraffinityFollowingExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's watched users"""
subcategory = "following"
pattern = BASE_PATTERN + "/watchlist/by/([^/?#]+)"
- test = ("https://www.furaffinity.net/watchlist/by/mirlinthloth/", {
- "pattern": FuraffinityUserExtractor.pattern,
- "range": "176-225",
- "count": 50,
- })
+ example = "https://www.furaffinity.net/watchlist/by/USER/"
def items(self):
url = "{}/watchlist/by/{}/".format(self.root, self.user)
diff --git a/gallery_dl/extractor/fuskator.py b/gallery_dl/extractor/fuskator.py
index d6640f5..20afb5a 100644
--- a/gallery_dl/extractor/fuskator.py
+++ b/gallery_dl/extractor/fuskator.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -18,22 +18,7 @@ class FuskatorGalleryExtractor(GalleryExtractor):
category = "fuskator"
root = "https://fuskator.com"
pattern = r"(?:https?://)?fuskator\.com/(?:thumbs|expanded)/([^/?#]+)"
- test = (
- ("https://fuskator.com/thumbs/d0GnIzXrSKU/", {
- "pattern": r"https://i\d+.fuskator.com/large/d0GnIzXrSKU/.+\.jpg",
- "count": 22,
- "keyword": {
- "gallery_id": 473023,
- "gallery_hash": "d0GnIzXrSKU",
- "title": "re:Shaved Brunette Babe Maria Ryabushkina with ",
- "views": int,
- "score": float,
- "count": 22,
- "tags": list,
- },
- }),
- ("https://fuskator.com/expanded/gXpKzjgIidA/index.html"),
- )
+ example = "https://fuskator.com/thumbs/ID/"
def __init__(self, match):
self.gallery_hash = match.group(1)
@@ -82,13 +67,7 @@ class FuskatorSearchExtractor(Extractor):
subcategory = "search"
root = "https://fuskator.com"
pattern = r"(?:https?://)?fuskator\.com(/(?:search|page)/.+)"
- test = (
- ("https://fuskator.com/search/red_swimsuit/", {
- "pattern": FuskatorGalleryExtractor.pattern,
- "count": ">= 40",
- }),
- ("https://fuskator.com/page/3/swimsuit/quality/"),
- )
+ example = "https://fuskator.com/search/TAG/"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index e2173de..b62ff78 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -115,18 +115,7 @@ class GelbooruTagExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02TagExtractor):
"""Extractor for images from gelbooru.com based on search-tags"""
pattern = BASE_PATTERN + r"page=post&s=list&tags=([^&#]+)"
- test = (
- ("https://gelbooru.com/index.php?page=post&s=list&tags=bonocho", {
- "count": 5,
- }),
- ("https://gelbooru.com/index.php?page=post&s=list&tags=meiya_neon", {
- "range": "196-204",
- "url": "845a61aa1f90fb4ced841e8b7e62098be2e967bf",
- "pattern": r"https://img\d\.gelbooru\.com"
- r"/images/../../[0-9a-f]{32}\.jpg",
- "count": 9,
- }),
- )
+ example = "https://gelbooru.com/index.php?page=post&s=list&tags=TAG"
class GelbooruPoolExtractor(GelbooruBase,
@@ -134,11 +123,7 @@ class GelbooruPoolExtractor(GelbooruBase,
"""Extractor for gelbooru pools"""
per_page = 45
pattern = BASE_PATTERN + r"page=pool&s=show&id=(\d+)"
- test = (
- ("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
- "count": 6,
- }),
- )
+ example = "https://gelbooru.com/index.php?page=pool&s=show&id=12345"
skip = GelbooruBase._skip_offset
@@ -169,9 +154,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
"""Extractor for gelbooru favorites"""
per_page = 100
pattern = BASE_PATTERN + r"page=favorites&s=view&id=(\d+)"
- test = ("https://gelbooru.com/index.php?page=favorites&s=view&id=279415", {
- "count": 3,
- })
+ example = "https://gelbooru.com/index.php?page=favorites&s=view&id=12345"
skip = GelbooruBase._skip_offset
@@ -221,76 +204,21 @@ class GelbooruPostExtractor(GelbooruBase,
r"(?=(?:[^#]+&)?page=post(?:&|#|$))"
r"(?=(?:[^#]+&)?s=view(?:&|#|$))"
r"(?:[^#]+&)?id=(\d+)")
- test = (
- ("https://gelbooru.com/index.php?page=post&s=view&id=313638", {
- "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
- "count": 1,
- }),
-
- ("https://gelbooru.com/index.php?page=post&s=view&id=313638"),
- ("https://gelbooru.com/index.php?s=view&page=post&id=313638"),
- ("https://gelbooru.com/index.php?page=post&id=313638&s=view"),
- ("https://gelbooru.com/index.php?s=view&id=313638&page=post"),
- ("https://gelbooru.com/index.php?id=313638&page=post&s=view"),
- ("https://gelbooru.com/index.php?id=313638&s=view&page=post"),
-
- ("https://gelbooru.com/index.php?page=post&s=view&id=6018318", {
- "options": (("tags", True),),
- "content": "977caf22f27c72a5d07ea4d4d9719acdab810991",
- "keyword": {
- "tags_artist": "kirisaki_shuusei",
- "tags_character": str,
- "tags_copyright": "vocaloid",
- "tags_general": str,
- "tags_metadata": str,
- },
- }),
- # video
- ("https://gelbooru.com/index.php?page=post&s=view&id=5938076", {
- "content": "6360452fa8c2f0c1137749e81471238564df832a",
- "pattern": r"https://img\d\.gelbooru\.com/images"
- r"/22/61/226111273615049235b001b381707bd0\.webm",
- }),
- # notes
- ("https://gelbooru.com/index.php?page=post&s=view&id=5997331", {
- "options": (("notes", True),),
- "keyword": {
- "notes": [
- {
- "body": "Look over this way when you talk~",
- "height": 553,
- "width": 246,
- "x": 35,
- "y": 72,
- },
- {
- "body": "Hey~\nAre you listening~?",
- "height": 557,
- "width": 246,
- "x": 1233,
- "y": 109,
- },
- ],
- },
- }),
- )
+ example = "https://gelbooru.com/index.php?page=post&s=view&id=12345"
class GelbooruRedirectExtractor(GelbooruBase, Extractor):
subcategory = "redirect"
pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com"
r"/redirect\.php\?s=([^&#]+)")
- test = (("https://gelbooru.com/redirect.php?s=Ly9nZWxib29ydS5jb20vaW5kZXgu"
- "cGhwP3BhZ2U9cG9zdCZzPXZpZXcmaWQ9MTgzMDA0Ng=="), {
- "pattern": r"https://gelbooru.com/index.php"
- r"\?page=post&s=view&id=1830046"
- })
+ example = "https://gelbooru.com/redirect.php?s=BASE64"
def __init__(self, match):
Extractor.__init__(self, match)
- self.redirect_url = text.ensure_http_scheme(
- binascii.a2b_base64(match.group(1)).decode())
+ self.url_base64 = match.group(1)
def items(self):
+ url = text.ensure_http_scheme(binascii.a2b_base64(
+ self.url_base64).decode())
data = {"_extractor": GelbooruPostExtractor}
- yield Message.Queue, self.redirect_url, data
+ yield Message.Queue, url, data
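Note: for the redirect extractor, decoding the base64 parameter now happens lazily in items() instead of in the constructor. What the decode itself produces can be checked with the standard library alone; text.ensure_http_scheme, the gallery_dl helper used above, is assumed here to simply prepend the missing scheme:

    import binascii

    s = ("Ly9nZWxib29ydS5jb20vaW5kZXgu"
         "cGhwP3BhZ2U9cG9zdCZzPXZpZXcmaWQ9MTgzMDA0Ng==")
    url = binascii.a2b_base64(s).decode()
    # -> "//gelbooru.com/index.php?page=post&s=view&id=1830046"
    if url.startswith("//"):              # rough stand-in for ensure_http_scheme
        url = "https:" + url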
diff --git a/gallery_dl/extractor/gelbooru_v01.py b/gallery_dl/extractor/gelbooru_v01.py
index b6fbcb6..0b96048 100644
--- a/gallery_dl/extractor/gelbooru_v01.py
+++ b/gallery_dl/extractor/gelbooru_v01.py
@@ -90,24 +90,7 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
- test = (
- (("https://the-collection.booru.org"
- "/index.php?page=post&s=list&tags=parody"), {
- "range": "1-25",
- "count": 25,
- }),
- (("https://illusioncards.booru.org"
- "/index.php?page=post&s=list&tags=koikatsu"), {
- "range": "1-25",
- "count": 25,
- }),
- ("https://allgirl.booru.org/index.php?page=post&s=list&tags=dress", {
- "range": "1-25",
- "count": 25,
- }),
- ("https://drawfriends.booru.org/index.php?page=post&s=list&tags=all"),
- ("https://vidyart2.booru.org/index.php?page=post&s=list&tags=all"),
- )
+ example = "https://allgirl.booru.org/index.php?page=post&s=list&tags=TAG"
def __init__(self, match):
GelbooruV01Extractor.__init__(self, match)
@@ -128,21 +111,7 @@ class GelbooruV01FavoriteExtractor(GelbooruV01Extractor):
archive_fmt = "f_{favorite_id}_{id}"
per_page = 50
pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
- test = (
- (("https://the-collection.booru.org"
- "/index.php?page=favorites&s=view&id=1166"), {
- "count": 2,
- }),
- (("https://illusioncards.booru.org"
- "/index.php?page=favorites&s=view&id=84887"), {
- "count": 2,
- }),
- ("https://allgirl.booru.org/index.php?page=favorites&s=view&id=380", {
- "count": 4,
- }),
- ("https://drawfriends.booru.org/index.php?page=favorites&s=view&id=1"),
- ("https://vidyart2.booru.org/index.php?page=favorites&s=view&id=1"),
- )
+ example = "https://allgirl.booru.org/index.php?page=favorites&s=view&id=1"
def __init__(self, match):
GelbooruV01Extractor.__init__(self, match)
@@ -161,40 +130,7 @@ class GelbooruV01PostExtractor(GelbooruV01Extractor):
subcategory = "post"
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
- test = (
- (("https://the-collection.booru.org"
- "/index.php?page=post&s=view&id=100520"), {
- "url": "0329ac8588bb93cf242ca0edbe3e995b4ba554e8",
- "content": "1e585874e7b874f7937df1060dd1517fef2f4dfb",
- }),
- (("https://illusioncards.booru.org"
- "/index.php?page=post&s=view&id=82746"), {
- "url": "3f9cd2fadf78869b90bc5422f27b48f1af0e0909",
- "content": "159e60b92d05597bd1bb63510c2c3e4a4bada1dc",
- }),
- ("https://allgirl.booru.org/index.php?page=post&s=view&id=107213", {
- "url": "b416800d2d2b072f80d3b37cfca9cb806fb25d51",
- "content": "3e3c65e0854a988696e11adf0de52f8fa90a51c7",
- "keyword": {
- "created_at": "2021-02-13 16:27:39",
- "date": "dt:2021-02-13 16:27:39",
- "file_url": "https://img.booru.org/allgirl//images/107"
- "/2aaa0438d58fc7baa75a53b4a9621bb89a9d3fdb.jpg",
- "height": "1200",
- "id": "107213",
- "md5": "2aaa0438d58fc7baa75a53b4a9621bb89a9d3fdb",
- "rating": "s",
- "score": str,
- "source": "",
- "tags": "blush dress green_eyes green_hair hatsune_miku "
- "long_hair twintails vocaloid",
- "uploader": "Honochi31",
- "width": "1600"
- },
- }),
- ("https://drawfriends.booru.org/index.php?page=post&s=view&id=107474"),
- ("https://vidyart2.booru.org/index.php?page=post&s=view&id=39168"),
- )
+ example = "https://allgirl.booru.org/index.php?page=post&s=view&id=12345"
def __init__(self, match):
GelbooruV01Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 958c4b5..0864b9f 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -19,8 +19,7 @@ import re
class GelbooruV02Extractor(booru.BooruExtractor):
basecategory = "gelbooru_v02"
- def __init__(self, match):
- booru.BooruExtractor.__init__(self, match)
+ def _init(self):
self.api_key = self.config("api-key")
self.user_id = self.config("user-id")
@@ -184,6 +183,10 @@ INSTANCES = {
"root": "https://hypnohub.net",
"pattern": r"hypnohub\.net",
},
+ "xbooru": {
+ "root": "https://xbooru.com",
+ "pattern": r"xbooru\.com",
+ },
}
BASE_PATTERN = GelbooruV02Extractor.update(INSTANCES)
@@ -194,27 +197,7 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
- test = (
- ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
- "content": ("5c6ae9ee13e6d4bc9cb8bdce224c84e67fbfa36c",
- "622e80be3f496672c44aab5c47fbc6941c61bc79"),
- "pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
- "count": 2,
- }),
- ("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
- "url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
- "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
- }),
- ("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
- "count": ">= 64",
- }),
- ("https://tbib.org/index.php?page=post&s=list&tags=yuyaiyaui", {
- "count": ">= 120",
- }),
- ("https://hypnohub.net/index.php?page=post&s=list&tags=gonoike_biwa", {
- "url": "fe662b86d38c331fcac9c62af100167d404937dc",
- }),
- )
+ example = "https://safebooru.org/index.php?page=post&s=list&tags=TAG"
def __init__(self, match):
GelbooruV02Extractor.__init__(self, match)
@@ -233,21 +216,7 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
pattern = BASE_PATTERN + r"/index\.php\?page=pool&s=show&id=(\d+)"
- test = (
- ("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
- "count": 3,
- }),
- ("https://safebooru.org/index.php?page=pool&s=show&id=11", {
- "count": 5,
- }),
- ("https://realbooru.com/index.php?page=pool&s=show&id=1", {
- "count": 3,
- }),
- ("https://hypnohub.net/index.php?page=pool&s=show&id=61", {
- "url": "d314826280073441a2da609f70ee814d1f4b9407",
- "count": 3,
- }),
- )
+ example = "https://safebooru.org/index.php?page=pool&s=show&id=12345"
def __init__(self, match):
GelbooruV02Extractor.__init__(self, match)
@@ -299,23 +268,7 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
archive_fmt = "f_{favorite_id}_{id}"
per_page = 50
pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
- test = (
- ("https://rule34.xxx/index.php?page=favorites&s=view&id=1030218", {
- "count": 3,
- }),
- ("https://safebooru.org/index.php?page=favorites&s=view&id=17567", {
- "count": 2,
- }),
- ("https://realbooru.com/index.php?page=favorites&s=view&id=274", {
- "count": 2,
- }),
- ("https://tbib.org/index.php?page=favorites&s=view&id=7881", {
- "count": 3,
- }),
- ("https://hypnohub.net/index.php?page=favorites&s=view&id=43546", {
- "count": 3,
- }),
- )
+ example = "https://safebooru.org/index.php?page=favorites&s=view&id=12345"
def __init__(self, match):
GelbooruV02Extractor.__init__(self, match)
@@ -336,112 +289,7 @@ class GelbooruV02PostExtractor(GelbooruV02Extractor):
subcategory = "post"
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
- test = (
- ("https://rule34.xxx/index.php?page=post&s=view&id=863", {
- "pattern": r"https://api-cdn\.rule34\.xxx/images"
- r"/1/6aafbdb3e22f3f3b412ea2cf53321317a37063f3\.jpg",
- "content": ("a43f418aa350039af0d11cae501396a33bbe2201",
- "67b516295950867e1c1ab6bc13b35d3b762ed2a3"),
- "options": (("tags", True), ("notes", True)),
- "keyword": {
- "tags_artist": "reverse_noise yamu_(reverse_noise)",
- "tags_character": "hong_meiling",
- "tags_copyright": "touhou",
- "tags_general": str,
- "tags_metadata": "censored translated",
- "notes": [
- {
- "body": "It feels angry, I'm losing myself... "
- "It won't calm down!",
- "height": 65,
- "id": 93586,
- "width": 116,
- "x": 22,
- "y": 333,
- },
- {
- "body": "REPUTATION OF RAGE",
- "height": 272,
- "id": 93587,
- "width": 199,
- "x": 78,
- "y": 442,
- },
- ],
-
- },
- }),
- ("https://hypnohub.net/index.php?page=post&s=view&id=1439", {
- "pattern": r"https://hypnohub\.net/images"
- r"/90/24/90245c3c5250c2a8173255d3923a010b\.jpg",
- "content": "5987c5d2354f22e5fa9b7ee7ce4a6f7beb8b2b71",
- "options": (("tags", True), ("notes", True)),
- "keyword": {
- "tags_artist": "brokenteapot",
- "tags_character": "hsien-ko",
- "tags_copyright": "capcom darkstalkers",
- "tags_general": str,
- "tags_metadata": "dialogue text translated",
- "notes": [
- {
- "body": "Master Master Master "
- "Master Master Master",
- "height": 83,
- "id": 10577,
- "width": 129,
- "x": 259,
- "y": 20,
- },
- {
- "body": "Response Response Response "
- "Response Response Response",
- "height": 86,
- "id": 10578,
- "width": 125,
- "x": 126,
- "y": 20,
- },
- {
- "body": "Obedience Obedience Obedience "
- "Obedience Obedience Obedience",
- "height": 80,
- "id": 10579,
- "width": 98,
- "x": 20,
- "y": 20,
- },
- ],
-
- },
- }),
- ("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
- "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
- "content": "93b293b27dabd198afafabbaf87c49863ac82f27",
- "options": (("tags", True),),
- "keyword": {
- "tags_artist": "kawanakajima",
- "tags_character": "heath_ledger ronald_mcdonald the_joker",
- "tags_copyright": "dc_comics mcdonald's the_dark_knight",
- "tags_general": str,
- },
- }),
- ("https://realbooru.com/index.php?page=post&s=view&id=668483", {
- "pattern": r"https://realbooru\.com//?images/dc/b5"
- r"/dcb5c0ce9ec0bf74a6930608985f4719\.jpeg",
- "content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
- "options": (("tags", True),),
- "keyword": {
- "tags_general": "1girl blonde blonde_hair blue_eyes cute "
- "female female_only looking_at_viewer smile "
- "solo solo_female teeth",
- "tags_model": "jennifer_lawrence",
- },
- }),
- ("https://tbib.org/index.php?page=post&s=view&id=9233957", {
- "url": "5a6ebe07bfff8e6d27f7c30b5480f27abcb577d2",
- "content": "1c3831b6fbaa4686e3c79035b5d98460b1c85c43",
- }),
- )
+ example = "https://safebooru.org/index.php?page=post&s=view&id=12345"
def __init__(self, match):
GelbooruV02Extractor.__init__(self, match)
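Note: another recurring change in this commit is that per-extractor setup moves out of __init__(self, match) into an _init() hook, presumably invoked by the base Extractor during its initialize() step (the initialize() overrides elsewhere in this diff point the same way). A sketch of the before/after for a gelbooru_v02-style base class; only _init, initialize and config come from the hunks above, the rest is illustrative:

    class GelbooruV02LikeExtractor(booru.BooruExtractor):   # illustrative
        basecategory = "gelbooru_v02"

        # before: constructor boilerplate repeated in every subclass
        # def __init__(self, match):
        #     booru.BooruExtractor.__init__(self, match)
        #     self.api_key = self.config("api-key")

        # after: setup runs in a hook, no need to forward the match argument
        def _init(self):
            self.api_key = self.config("api-key")
            self.user_id = self.config("user-id")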
diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py
index 4ab26ae..16d4340 100644
--- a/gallery_dl/extractor/generic.py
+++ b/gallery_dl/extractor/generic.py
@@ -34,31 +34,7 @@ class GenericExtractor(Extractor):
r"(?:\?(?P<query>[^#]*))?" # optional query
r"(?:\#(?P<fragment>.*))?" # optional fragment
)
-
- test = (
- ("generic:https://www.nongnu.org/lzip/", {
- "count": 1,
- "content": "40be5c77773d3e91db6e1c5df720ee30afb62368",
- "keyword": {
- "description": "Lossless data compressor",
- "imageurl": "https://www.nongnu.org/lzip/lzip.png",
- "keywords": "lzip, clzip, plzip, lzlib, LZMA, bzip2, "
- "gzip, data compression, GNU, free software",
- "pageurl": "https://www.nongnu.org/lzip/",
- },
- }),
- # internationalized domain name
- ("generic:https://räksmörgås.josefsson.org/", {
- "count": 2,
- "pattern": "^https://räksmörgås.josefsson.org/",
- }),
- ("g:https://en.wikipedia.org/Main_Page"),
- ("g:https://example.org/path/to/file?que=1?&ry=2/#fragment"),
- ("g:https://example.org/%27%3C%23/%23%3E%27.htm?key=%3C%26%3E"),
- ("generic:https://en.wikipedia.org/Main_Page"),
- ("generic:https://example.org/path/to/file?que=1?&ry=2/#fragment"),
- ("generic:https://example.org/%27%3C%23/%23%3E%27.htm?key=%3C%26%3E"),
- )
+ example = "generic:https://www.nongnu.org/lzip/"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
deleted file mode 100644
index ccebdf9..0000000
--- a/gallery_dl/extractor/gfycat.py
+++ /dev/null
@@ -1,305 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2017-2023 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://gfycat.com/"""
-
-from .common import Extractor, Message
-from .. import text, exception
-from ..cache import cache
-
-
-class GfycatExtractor(Extractor):
- """Base class for gfycat extractors"""
- category = "gfycat"
- filename_fmt = "{category}_{gfyName}{title:?_//}.{extension}"
- archive_fmt = "{gfyName}"
- root = "https://gfycat.com"
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.key = match.group(1).lower()
-
- formats = self.config("format")
- if formats is None:
- formats = ("mp4", "webm", "mobile", "gif")
- elif isinstance(formats, str):
- formats = (formats, "mp4", "webm", "mobile", "gif")
- self.formats = formats
-
- def items(self):
- metadata = self.metadata()
- for gfycat in self.gfycats():
- if "gfyName" not in gfycat:
- self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"])
- continue
-
- url = self._process(gfycat)
- if not url:
- self.log.warning("Skipping '%s' (format not available)",
- gfycat["gfyId"])
- continue
-
- gfycat.update(metadata)
- yield Message.Directory, gfycat
- yield Message.Url, url, gfycat
-
- def _process(self, gfycat):
- gfycat["_fallback"] = formats = self._formats(gfycat)
- gfycat["date"] = text.parse_timestamp(gfycat.get("createDate"))
- return next(formats, None)
-
- def _formats(self, gfycat):
- for fmt in self.formats:
- key = fmt + "Url"
- if key in gfycat:
- url = gfycat[key]
- if url.startswith("http:"):
- url = "https" + url[4:]
- gfycat["extension"] = url.rpartition(".")[2]
- yield url
-
- def metadata(self):
- return {}
-
- def gfycats(self):
- return ()
-
-
-class GfycatUserExtractor(GfycatExtractor):
- """Extractor for gfycat user profiles"""
- subcategory = "user"
- directory_fmt = ("{category}", "{username}")
- pattern = r"(?:https?://)?gfycat\.com/@([^/?#]+)/?(?:$|\?|#)"
- test = ("https://gfycat.com/@gretta", {
- "pattern": r"https://giant\.gfycat\.com/[A-Za-z]+\.mp4",
- "count": ">= 100",
- })
-
- def gfycats(self):
- if self.key == "me":
- return GfycatAPI(self).me()
- return GfycatAPI(self).user(self.key)
-
-
-class GfycatCollectionExtractor(GfycatExtractor):
- """Extractor for a gfycat collection"""
- subcategory = "collection"
- directory_fmt = ("{category}", "{collection_owner}",
- "{collection_name|collection_id}")
- pattern = (r"(?:https?://)?gfycat\.com/@([^/?#]+)/collections"
- r"/(\w+)(?:/([^/?#]+))?")
- test = ("https://gfycat.com/@reactions/collections/nHgy2DtE/no-text", {
- "pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4",
- "count": ">= 100",
- })
-
- def __init__(self, match):
- GfycatExtractor.__init__(self, match)
- self.collection_id = match.group(2)
- self.collection_name = match.group(3)
-
- def metadata(self):
- return {
- "collection_owner": self.key,
- "collection_name" : self.collection_name,
- "collection_id" : self.collection_id,
- }
-
- def gfycats(self):
- return GfycatAPI(self).collection(self.key, self.collection_id)
-
-
-class GfycatCollectionsExtractor(GfycatExtractor):
- """Extractor for a gfycat user's collections"""
- subcategory = "collections"
- pattern = r"(?:https?://)?gfycat\.com/@([^/?#]+)/collections/?(?:$|\?|#)"
- test = ("https://gfycat.com/@sannahparker/collections", {
- "pattern": GfycatCollectionExtractor.pattern,
- "count": ">= 20",
- })
-
- def items(self):
- for col in GfycatAPI(self).collections(self.key):
- url = "https://gfycat.com/@{}/collections/{}/{}".format(
- col["userId"], col["folderId"], col["linkText"])
- col["_extractor"] = GfycatCollectionExtractor
- yield Message.Queue, url, col
-
-
-class GfycatSearchExtractor(GfycatExtractor):
- """Extractor for gfycat search results"""
- subcategory = "search"
- directory_fmt = ("{category}", "Search", "{search}")
- pattern = r"(?:https?://)?gfycat\.com/gifs/search/([^/?#]+)"
- test = ("https://gfycat.com/gifs/search/funny+animals", {
- "pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4",
- "archive": False,
- "range": "100-300",
- "count": "> 200",
- })
-
- def metadata(self):
- self.key = text.unquote(self.key).replace("+", " ")
- return {"search": self.key}
-
- def gfycats(self):
- return GfycatAPI(self).search(self.key)
-
-
-class GfycatImageExtractor(GfycatExtractor):
- """Extractor for individual images from gfycat.com"""
- subcategory = "image"
- pattern = (r"(?:https?://)?(?:\w+\.)?gfycat\.com"
- r"/(?:gifs/detail/|\w+/)?([A-Za-z]{8,})")
- test = (
- ("https://gfycat.com/GrayGenerousCowrie", {
- "url": "e0b5e1d7223108249b15c3c7898dd358dbfae045",
- "content": "5786028e04b155baa20b87c5f4f77453cd5edc37",
- "keyword": {
- "gfyId": "graygenerouscowrie",
- "gfyName": "GrayGenerousCowrie",
- "gfyNumber": 755075459,
- "title": "Bottom's up",
- "username": "jackson3oh3",
- "createDate": 1495884169,
- "date": "dt:2017-05-27 11:22:49",
- "md5": "a4796e05b0db9ba9ce5140145cd318aa",
- "width": 400,
- "height": 224,
- "frameRate": 23.0,
- "numFrames": 158.0,
- "views": int,
- },
- }),
- (("https://thumbs.gfycat.com/SillyLameIsabellinewheatear"
- "-size_restricted.gif"), {
- "url": "13b32e6cc169d086577d7dd3fd36ee6cdbc02726",
- }),
- ("https://gfycat.com/detail/UnequaledHastyAnkole?tagname=aww", {
- "url": "e24c9f69897fd223343782425a429c5cab6a768e",
- }),
- # retry 404'ed videos on redgifs (#874)
- ("https://www.gfycat.com/foolishforkedabyssiniancat", {
- "pattern": "https://redgifs.com/watch/foolishforkedabyssiniancat",
- }),
- # malformed API response (#902)
- ("https://gfycat.com/illexcitablehairstreak", {
- "count": 0,
- }),
- ("https://gfycat.com/gifs/detail/UnequaledHastyAnkole"),
- ("https://gfycat.com/ifr/UnequaledHastyAnkole"),
- ("https://gfycat.com/ru/UnequaledHastyAnkole"),
- )
-
- def items(self):
- try:
- gfycat = GfycatAPI(self).gfycat(self.key)
- except exception.HttpError:
- from .redgifs import RedgifsImageExtractor
- url = "https://redgifs.com/watch/" + self.key
- data = {"_extractor": RedgifsImageExtractor}
- yield Message.Queue, url, data
- else:
- if "gfyName" not in gfycat:
- self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"])
- return
- url = self._process(gfycat)
- if not url:
- self.log.warning("Skipping '%s' (format not available)",
- gfycat["gfyId"])
- return
- yield Message.Directory, gfycat
- yield Message.Url, url, gfycat
-
-
-class GfycatAPI():
- API_ROOT = "https://api.gfycat.com"
-
- def __init__(self, extractor):
- self.extractor = extractor
- self.headers = {}
- self.username, self.password = extractor._get_auth_info()
-
- def collection(self, user, collection):
- endpoint = "/v1/users/{}/collections/{}/gfycats".format(
- user, collection)
- params = {"count": 100}
- return self._pagination(endpoint, params)
-
- def collections(self, user):
- endpoint = "/v1/users/{}/collections".format(user)
- params = {"count": 100}
- return self._pagination(endpoint, params, "gfyCollections")
-
- def gfycat(self, gfycat_id):
- endpoint = "/v1/gfycats/" + gfycat_id
- return self._call(endpoint)["gfyItem"]
-
- def me(self):
- endpoint = "/v1/me/gfycats"
- params = {"count": 100}
- return self._pagination(endpoint, params)
-
- def search(self, query):
- endpoint = "/v1/gfycats/search"
- params = {"search_text": query, "count": 150}
- return self._pagination(endpoint, params)
-
- def user(self, user):
- endpoint = "/v1/users/{}/gfycats".format(user.lower())
- params = {"count": 100}
- return self._pagination(endpoint, params)
-
- def authenticate(self):
- self.headers["Authorization"] = \
- self._authenticate_impl(self.username, self.password)
-
- @cache(maxage=3600, keyarg=1)
- def _authenticate_impl(self, username, password):
- self.extractor.log.info("Logging in as %s", username)
-
- url = "https://weblogin.gfycat.com/oauth/webtoken"
- headers = {"Origin": "https://gfycat.com"}
- data = {
- "access_key": "Anr96uuqt9EdamSCwK4txKPjMsf2"
- "M95Rfa5FLLhPFucu8H5HTzeutyAa",
- }
- response = self.extractor.request(
- url, method="POST", headers=headers, json=data).json()
-
- url = "https://weblogin.gfycat.com/oauth/weblogin"
- headers["authorization"] = "Bearer " + response["access_token"]
- data = {
- "grant_type": "password",
- "username" : username,
- "password" : password,
- }
- response = self.extractor.request(
- url, method="POST", headers=headers, json=data, fatal=None).json()
-
- if "errorMessage" in response:
- raise exception.AuthenticationError(
- response["errorMessage"]["description"])
- return "Bearer " + response["access_token"]
-
- def _call(self, endpoint, params=None):
- if self.username:
- self.authenticate()
-
- url = self.API_ROOT + endpoint
- return self.extractor.request(
- url, params=params, headers=self.headers).json()
-
- def _pagination(self, endpoint, params, key="gfycats"):
- while True:
- data = self._call(endpoint, params)
- yield from data[key]
-
- if not data["cursor"]:
- return
- params["cursor"] = data["cursor"]
diff --git a/gallery_dl/extractor/gofile.py b/gallery_dl/extractor/gofile.py
index 044dddb..3928792 100644
--- a/gallery_dl/extractor/gofile.py
+++ b/gallery_dl/extractor/gofile.py
@@ -4,6 +4,8 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
+"""Extractors for https://gofile.io/"""
+
from .common import Extractor, Message
from .. import text, exception
from ..cache import cache, memcache
@@ -17,49 +19,7 @@ class GofileFolderExtractor(Extractor):
directory_fmt = ("{category}", "{name} ({code})")
archive_fmt = "{id}"
pattern = r"(?:https?://)?(?:www\.)?gofile\.io/d/([^/?#]+)"
- test = (
- ("https://gofile.io/d/k6BomI", {
- "pattern": r"https://store\d+\.gofile\.io/download"
- r"/\w{8}-\w{4}-\w{4}-\w{4}-\w{12}"
- r"/test-%E3%83%86%E3%82%B9%E3%83%88-%2522%26!\.png",
- "keyword": {
- "createTime": int,
- "directLink": "re:https://store5.gofile.io/download/direct/.+",
- "downloadCount": int,
- "extension": "png",
- "filename": "test-テスト-%22&!",
- "folder": {
- "childs": [
- "b0367d79-b8ba-407f-8342-aaf8eb815443",
- "7fd4a36a-c1dd-49ff-9223-d93f7d24093f"
- ],
- "code": "k6BomI",
- "createTime": 1654076165,
- "id": "fafb59f9-a7c7-4fea-a098-b29b8d97b03c",
- "name": "root",
- "public": True,
- "totalDownloadCount": int,
- "totalSize": 182,
- "type": "folder"
- },
- "id": r"re:\w{8}-\w{4}-\w{4}-\w{4}-\w{12}",
- "link": r"re:https://store5.gofile.io/download/.+\.png",
- "md5": "re:[0-9a-f]{32}",
- "mimetype": "image/png",
- "name": "test-テスト-%22&!.png",
- "num": int,
- "parentFolder": "fafb59f9-a7c7-4fea-a098-b29b8d97b03c",
- "serverChoosen": "store5",
- "size": 182,
- "thumbnail": r"re:https://store5.gofile.io/download/.+\.png",
- "type": "file"
- },
- }),
- ("https://gofile.io/d/7fd4a36a-c1dd-49ff-9223-d93f7d24093f", {
- "options": (("website-token", None),),
- "content": "0c8768055e4e20e7c7259608b67799171b691140",
- }),
- )
+ example = "https://gofile.io/d/ID"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -72,7 +32,7 @@ class GofileFolderExtractor(Extractor):
token = self.config("api-token")
if not token:
token = self._create_account()
- self.session.cookies.set("accountToken", token, domain=".gofile.io")
+ self.cookies.set("accountToken", token, domain=".gofile.io")
self.api_token = token
self.website_token = (self.config("website-token") or
diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py
index 5b561ea..a522140 100644
--- a/gallery_dl/extractor/hbrowse.py
+++ b/gallery_dl/extractor/hbrowse.py
@@ -47,11 +47,7 @@ class HbrowseChapterExtractor(HbrowseBase, ChapterExtractor):
"{page:>03}.{extension}")
archive_fmt = "{manga_id}_{chapter}_{page}"
pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/(\d+)/c(\d+))"
- test = ("https://www.hbrowse.com/10363/c00000", {
- "url": "6feefbc9f4b98e20d8425ddffa9dd111791dc3e6",
- "keyword": "274996f6c809e5250b6ff3abbc5147e29f89d9a5",
- "content": "44578ebbe176c2c27434966aef22945787e2781e",
- })
+ example = "https://www.hbrowse.com/12345/c00000"
def __init__(self, match):
self.path, self.gid, self.chapter = match.groups()
@@ -75,10 +71,7 @@ class HbrowseMangaExtractor(HbrowseBase, MangaExtractor):
chapterclass = HbrowseChapterExtractor
reverse = False
pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/\d+)/?$"
- test = ("https://www.hbrowse.com/10363", {
- "url": "b89682bfb86c11d2af0dc47463804ec3ac4aadd6",
- "keyword": "4b15fda1858a69de1fbf5afddfe47dd893397312",
- })
+ example = "https://www.hbrowse.com/12345"
def chapters(self, page):
results = []
diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py
index e771a4f..9ab1411 100644
--- a/gallery_dl/extractor/hentai2read.py
+++ b/gallery_dl/extractor/hentai2read.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -23,31 +23,7 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
"""Extractor for a single manga chapter from hentai2read.com"""
archive_fmt = "{chapter_id}_{page}"
pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?#]+/([^/?#]+))"
- test = (
- ("https://hentai2read.com/amazon_elixir/1/", {
- "url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
- "keyword": "85645b02d34aa11b3deb6dadd7536863476e1bad",
- }),
- ("https://hentai2read.com/popuni_kei_joshi_panic/2.5/", {
- "pattern": r"https://hentaicdn\.com/hentai"
- r"/13088/2\.5y/ccdn00\d+\.jpg",
- "count": 36,
- "keyword": {
- "author": "Kurisu",
- "chapter": 2,
- "chapter_id": 75152,
- "chapter_minor": ".5",
- "count": 36,
- "lang": "en",
- "language": "English",
- "manga": "Popuni Kei Joshi Panic!",
- "manga_id": 13088,
- "page": int,
- "title": "Popuni Kei Joshi Panic! 2.5",
- "type": "Original",
- },
- }),
- )
+ example = "https://hentai2read.com/TITLE/1/"
def __init__(self, match):
self.chapter = match.group(2)
@@ -85,31 +61,7 @@ class Hentai2readMangaExtractor(Hentai2readBase, MangaExtractor):
"""Extractor for hmanga from hentai2read.com"""
chapterclass = Hentai2readChapterExtractor
pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?#]+)/?$"
- test = (
- ("https://hentai2read.com/amazon_elixir/", {
- "url": "273073752d418ec887d7f7211e42b832e8c403ba",
- "keyword": "5c1b712258e78e120907121d3987c71f834d13e1",
- }),
- ("https://hentai2read.com/oshikage_riot/", {
- "url": "6595f920a3088a15c2819c502862d45f8eb6bea6",
- "keyword": "a2e9724acb221040d4b29bf9aa8cb75b2240d8af",
- }),
- ("https://hentai2read.com/popuni_kei_joshi_panic/", {
- "pattern": Hentai2readChapterExtractor.pattern,
- "range": "2-3",
- "keyword": {
- "chapter": int,
- "chapter_id": int,
- "chapter_minor": ".5",
- "lang": "en",
- "language": "English",
- "manga": "Popuni Kei Joshi Panic!",
- "manga_id": 13088,
- "title": str,
- "type": "Original",
- },
- }),
- )
+ example = "https://hentai2read.com/TITLE/"
def chapters(self, page):
results = []
diff --git a/gallery_dl/extractor/hentaicosplays.py b/gallery_dl/extractor/hentaicosplays.py
index 593a846..62df192 100644
--- a/gallery_dl/extractor/hentaicosplays.py
+++ b/gallery_dl/extractor/hentaicosplays.py
@@ -21,43 +21,16 @@ class HentaicosplaysGalleryExtractor(GalleryExtractor):
pattern = r"((?:https?://)?(?:\w{2}\.)?" \
r"(hentai-cosplays|hentai-img|porn-images-xxx)\.com)/" \
r"(?:image|story)/([\w-]+)"
- test = (
- ("https://hentai-cosplays.com/image/---devilism--tide-kurihara-/", {
- "pattern": r"https://static\d?.hentai-cosplays.com/upload/"
- r"\d+/\d+/\d+/\d+.jpg$",
- "keyword": {
- "count": 18,
- "site": "hentai-cosplays",
- "slug": "---devilism--tide-kurihara-",
- "title": "艦 こ れ-devilism の tide Kurihara 憂",
- },
- }),
- ("https://fr.porn-images-xxx.com/image/enako-enako-24/", {
- "pattern": r"https://static\d?.porn-images-xxx.com/upload/"
- r"\d+/\d+/\d+/\d+.jpg$",
- "keyword": {
- "count": 11,
- "site": "porn-images-xxx",
- "title": str,
- },
- }),
- ("https://ja.hentai-img.com/image/hollow-cora-502/", {
- "pattern": r"https://static\d?.hentai-img.com/upload/"
- r"\d+/\d+/\d+/\d+.jpg$",
- "keyword": {
- "count": 2,
- "site": "hentai-img",
- "title": str,
- },
- }),
- )
+ example = "https://hentai-cosplays.com/image/TITLE/"
def __init__(self, match):
root, self.site, self.slug = match.groups()
self.root = text.ensure_http_scheme(root)
url = "{}/story/{}/".format(self.root, self.slug)
GalleryExtractor.__init__(self, match, url)
- self.session.headers["Referer"] = url
+
+ def _init(self):
+ self.session.headers["Referer"] = self.gallery_url
def metadata(self, page):
title = text.extr(page, "<title>", "</title>")
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index e01a4ed..4c02000 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -20,7 +20,7 @@ class HentaifoundryExtractor(Extractor):
directory_fmt = ("{category}", "{user}")
filename_fmt = "{category}_{index}_{title}.{extension}"
archive_fmt = "{index}"
- cookiedomain = "www.hentai-foundry.com"
+ cookies_domain = "www.hentai-foundry.com"
root = "https://www.hentai-foundry.com"
per_page = 25
@@ -123,14 +123,14 @@ class HentaifoundryExtractor(Extractor):
def _init_site_filters(self):
"""Set site-internal filters to show all images"""
- if self.session.cookies.get("PHPSESSID", domain=self.cookiedomain):
+ if self.cookies.get("PHPSESSID", domain=self.cookies_domain):
return
url = self.root + "/?enterAgree=1"
self.request(url, method="HEAD")
- csrf_token = self.session.cookies.get(
- "YII_CSRF_TOKEN", domain=self.cookiedomain)
+ csrf_token = self.cookies.get(
+ "YII_CSRF_TOKEN", domain=self.cookies_domain)
if not csrf_token:
self.log.warning("Unable to update site content filters")
return
@@ -168,7 +168,10 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor):
"""Extractor for a hentaifoundry user profile"""
subcategory = "user"
pattern = BASE_PATTERN + r"/user/([^/?#]+)/profile"
- test = ("https://www.hentai-foundry.com/user/Tenpura/profile",)
+ example = "https://www.hentai-foundry.com/user/USER/profile"
+
+ def initialize(self):
+ pass
def items(self):
root = self.root
@@ -189,12 +192,7 @@ class HentaifoundryPicturesExtractor(HentaifoundryExtractor):
"""Extractor for all pictures of a hentaifoundry user"""
subcategory = "pictures"
pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)(?:/page/(\d+))?/?$"
- test = (
- ("https://www.hentai-foundry.com/pictures/user/Tenpura", {
- "url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28",
- }),
- ("https://www.hentai-foundry.com/pictures/user/Tenpura/page/3"),
- )
+ example = "https://www.hentai-foundry.com/pictures/user/USER"
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match)
@@ -206,13 +204,7 @@ class HentaifoundryScrapsExtractor(HentaifoundryExtractor):
subcategory = "scraps"
directory_fmt = ("{category}", "{user}", "Scraps")
pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)/scraps"
- test = (
- ("https://www.hentai-foundry.com/pictures/user/Evulchibi/scraps", {
- "url": "7cd9c6ec6258c4ab8c44991f7731be82337492a7",
- }),
- ("https://www.hentai-foundry.com"
- "/pictures/user/Evulchibi/scraps/page/3"),
- )
+ example = "https://www.hentai-foundry.com/pictures/user/USER/scraps"
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match)
@@ -226,13 +218,7 @@ class HentaifoundryFavoriteExtractor(HentaifoundryExtractor):
directory_fmt = ("{category}", "{user}", "Favorites")
archive_fmt = "f_{user}_{index}"
pattern = BASE_PATTERN + r"/user/([^/?#]+)/faves/pictures"
- test = (
- ("https://www.hentai-foundry.com/user/Tenpura/faves/pictures", {
- "url": "56f9ae2e89fe855e9fe1da9b81e5ec6212b0320b",
- }),
- ("https://www.hentai-foundry.com"
- "/user/Tenpura/faves/pictures/page/3"),
- )
+ example = "https://www.hentai-foundry.com/user/USER/faves/pictures"
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match)
@@ -246,10 +232,7 @@ class HentaifoundryRecentExtractor(HentaifoundryExtractor):
directory_fmt = ("{category}", "Recent Pictures", "{date}")
archive_fmt = "r_{index}"
pattern = BASE_PATTERN + r"/pictures/recent/(\d\d\d\d-\d\d-\d\d)"
- test = ("https://www.hentai-foundry.com/pictures/recent/2018-09-20", {
- "pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/?#]+/\d+/",
- "range": "20-30",
- })
+ example = "https://www.hentai-foundry.com/pictures/recent/1970-01-01"
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match)
@@ -265,10 +248,7 @@ class HentaifoundryPopularExtractor(HentaifoundryExtractor):
directory_fmt = ("{category}", "Popular Pictures")
archive_fmt = "p_{index}"
pattern = BASE_PATTERN + r"/pictures/popular()"
- test = ("https://www.hentai-foundry.com/pictures/popular", {
- "pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/?#]+/\d+/",
- "range": "20-30",
- })
+ example = "https://www.hentai-foundry.com/pictures/popular"
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match)
@@ -280,34 +260,8 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
subcategory = "image"
pattern = (r"(https?://)?(?:www\.|pictures\.)?hentai-foundry\.com"
r"/(?:pictures/user|[^/?#])/([^/?#]+)/(\d+)")
- test = (
- (("https://www.hentai-foundry.com"
- "/pictures/user/Tenpura/407501/shimakaze"), {
- "url": "fbf2fd74906738094e2575d2728e8dc3de18a8a3",
- "content": "91bf01497c39254b6dfb234a18e8f01629c77fd1",
- "keyword": {
- "artist" : "Tenpura",
- "date" : "dt:2016-02-22 14:41:19",
- "description": "Thank you!",
- "height" : 700,
- "index" : 407501,
- "media" : "Other digital art",
- "ratings": ["Sexual content", "Contains female nudity"],
- "score" : int,
- "tags" : ["collection", "kancolle", "kantai", "shimakaze"],
- "title" : "shimakaze",
- "user" : "Tenpura",
- "views" : int,
- "width" : 495,
- },
- }),
- ("http://www.hentai-foundry.com/pictures/user/Tenpura/407501/", {
- "pattern": "http://pictures.hentai-foundry.com/t/Tenpura/407501/",
- }),
- ("https://www.hentai-foundry.com/pictures/user/Tenpura/407501/"),
- ("https://pictures.hentai-foundry.com"
- "/t/Tenpura/407501/Tenpura-407501-shimakaze.png"),
- )
+ example = "https://www.hentai-foundry.com/pictures/user/USER/12345/TITLE"
+
skip = Extractor.skip
def __init__(self, match):
@@ -328,24 +282,7 @@ class HentaifoundryStoriesExtractor(HentaifoundryExtractor):
subcategory = "stories"
archive_fmt = "s_{index}"
pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)(?:/page/(\d+))?/?$"
- test = ("https://www.hentai-foundry.com/stories/user/SnowWolf35", {
- "count": ">= 35",
- "keyword": {
- "author" : "SnowWolf35",
- "chapters" : int,
- "comments" : int,
- "date" : "type:datetime",
- "description": str,
- "index" : int,
- "rating" : int,
- "ratings" : list,
- "status" : "re:(Inc|C)omplete",
- "title" : str,
- "user" : "SnowWolf35",
- "views" : int,
- "words" : int,
- },
- })
+ example = "https://www.hentai-foundry.com/stories/user/USER"
def items(self):
self._init_site_filters()
@@ -364,11 +301,8 @@ class HentaifoundryStoryExtractor(HentaifoundryExtractor):
subcategory = "story"
archive_fmt = "s_{index}"
pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)/(\d+)"
- test = (("https://www.hentai-foundry.com/stories/user/SnowWolf35"
- "/26416/Overwatch-High-Chapter-Voting-Location"), {
- "url": "5a67cfa8c3bf7634c8af8485dd07c1ea74ee0ae8",
- "keyword": {"title": "Overwatch High Chapter Voting Location"},
- })
+ example = "https://www.hentai-foundry.com/stories/user/USER/12345/TITLE"
+
skip = Extractor.skip
def __init__(self, match):
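Note: the hentai-foundry hunks also show the cookie-handling rename that runs through this commit: cookiedomain becomes cookies_domain, cookienames becomes cookies_names, self.session.cookies becomes self.cookies, and (in the idolcomplex hunks further down) _check_cookies/_update_cookies become cookies_check/cookies_update. A condensed sketch of the new spellings, assuming self.cookies is the extractor's cookie jar; the class name is illustrative:

    class HentaifoundryLikeExtractor(Extractor):             # illustrative
        cookies_domain = "www.hentai-foundry.com"            # was: cookiedomain

        def _init_site_filters(self):
            # was: self.session.cookies.get("PHPSESSID", domain=self.cookiedomain)
            if self.cookies.get("PHPSESSID", domain=self.cookies_domain):
                return
            self.request(self.root + "/?enterAgree=1", method="HEAD")
            csrf = self.cookies.get("YII_CSRF_TOKEN", domain=self.cookies_domain)
            if not csrf:
                self.log.warning("Unable to update site content filters")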
diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py
index a1e681d..31a302d 100644
--- a/gallery_dl/extractor/hentaifox.py
+++ b/gallery_dl/extractor/hentaifox.py
@@ -21,40 +21,7 @@ class HentaifoxBase():
class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
"""Extractor for image galleries on hentaifox.com"""
pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com(/gallery/(\d+))"
- test = (
- ("https://hentaifox.com/gallery/56622/", {
- "pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg",
- "keyword": "bcd6b67284f378e5cc30b89b761140e3e60fcd92",
- "count": 24,
- }),
- # 'split_tag' element (#1378)
- ("https://hentaifox.com/gallery/630/", {
- "keyword": {
- "artist": ["beti", "betty", "magi", "mimikaki"],
- "characters": [
- "aerith gainsborough",
- "tifa lockhart",
- "yuffie kisaragi"
- ],
- "count": 32,
- "gallery_id": 630,
- "group": ["cu-little2"],
- "parody": ["darkstalkers | vampire", "final fantasy vii"],
- "tags": ["femdom", "fingering", "masturbation", "yuri"],
- "title": "Cu-Little Bakanya~",
- "type": "doujinshi",
- },
- }),
- # email-protected title (#4201)
- ("https://hentaifox.com/gallery/35261/", {
- "keyword": {
- "gallery_id": 35261,
- "title": "ManageM@ster!",
- "artist": ["haritama hiroki"],
- "group": ["studio n.ball"],
- },
- }),
- )
+ example = "https://hentaifox.com/gallery/12345/"
def __init__(self, match):
GalleryExtractor.__init__(self, match)
@@ -116,22 +83,7 @@ class HentaifoxSearchExtractor(HentaifoxBase, Extractor):
subcategory = "search"
pattern = (r"(?:https?://)?(?:www\.)?hentaifox\.com"
r"(/(?:parody|tag|artist|character|search|group)/[^/?%#]+)")
- test = (
- ("https://hentaifox.com/parody/touhou-project/"),
- ("https://hentaifox.com/character/reimu-hakurei/"),
- ("https://hentaifox.com/artist/distance/"),
- ("https://hentaifox.com/search/touhou/"),
- ("https://hentaifox.com/group/v-slash/"),
- ("https://hentaifox.com/tag/heterochromia/", {
- "pattern": HentaifoxGalleryExtractor.pattern,
- "count": ">= 60",
- "keyword": {
- "url" : str,
- "gallery_id": int,
- "title" : str,
- },
- }),
- )
+ example = "https://hentaifox.com/tag/TAG/"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/hentaihand.py b/gallery_dl/extractor/hentaihand.py
index 0617330..f3f43c4 100644
--- a/gallery_dl/extractor/hentaihand.py
+++ b/gallery_dl/extractor/hentaihand.py
@@ -17,27 +17,7 @@ class HentaihandGalleryExtractor(GalleryExtractor):
category = "hentaihand"
root = "https://hentaihand.com"
pattern = r"(?:https?://)?(?:www\.)?hentaihand\.com/\w+/comic/([\w-]+)"
- test = (
- (("https://hentaihand.com/en/comic/c75-takumi-na-muchi-choudenji-hou-"
- "no-aishi-kata-how-to-love-a-super-electromagnetic-gun-toaru-kagaku-"
- "no-railgun-english"), {
- "pattern": r"https://cdn.hentaihand.com/.*/images/37387/\d+.jpg$",
- "count": 50,
- "keyword": {
- "artists" : ["Takumi Na Muchi"],
- "date" : "dt:2014-06-28 00:00:00",
- "gallery_id": 37387,
- "lang" : "en",
- "language" : "English",
- "parodies" : ["Toaru Kagaku No Railgun"],
- "relationships": list,
- "tags" : list,
- "title" : r"re:\(C75\) \[Takumi na Muchi\] Choudenji Hou ",
- "title_alt" : r"re:\(C75\) \[たくみなむち\] 超電磁砲のあいしかた",
- "type" : "Doujinshi",
- },
- }),
- )
+ example = "https://hentaihand.com/en/comic/TITLE"
def __init__(self, match):
self.slug = match.group(1)
@@ -76,15 +56,7 @@ class HentaihandTagExtractor(Extractor):
pattern = (r"(?i)(?:https?://)?(?:www\.)?hentaihand\.com"
r"/\w+/(parody|character|tag|artist|group|language"
r"|category|relationship)/([^/?#]+)")
- test = (
- ("https://hentaihand.com/en/artist/takumi-na-muchi", {
- "pattern": HentaihandGalleryExtractor.pattern,
- "count": ">= 6",
- }),
- ("https://hentaihand.com/en/tag/full-color"),
- ("https://hentaihand.com/fr/language/japanese"),
- ("https://hentaihand.com/zh/category/manga"),
- )
+ example = "https://hentaihand.com/en/tag/TAG"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/hentaihere.py b/gallery_dl/extractor/hentaihere.py
index 2297cc0..ba9558c 100644
--- a/gallery_dl/extractor/hentaihere.py
+++ b/gallery_dl/extractor/hentaihere.py
@@ -23,32 +23,7 @@ class HentaihereChapterExtractor(HentaihereBase, ChapterExtractor):
"""Extractor for a single manga chapter from hentaihere.com"""
archive_fmt = "{chapter_id}_{page}"
pattern = r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/([^/?#]+)"
- test = (
- ("https://hentaihere.com/m/S13812/1/1/", {
- "url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
- "keyword": "0207d20eea3a15d2a8d1496755bdfa49de7cfa9d",
- }),
- ("https://hentaihere.com/m/S23048/1.5/1/", {
- "pattern": r"https://hentaicdn\.com/hentai"
- r"/23048/1\.5/ccdn00\d+\.jpg",
- "count": 32,
- "keyword": {
- "author": "Shinozuka Yuuji",
- "chapter": 1,
- "chapter_id": 80186,
- "chapter_minor": ".5",
- "count": 32,
- "lang": "en",
- "language": "English",
- "manga": "High School Slut's Love Consultation",
- "manga_id": 23048,
- "page": int,
- "title": "High School Slut's Love Consultation + "
- "Girlfriend [Full Color]",
- "type": "Original",
- },
- }),
- )
+ example = "https://hentaihere.com/m/S12345/1/1/"
def __init__(self, match):
self.manga_id, self.chapter = match.groups()
@@ -87,26 +62,7 @@ class HentaihereMangaExtractor(HentaihereBase, MangaExtractor):
"""Extractor for hmanga from hentaihere.com"""
chapterclass = HentaihereChapterExtractor
pattern = r"(?:https?://)?(?:www\.)?hentaihere\.com(/m/S\d+)/?$"
- test = (
- ("https://hentaihere.com/m/S13812", {
- "url": "d1ba6e28bb2162e844f8559c2b2725ba0a093559",
- "keyword": "5c1b712258e78e120907121d3987c71f834d13e1",
- }),
- ("https://hentaihere.com/m/S7608", {
- "url": "6c5239758dc93f6b1b4175922836c10391b174f7",
- "keyword": {
- "chapter": int,
- "chapter_id": int,
- "chapter_minor": "",
- "lang": "en",
- "language": "English",
- "manga": "Oshikake Riot",
- "manga_id": 7608,
- "title": r"re:Oshikake Riot( \d+)?",
- "type": "Original",
- },
- }),
- )
+ example = "https://hentaihere.com/m/S12345"
def chapters(self, page):
results = []
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index 3aad88c..32ca151 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -31,7 +31,7 @@ class HiperdexBase():
return {
"manga" : text.unescape(extr(
- "<title>", "<").rpartition(" - ")[0].strip()),
+ "<title>", "<").rpartition(" Manga - ")[0].strip()),
"url" : text.unescape(extr(
'property="og:url" content="', '"')),
"score" : text.parse_float(extr(
@@ -69,30 +69,7 @@ class HiperdexBase():
class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
"""Extractor for manga chapters from hiperdex.com"""
pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))"
- test = (
- ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", {
- "pattern": r"https://(1st)?hiperdex\d?.(com|net|info)"
- r"/wp-content/uploads/WP-manga/data"
- r"/manga_\w+/[0-9a-f]{32}/\d+\.webp",
- "count": 9,
- "keyword": {
- "artist" : "Sasuga Kei",
- "author" : "Sasuga Kei",
- "chapter": 154,
- "chapter_minor": ".5",
- "description": "re:Natsuo Fujii is in love with his teacher, ",
- "genre" : list,
- "manga" : "Domestic na Kanojo",
- "release": 2014,
- "score" : float,
- "type" : "Manga",
- },
- }),
- ("https://1sthiperdex.com/manga/domestic-na-kanojo/154-5/"),
- ("https://hiperdex2.com/manga/domestic-na-kanojo/154-5/"),
- ("https://hiperdex.net/manga/domestic-na-kanojo/154-5/"),
- ("https://hiperdex.info/manga/domestic-na-kanojo/154-5/"),
- )
+ example = "https://hiperdex.com/manga/MANGA/CHAPTER/"
def __init__(self, match):
root, path, self.manga, self.chapter = match.groups()
@@ -114,30 +91,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
"""Extractor for manga from hiperdex.com"""
chapterclass = HiperdexChapterExtractor
pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$"
- test = (
- ("https://hiperdex.com/manga/1603231576-youre-not-that-special/", {
- "count": 51,
- "pattern": HiperdexChapterExtractor.pattern,
- "keyword": {
- "artist" : "Bolp",
- "author" : "Abyo4",
- "chapter": int,
- "chapter_minor": "",
- "description": "re:I didn’t think much of the creepy girl in ",
- "genre" : list,
- "manga" : "You’re Not That Special!",
- "release": 2019,
- "score" : float,
- "status" : "Completed",
- "type" : "Manhwa",
- },
- }),
- ("https://hiperdex.com/manga/youre-not-that-special/"),
- ("https://1sthiperdex.com/manga/youre-not-that-special/"),
- ("https://hiperdex2.com/manga/youre-not-that-special/"),
- ("https://hiperdex.net/manga/youre-not-that-special/"),
- ("https://hiperdex.info/manga/youre-not-that-special/"),
- )
+ example = "https://hiperdex.com/manga/MANGA/"
def __init__(self, match):
root, path, self.manga = match.groups()
@@ -153,7 +107,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
"Accept": "*/*",
"X-Requested-With": "XMLHttpRequest",
"Origin": self.root,
- "Referer": self.manga_url,
+ "Referer": "https://" + text.quote(self.manga_url[8:]),
}
html = self.request(url, method="POST", headers=headers).text
@@ -173,16 +127,7 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
chapterclass = HiperdexMangaExtractor
reverse = False
pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/(?:[^/?#]+))"
- test = (
- ("https://1sthiperdex.com/manga-artist/beck-ho-an/"),
- ("https://hiperdex.net/manga-artist/beck-ho-an/"),
- ("https://hiperdex2.com/manga-artist/beck-ho-an/"),
- ("https://hiperdex.info/manga-artist/beck-ho-an/"),
- ("https://hiperdex.com/manga-author/viagra/", {
- "pattern": HiperdexMangaExtractor.pattern,
- "count": ">= 6",
- }),
- )
+ example = "https://hiperdex.com/manga-artist/NAME/"
def __init__(self, match):
self.root = text.ensure_http_scheme(match.group(1))
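Note: the hiperdex Referer change above percent-encodes everything after the scheme so that non-ASCII characters in the manga slug do not end up raw in the request header. Sketched with the standard library, assuming gallery_dl's text.quote behaves like urllib.parse.quote here; the URL is a made-up example:

    from urllib.parse import quote

    manga_url = "https://hiperdex.com/manga/you’re-not-that-special/"   # hypothetical
    referer = "https://" + quote(manga_url[8:])   # [8:] strips the "https://" prefix
    # -> "https://hiperdex.com/manga/you%E2%80%99re-not-that-special/"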
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 4e8d1ca..bc49ca3 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -23,55 +23,16 @@ class HitomiGalleryExtractor(GalleryExtractor):
pattern = (r"(?:https?://)?hitomi\.la"
r"/(?:manga|doujinshi|cg|gamecg|galleries|reader)"
r"/(?:[^/?#]+-)?(\d+)")
- test = (
- ("https://hitomi.la/galleries/867789.html", {
- "pattern": r"https://[a-c]a\.hitomi\.la/webp/\d+/\d+"
- r"/[0-9a-f]{64}\.webp",
- "keyword": "86af5371f38117a07407f11af689bdd460b09710",
- "count": 16,
- }),
- # download test
- ("https://hitomi.la/galleries/1401410.html", {
- "range": "1",
- "content": "d75d5a3d1302a48469016b20e53c26b714d17745",
- }),
- # Game CG with scenes (#321)
- ("https://hitomi.la/galleries/733697.html", {
- "count": 210,
- }),
- # fallback for galleries only available through /reader/ URLs
- ("https://hitomi.la/galleries/1045954.html", {
- "count": 1413,
- }),
- # gallery with "broken" redirect
- ("https://hitomi.la/cg/scathacha-sama-okuchi-ecchi-1291900.html", {
- "count": 10,
- "options": (("format", "original"),),
- "pattern": r"https://[a-c]b\.hitomi\.la/images/\d+/\d+"
- r"/[0-9a-f]{64}\.jpg",
- }),
- # no tags
- ("https://hitomi.la/cg/1615823.html", {
- "count": 22,
- "options": (("format", "avif"),),
- "pattern": r"https://[a-c]a\.hitomi\.la/avif/\d+/\d+"
- r"/[0-9a-f]{64}\.avif",
- }),
- ("https://hitomi.la/manga/amazon-no-hiyaku-867789.html"),
- ("https://hitomi.la/manga/867789.html"),
- ("https://hitomi.la/doujinshi/867789.html"),
- ("https://hitomi.la/cg/867789.html"),
- ("https://hitomi.la/gamecg/867789.html"),
- ("https://hitomi.la/reader/867789.html"),
- )
+ example = "https://hitomi.la/manga/TITLE-867789.html"
def __init__(self, match):
- gid = match.group(1)
- url = "https://ltn.hitomi.la/galleries/{}.js".format(gid)
+ self.gid = match.group(1)
+ url = "https://ltn.hitomi.la/galleries/{}.js".format(self.gid)
GalleryExtractor.__init__(self, match, url)
- self.info = None
+
+ def _init(self):
self.session.headers["Referer"] = "{}/reader/{}.html".format(
- self.root, gid)
+ self.root, self.gid)
def metadata(self, page):
self.info = info = util.json_loads(page.partition("=")[2])
@@ -148,17 +109,7 @@ class HitomiTagExtractor(Extractor):
pattern = (r"(?:https?://)?hitomi\.la/"
r"(tag|artist|group|series|type|character)/"
r"([^/?#]+)\.html")
- test = (
- ("https://hitomi.la/tag/screenshots-japanese.html", {
- "pattern": HitomiGalleryExtractor.pattern,
- "count": ">= 35",
- }),
- ("https://hitomi.la/artist/a1-all-1.html"),
- ("https://hitomi.la/group/initial%2Dg-all-1.html"),
- ("https://hitomi.la/series/amnesia-all-1.html"),
- ("https://hitomi.la/type/doujinshi-all-1.html"),
- ("https://hitomi.la/character/a2-all-1.html"),
- )
+ example = "https://hitomi.la/tag/TAG-LANG.html"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py
index 30158b4..6d3184d 100644
--- a/gallery_dl/extractor/hotleak.py
+++ b/gallery_dl/extractor/hotleak.py
@@ -21,10 +21,6 @@ class HotleakExtractor(Extractor):
archive_fmt = "{type}_{creator}_{id}"
root = "https://hotleak.vip"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.session.headers["Referer"] = self.root
-
def items(self):
for post in self.posts():
yield Message.Directory, post
@@ -60,30 +56,7 @@ class HotleakPostExtractor(HotleakExtractor):
subcategory = "post"
pattern = (BASE_PATTERN + r"/(?!(?:hot|creators|videos|photos)(?:$|/))"
r"([^/]+)/(photo|video)/(\d+)")
- test = (
- ("https://hotleak.vip/kaiyakawaii/photo/1617145", {
- "pattern": r"https://hotleak\.vip/storage/images/3625"
- r"/1617145/fefdd5988dfcf6b98cc9e11616018868\.jpg",
- "keyword": {
- "id": 1617145,
- "creator": "kaiyakawaii",
- "type": "photo",
- "filename": "fefdd5988dfcf6b98cc9e11616018868",
- "extension": "jpg",
- },
- }),
- ("https://hotleak.vip/lilmochidoll/video/1625538", {
- "pattern": r"ytdl:https://cdn8-leak\.camhdxx\.com"
- r"/1661/1625538/index\.m3u8",
- "keyword": {
- "id": 1625538,
- "creator": "lilmochidoll",
- "type": "video",
- "filename": "index",
- "extension": "mp4",
- },
- }),
- )
+ example = "https://hotleak.vip/MODEL/photo/12345"
def __init__(self, match):
HotleakExtractor.__init__(self, match)
@@ -119,18 +92,7 @@ class HotleakCreatorExtractor(HotleakExtractor):
subcategory = "creator"
pattern = (BASE_PATTERN + r"/(?!(?:hot|creators|videos|photos)(?:$|/))"
r"([^/?#]+)/?$")
- test = (
- ("https://hotleak.vip/kaiyakawaii", {
- "range": "1-200",
- "count": 200,
- }),
- ("https://hotleak.vip/stellaviolet", {
- "count": "> 600"
- }),
- ("https://hotleak.vip/doesnotexist", {
- "exception": exception.NotFoundError,
- }),
- )
+ example = "https://hotleak.vip/MODEL"
def __init__(self, match):
HotleakExtractor.__init__(self, match)
@@ -183,20 +145,7 @@ class HotleakCategoryExtractor(HotleakExtractor):
"""Extractor for hotleak categories"""
subcategory = "category"
pattern = BASE_PATTERN + r"/(hot|creators|videos|photos)(?:/?\?([^#]+))?"
- test = (
- ("https://hotleak.vip/photos", {
- "pattern": HotleakPostExtractor.pattern,
- "range": "1-50",
- "count": 50,
- }),
- ("https://hotleak.vip/videos"),
- ("https://hotleak.vip/creators", {
- "pattern": HotleakCreatorExtractor.pattern,
- "range": "1-50",
- "count": 50,
- }),
- ("https://hotleak.vip/hot"),
- )
+ example = "https://hotleak.vip/photos"
def __init__(self, match):
HotleakExtractor.__init__(self, match)
@@ -218,14 +167,7 @@ class HotleakSearchExtractor(HotleakExtractor):
"""Extractor for hotleak search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/search(?:/?\?([^#]+))"
- test = (
- ("https://hotleak.vip/search?search=gallery-dl", {
- "count": 0,
- }),
- ("https://hotleak.vip/search?search=hannah", {
- "count": "> 30",
- }),
- )
+ example = "https://hotleak.vip/search?search=QUERY"
def __init__(self, match):
HotleakExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py
index ce68d6d..16e4097 100644
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2021 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -19,9 +19,9 @@ import re
class IdolcomplexExtractor(SankakuExtractor):
"""Base class for idolcomplex extractors"""
category = "idolcomplex"
- cookienames = ("login", "pass_hash")
- cookiedomain = "idol.sankakucomplex.com"
- root = "https://" + cookiedomain
+ cookies_domain = "idol.sankakucomplex.com"
+ cookies_names = ("login", "pass_hash")
+ root = "https://" + cookies_domain
request_interval = 5.0
def __init__(self, match):
@@ -29,6 +29,8 @@ class IdolcomplexExtractor(SankakuExtractor):
self.logged_in = True
self.start_page = 1
self.start_post = 0
+
+ def _init(self):
self.extags = self.config("tags", False)
def items(self):
@@ -51,14 +53,14 @@ class IdolcomplexExtractor(SankakuExtractor):
"""Return an iterable containing all relevant post ids"""
def login(self):
- if self._check_cookies(self.cookienames):
+ if self.cookies_check(self.cookies_names):
return
+
username, password = self._get_auth_info()
if username:
- cookies = self._login_impl(username, password)
- self._update_cookies(cookies)
- else:
- self.logged_in = False
+ return self.cookies_update(self._login_impl(username, password))
+
+ self.logged_in = False
@cache(maxage=90*24*3600, keyarg=1)
def _login_impl(self, username, password):
@@ -76,7 +78,7 @@ class IdolcomplexExtractor(SankakuExtractor):
if not response.history or response.url != self.root + "/user/home":
raise exception.AuthenticationError()
cookies = response.history[0].cookies
- return {c: cookies[c] for c in self.cookienames}
+ return {c: cookies[c] for c in self.cookies_names}
def _parse_post(self, post_id):
"""Extract metadata of a single post"""
@@ -131,20 +133,7 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)"
- test = (
- ("https://idol.sankakucomplex.com/?tags=lyumos", {
- "count": 5,
- "range": "18-22",
- "pattern": r"https://is\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
- r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
- }),
- ("https://idol.sankakucomplex.com/?tags=order:favcount", {
- "count": 5,
- "range": "18-22",
- }),
- ("https://idol.sankakucomplex.com"
- "/?tags=lyumos+wreath&page=3&next=694215"),
- )
+ example = "https://idol.sankakucomplex.com/?tags=TAGS"
per_page = 20
def __init__(self, match):
@@ -212,9 +201,7 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/pool/show/(\d+)"
- test = ("https://idol.sankakucomplex.com/pool/show/145", {
- "count": 3,
- })
+ example = "https://idol.sankakucomplex.com/pool/show/12345"
per_page = 24
def __init__(self, match):
@@ -249,17 +236,7 @@ class IdolcomplexPostExtractor(IdolcomplexExtractor):
subcategory = "post"
archive_fmt = "{id}"
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/post/show/(\d+)"
- test = ("https://idol.sankakucomplex.com/post/show/694215", {
- "content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
- "options": (("tags", True),),
- "keyword": {
- "tags_character": "shani_(the_witcher)",
- "tags_copyright": "the_witcher",
- "tags_idol": str,
- "tags_medium": str,
- "tags_general": str,
- },
- })
+ example = "https://idol.sankakucomplex.com/post/show/12345"
def __init__(self, match):
IdolcomplexExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py
index f993db8..68360e9 100644
--- a/gallery_dl/extractor/imagebam.py
+++ b/gallery_dl/extractor/imagebam.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2022 Mike Fährmann
+# Copyright 2014-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,7 +9,7 @@
"""Extractors for https://www.imagebam.com/"""
from .common import Extractor, Message
-from .. import text, exception
+from .. import text
import re
@@ -21,7 +21,9 @@ class ImagebamExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.path = match.group(1)
- self.session.cookies.set("nsfw_inter", "1", domain="www.imagebam.com")
+
+ def _init(self):
+ self.cookies.set("nsfw_inter", "1", domain="www.imagebam.com")
def _parse_image_page(self, path):
page = self.request(self.root + path).text
@@ -44,26 +46,7 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
archive_fmt = "{gallery_key}_{image_key}"
pattern = (r"(?:https?://)?(?:www\.)?imagebam\.com"
r"(/(?:gallery/|view/G)[a-zA-Z0-9]+)")
- test = (
- ("https://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
- "url": "76d976788ae2757ac81694736b07b72356f5c4c8",
- "keyword": "b048478b1bbba3072a7fa9fcc40630b3efad1f6c",
- "content": "596e6bfa157f2c7169805d50075c2986549973a8",
- }),
- ("http://www.imagebam.com/gallery/op9dwcklwdrrguibnkoe7jxgvig30o5p", {
- # more than 100 images; see issue #219
- "count": 107,
- "url": "32ae6fe5dc3e4ca73ff6252e522d16473595d1d1",
- }),
- ("http://www.imagebam.com/gallery/gsl8teckymt4vbvx1stjkyk37j70va2c", {
- "exception": exception.HttpError,
- }),
- # /view/ path (#2378)
- ("https://www.imagebam.com/view/GA3MT1", {
- "url": "35018ce1e00a2d2825a33d3cd37857edaf804919",
- "keyword": "3a9f98178f73694c527890c0d7ca9a92b46987ba",
- }),
- )
+ example = "https://www.imagebam.com/view/GID"
def items(self):
page = self.request(self.root + self.path).text
@@ -108,24 +91,7 @@ class ImagebamImageExtractor(ImagebamExtractor):
archive_fmt = "{image_key}"
pattern = (r"(?:https?://)?(?:\w+\.)?imagebam\.com"
r"(/(?:image/|view/M|(?:[0-9a-f]{2}/){3})[a-zA-Z0-9]+)")
- test = (
- ("https://www.imagebam.com/image/94d56c502511890", {
- "url": "5e9ba3b1451f8ded0ae3a1b84402888893915d4a",
- "keyword": "2a4380d4b57554ff793898c2d6ec60987c86d1a1",
- "content": "0c8768055e4e20e7c7259608b67799171b691140",
- }),
- ("http://images3.imagebam.com/1d/8c/44/94d56c502511890.png"),
- # NSFW (#1534)
- ("https://www.imagebam.com/image/0850951366904951", {
- "url": "d37297b17ed1615b4311c8ed511e50ce46e4c748",
- }),
- # /view/ path (#2378)
- ("https://www.imagebam.com/view/ME8JOQP", {
- "url": "4dca72bbe61a0360185cf4ab2bed8265b49565b8",
- "keyword": "15a494c02fd30846b41b42a26117aedde30e4ceb",
- "content": "f81008666b17a42d8834c4749b910e1dc10a6e83",
- }),
- )
+ example = "https://www.imagebam.com/view/MID"
def items(self):
path = self.path
diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py
index 9229617..9aa0332 100644
--- a/gallery_dl/extractor/imagechest.py
+++ b/gallery_dl/extractor/imagechest.py
@@ -18,37 +18,17 @@ class ImagechestGalleryExtractor(GalleryExtractor):
category = "imagechest"
root = "https://imgchest.com"
pattern = r"(?:https?://)?(?:www\.)?imgchest\.com/p/([A-Za-z0-9]{11})"
- test = (
- ("https://imgchest.com/p/3na7kr3by8d", {
- "pattern": r"https://cdn\.imgchest\.com/files/\w+\.(jpg|png)",
- "keyword": {
- "count": 3,
- "gallery_id": "3na7kr3by8d",
- "num": int,
- "title": "Wizardry - Video Game From The Mid 80's",
- },
- "url": "7328ca4ec2459378d725e3be19f661d2b045feda",
- "content": "076959e65be30249a2c651fbe6090dc30ba85193",
- "count": 3
- }),
- # "Load More Files" button (#4028)
- ("https://imgchest.com/p/9p4n3q2z7nq", {
- "pattern": r"https://cdn\.imgchest\.com/files/\w+\.(jpg|png)",
- "url": "f5674e8ba79d336193c9f698708d9dcc10e78cc7",
- "count": 52,
- }),
- ("https://imgchest.com/p/xxxxxxxxxxx", {
- "exception": exception.NotFoundError,
- }),
- )
+ example = "https://imgchest.com/p/abcdefghijk"
def __init__(self, match):
self.gallery_id = match.group(1)
url = self.root + "/p/" + self.gallery_id
GalleryExtractor.__init__(self, match, url)
- self.access_token = self.config("access-token")
- if self.access_token:
+ def _init(self):
+ access_token = self.config("access-token")
+ if access_token:
+ self.api = ImagechestAPI(self, access_token)
self.gallery_url = None
self.metadata = self._metadata_api
self.images = self._images_api
@@ -82,8 +62,7 @@ class ImagechestGalleryExtractor(GalleryExtractor):
]
def _metadata_api(self, page):
- api = ImagechestAPI(self, self.access_token)
- post = api.post(self.gallery_id)
+ post = self.api.post(self.gallery_id)
post["date"] = text.parse_datetime(
post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index c91347e..aca101e 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -23,10 +23,6 @@ class ImagefapExtractor(Extractor):
archive_fmt = "{gallery_id}_{image_id}"
request_interval = (2.0, 4.0)
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.session.headers["Referer"] = self.root
-
def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs)
@@ -44,50 +40,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
"""Extractor for image galleries from imagefap.com"""
subcategory = "gallery"
pattern = BASE_PATTERN + r"/(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)"
-
- test = (
- ("https://www.imagefap.com/gallery/7102714", {
- "pattern": r"https://cdnh?\.imagefap\.com"
- r"/images/full/\d+/\d+/\d+\.jpg",
- "keyword": "bdcb75b1e4b9dddc718f3d66e1a58afa9d81a38b",
- "content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab",
- }),
- ("https://www.imagefap.com/gallery/7876223", {
- "pattern": r"https://cdnh?\.imagefap\.com"
- r"/images/full/\d+/\d+/\d+\.jpg",
- "keyword": {
- "categories": ["Asses", "Softcore", "Pornstars"],
- "count": 44,
- "description": "",
- "gallery_id": 7876223,
- "image_id": int,
- "num": int,
- "tags": ["big ass", "panties", "horny",
- "pussy", "exposed", "outdoor"],
- "title": "Kelsi Monroe in lingerie",
- "uploader": "BdRachel",
- },
- "count": 44,
- }),
- # description (#3905)
- ("https://www.imagefap.com/gallery/6180555", {
- "range": "1",
- "keyword": {
- "categories": ["Amateur", "Softcore", "Homemade"],
- "count": 36,
- "description": "Nude and dressed sluts showing off the goods",
- "gallery_id": 6180555,
- "image_id": int,
- "num": int,
- "tags": [] ,
- "title": "Dressed or Undressed MG*",
- "uploader": "splitopen",
- },
- }),
- ("https://www.imagefap.com/pictures/7102714"),
- ("https://www.imagefap.com/gallery.php?gid=7102714"),
- ("https://beta.imagefap.com/gallery.php?gid=7102714"),
- )
+ example = "https://www.imagefap.com/gallery/12345"
def __init__(self, match):
ImagefapExtractor.__init__(self, match)
@@ -158,22 +111,7 @@ class ImagefapImageExtractor(ImagefapExtractor):
"""Extractor for single images from imagefap.com"""
subcategory = "image"
pattern = BASE_PATTERN + r"/photo/(\d+)"
- test = (
- ("https://www.imagefap.com/photo/1962981893", {
- "pattern": r"https://cdnh?\.imagefap\.com"
- r"/images/full/65/196/1962981893\.jpg",
- "keyword": {
- "date": "21/08/2014",
- "gallery_id": 7876223,
- "height": 1600,
- "image_id": 1962981893,
- "title": "Kelsi Monroe in lingerie",
- "uploader": "BdRachel",
- "width": 1066,
- },
- }),
- ("https://beta.imagefap.com/photo/1962981893"),
- )
+ example = "https://www.imagefap.com/photo/12345"
def __init__(self, match):
ImagefapExtractor.__init__(self, match)
@@ -214,35 +152,7 @@ class ImagefapFolderExtractor(ImagefapExtractor):
pattern = (BASE_PATTERN + r"/(?:organizer/|"
r"(?:usergallery\.php\?user(id)?=([^&#]+)&"
r"|profile/([^/?#]+)/galleries\?)folderid=)(\d+|-1)")
- test = (
- ("https://www.imagefap.com/organizer/409758", {
- "pattern": r"https://www\.imagefap\.com/gallery/7876223",
- "url": "37822523e6e4a56feb9dea35653760c86b44ff89",
- "count": 1,
- }),
- (("https://www.imagefap.com/usergallery.php"
- "?userid=1981976&folderid=409758"), {
- "url": "37822523e6e4a56feb9dea35653760c86b44ff89",
- }),
- (("https://www.imagefap.com/usergallery.php"
- "?user=BdRachel&folderid=409758"), {
- "url": "37822523e6e4a56feb9dea35653760c86b44ff89",
- }),
- ("https://www.imagefap.com/profile/BdRachel/galleries?folderid=-1", {
- "pattern": ImagefapGalleryExtractor.pattern,
- "range": "1-40",
- }),
- (("https://www.imagefap.com/usergallery.php"
- "?userid=1981976&folderid=-1"), {
- "pattern": ImagefapGalleryExtractor.pattern,
- "range": "1-40",
- }),
- (("https://www.imagefap.com/usergallery.php"
- "?user=BdRachel&folderid=-1"), {
- "pattern": ImagefapGalleryExtractor.pattern,
- "range": "1-40",
- }),
- )
+ example = "https://www.imagefap.com/organizer/12345"
def __init__(self, match):
ImagefapExtractor.__init__(self, match)
@@ -283,7 +193,7 @@ class ImagefapFolderExtractor(ImagefapExtractor):
yield gid, extr("<b>", "<")
cnt += 1
- if cnt < 25:
+ if cnt < 20:
break
params["page"] += 1
@@ -294,20 +204,7 @@ class ImagefapUserExtractor(ImagefapExtractor):
pattern = (BASE_PATTERN +
r"/(?:profile(?:\.php\?user=|/)([^/?#]+)(?:/galleries)?"
r"|usergallery\.php\?userid=(\d+))(?:$|#)")
- test = (
- ("https://www.imagefap.com/profile/BdRachel", {
- "pattern": ImagefapFolderExtractor.pattern,
- "count": ">= 18",
- }),
- ("https://www.imagefap.com/usergallery.php?userid=1862791", {
- "pattern": r"https://www\.imagefap\.com"
- r"/profile/LucyRae/galleries\?folderid=-1",
- "count": 1,
- }),
- ("https://www.imagefap.com/profile/BdRachel/galleries"),
- ("https://www.imagefap.com/profile.php?user=BdRachel"),
- ("https://beta.imagefap.com/profile.php?user=BdRachel"),
- )
+ example = "https://www.imagefap.com/profile/USER"
def __init__(self, match):
ImagefapExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index a6e848c..94019bd 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -19,23 +19,23 @@ class ImagehostImageExtractor(Extractor):
basecategory = "imagehost"
subcategory = "image"
archive_fmt = "{token}"
- https = True
- params = None
- cookies = None
- encoding = None
+ _https = True
+ _params = None
+ _cookies = None
+ _encoding = None
def __init__(self, match):
Extractor.__init__(self, match)
self.page_url = "http{}://{}".format(
- "s" if self.https else "", match.group(1))
+ "s" if self._https else "", match.group(1))
self.token = match.group(2)
- if self.params == "simple":
- self.params = {
+ if self._params == "simple":
+ self._params = {
"imgContinue": "Continue+to+image+...+",
}
- elif self.params == "complex":
- self.params = {
+ elif self._params == "complex":
+ self._params = {
"op": "view",
"id": self.token,
"pre": "1",
@@ -46,16 +46,16 @@ class ImagehostImageExtractor(Extractor):
def items(self):
page = self.request(
self.page_url,
- method=("POST" if self.params else "GET"),
- data=self.params,
- cookies=self.cookies,
- encoding=self.encoding,
+ method=("POST" if self._params else "GET"),
+ data=self._params,
+ cookies=self._cookies,
+ encoding=self._encoding,
).text
url, filename = self.get_info(page)
data = text.nameext_from_url(filename, {"token": self.token})
data.update(self.metadata(page))
- if self.https and url.startswith("http:"):
+ if self._https and url.startswith("http:"):
url = "https:" + url[5:]
yield Message.Directory, data
@@ -74,36 +74,9 @@ class ImxtoImageExtractor(ImagehostImageExtractor):
category = "imxto"
pattern = (r"(?:https?://)?(?:www\.)?((?:imx\.to|img\.yt)"
r"/(?:i/|img-)(\w+)(\.html)?)")
- test = (
- ("https://imx.to/i/1qdeva", { # new-style URL
- "url": "ab2173088a6cdef631d7a47dec4a5da1c6a00130",
- "content": "0c8768055e4e20e7c7259608b67799171b691140",
- "keyword": {
- "size" : 18,
- "width" : 64,
- "height": 32,
- "hash" : "94d56c599223c59f3feb71ea603484d1",
- },
- }),
- ("https://imx.to/img-57a2050547b97.html", { # old-style URL
- "url": "a83fe6ef1909a318c4d49fcf2caf62f36c3f9204",
- "content": "54592f2635674c25677c6872db3709d343cdf92f",
- "keyword": {
- "size" : 5284,
- "width" : 320,
- "height": 160,
- "hash" : "40da6aaa7b8c42b18ef74309bbc713fc",
- },
- }),
- ("https://img.yt/img-57a2050547b97.html", { # img.yt domain
- "url": "a83fe6ef1909a318c4d49fcf2caf62f36c3f9204",
- }),
- ("https://imx.to/img-57a2050547b98.html", {
- "exception": exception.NotFoundError,
- }),
- )
- params = "simple"
- encoding = "utf-8"
+ example = "https://imx.to/i/ID"
+ _params = "simple"
+ _encoding = "utf-8"
def __init__(self, match):
ImagehostImageExtractor.__init__(self, match)
@@ -140,11 +113,7 @@ class ImxtoGalleryExtractor(ImagehostImageExtractor):
category = "imxto"
subcategory = "gallery"
pattern = r"(?:https?://)?(?:www\.)?(imx\.to/g/([^/?#]+))"
- test = ("https://imx.to/g/ozdy", {
- "pattern": ImxtoImageExtractor.pattern,
- "keyword": {"title": "untitled gallery"},
- "count": 40,
- })
+ example = "https://imx.to/g/ID"
def items(self):
page = self.request(self.page_url).text
@@ -162,19 +131,21 @@ class AcidimgImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from acidimg.cc"""
category = "acidimg"
pattern = r"(?:https?://)?((?:www\.)?acidimg\.cc/img-([a-z0-9]+)\.html)"
- test = ("https://acidimg.cc/img-5acb6b9de4640.html", {
- "url": "f132a630006e8d84f52d59555191ed82b3b64c04",
- "keyword": "135347ab4345002fc013863c0d9419ba32d98f78",
- "content": "0c8768055e4e20e7c7259608b67799171b691140",
- })
- params = "simple"
- encoding = "utf-8"
+ example = "https://acidimg.cc/img-abc123.html"
+ _params = "simple"
+ _encoding = "utf-8"
def get_info(self, page):
- url, pos = text.extract(page, '<img class="centred" src="', '"')
+ url, pos = text.extract(page, "<img class='centred' src='", "'")
if not url:
- raise exception.NotFoundError("image")
- filename, pos = text.extract(page, ' alt="', '"', pos)
+ url, pos = text.extract(page, '<img class="centred" src="', '"')
+ if not url:
+ raise exception.NotFoundError("image")
+
+ filename, pos = text.extract(page, "alt='", "'", pos)
+ if not filename:
+ filename, pos = text.extract(page, 'alt="', '"', pos)
+
return url, (filename + splitext(url)[1]) if filename else url
@@ -183,26 +154,13 @@ class ImagevenueImageExtractor(ImagehostImageExtractor):
category = "imagevenue"
pattern = (r"(?:https?://)?((?:www|img\d+)\.imagevenue\.com"
r"/([A-Z0-9]{8,10}|view/.*|img\.php\?.*))")
- test = (
- ("https://www.imagevenue.com/ME13LS07", {
- "pattern": r"https://cdn-images\.imagevenue\.com"
- r"/10/ac/05/ME13LS07_o\.png",
- "keyword": "ae15d6e3b2095f019eee84cd896700cd34b09c36",
- "content": "cfaa8def53ed1a575e0c665c9d6d8cf2aac7a0ee",
- }),
- (("https://www.imagevenue.com/view/o?i=92518_13732377"
- "annakarina424200712535AM_122_486lo.jpg&h=img150&l=loc486"), {
- "url": "8bf0254e29250d8f5026c0105bbdda3ee3d84980",
- }),
- (("http://img28116.imagevenue.com/img.php"
- "?image=th_52709_test_122_64lo.jpg"), {
- "url": "f98e3091df7f48a05fb60fbd86f789fc5ec56331",
- }),
- )
+ example = "https://www.imagevenue.com/ME123456789"
def get_info(self, page):
pos = page.index('class="card-body')
url, pos = text.extract(page, '<img src="', '"', pos)
+ if url.endswith("/loader.svg"):
+ url, pos = text.extract(page, '<img src="', '"', pos)
filename, pos = text.extract(page, 'alt="', '"', pos)
return url, text.unescape(filename)
@@ -212,21 +170,11 @@ class ImagetwistImageExtractor(ImagehostImageExtractor):
category = "imagetwist"
pattern = (r"(?:https?://)?((?:www\.|phun\.)?"
r"image(?:twist|haha)\.com/([a-z0-9]{12}))")
- test = (
- ("https://imagetwist.com/f1i2s4vhvbrq/test.png", {
- "url": "8d5e168c0bee30211f821c6f3b2116e419d42671",
- "keyword": "d1060a4c2e3b73b83044e20681712c0ffdd6cfef",
- "content": "0c8768055e4e20e7c7259608b67799171b691140",
- }),
- ("https://www.imagetwist.com/f1i2s4vhvbrq/test.png"),
- ("https://phun.imagetwist.com/f1i2s4vhvbrq/test.png"),
- ("https://imagehaha.com/f1i2s4vhvbrq/test.png"),
- ("https://www.imagehaha.com/f1i2s4vhvbrq/test.png"),
- )
+ example = "https://imagetwist.com/123456abcdef/NAME.EXT"
@property
@memcache(maxage=3*3600)
- def cookies(self):
+ def _cookies(self):
return self.request(self.page_url).cookies
def get_info(self, page):
@@ -239,11 +187,7 @@ class ImgspiceImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imgspice.com"""
category = "imgspice"
pattern = r"(?:https?://)?((?:www\.)?imgspice\.com/([^/?#]+))"
- test = ("https://imgspice.com/nwfwtpyog50y/test.png.html", {
- "url": "b8c30a8f51ee1012959a4cfd46197fabf14de984",
- "keyword": "100e310a19a2fa22d87e1bbc427ecb9f6501e0c0",
- "content": "0c8768055e4e20e7c7259608b67799171b691140",
- })
+ example = "https://imgspice.com/ID/NAME.EXT.html"
def get_info(self, page):
pos = page.find('id="imgpreview"')
@@ -259,12 +203,8 @@ class PixhostImageExtractor(ImagehostImageExtractor):
category = "pixhost"
pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)"
r"/show/\d+/(\d+)_[^/?#]+)")
- test = ("http://pixhost.to/show/190/130327671_test-.png", {
- "url": "4e5470dcf6513944773044d40d883221bbc46cff",
- "keyword": "3bad6d59db42a5ebbd7842c2307e1c3ebd35e6b0",
- "content": "0c8768055e4e20e7c7259608b67799171b691140",
- })
- cookies = {"pixhostads": "1", "pixhosttest": "1"}
+ example = "https://pixhost.to/show/123/12345_NAME.EXT"
+ _cookies = {"pixhostads": "1", "pixhosttest": "1"}
def get_info(self, page):
url , pos = text.extract(page, "class=\"image-img\" src=\"", "\"")
@@ -278,10 +218,7 @@ class PixhostGalleryExtractor(ImagehostImageExtractor):
subcategory = "gallery"
pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)"
r"/gallery/([^/?#]+))")
- test = ("https://pixhost.to/gallery/jSMFq", {
- "pattern": PixhostImageExtractor.pattern,
- "count": 3,
- })
+ example = "https://pixhost.to/gallery/ID"
def items(self):
page = text.extr(self.request(
@@ -294,13 +231,9 @@ class PixhostGalleryExtractor(ImagehostImageExtractor):
class PostimgImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from postimages.org"""
category = "postimg"
- pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)"
- r"/(?!gallery/)(?:image/)?([^/?#]+)/?)")
- test = ("https://postimg.cc/Wtn2b3hC", {
- "url": "72f3c8b1d6c6601a20ad58f35635494b4891a99e",
- "keyword": "2d05808d04e4e83e33200db83521af06e3147a84",
- "content": "cfaa8def53ed1a575e0c665c9d6d8cf2aac7a0ee",
- })
+ pattern = (r"(?:https?://)?((?:www\.)?(?:postim(?:ages|g)|pixxxels)"
+ r"\.(?:cc|org)/(?!gallery/)(?:image/)?([^/?#]+)/?)")
+ example = "https://postimages.org/ID"
def get_info(self, page):
pos = page.index(' id="download"')
@@ -313,12 +246,9 @@ class PostimgGalleryExtractor(ImagehostImageExtractor):
"""Extractor for images galleries from postimages.org"""
category = "postimg"
subcategory = "gallery"
- pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)"
- r"/(?:gallery/)([^/?#]+)/?)")
- test = ("https://postimg.cc/gallery/wxpDLgX", {
- "pattern": PostimgImageExtractor.pattern,
- "count": 22,
- })
+ pattern = (r"(?:https?://)?((?:www\.)?(?:postim(?:ages|g)|pixxxels)"
+ r"\.(?:cc|org)/gallery/([^/?#]+))")
+ example = "https://postimages.org/gallery/ID"
def items(self):
page = self.request(self.page_url).text
@@ -332,11 +262,7 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor):
category = "turboimagehost"
pattern = (r"(?:https?://)?((?:www\.)?turboimagehost\.com"
r"/p/(\d+)/[^/?#]+\.html)")
- test = ("https://www.turboimagehost.com/p/39078423/test--.png.html", {
- "url": "b94de43612318771ced924cb5085976f13b3b90e",
- "keyword": "704757ca8825f51cec516ec44c1e627c1f2058ca",
- "content": "0c8768055e4e20e7c7259608b67799171b691140",
- })
+ example = "https://www.turboimagehost.com/p/12345/NAME.EXT.html"
def get_info(self, page):
url = text.extract(page, 'src="', '"', page.index("<img "))[0]
@@ -347,10 +273,7 @@ class ViprImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from vipr.im"""
category = "vipr"
pattern = r"(?:https?://)?(vipr\.im/(\w+))"
- test = ("https://vipr.im/kcd5jcuhgs3v.html", {
- "url": "88f6a3ecbf3356a11ae0868b518c60800e070202",
- "keyword": "c432e8a1836b0d97045195b745731c2b1bb0e771",
- })
+ example = "https://vipr.im/abc123.html"
def get_info(self, page):
url = text.extr(page, '<img src="', '"')
@@ -361,13 +284,9 @@ class ImgclickImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imgclick.net"""
category = "imgclick"
pattern = r"(?:https?://)?((?:www\.)?imgclick\.net/([^/?#]+))"
- test = ("http://imgclick.net/4tbrre1oxew9/test-_-_.png.html", {
- "url": "140dcb250a325f2d26b2d918c18b8ac6a2a0f6ab",
- "keyword": "6895256143eab955622fc149aa367777a8815ba3",
- "content": "0c8768055e4e20e7c7259608b67799171b691140",
- })
- https = False
- params = "complex"
+ example = "http://imgclick.net/abc123/NAME.EXT.html"
+ _https = False
+ _params = "complex"
def get_info(self, page):
url , pos = text.extract(page, '<br><img src="', '"')
@@ -379,11 +298,7 @@ class FappicImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from fappic.com"""
category = "fappic"
pattern = r"(?:https?://)?((?:www\.)?fappic\.com/(\w+)/[^/?#]+)"
- test = ("https://www.fappic.com/98wxqcklyh8k/test.png", {
- "pattern": r"https://img\d+\.fappic\.com/img/\w+/test\.png",
- "keyword": "433b1d310b0ff12ad8a71ac7b9d8ba3f8cd1e898",
- "content": "0c8768055e4e20e7c7259608b67799171b691140",
- })
+ example = "https://fappic.com/abc123/NAME.EXT"
def get_info(self, page):
url , pos = text.extract(page, '<a href="#"><img src="', '"')
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
index a221075..1b74180 100644
--- a/gallery_dl/extractor/imgbb.py
+++ b/gallery_dl/extractor/imgbb.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -62,7 +62,7 @@ class ImgbbExtractor(Extractor):
def login(self):
username, password = self._get_auth_info()
if username:
- self._update_cookies(self._login_impl(username, password))
+ self.cookies_update(self._login_impl(username, password))
@cache(maxage=360*24*3600, keyarg=1)
def _login_impl(self, username, password):
@@ -82,7 +82,7 @@ class ImgbbExtractor(Extractor):
if not response.history:
raise exception.AuthenticationError()
- return self.session.cookies
+ return self.cookies
def _pagination(self, page, endpoint, params):
data = None
@@ -114,27 +114,7 @@ class ImgbbAlbumExtractor(ImgbbExtractor):
subcategory = "album"
directory_fmt = ("{category}", "{user}", "{album_name} {album_id}")
pattern = r"(?:https?://)?ibb\.co/album/([^/?#]+)/?(?:\?([^#]+))?"
- test = (
- ("https://ibb.co/album/i5PggF", {
- "range": "1-80",
- "url": "70afec9fcc3a6de62a6b644b487d892d8d47cf1a",
- "keyword": "569e1d88ebdd27655387559cdf1cd526a3e1ab69",
- }),
- ("https://ibb.co/album/i5PggF?sort=title_asc", {
- "range": "1-80",
- "url": "afdf5fc95d8e09d77e8f44312f3e9b843987bb5a",
- "keyword": "f090e14d0e5f7868595082b2c95da1309c84872d",
- }),
- # no user data (#471)
- ("https://ibb.co/album/kYKpwF", {
- "url": "ac0abcfcb89f4df6adc2f7e4ff872f3b03ef1bc7",
- "keyword": {"user": ""},
- }),
- # private
- ("https://ibb.co/album/hqgWrF", {
- "exception": exception.HttpError,
- }),
- )
+ example = "https://ibb.co/album/ID"
def __init__(self, match):
ImgbbExtractor.__init__(self, match)
@@ -169,10 +149,7 @@ class ImgbbUserExtractor(ImgbbExtractor):
"""Extractor for user profiles in imgbb.com"""
subcategory = "user"
pattern = r"(?:https?://)?([\w-]+)\.imgbb\.com/?(?:\?([^#]+))?$"
- test = ("https://folkie.imgbb.com", {
- "range": "1-80",
- "pattern": r"https?://i\.ibb\.co/\w+/[^/?#]+",
- })
+ example = "https://USER.imgbb.com"
def __init__(self, match):
ImgbbExtractor.__init__(self, match)
@@ -196,19 +173,7 @@ class ImgbbUserExtractor(ImgbbExtractor):
class ImgbbImageExtractor(ImgbbExtractor):
subcategory = "image"
pattern = r"(?:https?://)?ibb\.co/(?!album/)([^/?#]+)"
- test = ("https://ibb.co/fUqh5b", {
- "pattern": r"https://i\.ibb\.co/g3kvx80/Arundel-Ireeman-5\.jpg",
- "content": "c5a0965178a8b357acd8aa39660092918c63795e",
- "keyword": {
- "id" : "fUqh5b",
- "title" : "Arundel Ireeman 5",
- "url" : "https://i.ibb.co/g3kvx80/Arundel-Ireeman-5.jpg",
- "width" : 960,
- "height": 719,
- "user" : "folkie",
- "extension": "jpg",
- },
- })
+ example = "https://ibb.co/ID"
def __init__(self, match):
ImgbbExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/imgbox.py b/gallery_dl/extractor/imgbox.py
index 530c4e1..7069717 100644
--- a/gallery_dl/extractor/imgbox.py
+++ b/gallery_dl/extractor/imgbox.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2019 Mike Fährmann
+# Copyright 2014-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from galleries at https://imgbox.com/"""
+"""Extractors for https://imgbox.com/"""
from .common import Extractor, Message, AsynchronousMixin
from .. import text, exception
@@ -63,20 +63,7 @@ class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
filename_fmt = "{num:>03}-{filename}.{extension}"
archive_fmt = "{gallery_key}_{image_key}"
pattern = r"(?:https?://)?(?:www\.)?imgbox\.com/g/([A-Za-z0-9]{10})"
- test = (
- ("https://imgbox.com/g/JaX5V5HX7g", {
- "url": "da4f15b161461119ee78841d4b8e8d054d95f906",
- "keyword": "4b1e62820ac2c6205b7ad0b6322cc8e00dbe1b0c",
- "content": "d20307dc8511ac24d688859c55abf2e2cc2dd3cc",
- }),
- ("https://imgbox.com/g/cUGEkRbdZZ", {
- "url": "76506a3aab175c456910851f66227e90484ca9f7",
- "keyword": "fb0427b87983197849fb2887905e758f3e50cb6e",
- }),
- ("https://imgbox.com/g/JaX5V5HX7h", {
- "exception": exception.NotFoundError,
- }),
- )
+ example = "https://imgbox.com/g/12345abcde"
def __init__(self, match):
ImgboxExtractor.__init__(self, match)
@@ -106,16 +93,7 @@ class ImgboxImageExtractor(ImgboxExtractor):
subcategory = "image"
archive_fmt = "{image_key}"
pattern = r"(?:https?://)?(?:www\.)?imgbox\.com/([A-Za-z0-9]{8})"
- test = (
- ("https://imgbox.com/qHhw7lpG", {
- "url": "ee9cdea6c48ad0161c1b5f81f6b0c9110997038c",
- "keyword": "dfc72310026b45f3feb4f9cada20c79b2575e1af",
- "content": "0c8768055e4e20e7c7259608b67799171b691140",
- }),
- ("https://imgbox.com/qHhw7lpH", {
- "exception": exception.NotFoundError,
- }),
- )
+ example = "https://imgbox.com/1234abcd"
def __init__(self, match):
ImgboxExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/imgth.py b/gallery_dl/extractor/imgth.py
index 9ae22a9..3aa7922 100644
--- a/gallery_dl/extractor/imgth.py
+++ b/gallery_dl/extractor/imgth.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -17,24 +17,7 @@ class ImgthGalleryExtractor(GalleryExtractor):
category = "imgth"
root = "https://imgth.com"
pattern = r"(?:https?://)?(?:www\.)?imgth\.com/gallery/(\d+)"
- test = (
- ("https://imgth.com/gallery/37/wallpaper-anime", {
- "url": "4ae1d281ca2b48952cf5cca57e9914402ad72748",
- "pattern": r"https://imgth\.com/images/2009/11/25"
- r"/wallpaper-anime_\w+\.jpg",
- "keyword": {
- "count": 12,
- "date": "dt:2009-11-25 18:21:00",
- "extension": "jpg",
- "filename": r"re:wallpaper-anime_\w+",
- "gallery_id": 37,
- "num": int,
- "title": "Wallpaper anime",
- "user": "celebrities",
- },
- }),
- ("https://www.imgth.com/gallery/37/wallpaper-anime"),
- )
+ example = "https://imgth.com/gallery/123/TITLE"
def __init__(self, match):
self.gallery_id = gid = match.group(1)
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 4c29d98..8884d3e 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -11,7 +11,6 @@
from .common import Extractor, Message
from .. import text, exception
-
BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.(?:com|io)"
@@ -22,8 +21,10 @@ class ImgurExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.api = ImgurAPI(self)
self.key = match.group(1)
+
+ def _init(self):
+ self.api = ImgurAPI(self)
self.mp4 = self.config("mp4", True)
def _prepare(self, image):
@@ -63,69 +64,7 @@ class ImgurImageExtractor(ImgurExtractor):
archive_fmt = "{id}"
pattern = (BASE_PATTERN + r"/(?!gallery|search)"
r"(?:r/\w+/)?(\w{7}|\w{5})[sbtmlh]?")
- test = (
- ("https://imgur.com/21yMxCS", {
- "url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
- "content": "0c8768055e4e20e7c7259608b67799171b691140",
- "keyword": {
- "account_id" : 0,
- "comment_count" : int,
- "cover_id" : "21yMxCS",
- "date" : "dt:2016-11-10 14:24:35",
- "description" : "",
- "downvote_count": int,
- "duration" : 0,
- "ext" : "png",
- "favorite" : False,
- "favorite_count": 0,
- "has_sound" : False,
- "height" : 32,
- "id" : "21yMxCS",
- "image_count" : 1,
- "in_most_viral" : False,
- "is_ad" : False,
- "is_album" : False,
- "is_animated" : False,
- "is_looping" : False,
- "is_mature" : False,
- "is_pending" : False,
- "mime_type" : "image/png",
- "name" : "test-テスト",
- "point_count" : int,
- "privacy" : "",
- "score" : int,
- "size" : 182,
- "title" : "Test",
- "upvote_count" : int,
- "url" : "https://i.imgur.com/21yMxCS.png",
- "view_count" : int,
- "width" : 64,
- },
- }),
- ("http://imgur.com/0gybAXR", { # gifv/mp4 video
- "url": "a2220eb265a55b0c95e0d3d721ec7665460e3fd7",
- "content": "a3c080e43f58f55243ab830569ba02309d59abfc",
- }),
- ("https://imgur.com/XFfsmuC", { # missing title in API response (#467)
- "keyword": {"title": "Tears are a natural response to irritants"},
- }),
- ("https://imgur.com/1Nily2P", { # animated png
- "pattern": "https://i.imgur.com/1Nily2P.png",
- }),
- ("https://imgur.com/zzzzzzz", { # not found
- "exception": exception.HttpError,
- }),
- ("https://m.imgur.com/r/Celebs/iHJ7tsM"),
- ("https://www.imgur.com/21yMxCS"), # www
- ("https://m.imgur.com/21yMxCS"), # mobile
- ("https://imgur.com/zxaY6"), # 5 character key
- ("https://imgur.io/zxaY6"), # .io
- ("https://i.imgur.com/21yMxCS.png"), # direct link
- ("https://i.imgur.io/21yMxCS.png"), # direct link .io
- ("https://i.imgur.com/21yMxCSh.png"), # direct link thumbnail
- ("https://i.imgur.com/zxaY6.gif"), # direct link (short)
- ("https://i.imgur.com/zxaY6s.gif"), # direct link (short; thumb)
- )
+ example = "https://imgur.com/abcdefg"
def items(self):
image = self.api.image(self.key)
@@ -150,71 +89,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
filename_fmt = "{category}_{album[id]}_{num:>03}_{id}.{extension}"
archive_fmt = "{album[id]}_{id}"
pattern = BASE_PATTERN + r"/a/(\w{7}|\w{5})"
- test = (
- ("https://imgur.com/a/TcBmP", {
- "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
- "keyword": {
- "album": {
- "account_id" : 0,
- "comment_count" : int,
- "cover_id" : "693j2Kr",
- "date" : "dt:2015-10-09 10:37:50",
- "description" : "",
- "downvote_count": 0,
- "favorite" : False,
- "favorite_count": 0,
- "id" : "TcBmP",
- "image_count" : 19,
- "in_most_viral" : False,
- "is_ad" : False,
- "is_album" : True,
- "is_mature" : False,
- "is_pending" : False,
- "privacy" : "private",
- "score" : int,
- "title" : "138",
- "upvote_count" : int,
- "url" : "https://imgur.com/a/TcBmP",
- "view_count" : int,
- "virality" : int,
- },
- "account_id" : 0,
- "count" : 19,
- "date" : "type:datetime",
- "description": "",
- "ext" : "jpg",
- "has_sound" : False,
- "height" : int,
- "id" : str,
- "is_animated": False,
- "is_looping" : False,
- "mime_type" : "image/jpeg",
- "name" : str,
- "num" : int,
- "size" : int,
- "title" : str,
- "type" : "image",
- "updated_at" : None,
- "url" : str,
- "width" : int,
- },
- }),
- ("https://imgur.com/a/eD9CT", { # large album
- "url": "de748c181a04d18bef1de9d4f4866ef0a06d632b",
- }),
- ("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash
- "url": "695ef0c950023362a0163ee5041796300db76674",
- }),
- ("https://imgur.com/a/TcBmQ", {
- "exception": exception.HttpError,
- }),
- ("https://imgur.com/a/pjOnJA0", { # empty, no 'media' (#2557)
- "count": 0,
- }),
- ("https://www.imgur.com/a/TcBmP"), # www
- ("https://imgur.io/a/TcBmP"), # .io
- ("https://m.imgur.com/a/TcBmP"), # mobile
- )
+ example = "https://imgur.com/a/abcde"
def items(self):
album = self.api.album(self.key)
@@ -247,17 +122,7 @@ class ImgurGalleryExtractor(ImgurExtractor):
"""Extractor for imgur galleries"""
subcategory = "gallery"
pattern = BASE_PATTERN + r"/(?:gallery|t/\w+)/(\w{7}|\w{5})"
- test = (
- ("https://imgur.com/gallery/zf2fIms", { # non-album gallery (#380)
- "pattern": "https://imgur.com/zf2fIms",
- }),
- ("https://imgur.com/gallery/eD9CT", {
- "pattern": "https://imgur.com/a/eD9CT",
- }),
- ("https://imgur.com/t/unmuted/26sEhNr"),
- ("https://imgur.com/t/cat/qSB8NbN"),
- ("https://imgur.io/t/cat/qSB8NbN"), # .io
- )
+ example = "https://imgur.com/gallery/abcde"
def items(self):
if self.api.gallery(self.key)["is_album"]:
@@ -273,15 +138,7 @@ class ImgurUserExtractor(ImgurExtractor):
"""Extractor for all images posted by a user"""
subcategory = "user"
pattern = BASE_PATTERN + r"/user/([^/?#]+)(?:/posts|/submitted)?/?$"
- test = (
- ("https://imgur.com/user/Miguenzo", {
- "range": "1-100",
- "count": 100,
- "pattern": r"https://imgur\.com(/a)?/\w+$",
- }),
- ("https://imgur.com/user/Miguenzo/posts"),
- ("https://imgur.com/user/Miguenzo/submitted"),
- )
+ example = "https://imgur.com/user/USER"
def items(self):
return self._items_queue(self.api.account_submissions(self.key))
@@ -291,11 +148,7 @@ class ImgurFavoriteExtractor(ImgurExtractor):
"""Extractor for a user's favorites"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites/?$"
- test = ("https://imgur.com/user/Miguenzo/favorites", {
- "range": "1-100",
- "count": 100,
- "pattern": r"https://imgur\.com(/a)?/\w+$",
- })
+ example = "https://imgur.com/user/USER/favorites"
def items(self):
return self._items_queue(self.api.account_favorites(self.key))
@@ -305,16 +158,7 @@ class ImgurFavoriteFolderExtractor(ImgurExtractor):
"""Extractor for a user's favorites folder"""
subcategory = "favorite-folder"
pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites/folder/(\d+)"
- test = (
- ("https://imgur.com/user/mikf1/favorites/folder/11896757/public", {
- "pattern": r"https://imgur\.com(/a)?/\w+$",
- "count": 3,
- }),
- ("https://imgur.com/user/mikf1/favorites/folder/11896741/private", {
- "pattern": r"https://imgur\.com(/a)?/\w+$",
- "count": 5,
- }),
- )
+ example = "https://imgur.com/user/USER/favorites/folder/12345/TITLE"
def __init__(self, match):
ImgurExtractor.__init__(self, match)
@@ -329,11 +173,7 @@ class ImgurSubredditExtractor(ImgurExtractor):
"""Extractor for a subreddits's imgur links"""
subcategory = "subreddit"
pattern = BASE_PATTERN + r"/r/([^/?#]+)/?$"
- test = ("https://imgur.com/r/pics", {
- "range": "1-100",
- "count": 100,
- "pattern": r"https://imgur\.com(/a)?/\w+$",
- })
+ example = "https://imgur.com/r/SUBREDDIT"
def items(self):
return self._items_queue(self.api.gallery_subreddit(self.key))
@@ -343,11 +183,7 @@ class ImgurTagExtractor(ImgurExtractor):
"""Extractor for imgur tag searches"""
subcategory = "tag"
pattern = BASE_PATTERN + r"/t/([^/?#]+)$"
- test = ("https://imgur.com/t/animals", {
- "range": "1-100",
- "count": 100,
- "pattern": r"https://imgur\.com(/a)?/\w+$",
- })
+ example = "https://imgur.com/t/TAG"
def items(self):
return self._items_queue(self.api.gallery_tag(self.key))
@@ -357,11 +193,7 @@ class ImgurSearchExtractor(ImgurExtractor):
"""Extractor for imgur search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/search(?:/[^?#]+)?/?\?q=([^&#]+)"
- test = ("https://imgur.com/search?q=cute+cat", {
- "range": "1-100",
- "count": 100,
- "pattern": r"https://imgur\.com(/a)?/\w+$",
- })
+ example = "https://imgur.com/search?q=UERY"
def items(self):
key = text.unquote(self.key.replace("+", " "))
@@ -449,11 +281,7 @@ class ImgurAPI():
params["client_id"] = self.client_id
params["page"] = 0
params["sort"] = "newest"
-
- headers = {
- "Referer": "https://imgur.com/",
- "Origin": "https://imgur.com",
- }
+ headers = {"Origin": "https://imgur.com"}
while True:
data = self._call(endpoint, params, headers)["data"]
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index 83a1a19..4ad37fc 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020-2022 Mike Fährmann
+# Copyright 2020-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -24,8 +24,7 @@ class InkbunnyExtractor(Extractor):
archive_fmt = "{file_id}"
root = "https://inkbunny.net"
- def __init__(self, match):
- Extractor.__init__(self, match)
+ def _init(self):
self.api = InkbunnyAPI(self)
def items(self):
@@ -73,51 +72,7 @@ class InkbunnyUserExtractor(InkbunnyExtractor):
"""Extractor for inkbunny user profiles"""
subcategory = "user"
pattern = BASE_PATTERN + r"/(?!s/)(gallery/|scraps/)?(\w+)(?:$|[/?#])"
- test = (
- ("https://inkbunny.net/soina", {
- "pattern": r"https://[\w.]+\.metapix\.net/files/full"
- r"/\d+/\d+_soina_.+",
- "range": "20-50",
- "keyword": {
- "date" : "type:datetime",
- "deleted" : bool,
- "file_id" : "re:[0-9]+",
- "filename" : r"re:[0-9]+_soina_\w+",
- "full_file_md5": "re:[0-9a-f]{32}",
- "mimetype" : str,
- "submission_id": "re:[0-9]+",
- "user_id" : "20969",
- "comments_count" : "re:[0-9]+",
- "deleted" : bool,
- "favorite" : bool,
- "favorites_count": "re:[0-9]+",
- "friends_only" : bool,
- "guest_block" : bool,
- "hidden" : bool,
- "pagecount" : "re:[0-9]+",
- "pools" : list,
- "pools_count" : int,
- "public" : bool,
- "rating_id" : "re:[0-9]+",
- "rating_name" : str,
- "ratings" : list,
- "scraps" : bool,
- "tags" : list,
- "title" : str,
- "type_name" : str,
- "username" : "soina",
- "views" : str,
- },
- }),
- ("https://inkbunny.net/gallery/soina", {
- "range": "1-25",
- "keyword": {"scraps": False},
- }),
- ("https://inkbunny.net/scraps/soina", {
- "range": "1-25",
- "keyword": {"scraps": True},
- }),
- )
+ example = "https://inkbunny.net/USER"
def __init__(self, match):
kind, self.user = match.groups()
@@ -149,14 +104,7 @@ class InkbunnyPoolExtractor(InkbunnyExtractor):
pattern = (BASE_PATTERN + r"/(?:"
r"poolview_process\.php\?pool_id=(\d+)|"
r"submissionsviewall\.php\?([^#]+&mode=pool&[^#]+))")
- test = (
- ("https://inkbunny.net/poolview_process.php?pool_id=28985", {
- "count": 9,
- "keyword": {"pool_id": "28985"},
- }),
- ("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
- "&mode=pool&pool_id=28985&page=1&orderby=pool_order&random=no"),
- )
+ example = "https://inkbunny.net/poolview_process.php?pool_id=12345"
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
@@ -186,16 +134,8 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor):
pattern = (BASE_PATTERN + r"/(?:"
r"userfavorites_process\.php\?favs_user_id=(\d+)|"
r"submissionsviewall\.php\?([^#]+&mode=userfavs&[^#]+))")
- test = (
- ("https://inkbunny.net/userfavorites_process.php?favs_user_id=20969", {
- "pattern": r"https://[\w.]+\.metapix\.net/files/full"
- r"/\d+/\d+_\w+_.+",
- "range": "20-50",
- "keyword": {"favs_user_id": "20969"},
- }),
- ("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
- "&mode=userfavs&random=no&orderby=fav_datetime&page=1&user_id=20969"),
- )
+ example = ("https://inkbunny.net/userfavorites_process.php"
+ "?favs_user_id=12345")
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
@@ -226,26 +166,8 @@ class InkbunnySearchExtractor(InkbunnyExtractor):
subcategory = "search"
pattern = (BASE_PATTERN +
r"/submissionsviewall\.php\?([^#]+&mode=search&[^#]+)")
- test = (("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
- "&mode=search&page=1&orderby=create_datetime&text=cute"
- "&stringtype=and&keywords=yes&title=yes&description=no&artist="
- "&favsby=&type=&days=&keyword_id=&user_id=&random=&md5="), {
- "range": "1-10",
- "count": 10,
- "keyword": {
- "search": {
- "rid": "ffffffffff",
- "mode": "search",
- "page": "1",
- "orderby": "create_datetime",
- "text": "cute",
- "stringtype": "and",
- "keywords": "yes",
- "title": "yes",
- "description": "no",
- },
- },
- })
+ example = ("https://inkbunny.net/submissionsviewall.php"
+ "?text=TAG&mode=search&type=")
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
@@ -280,15 +202,8 @@ class InkbunnyFollowingExtractor(InkbunnyExtractor):
pattern = (BASE_PATTERN + r"/(?:"
r"watchlist_process\.php\?mode=watching&user_id=(\d+)|"
r"usersviewall\.php\?([^#]+&mode=watching&[^#]+))")
- test = (
- (("https://inkbunny.net/watchlist_process.php"
- "?mode=watching&user_id=20969"), {
- "pattern": InkbunnyUserExtractor.pattern,
- "count": ">= 90",
- }),
- ("https://inkbunny.net/usersviewall.php?rid=ffffffffff"
- "&mode=watching&page=1&user_id=20969&orderby=added&namesonly="),
- )
+ example = ("https://inkbunny.net/watchlist_process.php"
+ "?mode=watching&user_id=12345")
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
@@ -325,16 +240,7 @@ class InkbunnyPostExtractor(InkbunnyExtractor):
"""Extractor for individual Inkbunny posts"""
subcategory = "post"
pattern = BASE_PATTERN + r"/s/(\d+)"
- test = (
- ("https://inkbunny.net/s/1829715", {
- "pattern": r"https://[\w.]+\.metapix\.net/files/full"
- r"/2626/2626843_soina_dscn2296\.jpg",
- "content": "cf69d8dddf0822a12b4eef1f4b2258bd600b36c8",
- }),
- ("https://inkbunny.net/s/2044094", {
- "count": 4,
- }),
- )
+ example = "https://inkbunny.net/s/12345"
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index faeffa6..c704183 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -27,29 +27,33 @@ class InstagramExtractor(Extractor):
filename_fmt = "{sidecar_media_id:?/_/}{media_id}.{extension}"
archive_fmt = "{media_id}"
root = "https://www.instagram.com"
- cookiedomain = ".instagram.com"
- cookienames = ("sessionid",)
+ cookies_domain = ".instagram.com"
+ cookies_names = ("sessionid",)
request_interval = (6.0, 12.0)
def __init__(self, match):
Extractor.__init__(self, match)
self.item = match.group(1)
- self.api = None
+
+ def _init(self):
self.www_claim = "0"
self.csrf_token = util.generate_token()
- self._logged_in = True
self._find_tags = re.compile(r"#\w+").findall
+ self._logged_in = True
self._cursor = None
self._user = None
- def items(self):
- self.login()
+ self.cookies.set(
+ "csrftoken", self.csrf_token, domain=self.cookies_domain)
if self.config("api") == "graphql":
self.api = InstagramGraphqlAPI(self)
else:
self.api = InstagramRestAPI(self)
+ def items(self):
+ self.login()
+
data = self.metadata()
videos = self.config("videos", True)
previews = self.config("previews", False)
@@ -86,7 +90,9 @@ class InstagramExtractor(Extractor):
file["_http_headers"] = video_headers
text.nameext_from_url(url, file)
yield Message.Url, url, file
- if not previews:
+ if previews:
+ file["media_id"] += "p"
+ else:
continue
url = file["display_url"]
@@ -131,14 +137,14 @@ class InstagramExtractor(Extractor):
return response
def login(self):
- if not self._check_cookies(self.cookienames):
- username, password = self._get_auth_info()
- if username:
- self._update_cookies(_login_impl(self, username, password))
- else:
- self._logged_in = False
- self.session.cookies.set(
- "csrftoken", self.csrf_token, domain=self.cookiedomain)
+ if self.cookies_check(self.cookies_names):
+ return
+
+ username, password = self._get_auth_info()
+ if username:
+ return self.cookies_update(_login_impl(self, username, password))
+
+ self._logged_in = False
def _parse_post_rest(self, post):
if "items" in post: # story or highlight
@@ -204,7 +210,12 @@ class InstagramExtractor(Extractor):
data["_files"] = files = []
for num, item in enumerate(items, 1):
- image = item["image_versions2"]["candidates"][0]
+ try:
+ image = item["image_versions2"]["candidates"][0]
+ except Exception:
+ self.log.warning("Missing media in post %s",
+ data["post_shortcode"])
+ continue
if "video_versions" in item:
video = max(
@@ -392,11 +403,13 @@ class InstagramUserExtractor(InstagramExtractor):
"""Extractor for an Instagram user profile"""
subcategory = "user"
pattern = USER_PATTERN + r"/?(?:$|[?#])"
- test = (
- ("https://www.instagram.com/instagram/"),
- ("https://www.instagram.com/instagram/?hl=en"),
- ("https://www.instagram.com/id:25025320/"),
- )
+ example = "https://www.instagram.com/USER/"
+
+ def initialize(self):
+ pass
+
+ def finalize(self):
+ pass
def items(self):
base = "{}/{}/".format(self.root, self.item)
@@ -415,10 +428,7 @@ class InstagramPostsExtractor(InstagramExtractor):
"""Extractor for an Instagram user's posts"""
subcategory = "posts"
pattern = USER_PATTERN + r"/posts"
- test = ("https://www.instagram.com/instagram/posts/", {
- "range": "1-16",
- "count": ">= 16",
- })
+ example = "https://www.instagram.com/USER/posts/"
def posts(self):
uid = self.api.user_id(self.item)
@@ -429,10 +439,7 @@ class InstagramReelsExtractor(InstagramExtractor):
"""Extractor for an Instagram user's reels"""
subcategory = "reels"
pattern = USER_PATTERN + r"/reels"
- test = ("https://www.instagram.com/instagram/reels/", {
- "range": "40-60",
- "count": ">= 20",
- })
+ example = "https://www.instagram.com/USER/reels/"
def posts(self):
uid = self.api.user_id(self.item)
@@ -443,15 +450,7 @@ class InstagramTaggedExtractor(InstagramExtractor):
"""Extractor for an Instagram user's tagged posts"""
subcategory = "tagged"
pattern = USER_PATTERN + r"/tagged"
- test = ("https://www.instagram.com/instagram/tagged/", {
- "range": "1-16",
- "count": ">= 16",
- "keyword": {
- "tagged_owner_id" : "25025320",
- "tagged_username" : "instagram",
- "tagged_full_name": "Instagram",
- },
- })
+ example = "https://www.instagram.com/USER/tagged/"
def metadata(self):
if self.item.startswith("id:"):
@@ -475,11 +474,7 @@ class InstagramGuideExtractor(InstagramExtractor):
"""Extractor for an Instagram guide"""
subcategory = "guide"
pattern = USER_PATTERN + r"/guide/[^/?#]+/(\d+)"
- test = (("https://www.instagram.com/kadakaofficial/guide"
- "/knit-i-need-collection/18131821684305217/"), {
- "range": "1-16",
- "count": ">= 16",
- })
+ example = "https://www.instagram.com/USER/guide/NAME/12345"
def __init__(self, match):
InstagramExtractor.__init__(self, match)
@@ -496,10 +491,7 @@ class InstagramSavedExtractor(InstagramExtractor):
"""Extractor for an Instagram user's saved media"""
subcategory = "saved"
pattern = USER_PATTERN + r"/saved(?:/all-posts)?/?$"
- test = (
- ("https://www.instagram.com/instagram/saved/"),
- ("https://www.instagram.com/instagram/saved/all-posts/"),
- )
+ example = "https://www.instagram.com/USER/saved/"
def posts(self):
return self.api.user_saved()
@@ -509,9 +501,7 @@ class InstagramCollectionExtractor(InstagramExtractor):
"""Extractor for Instagram collection"""
subcategory = "collection"
pattern = USER_PATTERN + r"/saved/([^/?#]+)/([^/?#]+)"
- test = (
- "https://www.instagram.com/instagram/saved/collection_name/123456789/",
- )
+ example = "https://www.instagram.com/USER/saved/COLLECTION/12345"
def __init__(self, match):
InstagramExtractor.__init__(self, match)
@@ -533,14 +523,7 @@ class InstagramStoriesExtractor(InstagramExtractor):
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
r"/s(?:tories/(?:highlights/(\d+)|([^/?#]+)(?:/(\d+))?)"
r"|/(aGlnaGxpZ2h0[^?#]+)(?:\?story_media_id=(\d+))?)")
- test = (
- ("https://www.instagram.com/stories/instagram/"),
- ("https://www.instagram.com/stories/highlights/18042509488170095/"),
- ("https://instagram.com/stories/geekmig/2724343156064789461"),
- ("https://www.instagram.com/s/aGlnaGxpZ2h0OjE4MDQyNTA5NDg4MTcwMDk1"),
- ("https://www.instagram.com/s/aGlnaGxpZ2h0OjE4MDQyNTA5NDg4MTcwMDk1"
- "?story_media_id=2724343156064789461"),
- )
+ example = "https://www.instagram.com/stories/USER/"
def __init__(self, match):
h1, self.user, m1, h2, m2 = match.groups()
@@ -575,22 +558,33 @@ class InstagramHighlightsExtractor(InstagramExtractor):
"""Extractor for an Instagram user's story highlights"""
subcategory = "highlights"
pattern = USER_PATTERN + r"/highlights"
- test = ("https://www.instagram.com/instagram/highlights",)
+ example = "https://www.instagram.com/USER/highlights/"
def posts(self):
uid = self.api.user_id(self.item)
return self.api.highlights_media(uid)
+class InstagramFollowingExtractor(InstagramExtractor):
+ """Extractor for an Instagram user's followed users"""
+ subcategory = "following"
+ pattern = USER_PATTERN + r"/following"
+ example = "https://www.instagram.com/USER/following/"
+
+ def items(self):
+ uid = self.api.user_id(self.item)
+ for user in self.api.user_following(uid):
+ user["_extractor"] = InstagramUserExtractor
+ url = "{}/{}".format(self.root, user["username"])
+ yield Message.Queue, url, user
+
+
class InstagramTagExtractor(InstagramExtractor):
"""Extractor for Instagram tags"""
subcategory = "tag"
directory_fmt = ("{category}", "{subcategory}", "{tag}")
pattern = BASE_PATTERN + r"/explore/tags/([^/?#]+)"
- test = ("https://www.instagram.com/explore/tags/instagram/", {
- "range": "1-16",
- "count": ">= 16",
- })
+ example = "https://www.instagram.com/explore/tags/TAG/"
def metadata(self):
return {"tag": text.unquote(self.item)}
@@ -603,10 +597,7 @@ class InstagramAvatarExtractor(InstagramExtractor):
"""Extractor for an Instagram user's avatar"""
subcategory = "avatar"
pattern = USER_PATTERN + r"/avatar"
- test = ("https://www.instagram.com/instagram/avatar", {
- "pattern": r"https://instagram\.[\w.-]+\.fbcdn\.net/v/t51\.2885-19"
- r"/281440578_1088265838702675_6233856337905829714_n\.jpg",
- })
+ example = "https://www.instagram.com/USER/avatar/"
def posts(self):
if self._logged_in:
@@ -646,105 +637,10 @@ class InstagramPostExtractor(InstagramExtractor):
subcategory = "post"
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
r"/(?:[^/?#]+/)?(?:p|tv|reel)/([^/?#]+)")
- test = (
- # GraphImage
- ("https://www.instagram.com/p/BqvsDleB3lV/", {
- "pattern": r"https://[^/]+\.(cdninstagram\.com|fbcdn\.net)"
- r"/v(p/[0-9a-f]+/[0-9A-F]+)?/t51.2885-15/e35"
- r"/44877605_725955034447492_3123079845831750529_n.jpg",
- "keyword": {
- "date": "dt:2018-11-29 01:04:04",
- "description": str,
- "height": int,
- "likes": int,
- "location_id": "214424288",
- "location_slug": "hong-kong",
- "location_url": "re:/explore/locations/214424288/hong-kong/",
- "media_id": "1922949326347663701",
- "shortcode": "BqvsDleB3lV",
- "post_id": "1922949326347663701",
- "post_shortcode": "BqvsDleB3lV",
- "post_url": "https://www.instagram.com/p/BqvsDleB3lV/",
- "tags": ["#WHPsquares"],
- "typename": "GraphImage",
- "username": "instagram",
- "width": int,
- }
- }),
- # GraphSidecar
- ("https://www.instagram.com/p/BoHk1haB5tM/", {
- "count": 5,
- "keyword": {
- "sidecar_media_id": "1875629777499953996",
- "sidecar_shortcode": "BoHk1haB5tM",
- "post_id": "1875629777499953996",
- "post_shortcode": "BoHk1haB5tM",
- "post_url": "https://www.instagram.com/p/BoHk1haB5tM/",
- "num": int,
- "likes": int,
- "username": "instagram",
- }
- }),
- # GraphVideo
- ("https://www.instagram.com/p/Bqxp0VSBgJg/", {
- "pattern": r"/46840863_726311431074534_7805566102611403091_n\.mp4",
- "keyword": {
- "date": "dt:2018-11-29 19:23:58",
- "description": str,
- "height": int,
- "likes": int,
- "media_id": "1923502432034620000",
- "post_url": "https://www.instagram.com/p/Bqxp0VSBgJg/",
- "shortcode": "Bqxp0VSBgJg",
- "tags": ["#ASMR"],
- "typename": "GraphVideo",
- "username": "instagram",
- "width": int,
- }
- }),
- # GraphVideo (IGTV)
- ("https://www.instagram.com/tv/BkQjCfsBIzi/", {
- "pattern": r"/10000000_597132547321814_702169244961988209_n\.mp4",
- "keyword": {
- "date": "dt:2018-06-20 19:51:32",
- "description": str,
- "height": int,
- "likes": int,
- "media_id": "1806097553666903266",
- "post_url": "https://www.instagram.com/p/BkQjCfsBIzi/",
- "shortcode": "BkQjCfsBIzi",
- "typename": "GraphVideo",
- "username": "instagram",
- "width": int,
- }
- }),
- # GraphSidecar with 2 embedded GraphVideo objects
- ("https://www.instagram.com/p/BtOvDOfhvRr/", {
- "count": 2,
- "keyword": {
- "post_url": "https://www.instagram.com/p/BtOvDOfhvRr/",
- "sidecar_media_id": "1967717017113261163",
- "sidecar_shortcode": "BtOvDOfhvRr",
- "video_url": str,
- }
- }),
- # GraphImage with tagged user
- ("https://www.instagram.com/p/B_2lf3qAd3y/", {
- "keyword": {
- "tagged_users": [{
- "id" : "1246468638",
- "username" : "kaaymbl",
- "full_name": "Call Me Kay",
- }]
- }
- }),
- # URL with username (#2085)
- ("https://www.instagram.com/dm/p/CW042g7B9CY/"),
- ("https://www.instagram.com/reel/CDg_6Y1pxWu/"),
- )
+ example = "https://www.instagram.com/p/abcdefg/"
def posts(self):
- return self.api.media(id_from_shortcode(self.item))
+ return self.api.media(self.item)
class InstagramRestAPI():
@@ -783,8 +679,10 @@ class InstagramRestAPI():
endpoint = "/v1/highlights/{}/highlights_tray/".format(user_id)
return self._call(endpoint)["tray"]
- def media(self, post_id):
- endpoint = "/v1/media/{}/info/".format(post_id)
+ def media(self, shortcode):
+ if len(shortcode) > 28:
+ shortcode = shortcode[:-28]
+ endpoint = "/v1/media/{}/info/".format(id_from_shortcode(shortcode))
return self._pagination(endpoint)
def reels_media(self, reel_ids):
@@ -812,7 +710,8 @@ class InstagramRestAPI():
def user_by_name(self, screen_name):
endpoint = "/v1/users/web_profile_info/"
params = {"username": screen_name}
- return self._call(endpoint, params=params)["data"]["user"]
+ return self._call(
+ endpoint, params=params, notfound="user")["data"]["user"]
@memcache(keyarg=1)
def user_by_id(self, user_id):
@@ -857,6 +756,11 @@ class InstagramRestAPI():
params = {"count": 30}
return self._pagination(endpoint, params)
+ def user_following(self, user_id):
+ endpoint = "/v1/friendships/{}/following/".format(user_id)
+ params = {"count": 12}
+ return self._pagination_following(endpoint, params)
+
def user_saved(self):
endpoint = "/v1/feed/saved/posts/"
params = {"count": 50}
@@ -946,6 +850,20 @@ class InstagramRestAPI():
return extr._update_cursor(None)
params["max_id"] = extr._update_cursor(data["next_max_id"])
+ def _pagination_following(self, endpoint, params):
+ extr = self.extractor
+ params["max_id"] = text.parse_int(extr._init_cursor())
+
+ while True:
+ data = self._call(endpoint, params=params)
+
+ yield from data["users"]
+
+ if len(data["users"]) < params["count"]:
+ return extr._update_cursor(None)
+ params["max_id"] = extr._update_cursor(
+ params["max_id"] + params["count"])
+
class InstagramGraphqlAPI():
@@ -980,10 +898,10 @@ class InstagramGraphqlAPI():
["edge_highlight_reels"]["edges"])
return [edge["node"] for edge in edges]
- def media(self, post_id):
+ def media(self, shortcode):
query_hash = "9f8827793ef34641b2fb195d4d41151c"
variables = {
- "shortcode": shortcode_from_id(post_id),
+ "shortcode": shortcode,
"child_comment_count": 3,
"fetch_comment_count": 40,
"parent_comment_count": 24,
diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py
index c0a1de1..f6170c2 100644
--- a/gallery_dl/extractor/issuu.py
+++ b/gallery_dl/extractor/issuu.py
@@ -26,35 +26,11 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
filename_fmt = "{num:>03}.{extension}"
archive_fmt = "{document[publicationId]}_{num}"
pattern = r"(?:https?://)?issuu\.com(/[^/?#]+/docs/[^/?#]+)"
- test = ("https://issuu.com/issuu/docs/motions-1-2019/", {
- "pattern": r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg",
- "count" : 36,
- "keyword": {
- "document": {
- "access" : "PUBLIC",
- "contentRating" : {
- "isAdsafe" : True,
- "isExplicit": False,
- "isReviewed": True,
- },
- "date" : "dt:2019-09-16 00:00:00",
- "description" : "re:Motions, the brand new publication by I",
- "documentName" : "motions-1-2019",
- "downloadable" : False,
- "pageCount" : 36,
- "publicationId" : "d99ec95935f15091b040cb8060f05510",
- "title" : "Motions by Issuu - Issue 1",
- "username" : "issuu",
- },
- "extension": "jpg",
- "filename" : r"re:page_\d+",
- "num" : int,
- },
- })
+ example = "https://issuu.com/issuu/docs/TITLE/"
def metadata(self, page):
- data = util.json_loads(text.extr(
- page, '<script data-json="', '"').replace("&quot;", '"'))
+ data = util.json_loads(text.rextract(
+ page, '<script data-json="', '"')[0].replace("&quot;", '"'))
doc = data["initialDocumentData"]["document"]
doc["date"] = text.parse_datetime(
@@ -78,10 +54,7 @@ class IssuuUserExtractor(IssuuBase, Extractor):
"""Extractor for all publications of a user/publisher"""
subcategory = "user"
pattern = r"(?:https?://)?issuu\.com/([^/?#]+)/?$"
- test = ("https://issuu.com/issuu", {
- "pattern": IssuuPublicationExtractor.pattern,
- "count" : "> 25",
- })
+ example = "https://issuu.com/USER"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py
index 4bcedae..1aef66e 100644
--- a/gallery_dl/extractor/itaku.py
+++ b/gallery_dl/extractor/itaku.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -26,8 +26,10 @@ class ItakuExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.api = ItakuAPI(self)
self.item = match.group(1)
+
+ def _init(self):
+ self.api = ItakuAPI(self)
self.videos = self.config("videos", True)
def items(self):
@@ -61,12 +63,7 @@ class ItakuGalleryExtractor(ItakuExtractor):
"""Extractor for posts from an itaku user gallery"""
subcategory = "gallery"
pattern = BASE_PATTERN + r"/profile/([^/?#]+)/gallery"
- test = ("https://itaku.ee/profile/piku/gallery", {
- "pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_imgs"
- r"/[^/?#]+\.(jpg|png|gif)",
- "range": "1-10",
- "count": 10,
- })
+ example = "https://itaku.ee/profile/USER/gallery"
def posts(self):
return self.api.galleries_images(self.item)
@@ -75,62 +72,7 @@ class ItakuGalleryExtractor(ItakuExtractor):
class ItakuImageExtractor(ItakuExtractor):
subcategory = "image"
pattern = BASE_PATTERN + r"/images/(\d+)"
- test = (
- ("https://itaku.ee/images/100471", {
- "pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_imgs"
- r"/220504_oUNIAFT\.png",
- "count": 1,
- "keyword": {
- "already_pinned": None,
- "blacklisted": {
- "blacklisted_tags": [],
- "is_blacklisted": False
- },
- "can_reshare": True,
- "date": "dt:2022-05-05 19:21:17",
- "date_added": "2022-05-05T19:21:17.674148Z",
- "date_edited": "2022-05-25T14:37:46.220612Z",
- "description": "sketch from drawpile",
- "extension": "png",
- "filename": "220504_oUNIAFT",
- "hotness_score": float,
- "id": 100471,
- "image": "https://d1wmr8tlk3viaj.cloudfront.net/gallery_imgs"
- "/220504_oUNIAFT.png",
- "image_xl": "https://d1wmr8tlk3viaj.cloudfront.net"
- "/gallery_imgs/220504_oUNIAFT/lg.jpg",
- "liked_by_you": False,
- "maturity_rating": "SFW",
- "num_comments": int,
- "num_likes": int,
- "num_reshares": int,
- "obj_tags": 136446,
- "owner": 16775,
- "owner_avatar": "https://d1wmr8tlk3viaj.cloudfront.net"
- "/profile_pics/av2022r_vKYVywc/md.jpg",
- "owner_displayname": "Piku",
- "owner_username": "piku",
- "reshared_by_you": False,
- "sections": ["Fanart/Miku"],
- "tags": list,
- "tags_character": ["hatsune_miku"],
- "tags_copyright": ["vocaloid"],
- "tags_general" : ["twintails", "green_hair", "flag",
- "gloves", "green_eyes", "female",
- "racing_miku"],
- "title": "Racing Miku 2022 Ver.",
- "too_mature": False,
- "uncompressed_filesize": "0.62",
- "video": None,
- "visibility": "PUBLIC",
- },
- }),
- # video
- ("https://itaku.ee/images/19465", {
- "pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_vids"
- r"/sleepy_af_OY5GHWw\.mp4",
- }),
- )
+ example = "https://itaku.ee/images/12345"
def posts(self):
return (self.api.image(self.item),)
@@ -143,7 +85,6 @@ class ItakuAPI():
self.root = extractor.root + "/api"
self.headers = {
"Accept": "application/json, text/plain, */*",
- "Referer": extractor.root + "/",
}
def galleries_images(self, username, section=None):
diff --git a/gallery_dl/extractor/itchio.py b/gallery_dl/extractor/itchio.py
index 6034d12..799dd66 100644
--- a/gallery_dl/extractor/itchio.py
+++ b/gallery_dl/extractor/itchio.py
@@ -21,28 +21,7 @@ class ItchioGameExtractor(Extractor):
filename_fmt = "{game[title]} ({id}).{extension}"
archive_fmt = "{id}"
pattern = r"(?:https?://)?(\w+).itch\.io/([\w-]+)"
- test = (
- ("https://sirtartarus.itch.io/a-craft-of-mine", {
- "pattern": r"https://\w+\.ssl\.hwcdn\.net/upload2"
- r"/game/1983311/7723751\?",
- "count": 1,
- "keyword": {
- "extension": "",
- "filename": "7723751",
- "game": {
- "id": 1983311,
- "noun": "game",
- "title": "A Craft Of Mine",
- "url": "https://sirtartarus.itch.io/a-craft-of-mine",
- },
- "user": {
- "id": 4060052,
- "name": "SirTartarus",
- "url": "https://sirtartarus.itch.io",
- },
- },
- }),
- )
+ example = "https://USER.itch.io/GAME"
def __init__(self, match):
self.user, self.slug = match.groups()
@@ -63,7 +42,7 @@ class ItchioGameExtractor(Extractor):
"Origin": "https://{}.itch.io".format(self.user),
}
data = {
- "csrf_token": text.unquote(self.session.cookies["itchio_token"]),
+ "csrf_token": text.unquote(self.cookies["itchio_token"]),
}
for upload_id in text.extract_iter(page, 'data-upload_id="', '"'):
diff --git a/gallery_dl/extractor/jpgfish.py b/gallery_dl/extractor/jpgfish.py
index 39208e5..8862a7b 100644
--- a/gallery_dl/extractor/jpgfish.py
+++ b/gallery_dl/extractor/jpgfish.py
@@ -4,18 +4,18 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://jpeg.pet/"""
+"""Extractors for https://jpg1.su/"""
from .common import Extractor, Message
from .. import text
-BASE_PATTERN = r"(?:https?://)?jpe?g\.(?:pet|fish(?:ing)?|church)"
+BASE_PATTERN = r"(?:https?://)?jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)"
class JpgfishExtractor(Extractor):
"""Base class for jpgfish extractors"""
category = "jpgfish"
- root = "https://jpeg.pet"
+ root = "https://jpg1.su"
directory_fmt = ("{category}", "{user}", "{album}",)
archive_fmt = "{id}"
@@ -35,28 +35,7 @@ class JpgfishImageExtractor(JpgfishExtractor):
"""Extractor for jpgfish Images"""
subcategory = "image"
pattern = BASE_PATTERN + r"/img/((?:[^/?#]+\.)?(\w+))"
- test = (
- ("https://jpeg.pet/img/funnymeme.LecXGS", {
- "pattern": r"https://simp3\.jpg\.church/images/funnymeme\.jpg",
- "content": "098e5e9b17ad634358426e0ffd1c93871474d13c",
- "keyword": {
- "album": "",
- "extension": "jpg",
- "filename": "funnymeme",
- "id": "LecXGS",
- "url": "https://simp3.jpg.church/images/funnymeme.jpg",
- "user": "exearco",
- },
- }),
- ("https://jpg.church/img/auCruA", {
- "pattern": r"https://simp2\.jpg\.church/hannahowo_00457\.jpg",
- "keyword": {"album": "401-500"},
- }),
- ("https://jpg.pet/img/funnymeme.LecXGS"),
- ("https://jpg.fishing/img/funnymeme.LecXGS"),
- ("https://jpg.fish/img/funnymeme.LecXGS"),
- ("https://jpg.church/img/funnymeme.LecXGS"),
- )
+ example = "https://jpg1.su/img/TITLE.ID"
def __init__(self, match):
JpgfishExtractor.__init__(self, match)
@@ -83,21 +62,7 @@ class JpgfishAlbumExtractor(JpgfishExtractor):
"""Extractor for jpgfish Albums"""
subcategory = "album"
pattern = BASE_PATTERN + r"/a(?:lbum)?/([^/?#]+)(/sub)?"
- test = (
- ("https://jpeg.pet/album/CDilP/?sort=date_desc&page=1", {
- "count": 2,
- }),
- ("https://jpg.fishing/a/gunggingnsk.N9OOI", {
- "count": 114,
- }),
- ("https://jpg.fish/a/101-200.aNJ6A/", {
- "count": 100,
- }),
- ("https://jpg.church/a/hannahowo.aNTdH/sub", {
- "count": 606,
- }),
- ("https://jpg.pet/album/CDilP/?sort=date_desc&page=1"),
- )
+ example = "https://jpg1.su/album/TITLE.ID"
def __init__(self, match):
JpgfishExtractor.__init__(self, match)
@@ -121,18 +86,7 @@ class JpgfishUserExtractor(JpgfishExtractor):
"""Extractor for jpgfish Users"""
subcategory = "user"
pattern = BASE_PATTERN + r"/(?!img|a(?:lbum)?)([^/?#]+)(/albums)?"
- test = (
- ("https://jpeg.pet/exearco", {
- "count": 3,
- }),
- ("https://jpg.church/exearco/albums", {
- "count": 1,
- }),
- ("https://jpg.pet/exearco"),
- ("https://jpg.fishing/exearco"),
- ("https://jpg.fish/exearco"),
- ("https://jpg.church/exearco"),
- )
+ example = "https://jpg1.su/USER"
def __init__(self, match):
JpgfishExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/jschan.py b/gallery_dl/extractor/jschan.py
index fe758fa..398256d 100644
--- a/gallery_dl/extractor/jschan.py
+++ b/gallery_dl/extractor/jschan.py
@@ -31,12 +31,7 @@ class JschanThreadExtractor(JschanExtractor):
filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
archive_fmt = "{board}_{postId}_{num}"
pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)\.html"
- test = (
- ("https://94chan.org/art/thread/25.html", {
- "pattern": r"https://94chan.org/file/[0-9a-f]{64}(\.\w+)?",
- "count": ">= 15"
- })
- )
+ example = "https://94chan.org/a/thread/12345.html"
def __init__(self, match):
JschanExtractor.__init__(self, match)
@@ -71,15 +66,7 @@ class JschanBoardExtractor(JschanExtractor):
subcategory = "board"
pattern = (BASE_PATTERN + r"/([^/?#]+)"
r"(?:/index\.html|/catalog\.html|/\d+\.html|/?$)")
- test = (
- ("https://94chan.org/art/", {
- "pattern": JschanThreadExtractor.pattern,
- "count": ">= 30"
- }),
- ("https://94chan.org/art/2.html"),
- ("https://94chan.org/art/catalog.html"),
- ("https://94chan.org/art/index.html"),
- )
+ example = "https://94chan.org/a/"
def __init__(self, match):
JschanExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/kabeuchi.py b/gallery_dl/extractor/kabeuchi.py
index f172dcf..867f0da 100644
--- a/gallery_dl/extractor/kabeuchi.py
+++ b/gallery_dl/extractor/kabeuchi.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020 Mike Fährmann
+# Copyright 2020-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,16 +21,7 @@ class KabeuchiUserExtractor(Extractor):
archive_fmt = "{id}_{num}"
root = "https://kabe-uchiroom.com"
pattern = r"(?:https?://)?kabe-uchiroom\.com/mypage/?\?id=(\d+)"
- test = (
- ("https://kabe-uchiroom.com/mypage/?id=919865303848255493", {
- "pattern": (r"https://kabe-uchiroom\.com/accounts/upfile/3/"
- r"919865303848255493/\w+\.jpe?g"),
- "count": ">= 24",
- }),
- ("https://kabe-uchiroom.com/mypage/?id=123456789", {
- "exception": exception.NotFoundError,
- }),
- )
+ example = "https://kabe-uchiroom.com/mypage/?id=12345"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/keenspot.py b/gallery_dl/extractor/keenspot.py
index b5d7738..b8ecc14 100644
--- a/gallery_dl/extractor/keenspot.py
+++ b/gallery_dl/extractor/keenspot.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -20,28 +20,7 @@ class KeenspotComicExtractor(Extractor):
filename_fmt = "{filename}.{extension}"
archive_fmt = "{comic}_{filename}"
pattern = r"(?:https?://)?(?!www\.|forums\.)([\w-]+)\.keenspot\.com(/.+)?"
- test = (
- ("http://marksmen.keenspot.com/", { # link
- "range": "1-3",
- "url": "83bcf029103bf8bc865a1988afa4aaeb23709ba6",
- }),
- ("http://barkercomic.keenspot.com/", { # id
- "range": "1-3",
- "url": "c4080926db18d00bac641fdd708393b7d61379e6",
- }),
- ("http://crowscare.keenspot.com/", { # id v2
- "range": "1-3",
- "url": "a00e66a133dd39005777317da90cef921466fcaa"
- }),
- ("http://supernovas.keenspot.com/", { # ks
- "range": "1-3",
- "url": "de21b12887ef31ff82edccbc09d112e3885c3aab"
- }),
- ("http://twokinds.keenspot.com/comic/1066/", { # "random" access
- "range": "1-3",
- "url": "6a784e11370abfb343dcad9adbb7718f9b7be350",
- })
- )
+ example = "http://COMIC.keenspot.com/"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 5aeefeb..894c671 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -26,23 +26,23 @@ class KemonopartyExtractor(Extractor):
directory_fmt = ("{category}", "{service}", "{user}")
filename_fmt = "{id}_{title}_{num:>02}_{filename[:180]}.{extension}"
archive_fmt = "{service}_{user}_{id}_{num}"
- cookiedomain = ".kemono.party"
+ cookies_domain = ".kemono.party"
def __init__(self, match):
domain = match.group(1)
tld = match.group(2)
self.category = domain + "party"
self.root = text.root_from_url(match.group(0))
- self.cookiedomain = ".{}.{}".format(domain, tld)
+ self.cookies_domain = ".{}.{}".format(domain, tld)
Extractor.__init__(self, match)
- self.session.headers["Referer"] = self.root + "/"
- def items(self):
+ def _init(self):
self._prepare_ddosguard_cookies()
-
self._find_inline = re.compile(
r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+'
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
+
+ def items(self):
find_hash = re.compile(HASH_PATTERN).match
generators = self._build_file_generators(self.config("files"))
duplicates = self.config("duplicates")
@@ -126,8 +126,8 @@ class KemonopartyExtractor(Extractor):
def login(self):
username, password = self._get_auth_info()
if username:
- self._update_cookies(self._login_impl(
- (username, self.cookiedomain), password))
+ self.cookies_update(self._login_impl(
+ (username, self.cookies_domain), password))
@cache(maxage=28*24*3600, keyarg=1)
def _login_impl(self, username, password):
@@ -215,19 +215,7 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
subcategory = "user"
pattern = USER_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])"
- test = (
- ("https://kemono.party/fanbox/user/6993449", {
- "range": "1-25",
- "count": 25,
- }),
- # 'max-posts' option, 'o' query parameter (#1674)
- ("https://kemono.party/patreon/user/881792?o=150", {
- "options": (("max-posts", 25),),
- "count": "< 100",
- }),
- ("https://kemono.su/subscribestar/user/alcorart"),
- ("https://kemono.party/subscribestar/user/alcorart"),
- )
+ example = "https://kemono.party/SERVICE/user/12345"
def __init__(self, match):
_, _, service, user_id, offset = match.groups()
@@ -255,87 +243,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
"""Extractor for a single kemono.party post"""
subcategory = "post"
pattern = USER_PATTERN + r"/post/([^/?#]+)"
- test = (
- ("https://kemono.party/fanbox/user/6993449/post/506575", {
- "pattern": r"https://kemono.party/data/21/0f"
- r"/210f35388e28bbcf756db18dd516e2d82ce75[0-9a-f]+\.jpg",
- "content": "900949cefc97ab8dc1979cc3664785aac5ba70dd",
- "keyword": {
- "added": "Wed, 06 May 2020 20:28:02 GMT",
- "content": str,
- "count": 1,
- "date": "dt:2019-08-11 02:09:04",
- "edited": None,
- "embed": dict,
- "extension": "jpeg",
- "filename": "P058kDFYus7DbqAkGlfWTlOr",
- "hash": "210f35388e28bbcf756db18dd516e2d8"
- "2ce758e0d32881eeee76d43e1716d382",
- "id": "506575",
- "num": 1,
- "published": "Sun, 11 Aug 2019 02:09:04 GMT",
- "service": "fanbox",
- "shared_file": False,
- "subcategory": "fanbox",
- "title": "c96取り置き",
- "type": "file",
- "user": "6993449",
- },
- }),
- # inline image (#1286)
- ("https://kemono.party/fanbox/user/7356311/post/802343", {
- "pattern": r"https://kemono\.party/data/47/b5/47b5c014ecdcfabdf2c8"
- r"5eec53f1133a76336997ae8596f332e97d956a460ad2\.jpg",
- "keyword": {"hash": "47b5c014ecdcfabdf2c85eec53f1133a"
- "76336997ae8596f332e97d956a460ad2"},
- }),
- # kemono.party -> data.kemono.party
- ("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
- "pattern": r"https://kemono\.party/data/("
- r"a4/7b/a47bfe938d8c1682eef06e885927484cd8df1b.+\.jpg|"
- r"c6/04/c6048f5067fd9dbfa7a8be565ac194efdfb6e4.+\.zip)",
- }),
- # username (#1548, #1652)
- ("https://kemono.party/gumroad/user/3252870377455/post/aJnAH", {
- "options": (("metadata", True),),
- "keyword": {"username": "Kudalyn's Creations"},
- }),
- # skip patreon duplicates
- ("https://kemono.party/patreon/user/4158582/post/32099982", {
- "count": 2,
- }),
- # allow duplicates (#2440)
- ("https://kemono.party/patreon/user/4158582/post/32099982", {
- "options": (("duplicates", True),),
- "count": 3,
- }),
- # DMs (#2008)
- ("https://kemono.party/patreon/user/34134344/post/38129255", {
- "options": (("dms", True),),
- "keyword": {"dms": [{
- "body": r"re:Hi! Thank you very much for supporting the work I"
- r" did in May. Here's your reward pack! I hope you fin"
- r"d something you enjoy in it. :\)\n\nhttps://www.medi"
- r"afire.com/file/\w+/Set13_tier_2.zip/file",
- "date": "2021-07-31 02:47:51.327865",
- }]},
- }),
- # coomer.party (#2100)
- ("https://coomer.party/onlyfans/user/alinity/post/125962203", {
- "pattern": r"https://coomer\.party/data/7d/3f/7d3fd9804583dc224968"
- r"c0591163ec91794552b04f00a6c2f42a15b68231d5a8\.jpg",
- }),
- # invalid file (#3510)
- ("https://kemono.party/patreon/user/19623797/post/29035449", {
- "pattern": r"907ba78b4545338d3539683e63ecb51c"
- r"f51c10adc9dabd86e92bd52339f298b9\.txt",
- "content": "da39a3ee5e6b4b0d3255bfef95601890afd80709", # empty
- }),
- ("https://kemono.su/subscribestar/user/alcorart/post/184330"),
- ("https://kemono.party/subscribestar/user/alcorart/post/184330"),
- ("https://www.kemono.party/subscribestar/user/alcorart/post/184330"),
- ("https://beta.kemono.party/subscribestar/user/alcorart/post/184330"),
- )
+ example = "https://kemono.party/SERVICE/user/12345/post/12345"
def __init__(self, match):
_, _, service, user_id, post_id = match.groups()
@@ -358,30 +266,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
filename_fmt = "{id}_{num:>02}_{filename}.{extension}"
archive_fmt = "discord_{server}_{id}_{num}"
pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)"
- test = (
- (("https://kemono.party/discord"
- "/server/488668827274444803#finish-work"), {
- "count": 4,
- "keyword": {"channel_name": "finish-work"},
- }),
- (("https://kemono.su/discord"
- "/server/256559665620451329/channel/462437519519383555#"), {
- "pattern": r"https://kemono\.su/data/("
- r"e3/77/e377e3525164559484ace2e64425b0cec1db08.*\.png|"
- r"51/45/51453640a5e0a4d23fbf57fb85390f9c5ec154.*\.gif)",
- "keyword": {"hash": "re:e377e3525164559484ace2e64425b0cec1db08"
- "|51453640a5e0a4d23fbf57fb85390f9c5ec154"},
- "count": ">= 2",
- }),
- # 'inline' files
- (("https://kemono.party/discord"
- "/server/315262215055736843/channel/315262215055736843#general"), {
- "pattern": r"https://cdn\.discordapp\.com/attachments/\d+/\d+/.+$",
- "options": (("image-filter", "type == 'inline'"),),
- "keyword": {"hash": ""},
- "range": "1-5",
- }),
- )
+ example = "https://kemono.party/discord/server/12345#CHANNEL"
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
@@ -460,16 +345,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
subcategory = "discord-server"
pattern = BASE_PATTERN + r"/discord/server/(\d+)$"
- test = (
- ("https://kemono.party/discord/server/488668827274444803", {
- "pattern": KemonopartyDiscordExtractor.pattern,
- "count": 13,
- }),
- ("https://kemono.su/discord/server/488668827274444803", {
- "pattern": KemonopartyDiscordExtractor.pattern,
- "count": 13,
- }),
- )
+ example = "https://kemono.party/discord/server/12345"
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
@@ -491,23 +367,7 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
"""Extractor for kemono.party favorites"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/favorites(?:/?\?([^#]+))?"
- test = (
- ("https://kemono.party/favorites", {
- "pattern": KemonopartyUserExtractor.pattern,
- "url": "f4b5b796979bcba824af84206578c79101c7f0e1",
- "count": 3,
- }),
- ("https://kemono.party/favorites?type=post", {
- "pattern": KemonopartyPostExtractor.pattern,
- "url": "ecfccf5f0d50b8d14caa7bbdcf071de5c1e5b90f",
- "count": 3,
- }),
- ("https://kemono.su/favorites?type=post", {
- "pattern": KemonopartyPostExtractor.pattern,
- "url": "4be8e84cb384a907a8e7997baaf6287b451783b5",
- "count": 3,
- }),
- )
+ example = "https://kemono.party/favorites"
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
@@ -521,7 +381,7 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
if self.favorites == "artist":
users = self.request(
- self.root + "/api/favorites?type=artist").json()
+ self.root + "/api/v1/account/favorites?type=artist").json()
for user in users:
user["_extractor"] = KemonopartyUserExtractor
url = "{}/{}/user/{}".format(
@@ -530,7 +390,7 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
elif self.favorites == "post":
posts = self.request(
- self.root + "/api/favorites?type=post").json()
+ self.root + "/api/v1/account/favorites?type=post").json()
for post in posts:
post["_extractor"] = KemonopartyPostExtractor
url = "{}/{}/user/{}/post/{}".format(
diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py
index 0c3b002..d0c9c30 100644
--- a/gallery_dl/extractor/khinsider.py
+++ b/gallery_dl/extractor/khinsider.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -16,30 +16,13 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
"""Extractor for soundtracks from khinsider.com"""
category = "khinsider"
subcategory = "soundtrack"
+ root = "https://downloads.khinsider.com"
directory_fmt = ("{category}", "{album[name]}")
archive_fmt = "{filename}.{extension}"
pattern = (r"(?:https?://)?downloads\.khinsider\.com"
r"/game-soundtracks/album/([^/?#]+)")
- root = "https://downloads.khinsider.com"
- test = (("https://downloads.khinsider.com"
- "/game-soundtracks/album/horizon-riders-wii"), {
- "pattern": r"https?://vgm(site|downloads)\.com"
- r"/soundtracks/horizon-riders-wii/[^/]+"
- r"/Horizon%20Riders%20Wii%20-%20Full%20Soundtrack\.mp3",
- "keyword": {
- "album": {
- "count": 1,
- "date": "Sep 18th, 2016",
- "name": "Horizon Riders",
- "platform": "Wii",
- "size": 26214400,
- "type": "Gamerip",
- },
- "extension": "mp3",
- "filename": "Horizon Riders Wii - Full Soundtrack",
- },
- "count": 1,
- })
+ example = ("https://downloads.khinsider.com"
+ "/game-soundtracks/album/TITLE")
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py
index 04373c4..a3e0130 100644
--- a/gallery_dl/extractor/komikcast.py
+++ b/gallery_dl/extractor/komikcast.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2022 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -48,18 +48,7 @@ class KomikcastBase():
class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
"""Extractor for manga-chapters from komikcast.site"""
pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/)"
- test = (
- (("https://komikcast.site/chapter"
- "/apotheosis-chapter-02-2-bahasa-indonesia/"), {
- "url": "f6b43fbc027697749b3ea1c14931c83f878d7936",
- "keyword": "f3938e1aff9ad1f302f52447e9781b21f6da26d4",
- }),
- (("https://komikcast.me/chapter"
- "/soul-land-ii-chapter-300-1-bahasa-indonesia/"), {
- "url": "efd00a9bd95461272d51990d7bc54b79ff3ff2e6",
- "keyword": "cb646cfed3d45105bd645ab38b2e9f7d8c436436",
- }),
- )
+ example = "https://komikcast.site/chapter/TITLE/"
def metadata(self, page):
info = text.extr(page, "<title>", " - Komikcast<")
@@ -79,13 +68,7 @@ class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
"""Extractor for manga from komikcast.site"""
chapterclass = KomikcastChapterExtractor
pattern = BASE_PATTERN + r"(/(?:komik/)?[^/?#]+)/?$"
- test = (
- ("https://komikcast.site/komik/090-eko-to-issho/", {
- "url": "19d3d50d532e84be6280a3d61ff0fd0ca04dd6b4",
- "keyword": "837a7e96867344ff59d840771c04c20dc46c0ab1",
- }),
- ("https://komikcast.me/tonari-no-kashiwagi-san/"),
- )
+ example = "https://komikcast.site/komik/TITLE"
def chapters(self, page):
results = []
diff --git a/gallery_dl/extractor/lensdump.py b/gallery_dl/extractor/lensdump.py
index 8990621..d4ccf33 100644
--- a/gallery_dl/extractor/lensdump.py
+++ b/gallery_dl/extractor/lensdump.py
@@ -48,19 +48,7 @@ class LensdumpBase():
class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor):
subcategory = "album"
pattern = BASE_PATTERN + r"/(?:((?!\w+/albums|a/|i/)\w+)|a/(\w+))"
- test = (
- ("https://lensdump.com/a/1IhJr", {
- "url": "7428cc906e7b291c778d446a11c602b81ba72840",
- "keyword": {
- "extension": "png",
- "name": str,
- "num": int,
- "title": str,
- "url": str,
- "width": int,
- },
- }),
- )
+ example = "https://lensdump.com/a/ID"
def __init__(self, match):
GalleryExtractor.__init__(self, match, match.string)
@@ -77,6 +65,7 @@ class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor):
for node in self.nodes(page):
# get urls and filenames of images in current page
json_data = util.json_loads(text.unquote(
+ text.extr(node, "data-object='", "'") or
text.extr(node, 'data-object="', '"')))
image_id = json_data.get('name')
image_url = json_data.get('url')
@@ -99,7 +88,7 @@ class LensdumpAlbumsExtractor(LensdumpBase, Extractor):
"""Extractor for album list from lensdump.com"""
subcategory = "albums"
pattern = BASE_PATTERN + r"/\w+/albums"
- test = ("https://lensdump.com/vstar925/albums",)
+ example = "https://lensdump.com/USER/albums"
def items(self):
for node in self.nodes():
@@ -116,23 +105,7 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
directory_fmt = ("{category}",)
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/i/(\w+)"
- test = (
- ("https://lensdump.com/i/tyoAyM", {
- "pattern": r"https://i\d\.lensdump\.com/i/tyoAyM\.webp",
- "url": "ae9933f5f3bd9497bfc34e3e70a0fbef6c562d38",
- "content": "1aa749ed2c0cf679ec8e1df60068edaf3875de46",
- "keyword": {
- "date": "dt:2022-08-01 08:24:28",
- "extension": "webp",
- "filename": "tyoAyM",
- "height": 400,
- "id": "tyoAyM",
- "title": "MYOBI clovis bookcaseset",
- "url": "https://i2.lensdump.com/i/tyoAyM.webp",
- "width": 620,
- },
- }),
- )
+ example = "https://lensdump.com/i/ID"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/lexica.py b/gallery_dl/extractor/lexica.py
index ad93625..d55d821 100644
--- a/gallery_dl/extractor/lexica.py
+++ b/gallery_dl/extractor/lexica.py
@@ -20,37 +20,7 @@ class LexicaSearchExtractor(Extractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "{id}"
pattern = r"(?:https?://)?lexica\.art/?\?q=([^&#]+)"
- test = (
- ("https://lexica.art/?q=tree", {
- "pattern": r"https://lexica-serve-encoded-images2\.sharif\."
- r"workers.dev/full_jpg/[0-9a-f-]{36}$",
- "range": "1-80",
- "count": 80,
- "keyword": {
- "height": int,
- "id": str,
- "upscaled_height": int,
- "upscaled_width": int,
- "userid": str,
- "width": int,
- "prompt": {
- "c": int,
- "grid": bool,
- "height": int,
- "id": str,
- "images": list,
- "initImage": None,
- "initImageStrength": None,
- "model": "lexica-aperture-v2",
- "negativePrompt": str,
- "prompt": str,
- "seed": str,
- "timestamp": r"re:\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d.\d\d\dZ",
- "width": int,
- },
- },
- }),
- )
+ example = "https://lexica.art/?q=QUERY"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/lightroom.py b/gallery_dl/extractor/lightroom.py
index 783473d..2cbaa97 100644
--- a/gallery_dl/extractor/lightroom.py
+++ b/gallery_dl/extractor/lightroom.py
@@ -18,24 +18,7 @@ class LightroomGalleryExtractor(Extractor):
filename_fmt = "{num:>04}_{id}.{extension}"
archive_fmt = "{id}"
pattern = r"(?:https?://)?lightroom\.adobe\.com/shares/([0-9a-f]+)"
- test = (
- (("https://lightroom.adobe.com/shares/"
- "0c9cce2033f24d24975423fe616368bf"), {
- "keyword": {
- "title": "Sterne und Nachtphotos",
- "user": "Christian Schrang",
- },
- "count": ">= 55",
- }),
- (("https://lightroom.adobe.com/shares/"
- "7ba68ad5a97e48608d2e6c57e6082813"), {
- "keyword": {
- "title": "HEBFC Snr/Res v Brighton",
- "user": "",
- },
- "count": ">= 180",
- }),
- )
+ example = "https://lightroom.adobe.com/shares/0123456789abcdef"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/livedoor.py b/gallery_dl/extractor/livedoor.py
index 2765f0b..e21659f 100644
--- a/gallery_dl/extractor/livedoor.py
+++ b/gallery_dl/extractor/livedoor.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2020 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -84,32 +84,7 @@ class LivedoorBlogExtractor(LivedoorExtractor):
"""Extractor for a user's blog on blog.livedoor.jp"""
subcategory = "blog"
pattern = r"(?:https?://)?blog\.livedoor\.jp/(\w+)/?(?:$|[?#])"
- test = (
- ("http://blog.livedoor.jp/zatsu_ke/", {
- "range": "1-50",
- "count": 50,
- "archive": False,
- "pattern": r"https?://livedoor.blogimg.jp/\w+/imgs/\w/\w/\w+\.\w+",
- "keyword": {
- "post": {
- "categories" : tuple,
- "date" : "type:datetime",
- "description": str,
- "id" : int,
- "tags" : list,
- "title" : str,
- "user" : "zatsu_ke"
- },
- "filename": str,
- "hash" : r"re:\w{4,}",
- "num" : int,
- },
- }),
- ("http://blog.livedoor.jp/uotapo/", {
- "range": "1-5",
- "count": 5,
- }),
- )
+ example = "http://blog.livedoor.jp/USER/"
def posts(self):
url = "{}/{}".format(self.root, self.user)
@@ -129,20 +104,7 @@ class LivedoorPostExtractor(LivedoorExtractor):
"""Extractor for images from a blog post on blog.livedoor.jp"""
subcategory = "post"
pattern = r"(?:https?://)?blog\.livedoor\.jp/(\w+)/archives/(\d+)"
- test = (
- ("http://blog.livedoor.jp/zatsu_ke/archives/51493859.html", {
- "url": "9ca3bbba62722c8155be79ad7fc47be409e4a7a2",
- "keyword": "1f5b558492e0734f638b760f70bfc0b65c5a97b9",
- }),
- ("http://blog.livedoor.jp/amaumauma/archives/7835811.html", {
- "url": "204bbd6a9db4969c50e0923855aeede04f2e4a62",
- "keyword": "05821c7141360e6057ef2d382b046f28326a799d",
- }),
- ("http://blog.livedoor.jp/uotapo/archives/1050616939.html", {
- "url": "4b5ab144b7309eb870d9c08f8853d1abee9946d2",
- "keyword": "84fbf6e4eef16675013d6333039a7cfcb22c2d50",
- }),
- )
+ example = "http://blog.livedoor.jp/USER/archives/12345.html"
def __init__(self, match):
LivedoorExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index 5d236c3..3d7d685 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -30,25 +30,16 @@ BASE_PATTERN = LolisafeExtractor.update({
class LolisafeAlbumExtractor(LolisafeExtractor):
subcategory = "album"
pattern = BASE_PATTERN + "/a/([^/?#]+)"
- test = (
- ("https://xbunkr.com/a/TA0bu3F4", {
- "pattern": r"https://media\.xbunkr\.com/[^.]+\.\w+",
- "count": 861,
- "keyword": {
- "album_id": "TA0bu3F4",
- "album_name": "Hannahowo Onlyfans Photos",
- }
- }),
- ("https://xbunkr.com/a/GNQc2I5d"),
- )
+ example = "https://xbunkr.com/a/ID"
def __init__(self, match):
LolisafeExtractor.__init__(self, match)
self.album_id = match.group(match.lastindex)
+ def _init(self):
domain = self.config("domain")
if domain == "auto":
- self.root = text.root_from_url(match.group(0))
+ self.root = text.root_from_url(self.url)
elif domain:
self.root = text.ensure_http_scheme(domain)
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index 57db0c9..c3c44d2 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -15,7 +15,7 @@ from .. import text, exception
class LusciousExtractor(Extractor):
"""Base class for luscious extractors"""
category = "luscious"
- cookiedomain = ".luscious.net"
+ cookies_domain = ".luscious.net"
root = "https://members.luscious.net"
def _graphql(self, op, variables, query):
@@ -47,77 +47,13 @@ class LusciousAlbumExtractor(LusciousExtractor):
archive_fmt = "{album[id]}_{id}"
pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net"
r"/(?:albums|pictures/c/[^/?#]+/album)/[^/?#]+_(\d+)")
- test = (
- ("https://luscious.net/albums/okinami-no-koigokoro_277031/", {
- "pattern": r"https://storage\.bhs\.cloud\.ovh\.net/v1/AUTH_\w+"
- r"/images/NTRshouldbeillegal/277031"
- r"/luscious_net_\d+_\d+\.jpg$",
- # "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
- "keyword": {
- "album": {
- "__typename" : "Album",
- "audiences" : list,
- "content" : "Hentai",
- "cover" : "re:https://\\w+.luscious.net/.+/277031/",
- "created" : 1479625853,
- "created_by" : "NTRshouldbeillegal",
- "date" : "dt:2016-11-20 07:10:53",
- "description" : "Enjoy.",
- "download_url": "re:/download/(r/)?824778/277031/",
- "genres" : list,
- "id" : 277031,
- "is_manga" : True,
- "labels" : list,
- "language" : "English",
- "like_status" : "none",
- "modified" : int,
- "permissions" : list,
- "rating" : float,
- "slug" : "okinami-no-koigokoro",
- "status" : None,
- "tags" : list,
- "title" : "Okinami no Koigokoro",
- "url" : "/albums/okinami-no-koigokoro_277031/",
- "marked_for_deletion": False,
- "marked_for_processing": False,
- "number_of_animated_pictures": 0,
- "number_of_favorites": int,
- "number_of_pictures": 18,
- },
- "aspect_ratio": r"re:\d+:\d+",
- "category" : "luscious",
- "created" : int,
- "date" : "type:datetime",
- "height" : int,
- "id" : int,
- "is_animated" : False,
- "like_status" : "none",
- "position" : int,
- "resolution" : r"re:\d+x\d+",
- "status" : None,
- "tags" : list,
- "thumbnail" : str,
- "title" : str,
- "width" : int,
- "number_of_comments": int,
- "number_of_favorites": int,
- },
- }),
- ("https://luscious.net/albums/not-found_277035/", {
- "exception": exception.NotFoundError,
- }),
- ("https://members.luscious.net/albums/login-required_323871/", {
- "count": 64,
- }),
- ("https://www.luscious.net/albums/okinami_277031/"),
- ("https://members.luscious.net/albums/okinami_277031/"),
- ("https://luscious.net/pictures/c/video_game_manga/album"
- "/okinami-no-koigokoro_277031/sorted/position/id/16528978/@_1"),
- )
+ example = "https://luscious.net/albums/TITLE_12345/"
def __init__(self, match):
LusciousExtractor.__init__(self, match)
self.album_id = match.group(1)
+
+ def _init(self):
self.gif = self.config("gif", False)
def items(self):
@@ -336,15 +272,7 @@ class LusciousSearchExtractor(LusciousExtractor):
subcategory = "search"
pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net"
r"/albums/list/?(?:\?([^#]+))?")
- test = (
- ("https://members.luscious.net/albums/list/"),
- ("https://members.luscious.net/albums/list/"
- "?display=date_newest&language_ids=%2B1&tagged=+full_color&page=1", {
- "pattern": LusciousAlbumExtractor.pattern,
- "range": "41-60",
- "count": 20,
- }),
- )
+ example = "https://luscious.net/albums/list/?tagged=TAG"
def __init__(self, match):
LusciousExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/lynxchan.py b/gallery_dl/extractor/lynxchan.py
index 85e8bb1..0edd5c1 100644
--- a/gallery_dl/extractor/lynxchan.py
+++ b/gallery_dl/extractor/lynxchan.py
@@ -40,22 +40,7 @@ class LynxchanThreadExtractor(LynxchanExtractor):
filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
archive_fmt = "{boardUri}_{postId}_{num}"
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
- test = (
- ("https://bbw-chan.nl/bbwdraw/res/499.html", {
- "pattern": r"https://bbw-chan\.nl/\.media/[0-9a-f]{64}(\.\w+)?$",
- "count": ">= 352",
- }),
- ("https://bbw-chan.nl/bbwdraw/res/489.html"),
- ("https://kohlchan.net/a/res/4594.html", {
- "pattern": r"https://kohlchan\.net/\.media/[0-9a-f]{64}(\.\w+)?$",
- "count": ">= 80",
- }),
- ("https://endchan.org/yuri/res/193483.html", {
- "pattern": r"https://endchan\.org/\.media/[^.]+(\.\w+)?$",
- "count" : ">= 19",
- }),
- ("https://endchan.org/yuri/res/33621.html"),
- )
+ example = "https://bbw-chan.nl/a/res/12345.html"
def __init__(self, match):
LynxchanExtractor.__init__(self, match)
@@ -86,24 +71,7 @@ class LynxchanBoardExtractor(LynxchanExtractor):
"""Extractor for LynxChan boards"""
subcategory = "board"
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
- test = (
- ("https://bbw-chan.nl/bbwdraw/", {
- "pattern": LynxchanThreadExtractor.pattern,
- "count": ">= 148",
- }),
- ("https://bbw-chan.nl/bbwdraw/2.html"),
- ("https://kohlchan.net/a/", {
- "pattern": LynxchanThreadExtractor.pattern,
- "count": ">= 100",
- }),
- ("https://kohlchan.net/a/2.html"),
- ("https://kohlchan.net/a/catalog.html"),
- ("https://endchan.org/yuri/", {
- "pattern": LynxchanThreadExtractor.pattern,
- "count" : ">= 9",
- }),
- ("https://endchan.org/yuri/catalog.html"),
- )
+ example = "https://bbw-chan.nl/a/"
def __init__(self, match):
LynxchanExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index e111fee..dbaf4cb 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -30,9 +30,11 @@ class MangadexExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
+ self.uuid = match.group(1)
+
+ def _init(self):
self.session.headers["User-Agent"] = util.USERAGENT
self.api = MangadexAPI(self)
- self.uuid = match.group(1)
def items(self):
for chapter in self.chapters():
@@ -96,25 +98,8 @@ class MangadexChapterExtractor(MangadexExtractor):
"""Extractor for manga-chapters from mangadex.org"""
subcategory = "chapter"
pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)"
- test = (
- ("https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa", {
- "keyword": "e86128a79ebe7201b648f1caa828496a2878dc8f",
- # "content": "50383a4c15124682057b197d40261641a98db514",
- }),
- # oneshot
- ("https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831", {
- "count": 64,
- "keyword": "d11ed057a919854696853362be35fc0ba7dded4c",
- }),
- # MANGA Plus (#1154)
- ("https://mangadex.org/chapter/74149a55-e7c4-44ea-8a37-98e879c1096f", {
- "exception": exception.StopExtraction,
- }),
- # 'externalUrl', but still downloadable (#2503)
- ("https://mangadex.org/chapter/364728a4-6909-4164-9eea-6b56354f7c78", {
- "count": 0, # 404
- }),
- )
+ example = ("https://mangadex.org/chapter"
+ "/01234567-89ab-cdef-0123-456789abcdef")
def items(self):
try:
@@ -146,37 +131,8 @@ class MangadexMangaExtractor(MangadexExtractor):
"""Extractor for manga from mangadex.org"""
subcategory = "manga"
pattern = BASE_PATTERN + r"/(?:title|manga)/(?!feed$)([0-9a-f-]+)"
- test = (
- ("https://mangadex.org/title/f90c4398-8aad-4f51-8a1f-024ca09fdcbc", {
- "keyword": {
- "manga" : "Souten no Koumori",
- "manga_id": "f90c4398-8aad-4f51-8a1f-024ca09fdcbc",
- "title" : "re:One[Ss]hot",
- "volume" : 0,
- "chapter" : 0,
- "chapter_minor": "",
- "chapter_id": str,
- "date" : "type:datetime",
- "lang" : str,
- "language": str,
- "artist" : ["Arakawa Hiromu"],
- "author" : ["Arakawa Hiromu"],
- "status" : "completed",
- "tags" : ["Oneshot", "Historical", "Action",
- "Martial Arts", "Drama", "Tragedy"],
- },
- }),
- ("https://mangadex.cc/manga/d0c88e3b-ea64-4e07-9841-c1d2ac982f4a/", {
- "options": (("lang", "en"),),
- "count": ">= 100",
- }),
- ("https://mangadex.org/title/7c1e2742-a086-4fd3-a3be-701fd6cf0be9", {
- "count": 1,
- }),
- ("https://mangadex.org/title/584ef094-b2ab-40ce-962c-bce341fb9d10", {
- "count": ">= 20",
- })
- )
+ example = ("https://mangadex.org/title"
+ "/01234567-89ab-cdef-0123-456789abcdef")
def chapters(self):
return self.api.manga_feed(self.uuid)
@@ -186,7 +142,7 @@ class MangadexFeedExtractor(MangadexExtractor):
"""Extractor for chapters from your Followed Feed"""
subcategory = "feed"
pattern = BASE_PATTERN + r"/title/feed$()"
- test = ("https://mangadex.org/title/feed",)
+ example = "https://mangadex.org/title/feed"
def chapters(self):
return self.api.user_follows_manga_feed()
@@ -202,7 +158,7 @@ class MangadexAPI():
self.extractor = extr
self.headers = {}
- self.username, self.password = self.extractor._get_auth_info()
+ self.username, self.password = extr._get_auth_info()
if not self.username:
self.authenticate = util.noop
@@ -288,9 +244,13 @@ class MangadexAPI():
if ratings is None:
ratings = ("safe", "suggestive", "erotica", "pornographic")
+ lang = config("lang")
+ if isinstance(lang, str) and "," in lang:
+ lang = lang.split(",")
+
params["contentRating[]"] = ratings
+ params["translatedLanguage[]"] = lang
params["includes[]"] = ("scanlation_group",)
- params["translatedLanguage[]"] = config("lang")
params["offset"] = 0
api_params = config("api-parameters")
diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py
index 0818fd9..d590753 100644
--- a/gallery_dl/extractor/mangafox.py
+++ b/gallery_dl/extractor/mangafox.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2022 Mike Fährmann
+# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -20,20 +20,12 @@ class MangafoxChapterExtractor(ChapterExtractor):
root = "https://m.fanfox.net"
pattern = BASE_PATTERN + \
r"(/manga/[^/?#]+/((?:v([^/?#]+)/)?c(\d+)([^/?#]*)))"
- test = (
- ("http://fanfox.net/manga/kidou_keisatsu_patlabor/v05/c006.2/1.html", {
- "keyword": "5661dab258d42d09d98f194f7172fb9851a49766",
- "content": "5c50c252dcf12ffecf68801f4db8a2167265f66c",
- }),
- ("http://mangafox.me/manga/kidou_keisatsu_patlabor/v05/c006.2/"),
- ("http://fanfox.net/manga/black_clover/vTBD/c295/1.html"),
- )
+ example = "https://fanfox.net/manga/TITLE/v01/c001/1.html"
def __init__(self, match):
base, self.cstr, self.volume, self.chapter, self.minor = match.groups()
self.urlbase = self.root + base
ChapterExtractor.__init__(self, match, self.urlbase + "/1.html")
- self.session.headers["Referer"] = self.root + "/"
def metadata(self, page):
manga, pos = text.extract(page, "<title>", "</title>")
@@ -71,36 +63,7 @@ class MangafoxMangaExtractor(MangaExtractor):
root = "https://m.fanfox.net"
chapterclass = MangafoxChapterExtractor
pattern = BASE_PATTERN + r"(/manga/[^/?#]+)/?$"
- test = (
- ("https://fanfox.net/manga/kanojo_mo_kanojo", {
- "pattern": MangafoxChapterExtractor.pattern,
- "count": ">=60",
- "keyword": {
- "author": "HIROYUKI",
- "chapter": int,
- "chapter_minor": r"re:^(\.\d+)?$",
- "chapter_string": r"re:(v\d+/)?c\d+",
- "date": "type:datetime",
- "description": "High school boy Naoya gets a confession from M"
- "omi, a cute and friendly girl. However, Naoya "
- "already has a girlfriend, Seki... but Momi is "
- "too good a catch to let go. Momi and Nagoya's "
- "goal becomes clear: convince Seki to accept be"
- "ing an item with the two of them. Will she bud"
- "ge?",
- "lang": "en",
- "language": "English",
- "manga": "Kanojo mo Kanojo",
- "tags": ["Comedy", "Romance", "School Life", "Shounen"],
- "volume": int,
- },
- }),
- ("https://mangafox.me/manga/shangri_la_frontier", {
- "pattern": MangafoxChapterExtractor.pattern,
- "count": ">=45",
- }),
- ("https://m.fanfox.net/manga/sentai_daishikkaku"),
- )
+ example = "https://fanfox.net/manga/TITLE"
def chapters(self, page):
results = []
diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py
index 531aef4..e8ee861 100644
--- a/gallery_dl/extractor/mangahere.py
+++ b/gallery_dl/extractor/mangahere.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -25,23 +25,14 @@ class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
"""Extractor for manga-chapters from mangahere.cc"""
pattern = (r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]/manga/"
r"([^/]+(?:/v0*(\d+))?/c([^/?#]+))")
- test = (
- ("https://www.mangahere.cc/manga/dongguo_xiaojie/c004.2/", {
- "keyword": "7c98d7b50a47e6757b089aa875a53aa970cac66f",
- "content": "708d475f06893b88549cbd30df1e3f9428f2c884",
- }),
- # URLs without HTTP scheme (#1070)
- ("https://www.mangahere.cc/manga/beastars/c196/1.html", {
- "pattern": "https://zjcdn.mangahere.org/.*",
- }),
- ("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/"),
- ("http://m.mangahere.co/manga/dongguo_xiaojie/c003.2/"),
- )
+ example = "https://www.mangahere.cc/manga/TITLE/c001/1.html"
def __init__(self, match):
self.part, self.volume, self.chapter = match.groups()
url = self.url_fmt.format(self.part, 1)
ChapterExtractor.__init__(self, match, url)
+
+ def _init(self):
self.session.headers["Referer"] = self.root_mobile + "/"
def metadata(self, page):
@@ -93,28 +84,10 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor):
chapterclass = MangahereChapterExtractor
pattern = (r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]"
r"(/manga/[^/?#]+/?)(?:#.*)?$")
- test = (
- ("https://www.mangahere.cc/manga/aria/", {
- "url": "9c2e54ec42e9a87ad53096c328b33c90750af3e4",
- "keyword": "71503c682c5d0c277a50409a8c5fd78e871e3d69",
- "count": 71,
- }),
- ("https://www.mangahere.cc/manga/hiyokoi/#50", {
- "url": "654850570aa03825cd57e2ae2904af489602c523",
- "keyword": "c8084d89a9ea6cf40353093669f9601a39bf5ca2",
- }),
- # adult filter (#556)
- ("http://www.mangahere.cc/manga/gunnm_mars_chronicle/", {
- "pattern": MangahereChapterExtractor.pattern,
- "count": ">= 50",
- }),
- ("https://www.mangahere.co/manga/aria/"),
- ("https://m.mangahere.co/manga/aria/"),
- )
+ example = "https://www.mangahere.cc/manga/TITLE"
- def __init__(self, match):
- MangaExtractor.__init__(self, match)
- self.session.cookies.set("isAdult", "1", domain="www.mangahere.cc")
+ def _init(self):
+ self.cookies.set("isAdult", "1", domain="www.mangahere.cc")
def chapters(self, page):
results = []
diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py
index ba55ac1..0183b25 100644
--- a/gallery_dl/extractor/mangakakalot.py
+++ b/gallery_dl/extractor/mangakakalot.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Jake Mannens
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -19,27 +19,17 @@ BASE_PATTERN = r"(?:https?://)?(?:ww[\dw]?\.)?mangakakalot\.tv"
class MangakakalotBase():
"""Base class for mangakakalot extractors"""
category = "mangakakalot"
- root = "https://ww3.mangakakalot.tv"
+ root = "https://ww6.mangakakalot.tv"
class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
"""Extractor for manga chapters from mangakakalot.tv"""
pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/chapter[_-][^/?#]+)"
- test = (
- ("https://ww3.mangakakalot.tv/chapter/manga-jk986845/chapter-34.2", {
- "pattern": r"https://cm\.blazefast\.co"
- r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.jpg",
- "keyword": "0f1586ff52f0f9cbbb25306ae64ab718f8a6a633",
- "count": 9,
- }),
- ("https://mangakakalot.tv/chapter"
- "/hatarakanai_futari_the_jobless_siblings/chapter_20.1"),
- )
+ example = "https://ww6.mangakakalot.tv/chapter/manga-ID/chapter-01"
def __init__(self, match):
self.path = match.group(1)
ChapterExtractor.__init__(self, match, self.root + self.path)
- self.session.headers['Referer'] = self.root
def metadata(self, page):
_ , pos = text.extract(page, '<span itemprop="title">', '<')
@@ -76,13 +66,7 @@ class MangakakalotMangaExtractor(MangakakalotBase, MangaExtractor):
"""Extractor for manga from mangakakalot.tv"""
chapterclass = MangakakalotChapterExtractor
pattern = BASE_PATTERN + r"(/manga/[^/?#]+)"
- test = (
- ("https://ww3.mangakakalot.tv/manga/manga-jk986845", {
- "pattern": MangakakalotChapterExtractor.pattern,
- "count": ">= 30",
- }),
- ("https://mangakakalot.tv/manga/lk921810"),
- )
+ example = "https://ww6.mangakakalot.tv/manga/manga-ID"
def chapters(self, page):
data = {"lang": "en", "language": "English"}
diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py
index 6fd9f49..46019ad 100644
--- a/gallery_dl/extractor/manganelo.py
+++ b/gallery_dl/extractor/manganelo.py
@@ -21,8 +21,8 @@ class ManganeloBase():
def __init__(self, match):
domain, path = match.groups()
super().__init__(match, "https://" + domain + path)
- self.session.headers['Referer'] = self.root
+ def _init(self):
if self._match_chapter is None:
ManganeloBase._match_chapter = re.compile(
r"(?:[Vv]ol\.?\s*(\d+)\s?)?"
@@ -53,27 +53,7 @@ class ManganeloBase():
class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
"""Extractor for manga chapters from manganelo.com"""
pattern = BASE_PATTERN + r"(/(?:manga-\w+|chapter/\w+)/chapter[-_][^/?#]+)"
- test = (
- ("https://chapmanganato.com/manga-gn983696/chapter-23", {
- "pattern": r"https://v\d+\.mkklcdnv6tempv5\.com/img/tab_17/03/23"
- r"/39/gn983696/vol_3_chapter_23_24_yen/\d+-[no]\.jpg",
- "keyword": "17faaea7f0fb8c2675a327bf3aa0bcd7a6311d68",
- "count": 25,
- }),
- ("https://chapmanganelo.com/manga-ti107776/chapter-4", {
- "pattern": r"https://v\d+\.mkklcdnv6tempv5\.com/img/tab_17/01/92"
- r"/08/ti970565/chapter_4_caster/\d+-o\.jpg",
- "keyword": "06e01fa9b3fc9b5b954c0d4a98f0153b40922ded",
- "count": 45,
- }),
- ("https://chapmanganato.com/manga-no991297/chapter-8", {
- "keyword": {"chapter": 8, "chapter_minor": "-1"},
- "count": 20,
- }),
- ("https://readmanganato.com/manga-gn983696/chapter-23"),
- ("https://manganelo.com/chapter/gamers/chapter_15"),
- ("https://manganelo.com/chapter/gq921227/chapter_23"),
- )
+ example = "https://chapmanganato.com/manga-ID/chapter-01"
def metadata(self, page):
extr = text.extract_from(page)
@@ -102,19 +82,7 @@ class ManganeloMangaExtractor(ManganeloBase, MangaExtractor):
"""Extractor for manga from manganelo.com"""
chapterclass = ManganeloChapterExtractor
pattern = BASE_PATTERN + r"(/(?:manga[-/]|read_)\w+)/?$"
- test = (
- ("https://chapmanganato.com/manga-gn983696", {
- "pattern": ManganeloChapterExtractor.pattern,
- "count": ">= 25",
- }),
- ("https://m.manganelo.com/manga-ti107776", {
- "pattern": ManganeloChapterExtractor.pattern,
- "count": ">= 12",
- }),
- ("https://readmanganato.com/manga-gn983696"),
- ("https://manganelo.com/manga/read_otome_no_teikoku"),
- ("https://manganelo.com/manga/ol921234/"),
- )
+ example = "https://manganato.com/manga-ID"
def chapters(self, page):
results = []
diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index a0d1e80..63aaf91 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -35,39 +35,7 @@ class MangaparkBase():
class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
"""Extractor for manga-chapters from mangapark.net"""
pattern = BASE_PATTERN + r"/title/[^/?#]+/(\d+)"
- test = (
- ("https://mangapark.net/title/114972-aria/6710214-en-ch.60.2", {
- "count": 70,
- "pattern": r"https://[\w-]+\.mpcdn\.org/comic/2002/e67"
- r"/61e29278a583b9227964076e/\d+_\d+_\d+_\d+\.jpeg"
- r"\?acc=[^&#]+&exp=\d+",
- "keyword": {
- "artist": [],
- "author": ["Amano Kozue"],
- "chapter": 60,
- "chapter_id": 6710214,
- "chapter_minor": ".2",
- "count": 70,
- "date": "dt:2022-01-15 09:25:03",
- "extension": "jpeg",
- "filename": str,
- "genre": ["adventure", "comedy", "drama", "sci_fi",
- "shounen", "slice_of_life"],
- "lang": "en",
- "language": "English",
- "manga": "Aria",
- "manga_id": 114972,
- "page": int,
- "source": "Koala",
- "title": "Special Navigation - Aquaria Ii",
- "volume": 12,
- },
- }),
- ("https://mangapark.com/title/114972-aria/6710214-en-ch.60.2"),
- ("https://mangapark.org/title/114972-aria/6710214-en-ch.60.2"),
- ("https://mangapark.io/title/114972-aria/6710214-en-ch.60.2"),
- ("https://mangapark.me/title/114972-aria/6710214-en-ch.60.2"),
- )
+ example = "https://mangapark.net/title/MANGA/12345-en-ch.01"
def __init__(self, match):
self.root = text.root_from_url(match.group(0))
@@ -115,41 +83,7 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
"""Extractor for manga from mangapark.net"""
subcategory = "manga"
pattern = BASE_PATTERN + r"/title/(\d+)(?:-[^/?#]*)?/?$"
- test = (
- ("https://mangapark.net/title/114972-aria", {
- "count": 141,
- "pattern": MangaparkChapterExtractor.pattern,
- "keyword": {
- "chapter": int,
- "chapter_id": int,
- "chapter_minor": str,
- "date": "type:datetime",
- "lang": "en",
- "language": "English",
- "manga_id": 114972,
- "source": "re:Horse|Koala",
- "source_id": int,
- "title": str,
- "volume": int,
- },
- }),
- # 'source' option
- ("https://mangapark.net/title/114972-aria", {
- "options": (("source", "koala"),),
- "count": 70,
- "pattern": MangaparkChapterExtractor.pattern,
- "keyword": {
- "source": "Koala",
- "source_id": 15150116,
- },
- }),
- ("https://mangapark.com/title/114972-"),
- ("https://mangapark.com/title/114972"),
- ("https://mangapark.com/title/114972-aria"),
- ("https://mangapark.org/title/114972-aria"),
- ("https://mangapark.io/title/114972-aria"),
- ("https://mangapark.me/title/114972-aria"),
- )
+ example = "https://mangapark.net/title/12345-MANGA"
def __init__(self, match):
self.root = text.root_from_url(match.group(0))
diff --git a/gallery_dl/extractor/mangaread.py b/gallery_dl/extractor/mangaread.py
index 74c239e..8f19374 100644
--- a/gallery_dl/extractor/mangaread.py
+++ b/gallery_dl/extractor/mangaread.py
@@ -35,56 +35,7 @@ class MangareadChapterExtractor(MangareadBase, ChapterExtractor):
"""Extractor for manga-chapters from mangaread.org"""
pattern = (r"(?:https?://)?(?:www\.)?mangaread\.org"
r"(/manga/[^/?#]+/[^/?#]+)")
- test = (
- ("https://www.mangaread.org/manga/one-piece/chapter-1053-3/", {
- "pattern": (r"https://www\.mangaread\.org/wp-content/uploads"
- r"/WP-manga/data/manga_[^/]+/[^/]+/[^.]+\.\w+"),
- "count": 11,
- "keyword": {
- "manga" : "One Piece",
- "title" : "",
- "chapter" : 1053,
- "chapter_minor": ".3",
- "tags" : ["Oda Eiichiro"],
- "lang" : "en",
- "language": "English",
- }
- }),
- ("https://www.mangaread.org/manga/one-piece/chapter-1000000/", {
- "exception": exception.NotFoundError,
- }),
- (("https://www.mangaread.org"
- "/manga/kanan-sama-wa-akumade-choroi/chapter-10/"), {
- "pattern": (r"https://www\.mangaread\.org/wp-content/uploads"
- r"/WP-manga/data/manga_[^/]+/[^/]+/[^.]+\.\w+"),
- "count": 9,
- "keyword": {
- "manga" : "Kanan-sama wa Akumade Choroi",
- "title" : "",
- "chapter" : 10,
- "chapter_minor": "",
- "tags" : list,
- "lang" : "en",
- "language": "English",
- }
- }),
- # 'Chapter146.5'
- # ^^ no whitespace
- ("https://www.mangaread.org/manga/above-all-gods/chapter146-5/", {
- "pattern": (r"https://www\.mangaread\.org/wp-content/uploads"
- r"/WP-manga/data/manga_[^/]+/[^/]+/[^.]+\.\w+"),
- "count": 6,
- "keyword": {
- "manga" : "Above All Gods",
- "title" : "",
- "chapter" : 146,
- "chapter_minor": ".5",
- "tags" : list,
- "lang" : "en",
- "language": "English",
- }
- }),
- )
+ example = "https://www.mangaread.org/manga/MANGA/chapter-01/"
def metadata(self, page):
tags = text.extr(page, 'class="wp-manga-tags-list">', '</div>')
@@ -108,50 +59,7 @@ class MangareadMangaExtractor(MangareadBase, MangaExtractor):
"""Extractor for manga from mangaread.org"""
chapterclass = MangareadChapterExtractor
pattern = r"(?:https?://)?(?:www\.)?mangaread\.org(/manga/[^/?#]+)/?$"
- test = (
- ("https://www.mangaread.org/manga/kanan-sama-wa-akumade-choroi", {
- "pattern": (r"https://www\.mangaread\.org/manga"
- r"/kanan-sama-wa-akumade-choroi"
- r"/chapter-\d+(-.+)?/"),
- "count" : ">= 13",
- "keyword": {
- "manga" : "Kanan-sama wa Akumade Choroi",
- "author" : ["nonco"],
- "artist" : ["nonco"],
- "type" : "Manga",
- "genres" : ["Comedy", "Romance", "Shounen", "Supernatural"],
- "rating" : float,
- "release": 2022,
- "status" : "OnGoing",
- "lang" : "en",
- "language" : "English",
- "manga_alt" : list,
- "description": str,
- }
- }),
- ("https://www.mangaread.org/manga/one-piece", {
- "pattern": (r"https://www\.mangaread\.org/manga"
- r"/one-piece/chapter-\d+(-.+)?/"),
- "count" : ">= 1066",
- "keyword": {
- "manga" : "One Piece",
- "author" : ["Oda Eiichiro"],
- "artist" : ["Oda Eiichiro"],
- "type" : "Manga",
- "genres" : list,
- "rating" : float,
- "release": 1997,
- "status" : "OnGoing",
- "lang" : "en",
- "language" : "English",
- "manga_alt" : ["One Piece"],
- "description": str,
- }
- }),
- ("https://www.mangaread.org/manga/doesnotexist", {
- "exception": exception.HttpError,
- }),
- )
+ example = "https://www.mangaread.org/manga/MANGA"
def chapters(self, page):
if 'class="error404' in page:
diff --git a/gallery_dl/extractor/mangasee.py b/gallery_dl/extractor/mangasee.py
index b7070f2..7261332 100644
--- a/gallery_dl/extractor/mangasee.py
+++ b/gallery_dl/extractor/mangasee.py
@@ -36,64 +36,19 @@ class MangaseeBase():
class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
pattern = (r"(?:https?://)?(mangasee123|manga4life)\.com"
r"(/read-online/[^/?#]+\.html)")
- test = (
- (("https://mangasee123.com/read-online"
- "/Tokyo-Innocent-chapter-4.5-page-1.html"), {
- "pattern": r"https://[^/]+/manga/Tokyo-Innocent/0004\.5-00\d\.png",
- "count": 8,
- "keyword": {
- "author": ["NARUMI Naru"],
- "chapter": 4,
- "chapter_minor": ".5",
- "chapter_string": "100045",
- "count": 8,
- "date": "dt:2020-01-20 21:52:53",
- "extension": "png",
- "filename": r"re:0004\.5-00\d",
- "genre": ["Comedy", "Fantasy", "Harem", "Romance", "Shounen",
- "Supernatural"],
- "index": "1",
- "lang": "en",
- "language": "English",
- "manga": "Tokyo Innocent",
- "page": int,
- "title": "",
- },
- }),
- (("https://manga4life.com/read-online"
- "/One-Piece-chapter-1063-page-1.html"), {
- "pattern": r"https://[^/]+/manga/One-Piece/1063-0\d\d\.png",
- "count": 13,
- "keyword": {
- "author": ["ODA Eiichiro"],
- "chapter": 1063,
- "chapter_minor": "",
- "chapter_string": "110630",
- "count": 13,
- "date": "dt:2022-10-16 17:32:54",
- "extension": "png",
- "filename": r"re:1063-0\d\d",
- "genre": ["Action", "Adventure", "Comedy", "Drama", "Fantasy",
- "Shounen"],
- "index": "1",
- "lang": "en",
- "language": "English",
- "manga": "One Piece",
- "page": int,
- "title": "",
- },
- }),
- )
+ example = "https://mangasee123.com/read-online/MANGA-chapter-1-page-1.html"
def __init__(self, match):
if match.group(1) == "manga4life":
self.category = "mangalife"
self.root = "https://manga4life.com"
ChapterExtractor.__init__(self, match, self.root + match.group(2))
+
+ def _init(self):
self.session.headers["Referer"] = self.gallery_url
domain = self.root.rpartition("/")[2]
- cookies = self.session.cookies
+ cookies = self.cookies
if not cookies.get("PHPSESSID", domain=domain):
cookies.set("PHPSESSID", util.generate_token(13), domain=domain)
@@ -132,45 +87,7 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
class MangaseeMangaExtractor(MangaseeBase, MangaExtractor):
chapterclass = MangaseeChapterExtractor
pattern = r"(?:https?://)?(mangasee123|manga4life)\.com(/manga/[^/?#]+)"
- test = (
- (("https://mangasee123.com/manga"
- "/Nakamura-Koedo-To-Daizu-Keisuke-Wa-Umaku-Ikanai"), {
- "pattern": MangaseeChapterExtractor.pattern,
- "count": ">= 17",
- "keyword": {
- "author": ["TAKASE Masaya"],
- "chapter": int,
- "chapter_minor": r"re:^|\.5$",
- "chapter_string": r"re:100\d\d\d",
- "date": "type:datetime",
- "genre": ["Comedy", "Romance", "School Life", "Shounen",
- "Slice of Life"],
- "index": "1",
- "lang": "en",
- "language": "English",
- "manga": "Nakamura-Koedo-To-Daizu-Keisuke-Wa-Umaku-Ikanai",
- "title": "",
- },
- }),
- ("https://manga4life.com/manga/Ano-Musume-Ni-Kiss-To-Shirayuri-O", {
- "pattern": MangaseeChapterExtractor.pattern,
- "count": ">= 50",
- "keyword": {
- "author": ["Canno"],
- "chapter": int,
- "chapter_minor": r"re:^|\.5$",
- "chapter_string": r"re:100\d\d\d",
- "date": "type:datetime",
- "genre": ["Comedy", "Romance", "School Life", "Seinen",
- "Shoujo Ai"],
- "index": "1",
- "lang": "en",
- "language": "English",
- "manga": "Ano-Musume-Ni-Kiss-To-Shirayuri-O",
- "title": ""
- },
- }),
- )
+ example = "https://mangasee123.com/manga/MANGA"
def __init__(self, match):
if match.group(1) == "manga4life":
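
[Editor's note] The mangasee hunk above shows a second pattern that recurs throughout this diff: setup that needs the HTTP session or the cookie jar (Referer headers, PHPSESSID cookies) moves out of __init__ into a new _init() hook, and the jar is reached via self.cookies rather than self.session.cookies. The following is a rough, self-contained sketch of that deferred-initialization idea, assuming a requests-based session; BaseExtractor and ChapterExtractor below are stand-ins, not the actual gallery_dl classes.

import requests

class BaseExtractor:
    """Stand-in base class (illustrative only, not gallery_dl's Extractor)."""

    def __init__(self, match):
        self.match = match                   # only match-derived state here

    def initialize(self):
        # The session and cookie jar exist only from this point on ...
        self.session = requests.Session()
        self.cookies = self.session.cookies
        # ... which is why per-extractor setup now runs in _init(), not __init__.
        self._init()

    def _init(self):
        """Hook for subclasses: session/cookie dependent setup goes here."""

class ChapterExtractor(BaseExtractor):
    root = "https://example.org"             # made-up site, for illustration

    def __init__(self, match):
        BaseExtractor.__init__(self, match)
        self.chapter_id = match.group(1)     # cheap, match-only setup stays here

    def _init(self):
        # moved out of __init__ because it needs self.session / self.cookies
        self.session.headers["Referer"] = self.root + "/"
        if not self.cookies.get("PHPSESSID"):
            self.cookies.set("PHPSESSID", "0123456789abc")
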
diff --git a/gallery_dl/extractor/mangoxo.py b/gallery_dl/extractor/mangoxo.py
index ac4c797..b208f03 100644
--- a/gallery_dl/extractor/mangoxo.py
+++ b/gallery_dl/extractor/mangoxo.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2020 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -19,14 +19,14 @@ class MangoxoExtractor(Extractor):
"""Base class for mangoxo extractors"""
category = "mangoxo"
root = "https://www.mangoxo.com"
- cookiedomain = "www.mangoxo.com"
- cookienames = ("SESSION",)
+ cookies_domain = "www.mangoxo.com"
+ cookies_names = ("SESSION",)
_warning = True
def login(self):
username, password = self._get_auth_info()
if username:
- self._update_cookies(self._login_impl(username, password))
+ self.cookies_update(self._login_impl(username, password))
elif MangoxoExtractor._warning:
MangoxoExtractor._warning = False
self.log.warning("Unauthenticated users cannot see "
@@ -51,7 +51,7 @@ class MangoxoExtractor(Extractor):
data = response.json()
if str(data.get("result")) != "1":
raise exception.AuthenticationError(data.get("msg"))
- return {"SESSION": self.session.cookies.get("SESSION")}
+ return {"SESSION": self.cookies.get("SESSION")}
@staticmethod
def _sign_by_md5(username, password, token):
@@ -80,25 +80,7 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
directory_fmt = ("{category}", "{channel[name]}", "{album[name]}")
archive_fmt = "{album[id]}_{num}"
pattern = r"(?:https?://)?(?:www\.)?mangoxo\.com/album/(\w+)"
- test = ("https://www.mangoxo.com/album/lzVOv1Q9", {
- "url": "ad921fe62663b06e7d73997f7d00646cab7bdd0d",
- "keyword": {
- "channel": {
- "id": "gaxO16d8",
- "name": "Phoenix",
- "cover": str,
- },
- "album": {
- "id": "lzVOv1Q9",
- "name": "re:池永康晟 Ikenaga Yasunari 透出古朴",
- "date": "dt:2019-03-22 14:42:00",
- "description": str,
- },
- "id": int,
- "num": int,
- "count": 65,
- },
- })
+ example = "https://www.mangoxo.com/album/ID"
def __init__(self, match):
MangoxoExtractor.__init__(self, match)
@@ -163,11 +145,7 @@ class MangoxoChannelExtractor(MangoxoExtractor):
"""Extractor for all albums on a mangoxo channel"""
subcategory = "channel"
pattern = r"(?:https?://)?(?:www\.)?mangoxo\.com/(\w+)/album"
- test = ("https://www.mangoxo.com/phoenix/album", {
- "pattern": MangoxoAlbumExtractor.pattern,
- "range": "1-30",
- "count": "> 20",
- })
+ example = "https://www.mangoxo.com/USER/album"
def __init__(self, match):
MangoxoExtractor.__init__(self, match)
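
[Editor's note] The mangoxo hunks above belong to a repository-wide rename of the cookie-handling API: cookiedomain/cookienames become cookies_domain/cookies_names, _update_cookies()/_check_cookies() become cookies_update()/cookies_check(), and cookies are read from self.cookies instead of self.session.cookies. The sketch below shows a login flow written against the renamed attributes; the SiteExtractor class, its dict-based jar, and the placeholder credentials are invented for illustration, only the attribute and method names mirror the rename in the diff.

class SiteExtractor:
    cookies_domain = ".example.org"
    cookies_names = ("SESSION",)

    def __init__(self):
        self.cookies = {}                    # stands in for the session cookie jar

    def cookies_check(self, names):
        # True if every required login cookie is already present
        return all(name in self.cookies for name in names)

    def cookies_update(self, cookies):
        self.cookies.update(cookies)

    def login(self):
        if self.cookies_check(self.cookies_names):
            return                           # already logged in via cached cookies
        username, password = "user", "pass"  # placeholder credentials
        self.cookies_update(self._login_impl(username, password))

    def _login_impl(self, username, password):
        # A real extractor would POST the credentials and inspect the response;
        # here we simply return the cookie the site is expected to set.
        return {"SESSION": "0123456789abcdef"}
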
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index e190c7e..3c2b03e 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -19,12 +19,14 @@ class MastodonExtractor(BaseExtractor):
directory_fmt = ("mastodon", "{instance}", "{account[username]}")
filename_fmt = "{category}_{id}_{media[id]}.{extension}"
archive_fmt = "{media[id]}"
- cookiedomain = None
+ cookies_domain = None
def __init__(self, match):
BaseExtractor.__init__(self, match)
- self.instance = self.root.partition("://")[2]
self.item = match.group(match.lastindex)
+
+ def _init(self):
+ self.instance = self.root.partition("://")[2]
self.reblogs = self.config("reblogs", False)
self.replies = self.config("replies", True)
@@ -104,29 +106,7 @@ class MastodonUserExtractor(MastodonExtractor):
"""Extractor for all images of an account/user"""
subcategory = "user"
pattern = BASE_PATTERN + r"/(?:@|users/)([^/?#]+)(?:/media)?/?$"
- test = (
- ("https://mastodon.social/@jk", {
- "pattern": r"https://files.mastodon.social/media_attachments"
- r"/files/(\d+/){3,}original/\w+",
- "range": "1-60",
- "count": 60,
- }),
- ("https://pawoo.net/@yoru_nine/", {
- "range": "1-60",
- "count": 60,
- }),
- ("https://baraag.net/@pumpkinnsfw"),
- ("https://mastodon.social/@yoru_nine@pawoo.net", {
- "pattern": r"https://mastodon\.social/media_proxy/\d+/original",
- "range": "1-10",
- "count": 10,
- }),
- ("https://mastodon.social/@id:10843"),
- ("https://mastodon.social/users/id:10843"),
- ("https://mastodon.social/users/jk"),
- ("https://mastodon.social/users/yoru_nine@pawoo.net"),
- ("https://mastodon.social/web/@jk"),
- )
+ example = "https://mastodon.social/@USER"
def statuses(self):
api = MastodonAPI(self)
@@ -142,11 +122,7 @@ class MastodonBookmarkExtractor(MastodonExtractor):
"""Extractor for mastodon bookmarks"""
subcategory = "bookmark"
pattern = BASE_PATTERN + r"/bookmarks"
- test = (
- ("https://mastodon.social/bookmarks"),
- ("https://pawoo.net/bookmarks"),
- ("https://baraag.net/bookmarks"),
- )
+ example = "https://mastodon.social/bookmarks"
def statuses(self):
return MastodonAPI(self).account_bookmarks()
@@ -155,16 +131,8 @@ class MastodonBookmarkExtractor(MastodonExtractor):
class MastodonFollowingExtractor(MastodonExtractor):
"""Extractor for followed mastodon users"""
subcategory = "following"
- pattern = BASE_PATTERN + r"/users/([^/?#]+)/following"
- test = (
- ("https://mastodon.social/users/0x4f/following", {
- "extractor": False,
- "count": ">= 20",
- }),
- ("https://mastodon.social/users/id:10843/following"),
- ("https://pawoo.net/users/yoru_nine/following"),
- ("https://baraag.net/users/pumpkinnsfw/following"),
- )
+ pattern = BASE_PATTERN + r"/(?:@|users/)([^/?#]+)/following"
+ example = "https://mastodon.social/@USER/following"
def items(self):
api = MastodonAPI(self)
@@ -179,21 +147,7 @@ class MastodonStatusExtractor(MastodonExtractor):
"""Extractor for images from a status"""
subcategory = "status"
pattern = BASE_PATTERN + r"/@[^/?#]+/(\d+)"
- test = (
- ("https://mastodon.social/@jk/103794036899778366", {
- "count": 4,
- "keyword": {
- "count": 4,
- "num": int,
- },
- }),
- ("https://pawoo.net/@yoru_nine/105038878897832922", {
- "content": "b52e807f8ab548d6f896b09218ece01eba83987a",
- }),
- ("https://baraag.net/@pumpkinnsfw/104364170556898443", {
- "content": "67748c1b828c58ad60d0fe5729b59fb29c872244",
- }),
- )
+ example = "https://mastodon.social/@USER/12345"
def statuses(self):
return (MastodonAPI(self).status(self.item),)
diff --git a/gallery_dl/extractor/mememuseum.py b/gallery_dl/extractor/mememuseum.py
deleted file mode 100644
index 1de0d76..0000000
--- a/gallery_dl/extractor/mememuseum.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2022 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://meme.museum/"""
-
-from .common import Extractor, Message
-from .. import text
-
-
-class MememuseumExtractor(Extractor):
- """Base class for meme.museum extractors"""
- basecategory = "booru"
- category = "mememuseum"
- filename_fmt = "{category}_{id}_{md5}.{extension}"
- archive_fmt = "{id}"
- root = "https://meme.museum"
-
- def items(self):
- data = self.metadata()
-
- for post in self.posts():
- url = post["file_url"]
- for key in ("id", "width", "height"):
- post[key] = text.parse_int(post[key])
- post["tags"] = text.unquote(post["tags"])
- post.update(data)
- yield Message.Directory, post
- yield Message.Url, url, text.nameext_from_url(url, post)
-
- def metadata(self):
- """Return general metadata"""
- return ()
-
- def posts(self):
- """Return an iterable containing data of all relevant posts"""
- return ()
-
-
-class MememuseumTagExtractor(MememuseumExtractor):
- """Extractor for images from meme.museum by search-tags"""
- subcategory = "tag"
- directory_fmt = ("{category}", "{search_tags}")
- pattern = r"(?:https?://)?meme\.museum/post/list/([^/?#]+)"
- test = ("https://meme.museum/post/list/animated/1", {
- "pattern": r"https://meme\.museum/_images/\w+/\d+%20-%20",
- "count": ">= 30"
- })
- per_page = 25
-
- def __init__(self, match):
- MememuseumExtractor.__init__(self, match)
- self.tags = text.unquote(match.group(1))
-
- def metadata(self):
- return {"search_tags": self.tags}
-
- def posts(self):
- pnum = 1
- while True:
- url = "{}/post/list/{}/{}".format(self.root, self.tags, pnum)
- extr = text.extract_from(self.request(url).text)
-
- while True:
- mime = extr("data-mime='", "'")
- if not mime:
- break
-
- pid = extr("data-post-id='", "'")
- tags, dimensions, size = extr("title='", "'").split(" // ")
- md5 = extr("/_thumbs/", "/")
- width, _, height = dimensions.partition("x")
-
- yield {
- "file_url": "{}/_images/{}/{}%20-%20{}.{}".format(
- self.root, md5, pid, text.quote(tags),
- mime.rpartition("/")[2]),
- "id": pid, "md5": md5, "tags": tags,
- "width": width, "height": height,
- "size": text.parse_bytes(size[:-1]),
- }
-
- if not extr(">Next<", ">"):
- return
- pnum += 1
-
-
-class MememuseumPostExtractor(MememuseumExtractor):
- """Extractor for single images from meme.museum"""
- subcategory = "post"
- pattern = r"(?:https?://)?meme\.museum/post/view/(\d+)"
- test = ("https://meme.museum/post/view/10243", {
- "pattern": r"https://meme\.museum/_images/105febebcd5ca791ee332adc4997"
- r"1f78/10243%20-%20g%20beard%20open_source%20richard_stallm"
- r"an%20stallman%20tagme%20text\.jpg",
- "keyword": "3c8009251480cf17248c08b2b194dc0c4d59580e",
- "content": "45565f3f141fc960a8ae1168b80e718a494c52d2",
- })
-
- def __init__(self, match):
- MememuseumExtractor.__init__(self, match)
- self.post_id = match.group(1)
-
- def posts(self):
- url = "{}/post/view/{}".format(self.root, self.post_id)
- extr = text.extract_from(self.request(url).text)
-
- return ({
- "id" : self.post_id,
- "tags" : extr(": ", "<"),
- "md5" : extr("/_thumbs/", "/"),
- "file_url": self.root + extr("id='main_image' src='", "'"),
- "width" : extr("data-width=", " ").strip("'\""),
- "height" : extr("data-height=", " ").strip("'\""),
- "size" : 0,
- },)
diff --git a/gallery_dl/extractor/misskey.py b/gallery_dl/extractor/misskey.py
index 37efac0..95b83b6 100644
--- a/gallery_dl/extractor/misskey.py
+++ b/gallery_dl/extractor/misskey.py
@@ -19,9 +19,11 @@ class MisskeyExtractor(BaseExtractor):
def __init__(self, match):
BaseExtractor.__init__(self, match)
+ self.item = match.group(match.lastindex)
+
+ def _init(self):
self.api = MisskeyAPI(self)
self.instance = self.root.rpartition("://")[2]
- self.item = match.group(match.lastindex)
self.renotes = self.config("renotes", False)
self.replies = self.config("replies", True)
@@ -83,24 +85,7 @@ class MisskeyUserExtractor(MisskeyExtractor):
"""Extractor for all images of a Misskey user"""
subcategory = "user"
pattern = BASE_PATTERN + r"/@([^/?#]+)/?$"
- test = (
- ("https://misskey.io/@lithla", {
- "pattern": r"https://s\d+\.arkjp\.net/misskey/[\w-]+\.\w+",
- "range": "1-50",
- "count": 50,
- }),
- ("https://misskey.io/@blooddj@pawoo.net", {
- "range": "1-50",
- "count": 50,
- }),
- ("https://lesbian.energy/@rerorero", {
- "pattern": r"https://lesbian.energy/files/\w+",
- "range": "1-50",
- "count": 50,
- }),
- ("https://lesbian.energy/@nano@mk.yopo.work"),
- ("https://sushi.ski/@ui@misskey.04.si"),
- )
+ example = "https://misskey.io/@USER"
def notes(self):
return self.api.users_notes(self.api.user_id_by_username(self.item))
@@ -110,13 +95,7 @@ class MisskeyFollowingExtractor(MisskeyExtractor):
"""Extractor for followed Misskey users"""
subcategory = "following"
pattern = BASE_PATTERN + r"/@([^/?#]+)/following"
- test = (
- ("https://misskey.io/@blooddj@pawoo.net/following", {
- "extractor": False,
- "count": ">= 6",
- }),
- ("https://sushi.ski/@hatusimo_sigure/following"),
- )
+ example = "https://misskey.io/@USER/following"
def items(self):
user_id = self.api.user_id_by_username(self.item)
@@ -134,21 +113,7 @@ class MisskeyNoteExtractor(MisskeyExtractor):
"""Extractor for images from a Note"""
subcategory = "note"
pattern = BASE_PATTERN + r"/notes/(\w+)"
- test = (
- ("https://misskey.io/notes/9bhqfo835v", {
- "pattern": r"https://s\d+\.arkjp\.net/misskey/[\w-]+\.\w+",
- "count": 4,
- }),
- ("https://misskey.io/notes/9brq7z1re6"),
- ("https://sushi.ski/notes/9bm3x4ksqw", {
- "pattern": r"https://media\.sushi\.ski/files/[\w-]+\.png",
- "count": 1,
- }),
- ("https://lesbian.energy/notes/995ig09wqy", {
- "count": 1,
- }),
- ("https://lesbian.energy/notes/96ynd9w5kc"),
- )
+ example = "https://misskey.io/notes/98765"
def notes(self):
return (self.api.notes_show(self.item),)
@@ -158,12 +123,7 @@ class MisskeyFavoriteExtractor(MisskeyExtractor):
"""Extractor for favorited notes"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/(?:my|api/i)/favorites"
- test = (
- ("https://misskey.io/my/favorites"),
- ("https://misskey.io/api/i/favorites"),
- ("https://lesbian.energy/my/favorites"),
- ("https://sushi.ski/my/favorites"),
- )
+ example = "https://misskey.io/my/favorites"
def notes(self):
return self.api.i_favorites()
diff --git a/gallery_dl/extractor/moebooru.py b/gallery_dl/extractor/moebooru.py
index 0ef0a32..145dd51 100644
--- a/gallery_dl/extractor/moebooru.py
+++ b/gallery_dl/extractor/moebooru.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020-2022 Mike Fährmann
+# Copyright 2020-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -92,92 +92,12 @@ BASE_PATTERN = MoebooruExtractor.update({
})
-class MoebooruPostExtractor(MoebooruExtractor):
- subcategory = "post"
- archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/post/show/(\d+)"
- test = (
- ("https://yande.re/post/show/51824", {
- "content": "59201811c728096b2d95ce6896fd0009235fe683",
- "options": (("tags", True),),
- "keyword": {
- "tags_artist": "sasaki_tamaru",
- "tags_circle": "softhouse_chara",
- "tags_copyright": "ouzoku",
- "tags_general": str,
- },
- }),
- ("https://konachan.com/post/show/205189", {
- "content": "674e75a753df82f5ad80803f575818b8e46e4b65",
- "options": (("tags", True),),
- "keyword": {
- "tags_artist": "patata",
- "tags_character": "clownpiece",
- "tags_copyright": "touhou",
- "tags_general": str,
- },
- }),
- ("https://yande.re/post/show/993156", {
- "content": "fed722bd90f48de41ec163692befc701056e2b1e",
- "options": (("notes", True),),
- "keyword": {
- "notes": [
- {
- "id": 7096,
- "x" : 90,
- "y" : 626,
- "width" : 283,
- "height": 529,
- "body" : "Please keep this as a secret for me!!",
- },
- {
- "id": 7095,
- "x" : 900,
- "y" : 438,
- "width" : 314,
- "height": 588,
- "body" : "The facts that I love playing games",
- },
- ],
- },
- }),
- ("https://lolibooru.moe/post/show/281305/", {
- "content": "a331430223ffc5b23c31649102e7d49f52489b57",
- "options": (("notes", True),),
- "keyword": {
- "notes": list,
- },
- }),
- ("https://konachan.net/post/show/205189"),
- ("https://www.sakugabooru.com/post/show/125570"),
- ("https://lolibooru.moe/post/show/287835"),
- )
-
- def __init__(self, match):
- MoebooruExtractor.__init__(self, match)
- self.post_id = match.group(match.lastindex)
-
- def posts(self):
- params = {"tags": "id:" + self.post_id}
- return self.request(self.root + "/post.json", params=params).json()
-
-
class MoebooruTagExtractor(MoebooruExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"/post\?(?:[^&#]*&)*tags=([^&#]+)"
- test = (
- ("https://yande.re/post?tags=ouzoku+armor", {
- "content": "59201811c728096b2d95ce6896fd0009235fe683",
- }),
- ("https://konachan.com/post?tags=patata", {
- "content": "838cfb815e31f48160855435655ddf7bfc4ecb8d",
- }),
- ("https://konachan.net/post?tags=patata"),
- ("https://www.sakugabooru.com/post?tags=nichijou"),
- ("https://lolibooru.moe/post?tags=ruu_%28tksymkw%29"),
- )
+ pattern = BASE_PATTERN + r"/post\?(?:[^&#]*&)*tags=([^&#]*)"
+ example = "https://yande.re/post?tags=TAG"
def __init__(self, match):
MoebooruExtractor.__init__(self, match)
@@ -197,17 +117,7 @@ class MoebooruPoolExtractor(MoebooruExtractor):
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
pattern = BASE_PATTERN + r"/pool/show/(\d+)"
- test = (
- ("https://yande.re/pool/show/318", {
- "content": "2a35b9d6edecce11cc2918c6dce4de2198342b68",
- }),
- ("https://konachan.com/pool/show/95", {
- "content": "cf0546e38a93c2c510a478f8744e60687b7a8426",
- }),
- ("https://konachan.net/pool/show/95"),
- ("https://www.sakugabooru.com/pool/show/54"),
- ("https://lolibooru.moe/pool/show/239"),
- )
+ example = "https://yande.re/pool/show/12345"
def __init__(self, match):
MoebooruExtractor.__init__(self, match)
@@ -221,25 +131,28 @@ class MoebooruPoolExtractor(MoebooruExtractor):
return self._pagination(self.root + "/post.json", params)
+class MoebooruPostExtractor(MoebooruExtractor):
+ subcategory = "post"
+ archive_fmt = "{id}"
+ pattern = BASE_PATTERN + r"/post/show/(\d+)"
+ example = "https://yande.re/post/show/12345"
+
+ def __init__(self, match):
+ MoebooruExtractor.__init__(self, match)
+ self.post_id = match.group(match.lastindex)
+
+ def posts(self):
+ params = {"tags": "id:" + self.post_id}
+ return self.request(self.root + "/post.json", params=params).json()
+
+
class MoebooruPopularExtractor(MoebooruExtractor):
subcategory = "popular"
directory_fmt = ("{category}", "popular", "{scale}", "{date}")
archive_fmt = "P_{scale[0]}_{date}_{id}"
pattern = BASE_PATTERN + \
r"/post/popular_(by_(?:day|week|month)|recent)(?:\?([^#]*))?"
- test = (
- ("https://yande.re/post/popular_by_month?month=6&year=2014", {
- "count": 40,
- }),
- ("https://yande.re/post/popular_recent"),
- ("https://konachan.com/post/popular_by_month?month=11&year=2010", {
- "count": 20,
- }),
- ("https://konachan.com/post/popular_recent"),
- ("https://konachan.net/post/popular_recent"),
- ("https://www.sakugabooru.com/post/popular_recent"),
- ("https://lolibooru.moe/post/popular_recent"),
- )
+ example = "https://yande.re/post/popular_by_month?year=YYYY&month=MM"
def __init__(self, match):
MoebooruExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py
index 5dc4cb6..33a2284 100644
--- a/gallery_dl/extractor/myhentaigallery.py
+++ b/gallery_dl/extractor/myhentaigallery.py
@@ -13,32 +13,19 @@ from .. import text, exception
class MyhentaigalleryGalleryExtractor(GalleryExtractor):
"""Extractor for image galleries from myhentaigallery.com"""
category = "myhentaigallery"
+ root = "https://myhentaigallery.com"
directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}")
pattern = (r"(?:https?://)?myhentaigallery\.com"
r"/gallery/(?:thumbnails|show)/(\d+)")
- test = (
- ("https://myhentaigallery.com/gallery/thumbnails/16247", {
- "pattern": r"https://images.myhentaicomics\.com/imagesgallery"
- r"/images/[^/]+/original/\d+\.jpg",
- "keyword": {
- "artist" : list,
- "count" : 11,
- "gallery_id": 16247,
- "group" : list,
- "parodies" : list,
- "tags" : ["Giantess"],
- "title" : "Attack Of The 50ft Woman 1",
- },
- }),
- ("https://myhentaigallery.com/gallery/show/16247/1"),
- )
- root = "https://myhentaigallery.com"
+ example = "https://myhentaigallery.com/gallery/thumbnails/12345"
def __init__(self, match):
self.gallery_id = match.group(1)
url = "{}/gallery/thumbnails/{}".format(self.root, self.gallery_id)
GalleryExtractor.__init__(self, match, url)
- self.session.headers["Referer"] = url
+
+ def _init(self):
+ self.session.headers["Referer"] = self.gallery_url
def metadata(self, page):
extr = text.extract_from(page)
diff --git a/gallery_dl/extractor/myportfolio.py b/gallery_dl/extractor/myportfolio.py
index fd16f24..7620d08 100644
--- a/gallery_dl/extractor/myportfolio.py
+++ b/gallery_dl/extractor/myportfolio.py
@@ -22,27 +22,7 @@ class MyportfolioGalleryExtractor(Extractor):
pattern = (r"(?:myportfolio:(?:https?://)?([^/]+)|"
r"(?:https?://)?([\w-]+\.myportfolio\.com))"
r"(/[^/?#]+)?")
- test = (
- ("https://andrewling.myportfolio.com/volvo-xc-90-hybrid", {
- "url": "acea0690c76db0e5cf267648cefd86e921bc3499",
- "keyword": "6ac6befe2ee0af921d24cf1dd4a4ed71be06db6d",
- }),
- ("https://andrewling.myportfolio.com/", {
- "pattern": r"https://andrewling\.myportfolio\.com/[^/?#+]+$",
- "count": ">= 6",
- }),
- ("https://stevenilousphotography.myportfolio.com/society", {
- "exception": exception.NotFoundError,
- }),
- # custom domain
- ("myportfolio:https://tooco.com.ar/6-of-diamonds-paradise-bird", {
- "count": 3,
- }),
- ("myportfolio:https://tooco.com.ar/", {
- "pattern": pattern,
- "count": ">= 40",
- }),
- )
+ example = "https://USER.myportfolio.com/TITLE"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py
index 7e94b1c..55faf9e 100644
--- a/gallery_dl/extractor/naver.py
+++ b/gallery_dl/extractor/naver.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -27,17 +27,7 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
archive_fmt = "{blog[id]}_{post[num]}_{num}"
pattern = (r"(?:https?://)?blog\.naver\.com/"
r"(?:PostView\.nhn\?blogId=(\w+)&logNo=(\d+)|(\w+)/(\d+)/?$)")
- test = (
- ("https://blog.naver.com/rlfqjxm0/221430673006", {
- "url": "6c694f3aced075ed5e9511f1e796d14cb26619cc",
- "keyword": "a6e23d19afbee86b37d6e7ad934650c379d2cb1e",
- }),
- (("https://blog.naver.com/PostView.nhn"
- "?blogId=rlfqjxm0&logNo=221430673006"), {
- "url": "6c694f3aced075ed5e9511f1e796d14cb26619cc",
- "keyword": "a6e23d19afbee86b37d6e7ad934650c379d2cb1e",
- }),
- )
+ example = "https://blog.naver.com/BLOGID/12345"
def __init__(self, match):
blog_id = match.group(1)
@@ -84,18 +74,7 @@ class NaverBlogExtractor(NaverBase, Extractor):
categorytransfer = True
pattern = (r"(?:https?://)?blog\.naver\.com/"
r"(?:PostList.nhn\?(?:[^&#]+&)*blogId=([^&#]+)|(\w+)/?$)")
- test = (
- ("https://blog.naver.com/gukjung", {
- "pattern": NaverPostExtractor.pattern,
- "count": 12,
- "range": "1-12",
- }),
- ("https://blog.naver.com/PostList.nhn?blogId=gukjung", {
- "pattern": NaverPostExtractor.pattern,
- "count": 12,
- "range": "1-12",
- }),
- )
+ example = "https://blog.naver.com/BLOGID"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/naverwebtoon.py b/gallery_dl/extractor/naverwebtoon.py
index cafe4f7..72ee5b0 100644
--- a/gallery_dl/extractor/naverwebtoon.py
+++ b/gallery_dl/extractor/naverwebtoon.py
@@ -28,54 +28,7 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor):
filename_fmt = "{episode:>03}-{num:>02}.{extension}"
archive_fmt = "{title_id}_{episode}_{num}"
pattern = BASE_PATTERN + r"/detail(?:\.nhn)?\?([^#]+)"
- test = (
- (("https://comic.naver.com/webtoon/detail"
- "?titleId=26458&no=1&weekday=tue"), {
- "url": "47a956ba8c7a837213d5985f50c569fcff986f75",
- "content": "3806b6e8befbb1920048de9888dfce6220f69a60",
- "count": 14,
- "keyword": {
- "author": ["김규삼"],
- "artist": ["김규삼"],
- "comic": "N의등대-눈의등대",
- "count": 14,
- "episode": "1",
- "extension": "jpg",
- "num": int,
- "tags": ["스릴러", "완결무료", "완결스릴러"],
- "title": "n의 등대 - 눈의 등대 1화",
- "title_id": "26458",
- },
- }),
- (("https://comic.naver.com/challenge/detail"
- "?titleId=765124&no=1"), {
- "pattern": r"https://image-comic\.pstatic\.net"
- r"/user_contents_data/challenge_comic/2021/01/19"
- r"/342586/upload_7149856273586337846\.jpeg",
- "count": 1,
- "keyword": {
- "author": ["kemi****"],
- "artist": [],
- "comic": "우니 모두의 이야기",
- "count": 1,
- "episode": "1",
- "extension": "jpeg",
- "filename": "upload_7149856273586337846",
- "num": 1,
- "tags": ["일상툰", "우니모두의이야기", "퇴사", "입사", "신입사원",
- "사회초년생", "회사원", "20대"],
- "title": "퇴사하다",
- "title_id": "765124",
- },
- }),
- (("https://comic.naver.com/bestChallenge/detail.nhn"
- "?titleId=771467&no=3"), {
- "pattern": r"https://image-comic\.pstatic\.net"
- r"/user_contents_data/challenge_comic/2021/04/28"
- r"/345534/upload_3617293622396203109\.jpeg",
- "count": 1,
- }),
- )
+ example = "https://comic.naver.com/webtoon/detail?titleId=12345&no=1"
def __init__(self, match):
path, query = match.groups()
@@ -115,20 +68,7 @@ class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor):
subcategory = "comic"
categorytransfer = True
pattern = BASE_PATTERN + r"/list(?:\.nhn)?\?([^#]+)"
- test = (
- ("https://comic.naver.com/webtoon/list?titleId=22073", {
- "pattern": NaverwebtoonEpisodeExtractor.pattern,
- "count": 32,
- }),
- ("https://comic.naver.com/challenge/list?titleId=765124", {
- "pattern": NaverwebtoonEpisodeExtractor.pattern,
- "count": 25,
- }),
- ("https://comic.naver.com/bestChallenge/list.nhn?titleId=789786", {
- "pattern": NaverwebtoonEpisodeExtractor.pattern,
- "count": ">= 12",
- }),
- )
+ example = "https://comic.naver.com/webtoon/list?titleId=12345"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -145,7 +85,6 @@ class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor):
url = self.root + "/api/article/list"
headers = {
"Accept": "application/json, text/plain, */*",
- "Referer": self.root + "/",
}
params = {
"titleId": self.title_id,
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index e047f3d..1bcc915 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -21,14 +21,16 @@ class NewgroundsExtractor(Extractor):
filename_fmt = "{category}_{_index}_{title}.{extension}"
archive_fmt = "{_type}{_index}"
root = "https://www.newgrounds.com"
- cookiedomain = ".newgrounds.com"
- cookienames = ("NG_GG_username", "vmk1du5I8m")
+ cookies_domain = ".newgrounds.com"
+ cookies_names = ("NG_GG_username", "vmk1du5I8m")
request_interval = 1.0
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
self.user_root = "https://{}.newgrounds.com".format(self.user)
+
+ def _init(self):
self.flash = self.config("flash", True)
fmt = self.config("format", "original")
@@ -72,11 +74,12 @@ class NewgroundsExtractor(Extractor):
"""Return general metadata"""
def login(self):
- if self._check_cookies(self.cookienames):
+ if self.cookies_check(self.cookies_names):
return
+
username, password = self._get_auth_info()
if username:
- self._update_cookies(self._login_impl(username, password))
+ self.cookies_update(self._login_impl(username, password))
@cache(maxage=360*24*3600, keyarg=1)
def _login_impl(self, username, password):
@@ -85,7 +88,7 @@ class NewgroundsExtractor(Extractor):
url = self.root + "/passport/"
response = self.request(url)
if response.history and response.url.endswith("/social"):
- return self.session.cookies
+ return self.cookies
page = response.text
headers = {"Origin": self.root, "Referer": url}
@@ -105,7 +108,7 @@ class NewgroundsExtractor(Extractor):
return {
cookie.name: cookie.value
for cookie in response.history[0].cookies
- if cookie.expires and cookie.domain == self.cookiedomain
+ if cookie.expires and cookie.domain == self.cookies_domain
}
def extract_post(self, post_url):
@@ -204,7 +207,6 @@ class NewgroundsExtractor(Extractor):
headers = {
"Accept": "application/json, text/javascript, */*; q=0.01",
"X-Requested-With": "XMLHttpRequest",
- "Referer": self.root,
}
sources = self.request(url, headers=headers).json()["sources"]
@@ -297,41 +299,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
pattern = (r"(?:https?://)?(?:"
r"(?:www\.)?newgrounds\.com/art/view/([^/?#]+)/[^/?#]+"
r"|art\.ngfiles\.com/images/\d+/\d+_([^_]+)_([^.]+))")
- test = (
- ("https://www.newgrounds.com/art/view/tomfulp/ryu-is-hawt", {
- "url": "57f182bcbbf2612690c3a54f16ffa1da5105245e",
- "content": "8f395e08333eb2457ba8d8b715238f8910221365",
- "keyword": {
- "artist" : ["tomfulp"],
- "comment" : "re:Consider this the bottom threshold for ",
- "date" : "dt:2009-06-04 14:44:05",
- "description": "re:Consider this the bottom threshold for ",
- "favorites" : int,
- "filename" : "94_tomfulp_ryu-is-hawt",
- "height" : 476,
- "index" : 94,
- "rating" : "e",
- "score" : float,
- "tags" : ["ryu", "streetfighter"],
- "title" : "Ryu is Hawt",
- "type" : "article",
- "user" : "tomfulp",
- "width" : 447,
- },
- }),
- ("https://art.ngfiles.com/images/0/94_tomfulp_ryu-is-hawt.gif", {
- "url": "57f182bcbbf2612690c3a54f16ffa1da5105245e",
- }),
- ("https://www.newgrounds.com/art/view/sailoryon/yon-dream-buster", {
- "url": "84eec95e663041a80630df72719f231e157e5f5d",
- "count": 2,
- }),
- # "adult" rated (#2456)
- ("https://www.newgrounds.com/art/view/kekiiro/red", {
- "options": (("username", None),),
- "count": 1,
- }),
- )
+ example = "https://www.newgrounds.com/art/view/USER/TITLE"
def __init__(self, match):
NewgroundsExtractor.__init__(self, match)
@@ -351,104 +319,7 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
subcategory = "media"
pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
r"(/(?:portal/view|audio/listen)/\d+)")
- test = (
- ("https://www.newgrounds.com/portal/view/595355", {
- "pattern": r"https://uploads\.ungrounded\.net/alternate/564000"
- r"/564957_alternate_31\.mp4\?1359712249",
- "keyword": {
- "artist" : ["kickinthehead", "danpaladin", "tomfulp"],
- "comment" : "re:My fan trailer for Alien Hominid HD!",
- "date" : "dt:2013-02-01 09:50:49",
- "description": "Fan trailer for Alien Hominid HD!",
- "favorites" : int,
- "filename" : "564957_alternate_31",
- "index" : 595355,
- "rating" : "e",
- "score" : float,
- "tags" : ["alienhominid", "trailer"],
- "title" : "Alien Hominid Fan Trailer",
- "type" : "movie",
- "user" : "kickinthehead",
- },
- }),
- ("https://www.newgrounds.com/audio/listen/609768", {
- "url": "f4c5490ae559a3b05e46821bb7ee834f93a43c95",
- "keyword": {
- "artist" : ["zj", "tomfulp"],
- "comment" : "re:RECORDED 12-09-2014\n\nFrom The ZJ \"Late ",
- "date" : "dt:2015-02-23 19:31:59",
- "description": "From The ZJ Report Show!",
- "favorites" : int,
- "index" : 609768,
- "rating" : "",
- "score" : float,
- "tags" : ["fulp", "interview", "tom", "zj"],
- "title" : "ZJ Interviews Tom Fulp!",
- "type" : "music.song",
- "user" : "zj",
- },
- }),
- # flash animation (#1257)
- ("https://www.newgrounds.com/portal/view/161181/format/flash", {
- "pattern": r"https://uploads\.ungrounded\.net/161000"
- r"/161181_ddautta_mask__550x281_\.swf\?f1081628129",
- "keyword": {"type": "movie"},
- }),
- # format selection (#1729)
- ("https://www.newgrounds.com/portal/view/758545", {
- "options": (("format", "720p"),),
- "pattern": r"https://uploads\.ungrounded\.net/alternate/1482000"
- r"/1482860_alternate_102516\.720p\.mp4\?\d+",
- }),
- # "adult" rated (#2456)
- ("https://www.newgrounds.com/portal/view/717744", {
- "options": (("username", None),),
- "count": 1,
- }),
- # flash game
- ("https://www.newgrounds.com/portal/view/829032", {
- "pattern": r"https://uploads\.ungrounded\.net/829000"
- r"/829032_picovsbeardx\.swf\?f1641968445",
- "range": "1",
- "keyword": {
- "artist" : [
- "dungeonation",
- "carpetbakery",
- "animalspeakandrews",
- "bill",
- "chipollo",
- "dylz49",
- "gappyshamp",
- "pinktophat",
- "rad",
- "shapeshiftingblob",
- "tomfulp",
- "voicesbycorey",
- "psychogoldfish",
- ],
- "comment" : "re:The children are expendable. Take out the ",
- "date" : "dt:2022-01-10 23:00:57",
- "description": "Bloodshed in The Big House that Blew...again!",
- "favorites" : int,
- "index" : 829032,
- "post_url" : "https://www.newgrounds.com/portal/view/829032",
- "rating" : "m",
- "score" : float,
- "tags" : [
- "assassin",
- "boyfriend",
- "darnell",
- "nene",
- "pico",
- "picos-school",
- ],
- "title" : "PICO VS BEAR DX",
- "type" : "game",
- "url" : "https://uploads.ungrounded.net/829000"
- "/829032_picovsbeardx.swf?f1641968445",
- },
- }),
- )
+ example = "https://www.newgrounds.com/portal/view/12345"
def __init__(self, match):
NewgroundsExtractor.__init__(self, match)
@@ -463,58 +334,38 @@ class NewgroundsArtExtractor(NewgroundsExtractor):
"""Extractor for all images of a newgrounds user"""
subcategory = _path = "art"
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/art/?$"
- test = ("https://tomfulp.newgrounds.com/art", {
- "pattern": NewgroundsImageExtractor.pattern,
- "count": ">= 3",
- })
+ example = "https://USER.newgrounds.com/art"
class NewgroundsAudioExtractor(NewgroundsExtractor):
"""Extractor for all audio submissions of a newgrounds user"""
subcategory = _path = "audio"
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/audio/?$"
- test = ("https://tomfulp.newgrounds.com/audio", {
- "pattern": r"https://audio.ngfiles.com/\d+/\d+_.+\.mp3",
- "count": ">= 4",
- })
+ example = "https://USER.newgrounds.com/audio"
class NewgroundsMoviesExtractor(NewgroundsExtractor):
"""Extractor for all movies of a newgrounds user"""
subcategory = _path = "movies"
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/movies/?$"
- test = ("https://tomfulp.newgrounds.com/movies", {
- "pattern": r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+_.+",
- "range": "1-10",
- "count": 10,
- })
+ example = "https://USER.newgrounds.com/movies"
class NewgroundsGamesExtractor(NewgroundsExtractor):
"""Extractor for a newgrounds user's games"""
subcategory = _path = "games"
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/games/?$"
- test = ("https://tomfulp.newgrounds.com/games", {
- "pattern": r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+_.+",
- "range": "1-10",
- "count": 10,
- })
+ example = "https://USER.newgrounds.com/games"
class NewgroundsUserExtractor(NewgroundsExtractor):
"""Extractor for a newgrounds user profile"""
subcategory = "user"
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/?$"
- test = (
- ("https://tomfulp.newgrounds.com", {
- "pattern": "https://tomfulp.newgrounds.com/art$",
- }),
- ("https://tomfulp.newgrounds.com", {
- "options": (("include", "all"),),
- "pattern": "https://tomfulp.newgrounds.com/(art|audio|movies)$",
- "count": 3,
- }),
- )
+ example = "https://USER.newgrounds.com"
+
+ def initialize(self):
+ pass
def items(self):
base = self.user_root + "/"
@@ -532,15 +383,7 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor):
directory_fmt = ("{category}", "{user}", "Favorites")
pattern = (r"(?:https?://)?([\w-]+)\.newgrounds\.com"
r"/favorites(?!/following)(?:/(art|audio|movies))?/?")
- test = (
- ("https://tomfulp.newgrounds.com/favorites/art", {
- "range": "1-10",
- "count": ">= 10",
- }),
- ("https://tomfulp.newgrounds.com/favorites/audio"),
- ("https://tomfulp.newgrounds.com/favorites/movies"),
- ("https://tomfulp.newgrounds.com/favorites/"),
- )
+ example = "https://USER.newgrounds.com/favorites"
def __init__(self, match):
NewgroundsExtractor.__init__(self, match)
@@ -589,11 +432,7 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
"""Extractor for a newgrounds user's favorited users"""
subcategory = "following"
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/favorites/(following)"
- test = ("https://tomfulp.newgrounds.com/favorites/following", {
- "pattern": NewgroundsUserExtractor.pattern,
- "range": "76-125",
- "count": 50,
- })
+ example = "https://USER.newgrounds.com/favorites/following"
def items(self):
data = {"_extractor": NewgroundsUserExtractor}
@@ -614,21 +453,7 @@ class NewgroundsSearchExtractor(NewgroundsExtractor):
directory_fmt = ("{category}", "search", "{search_tags}")
pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
r"/search/conduct/([^/?#]+)/?\?([^#]+)")
- test = (
- ("https://www.newgrounds.com/search/conduct/art?terms=tree", {
- "pattern": NewgroundsImageExtractor.pattern,
- "keyword": {"search_tags": "tree"},
- "range": "1-10",
- "count": 10,
- }),
- ("https://www.newgrounds.com/search/conduct/movies?terms=tree", {
- "pattern": r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+",
- "range": "1-10",
- "count": 10,
- }),
- ("https://www.newgrounds.com/search/conduct/audio?advanced=1"
- "&terms=tree+green+nature&match=tdtu&genre=5&suitabilities=e%2Cm"),
- )
+ example = "https://www.newgrounds.com/search/conduct/art?terms=QUERY"
def __init__(self, match):
NewgroundsExtractor.__init__(self, match)
@@ -652,7 +477,6 @@ class NewgroundsSearchExtractor(NewgroundsExtractor):
headers = {
"Accept": "application/json, text/javascript, */*; q=0.01",
"X-Requested-With": "XMLHttpRequest",
- "Referer": self.root,
}
params["inner"] = "1"
params["page"] = 1
diff --git a/gallery_dl/extractor/nhentai.py b/gallery_dl/extractor/nhentai.py
index 4270c84..09b2b16 100644
--- a/gallery_dl/extractor/nhentai.py
+++ b/gallery_dl/extractor/nhentai.py
@@ -18,29 +18,7 @@ class NhentaiGalleryExtractor(GalleryExtractor):
category = "nhentai"
root = "https://nhentai.net"
pattern = r"(?:https?://)?nhentai\.net/g/(\d+)"
- test = ("https://nhentai.net/g/147850/", {
- "url": "5179dbf0f96af44005a0ff705a0ad64ac26547d0",
- "keyword": {
- "title" : r"re:\[Morris\] Amazon no Hiyaku \| Amazon Elixir",
- "title_en" : str,
- "title_ja" : str,
- "gallery_id": 147850,
- "media_id" : 867789,
- "count" : 16,
- "date" : 1446050915,
- "scanlator" : "",
- "artist" : ["morris"],
- "group" : list,
- "parody" : list,
- "characters": list,
- "tags" : list,
- "type" : "manga",
- "lang" : "en",
- "language" : "English",
- "width" : int,
- "height" : int,
- },
- })
+ example = "https://nhentai.net/g/12345/"
def __init__(self, match):
url = self.root + "/api/gallery/" + match.group(1)
@@ -127,34 +105,18 @@ class NhentaiTagExtractor(NhentaiExtractor):
pattern = (r"(?:https?://)?nhentai\.net("
r"/(?:artist|category|character|group|language|parody|tag)"
r"/[^/?#]+(?:/popular[^/?#]*)?/?)(?:\?([^#]+))?")
- test = (
- ("https://nhentai.net/tag/sole-female/", {
- "pattern": NhentaiGalleryExtractor.pattern,
- "count": 30,
- "range": "1-30",
- }),
- ("https://nhentai.net/artist/itou-life/"),
- ("https://nhentai.net/group/itou-life/"),
- ("https://nhentai.net/parody/touhou-project/"),
- ("https://nhentai.net/character/patchouli-knowledge/popular"),
- ("https://nhentai.net/category/doujinshi/popular-today"),
- ("https://nhentai.net/language/english/popular-week"),
- )
+ example = "https://nhentai.net/tag/TAG/"
class NhentaiSearchExtractor(NhentaiExtractor):
"""Extractor for nhentai search results"""
subcategory = "search"
pattern = r"(?:https?://)?nhentai\.net(/search/?)\?([^#]+)"
- test = ("https://nhentai.net/search/?q=touhou", {
- "pattern": NhentaiGalleryExtractor.pattern,
- "count": 30,
- "range": "1-30",
- })
+ example = "https://nhentai.net/search/?q=QUERY"
class NhentaiFavoriteExtractor(NhentaiExtractor):
"""Extractor for nhentai favorites"""
subcategory = "favorite"
pattern = r"(?:https?://)?nhentai\.net(/favorites/?)(?:\?([^#]+))?"
- test = ("https://nhentai.net/favorites/",)
+ example = "https://nhentai.net/favorites/"
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 079bae7..b902404 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,18 +21,18 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
archive_fmt = "{image_id}_{num}"
def __init__(self, match):
- self._init_category(match)
- self.cookiedomain = "." + self.root.rpartition("/")[2]
- self.cookienames = (self.category + "_tok",)
+ BaseExtractor.__init__(self, match)
+ self.user_id = text.parse_int(match.group(match.lastindex))
- if self.category == "horne":
- self._extract_data = self._extract_data_horne
+ def initialize(self):
+ self.cookies_domain = "." + self.root.rpartition("/")[2]
+ self.cookies_names = (self.category + "_tok",)
- BaseExtractor.__init__(self, match)
+ BaseExtractor.initialize(self)
- self.user_id = text.parse_int(match.group(match.lastindex))
self.user_name = None
- self.session.headers["Referer"] = self.root + "/"
+ if self.category == "horne":
+ self._extract_data = self._extract_data_horne
def items(self):
self.login()
@@ -121,10 +121,11 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
return text.unescape(text.extr(page, "<br />", "<"))
def login(self):
- """Login and obtain session cookies"""
- if not self._check_cookies(self.cookienames):
- username, password = self._get_auth_info()
- self._update_cookies(self._login_impl(username, password))
+ if self.cookies_check(self.cookies_names):
+ return
+
+ username, password = self._get_auth_info()
+ self.cookies_update(self._login_impl(username, password))
@cache(maxage=90*24*3600, keyarg=1)
def _login_impl(self, username, password):
@@ -139,7 +140,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
response = self.request(url, method="POST", data=data)
if "/login.php" in response.text:
raise exception.AuthenticationError()
- return self.session.cookies
+ return self.cookies
def _pagination(self, path):
url = "{}/{}.php".format(self.root, path)
@@ -172,12 +173,12 @@ BASE_PATTERN = NijieExtractor.update({
class NijieUserExtractor(NijieExtractor):
"""Extractor for nijie user profiles"""
subcategory = "user"
- cookiedomain = None
+ cookies_domain = None
pattern = BASE_PATTERN + r"/members\.php\?id=(\d+)"
- test = (
- ("https://nijie.info/members.php?id=44"),
- ("https://horne.red/members.php?id=58000"),
- )
+ example = "https://nijie.info/members.php?id=12345"
+
+ def initialize(self):
+ pass
def items(self):
fmt = "{}/{{}}.php?id={}".format(self.root, self.user_id).format
@@ -193,48 +194,7 @@ class NijieIllustrationExtractor(NijieExtractor):
"""Extractor for all illustrations of a nijie-user"""
subcategory = "illustration"
pattern = BASE_PATTERN + r"/members_illust\.php\?id=(\d+)"
- test = (
- ("https://nijie.info/members_illust.php?id=44", {
- "url": "1553e5144df50a676f5947d02469299b401ad6c0",
- "keyword": {
- "artist_id": 44,
- "artist_name": "ED",
- "date": "type:datetime",
- "description": str,
- "extension": "jpg",
- "filename": str,
- "image_id": int,
- "num": int,
- "tags": list,
- "title": str,
- "url": r"re:https://pic.nijie.net/\d+/nijie/.*jpg$",
- "user_id": 44,
- "user_name": "ED",
- },
- }),
- ("https://horne.red/members_illust.php?id=58000", {
- "pattern": r"https://pic\.nijie\.net/\d+/horne/\d+/\d+/\d+"
- r"/illust/\d+_\d+_[0-9a-f]+_[0-9a-f]+\.png",
- "range": "1-20",
- "count": 20,
- "keyword": {
- "artist_id": 58000,
- "artist_name": "のえるわ",
- "date": "type:datetime",
- "description": str,
- "image_id": int,
- "num": int,
- "tags": list,
- "title": str,
- "url": str,
- "user_id": 58000,
- "user_name": "のえるわ",
- },
- }),
- ("https://nijie.info/members_illust.php?id=43", {
- "exception": exception.NotFoundError,
- }),
- )
+ example = "https://nijie.info/members_illust.php?id=12345"
def image_ids(self):
return self._pagination("members_illust")
@@ -244,16 +204,7 @@ class NijieDoujinExtractor(NijieExtractor):
"""Extractor for doujin entries of a nijie user"""
subcategory = "doujin"
pattern = BASE_PATTERN + r"/members_dojin\.php\?id=(\d+)"
- test = (
- ("https://nijie.info/members_dojin.php?id=6782", {
- "count": ">= 18",
- "keyword": {
- "user_id" : 6782,
- "user_name": "ジョニー@アビオン村",
- },
- }),
- ("https://horne.red/members_dojin.php?id=58000"),
- )
+ example = "https://nijie.info/members_dojin.php?id=12345"
def image_ids(self):
return self._pagination("members_dojin")
@@ -265,23 +216,7 @@ class NijieFavoriteExtractor(NijieExtractor):
directory_fmt = ("{category}", "bookmarks", "{user_id}")
archive_fmt = "f_{user_id}_{image_id}_{num}"
pattern = BASE_PATTERN + r"/user_like_illust_view\.php\?id=(\d+)"
- test = (
- ("https://nijie.info/user_like_illust_view.php?id=44", {
- "count": ">= 16",
- "keyword": {
- "user_id" : 44,
- "user_name": "ED",
- },
- }),
- ("https://horne.red/user_like_illust_view.php?id=58000", {
- "range": "1-5",
- "count": 5,
- "keyword": {
- "user_id" : 58000,
- "user_name": "のえるわ",
- },
- }),
- )
+ example = "https://nijie.info/user_like_illust_view.php?id=12345"
def image_ids(self):
return self._pagination("user_like_illust_view")
@@ -299,17 +234,7 @@ class NijieNuitaExtractor(NijieExtractor):
directory_fmt = ("{category}", "nuita", "{user_id}")
archive_fmt = "n_{user_id}_{image_id}_{num}"
pattern = BASE_PATTERN + r"/history_nuita\.php\?id=(\d+)"
- test = (
- ("https://nijie.info/history_nuita.php?id=728995", {
- "range": "1-10",
- "count": 10,
- "keyword": {
- "user_id" : 728995,
- "user_name": "莚",
- },
- }),
- ("https://horne.red/history_nuita.php?id=58000"),
- )
+ example = "https://nijie.info/history_nuita.php?id=12345"
def image_ids(self):
return self._pagination("history_nuita")
@@ -329,13 +254,7 @@ class NijieFeedExtractor(NijieExtractor):
"""Extractor for nijie liked user feed"""
subcategory = "feed"
pattern = BASE_PATTERN + r"/like_user_view\.php"
- test = (
- ("https://nijie.info/like_user_view.php", {
- "range": "1-10",
- "count": 10,
- }),
- ("https://horne.red/like_user_view.php"),
- )
+ example = "https://nijie.info/like_user_view.php"
def image_ids(self):
return self._pagination("like_user_view")
@@ -345,14 +264,11 @@ class NijieFeedExtractor(NijieExtractor):
return ""
-class NijiefollowedExtractor(NijieExtractor):
+class NijieFollowedExtractor(NijieExtractor):
"""Extractor for followed nijie users"""
subcategory = "followed"
pattern = BASE_PATTERN + r"/like_my\.php"
- test = (
- ("https://nijie.info/like_my.php"),
- ("https://horne.red/like_my.php"),
- )
+ example = "https://nijie.info/like_my.php"
def items(self):
self.login()
@@ -378,32 +294,7 @@ class NijieImageExtractor(NijieExtractor):
"""Extractor for a nijie work/image"""
subcategory = "image"
pattern = BASE_PATTERN + r"/view(?:_popup)?\.php\?id=(\d+)"
- test = (
- ("https://nijie.info/view.php?id=70720", {
- "url": "3d654e890212ba823c9647754767336aebc0a743",
- "keyword": "41da5d0e178b04f01fe72460185df52fadc3c91b",
- "content": "d85e3ea896ed5e4da0bca2390ad310a4df716ca6",
- }),
- ("https://nijie.info/view.php?id=70724", {
- "count": 0,
- }),
- ("https://nijie.info/view_popup.php?id=70720"),
- ("https://horne.red/view.php?id=8716", {
- "count": 4,
- "keyword": {
- "artist_id": 58000,
- "artist_name": "のえるわ",
- "date": "dt:2018-02-04 14:47:24",
- "description": "ノエル「そんなことしなくても、"
- "言ってくれたら咥えるのに・・・♡」",
- "image_id": 8716,
- "tags": ["男の娘", "フェラ", "オリキャラ", "うちのこ"],
- "title": "ノエル「いまどきそんな、恵方巻ネタなんてやらなくても・・・」",
- "user_id": 58000,
- "user_name": "のえるわ",
- },
- }),
- )
+ example = "https://nijie.info/view.php?id=12345"
def __init__(self, match):
NijieExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py
index beb3da2..9f5cc9d 100644
--- a/gallery_dl/extractor/nitter.py
+++ b/gallery_dl/extractor/nitter.py
@@ -21,7 +21,7 @@ class NitterExtractor(BaseExtractor):
archive_fmt = "{tweet_id}_{num}"
def __init__(self, match):
- self.cookiedomain = self.root.partition("://")[2]
+ self.cookies_domain = self.root.partition("://")[2]
BaseExtractor.__init__(self, match)
lastindex = match.lastindex
@@ -35,7 +35,7 @@ class NitterExtractor(BaseExtractor):
if videos:
ytdl = (videos == "ytdl")
videos = True
- self._cookiejar.set("hlsPlayback", "on", domain=self.cookiedomain)
+ self.cookies.set("hlsPlayback", "on", domain=self.cookies_domain)
for tweet in self.tweets():
@@ -261,58 +261,7 @@ USER_PATTERN = BASE_PATTERN + r"/(i(?:/user/|d:)(\d+)|[^/?#]+)"
class NitterTweetsExtractor(NitterExtractor):
subcategory = "tweets"
pattern = USER_PATTERN + r"(?:/tweets)?(?:$|\?|#)"
- test = (
- ("https://nitter.net/supernaturepics", {
- "pattern": r"https://nitter\.net/pic/orig"
- r"/media%2F[\w-]+\.(jpg|png)$",
- "range": "1-20",
- "count": 20,
- "keyword": {
- "author": {
- "name": "supernaturepics",
- "nick": "Nature Pictures"
- },
- "comments": int,
- "content": str,
- "count": 1,
- "date": "type:datetime",
- "likes": int,
- "quotes": int,
- "retweets": int,
- "tweet_id": r"re:\d+",
- "user": {
- "date": "dt:2015-01-12 10:25:00",
- "description": "The very best nature pictures.",
- "favourites_count": int,
- "followers_count": int,
- "friends_count": int,
- "id": "2976459548",
- "name": "supernaturepics",
- "nick": "Nature Pictures",
- "profile_banner": "https://nitter.net/pic/https%3A%2F%2Fpb"
- "s.twimg.com%2Fprofile_banners%2F2976459"
- "548%2F1421058583%2F1500x500",
- "profile_image": "https://nitter.net/pic/pbs.twimg.com%2Fp"
- "rofile_images%2F554585280938659841%2FFLV"
- "AlX18.jpeg",
- "statuses_count": 1568,
- "verified": False,
- },
- },
- }),
- ("https://nitter.lacontrevoie.fr/supernaturepics", {
- "url": "54f4b55f2099dcc248f3fb7bfacf1349e08d8e2d",
- "pattern": r"https://nitter\.lacontrevoie\.fr/pic/orig"
- r"/media%2FCGMNYZvW0AIVoom\.jpg",
- "range": "1",
- }),
- ("https://nitter.1d4.us/supernaturepics", {
- "range": "1",
- "keyword": {"user": {"id": "2976459548"}},
- }),
- ("https://nitter.kavin.rocks/id:2976459548"),
- ("https://nitter.unixfox.eu/supernaturepics"),
- )
+ example = "https://nitter.net/USER"
def tweets(self):
return self._pagination("")
@@ -321,17 +270,7 @@ class NitterTweetsExtractor(NitterExtractor):
class NitterRepliesExtractor(NitterExtractor):
subcategory = "replies"
pattern = USER_PATTERN + r"/with_replies"
- test = (
- ("https://nitter.net/supernaturepics/with_replies", {
- "pattern": r"https://nitter\.net/pic/orig"
- r"/media%2F[\w-]+\.(jpg|png)$",
- "range": "1-20",
- }),
- ("https://nitter.lacontrevoie.fr/supernaturepics/with_replies"),
- ("https://nitter.1d4.us/supernaturepics/with_replies"),
- ("https://nitter.kavin.rocks/id:2976459548/with_replies"),
- ("https://nitter.unixfox.eu/i/user/2976459548/with_replies"),
- )
+ example = "https://nitter.net/USER/with_replies"
def tweets(self):
return self._pagination("/with_replies")
@@ -340,21 +279,7 @@ class NitterRepliesExtractor(NitterExtractor):
class NitterMediaExtractor(NitterExtractor):
subcategory = "media"
pattern = USER_PATTERN + r"/media"
- test = (
- ("https://nitter.net/supernaturepics/media", {
- "pattern": r"https://nitter\.net/pic/orig"
- r"/media%2F[\w-]+\.(jpg|png)$",
- "range": "1-20",
- }),
- ("https://nitter.kavin.rocks/id:2976459548/media", {
- "pattern": r"https://nitter\.kavin\.rocks/pic/orig"
- r"/media%2F[\w-]+\.(jpg|png)$",
- "range": "1-20",
- }),
- ("https://nitter.lacontrevoie.fr/supernaturepics/media"),
- ("https://nitter.1d4.us/supernaturepics/media"),
- ("https://nitter.unixfox.eu/i/user/2976459548/media"),
- )
+ example = "https://nitter.net/USER/media"
def tweets(self):
return self._pagination("/media")
@@ -363,17 +288,7 @@ class NitterMediaExtractor(NitterExtractor):
class NitterSearchExtractor(NitterExtractor):
subcategory = "search"
pattern = USER_PATTERN + r"/search"
- test = (
- ("https://nitter.net/supernaturepics/search", {
- "pattern": r"https://nitter\.net/pic/orig"
- r"/media%2F[\w-]+\.(jpg|png)$",
- "range": "1-20",
- }),
- ("https://nitter.lacontrevoie.fr/supernaturepics/search"),
- ("https://nitter.1d4.us/supernaturepics/search"),
- ("https://nitter.kavin.rocks/id:2976459548/search"),
- ("https://nitter.unixfox.eu/i/user/2976459548/search"),
- )
+ example = "https://nitter.net/USER/search"
def tweets(self):
return self._pagination("/search")
@@ -386,95 +301,7 @@ class NitterTweetExtractor(NitterExtractor):
filename_fmt = "{tweet_id}_{num}.{extension}"
archive_fmt = "{tweet_id}_{num}"
pattern = BASE_PATTERN + r"/(i/web|[^/?#]+)/status/(\d+())"
- test = (
- ("https://nitter.net/supernaturepics/status/604341487988576256", {
- "url": "3f2b64e175bf284aa672c3bb53ed275e470b919a",
- "content": "ab05e1d8d21f8d43496df284d31e8b362cd3bcab",
- "keyword": {
- "comments": 19,
- "content": "Big Wedeene River, Canada",
- "count": 1,
- "date": "dt:2015-05-29 17:40:00",
- "extension": "jpg",
- "filename": "CGMNYZvW0AIVoom",
- "likes": int,
- "num": 1,
- "quotes": 10,
- "retweets": int,
- "tweet_id": "604341487988576256",
- "url": "https://nitter.net/pic/orig"
- "/media%2FCGMNYZvW0AIVoom.jpg",
- "user": {
- "name": "supernaturepics",
- "nick": "Nature Pictures",
- },
- },
- }),
- # 4 images
- ("https://nitter.lacontrevoie.fr/i/status/894001459754180609", {
- "url": "9c51b3a4a1114535eb9b168bba97ad95db0d59ff",
- }),
- # video
- ("https://nitter.lacontrevoie.fr/i/status/1065692031626829824", {
- "pattern": r"ytdl:https://nitter\.lacontrevoie\.fr/video"
- r"/[0-9A-F]{10,}/https%3A%2F%2Fvideo.twimg.com%2F"
- r"ext_tw_video%2F1065691868439007232%2Fpu%2Fpl%2F"
- r"nv8hUQC1R0SjhzcZ.m3u8%3Ftag%3D5",
- "keyword": {
- "extension": "mp4",
- "filename": "nv8hUQC1R0SjhzcZ",
- },
- }),
- # content with emoji, newlines, hashtags (#338)
- ("https://nitter.1d4.us/playpokemon/status/1263832915173048321", {
- "keyword": {"content": (
- r"re:Gear up for #PokemonSwordShieldEX with special Mystery "
- "Gifts! \n\nYou’ll be able to receive four Galarian form "
- "Pokémon with Hidden Abilities, plus some very useful items. "
- "It’s our \\(Mystery\\) Gift to you, Trainers! \n\n❓🎁➡️ "
- )},
- }),
- # Nitter tweet (#890)
- ("https://nitter.kavin.rocks/ed1conf/status/1163841619336007680", {
- "url": "e115bd1c86c660064e392b05269bbcafcd8c8b7a",
- "content": "f29501e44d88437fe460f5c927b7543fda0f6e34",
- }),
- # Reply to deleted tweet (#403, #838)
- ("https://nitter.unixfox.eu/i/web/status/1170041925560258560", {
- "pattern": r"https://nitter\.unixfox\.eu/pic/orig"
- r"/media%2FEDzS7VrU0AAFL4_\.jpg",
- }),
- # "quoted" option (#854)
- ("https://nitter.net/StobiesGalaxy/status/1270755918330896395", {
- "options": (("quoted", True),),
- "pattern": r"https://nitter\.net/pic/orig/media%2FEa[KG].+\.jpg",
- "count": 8,
- }),
- # quoted tweet (#526, #854)
- ("https://nitter.1d4.us/StobiesGalaxy/status/1270755918330896395", {
- "pattern": r"https://nitter\.1d4\.us/pic/orig"
- r"/enc/bWVkaWEvRWFL\w+LmpwZw==",
- "keyword": {"filename": r"re:EaK.{12}"},
- "count": 4,
- }),
- # deleted quote tweet (#2225)
- ("https://nitter.lacontrevoie.fr/i/status/1460044411165888515", {
- "count": 0,
- }),
- # "Misleading" content
- ("https://nitter.lacontrevoie.fr/i/status/1486373748911575046", {
- "count": 4,
- }),
- # age-restricted (#2354)
- ("https://nitter.unixfox.eu/mightbecurse/status/1492954264909479936", {
- "keyword": {"date": "dt:2022-02-13 20:10:00"},
- "count": 1,
- }),
- # broadcast
- ("https://nitter.it/POTUS/status/1639409307878928384", {
- "count": 0,
- })
- )
+ example = "https://nitter.net/USER/status/12345"
def tweets(self):
url = "{}/i/status/{}".format(self.root, self.user)
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index af2a367..8c7ffe5 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -24,11 +24,11 @@ class NozomiExtractor(Extractor):
filename_fmt = "{postid} {dataid}.{extension}"
archive_fmt = "{dataid}"
- def items(self):
+ def _init(self):
+ self.session.headers["Origin"] = self.root
+ def items(self):
data = self.metadata()
- self.session.headers["Origin"] = self.root
- self.session.headers["Referer"] = self.root + "/"
for post_id in map(str, self.posts()):
url = "https://j.nozomi.la/post/{}/{}/{}.json".format(
@@ -105,52 +105,7 @@ class NozomiPostExtractor(NozomiExtractor):
"""Extractor for individual posts on nozomi.la"""
subcategory = "post"
pattern = r"(?:https?://)?nozomi\.la/post/(\d+)"
- test = (
- ("https://nozomi.la/post/3649262.html", {
- "url": "e5525e717aec712843be8b88592d6406ae9e60ba",
- "pattern": r"https://w\.nozomi\.la/2/15/aaa9f7c632cde1e1a5baaff3fb"
- r"6a6d857ec73df7fdc5cf5a358caf604bf73152\.webp",
- "content": "6d62c4a7fea50c0a89d499603c4e7a2b4b9bffa8",
- "keyword": {
- "artist" : ["hammer (sunset beach)"],
- "character": ["patchouli knowledge"],
- "copyright": ["touhou"],
- "dataid" : "re:aaa9f7c632cde1e1a5baaff3fb6a6d857ec73df7fdc5",
- "date" : "dt:2016-07-26 02:32:03",
- "extension": "webp",
- "filename" : str,
- "height" : 768,
- "is_video" : False,
- "postid" : 3649262,
- "tags" : list,
- "type" : "jpg",
- "url" : str,
- "width" : 1024,
- },
- }),
- # multiple images per post
- ("https://nozomi.la/post/25588032.html", {
- "url": "fb956ccedcf2cf509739d26e2609e910244aa56c",
- "keyword": "516ca5cbd0d2a46a8ce26679d6e08de5ac42184b",
- "count": 7,
- }),
- # empty 'date' (#1163)
- ("https://nozomi.la/post/130309.html", {
- "keyword": {"date": None},
- }),
- # gif
- ("https://nozomi.la/post/1647.html", {
- "pattern": r"https://g\.nozomi\.la/a/f0/d1b06469e00d72e4f6346209c1"
- r"49db459d76b58a074416c260ed93cc31fa9f0a\.gif",
- "content": "952efb78252bbc9fb56df2e8fafb68d5e6364181",
- }),
- # video
- ("https://nozomi.la/post/2269847.html", {
- "pattern": r"https://v\.nozomi\.la/d/0e/ff88398862669783691b31519f"
- r"2bea3a35c24b6e62e3ba2d89b4409e41c660ed\.webm",
- "content": "57065e6c16da7b1c7098a63b36fb0c6c6f1b9bca",
- }),
- )
+ example = "https://nozomi.la/post/12345.html"
def __init__(self, match):
NozomiExtractor.__init__(self, match)
@@ -165,11 +120,7 @@ class NozomiIndexExtractor(NozomiExtractor):
subcategory = "index"
pattern = (r"(?:https?://)?nozomi\.la/"
r"(?:(index(?:-Popular)?)-(\d+)\.html)?(?:$|#|\?)")
- test = (
- ("https://nozomi.la/"),
- ("https://nozomi.la/index-2.html"),
- ("https://nozomi.la/index-Popular-33.html"),
- )
+ example = "https://nozomi.la/index-1.html"
def __init__(self, match):
NozomiExtractor.__init__(self, match)
@@ -183,11 +134,7 @@ class NozomiTagExtractor(NozomiExtractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{dataid}"
pattern = r"(?:https?://)?nozomi\.la/tag/([^/?#]+)-(\d+)\."
- test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", {
- "pattern": r"^https://[wgv]\.nozomi\.la/\w/\w\w/\w+\.\w+$",
- "count": ">= 25",
- "range": "1-25",
- })
+ example = "https://nozomi.la/tag/TAG-1.html"
def __init__(self, match):
NozomiExtractor.__init__(self, match)
@@ -205,9 +152,7 @@ class NozomiSearchExtractor(NozomiExtractor):
directory_fmt = ("{category}", "{search_tags:J }")
archive_fmt = "t_{search_tags}_{dataid}"
pattern = r"(?:https?://)?nozomi\.la/search\.html\?q=([^&#]+)"
- test = ("https://nozomi.la/search.html?q=hibiscus%203:4_ratio#1", {
- "count": ">= 5",
- })
+ example = "https://nozomi.la/search.html?q=QUERY"
def __init__(self, match):
NozomiExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/nsfwalbum.py b/gallery_dl/extractor/nsfwalbum.py
index 6433fbd..eb5d31f 100644
--- a/gallery_dl/extractor/nsfwalbum.py
+++ b/gallery_dl/extractor/nsfwalbum.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -20,12 +20,9 @@ class NsfwalbumAlbumExtractor(GalleryExtractor):
filename_fmt = "{album_id}_{num:>03}_{id}.{extension}"
directory_fmt = ("{category}", "{album_id} {title}")
archive_fmt = "{id}"
+ referer = False
pattern = r"(?:https?://)?(?:www\.)?nsfwalbum\.com(/album/(\d+))"
- test = ("https://nsfwalbum.com/album/401611", {
- "range": "1-5",
- "url": "b0481fc7fad5982da397b6359fbed8421b8ba284",
- "keyword": "e98f9b0d473c00000831618d0235863b1dd78294",
- })
+ example = "https://nsfwalbum.com/album/12345"
def __init__(self, match):
self.album_id = match.group(2)
@@ -75,8 +72,8 @@ class NsfwalbumAlbumExtractor(GalleryExtractor):
@staticmethod
def _validate_response(response):
- return not response.request.url.endswith(
- ("/no_image.jpg", "/placeholder.png"))
+ return not response.url.endswith(
+ ("/no_image.jpg", "/placeholder.png", "/error.jpg"))
@staticmethod
def _annihilate(value, base=6):
diff --git a/gallery_dl/extractor/nudecollect.py b/gallery_dl/extractor/nudecollect.py
index 3159919..bda5d77 100644
--- a/gallery_dl/extractor/nudecollect.py
+++ b/gallery_dl/extractor/nudecollect.py
@@ -37,26 +37,8 @@ class NudecollectImageExtractor(NudecollectExtractor):
pattern = (r"(?:https?://)?(?:www\.)?nudecollect\.com"
r"(/content/([^/?#]+)/image-(\d+)-pics-(\d+)"
r"-mirror-(\d+)\.html)")
- test = (
- (("https://www.nudecollect.com/content/20201220_Teenpornstorage_"
- "Patritcy_Vanessa_Lesbian_Lust/image-4-pics-108-mirror-43.html"), {
- "pattern": (r"https://mirror\d+\.nudecollect\.com/showimage"
- r"/nudecollect-8769086487/image00004-5896498214-43"
- r"-9689595623/20201220_Teenpornstorage_Patritcy_Vaness"
- r"a_Lesbian_Lust/9879560327/nudecollect\.com\.jpg"),
- "keyword": {
- "slug" : ("20201220_Teenpornstorage_Patritcy"
- "_Vanessa_Lesbian_Lust"),
- "title" : ("20201220 Teenpornstorage Patritcy"
- " Vanessa Lesbian Lust"),
- "num" : 4,
- "count" : 108,
- "mirror": 43,
- },
- }),
- (("https://www.nudecollect.com/content/20201220_Teenpornstorage_"
- "Patritcy_Vanessa_Lesbian_Lust/image-10-pics-108-mirror-43.html")),
- )
+ example = ("https://www.nudecollect.com/content/12345_TITLE"
+ "/image-1-pics-108-mirror-1.html")
def __init__(self, match):
NudecollectExtractor.__init__(self, match)
@@ -80,45 +62,8 @@ class NudecollectAlbumExtractor(NudecollectExtractor):
pattern = (r"(?:https?://)?(?:www\.)?nudecollect\.com"
r"/content/([^/?#]+)/(?:index-mirror-(\d+)-(\d+)"
r"|page-\d+-pics-(\d+)-mirror-(\d+))\.html")
- test = (
- (("https://www.nudecollect.com/content/20170219_TheWhiteBoxxx_"
- "Caprice_Tracy_Loves_Hot_ass_fingering_and_sensual_lesbian_sex"
- "_with_alluring_Czech_babes_x125_1080px/index-mirror-67-125.html"), {
- "pattern": (r"https://mirror\d+\.nudecollect\.com/showimage"
- r"/nudecollect-8769086487/image00\d\d\d-5896498214-67"
- r"-9689595623/20170219_TheWhiteBoxxx_Caprice"
- r"_Tracy_Loves_Hot_ass_fingering_and_sensual_"
- r"lesbian_sex_with_alluring_Czech_babes_x125_1080px"
- r"/9879560327/nudecollect\.com\.jpg"),
- "count" : 125,
- "keyword": {
- "slug" : ("20170219_TheWhiteBoxxx_Caprice_Tracy_Loves_Hot_"
- "ass_fingering_and_sensual_lesbian_sex_with_"
- "alluring_Czech_babes_x125_1080px"),
- "title" : ("20170219 TheWhiteBoxxx Caprice Tracy Loves Hot ass"
- " fingering and sensual lesbian sex with alluring"
- " Czech babes x125 1080px"),
- "num" : int,
- "mirror": 67,
- },
- }),
- (("https://www.nudecollect.com/content/20201220_Teenpornstorage_"
- "Patritcy_Vanessa_Lesbian_Lust/page-1-pics-108-mirror-43.html"), {
- "pattern": (r"https://mirror\d+\.nudecollect\.com/showimage"
- r"/nudecollect-8769086487/image00\d\d\d-5896498214-43"
- r"-9689595623/20201220_Teenpornstorage_Patritcy_Vaness"
- r"a_Lesbian_Lust/9879560327/nudecollect\.com\.jpg"),
- "count" : 108,
- "keyword": {
- "slug" : ("20201220_Teenpornstorage_Patritcy"
- "_Vanessa_Lesbian_Lust"),
- "title" : ("20201220 Teenpornstorage Patritcy"
- " Vanessa Lesbian Lust"),
- "num" : int,
- "mirror": 43,
- },
- }),
- )
+ example = ("https://www.nudecollect.com/content/12345_TITLE"
+ "/index-mirror-01-123.html")
def __init__(self, match):
self.slug = match.group(1)
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 824757c..45313c5 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -28,6 +28,8 @@ class OAuthBase(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.client = None
+
+ def _init(self):
self.cache = config.get(("extractor", self.category), "cache", True)
def oauth_config(self, key, default=None):
@@ -241,6 +243,7 @@ class OAuthBase(Extractor):
class OAuthFlickr(OAuthBase):
subcategory = "flickr"
pattern = "oauth:flickr$"
+ example = "oauth:flickr"
redirect_uri = REDIRECT_URI_HTTPS
def items(self):
@@ -259,6 +262,7 @@ class OAuthFlickr(OAuthBase):
class OAuthSmugmug(OAuthBase):
subcategory = "smugmug"
pattern = "oauth:smugmug$"
+ example = "oauth:smugmug"
def items(self):
yield Message.Version, 1
@@ -276,6 +280,7 @@ class OAuthSmugmug(OAuthBase):
class OAuthTumblr(OAuthBase):
subcategory = "tumblr"
pattern = "oauth:tumblr$"
+ example = "oauth:tumblr"
def items(self):
yield Message.Version, 1
@@ -296,6 +301,7 @@ class OAuthTumblr(OAuthBase):
class OAuthDeviantart(OAuthBase):
subcategory = "deviantart"
pattern = "oauth:deviantart$"
+ example = "oauth:deviantart"
redirect_uri = REDIRECT_URI_HTTPS
def items(self):
@@ -317,6 +323,7 @@ class OAuthDeviantart(OAuthBase):
class OAuthReddit(OAuthBase):
subcategory = "reddit"
pattern = "oauth:reddit$"
+ example = "oauth:reddit"
def items(self):
yield Message.Version, 1
@@ -338,6 +345,7 @@ class OAuthReddit(OAuthBase):
class OAuthMastodon(OAuthBase):
subcategory = "mastodon"
pattern = "oauth:mastodon:(?:https?://)?([^/?#]+)"
+ example = "oauth:mastodon:mastodon.social"
def __init__(self, match):
OAuthBase.__init__(self, match)
@@ -395,6 +403,7 @@ class OAuthMastodon(OAuthBase):
class OAuthPixiv(OAuthBase):
subcategory = "pixiv"
pattern = "oauth:pixiv$"
+ example = "oauth:pixiv"
def items(self):
yield Message.Version, 1
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 1fa571c..0389ead 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -21,7 +21,7 @@ class PahealExtractor(Extractor):
root = "https://rule34.paheal.net"
def items(self):
- self.session.cookies.set(
+ self.cookies.set(
"ui-tnc-agreed", "true", domain="rule34.paheal.net")
data = self.get_metadata()
@@ -74,40 +74,14 @@ class PahealTagExtractor(PahealExtractor):
directory_fmt = ("{category}", "{search_tags}")
pattern = (r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net"
r"/post/list/([^/?#]+)")
- test = (
- ("https://rule34.paheal.net/post/list/Ayane_Suzuki/1", {
- "pattern": r"https://[^.]+\.paheal\.net/_images/\w+/\d+%20-%20",
- "count": ">= 15"
- }),
- ("https://rule34.paheal.net/post/list/Ayane_Suzuki/1", {
- "range": "1",
- "options": (("metadata", True),),
- "keyword": {
- "date": "dt:2018-01-07 07:04:05",
- "duration": 0.0,
- "extension": "jpg",
- "filename": "2446128 - Ayane_Suzuki Idolmaster "
- "idolmaster_dearly_stars Zanzi",
- "height": 768,
- "id": 2446128,
- "md5": "b0ceda9d860df1d15b60293a7eb465c1",
- "search_tags": "Ayane_Suzuki",
- "size": 205312,
- "source": "https://www.pixiv.net/member_illust.php"
- "?mode=medium&illust_id=19957280",
- "tags": "Ayane_Suzuki Idolmaster "
- "idolmaster_dearly_stars Zanzi",
- "uploader": "XXXname",
- "width": 1024,
- },
- }),
- )
+ example = "https://rule34.paheal.net/post/list/TAG/1"
per_page = 70
def __init__(self, match):
PahealExtractor.__init__(self, match)
self.tags = text.unquote(match.group(1))
+ def _init(self):
if self.config("metadata"):
self._extract_data = self._extract_data_ex
@@ -160,61 +134,7 @@ class PahealPostExtractor(PahealExtractor):
subcategory = "post"
pattern = (r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net"
r"/post/view/(\d+)")
- test = (
- ("https://rule34.paheal.net/post/view/481609", {
- "pattern": r"https://tulip\.paheal\.net/_images"
- r"/bbdc1c33410c2cdce7556c7990be26b7/481609%20-%20"
- r"Azumanga_Daioh%20inanimate%20Osaka%20Vuvuzela\.jpg",
- "content": "7b924bcf150b352ac75c9d281d061e174c851a11",
- "keyword": {
- "date": "dt:2010-06-17 15:40:23",
- "extension": "jpg",
- "file_url": "re:https://tulip.paheal.net/_images/bbdc1c33410c",
- "filename": "481609 - Azumanga_Daioh inanimate Osaka Vuvuzela",
- "height": 660,
- "id": 481609,
- "md5": "bbdc1c33410c2cdce7556c7990be26b7",
- "size": 157389,
- "source": "",
- "tags": "Azumanga_Daioh inanimate Osaka Vuvuzela",
- "uploader": "CaptainButtface",
- "width": 614,
- },
- }),
- ("https://rule34.paheal.net/post/view/488534", {
- "keyword": {
- "date": "dt:2010-06-25 13:51:17",
- "height": 800,
- "md5": "b39edfe455a0381110c710d6ed2ef57d",
- "size": 758989,
- "source": "http://www.furaffinity.net/view/4057821/",
- "tags": "inanimate thelost-dragon Vuvuzela",
- "uploader": "leacheate_soup",
- "width": 1200,
- },
- }),
- # video
- ("https://rule34.paheal.net/post/view/3864982", {
- "pattern": r"https://[\w]+\.paheal\.net/_images/7629fc0ff77e32637d"
- r"de5bf4f992b2cb/3864982%20-%20animated%20Metal_Gear%20"
- r"Metal_Gear_Solid_V%20Quiet%20Vg_erotica%20webm\.webm",
- "keyword": {
- "date": "dt:2020-09-06 01:59:03",
- "duration": 30.0,
- "extension": "webm",
- "height": 2500,
- "id": 3864982,
- "md5": "7629fc0ff77e32637dde5bf4f992b2cb",
- "size": 18454938,
- "source": "https://twitter.com/VG_Worklog"
- "/status/1302407696294055936",
- "tags": "animated Metal_Gear Metal_Gear_Solid_V "
- "Quiet Vg_erotica webm",
- "uploader": "justausername",
- "width": 1768,
- },
- }),
- )
+ example = "https://rule34.paheal.net/post/view/12345"
def __init__(self, match):
PahealExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index e4bfa2a..729ceaf 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -19,7 +19,7 @@ class PatreonExtractor(Extractor):
"""Base class for patreon extractors"""
category = "patreon"
root = "https://www.patreon.com"
- cookiedomain = ".patreon.com"
+ cookies_domain = ".patreon.com"
directory_fmt = ("{category}", "{creator[full_name]}")
filename_fmt = "{id}_{title}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
@@ -28,11 +28,11 @@ class PatreonExtractor(Extractor):
_warning = True
def items(self):
-
if self._warning:
- if not self._check_cookies(("session_id",)):
+ if not self.cookies_check(("session_id",)):
self.log.warning("no 'session_id' cookie set")
PatreonExtractor._warning = False
+
generators = self._build_file_generators(self.config("files"))
for post in self.posts():
@@ -103,7 +103,6 @@ class PatreonExtractor(Extractor):
def _pagination(self, url):
headers = {
- "Referer" : self.root + "/",
"Content-Type": "application/vnd.api+json",
}
@@ -251,7 +250,7 @@ class PatreonExtractor(Extractor):
def _extract_bootstrap(self, page):
return util.json_loads(text.extr(
- page, "window.patreon.bootstrap,", "\n});") + "}")
+ page, "window.patreon.bootstrap,", "});") + "}")
class PatreonCreatorExtractor(PatreonExtractor):
@@ -260,34 +259,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
r"/(?!(?:home|join|posts|login|signup)(?:$|[/?#]))"
r"([^/?#]+)(?:/posts)?/?(?:\?([^#]+))?")
- test = (
- ("https://www.patreon.com/koveliana", {
- "range": "1-25",
- "count": ">= 25",
- "keyword": {
- "attachments" : list,
- "comment_count": int,
- "content" : str,
- "creator" : dict,
- "date" : "type:datetime",
- "id" : int,
- "images" : list,
- "like_count" : int,
- "post_type" : str,
- "published_at" : str,
- "title" : str,
- },
- }),
- ("https://www.patreon.com/koveliana/posts?filters[month]=2020-3", {
- "count": 1,
- "keyword": {"date": "dt:2020-03-30 21:21:44"},
- }),
- ("https://www.patreon.com/kovelianot", {
- "exception": exception.NotFoundError,
- }),
- ("https://www.patreon.com/user?u=2931440"),
- ("https://www.patreon.com/user/posts/?u=2931440"),
- )
+ example = "https://www.patreon.com/USER"
def __init__(self, match):
PatreonExtractor.__init__(self, match)
@@ -328,7 +300,7 @@ class PatreonUserExtractor(PatreonExtractor):
"""Extractor for media from creators supported by you"""
subcategory = "user"
pattern = r"(?:https?://)?(?:www\.)?patreon\.com/home$"
- test = ("https://www.patreon.com/home",)
+ example = "https://www.patreon.com/home"
def posts(self):
url = self._build_url("stream", (
@@ -343,24 +315,7 @@ class PatreonPostExtractor(PatreonExtractor):
"""Extractor for media from a single post"""
subcategory = "post"
pattern = r"(?:https?://)?(?:www\.)?patreon\.com/posts/([^/?#]+)"
- test = (
- # postfile + attachments
- ("https://www.patreon.com/posts/precious-metal-23563293", {
- "count": 4,
- }),
- # postfile + content
- ("https://www.patreon.com/posts/56127163", {
- "count": 3,
- "keyword": {"filename": r"re:^(?!1).+$"},
- }),
- # tags (#1539)
- ("https://www.patreon.com/posts/free-post-12497641", {
- "keyword": {"tags": ["AWMedia"]},
- }),
- ("https://www.patreon.com/posts/not-found-123", {
- "exception": exception.NotFoundError,
- }),
- )
+ example = "https://www.patreon.com/posts/TITLE-12345"
def __init__(self, match):
PatreonExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index e718828..3a0f5b0 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -22,8 +22,7 @@ class PhilomenaExtractor(BooruExtractor):
page_start = 1
per_page = 50
- def __init__(self, match):
- BooruExtractor.__init__(self, match)
+ def _init(self):
self.api = PhilomenaAPI(self)
_file_url = operator.itemgetter("view_url")
@@ -58,68 +57,7 @@ class PhilomenaPostExtractor(PhilomenaExtractor):
"""Extractor for single posts on a Philomena booru"""
subcategory = "post"
pattern = BASE_PATTERN + r"/(?:images/)?(\d+)"
- test = (
- ("https://derpibooru.org/images/1", {
- "content": "88449eeb0c4fa5d3583d0b794f6bc1d70bf7f889",
- "count": 1,
- "keyword": {
- "animated": False,
- "aspect_ratio": 1.0,
- "comment_count": int,
- "created_at": "2012-01-02T03:12:33Z",
- "date": "dt:2012-01-02 03:12:33",
- "deletion_reason": None,
- "description": "",
- "downvotes": int,
- "duplicate_of": None,
- "duration": 0.04,
- "extension": "png",
- "faves": int,
- "first_seen_at": "2012-01-02T03:12:33Z",
- "format": "png",
- "height": 900,
- "hidden_from_users": False,
- "id": 1,
- "mime_type": "image/png",
- "name": "1__safe_fluttershy_solo_cloud_happy_flying_upvotes+ga"
- "lore_artist-colon-speccysy_get_sunshine",
- "orig_sha512_hash": None,
- "processed": True,
- "representations": dict,
- "score": int,
- "sha512_hash": "f16c98e2848c2f1bfff3985e8f1a54375cc49f78125391"
- "aeb80534ce011ead14e3e452a5c4bc98a66f56bdfcd07e"
- "f7800663b994f3f343c572da5ecc22a9660f",
- "size": 860914,
- "source_url": "https://www.deviantart.com/speccysy/art"
- "/Afternoon-Flight-215193985",
- "spoilered": False,
- "tag_count": int,
- "tag_ids": list,
- "tags": list,
- "thumbnails_generated": True,
- "updated_at": r"re:\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ",
- "uploader": "Clover the Clever",
- "uploader_id": 211188,
- "upvotes": int,
- "view_url": str,
- "width": 900,
- "wilson_score": float,
- },
- }),
- ("https://derpibooru.org/1"),
- ("https://www.derpibooru.org/1"),
- ("https://www.derpibooru.org/images/1"),
-
- ("https://ponybooru.org/images/1", {
- "content": "bca26f58fafd791fe07adcd2a28efd7751824605",
- }),
- ("https://www.ponybooru.org/images/1"),
-
- ("https://furbooru.org/images/1", {
- "content": "9eaa1e1b32fa0f16520912257dbefaff238d5fd2",
- }),
- )
+ example = "https://derpibooru.org/images/12345"
def __init__(self, match):
PhilomenaExtractor.__init__(self, match)
@@ -134,28 +72,7 @@ class PhilomenaSearchExtractor(PhilomenaExtractor):
subcategory = "search"
directory_fmt = ("{category}", "{search_tags}")
pattern = BASE_PATTERN + r"/(?:search/?\?([^#]+)|tags/([^/?#]+))"
- test = (
- ("https://derpibooru.org/search?q=cute", {
- "range": "40-60",
- "count": 21,
- }),
- ("https://derpibooru.org/tags/cute", {
- "range": "40-60",
- "count": 21,
- }),
- (("https://derpibooru.org/tags/"
- "artist-colon--dash-_-fwslash--fwslash-%255Bkorroki%255D_aternak"), {
- "count": ">= 2",
- }),
- ("https://ponybooru.org/search?q=cute", {
- "range": "40-60",
- "count": 21,
- }),
- ("https://furbooru.org/search?q=cute", {
- "range": "40-60",
- "count": 21,
- }),
- )
+ example = "https://derpibooru.org/search?q=QUERY"
def __init__(self, match):
PhilomenaExtractor.__init__(self, match)
@@ -189,28 +106,7 @@ class PhilomenaGalleryExtractor(PhilomenaExtractor):
directory_fmt = ("{category}", "galleries",
"{gallery[id]} {gallery[title]}")
pattern = BASE_PATTERN + r"/galleries/(\d+)"
- test = (
- ("https://derpibooru.org/galleries/1", {
- "pattern": r"https://derpicdn\.net/img/view/\d+/\d+/\d+/\d+[^/]+$",
- "keyword": {
- "gallery": {
- "description": "Indexes start at 1 :P",
- "id": 1,
- "spoiler_warning": "",
- "thumbnail_id": 1,
- "title": "The Very First Gallery",
- "user": "DeliciousBlackInk",
- "user_id": 365446,
- },
- },
- }),
- ("https://ponybooru.org/galleries/27", {
- "count": ">= 24",
- }),
- ("https://furbooru.org/galleries/27", {
- "count": ">= 13",
- }),
- )
+ example = "https://derpibooru.org/galleries/12345"
def __init__(self, match):
PhilomenaExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/photobucket.py b/gallery_dl/extractor/photobucket.py
index 6234e6a..a01c9fe 100644
--- a/gallery_dl/extractor/photobucket.py
+++ b/gallery_dl/extractor/photobucket.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://photobucket.com/"""
+"""Extractors for https://photobucket.com/"""
from .common import Extractor, Message
from .. import text, exception
@@ -23,34 +23,13 @@ class PhotobucketAlbumExtractor(Extractor):
archive_fmt = "{id}"
pattern = (r"(?:https?://)?((?:[\w-]+\.)?photobucket\.com)"
r"/user/[^/?&#]+/library(?:/[^?&#]*)?")
- test = (
- ("https://s369.photobucket.com/user/CrpyLrkr/library", {
- "pattern": r"https?://[oi]+\d+.photobucket.com/albums/oo139/",
- "count": ">= 50"
- }),
- # subalbums of main "directory"
- ("https://s271.photobucket.com/user/lakerfanryan/library/", {
- "options": (("image-filter", "False"),),
- "pattern": pattern,
- "count": 1,
- }),
- # subalbums of subalbum without images
- ("https://s271.photobucket.com/user/lakerfanryan/library/Basketball", {
- "pattern": pattern,
- "count": ">= 9",
- }),
- # private (missing JSON data)
- ("https://s1277.photobucket.com/user/sinisterkat44/library/", {
- "count": 0,
- }),
- ("https://s1110.photobucket.com/user/chndrmhn100/library/"
- "Chandu%20is%20the%20King?sort=3&page=1"),
- )
+ example = "https://s123.photobucket.com/user/USER/library"
def __init__(self, match):
- Extractor.__init__(self, match)
- self.album_path = ""
self.root = "https://" + match.group(1)
+ Extractor.__init__(self, match)
+
+ def _init(self):
self.session.headers["Referer"] = self.url
def items(self):
@@ -112,23 +91,14 @@ class PhotobucketImageExtractor(Extractor):
pattern = (r"(?:https?://)?(?:[\w-]+\.)?photobucket\.com"
r"(?:/gallery/user/([^/?&#]+)/media/([^/?&#]+)"
r"|/user/([^/?&#]+)/media/[^?&#]+\.html)")
- test = (
- (("https://s271.photobucket.com/user/lakerfanryan"
- "/media/Untitled-3-1.jpg.html"), {
- "url": "3b647deeaffc184cc48c89945f67574559c9051f",
- "keyword": "69732741b2b351db7ecaa77ace2fdb39f08ca5a3",
- }),
- (("https://s271.photobucket.com/user/lakerfanryan"
- "/media/IsotopeswBros.jpg.html?sort=3&o=2"), {
- "url": "12c1890c09c9cdb8a88fba7eec13f324796a8d7b",
- "keyword": "61200a223df6c06f45ac3d30c88b3f5b048ce9a8",
- }),
- )
+ example = "https://s123.photobucket.com/user/USER/media/NAME.EXT.html"
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1) or match.group(3)
self.media_id = match.group(2)
+
+ def _init(self):
self.session.headers["Referer"] = self.url
def items(self):
diff --git a/gallery_dl/extractor/photovogue.py b/gallery_dl/extractor/photovogue.py
index 3c68fd5..2a2df5a 100644
--- a/gallery_dl/extractor/photovogue.py
+++ b/gallery_dl/extractor/photovogue.py
@@ -19,38 +19,7 @@ class PhotovogueUserExtractor(Extractor):
filename_fmt = "{id} {title}.{extension}"
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/photographers/(\d+)"
- test = (
- ("https://www.vogue.com/photovogue/photographers/221252"),
- ("https://vogue.com/photovogue/photographers/221252", {
- "pattern": r"https://images.vogue.it/Photovogue/[^/]+_gallery.jpg",
- "keyword": {
- "date": "type:datetime",
- "favorite_count": int,
- "favorited": list,
- "id": int,
- "image_id": str,
- "is_favorite": False,
- "orientation": "re:portrait|landscape",
- "photographer": {
- "biography": "Born in 1995. Live in Bologna.",
- "city": "Bologna",
- "country_id": 106,
- "favoritedCount": int,
- "id": 221252,
- "isGold": bool,
- "isPro": bool,
- "latitude": str,
- "longitude": str,
- "name": "Arianna Mattarozzi",
- "user_id": "38cb0601-4a85-453c-b7dc-7650a037f2ab",
- "websites": list,
- },
- "photographer_id": 221252,
- "tags": list,
- "title": str,
- },
- }),
- )
+ example = "https://www.vogue.com/photovogue/photographers/12345"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/picarto.py b/gallery_dl/extractor/picarto.py
index 77a07b4..cc7eee5 100644
--- a/gallery_dl/extractor/picarto.py
+++ b/gallery_dl/extractor/picarto.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,12 +21,7 @@ class PicartoGalleryExtractor(Extractor):
filename_fmt = "{id} {title}.{extension}"
archive_fmt = "{id}"
pattern = r"(?:https?://)?picarto\.tv/([^/?#]+)/gallery"
- test = ("https://picarto.tv/fnook/gallery/default/", {
- "pattern": r"https://images\.picarto\.tv/gallery/\d/\d\d/\d+/artwork"
- r"/[0-9a-f-]+/large-[0-9a-f]+\.(jpg|png|gif)",
- "count": ">= 7",
- "keyword": {"date": "type:datetime"},
- })
+ example = "https://picarto.tv/USER/gallery/TITLE/"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py
index 56c2978..422325f 100644
--- a/gallery_dl/extractor/piczel.py
+++ b/gallery_dl/extractor/piczel.py
@@ -68,10 +68,7 @@ class PiczelUserExtractor(PiczelExtractor):
"""Extractor for all images from a user's gallery"""
subcategory = "user"
pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?#]+)/?$"
- test = ("https://piczel.tv/gallery/Bikupan", {
- "range": "1-100",
- "count": ">= 100",
- })
+ example = "https://piczel.tv/gallery/USER"
def __init__(self, match):
PiczelExtractor.__init__(self, match)
@@ -89,9 +86,7 @@ class PiczelFolderExtractor(PiczelExtractor):
archive_fmt = "f{folder[id]}_{id}_{num}"
pattern = (r"(?:https?://)?(?:www\.)?piczel\.tv"
r"/gallery/(?!image)([^/?#]+)/(\d+)")
- test = ("https://piczel.tv/gallery/Lulena/1114", {
- "count": ">= 4",
- })
+ example = "https://piczel.tv/gallery/USER/12345"
def __init__(self, match):
PiczelExtractor.__init__(self, match)
@@ -106,30 +101,7 @@ class PiczelImageExtractor(PiczelExtractor):
"""Extractor for individual images"""
subcategory = "image"
pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/image/(\d+)"
- test = ("https://piczel.tv/gallery/image/7807", {
- "pattern": r"https://(\w+\.)?piczel\.tv/static/uploads/gallery_image"
- r"/32920/image/7807/1532236438-Lulena\.png",
- "content": "df9a053a24234474a19bce2b7e27e0dec23bff87",
- "keyword": {
- "created_at": "2018-07-22T05:13:58.000Z",
- "date": "dt:2018-07-22 05:13:58",
- "description": None,
- "extension": "png",
- "favorites_count": int,
- "folder_id": 1113,
- "id": 7807,
- "is_flash": False,
- "is_video": False,
- "multi": False,
- "nsfw": False,
- "num": 0,
- "password_protected": False,
- "tags": ["fanart", "commission", "altair", "recreators"],
- "title": "Altair",
- "user": dict,
- "views": int,
- },
- })
+ example = "https://piczel.tv/gallery/image/12345"
def __init__(self, match):
PiczelExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/pillowfort.py b/gallery_dl/extractor/pillowfort.py
index 841a99b..ff591fb 100644
--- a/gallery_dl/extractor/pillowfort.py
+++ b/gallery_dl/extractor/pillowfort.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -24,7 +24,7 @@ class PillowfortExtractor(Extractor):
filename_fmt = ("{post_id} {title|original_post[title]:?/ /}"
"{num:>02}.{extension}")
archive_fmt = "{id}"
- cookiedomain = "www.pillowfort.social"
+ cookies_domain = "www.pillowfort.social"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -56,7 +56,7 @@ class PillowfortExtractor(Extractor):
post["num"] = 0
for file in files:
- url = file["url"]
+ url = file["url"] or file.get("b2_lg_url")
if not url:
continue
@@ -82,15 +82,14 @@ class PillowfortExtractor(Extractor):
yield msgtype, url, post
def login(self):
- cget = self.session.cookies.get
- if cget("_Pf_new_session", domain=self.cookiedomain) \
- or cget("remember_user_token", domain=self.cookiedomain):
+ if self.cookies.get("_Pf_new_session", domain=self.cookies_domain):
+ return
+ if self.cookies.get("remember_user_token", domain=self.cookies_domain):
return
username, password = self._get_auth_info()
if username:
- cookies = self._login_impl(username, password)
- self._update_cookies(cookies)
+ self.cookies_update(self._login_impl(username, password))
@cache(maxage=14*24*3600, keyarg=1)
def _login_impl(self, username, password):
@@ -123,69 +122,7 @@ class PillowfortPostExtractor(PillowfortExtractor):
"""Extractor for a single pillowfort post"""
subcategory = "post"
pattern = BASE_PATTERN + r"/posts/(\d+)"
- test = (
- ("https://www.pillowfort.social/posts/27510", {
- "pattern": r"https://img\d+\.pillowfort\.social"
- r"/posts/\w+_out\d+\.png",
- "count": 4,
- "keyword": {
- "avatar_url": str,
- "col": 0,
- "commentable": True,
- "comments_count": int,
- "community_id": None,
- "content": str,
- "created_at": str,
- "date": "type:datetime",
- "deleted": None,
- "deleted_at": None,
- "deleted_by_mod": None,
- "deleted_for_flag_id": None,
- "embed_code": None,
- "id": int,
- "last_activity": str,
- "last_activity_elapsed": str,
- "last_edited_at": str,
- "likes_count": int,
- "media_type": "picture",
- "nsfw": False,
- "num": int,
- "original_post_id": None,
- "original_post_user_id": None,
- "picture_content_type": None,
- "picture_file_name": None,
- "picture_file_size": None,
- "picture_updated_at": None,
- "post_id": 27510,
- "post_type": "picture",
- "privacy": "public",
- "reblog_copy_info": list,
- "rebloggable": True,
- "reblogged_from_post_id": None,
- "reblogged_from_user_id": None,
- "reblogs_count": int,
- "row": int,
- "small_image_url": None,
- "tags": list,
- "time_elapsed": str,
- "timestamp": str,
- "title": "What is Pillowfort.social?",
- "updated_at": str,
- "url": r"re:https://img3.pillowfort.social/posts/.*\.png",
- "user_id": 5,
- "username": "Staff"
- },
- }),
- ("https://www.pillowfort.social/posts/1557500", {
- "options": (("external", True), ("inline", False)),
- "pattern": r"https://twitter\.com/Aliciawitdaart/status"
- r"/1282862493841457152",
- }),
- ("https://www.pillowfort.social/posts/1672518", {
- "options": (("inline", True),),
- "count": 3,
- }),
- )
+ example = "https://www.pillowfort.social/posts/12345"
def posts(self):
url = "{}/posts/{}/json/".format(self.root, self.item)
@@ -195,12 +132,8 @@ class PillowfortPostExtractor(PillowfortExtractor):
class PillowfortUserExtractor(PillowfortExtractor):
"""Extractor for all posts of a pillowfort user"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/(?!posts/)([^/?#]+)"
- test = ("https://www.pillowfort.social/Pome", {
- "pattern": r"https://img\d+\.pillowfort\.social/posts/",
- "range": "1-15",
- "count": 15,
- })
+ pattern = BASE_PATTERN + r"/(?!posts/)([^/?#]+(?:/tagged/[^/?#]+)?)"
+ example = "https://www.pillowfort.social/USER"
def posts(self):
url = "{}/{}/json/".format(self.root, self.item)
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 92e0588..e9f124f 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -23,12 +23,10 @@ class PinterestExtractor(Extractor):
archive_fmt = "{id}{media_id}"
root = "https://www.pinterest.com"
- def __init__(self, match):
- Extractor.__init__(self, match)
-
+ def _init(self):
domain = self.config("domain")
if not domain or domain == "auto" :
- self.root = text.root_from_url(match.group(0))
+ self.root = text.root_from_url(self.url)
else:
self.root = text.ensure_http_scheme(domain)
@@ -113,21 +111,7 @@ class PinterestPinExtractor(PinterestExtractor):
"""Extractor for images from a single pin from pinterest.com"""
subcategory = "pin"
pattern = BASE_PATTERN + r"/pin/([^/?#]+)(?!.*#related$)"
- test = (
- ("https://www.pinterest.com/pin/858146903966145189/", {
- "url": "afb3c26719e3a530bb0e871c480882a801a4e8a5",
- "content": ("4c435a66f6bb82bb681db2ecc888f76cf6c5f9ca",
- "d3e24bc9f7af585e8c23b9136956bd45a4d9b947"),
- }),
- # video pin (#1189)
- ("https://www.pinterest.com/pin/422564377542934214/", {
- "pattern": r"https://v\d*\.pinimg\.com/videos/mc/hls/d7/22/ff"
- r"/d722ff00ab2352981b89974b37909de8.m3u8",
- }),
- ("https://www.pinterest.com/pin/858146903966145188/", {
- "exception": exception.NotFoundError,
- }),
- )
+ example = "https://www.pinterest.com/pin/12345/"
def __init__(self, match):
PinterestExtractor.__init__(self, match)
@@ -149,26 +133,7 @@ class PinterestBoardExtractor(PinterestExtractor):
archive_fmt = "{board[id]}_{id}"
pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#]+)"
"/(?!_saved|_created|pins/)([^/?#]+)/?$")
- test = (
- ("https://www.pinterest.com/g1952849/test-/", {
- "pattern": r"https://i\.pinimg\.com/originals/",
- "count": 2,
- }),
- # board with sections (#835)
- ("https://www.pinterest.com/g1952849/stuff/", {
- "options": (("sections", True),),
- "count": 4,
- }),
- # secret board (#1055)
- ("https://www.pinterest.de/g1952849/secret/", {
- "count": 2,
- }),
- ("https://www.pinterest.com/g1952848/test/", {
- "exception": exception.GalleryDLException,
- }),
- # .co.uk TLD (#914)
- ("https://www.pinterest.co.uk/hextra7519/based-animals/"),
- )
+ example = "https://www.pinterest.com/USER/BOARD/"
def __init__(self, match):
PinterestExtractor.__init__(self, match)
@@ -199,13 +164,7 @@ class PinterestUserExtractor(PinterestExtractor):
"""Extractor for a user's boards"""
subcategory = "user"
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)(?:/_saved)?/?$"
- test = (
- ("https://www.pinterest.com/g1952849/", {
- "pattern": PinterestBoardExtractor.pattern,
- "count": ">= 2",
- }),
- ("https://www.pinterest.com/g1952849/_saved/"),
- )
+ example = "https://www.pinterest.com/USER/"
def __init__(self, match):
PinterestExtractor.__init__(self, match)
@@ -224,11 +183,7 @@ class PinterestAllpinsExtractor(PinterestExtractor):
subcategory = "allpins"
directory_fmt = ("{category}", "{user}")
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/pins/?$"
- test = ("https://www.pinterest.com/g1952849/pins/", {
- "pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}"
- r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w{3}",
- "count": 7,
- })
+ example = "https://www.pinterest.com/USER/pins/"
def __init__(self, match):
PinterestExtractor.__init__(self, match)
@@ -246,12 +201,7 @@ class PinterestCreatedExtractor(PinterestExtractor):
subcategory = "created"
directory_fmt = ("{category}", "{user}")
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/_created/?$"
- test = ("https://www.pinterest.de/digitalmomblog/_created/", {
- "pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}"
- r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.(jpg|png)",
- "count": 10,
- "range": "1-10",
- })
+ example = "https://www.pinterest.com/USER/_created/"
def __init__(self, match):
PinterestExtractor.__init__(self, match)
@@ -271,9 +221,7 @@ class PinterestSectionExtractor(PinterestExtractor):
"{board[name]}", "{section[title]}")
archive_fmt = "{board[id]}_{id}"
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/([^/?#]+)/([^/?#]+)"
- test = ("https://www.pinterest.com/g1952849/stuff/section", {
- "count": 2,
- })
+ example = "https://www.pinterest.com/USER/BOARD/SEcTION"
def __init__(self, match):
PinterestExtractor.__init__(self, match)
@@ -301,10 +249,7 @@ class PinterestSearchExtractor(PinterestExtractor):
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
pattern = BASE_PATTERN + r"/search/pins/?\?q=([^&#]+)"
- test = ("https://www.pinterest.com/search/pins/?q=nature", {
- "range": "1-50",
- "count": ">= 50",
- })
+ example = "https://www.pinterest.com/search/pins/?q=QUERY"
def __init__(self, match):
PinterestExtractor.__init__(self, match)
@@ -322,11 +267,7 @@ class PinterestRelatedPinExtractor(PinterestPinExtractor):
subcategory = "related-pin"
directory_fmt = ("{category}", "related {original_pin[id]}")
pattern = BASE_PATTERN + r"/pin/([^/?#]+).*#related$"
- test = ("https://www.pinterest.com/pin/858146903966145189/#related", {
- "range": "31-70",
- "count": 40,
- "archive": False,
- })
+ example = "https://www.pinterest.com/pin/12345/#related"
def metadata(self):
return {"original_pin": self.api.pin(self.pin_id)}
@@ -341,11 +282,7 @@ class PinterestRelatedBoardExtractor(PinterestBoardExtractor):
directory_fmt = ("{category}", "{board[owner][username]}",
"{board[name]}", "related")
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/([^/?#]+)/?#related$"
- test = ("https://www.pinterest.com/g1952849/test-/#related", {
- "range": "31-70",
- "count": 40,
- "archive": False,
- })
+ example = "https://www.pinterest.com/USER/BOARD/#related"
def pins(self):
return self.api.board_content_recommendation(self.board["id"])
@@ -355,15 +292,7 @@ class PinterestPinitExtractor(PinterestExtractor):
"""Extractor for images from a pin.it URL"""
subcategory = "pinit"
pattern = r"(?:https?://)?pin\.it/([^/?#]+)"
-
- test = (
- ("https://pin.it/Hvt8hgT", {
- "url": "8daad8558382c68f0868bdbd17d05205184632fa",
- }),
- ("https://pin.it/Hvt8hgS", {
- "exception": exception.NotFoundError,
- }),
- )
+ example = "https://pin.it/abcde"
def __init__(self, match):
PinterestExtractor.__init__(self, match)
@@ -396,7 +325,6 @@ class PinterestAPI():
"Accept" : "application/json, text/javascript, "
"*/*, q=0.01",
"Accept-Language" : "en-US,en;q=0.5",
- "Referer" : self.root + "/",
"X-Requested-With" : "XMLHttpRequest",
"X-APP-VERSION" : "0c4af40",
"X-CSRFToken" : csrf_token,
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 861959e..18a3ceb 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -26,10 +26,9 @@ class PixivExtractor(Extractor):
directory_fmt = ("{category}", "{user[id]} {user[account]}")
filename_fmt = "{id}_p{num}.{extension}"
archive_fmt = "{id}{suffix}.{extension}"
- cookiedomain = None
+ cookies_domain = None
- def __init__(self, match):
- Extractor.__init__(self, match)
+ def _init(self):
self.api = PixivAppAPI(self)
self.load_ugoira = self.config("ugoira", True)
self.max_posts = self.config("max-posts", 0)
@@ -47,6 +46,8 @@ class PixivExtractor(Extractor):
def transform_tags(work):
work["tags"] = [tag["name"] for tag in work["tags"]]
+ url_sanity = ("https://s.pximg.net/common/images"
+ "/limit_sanity_level_360.png")
ratings = {0: "General", 1: "R-18", 2: "R-18G"}
meta_user = self.config("metadata")
meta_bookmark = self.config("metadata-bookmark")
@@ -102,6 +103,10 @@ class PixivExtractor(Extractor):
elif work["page_count"] == 1:
url = meta_single_page["original_image_url"]
+ if url == url_sanity:
+ self.log.debug("Skipping 'sanity_level' warning (%s)",
+ work["id"])
+ continue
work["date_url"] = self._date_from_url(url)
yield Message.Url, url, text.nameext_from_url(url, work)
@@ -156,18 +161,15 @@ class PixivUserExtractor(PixivExtractor):
pattern = (BASE_PATTERN + r"/(?:"
r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id="
r")(\d+)(?:$|[?#])")
- test = (
- ("https://www.pixiv.net/en/users/173530"),
- ("https://www.pixiv.net/u/173530"),
- ("https://www.pixiv.net/member.php?id=173530"),
- ("https://www.pixiv.net/mypage.php#id=173530"),
- ("https://www.pixiv.net/#id=173530"),
- )
+ example = "https://www.pixiv.net/en/users/12345"
def __init__(self, match):
PixivExtractor.__init__(self, match)
self.user_id = match.group(1)
+ def initialize(self):
+ pass
+
def items(self):
base = "{}/users/{}/".format(self.root, self.user_id)
return self._dispatch_extractors((
@@ -187,29 +189,7 @@ class PixivArtworksExtractor(PixivExtractor):
r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
r"(?:/([^/?#]+))?/?(?:$|[?#])"
r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
- test = (
- ("https://www.pixiv.net/en/users/173530/artworks", {
- "url": "852c31ad83b6840bacbce824d85f2a997889efb7",
- }),
- # illusts with specific tag
- (("https://www.pixiv.net/en/users/173530/artworks"
- "/%E6%89%8B%E3%81%B6%E3%82%8D"), {
- "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
- }),
- (("https://www.pixiv.net/member_illust.php?id=173530"
- "&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
- "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
- }),
- # deleted account
- ("http://www.pixiv.net/member_illust.php?id=173531", {
- "options": (("metadata", True),),
- "exception": exception.NotFoundError,
- }),
- ("https://www.pixiv.net/en/users/173530/manga"),
- ("https://www.pixiv.net/en/users/173530/illustrations"),
- ("https://www.pixiv.net/member_illust.php?id=173530"),
- ("https://touch.pixiv.net/member_illust.php?id=173530"),
- )
+ example = "https://www.pixiv.net/en/users/12345/artworks"
def __init__(self, match):
PixivExtractor.__init__(self, match)
@@ -245,9 +225,7 @@ class PixivAvatarExtractor(PixivExtractor):
filename_fmt = "avatar{date:?_//%Y-%m-%d}.{extension}"
archive_fmt = "avatar_{user[id]}_{date}"
pattern = USER_PATTERN + r"/avatar"
- test = ("https://www.pixiv.net/en/users/173530/avatar", {
- "content": "4e57544480cc2036ea9608103e8f024fa737fe66",
- })
+ example = "https://www.pixiv.net/en/users/12345/avatar"
def __init__(self, match):
PixivExtractor.__init__(self, match)
@@ -265,10 +243,7 @@ class PixivBackgroundExtractor(PixivExtractor):
filename_fmt = "background{date:?_//%Y-%m-%d}.{extension}"
archive_fmt = "background_{user[id]}_{date}"
pattern = USER_PATTERN + "/background"
- test = ("https://www.pixiv.net/en/users/194921/background", {
- "pattern": r"https://i\.pximg\.net/background/img/2021/01/30/16/12/02"
- r"/194921_af1f71e557a42f499213d4b9eaccc0f8\.jpg",
- })
+ example = "https://www.pixiv.net/en/users/12345/background"
def __init__(self, match):
PixivExtractor.__init__(self, match)
@@ -295,14 +270,7 @@ class PixivMeExtractor(PixivExtractor):
"""Extractor for pixiv.me URLs"""
subcategory = "me"
pattern = r"(?:https?://)?pixiv\.me/([^/?#]+)"
- test = (
- ("https://pixiv.me/del_shannon", {
- "url": "29c295ce75150177e6b0a09089a949804c708fbf",
- }),
- ("https://pixiv.me/del_shanno", {
- "exception": exception.NotFoundError,
- }),
- )
+ example = "https://pixiv.me/USER"
def __init__(self, match):
PixivExtractor.__init__(self, match)
@@ -325,45 +293,7 @@ class PixivWorkExtractor(PixivExtractor):
r"|(?:i(?:\d+\.pixiv|\.pximg)\.net"
r"/(?:(?:.*/)?img-[^/]+/img/\d{4}(?:/\d\d){5}|img\d+/img/[^/]+)"
r"|img\d*\.pixiv\.net/img/[^/]+|(?:www\.)?pixiv\.net/i)/(\d+))")
- test = (
- ("https://www.pixiv.net/artworks/966412", {
- "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
- "content": "69a8edfb717400d1c2e146ab2b30d2c235440c5a",
- "keyword": {
- "date" : "dt:2008-06-12 15:29:13",
- "date_url": "dt:2008-06-12 15:29:13",
- },
- }),
- (("http://www.pixiv.net/member_illust.php"
- "?mode=medium&illust_id=966411"), {
- "exception": exception.NotFoundError,
- }),
- # ugoira
- (("https://www.pixiv.net/member_illust.php"
- "?mode=medium&illust_id=66806629"), {
- "url": "7267695a985c4db8759bebcf8d21dbdd2d2317ef",
- "keyword": {
- "frames" : list,
- "date" : "dt:2018-01-14 15:06:08",
- "date_url": "dt:2018-01-15 04:24:48",
- },
- }),
- # related works (#1237)
- ("https://www.pixiv.net/artworks/966412", {
- "options": (("related", True),),
- "range": "1-10",
- "count": ">= 10",
- }),
- ("https://www.pixiv.net/en/artworks/966412"),
- ("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=96641"),
- ("http://i1.pixiv.net/c/600x600/img-master"
- "/img/2008/06/13/00/29/13/966412_p0_master1200.jpg"),
- ("https://i.pximg.net/img-original"
- "/img/2017/04/25/07/33/29/62568267_p0.png"),
- ("https://www.pixiv.net/i/966412"),
- ("http://img.pixiv.net/img/soundcross/42626136.jpg"),
- ("http://i2.pixiv.net/img76/img/snailrin/42672235.jpg"),
- )
+ example = "https://www.pixiv.net/artworks/12345"
def __init__(self, match):
PixivExtractor.__init__(self, match)
@@ -386,47 +316,7 @@ class PixivFavoriteExtractor(PixivExtractor):
pattern = (BASE_PATTERN + r"/(?:(?:en/)?"
r"users/(\d+)/(bookmarks/artworks|following)(?:/([^/?#]+))?"
r"|bookmark\.php)(?:\?([^#]*))?")
- test = (
- ("https://www.pixiv.net/en/users/173530/bookmarks/artworks", {
- "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
- }),
- ("https://www.pixiv.net/bookmark.php?id=173530", {
- "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
- }),
- # bookmarks with specific tag
- (("https://www.pixiv.net/en/users/3137110"
- "/bookmarks/artworks/%E3%81%AF%E3%82%93%E3%82%82%E3%82%93"), {
- "url": "379b28275f786d946e01f721e54afe346c148a8c",
- }),
- # bookmarks with specific tag (legacy url)
- (("https://www.pixiv.net/bookmark.php?id=3137110"
- "&tag=%E3%81%AF%E3%82%93%E3%82%82%E3%82%93&p=1"), {
- "url": "379b28275f786d946e01f721e54afe346c148a8c",
- }),
- # own bookmarks
- ("https://www.pixiv.net/bookmark.php", {
- "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
- "keyword": {"tags_bookmark": ["47", "hitman"]},
- "options": (("metadata-bookmark", True),),
- }),
- # own bookmarks with tag (#596)
- ("https://www.pixiv.net/bookmark.php?tag=foobar", {
- "count": 0,
- }),
- # followed users (#515)
- ("https://www.pixiv.net/en/users/173530/following", {
- "pattern": PixivUserExtractor.pattern,
- "count": ">= 12",
- }),
- # followed users (legacy url) (#515)
- ("https://www.pixiv.net/bookmark.php?id=173530&type=user", {
- "pattern": PixivUserExtractor.pattern,
- "count": ">= 12",
- }),
- # touch URLs
- ("https://touch.pixiv.net/bookmark.php?id=173530"),
- ("https://touch.pixiv.net/bookmark.php"),
- )
+ example = "https://www.pixiv.net/en/users/12345/bookmarks/artworks"
def __init__(self, match):
uid, kind, self.tag, query = match.groups()
@@ -487,14 +377,7 @@ class PixivRankingExtractor(PixivExtractor):
directory_fmt = ("{category}", "rankings",
"{ranking[mode]}", "{ranking[date]}")
pattern = BASE_PATTERN + r"/ranking\.php(?:\?([^#]*))?"
- test = (
- ("https://www.pixiv.net/ranking.php?mode=daily&date=20170818"),
- ("https://www.pixiv.net/ranking.php"),
- ("https://touch.pixiv.net/ranking.php"),
- ("https://www.pixiv.net/ranking.php?mode=unknown", {
- "exception": exception.StopExtraction,
- }),
- )
+ example = "https://www.pixiv.net/ranking.php"
def __init__(self, match):
PixivExtractor.__init__(self, match)
@@ -553,24 +436,7 @@ class PixivSearchExtractor(PixivExtractor):
directory_fmt = ("{category}", "search", "{search[word]}")
pattern = (BASE_PATTERN + r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?"
r"|search\.php)(?:\?([^#]+))?")
- test = (
- ("https://www.pixiv.net/en/tags/Original", {
- "range": "1-10",
- "count": 10,
- }),
- ("https://pixiv.net/en/tags/foo/artworks?order=week&s_mode=s_tag", {
- "exception": exception.StopExtraction,
- }),
- ("https://pixiv.net/en/tags/foo/artworks?order=date&s_mode=tag", {
- "exception": exception.StopExtraction,
- }),
- ("https://www.pixiv.net/search.php?s_mode=s_tag&name=Original", {
- "exception": exception.StopExtraction,
- }),
- ("https://www.pixiv.net/en/tags/foo/artworks?order=date&s_mode=s_tag"),
- ("https://www.pixiv.net/search.php?s_mode=s_tag&word=Original"),
- ("https://touch.pixiv.net/search.php?word=Original"),
- )
+ example = "https://www.pixiv.net/en/tags/TAG"
def __init__(self, match):
PixivExtractor.__init__(self, match)
@@ -635,10 +501,7 @@ class PixivFollowExtractor(PixivExtractor):
archive_fmt = "F_{user_follow[id]}_{id}{num}.{extension}"
directory_fmt = ("{category}", "following")
pattern = BASE_PATTERN + r"/bookmark_new_illust\.php"
- test = (
- ("https://www.pixiv.net/bookmark_new_illust.php"),
- ("https://touch.pixiv.net/bookmark_new_illust.php"),
- )
+ example = "https://www.pixiv.net/bookmark_new_illust.php"
def works(self):
return self.api.illust_follow()
@@ -655,17 +518,7 @@ class PixivPixivisionExtractor(PixivExtractor):
"{pixivision_id} {pixivision_title}")
archive_fmt = "V{pixivision_id}_{id}{suffix}.{extension}"
pattern = r"(?:https?://)?(?:www\.)?pixivision\.net/(?:en/)?a/(\d+)"
- test = (
- ("https://www.pixivision.net/en/a/2791"),
- ("https://pixivision.net/a/2791", {
- "count": 7,
- "keyword": {
- "pixivision_id": "2791",
- "pixivision_title": "What's your favorite music? Editor’s "
- "picks featuring: “CD Covers”!",
- },
- }),
- )
+ example = "https://www.pixivision.net/en/a/12345"
def __init__(self, match):
PixivExtractor.__init__(self, match)
@@ -697,22 +550,7 @@ class PixivSeriesExtractor(PixivExtractor):
"{series[id]} {series[title]}")
filename_fmt = "{num_series:>03}_{id}_p{num}.{extension}"
pattern = BASE_PATTERN + r"/user/(\d+)/series/(\d+)"
- test = ("https://www.pixiv.net/user/10509347/series/21859", {
- "range": "1-10",
- "count": 10,
- "keyword": {
- "num_series": int,
- "series": {
- "canonical": "https://www.pixiv.net/user/10509347"
- "/series/21859",
- "description": str,
- "ogp": dict,
- "title": "先輩がうざい後輩の話",
- "total": int,
- "twitter": dict,
- },
- },
- })
+ example = "https://www.pixiv.net/user/12345/series/12345"
def __init__(self, match):
PixivExtractor.__init__(self, match)
@@ -754,56 +592,7 @@ class PixivNovelExtractor(PixivExtractor):
subcategory = "novel"
request_interval = 1.0
pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)"
- test = (
- ("https://www.pixiv.net/novel/show.php?id=19612040", {
- "count": 1,
- "content": "8c818474153cbd2f221ee08766e1d634c821d8b4",
- "keyword": {
- "caption": r"re:「無能な名無し」と呼ばれ虐げられて育った鈴\(すず\)は、",
- "comment_access_control": 0,
- "create_date": "2023-04-02T15:18:58+09:00",
- "date": "dt:2023-04-02 06:18:58",
- "id": 19612040,
- "is_bookmarked": False,
- "is_muted": False,
- "is_mypixiv_only": False,
- "is_original": True,
- "is_x_restricted": False,
- "novel_ai_type": 1,
- "page_count": 1,
- "rating": "General",
- "restrict": 0,
- "series": {
- "id": 10278364,
- "title": "龍の贄嫁〜無能な名無しと虐げられていましたが、"
- "どうやら異母妹に霊力を搾取されていたようです〜",
- },
- "tags": ["和風ファンタジー", "溺愛", "神様", "ヤンデレ", "執着",
- "異能", "ざまぁ", "学園", "神嫁"],
- "text_length": 5974,
- "title": "異母妹から「無能な名無し」と虐げられていた私、"
- "どうやら異母妹に霊力を搾取されていたようです(1)",
- "user": {
- "account": "yukinaga_chifuyu",
- "id": 77055466,
- },
- "visible": True,
- "x_restrict": 0,
- },
- }),
- # embeds
- ("https://www.pixiv.net/novel/show.php?id=16422450", {
- "options": (("embeds", True),),
- "count": 3,
- }),
- # full series
- ("https://www.pixiv.net/novel/show.php?id=19612040", {
- "options": (("full-series", True),),
- "count": 4,
- }),
- # short URL
- ("https://www.pixiv.net/n/19612040"),
- )
+ example = "https://www.pixiv.net/novel/show.php?id=12345"
def __init__(self, match):
PixivExtractor.__init__(self, match)
@@ -856,8 +645,13 @@ class PixivNovelExtractor(PixivExtractor):
yield Message.Directory, novel
+ try:
+ content = self.api.novel_text(novel["id"])["novel_text"]
+ except Exception:
+ self.log.warning("Unable to download novel %s", novel["id"])
+ continue
+
novel["extension"] = "txt"
- content = self.api.novel_text(novel["id"])["novel_text"]
yield Message.Url, "text:" + content, novel
if embeds:
@@ -909,11 +703,7 @@ class PixivNovelUserExtractor(PixivNovelExtractor):
"""Extractor for pixiv users' novels"""
subcategory = "novel-user"
pattern = USER_PATTERN + r"/novels"
- test = ("https://www.pixiv.net/en/users/77055466/novels", {
- "pattern": "^text:",
- "range": "1-5",
- "count": 5,
- })
+ example = "https://www.pixiv.net/en/users/12345/novels"
def novels(self):
return self.api.user_novels(self.novel_id)
@@ -923,10 +713,7 @@ class PixivNovelSeriesExtractor(PixivNovelExtractor):
"""Extractor for pixiv novel series"""
subcategory = "novel-series"
pattern = BASE_PATTERN + r"/novel/series/(\d+)"
- test = ("https://www.pixiv.net/novel/series/10278364", {
- "count": 4,
- "content": "b06abed001b3f6ccfb1579699e9a238b46d38ea2",
- })
+ example = "https://www.pixiv.net/novel/series/12345"
def novels(self):
return self.api.novel_series(self.novel_id)
@@ -937,13 +724,7 @@ class PixivNovelBookmarkExtractor(PixivNovelExtractor):
subcategory = "novel-bookmark"
pattern = (USER_PATTERN + r"/bookmarks/novels"
r"(?:/([^/?#]+))?(?:/?\?([^#]+))?")
- test = (
- ("https://www.pixiv.net/en/users/77055466/bookmarks/novels", {
- "count": 1,
- "content": "7194e8faa876b2b536f185ee271a2b6e46c69089",
- }),
- ("https://www.pixiv.net/en/users/11/bookmarks/novels/TAG?rest=hide"),
- )
+ example = "https://www.pixiv.net/en/users/12345/bookmarks/novels"
def __init__(self, match):
PixivNovelExtractor.__init__(self, match)
@@ -971,13 +752,9 @@ class PixivSketchExtractor(Extractor):
filename_fmt = "{post_id} {id}.{extension}"
archive_fmt = "S{user[id]}_{id}"
root = "https://sketch.pixiv.net"
- cookiedomain = ".pixiv.net"
+ cookies_domain = ".pixiv.net"
pattern = r"(?:https?://)?sketch\.pixiv\.net/@([^/?#]+)"
- test = ("https://sketch.pixiv.net/@nicoby", {
- "pattern": r"https://img\-sketch\.pixiv\.net/uploads/medium"
- r"/file/\d+/\d+\.(jpg|png)",
- "count": ">= 35",
- })
+ example = "https://sketch.pixiv.net/@USER"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/pixnet.py b/gallery_dl/extractor/pixnet.py
index 15be563..eb3edc3 100644
--- a/gallery_dl/extractor/pixnet.py
+++ b/gallery_dl/extractor/pixnet.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2020 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,7 +11,6 @@
from .common import Extractor, Message
from .. import text, exception
-
BASE_PATTERN = r"(?:https?://)?(?!www\.)([\w-]+)\.pixnet.net"
@@ -68,11 +67,7 @@ class PixnetImageExtractor(PixnetExtractor):
filename_fmt = "{id}.{extension}"
directory_fmt = ("{category}", "{blog}")
pattern = BASE_PATTERN + r"/album/photo/(\d+)"
- test = ("https://albertayu773.pixnet.net/album/photo/159443828", {
- "url": "156564c422138914c9fa5b42191677b45c414af4",
- "keyword": "19971bcd056dfef5593f4328a723a9602be0f087",
- "content": "0e097bdf49e76dd9b9d57a016b08b16fa6a33280",
- })
+ example = "https://USER.pixnet.net/album/photo/12345"
def items(self):
url = "https://api.pixnet.cc/oembed"
@@ -100,19 +95,7 @@ class PixnetSetExtractor(PixnetExtractor):
directory_fmt = ("{category}", "{blog}",
"{folder_id} {folder_title}", "{set_id} {set_title}")
pattern = BASE_PATTERN + r"/album/set/(\d+)"
- test = (
- ("https://albertayu773.pixnet.net/album/set/15078995", {
- "url": "6535712801af47af51110542f4938a7cef44557f",
- "keyword": "bf25d59e5b0959cb1f53e7fd2e2a25f2f67e5925",
- }),
- ("https://anrine910070.pixnet.net/album/set/5917493", {
- "url": "b3eb6431aea0bcf5003432a4a0f3a3232084fc13",
- "keyword": "bf7004faa1cea18cf9bd856f0955a69be51b1ec6",
- }),
- ("https://sky92100.pixnet.net/album/set/17492544", {
- "count": 0, # password-protected
- }),
- )
+ example = "https://USER.pixnet.net/album/set/12345"
def items(self):
url = self.url_fmt.format(self.root, self.item_id)
@@ -157,10 +140,7 @@ class PixnetFolderExtractor(PixnetExtractor):
subcategory = "folder"
url_fmt = "{}/album/folder/{}"
pattern = BASE_PATTERN + r"/album/folder/(\d+)"
- test = ("https://albertayu773.pixnet.net/album/folder/1405768", {
- "pattern": PixnetSetExtractor.pattern,
- "count": ">= 15",
- })
+ example = "https://USER.pixnet.net/album/folder/12345"
class PixnetUserExtractor(PixnetExtractor):
@@ -168,16 +148,4 @@ class PixnetUserExtractor(PixnetExtractor):
subcategory = "user"
url_fmt = "{}{}/album/list"
pattern = BASE_PATTERN + r"()(?:/blog|/album(?:/list)?)?/?(?:$|[?#])"
- test = (
- ("https://albertayu773.pixnet.net/"),
- ("https://albertayu773.pixnet.net/blog"),
- ("https://albertayu773.pixnet.net/album"),
- ("https://albertayu773.pixnet.net/album/list", {
- "pattern": PixnetFolderExtractor.pattern,
- "count": ">= 30",
- }),
- ("https://anrine910070.pixnet.net/album/list", {
- "pattern": PixnetSetExtractor.pattern,
- "count": ">= 14",
- }),
- )
+ example = "https://USER.pixnet.net/"
diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py
index 4135259..5a3bf5a 100644
--- a/gallery_dl/extractor/plurk.py
+++ b/gallery_dl/extractor/plurk.py
@@ -71,10 +71,7 @@ class PlurkTimelineExtractor(PlurkExtractor):
"""Extractor for URLs from all posts in a Plurk timeline"""
subcategory = "timeline"
pattern = r"(?:https?://)?(?:www\.)?plurk\.com/(?!p/)(\w+)/?(?:$|[?#])"
- test = ("https://www.plurk.com/plurkapi", {
- "pattern": r"https?://.+",
- "count": ">= 23"
- })
+ example = "https://www.plurk.com/USER"
def __init__(self, match):
PlurkExtractor.__init__(self, match)
@@ -105,16 +102,7 @@ class PlurkPostExtractor(PlurkExtractor):
"""Extractor for URLs from a Plurk post"""
subcategory = "post"
pattern = r"(?:https?://)?(?:www\.)?plurk\.com/p/(\w+)"
- test = (
- ("https://www.plurk.com/p/i701j1", {
- "url": "2115f208564591b8748525c2807a84596aaaaa5f",
- "count": 3,
- }),
- ("https://www.plurk.com/p/i701j1", {
- "options": (("comments", True),),
- "count": ">= 210",
- }),
- )
+ example = "https://www.plurk.com/p/12345"
def __init__(self, match):
PlurkExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index e3bb512..f42016f 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -96,15 +96,7 @@ class PoipikuUserExtractor(PoipikuExtractor):
subcategory = "user"
pattern = (BASE_PATTERN + r"/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?"
r"(\d+)/?(?:$|[?&#])")
- test = (
- ("https://poipiku.com/25049/", {
- "pattern": r"https://img-org\.poipiku\.com/user_img\d+/000025049"
- r"/\d+_\w+\.(jpe?g|png)$",
- "range": "1-10",
- "count": 10,
- }),
- ("https://poipiku.com/IllustListPcV.jsp?PG=1&ID=25049&KWD=")
- )
+ example = "https://poipiku.com/12345/"
def __init__(self, match):
PoipikuExtractor.__init__(self, match)
@@ -136,54 +128,7 @@ class PoipikuPostExtractor(PoipikuExtractor):
"""Extractor for a poipiku post"""
subcategory = "post"
pattern = BASE_PATTERN + r"/(\d+)/(\d+)"
- test = (
- ("https://poipiku.com/25049/5864576.html", {
- "pattern": r"https://img-org\.poipiku\.com/user_img\d+/000025049"
- r"/005864576_EWN1Y65gQ\.png$",
- "keyword": {
- "count": "1",
- "description": "",
- "extension": "png",
- "filename": "005864576_EWN1Y65gQ",
- "num": 1,
- "post_category": "DOODLE",
- "post_id": "5864576",
- "user_id": "25049",
- "user_name": "ユキウサギ",
- },
- }),
- ("https://poipiku.com/2166245/6411749.html", {
- "pattern": r"https://img-org\.poipiku\.com/user_img\d+/002166245"
- r"/006411749_\w+\.jpeg$",
- "count": 4,
- "keyword": {
- "count": "4",
- "description": "絵茶の産物ネタバレあるやつ",
- "num": int,
- "post_category": "SPOILER",
- "post_id": "6411749",
- "user_id": "2166245",
- "user_name": "wadahito",
- },
- }),
- # different warning button style
- ("https://poipiku.com/3572553/5776587.html", {
- "pattern": r"https://img-org\.poipiku.com/user_img\d+/003572553"
- r"/005776587_(\d+_)?\w+\.jpeg$",
- "count": 3,
- "keyword": {
- "count": "3",
- "description": "ORANGE OASISボスネタバレ<br />曲も大好き<br />"
- "2枚目以降はほとんど見えなかった1枚目背景"
- "のヒエログリフ小ネタです𓀀",
- "num": int,
- "post_category": "SPOILER",
- "post_id": "5776587",
- "user_id": "3572553",
- "user_name": "nagakun",
- },
- }),
- )
+ example = "https://poipiku.com/12345/12345.html"
def __init__(self, match):
PoipikuExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py
index fa4efa0..c5ce832 100644
--- a/gallery_dl/extractor/pornhub.py
+++ b/gallery_dl/extractor/pornhub.py
@@ -19,6 +19,35 @@ class PornhubExtractor(Extractor):
category = "pornhub"
root = "https://www.pornhub.com"
+ def _init(self):
+ self.cookies.set(
+ "accessAgeDisclaimerPH", "1", domain=".pornhub.com")
+
+ def _pagination(self, user, path):
+ if "/" not in path:
+ path += "/public"
+
+ url = "{}/{}/{}/ajax".format(self.root, user, path)
+ params = {"page": 1}
+ headers = {
+ "Referer": url[:-5],
+ "X-Requested-With": "XMLHttpRequest",
+ }
+
+ while True:
+ response = self.request(
+ url, method="POST", headers=headers, params=params,
+ allow_redirects=False)
+
+ if 300 <= response.status_code < 400:
+ url = "{}{}/{}/ajax".format(
+ self.root, response.headers["location"], path)
+ continue
+
+ yield response.text
+
+ params["page"] += 1
+
class PornhubGalleryExtractor(PornhubExtractor):
"""Extractor for image galleries on pornhub.com"""
@@ -27,30 +56,7 @@ class PornhubGalleryExtractor(PornhubExtractor):
filename_fmt = "{num:>03}_{id}.{extension}"
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/album/(\d+)"
- test = (
- ("https://www.pornhub.com/album/19289801", {
- "pattern": r"https://\w+.phncdn.com/pics/albums/\d+/\d+/\d+/\d+/",
- "count": ">= 300",
- "keyword": {
- "id" : int,
- "num" : int,
- "score" : int,
- "views" : int,
- "caption": str,
- "user" : "Danika Mori",
- "gallery": {
- "id" : 19289801,
- "score": int,
- "views": int,
- "tags" : list,
- "title": "Danika Mori Best Moments",
- },
- },
- }),
- ("https://www.pornhub.com/album/69040172", {
- "exception": exception.AuthorizationError,
- }),
- )
+ example = "https://www.pornhub.com/album/12345"
def __init__(self, match):
PornhubExtractor.__init__(self, match)
@@ -58,9 +64,6 @@ class PornhubGalleryExtractor(PornhubExtractor):
self._first = None
def items(self):
- self.session.cookies.set(
- "accessAgeDisclaimerPH", "1", domain=".pornhub.com")
-
data = self.metadata()
yield Message.Directory, data
for num, image in enumerate(self.images(), 1):
@@ -111,57 +114,100 @@ class PornhubGalleryExtractor(PornhubExtractor):
"views" : text.parse_int(img["times_viewed"]),
"score" : text.parse_int(img["vote_percent"]),
}
- key = img["next"]
+ key = str(img["next"])
if key == end:
return
+class PornhubGifExtractor(PornhubExtractor):
+ """Extractor for pornhub.com gifs"""
+ subcategory = "gif"
+ directory_fmt = ("{category}", "{user}", "gifs")
+ filename_fmt = "{id} {title}.{extension}"
+ archive_fmt = "{id}"
+ pattern = BASE_PATTERN + r"/gif/(\d+)"
+ example = "https://www.pornhub.com/gif/12345"
+
+ def __init__(self, match):
+ PornhubExtractor.__init__(self, match)
+ self.gallery_id = match.group(1)
+
+ def items(self):
+ url = "{}/gif/{}".format(self.root, self.gallery_id)
+ extr = text.extract_from(self.request(url).text)
+
+ gif = {
+ "id" : self.gallery_id,
+ "tags" : extr("data-context-tag='", "'").split(","),
+ "title": extr('"name": "', '"'),
+ "url" : extr('"contentUrl": "', '"'),
+ "date" : text.parse_datetime(
+ extr('"uploadDate": "', '"'), "%Y-%m-%d"),
+ "user" : extr('data-mxptext="', '"'),
+ }
+
+ yield Message.Directory, gif
+ yield Message.Url, gif["url"], text.nameext_from_url(gif["url"], gif)
+
+
class PornhubUserExtractor(PornhubExtractor):
- """Extractor for all galleries of a pornhub user"""
+ """Extractor for a pornhub user"""
subcategory = "user"
- pattern = (BASE_PATTERN + r"/(users|model|pornstar)/([^/?#]+)"
- "(?:/photos(?:/(public|private|favorites))?)?/?$")
- test = (
- ("https://www.pornhub.com/pornstar/danika-mori/photos", {
- "pattern": PornhubGalleryExtractor.pattern,
- "count": ">= 6",
- }),
- ("https://www.pornhub.com/users/flyings0l0/"),
- ("https://www.pornhub.com/users/flyings0l0/photos/public"),
- ("https://www.pornhub.com/users/flyings0l0/photos/private"),
- ("https://www.pornhub.com/users/flyings0l0/photos/favorites"),
- ("https://www.pornhub.com/model/bossgirl/photos"),
- )
+ pattern = BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)/?$"
+ example = "https://www.pornhub.com/model/USER"
def __init__(self, match):
PornhubExtractor.__init__(self, match)
- self.type, self.user, self.cat = match.groups()
+ self.user = match.group(1)
+
+ def initialize(self):
+ pass
def items(self):
- url = "{}/{}/{}/photos/{}/ajax".format(
- self.root, self.type, self.user, self.cat or "public")
- params = {"page": 1}
- headers = {
- "Referer": url[:-5],
- "X-Requested-With": "XMLHttpRequest",
- }
+ base = "{}/{}/".format(self.root, self.user)
+ return self._dispatch_extractors((
+ (PornhubPhotosExtractor, base + "photos"),
+ (PornhubGifsExtractor , base + "gifs"),
+ ), ("photos",))
- data = {"_extractor": PornhubGalleryExtractor}
- while True:
- response = self.request(
- url, method="POST", headers=headers, params=params,
- allow_redirects=False)
- if 300 <= response.status_code < 400:
- url = "{}{}/photos/{}/ajax".format(
- self.root, response.headers["location"],
- self.cat or "public")
- continue
+class PornhubPhotosExtractor(PornhubExtractor):
+ """Extractor for all galleries of a pornhub user"""
+ subcategory = "photos"
+ pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
+ "/(photos(?:/[^/?#]+)?)")
+ example = "https://www.pornhub.com/model/USER/photos"
+
+ def __init__(self, match):
+ PornhubExtractor.__init__(self, match)
+ self.user, self.path = match.groups()
+ def items(self):
+ data = {"_extractor": PornhubGalleryExtractor}
+ for page in self._pagination(self.user, self.path):
gid = None
- for gid in text.extract_iter(response.text, 'id="albumphoto', '"'):
+ for gid in text.extract_iter(page, 'id="albumphoto', '"'):
yield Message.Queue, self.root + "/album/" + gid, data
if gid is None:
return
- params["page"] += 1
+
+class PornhubGifsExtractor(PornhubExtractor):
+ """Extractor for a pornhub user's gifs"""
+ subcategory = "gifs"
+ pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
+ "/(gifs(?:/[^/?#]+)?)")
+ example = "https://www.pornhub.com/model/USER/gifs"
+
+ def __init__(self, match):
+ PornhubExtractor.__init__(self, match)
+ self.user, self.path = match.groups()
+
+ def items(self):
+ data = {"_extractor": PornhubGifExtractor}
+ for page in self._pagination(self.user, self.path):
+ gid = None
+ for gid in text.extract_iter(page, 'id="gif', '"'):
+ yield Message.Queue, self.root + "/gif/" + gid, data
+ if gid is None:
+ return
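The new PornhubExtractor._pagination() above centralizes the AJAX paging loop that the photos and gifs extractors now share: it POSTs to a ".../ajax" endpoint, follows 3xx redirects manually by rebuilding the URL from the Location header, and yields one HTML fragment per page until the caller stops (the extractors above return once a page contains no album or gif IDs). A rough standalone sketch of that pattern, using plain requests instead of gallery_dl's request wrapper — URLs and arguments are placeholders:

import requests

def paginate_ajax(root, user, path, session=None):
    # default to the public listing, as in the hunk above
    if "/" not in path:
        path += "/public"
    session = session or requests.Session()
    url = "{}/{}/{}/ajax".format(root, user, path)
    params = {"page": 1}
    headers = {
        "Referer": url[:-5],                  # same URL without the "/ajax" suffix
        "X-Requested-With": "XMLHttpRequest",
    }
    while True:
        response = session.post(
            url, headers=headers, params=params, allow_redirects=False)
        if 300 <= response.status_code < 400:
            # rebuild the endpoint from the redirect target and retry
            url = "{}{}/{}/ajax".format(
                root, response.headers["location"], path)
            continue
        yield response.text                   # one HTML fragment per page
        params["page"] += 1                   # caller breaks when a page is empty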
diff --git a/gallery_dl/extractor/pornpics.py b/gallery_dl/extractor/pornpics.py
index 783f3da..4a6f031 100644
--- a/gallery_dl/extractor/pornpics.py
+++ b/gallery_dl/extractor/pornpics.py
@@ -23,7 +23,6 @@ class PornpicsExtractor(Extractor):
def __init__(self, match):
super().__init__(match)
self.item = match.group(1)
- self.session.headers["Referer"] = self.root
def items(self):
for gallery in self.galleries():
@@ -62,33 +61,7 @@ class PornpicsExtractor(Extractor):
class PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor):
"""Extractor for pornpics galleries"""
pattern = BASE_PATTERN + r"(/galleries/(?:[^/?#]+-)?(\d+))"
- test = (
- (("https://www.pornpics.com/galleries/british-beauty-danielle-flashes-"
- "hot-breasts-ass-and-snatch-in-the-forest-62610699/"), {
- "pattern": r"https://cdni\.pornpics\.com/1280/7/160/62610699"
- r"/62610699_\d+_[0-9a-f]{4}\.jpg",
- "keyword": {
- "categories": ["MILF", "Amateur", "Sexy", "Outdoor"],
- "channel": "FTV MILFs",
- "count": 17,
- "gallery_id": 62610699,
- "models": ["Danielle"],
- "num": int,
- "slug": "british-beauty-danielle-flashes-"
- "hot-breasts-ass-and-snatch-in-the-forest",
- "tags": ["Amateur MILF", "Sexy MILF"],
- "title": "British beauty Danielle flashes "
- "hot breasts, ass and snatch in the forest",
- "views": int,
- },
- }),
- ("https://pornpics.com/es/galleries/62610699", {
- "keyword": {
- "slug": "british-beauty-danielle-flashes-"
- "hot-breasts-ass-and-snatch-in-the-forest",
- },
- }),
- )
+ example = "https://www.pornpics.com/galleries/TITLE-12345/"
def __init__(self, match):
PornpicsExtractor.__init__(self, match)
@@ -124,14 +97,7 @@ class PornpicsTagExtractor(PornpicsExtractor):
"""Extractor for galleries from pornpics tag searches"""
subcategory = "tag"
pattern = BASE_PATTERN + r"/tags/([^/?#]+)"
- test = (
- ("https://www.pornpics.com/tags/summer-dress/", {
- "pattern": PornpicsGalleryExtractor.pattern,
- "range": "1-50",
- "count": 50,
- }),
- ("https://pornpics.com/fr/tags/summer-dress"),
- )
+ example = "https://www.pornpics.com/tags/TAGS/"
def galleries(self):
url = "{}/tags/{}/".format(self.root, self.item)
@@ -142,26 +108,7 @@ class PornpicsSearchExtractor(PornpicsExtractor):
"""Extractor for galleries from pornpics search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/(?:\?q=|pornstars/|channels/)([^/&#]+)"
- test = (
- ("https://www.pornpics.com/?q=nature", {
- "pattern": PornpicsGalleryExtractor.pattern,
- "range": "1-50",
- "count": 50,
- }),
- ("https://www.pornpics.com/channels/femjoy/", {
- "pattern": PornpicsGalleryExtractor.pattern,
- "range": "1-50",
- "count": 50,
- }),
- ("https://www.pornpics.com/pornstars/emma-brown/", {
- "pattern": PornpicsGalleryExtractor.pattern,
- "range": "1-50",
- "count": 50,
- }),
- ("https://pornpics.com/jp/?q=nature"),
- ("https://pornpics.com/it/channels/femjoy"),
- ("https://pornpics.com/pt/pornstars/emma-brown"),
- )
+ example = "https://www.pornpics.com/?q=QUERY"
def galleries(self):
url = self.root + "/search/srch.php"
diff --git a/gallery_dl/extractor/pururin.py b/gallery_dl/extractor/pururin.py
index 32567f6..3a4c614 100644
--- a/gallery_dl/extractor/pururin.py
+++ b/gallery_dl/extractor/pururin.py
@@ -10,95 +10,63 @@
from .common import GalleryExtractor
from .. import text, util
-import binascii
class PururinGalleryExtractor(GalleryExtractor):
"""Extractor for image galleries on pururin.io"""
category = "pururin"
- pattern = r"(?:https?://)?(?:www\.)?pururin\.[ti]o/(?:gallery|read)/(\d+)"
- test = (
- ("https://pururin.to/gallery/38661/iowant-2", {
- "pattern": r"https://cdn.pururin.[ti]o/\w+"
- r"/images/data/\d+/\d+\.jpg",
- "keyword": {
- "title" : "re:I ?owant 2!!",
- "title_en" : "re:I ?owant 2!!",
- "title_jp" : "",
- "gallery_id": 38661,
- "count" : 19,
- "artist" : ["Shoda Norihiro"],
- "group" : ["Obsidian Order"],
- "parody" : ["Kantai Collection"],
- "characters": ["Iowa", "Teitoku"],
- "tags" : list,
- "type" : "Doujinshi",
- "collection": "I owant you!",
- "convention": "C92",
- "rating" : float,
- "uploader" : "demo",
- "scanlator" : "mrwayne",
- "lang" : "en",
- "language" : "English",
- }
- }),
- ("https://pururin.to/gallery/7661/unisis-team-vanilla", {
- "count": 17,
- }),
- ("https://pururin.io/gallery/38661/iowant-2"),
- )
root = "https://pururin.to"
+ pattern = r"(?:https?://)?(?:www\.)?pururin\.[ti]o/(?:gallery|read)/(\d+)"
+ example = "https://pururin.to/gallery/12345/TITLE"
def __init__(self, match):
self.gallery_id = match.group(1)
url = "{}/gallery/{}/x".format(self.root, self.gallery_id)
GalleryExtractor.__init__(self, match, url)
- self._ext = ""
- self._cnt = 0
-
def metadata(self, page):
extr = text.extract_from(page)
- def _lst(key, e=extr):
- return [
- text.unescape(item)
- for item in text.extract_iter(e(key, "</td>"), 'title="', '"')
- ]
+ def _lst(e=extr):
+ v = text.unescape(e('value="', '"'))
+ return [item["name"] for item in util.json_loads(v)] if v else ()
def _str(key, e=extr):
- return text.unescape(text.extract(
- e(key, "</td>"), 'title="', '"')[0] or "")
+ return text.unescape(text.extr(
+ e(key, "</td>"), 'title="', '"')).partition(" / ")[0]
- url = "{}/read/{}/01/x".format(self.root, self.gallery_id)
- page = self.request(url).text
- info = util.json_loads(binascii.a2b_base64(text.extr(
- page, '<gallery-read encoded="', '"')).decode())
- self._ext = info["image_extension"]
- self._cnt = info["total_pages"]
+ title = text.unescape(extr('<h1><span itemprop="name">', '<'))
+ title_en, _, title_ja = title.partition(" / ")
data = {
"gallery_id": text.parse_int(self.gallery_id),
- "title" : info["title"] or info.get("j_title") or "",
- "title_en" : info["title"],
- "title_jp" : info.get("j_title") or "",
- "artist" : _lst("<td>Artist</td>"),
- "group" : _lst("<td>Circle</td>"),
- "parody" : _lst("<td>Parody</td>"),
- "tags" : _lst("<td>Contents</td>"),
- "type" : _str("<td>Category</td>"),
- "characters": _lst("<td>Character</td>"),
- "collection": _str("<td>Collection</td>"),
+ "title" : title_en or title_ja,
+ "title_en" : title_en,
+ "title_ja" : title_ja,
"language" : _str("<td>Language</td>"),
- "scanlator" : _str("<td>Scanlator</td>"),
- "convention": _str("<td>Convention</td>"),
+ "type" : _str("<td>Category</td>"),
"uploader" : text.remove_html(extr("<td>Uploader</td>", "</td>")),
- "rating" : text.parse_float(extr(" :rating='" , "'")),
+ "rating" : text.parse_float(extr(
+ 'itemprop="ratingValue" content="', '"')),
+ "artist" : extr('name="artist_tags"', '') or _lst(),
+ "group" : _lst(),
+ "parody" : _lst(),
+ "tags" : _lst(),
+ "characters": _lst(),
+ "scanlator" : _lst(),
+ "convention": _lst(),
+ "collection": _lst(),
}
data["lang"] = util.language_to_code(data["language"])
return data
def images(self, _):
- ufmt = "https://cdn.pururin.to/assets/images/data/{}/{{}}.{}".format(
- self.gallery_id, self._ext)
- return [(ufmt.format(num), None) for num in range(1, self._cnt + 1)]
+ url = "{}/read/{}/01/x".format(self.root, self.gallery_id)
+ page = self.request(url).text
+
+ svr, pos = text.extract(page, 'data-svr="', '"')
+ img, pos = text.extract(page, 'data-img="', '"', pos)
+ data = util.json_loads(text.unescape(img))
+
+ base = "{}/{}/".format(svr, data["directory"])
+ return [(base + i["filename"], None) for i in data["images"]]
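The rewritten pururin extractor no longer decodes a base64 <gallery-read> blob; metadata is read from JSON embedded in value="..." attributes, and the image list comes from the data-svr / data-img attributes of the reader page. A minimal stdlib-only sketch of that image-list parsing, assuming a saved reader page — attribute and key names follow the hunk above, everything else is illustrative:

import html
import json
import re

def image_urls(page_html):
    # pull the HTML-escaped JSON out of the reader page's data attributes
    svr = re.search(r'data-svr="([^"]+)"', page_html).group(1)
    img = re.search(r'data-img="([^"]+)"', page_html).group(1)
    data = json.loads(html.unescape(img))
    base = "{}/{}/".format(svr, data["directory"])
    return [base + item["filename"] for item in data["images"]]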
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index 1800b68..9a6c8a5 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -22,18 +22,20 @@ class ReactorExtractor(BaseExtractor):
def __init__(self, match):
BaseExtractor.__init__(self, match)
+
url = text.ensure_http_scheme(match.group(0), "http://")
pos = url.index("/", 10)
-
- self.root, self.path = url[:pos], url[pos:]
- self.session.headers["Referer"] = self.root
- self.gif = self.config("gif", False)
+ self.root = url[:pos]
+ self.path = url[pos:]
if self.category == "reactor":
# set category based on domain name
netloc = urllib.parse.urlsplit(self.root).netloc
self.category = netloc.rpartition(".")[0]
+ def _init(self):
+ self.gif = self.config("gif", False)
+
def items(self):
data = self.metadata()
yield Message.Directory, data
@@ -170,25 +172,7 @@ class ReactorTagExtractor(ReactorExtractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "{search_tags}_{post_id}_{num}"
pattern = BASE_PATTERN + r"/tag/([^/?#]+)(?:/[^/?#]+)?"
- test = (
- ("http://reactor.cc/tag/gif"),
- ("http://anime.reactor.cc/tag/Anime+Art"),
- ("http://joyreactor.cc/tag/Advent+Cirno", {
- "count": ">= 15",
- }),
- ("http://joyreactor.com/tag/Cirno", {
- "url": "aa59090590b26f4654881301fe8fe748a51625a8",
- }),
- # 'best' rating (#3073)
- ("http://joyreactor.com/tag/Dark+Souls+2/best", {
- "count": 4,
- }),
- ("http://pornreactor.cc/tag/RiceGnat", {
- "range": "1-25",
- "count": ">= 25",
- }),
- ("http://fapreactor.com/tag/RiceGnat"),
- )
+ example = "http://reactor.cc/tag/TAG"
def __init__(self, match):
ReactorExtractor.__init__(self, match)
@@ -204,19 +188,7 @@ class ReactorSearchExtractor(ReactorExtractor):
directory_fmt = ("{category}", "search", "{search_tags}")
archive_fmt = "s_{search_tags}_{post_id}_{num}"
pattern = BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)"
- test = (
- ("http://reactor.cc/search?q=Art"),
- ("http://joyreactor.cc/search/Nature", {
- "range": "1-25",
- "count": ">= 20",
- }),
- ("http://joyreactor.com/search?q=Nature", {
- "range": "1-25",
- "count": ">= 20",
- }),
- ("http://pornreactor.cc/search?q=ecchi+hentai"),
- ("http://fapreactor.com/search/ecchi+hentai"),
- )
+ example = "http://reactor.cc/search?q=QUERY"
def __init__(self, match):
ReactorExtractor.__init__(self, match)
@@ -231,19 +203,7 @@ class ReactorUserExtractor(ReactorExtractor):
subcategory = "user"
directory_fmt = ("{category}", "user", "{user}")
pattern = BASE_PATTERN + r"/user/([^/?#]+)"
- test = (
- ("http://reactor.cc/user/Dioklet"),
- ("http://anime.reactor.cc/user/Shuster"),
- ("http://joyreactor.cc/user/hemantic"),
- ("http://joyreactor.com/user/Tacoman123", {
- "url": "60ce9a3e3db791a0899f7fb7643b5b87d09ae3b5",
- }),
- ("http://pornreactor.cc/user/Disillusion", {
- "range": "1-25",
- "count": ">= 20",
- }),
- ("http://fapreactor.com/user/Disillusion"),
- )
+ example = "http://reactor.cc/user/USER"
def __init__(self, match):
ReactorExtractor.__init__(self, match)
@@ -257,42 +217,7 @@ class ReactorPostExtractor(ReactorExtractor):
"""Extractor for single posts on *reactor.cc sites"""
subcategory = "post"
pattern = BASE_PATTERN + r"/post/(\d+)"
- test = (
- ("http://reactor.cc/post/4999736", {
- "url": "dfc74d150d7267384d8c229c4b82aa210755daa0",
- }),
- ("http://anime.reactor.cc/post/3576250"),
- ("http://joyreactor.com/post/3721876", { # single image
- "pattern": r"http://img\d\.joyreactor\.com/pics/post/full"
- r"/cartoon-painting-monster-lake-4841316.jpeg",
- "count": 1,
- "keyword": "2207a7dfed55def2042b6c2554894c8d7fda386e",
- }),
- ("http://joyreactor.com/post/3713804", { # 4 images
- "pattern": r"http://img\d\.joyreactor\.com/pics/post/full"
- r"/movie-tv-godzilla-monsters-\d+\.jpeg",
- "count": 4,
- "keyword": "d7da9ba7809004c809eedcf6f1c06ad0fbb3df21",
- }),
- ("http://joyreactor.com/post/3726210", { # gif / video
- "url": "60f3b9a0a3918b269bea9b4f8f1a5ab3c2c550f8",
- "keyword": "8949d9d5fc469dab264752432efbaa499561664a",
- }),
- ("http://joyreactor.com/post/3668724", { # youtube embed
- "url": "bf1666eddcff10c9b58f6be63fa94e4e13074214",
- "keyword": "e18b1ffbd79d76f9a0e90b6d474cc2499e343f0b",
- }),
- ("http://joyreactor.cc/post/1299", { # "malformed" JSON
- "url": "ab02c6eb7b4035ad961b29ee0770ee41be2fcc39",
- }),
- ("http://pornreactor.cc/post/863166", {
- "url": "a09fb0577489e1f9564c25d0ad576f81b19c2ef3",
- "content": "ec6b0568bfb1803648744077da082d14de844340",
- }),
- ("http://fapreactor.com/post/863166", {
- "url": "2a956ce0c90e8bc47b4392db4fa25ad1342f3e54",
- }),
- )
+ example = "http://reactor.cc/post/12345"
def __init__(self, match):
ReactorExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index c924e0a..93e41be 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -49,16 +49,14 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
"""Extractor for comic-issues from readcomiconline.li"""
subcategory = "issue"
pattern = BASE_PATTERN + r"(/Comic/[^/?#]+/[^/?#]+\?)([^#]+)"
- test = ("https://readcomiconline.li/Comic/W-i-t-c-h/Issue-130?id=22289", {
- "pattern": r"https://2\.bp\.blogspot\.com/[\w-]+=s0\?.+",
- "keyword": "2d9ec81ce1b11fac06ebf96ce33cdbfca0e85eb5",
- "count": 36,
- })
+ example = "https://readcomiconline.li/Comic/TITLE/Issue-123?id=12345"
def __init__(self, match):
ChapterExtractor.__init__(self, match)
+ self.params = match.group(2)
- params = text.parse_query(match.group(2))
+ def _init(self):
+ params = text.parse_query(self.params)
quality = self.config("quality")
if quality is None or quality == "auto":
@@ -96,16 +94,7 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
chapterclass = ReadcomiconlineIssueExtractor
subcategory = "comic"
pattern = BASE_PATTERN + r"(/Comic/[^/?#]+/?)$"
- test = (
- ("https://readcomiconline.li/Comic/W-i-t-c-h", {
- "url": "74eb8b9504b4084fcc9367b341300b2c52260918",
- "keyword": "3986248e4458fa44a201ec073c3684917f48ee0c",
- }),
- ("https://readcomiconline.to/Comic/Bazooka-Jules", {
- "url": "2f66a467a772df4d4592e97a059ddbc3e8991799",
- "keyword": "f5ba5246cd787bb750924d9690cb1549199bd516",
- }),
- )
+ example = "https://readcomiconline.li/Comic/TITLE"
def chapters(self, page):
results = []
diff --git a/gallery_dl/extractor/recursive.py b/gallery_dl/extractor/recursive.py
index 4dd9d5c..4156484 100644
--- a/gallery_dl/extractor/recursive.py
+++ b/gallery_dl/extractor/recursive.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,7 +9,6 @@
"""Recursive extractor"""
from .common import Extractor, Message
-import requests
import re
@@ -17,34 +16,16 @@ class RecursiveExtractor(Extractor):
"""Extractor that fetches URLs from a remote or local source"""
category = "recursive"
pattern = r"r(?:ecursive)?:"
- test = ("recursive:https://pastebin.com/raw/FLwrCYsT", {
- "url": "eee86d65c346361b818e8f4b2b307d9429f136a2",
- })
+ example = "recursive:https://pastebin.com/raw/FLwrCYsT"
def items(self):
- self.session.mount("file://", FileAdapter())
- page = self.request(self.url.partition(":")[2]).text
- del self.session.adapters["file://"]
+ url = self.url.partition(":")[2]
- for match in re.finditer(r"https?://[^\s\"']+", page):
- yield Message.Queue, match.group(0), {}
-
-
-class FileAdapter(requests.adapters.BaseAdapter):
- """Requests adapter for local files"""
-
- def send(self, request, **kwargs):
- response = requests.Response()
- try:
- response.raw = open(request.url[7:], "rb")
- except OSError:
- import io
- response.raw = io.BytesIO()
- response.status_code = requests.codes.bad_request
+ if url.startswith("file://"):
+ with open(url[7:]) as fp:
+ page = fp.read()
else:
- response.raw.release_conn = response.raw.close
- response.status_code = requests.codes.ok
- return response
+ page = self.request(url).text
- def close(self):
- pass
+ for match in re.finditer(r"https?://[^\s\"']+", page):
+ yield Message.Queue, match.group(0), {}
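With the FileAdapter gone, the recursive extractor simply reads local files for "file://" sources and fetches everything else over HTTP before scanning the text for URLs. A self-contained sketch of the same flow, substituting urllib for gallery_dl's requests-based self.request():

import re
import urllib.request

def extract_urls(source):
    # local file vs. remote page, mirroring the branch introduced above
    if source.startswith("file://"):
        with open(source[7:], encoding="utf-8") as fp:
            page = fp.read()
    else:
        with urllib.request.urlopen(source) as resp:
            page = resp.read().decode("utf-8", "replace")
    # same URL pattern as the extractor's finditer() call
    return [m.group(0) for m in re.finditer(r"https?://[^\s\"']+", page)]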
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 54b162b..8553312 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -19,7 +19,7 @@ class RedditExtractor(Extractor):
directory_fmt = ("{category}", "{subreddit}")
filename_fmt = "{id}{num:? //>02} {title[:220]}.{extension}"
archive_fmt = "{filename}"
- cookiedomain = ".reddit.com"
+ cookies_domain = ".reddit.com"
request_interval = 0.6
def items(self):
@@ -30,6 +30,7 @@ class RedditExtractor(Extractor):
parentdir = self.config("parent-directory")
max_depth = self.config("recursion", 0)
+ previews = self.config("previews", True)
videos = self.config("videos", True)
if videos:
@@ -65,7 +66,10 @@ class RedditExtractor(Extractor):
media = submission
url = media["url"]
- if url and url.startswith("https://i.redd.it/"):
+ if url and url.startswith((
+ "https://i.redd.it/",
+ "https://preview.redd.it/",
+ )):
text.nameext_from_url(url, submission)
yield Message.Url, url, submission
@@ -94,21 +98,38 @@ class RedditExtractor(Extractor):
' href="', '"'):
urls.append((url, submission))
for comment in comments:
- for url in text.extract_iter(
- comment["body_html"] or "", ' href="', '"'):
- urls.append((url, comment))
+ html = comment["body_html"] or ""
+ if ' href="' in html:
+ comment["date"] = text.parse_timestamp(
+ comment["created_utc"])
+ if submission:
+ data = submission.copy()
+ data["comment"] = comment
+ else:
+ data = comment
+ for url in text.extract_iter(html, ' href="', '"'):
+ urls.append((url, data))
for url, data in urls:
if not url or url[0] == "#":
continue
if url[0] == "/":
url = "https://www.reddit.com" + url
+ if url.startswith((
+ "https://www.reddit.com/message/compose",
+ "https://reddit.com/message/compose",
+ )):
+ continue
match = match_submission(url)
if match:
extra.append(match.group(1))
elif not match_user(url) and not match_subreddit(url):
+ if previews and "preview" in data:
+ data["_fallback"] = self._previews(data)
yield Message.Queue, text.unescape(url), data
+ if "_fallback" in data:
+ del data["_fallback"]
if not extra or depth == max_depth:
return
@@ -165,22 +186,30 @@ class RedditExtractor(Extractor):
submission["_ytdl_extra"] = {"title": submission["title"]}
return submission["url"]
+ def _previews(self, post):
+ try:
+ if "reddit_video_preview" in post["preview"]:
+ video = post["preview"]["reddit_video_preview"]
+ if "dash_url" in video:
+ yield "ytdl:" + video["dash_url"]
+ if "hls_url" in video:
+ yield "ytdl:" + video["hls_url"]
+ except Exception as exc:
+ self.log.debug("%s: %s", exc.__class__.__name__, exc)
+
+ try:
+ for image in post["preview"]["images"]:
+ yield image["source"]["url"]
+ except Exception as exc:
+ self.log.debug("%s: %s", exc.__class__.__name__, exc)
+
class RedditSubredditExtractor(RedditExtractor):
"""Extractor for URLs from subreddits on reddit.com"""
subcategory = "subreddit"
pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com"
r"(/r/[^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?(?:$|#)")
- test = (
- ("https://www.reddit.com/r/lavaporn/", {
- "range": "1-20",
- "count": ">= 20",
- }),
- ("https://www.reddit.com/r/lavaporn/top/?sort=top&t=month"),
- ("https://old.reddit.com/r/lavaporn/"),
- ("https://np.reddit.com/r/lavaporn/"),
- ("https://m.reddit.com/r/lavaporn/"),
- )
+ example = "https://www.reddit.com/r/SUBREDDIT/"
def __init__(self, match):
self.subreddit, sub, params = match.groups()
@@ -198,13 +227,7 @@ class RedditHomeExtractor(RedditSubredditExtractor):
subcategory = "home"
pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com"
r"((?:/([a-z]+))?)/?(?:\?([^#]*))?(?:$|#)")
- test = (
- ("https://www.reddit.com/", {
- "range": "1-20",
- "count": ">= 20",
- }),
- ("https://old.reddit.com/top/?sort=top&t=month"),
- )
+ example = "https://www.reddit.com/"
class RedditUserExtractor(RedditExtractor):
@@ -212,14 +235,7 @@ class RedditUserExtractor(RedditExtractor):
subcategory = "user"
pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/u(?:ser)?/"
r"([^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?$")
- test = (
- ("https://www.reddit.com/user/username/", {
- "count": ">= 2",
- }),
- ("https://www.reddit.com/user/username/gilded/?sort=top&t=month"),
- ("https://old.reddit.com/user/username/"),
- ("https://www.reddit.com/u/username/"),
- )
+ example = "https://www.reddit.com/user/USER/"
def __init__(self, match):
self.user, sub, params = match.groups()
@@ -238,71 +254,7 @@ class RedditSubmissionExtractor(RedditExtractor):
pattern = (r"(?:https?://)?(?:"
r"(?:\w+\.)?reddit\.com/(?:(?:r|u|user)/[^/?#]+"
r"/comments|gallery)|redd\.it)/([a-z0-9]+)")
- test = (
- ("https://www.reddit.com/r/lavaporn/comments/8cqhub/", {
- "pattern": r"https://c2.staticflickr.com/8/7272/\w+_k.jpg",
- "count": 1,
- }),
- ("https://www.reddit.com/r/lavaporn/comments/8cqhub/", {
- "options": (("comments", 500),),
- "pattern": r"https://",
- "count": 3,
- }),
- ("https://www.reddit.com/gallery/hrrh23", {
- "url": "25b91ede15459470274dd17291424b037ed8b0ae",
- "content": "1e7dde4ee7d5f4c4b45749abfd15b2dbfa27df3f",
- "count": 3,
- }),
- # video
- ("https://www.reddit.com/r/aww/comments/90bu6w/", {
- "pattern": r"ytdl:https://v.redd.it/gyh95hiqc0b11",
- "count": 1,
- }),
- # video (ytdl)
- ("https://www.reddit.com/r/aww/comments/90bu6w/", {
- "options": (("videos", "ytdl"),),
- "pattern": r"ytdl:https://www.reddit.com/r/aww/comments/90bu6w"
- r"/heat_index_was_110_degrees_so_we_offered_him_a/",
- "count": 1,
- }),
- # video (dash)
- ("https://www.reddit.com/r/aww/comments/90bu6w/", {
- "options": (("videos", "dash"),),
- "pattern": r"ytdl:https://v.redd.it/gyh95hiqc0b11"
- r"/DASHPlaylist.mpd\?a=",
- "count": 1,
- }),
- # deleted gallery (#953)
- ("https://www.reddit.com/gallery/icfgzv", {
- "count": 0,
- }),
- # animated gallery items (#955)
- ("https://www.reddit.com/r/araragi/comments/ib32hm", {
- "pattern": r"https://i\.redd\.it/\w+\.gif",
- "count": 2,
- }),
- # "failed" gallery item (#1127)
- ("https://www.reddit.com/r/cosplay/comments/jvwaqr", {
- "count": 1,
- }),
- # gallery with no 'media_metadata' (#2001)
- ("https://www.reddit.com/r/kpopfap/comments/qjj04q/", {
- "count": 0,
- }),
- # user page submission (#2301)
- ("https://www.reddit.com/user/TheSpiritTree/comments/srilyf/", {
- "pattern": r"https://i.redd.it/8fpgv17yqlh81.jpg",
- "count": 1,
- }),
- # cross-posted video (#887, #3586, #3976)
- ("https://www.reddit.com/r/kittengifs/comments/12m0b8d", {
- "pattern": r"ytdl:https://v\.redd\.it/cvabpjacrvta1",
- }),
- ("https://old.reddit.com/r/lavaporn/comments/2a00np/"),
- ("https://np.reddit.com/r/lavaporn/comments/2a00np/"),
- ("https://m.reddit.com/r/lavaporn/comments/2a00np/"),
- ("https://redd.it/2a00np/"),
- )
+ example = "https://www.reddit.com/r/SUBREDDIT/comments/id/"
def __init__(self, match):
RedditExtractor.__init__(self, match)
@@ -319,22 +271,7 @@ class RedditImageExtractor(Extractor):
archive_fmt = "{filename}"
pattern = (r"(?:https?://)?((?:i|preview)\.redd\.it|i\.reddituploads\.com)"
r"/([^/?#]+)(\?[^#]*)?")
- test = (
- ("https://i.redd.it/upjtjcx2npzz.jpg", {
- "url": "0de614900feef103e580b632190458c0b62b641a",
- "content": "cc9a68cf286708d5ce23c68e79cd9cf7826db6a3",
- }),
- (("https://i.reddituploads.com/0f44f1b1fca2461f957c713d9592617d"
- "?fit=max&h=1536&w=1536&s=e96ce7846b3c8e1f921d2ce2671fb5e2"), {
- "url": "f24f25efcedaddeec802e46c60d77ef975dc52a5",
- "content": "541dbcc3ad77aa01ee21ca49843c5e382371fae7",
- }),
- # preview.redd.it -> i.redd.it
- (("https://preview.redd.it/00af44lpn0u51.jpg?width=960&crop=smart"
- "&auto=webp&v=enabled&s=dbca8ab84033f4a433772d9c15dbe0429c74e8ac"), {
- "pattern": r"^https://i\.redd\.it/00af44lpn0u51\.jpg$"
- }),
- )
+ example = "https://i.redd.it/NAME.EXT"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -399,9 +336,9 @@ class RedditAPI():
if not self.refresh_token:
# allow downloading from quarantined subreddits (#2180)
- extractor._cookiejar.set(
+ extractor.cookies.set(
"_options", '%7B%22pref_quarantine_optin%22%3A%20true%7D',
- domain=extractor.cookiedomain)
+ domain=extractor.cookies_domain)
def submission(self, submission_id):
"""Fetch the (submission, comments)=-tuple for a submission id"""
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index bfd18b5..e246405 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -24,6 +24,8 @@ class RedgifsExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.key = match.group(1)
+
+ def _init(self):
self.api = RedgifsAPI(self)
formats = self.config("format")
@@ -87,53 +89,40 @@ class RedgifsUserExtractor(RedgifsExtractor):
"""Extractor for redgifs user profiles"""
subcategory = "user"
directory_fmt = ("{category}", "{userName}")
- pattern = r"(?:https?://)?(?:\w+\.)?redgifs\.com/users/([^/?#]+)/?$"
- test = (
- ("https://www.redgifs.com/users/Natalifiction", {
- "pattern": r"https://\w+\.redgifs\.com/[\w-]+\.mp4",
- "count": ">= 100",
- }),
- ("https://v3.redgifs.com/users/lamsinka89", {
- "pattern": r"https://\w+\.redgifs\.com/[\w-]+\.(mp4|jpg)",
- "count": ">= 100",
- }),
- )
+ pattern = (r"(?:https?://)?(?:\w+\.)?redgifs\.com/users/([^/?#]+)/?"
+ r"(?:\?([^#]+))?$")
+ example = "https://www.redgifs.com/users/USER"
+
+ def __init__(self, match):
+ RedgifsExtractor.__init__(self, match)
+ self.query = match.group(2)
def metadata(self):
return {"userName": self.key}
def gifs(self):
- return self.api.user(self.key)
+ order = text.parse_query(self.query).get("order")
+ return self.api.user(self.key, order or "new")
class RedgifsCollectionExtractor(RedgifsExtractor):
"""Extractor for an individual user collection"""
subcategory = "collection"
- directory_fmt = ("{category}", "{userName}", "{folderName}")
- archive_fmt = "{folderId}_{id}"
+ directory_fmt = (
+ "{category}", "{collection[userName]}", "{collection[folderName]}")
+ archive_fmt = "{collection[folderId]}_{id}"
pattern = (r"(?:https?://)?(?:www\.)?redgifs\.com/users"
r"/([^/?#]+)/collections/([^/?#]+)")
- test = (
- ("https://www.redgifs.com/users/boombah123/collections/2631326bbd", {
- "pattern": r"https://\w+\.redgifs\.com/[\w-]+\.mp4",
- "range": "1-20",
- "count": 20,
- }),
- ("https://www.redgifs.com/users/boombah123/collections/9e6f7dd41f", {
- "pattern": r"https://\w+\.redgifs\.com/[\w-]+\.mp4",
- "range": "1-20",
- "count": 20,
- }),
- )
+ example = "https://www.redgifs.com/users/USER/collections/ID"
def __init__(self, match):
RedgifsExtractor.__init__(self, match)
self.collection_id = match.group(2)
def metadata(self):
- data = {"userName": self.key}
- data.update(self.api.collection_info(self.key, self.collection_id))
- return data
+ collection = self.api.collection_info(self.key, self.collection_id)
+ collection["userName"] = self.key
+ return {"collection": collection}
def gifs(self):
return self.api.collection(self.key, self.collection_id)
@@ -144,11 +133,7 @@ class RedgifsCollectionsExtractor(RedgifsExtractor):
subcategory = "collections"
pattern = (r"(?:https?://)?(?:www\.)?redgifs\.com/users"
r"/([^/?#]+)/collections/?$")
- test = ("https://www.redgifs.com/users/boombah123/collections", {
- "pattern": (r"https://www\.redgifs\.com/users"
- r"/boombah123/collections/\w+"),
- "count": ">= 3",
- })
+ example = "https://www.redgifs.com/users/USER/collections"
def items(self):
for collection in self.api.collections(self.key):
@@ -158,27 +143,23 @@ class RedgifsCollectionsExtractor(RedgifsExtractor):
yield Message.Queue, url, collection
+class RedgifsNichesExtractor(RedgifsExtractor):
+ """Extractor for redgifs niches"""
+ subcategory = "niches"
+ pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/niches/([^/?#]+)"
+ example = "https://www.redgifs.com/niches/NAME"
+
+ def gifs(self):
+ return self.api.niches(self.key)
+
+
class RedgifsSearchExtractor(RedgifsExtractor):
"""Extractor for redgifs search results"""
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
pattern = (r"(?:https?://)?(?:\w+\.)?redgifs\.com"
r"/(?:gifs/([^/?#]+)|browse)(?:/?\?([^#]+))?")
- test = (
- ("https://www.redgifs.com/gifs/jav", {
- "pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.(mp4|jpg)",
- "range": "1-10",
- "count": 10,
- }),
- ("https://www.redgifs.com/browse?tags=JAV", {
- "pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.(mp4|jpg)",
- "range": "1-10",
- "count": 10,
- }),
- ("https://www.redgifs.com/gifs/jav?order=best&verified=1"),
- ("https://www.redgifs.com/browse?type=i&verified=y&order=top7"),
- ("https://v3.redgifs.com/browse?tags=JAV"),
- )
+ example = "https://www.redgifs.com/gifs/TAG"
def __init__(self, match):
RedgifsExtractor.__init__(self, match)
@@ -202,29 +183,10 @@ class RedgifsImageExtractor(RedgifsExtractor):
subcategory = "image"
pattern = (r"(?:https?://)?(?:"
r"(?:\w+\.)?redgifs\.com/(?:watch|ifr)|"
+ r"(?:\w+\.)?gfycat\.com(?:/gifs/detail|/\w+)?|"
r"(?:www\.)?gifdeliverynetwork\.com|"
r"i\.redgifs\.com/i)/([A-Za-z]+)")
- test = (
- ("https://redgifs.com/watch/foolishforkedabyssiniancat", {
- "pattern": r"https://\w+\.redgifs\.com"
- r"/FoolishForkedAbyssiniancat\.mp4",
- "content": "f6e03f1df9a2ff2a74092f53ee7580d2fb943533",
- }),
- # gallery (#4021)
- ("https://www.redgifs.com/watch/desertedbaregraywolf", {
- "pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.jpg",
- "count": 4,
- "keyword": {
- "num": int,
- "count": 4,
- "gallery": "187ad979693-1922-fc66-0000-a96fb07b8a5d",
- },
- }),
- ("https://redgifs.com/ifr/FoolishForkedAbyssiniancat"),
- ("https://i.redgifs.com/i/FoolishForkedAbyssiniancat"),
- ("https://www.gifdeliverynetwork.com/foolishforkedabyssiniancat"),
- ("https://v3.redgifs.com/watch/FoolishForkedAbyssiniancat"),
- )
+ example = "https://redgifs.com/watch/ID"
def gifs(self):
return (self.api.gif(self.key),)
@@ -238,7 +200,6 @@ class RedgifsAPI():
def __init__(self, extractor):
self.extractor = extractor
self.headers = {
- "Referer" : extractor.root + "/",
"authorization" : None,
"content-type" : "application/json",
"x-customheader": extractor.root + "/",
@@ -253,7 +214,7 @@ class RedgifsAPI():
endpoint = "/v2/gallery/" + gallery_id
return self._call(endpoint)
- def user(self, user, order="best"):
+ def user(self, user, order="new"):
endpoint = "/v2/users/{}/search".format(user.lower())
params = {"order": order}
return self._pagination(endpoint, params)
@@ -271,6 +232,10 @@ class RedgifsAPI():
endpoint = "/v2/users/{}/collections".format(user)
return self._pagination(endpoint, key="collections")
+ def niches(self, niche):
+ endpoint = "/v2/niches/{}/gifs".format(niche)
+ return self._pagination(endpoint)
+
def search(self, params):
endpoint = "/v2/gifs/search"
params["search_text"] = params.pop("tags", None)
diff --git a/gallery_dl/extractor/rule34us.py b/gallery_dl/extractor/rule34us.py
index 00b6972..6439a22 100644
--- a/gallery_dl/extractor/rule34us.py
+++ b/gallery_dl/extractor/rule34us.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,8 +10,8 @@
from .booru import BooruExtractor
from .. import text
-import re
import collections
+import re
class Rule34usExtractor(BooruExtractor):
@@ -19,8 +19,7 @@ class Rule34usExtractor(BooruExtractor):
root = "https://rule34.us"
per_page = 42
- def __init__(self, match):
- BooruExtractor.__init__(self, match)
+ def _init(self):
self._find_tags = re.compile(
r'<li class="([^-"]+)-tag"[^>]*><a href="[^;"]+;q=([^"]+)').findall
@@ -55,11 +54,7 @@ class Rule34usTagExtractor(Rule34usExtractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = r"(?:https?://)?rule34\.us/index\.php\?r=posts/index&q=([^&#]+)"
- test = ("https://rule34.us/index.php?r=posts/index&q=[terios]_elysion", {
- "pattern": r"https://img\d*\.rule34\.us"
- r"/images/../../[0-9a-f]{32}\.\w+",
- "count": 10,
- })
+ example = "https://rule34.us/index.php?r=posts/index&q=TAG"
def __init__(self, match):
Rule34usExtractor.__init__(self, match)
@@ -96,31 +91,7 @@ class Rule34usPostExtractor(Rule34usExtractor):
subcategory = "post"
archive_fmt = "{id}"
pattern = r"(?:https?://)?rule34\.us/index\.php\?r=posts/view&id=(\d+)"
- test = (
- ("https://rule34.us/index.php?r=posts/view&id=3709005", {
- "pattern": r"https://img\d*\.rule34\.us/images/14/7b"
- r"/147bee6fc2e13f73f5f9bac9d4930b13\.png",
- "content": "d714342ea84050f82dda5f0c194d677337abafc5",
- }),
- ("https://rule34.us/index.php?r=posts/view&id=4576310", {
- "pattern": r"https://video\.rule34\.us/images/a2/94"
- r"/a294ff8e1f8e0efa041e5dc9d1480011\.mp4",
- "keyword": {
- "extension": "mp4",
- "file_url": str,
- "filename": "a294ff8e1f8e0efa041e5dc9d1480011",
- "height": "3982",
- "id": "4576310",
- "md5": "a294ff8e1f8e0efa041e5dc9d1480011",
- "score": r"re:\d+",
- "tags": "tagme, video",
- "tags_general": "video",
- "tags_metadata": "tagme",
- "uploader": "Anonymous",
- "width": "3184",
- },
- }),
- )
+ example = "https://rule34.us/index.php?r=posts/view&id=12345"
def __init__(self, match):
Rule34usExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 09e5421..745a351 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -24,8 +24,9 @@ class SankakuExtractor(BooruExtractor):
"""Base class for sankaku channel extractors"""
basecategory = "booru"
category = "sankaku"
+ root = "https://sankaku.app"
filename_fmt = "{category}_{id}_{md5}.{extension}"
- cookiedomain = None
+ cookies_domain = None
_warning = True
TAG_TYPES = {
@@ -87,32 +88,7 @@ class SankakuTagExtractor(SankakuExtractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = BASE_PATTERN + r"/?\?([^#]*)"
- test = (
- ("https://sankaku.app/?tags=bonocho", {
- "count": 5,
- "pattern": r"https://s\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
- r"/[0-9a-f]{32}\.\w+\?e=\d+&(expires=\d+&)?m=[^&#]+",
- }),
- ("https://beta.sankakucomplex.com/?tags=bonocho"),
- ("https://chan.sankakucomplex.com/?tags=bonocho"),
- ("https://black.sankakucomplex.com/?tags=bonocho"),
- ("https://white.sankakucomplex.com/?tags=bonocho"),
- ("https://sankaku.app/ja?tags=order%3Apopularity"),
- ("https://sankaku.app/no/?tags=order%3Apopularity"),
- # error on five or more tags
- ("https://chan.sankakucomplex.com/?tags=bonocho+a+b+c+d", {
- "options": (("username", None),),
- "exception": exception.StopExtraction,
- }),
- # match arbitrary query parameters
- ("https://chan.sankakucomplex.com"
- "/?tags=marie_rose&page=98&next=3874906&commit=Search"),
- # 'date:' tags (#1790)
- ("https://chan.sankakucomplex.com/?tags=date:2023-03-20", {
- "range": "1",
- "count": 1,
- }),
- )
+ example = "https://sankaku.app/?tags=TAG"
def __init__(self, match):
SankakuExtractor.__init__(self, match)
@@ -142,13 +118,7 @@ class SankakuPoolExtractor(SankakuExtractor):
directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name_en]}")
archive_fmt = "p_{pool}_{id}"
pattern = BASE_PATTERN + r"/(?:books|pool/show)/(\d+)"
- test = (
- ("https://sankaku.app/books/90", {
- "count": 5,
- }),
- ("https://beta.sankakucomplex.com/books/90"),
- ("https://chan.sankakucomplex.com/pool/show/90"),
- )
+ example = "https://sankaku.app/books/12345"
def __init__(self, match):
SankakuExtractor.__init__(self, match)
@@ -174,50 +144,7 @@ class SankakuPostExtractor(SankakuExtractor):
subcategory = "post"
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/post/show/([0-9a-f]+)"
- test = (
- ("https://sankaku.app/post/show/360451", {
- "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
- "options": (("tags", True),),
- "keyword": {
- "tags_artist" : ["bonocho"],
- "tags_studio" : ["dc_comics"],
- "tags_medium" : list,
- "tags_copyright": list,
- "tags_character": list,
- "tags_general" : list,
- },
- }),
- # 'contentious_content'
- ("https://sankaku.app/post/show/21418978", {
- "pattern": r"https://s\.sankakucomplex\.com"
- r"/data/13/3c/133cda3bfde249c504284493903fb985\.jpg",
- }),
- # empty tags (#1617)
- ("https://sankaku.app/post/show/20758561", {
- "options": (("tags", True),),
- "count": 1,
- "keyword": {
- "tags": list,
- "tags_general": ["key(mangaka)", "key(mangaka)"],
- },
- }),
- # md5 hexdigest instead of ID (#3952)
- (("https://chan.sankakucomplex.com/post/show"
- "/f8ba89043078f0e4be2d9c46550b840a"), {
- "pattern": r"https://s\.sankakucomplex\.com"
- r"/data/f8/ba/f8ba89043078f0e4be2d9c46550b840a\.jpg",
- "count": 1,
- "keyword": {
- "id": 33195194,
- "md5": "f8ba89043078f0e4be2d9c46550b840a",
- },
- }),
- ("https://chan.sankakucomplex.com/post/show/360451"),
- ("https://chan.sankakucomplex.com/ja/post/show/360451"),
- ("https://beta.sankakucomplex.com/post/show/360451"),
- ("https://white.sankakucomplex.com/post/show/360451"),
- ("https://black.sankakucomplex.com/post/show/360451"),
- )
+ example = "https://sankaku.app/post/show/12345"
def __init__(self, match):
SankakuExtractor.__init__(self, match)
@@ -231,13 +158,7 @@ class SankakuBooksExtractor(SankakuExtractor):
"""Extractor for books by tag search on sankaku.app"""
subcategory = "books"
pattern = BASE_PATTERN + r"/books/?\?([^#]*)"
- test = (
- ("https://sankaku.app/books?tags=aiue_oka", {
- "range": "1-20",
- "count": 20,
- }),
- ("https://beta.sankakucomplex.com/books?tags=aiue_oka"),
- )
+ example = "https://sankaku.app/books?tags=TAG"
def __init__(self, match):
SankakuExtractor.__init__(self, match)
@@ -258,9 +179,9 @@ class SankakuAPI():
def __init__(self, extractor):
self.extractor = extractor
self.headers = {
- "Accept" : "application/vnd.sankaku.api+json;v=2",
- "Origin" : extractor.root,
- "Referer": extractor.root + "/",
+ "Accept" : "application/vnd.sankaku.api+json;v=2",
+ "Platform": "web-app",
+ "Origin" : extractor.root,
}
self.username, self.password = self.extractor._get_auth_info()
diff --git a/gallery_dl/extractor/sankakucomplex.py b/gallery_dl/extractor/sankakucomplex.py
index 47e067b..e1d4153 100644
--- a/gallery_dl/extractor/sankakucomplex.py
+++ b/gallery_dl/extractor/sankakucomplex.py
@@ -30,32 +30,8 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
filename_fmt = "{filename}.{extension}"
archive_fmt = "{date:%Y%m%d}_{filename}"
pattern = (r"(?:https?://)?www\.sankakucomplex\.com"
- r"/(\d{4}/\d\d/\d\d/[^/?#]+)")
- test = (
- ("https://www.sankakucomplex.com/2019/05/11/twitter-cosplayers", {
- "url": "4a9ecc5ae917fbce469280da5b6a482510cae84d",
- "keyword": "bfe08310e7d9a572f568f6900e0ed0eb295aa2b3",
- }),
- ("https://www.sankakucomplex.com/2009/12/01/sexy-goddesses-of-2ch", {
- "url": "a1e249173fd6c899a8134fcfbd9c925588a63f7c",
- "keyword": "e78fcc23c2711befc0969a45ea5082a29efccf68",
- }),
- # videos (#308)
- (("https://www.sankakucomplex.com/2019/06/11"
- "/darling-ol-goddess-shows-off-her-plump-lower-area/"), {
- "pattern": r"/wp-content/uploads/2019/06/[^/]+\d\.mp4",
- "range": "26-",
- "count": 5,
- }),
- # youtube embeds (#308)
- (("https://www.sankakucomplex.com/2015/02/12"
- "/snow-miku-2015-live-magical-indeed/"), {
- "options": (("embeds", True),),
- "pattern": r"https://www.youtube.com/embed/",
- "range": "2-",
- "count": 2,
- }),
- )
+ r"/(\d\d\d\d/\d\d/\d\d/[^/?#]+)")
+ example = "https://www.sankakucomplex.com/1970/01/01/TITLE"
def items(self):
url = "{}/{}/?pg=X".format(self.root, self.path)
@@ -113,15 +89,7 @@ class SankakucomplexTagExtractor(SankakucomplexExtractor):
subcategory = "tag"
pattern = (r"(?:https?://)?www\.sankakucomplex\.com"
r"/((?:tag|category|author)/[^/?#]+)")
- test = (
- ("https://www.sankakucomplex.com/tag/cosplay/", {
- "range": "1-50",
- "count": 50,
- "pattern": SankakucomplexArticleExtractor.pattern,
- }),
- ("https://www.sankakucomplex.com/category/anime/"),
- ("https://www.sankakucomplex.com/author/rift/page/5/"),
- )
+ example = "https://www.sankakucomplex.com/tag/TAG/"
def items(self):
pnum = 1
diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py
index 711435e..edfe1dc 100644
--- a/gallery_dl/extractor/seiga.py
+++ b/gallery_dl/extractor/seiga.py
@@ -16,7 +16,7 @@ class SeigaExtractor(Extractor):
"""Base class for seiga extractors"""
category = "seiga"
archive_fmt = "{image_id}"
- cookiedomain = ".nicovideo.jp"
+ cookies_domain = ".nicovideo.jp"
root = "https://seiga.nicovideo.jp"
def __init__(self, match):
@@ -24,7 +24,7 @@ class SeigaExtractor(Extractor):
self.start_image = 0
def items(self):
- if not self._check_cookies(("user_session",)):
+ if not self.cookies_check(("user_session",)):
raise exception.StopExtraction("'user_session' cookie required")
images = iter(self.get_images())
@@ -58,32 +58,7 @@ class SeigaUserExtractor(SeigaExtractor):
filename_fmt = "{category}_{user[id]}_{image_id}.{extension}"
pattern = (r"(?:https?://)?(?:www\.|(?:sp\.)?seiga\.)?nicovideo\.jp/"
r"user/illust/(\d+)(?:\?(?:[^&]+&)*sort=([^&#]+))?")
- test = (
- ("https://seiga.nicovideo.jp/user/illust/39537793", {
- "pattern": r"https://lohas\.nicoseiga\.jp/priv/[0-9a-f]+/\d+/\d+",
- "count": ">= 4",
- "keyword": {
- "user": {
- "id": 39537793,
- "message": str,
- "name": str,
- },
- "clips": int,
- "comments": int,
- "count": int,
- "extension": None,
- "image_id": int,
- "title": str,
- "views": int,
- },
- }),
- ("https://seiga.nicovideo.jp/user/illust/79433", {
- "exception": exception.NotFoundError,
- }),
- ("https://seiga.nicovideo.jp/user/illust/39537793"
- "?sort=image_view&target=illust_all"),
- ("https://sp.seiga.nicovideo.jp/user/illust/39537793"),
- )
+ example = "https://seiga.nicovideo.jp/user/illust/12345"
def __init__(self, match):
SeigaExtractor.__init__(self, match)
@@ -156,26 +131,7 @@ class SeigaImageExtractor(SeigaExtractor):
r"(?:seiga\.|www\.)?nicovideo\.jp/(?:seiga/im|image/source/)"
r"|sp\.seiga\.nicovideo\.jp/seiga/#!/im"
r"|lohas\.nicoseiga\.jp/(?:thumb|(?:priv|o)/[^/]+/\d+)/)(\d+)")
- test = (
- ("https://seiga.nicovideo.jp/seiga/im5977527", {
- "keyword": "c8339781da260f7fc44894ad9ada016f53e3b12a",
- "content": "d9202292012178374d57fb0126f6124387265297",
- }),
- ("https://seiga.nicovideo.jp/seiga/im123", {
- "exception": exception.NotFoundError,
- }),
- ("https://seiga.nicovideo.jp/seiga/im10877923", {
- "pattern": r"https://lohas\.nicoseiga\.jp/priv/5936a2a6c860a600e46"
- r"5e0411c0822e0b510e286/1688757110/10877923",
- }),
- ("https://seiga.nicovideo.jp/image/source/5977527"),
- ("https://sp.seiga.nicovideo.jp/seiga/#!/im5977527"),
- ("https://lohas.nicoseiga.jp/thumb/5977527i"),
- ("https://lohas.nicoseiga.jp/priv"
- "/759a4ef1c639106ba4d665ee6333832e647d0e4e/1549727594/5977527"),
- ("https://lohas.nicoseiga.jp/o"
- "/759a4ef1c639106ba4d665ee6333832e647d0e4e/1549727594/5977527"),
- )
+ example = "https://seiga.nicovideo.jp/seiga/im12345"
def __init__(self, match):
SeigaExtractor.__init__(self, match)
@@ -186,7 +142,7 @@ class SeigaImageExtractor(SeigaExtractor):
return num
def get_images(self):
- self.session.cookies.set(
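+        # skip the "fetish warning" interstitial so the image page loads directly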
+ self.cookies.set(
"skip_fetish_warning", "1", domain="seiga.nicovideo.jp")
url = "{}/seiga/im{}".format(self.root, self.image_id)
diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py
index 92c9d2c..a8fdc4c 100644
--- a/gallery_dl/extractor/senmanga.py
+++ b/gallery_dl/extractor/senmanga.py
@@ -17,62 +17,13 @@ class SenmangaChapterExtractor(ChapterExtractor):
category = "senmanga"
root = "https://raw.senmanga.com"
pattern = r"(?:https?://)?raw\.senmanga\.com(/[^/?#]+/[^/?#]+)"
- test = (
- ("https://raw.senmanga.com/Bokura-wa-Minna-Kawaisou/37A/1", {
- "pattern": r"https://raw\.senmanga\.com/viewer"
- r"/Bokura-wa-Minna-Kawaisou/37A/[12]",
- "url": "5f95140ff511d8497e2ec08fa7267c6bb231faec",
- "content": "556a16d5ca3441d7a5807b6b5ac06ec458a3e4ba",
- "keyword": {
- "chapter": "37A",
- "count": 2,
- "extension": "",
- "filename": "re:[12]",
- "lang": "ja",
- "language": "Japanese",
- "manga": "Bokura wa Minna Kawaisou",
- "page": int,
- },
- }),
- ("http://raw.senmanga.com/Love-Lab/2016-03/1", {
- "pattern": r"https://raw\.senmanga\.com/viewer"
- r"/Love-Lab/2016-03/\d",
- "url": "8347b9f00c14b864dd3c19a1f5ae52adb2ef00de",
- "keyword": {
- "chapter": "2016-03",
- "count": 9,
- "extension": "",
- "filename": r"re:\d",
- "manga": "Renai Lab 恋愛ラボ",
- },
- }),
- ("https://raw.senmanga.com/akabane-honeko-no-bodyguard/1", {
- "pattern": r"https://i\d\.wp\.com/kumacdn.club/image-new-2/a"
- r"/akabane-honeko-no-bodyguard/chapter-1"
- r"/\d+-[0-9a-f]{13}\.jpg",
- "keyword": {
- "chapter": "1",
- "count": 65,
- "extension": "jpg",
- "filename": r"re:\d+-\w+",
- "manga": "Akabane Honeko no Bodyguard",
- },
- }),
- # no http scheme ()
- ("https://raw.senmanga.com/amama-cinderella/3", {
- "pattern": r"^https://kumacdn.club/image-new-2/a/amama-cinderella"
- r"/chapter-3/.+\.jpg",
- "count": 30,
- }),
- )
+ example = "https://raw.senmanga.com/MANGA/CHAPTER"
- def __init__(self, match):
- ChapterExtractor.__init__(self, match)
+ def _init(self):
self.session.headers["Referer"] = self.gallery_url
# select "All pages" viewer
- self.session.cookies.set(
- "viewer", "1", domain="raw.senmanga.com")
+ self.cookies.set("viewer", "1", domain="raw.senmanga.com")
def metadata(self, page):
title = text.extr(page, "<title>", "</title>")
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index c6588de..80f2aea 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -106,43 +106,7 @@ class SexcomPinExtractor(SexcomExtractor):
subcategory = "pin"
directory_fmt = ("{category}",)
pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+)(?!.*#related$)"
- test = (
- # picture
- ("https://www.sex.com/pin/21241874-sexy-ecchi-girls-166/", {
- "pattern": "https://cdn.sex.com/images/.+/2014/08/26/7637609.jpg",
- "content": "ebe1814dadfebf15d11c6af4f6afb1a50d6c2a1c",
- "keyword": {
- "comments" : int,
- "date" : "dt:2014-10-19 15:45:44",
- "extension": "jpg",
- "filename" : "7637609",
- "likes" : int,
- "pin_id" : 21241874,
- "repins" : int,
- "tags" : list,
- "thumbnail": str,
- "title" : "Sexy Ecchi Girls 166",
- "type" : "picture",
- "uploader" : "mangazeta",
- "url" : str,
- },
- }),
- # gif
- ("https://www.sex.com/pin/55435122-ecchi/", {
- "pattern": "https://cdn.sex.com/images/.+/2017/12/07/18760842.gif",
- "content": "176cc63fa05182cb0438c648230c0f324a5965fe",
- }),
- # video
- ("https://www.sex.com/pin/55748341/", {
- "pattern": r"https://cdn\.sex\.com/videos/pinporn"
- r"/2018/02/10/776229_hd\.mp4",
- "content": "e1a5834869163e2c4d1ca2677f5b7b367cf8cfff",
- }),
- # pornhub embed
- ("https://www.sex.com/pin/55847384-very-nicely-animated/", {
- "pattern": "ytdl:https://www.pornhub.com/embed/ph56ef24b6750f2",
- }),
- )
+ example = "https://www.sex.com/pin/12345-TITLE/"
def __init__(self, match):
SexcomExtractor.__init__(self, match)
@@ -157,9 +121,7 @@ class SexcomRelatedPinExtractor(SexcomPinExtractor):
subcategory = "related-pin"
directory_fmt = ("{category}", "related {original_pin[pin_id]}")
pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+).*#related$"
- test = ("https://www.sex.com/pin/21241874/#related", {
- "count": ">= 20",
- })
+ example = "https://www.sex.com/pin/12345#related"
def metadata(self):
pin = self._parse_pin(SexcomPinExtractor.pins(self)[0])
@@ -176,9 +138,7 @@ class SexcomPinsExtractor(SexcomExtractor):
subcategory = "pins"
directory_fmt = ("{category}", "{user}")
pattern = r"(?:https?://)?(?:www\.)?sex\.com/user/([^/?#]+)/pins/"
- test = ("https://www.sex.com/user/sirjuan79/pins/", {
- "count": ">= 15",
- })
+ example = "https://www.sex.com/user/USER/pins/"
def __init__(self, match):
SexcomExtractor.__init__(self, match)
@@ -198,9 +158,7 @@ class SexcomBoardExtractor(SexcomExtractor):
directory_fmt = ("{category}", "{user}", "{board}")
pattern = (r"(?:https?://)?(?:www\.)?sex\.com/user"
r"/([^/?#]+)/(?!(?:following|pins|repins|likes)/)([^/?#]+)")
- test = ("https://www.sex.com/user/ronin17/exciting-hentai/", {
- "count": ">= 15",
- })
+ example = "https://www.sex.com/user/USER/BOARD/"
def __init__(self, match):
SexcomExtractor.__init__(self, match)
@@ -224,17 +182,7 @@ class SexcomSearchExtractor(SexcomExtractor):
pattern = (r"(?:https?://)?(?:www\.)?sex\.com/((?:"
r"(pic|gif|video)s/([^/?#]*)|search/(pic|gif|video)s"
r")/?(?:\?([^#]+))?)")
- test = (
- ("https://www.sex.com/search/pics?query=ecchi", {
- "range": "1-10",
- "count": 10,
- }),
- ("https://www.sex.com/videos/hentai/", {
- "range": "1-10",
- "count": 10,
- }),
- ("https://www.sex.com/pics/?sort=popular&sub=all&page=1"),
- )
+ example = "https://www.sex.com/search/pics?query=QUERY"
def __init__(self, match):
SexcomExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py
index 285cd8f..912e601 100644
--- a/gallery_dl/extractor/shimmie2.py
+++ b/gallery_dl/extractor/shimmie2.py
@@ -18,21 +18,23 @@ class Shimmie2Extractor(BaseExtractor):
filename_fmt = "{category}_{id}{md5:?_//}.{extension}"
archive_fmt = "{id}"
- def __init__(self, match):
- BaseExtractor.__init__(self, match)
-
+ def _init(self):
try:
instance = INSTANCES[self.category]
except KeyError:
- pass
- else:
- cookies = instance.get("cookies")
- if cookies:
- domain = self.root.rpartition("/")[2]
- self._update_cookies_dict(cookies, domain=domain)
- file_url = instance.get("file_url")
- if file_url:
- self.file_url_fmt = file_url
+ return
+
+ cookies = instance.get("cookies")
+ if cookies:
+ domain = self.root.rpartition("/")[2]
+ self.cookies_update_dict(cookies, domain=domain)
+
+ file_url = instance.get("file_url")
+ if file_url:
+ self.file_url_fmt = file_url
+
+ if self.category == "giantessbooru":
+ self.posts = self._posts_giantessbooru
def items(self):
data = self.metadata()
@@ -64,10 +66,6 @@ class Shimmie2Extractor(BaseExtractor):
INSTANCES = {
- "mememuseum": {
- "root": "https://meme.museum",
- "pattern": r"meme\.museum",
- },
"loudbooru": {
"root": "https://loudbooru.com",
"pattern": r"loudbooru\.com",
@@ -85,11 +83,11 @@ INSTANCES = {
"cavemanon": {
"root": "https://booru.cavemanon.xyz",
"pattern": r"booru\.cavemanon\.xyz",
- "file_url": "{0}/index.php?q=image/{2}.{4}"
+ "file_url": "{0}/index.php?q=image/{2}.{4}",
},
}
-BASE_PATTERN = Shimmie2Extractor.update(INSTANCES) + r"/(?:index\.php\?q=)?"
+BASE_PATTERN = Shimmie2Extractor.update(INSTANCES) + r"/(?:index\.php\?q=/?)?"
class Shimmie2TagExtractor(Shimmie2Extractor):
@@ -98,33 +96,7 @@ class Shimmie2TagExtractor(Shimmie2Extractor):
directory_fmt = ("{category}", "{search_tags}")
file_url_fmt = "{}/_images/{}/{}%20-%20{}.{}"
pattern = BASE_PATTERN + r"post/list/([^/?#]+)(?:/(\d+))?()"
- test = (
- ("https://meme.museum/post/list/animated/1", {
- "pattern": r"https://meme\.museum/_images/\w+/\d+%20-%20",
- "count": ">= 30"
- }),
- ("https://loudbooru.com/post/list/original_character/1", {
- "pattern": r"https://loudbooru\.com/_images/[0-9a-f]{32}/\d+",
- "range": "1-100",
- "count": 100,
- }),
- ("https://giantessbooru.com/post/list/smiling/1", {
- "pattern": r"https://giantessbooru\.com/_images/[0-9a-f]{32}/\d+",
- "range": "1-100",
- "count": 100,
- }),
- ("https://tentaclerape.net/post/list/comic/1", {
- "pattern": r"https://tentaclerape\.net/_images/[0-9a-f]{32}/\d+",
- "range": "1-100",
- "count": 100,
- }),
- ("https://booru.cavemanon.xyz/index.php?q=post/list/Amber/1", {
- "pattern": r"https://booru\.cavemanon\.xyz"
- r"/index\.php\?q=image/\d+\.\w+",
- "range": "1-100",
- "count": 100,
- }),
- )
+ example = "https://loudbooru.com/post/list/TAG/1"
def __init__(self, match):
Shimmie2Extractor.__init__(self, match)
@@ -184,120 +156,43 @@ class Shimmie2TagExtractor(Shimmie2Extractor):
if not extr("/{}'>{}<".format(pnum, pnum), ">"):
return
+ def _posts_giantessbooru(self):
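+        # separate pagination logic for giantessbooru, whose page markup
+        # differs from the other Shimmie2 instances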
+ pnum = text.parse_int(self.page, 1)
+ file_url_fmt = (self.root + "/index.php?q=/image/{}.jpg").format
+
+ while True:
+ url = "{}/index.php?q=/post/list/{}/{}".format(
+ self.root, self.tags, pnum)
+ extr = text.extract_from(self.request(url).text)
+
+ while True:
+ pid = extr('href="./index.php?q=/post/view/', '&')
+ if not pid:
+ break
+
+ tags, dimensions, size = extr('title="', '"').split(" // ")
+ width, _, height = dimensions.partition("x")
+
+ yield {
+ "file_url": file_url_fmt(pid),
+ "id": pid,
+ "md5": "",
+ "tags": tags,
+ "width": width,
+ "height": height,
+ "size": text.parse_bytes(size[:-1]),
+ }
+
+ pnum += 1
+ if not extr('/{}">{}<'.format(pnum, pnum), ">"):
+ return
+
class Shimmie2PostExtractor(Shimmie2Extractor):
"""Extractor for single shimmie2 posts"""
subcategory = "post"
pattern = BASE_PATTERN + r"post/view/(\d+)"
- test = (
- ("https://meme.museum/post/view/10243", {
- "pattern": r"https://meme\.museum/_images/105febebcd5ca791ee332adc"
- r"49971f78/10243%20-%20g%20beard%20open_source%20richar"
- r"d_stallman%20stallman%20tagme%20text\.jpg",
- "content": "45565f3f141fc960a8ae1168b80e718a494c52d2",
- "keyword": {
- "extension": "jpg",
- "file_url": "https://meme.museum/_images/105febebcd5ca791ee332"
- "adc49971f78/10243%20-%20g%20beard%20open_source%2"
- "0richard_stallman%20stallman%20tagme%20text.jpg",
- "filename": "10243 - g beard open_source richard_stallman "
- "stallman tagme text",
- "height": 451,
- "id": 10243,
- "md5": "105febebcd5ca791ee332adc49971f78",
- "size": 0,
- "subcategory": "post",
- "tags": "/g/ beard open_source "
- "richard_stallman stallman tagme text",
- "width": 480,
- },
- }),
- ("https://loudbooru.com/post/view/33828", {
- "pattern": r"https://loudbooru\.com/_images/.+\.png",
- "content": "a4755f787ba23ae2aa297a46810f802ca9032739",
- "keyword": {
- "extension": "png",
- "file_url": "https://loudbooru.com/_images/ca2638d903c86e8337f"
- "e9aeb4974be88/33828%20-%202020%20artist%3Astikyfi"
- "nkaz%20character%3Alisa_loud%20cover%20fanfiction"
- "%3Aplatz_eins%20frowning%20half-closed_eyes%20sol"
- "o%20text%20title_card.png",
- "filename": "33828 - 2020 artist:stikyfinkaz character:lisa_"
- "loud cover fanfiction:platz_eins frowning "
- "half-closed_eyes solo text title_card",
- "height": 1920,
- "id": 33828,
- "md5": "ca2638d903c86e8337fe9aeb4974be88",
- "tags": "2020 artist:stikyfinkaz character:lisa_loud cover "
- "fanfiction:platz_eins frowning half-closed_eyes "
- "solo text title_card",
- "width": 1078,
- },
- }),
- ("https://giantessbooru.com/post/view/41", {
- "pattern": r"https://giantessbooru\.com/_images"
- r"/3f67e1986496806b7b14ff3e82ac5af4/41\.jpg",
- "content": "79115ed309d1f4e82e7bead6948760e889139c91",
- "keyword": {
- "extension": "jpg",
- "file_url": "https://giantessbooru.com/_images"
- "/3f67e1986496806b7b14ff3e82ac5af4/41.jpg",
- "filename": "41",
- "height": 0,
- "id": 41,
- "md5": "3f67e1986496806b7b14ff3e82ac5af4",
- "size": 0,
- "tags": "anime bare_midriff color drawing gentle giantess "
- "karbo looking_at_tinies negeyari outdoors smiling "
- "snake_girl white_hair",
- "width": 0
-
-
- },
- }),
- ("https://tentaclerape.net/post/view/10", {
- "pattern": r"https://tentaclerape\.net/\./index\.php"
- r"\?q=/image/10\.jpg",
- "content": "d0fd8f0f6517a76cb5e23ba09f3844950bf2c516",
- "keyword": {
- "extension": "jpg",
- "file_url": "https://tentaclerape.net/./index.php"
- "?q=/image/10.jpg",
- "filename": "10",
- "height": 427,
- "id": 10,
- "md5": "945db71eeccaef82ce44b77564260c0b",
- "size": 0,
- "subcategory": "post",
- "tags": "Deviant_Art Pet Tentacle artist_sche blonde_hair "
- "blouse boots green_eyes highheels leash miniskirt "
- "octopus schoolgirl white_skin willing",
- "width": 300,
- },
- }),
- # video
- ("https://tentaclerape.net/post/view/91267", {
- "pattern": r"https://tentaclerape\.net/\./index\.php"
- r"\?q=/image/91267\.mp4",
- }),
- ("https://booru.cavemanon.xyz/index.php?q=post/view/8335", {
- "pattern": r"https://booru\.cavemanon\.xyz"
- r"/index\.php\?q=image/8335\.png",
- "content": "7158f7e4abbbf143bad5835eb93dbe4d68c1d4ab",
- "keyword": {
- "extension": "png",
- "file_url": "https://booru.cavemanon.xyz"
- "/index.php?q=image/8335.png",
- "filename": "8335",
- "height": 460,
- "id": 8335,
- "md5": "",
- "size": 0,
- "tags": "Color Fang",
- "width": 459,
- },
- }),
- )
+ example = "https://loudbooru.com/post/view/12345"
def __init__(self, match):
Shimmie2Extractor.__init__(self, match)
@@ -313,7 +208,7 @@ class Shimmie2PostExtractor(Shimmie2Extractor):
"md5" : extr("/_thumbs/", "/"),
"file_url": self.root + (
extr("id='main_image' src='", "'") or
- extr("<source src='", "'")),
+ extr("<source src='", "'")).lstrip("."),
"width" : extr("data-width=", " ").strip("\"'"),
"height" : extr("data-height=", ">").partition(
" ")[0].strip("\"'"),
@@ -324,3 +219,18 @@ class Shimmie2PostExtractor(Shimmie2Extractor):
post["md5"] = text.extr(post["file_url"], "/_images/", "/")
return (post,)
+
+ def _posts_giantessbooru(self):
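+        # giantessbooru post pages likewise need their own parsing logic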
+ url = "{}/index.php?q=/post/view/{}".format(
+ self.root, self.post_id)
+ extr = text.extract_from(self.request(url).text)
+
+ return ({
+ "id" : self.post_id,
+ "tags" : extr(": ", "<").partition(" - ")[0].rstrip(")"),
+ "md5" : "",
+ "file_url": self.root + extr('id="main_image" src=".', '"'),
+ "width" : extr("orig_width =", ";"),
+ "height" : 0,
+ "size" : 0,
+ },)
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index f6e8bc0..a658cac 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -95,24 +95,7 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
subcategory = "collection"
directory_fmt = ("{category}", "{collection[title]}")
pattern = BASE_PATTERN + r"(/collections/[\w-]+)/?(?:$|[?#])"
- test = (
- ("https://chelseacrew.com/collections/flats"),
- ("https://www.fashionnova.com/collections/mini-dresses", {
- "range": "1-20",
- "count": 20,
- }),
- ("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
- ("https://www.fashionnova.com/collections/mini-dresses#1"),
- ("https://loungeunderwear.com/collections/apparel"),
- ("https://michaels.com.au/collections/microphones"),
- ("https://modcloth.com/collections/shoes"),
- ("https://www.ohpolly.com/collections/dresses-mini-dresses"),
- ("https://www.omgmiamiswimwear.com/collections/fajas"),
- ("https://pinupgirlclothing.com/collections/evening"),
- ("https://www.raidlondon.com/collections/flats"),
- ("https://www.unique-vintage.com/collections/flapper-1920s"),
- ("https://www.windsorstore.com/collections/dresses-ball-gowns"),
- )
+ example = "https://www.fashionnova.com/collections/TITLE"
def metadata(self):
return self.request(self.item_url + ".json").json()
@@ -134,32 +117,7 @@ class ShopifyProductExtractor(ShopifyExtractor):
subcategory = "product"
directory_fmt = ("{category}", "Products")
pattern = BASE_PATTERN + r"((?:/collections/[\w-]+)?/products/[\w-]+)"
- test = (
- ("https://chelseacrew.com/collections/flats/products/dora"),
- ("https://www.fashionnova.com/products/essential-slide-red", {
- "pattern": r"https?://cdn\d*\.shopify.com/",
- "count": 3,
- }),
- ("https://www.fashionnova.com/collections/flats/products/name"),
- ("https://de.loungeunderwear.com/products/ribbed-crop-top-black"),
- ("https://michaels.com.au/collections/audio/products"
- "/boya-by-wm4-pro-k5-2-4ghz-mic-android-1-1-101281"),
- ("https://modcloth.com/collections/shoes/products/heidii-brn"),
- (("https://www.ohpolly.com/products/edonia-ruched-triangle-cup"
- "-a-line-mini-dress-brown")),
- ("https://www.omgmiamiswimwear.com/products/la-medusa-maxi-dress", {
- "pattern": r"https://cdn\.shopify\.com/s/files/1/1819/6171/",
- "count": 5,
- }),
- ("https://pinupgirlclothing.com/collections/evening/products"
- "/clarice-coat-dress-in-olive-green-poly-crepe-laura-byrnes-design"),
- ("https://www.raidlondon.com/collections/flats/products"
- "/raid-addyson-chunky-flat-shoe-in-white"),
- ("https://www.unique-vintage.com/collections/flapper-1920s/products"
- "/unique-vintage-plus-size-black-silver-beaded-troyes-flapper-dress"),
- ("https://www.windsorstore.com/collections/accessories-belts/products"
- "/rhine-buckle-dbl-o-ring-pu-strap-belt-073010158001"),
- )
+ example = "https://www.fashionnova.com/collections/TITLE/products/NAME"
def products(self):
product = self.request(self.item_url + ".json").json()["product"]
diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py
index b5d116f..6f72291 100644
--- a/gallery_dl/extractor/simplyhentai.py
+++ b/gallery_dl/extractor/simplyhentai.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2021 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -19,20 +19,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
pattern = (r"(?:https?://)?(?!videos\.)([\w-]+\.)?simply-hentai\.com"
r"(?!/(?:album|gifs?|images?|series)(?:/|$))"
r"((?:/(?!(?:page|all-pages)(?:/|\.|$))[^/?#]+)+)")
- test = (
- (("https://original-work.simply-hentai.com"
- "/amazon-no-hiyaku-amazon-elixir"), {
- "url": "21613585ae5ec2f69ea579e9713f536fceab5bd5",
- "keyword": "9e87a0973553b2922ddee37958b8f5d87910af72",
- }),
- ("https://www.simply-hentai.com/notfound", {
- "exception": exception.GalleryDLException,
- }),
- # custom subdomain
- ("https://pokemon.simply-hentai.com/mao-friends-9bc39"),
- # www subdomain, two path segments
- ("https://www.simply-hentai.com/vocaloid/black-magnet"),
- )
+ example = "https://www.simply-hentai.com/TITLE"
def __init__(self, match):
subdomain, path = match.groups()
@@ -40,7 +27,9 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
path = "/" + subdomain.rstrip(".") + path
url = "https://old.simply-hentai.com" + path
GalleryExtractor.__init__(self, match, url)
- self.session.headers["Referer"] = url
+
+ def _init(self):
+ self.session.headers["Referer"] = self.gallery_url
def metadata(self, page):
extr = text.extract_from(page)
@@ -87,17 +76,7 @@ class SimplyhentaiImageExtractor(Extractor):
archive_fmt = "{token}"
pattern = (r"(?:https?://)?(?:www\.)?(simply-hentai\.com"
r"/(image|gif)/[^/?#]+)")
- test = (
- (("https://www.simply-hentai.com/image"
- "/pheromomania-vol-1-kanzenban-isao-3949d8b3-400c-4b6"), {
- "url": "3d8eb55240a960134891bd77fe1df7988fcdc455",
- "keyword": "e10e5588481cab68329ef6ec1e5325206b2079a2",
- }),
- ("https://www.simply-hentai.com/gif/8915dfcf-0b6a-47c", {
- "url": "f73916527211b4a40f26568ee26cd8999f5f4f30",
- "keyword": "f94d775177fed918759c8a78a50976f867425b48",
- }),
- )
+ example = "https://www.simply-hentai.com/image/NAME"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -140,20 +119,7 @@ class SimplyhentaiVideoExtractor(Extractor):
filename_fmt = "{title}{episode:?_//>02}.{extension}"
archive_fmt = "{title}_{episode}"
pattern = r"(?:https?://)?(videos\.simply-hentai\.com/[^/?#]+)"
- test = (
- ("https://videos.simply-hentai.com/creamy-pie-episode-02", {
- "pattern": r"https://www\.googleapis\.com/drive/v3/files"
- r"/0B1ecQ8ZVLm3JcHZzQzBnVy1ZUmc\?alt=media&key=[\w-]+",
- "keyword": "706790708b14773efc1e075ddd3b738a375348a5",
- "options": (("verify", False),),
- "count": 1,
- }),
- (("https://videos.simply-hentai.com"
- "/1715-tifa-in-hentai-gang-bang-3d-movie"), {
- "url": "ad9a36ae06c601b6490e3c401834b4949d947eb0",
- "keyword": "f9dad94fbde9c95859e631ff4f07297a9567b874",
- }),
- )
+ example = "https://videos.simply-hentai.com/TITLE"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 3724c85..55a0db0 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -22,6 +22,8 @@ class SkebExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user_name = match.group(1)
+
+ def _init(self):
self.thumbnails = self.config("thumbnails", False)
self.article = self.config("article", False)
@@ -43,7 +45,7 @@ class SkebExtractor(Extractor):
"""Return additional metadata"""
def _pagination(self, url, params):
- headers = {"Referer": self.root, "Authorization": "Bearer null"}
+ headers = {"Authorization": "Bearer null"}
params["offset"] = 0
while True:
@@ -67,7 +69,7 @@ class SkebExtractor(Extractor):
def _get_post_data(self, user_name, post_num):
url = "{}/api/users/{}/works/{}".format(
self.root, user_name, post_num)
- headers = {"Referer": self.root, "Authorization": "Bearer null"}
+ headers = {"Authorization": "Bearer null"}
resp = self.request(url, headers=headers).json()
creator = resp["creator"]
post = {
@@ -143,53 +145,7 @@ class SkebPostExtractor(SkebExtractor):
"""Extractor for a single skeb post"""
subcategory = "post"
pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/works/(\d+)"
- test = ("https://skeb.jp/@kanade_cocotte/works/38", {
- "count": 2,
- "keyword": {
- "anonymous": False,
- "body": "re:はじめまして。私はYouTubeにてVTuberとして活動をしている湊ラ",
- "client": {
- "avatar_url": r"re:https://pbs.twimg.com/profile_images"
- r"/\d+/\w+\.jpg",
- "header_url": r"re:https://pbs.twimg.com/profile_banners"
- r"/1375007870291300358/\d+/1500x500",
- "id": 1196514,
- "name": str,
- "screen_name": "minato_ragi",
- },
- "content_category": "preview",
- "creator": {
- "avatar_url": "https://pbs.twimg.com/profile_images"
- "/1225470417063645184/P8_SiB0V.jpg",
- "header_url": "https://pbs.twimg.com/profile_banners"
- "/71243217/1647958329/1500x500",
- "id": 159273,
- "name": "イチノセ奏",
- "screen_name": "kanade_cocotte",
- },
- "file_id": int,
- "file_url": str,
- "genre": "art",
- "nsfw": False,
- "original": {
- "byte_size": int,
- "duration": None,
- "extension": "re:psd|png",
- "frame_rate": None,
- "height": 3727,
- "is_movie": False,
- "width": 2810,
- },
- "post_num": "38",
- "post_url": "https://skeb.jp/@kanade_cocotte/works/38",
- "source_body": None,
- "source_thanks": None,
- "tags": list,
- "thanks": None,
- "translated_body": False,
- "translated_thanks": None,
- }
- })
+ example = "https://skeb.jp/@USER/works/123"
def __init__(self, match):
SkebExtractor.__init__(self, match)
@@ -203,13 +159,7 @@ class SkebUserExtractor(SkebExtractor):
"""Extractor for all posts from a skeb user"""
subcategory = "user"
pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/?$"
- test = ("https://skeb.jp/@kanade_cocotte", {
- "pattern": r"https://skeb\.imgix\.net/uploads/origins/[\w-]+"
- r"\?bg=%23fff&auto=format&txtfont=bold&txtshad=70"
- r"&txtclr=BFFFFFFF&txtalign=middle%2Ccenter&txtsize=150"
- r"&txt=SAMPLE&fm=webp&w=800&s=\w+",
- "range": "1-5",
- })
+ example = "https://skeb.jp/@USER"
def posts(self):
url = "{}/api/users/{}/works".format(self.root, self.user_name)
@@ -228,10 +178,7 @@ class SkebSearchExtractor(SkebExtractor):
"""Extractor for skeb search results"""
subcategory = "search"
pattern = r"(?:https?://)?skeb\.jp/search\?q=([^&#]+)"
- test = ("https://skeb.jp/search?q=bunny%20tree&t=works", {
- "count": ">= 18",
- "keyword": {"search_tags": "bunny tree"},
- })
+ example = "https://skeb.jp/search?q=QUERY"
def metadata(self):
return {"search_tags": text.unquote(self.user_name)}
@@ -243,7 +190,6 @@ class SkebSearchExtractor(SkebExtractor):
}
headers = {
"Origin": self.root,
- "Referer": self.root + "/",
"x-algolia-api-key": "9a4ce7d609e71bf29e977925e4c6740c",
"x-algolia-application-id": "HB1JT3KRE9",
}
@@ -284,7 +230,7 @@ class SkebFollowingExtractor(SkebExtractor):
"""Extractor for all creators followed by a skeb user"""
subcategory = "following"
pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/following_creators"
- test = ("https://skeb.jp/@user/following_creators",)
+ example = "https://skeb.jp/@USER/following_creators"
def items(self):
for user in self.users():
@@ -296,7 +242,7 @@ class SkebFollowingExtractor(SkebExtractor):
url = "{}/api/users/{}/following_creators".format(
self.root, self.user_name)
params = {"sort": "date", "offset": 0, "limit": 90}
- headers = {"Referer": self.root, "Authorization": "Bearer null"}
+ headers = {"Authorization": "Bearer null"}
while True:
data = self.request(url, params=params, headers=headers).json()
diff --git a/gallery_dl/extractor/slickpic.py b/gallery_dl/extractor/slickpic.py
index 3727c0b..b2961e1 100644
--- a/gallery_dl/extractor/slickpic.py
+++ b/gallery_dl/extractor/slickpic.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -12,7 +12,6 @@ from .common import Extractor, Message
from .. import text
import time
-
BASE_PATTERN = r"(?:https?://)?([\w-]+)\.slickpic\.com"
@@ -34,22 +33,7 @@ class SlickpicAlbumExtractor(SlickpicExtractor):
filename_fmt = "{num:>03}_{id}{title:?_//}.{extension}"
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/albums/([^/?#]+)"
- test = (
- ("https://mattcrandall.slickpic.com/albums/LamborghiniMurcielago/", {
- "pattern": r"https://stored-cf\.slickpic\.com/NDk5MjNmYTc1MzU0MQ,,"
- r"/20160807/\w+/p/o/JSBFSS-\d+\.jpg",
- "keyword": "c37c4ce9c54c09abc6abdf295855d46f11529cbf",
- "count": 102,
- }),
- ("https://mattcrandall.slickpic.com/albums/LamborghiniMurcielago/", {
- "range": "34",
- "content": ("276eb2c902187bb177ae8013e310e1d6641fba9a",
- "52b5a310587de1048030ab13a912f6a3a9cc7dab",
- "cec6630e659dc72db1ee1a9a6f3b525189261988",
- "6f81e1e74c6cd6db36844e7211eef8e7cd30055d",
- "22e83645fc242bc3584eca7ec982c8a53a4d8a44"),
- }),
- )
+ example = "https://USER.slickpic.com/albums/TITLE/"
def __init__(self, match):
SlickpicExtractor.__init__(self, match)
@@ -127,13 +111,7 @@ class SlickpicAlbumExtractor(SlickpicExtractor):
class SlickpicUserExtractor(SlickpicExtractor):
subcategory = "user"
pattern = BASE_PATTERN + r"(?:/gallery)?/?(?:$|[?#])"
- test = (
- ("https://mattcrandall.slickpic.com/gallery/", {
- "count": ">= 358",
- "pattern": SlickpicAlbumExtractor.pattern,
- }),
- ("https://mattcrandall.slickpic.com/"),
- )
+ example = "https://USER.slickpic.com/"
def items(self):
page = self.request(self.root + "/gallery?viewer").text
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index 3521298..b56ed27 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2023 Mike Fährmann, Leonardo Taccari
+# Copyright 2016-2017 Leonardo Taccari
+# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,45 +22,7 @@ class SlidesharePresentationExtractor(GalleryExtractor):
archive_fmt = "{presentation}_{num}"
pattern = (r"(?:https?://)?(?:www\.)?slideshare\.net"
r"/(?:mobile/)?([^/?#]+)/([^/?#]+)")
- test = (
- (("https://www.slideshare.net"
- "/Slideshare/get-started-with-slide-share"), {
- "pattern": r"https://image\.slidesharecdn\.com/getstartedwithslide"
- r"share-150520173821-lva1-app6892/95/get-started-with-s"
- r"lide-share-\d+-1024\.jpg\?cb=\d+",
- "count": 19,
- "content": "2b6a191eab60b3978fdacfecf2da302dd45bc108",
- "keyword": {
- "description": "Get Started with SlideShare - "
- "A Beginngers Guide for Creators",
- "likes": int,
- "presentation": "get-started-with-slide-share",
- "date": "dt:2015-05-20 17:38:21",
- "title": "Getting Started With SlideShare",
- "user": "Slideshare",
- "views": int,
- },
- }),
- # long title and description
- (("https://www.slideshare.net/pragmaticsolutions/warum-sie-nicht-ihren"
- "-mitarbeitenden-ndern-sollten-sondern-ihr-managementsystem"), {
- "url": "d8952260f8bec337dd809a958ec8091350393f6b",
- "keyword": {
- "title": "Warum Sie nicht Ihren Mitarbeitenden ändern "
- "sollten, sondern Ihr Managementsystem",
- "description": "Mitarbeitende verhalten sich mehrheitlich so, "
- "wie das System es ihnen vorgibt. Welche Voraus"
- "setzungen es braucht, damit Ihre Mitarbeitende"
- "n ihr ganzes Herzblut einsetzen, bespricht Fre"
- "di Schmidli in diesem Referat.",
- },
- }),
- # mobile URL
- (("https://www.slideshare.net"
- "/mobile/uqudent/introduction-to-fixed-prosthodontics"), {
- "url": "72c431cb1eccbb6794f608ecbbc01d52e8768159",
- }),
- )
+ example = "https://www.slideshare.net/USER/PRESENTATION"
def __init__(self, match):
self.user, self.presentation = match.groups()
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index 713d4c4..3639c0b 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2022 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,7 +21,7 @@ class SmugmugExtractor(Extractor):
category = "smugmug"
filename_fmt = ("{category}_{User[NickName]:?/_/}"
"{Image[UploadKey]}_{Image[ImageKey]}.{extension}")
- cookiedomain = None
+ cookies_domain = None
empty_user = {
"Uri": "",
"ResponseLevel": "Public",
@@ -34,8 +34,7 @@ class SmugmugExtractor(Extractor):
"Uris": None,
}
- def __init__(self, match):
- Extractor.__init__(self, match)
+ def _init(self):
self.api = SmugmugAPI(self)
self.videos = self.config("videos", True)
self.session = self.api.session
@@ -69,19 +68,7 @@ class SmugmugAlbumExtractor(SmugmugExtractor):
directory_fmt = ("{category}", "{User[NickName]}", "{Album[Name]}")
archive_fmt = "a_{Album[AlbumKey]}_{Image[ImageKey]}"
pattern = r"smugmug:album:([^:]+)$"
- test = (
- ("smugmug:album:cr4C7f", {
- "url": "2c2e576e47d4e9ce60b44871f08a8c66b5ebaceb",
- }),
- # empty
- ("smugmug:album:Fb7hMs", {
- "count": 0,
- }),
- # no "User"
- ("smugmug:album:6VRT8G", {
- "url": "c4a0f4c4bfd514b93cbdeb02b3345bf7ef6604df",
- }),
- )
+ example = "smugmug:album:ID"
def __init__(self, match):
SmugmugExtractor.__init__(self, match)
@@ -108,18 +95,7 @@ class SmugmugImageExtractor(SmugmugExtractor):
subcategory = "image"
archive_fmt = "{Image[ImageKey]}"
pattern = BASE_PATTERN + r"(?:/[^/?#]+)+/i-([^/?#-]+)"
- test = (
- ("https://tdm.smugmug.com/Nature/Dove/i-kCsLJT6", {
- "url": "e6408fd2c64e721fd146130dceb56a971ceb4259",
- "keyword": "b31a63d07c9c26eb0f79f52d60d171a98938f99b",
- "content": "ecbd9d7b4f75a637abc8d35319be9ec065a44eb0",
- }),
- # video
- ("https://tstravels.smugmug.com/Dailies/Daily-Dose-2015/i-39JFNzB", {
- "url": "04d0ab1ff829ca7d78f5acb5548953df08e9a5ee",
- "keyword": "2b545184592c282b365fcbb7df6ca7952b8a3173",
- }),
- )
+ example = "https://USER.smugmug.com/PATH/i-ID"
def __init__(self, match):
SmugmugExtractor.__init__(self, match)
@@ -140,35 +116,13 @@ class SmugmugPathExtractor(SmugmugExtractor):
"""Extractor for smugmug albums from URL paths and users"""
subcategory = "path"
pattern = BASE_PATTERN + r"((?:/[^/?#a-fh-mo-z][^/?#]*)*)/?$"
- test = (
- ("https://tdm.smugmug.com/Nature/Dove", {
- "pattern": "smugmug:album:cr4C7f$",
- }),
- ("https://tdm.smugmug.com/", {
- "pattern": SmugmugAlbumExtractor.pattern,
- "url": "1640028712875b90974e5aecd91b60e6de6138c7",
- }),
- # gallery node without owner
- ("https://www.smugmug.com/gallery/n-GLCjnD/", {
- "pattern": "smugmug:album:6VRT8G$",
- }),
- # custom domain
- ("smugmug:www.sitkapics.com/TREES-and-TRAILS/", {
- "pattern": "smugmug:album:ct8Nds$",
- }),
- ("smugmug:www.sitkapics.com/", {
- "pattern": r"smugmug:album:\w{6}$",
- "count": ">= 14",
- }),
- ("smugmug:https://www.sitkapics.com/"),
- )
+ example = "https://USER.smugmug.com/PATH"
def __init__(self, match):
SmugmugExtractor.__init__(self, match)
self.domain, self.user, self.path = match.groups()
def items(self):
-
if not self.user:
self.user = self.api.site_user(self.domain)["NickName"]
diff --git a/gallery_dl/extractor/soundgasm.py b/gallery_dl/extractor/soundgasm.py
index 236f94f..7c75aaa 100644
--- a/gallery_dl/extractor/soundgasm.py
+++ b/gallery_dl/extractor/soundgasm.py
@@ -51,29 +51,7 @@ class SoundgasmAudioExtractor(SoundgasmExtractor):
"""Extractor for audio clips from soundgasm.net"""
subcategory = "audio"
pattern = BASE_PATTERN + r"/([^/?#]+)/([^/?#]+)"
- test = (
- (("https://soundgasm.net/u/ClassWarAndPuppies2"
- "/687-Otto-von-Toontown-12822"), {
- "pattern": r"https://media\.soundgasm\.net/sounds"
- r"/26cb2b23b2f2c6094b40ee3a9167271e274b570a\.m4a",
- "keyword": {
- "description": "We celebrate today’s important prisoner swap, "
- "and finally bring the 2022 mid-terms to a clos"
- "e with Raphael Warnock’s defeat of Herschel Wa"
- "lker in Georgia. Then, we take a look at the Q"
- "anon-addled attempt to overthrow the German go"
- "vernment and install Heinrich XIII Prince of R"
- "euss as kaiser.",
- "extension": "m4a",
- "filename": "26cb2b23b2f2c6094b40ee3a9167271e274b570a",
- "slug": "687-Otto-von-Toontown-12822",
- "title": "687 - Otto von Toontown (12/8/22)",
- "user": "ClassWarAndPuppies2",
- },
- }),
- ("https://www.soundgasm.net/user/ClassWarAndPuppies2"
- "/687-Otto-von-Toontown-12822"),
- )
+ example = "https://soundgasm.net/u/USER/TITLE"
def __init__(self, match):
SoundgasmExtractor.__init__(self, match)
@@ -87,19 +65,7 @@ class SoundgasmUserExtractor(SoundgasmExtractor):
"""Extractor for all sounds from a soundgasm user"""
subcategory = "user"
pattern = BASE_PATTERN + r"/([^/?#]+)/?$"
- test = ("https://soundgasm.net/u/fierce-aphrodite", {
- "pattern": r"https://media\.soundgasm\.net/sounds/[0-9a-f]{40}\.m4a",
- "count" : ">= 15",
- "keyword": {
- "description": str,
- "extension": "m4a",
- "filename": "re:^[0-9a-f]{40}$",
- "slug": str,
- "title": str,
- "url": str,
- "user": "fierce-aphrodite"
- },
- })
+ example = "https://soundgasm.net/u/USER"
def __init__(self, match):
SoundgasmExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/speakerdeck.py b/gallery_dl/extractor/speakerdeck.py
index 9eb8761..e44fdae 100644
--- a/gallery_dl/extractor/speakerdeck.py
+++ b/gallery_dl/extractor/speakerdeck.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://speakerdeck.com/"""
+"""Extractors for https://speakerdeck.com/"""
from .common import Extractor, Message
from .. import text
@@ -19,16 +19,8 @@ class SpeakerdeckPresentationExtractor(Extractor):
directory_fmt = ("{category}", "{user}")
filename_fmt = "{presentation}-{num:>02}.{extension}"
archive_fmt = "{presentation}_{num}"
- pattern = (r"(?:https?://)?(?:www\.)?speakerdeck\.com"
- r"/([^/?#]+)/([^/?#]+)")
- test = (
- (("https://speakerdeck.com/speakerdeck/introduction-to-speakerdeck"), {
- "pattern": r"https://files.speakerdeck.com/presentations/"
- r"50021f75cf1db900020005e7/slide_\d+.jpg",
- "content": "75c7abf0969b0bcab23e0da9712c95ee5113db3a",
- "count": 6,
- }),
- )
+ pattern = r"(?:https?://)?(?:www\.)?speakerdeck\.com/([^/?#]+)/([^/?#]+)"
+ example = "https://speakerdeck.com/USER/PRESENTATION"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 4de7e9b..6b4cba2 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -22,14 +22,14 @@ class SubscribestarExtractor(Extractor):
directory_fmt = ("{category}", "{author_name}")
filename_fmt = "{post_id}_{id}.{extension}"
archive_fmt = "{id}"
- cookiedomain = "www.subscribestar.com"
- cookienames = ("auth_token",)
+ cookies_domain = "www.subscribestar.com"
+ cookies_names = ("auth_token",)
def __init__(self, match):
tld, self.item = match.groups()
if tld == "adult":
self.root = "https://subscribestar.adult"
- self.cookiedomain = "subscribestar.adult"
+ self.cookies_domain = "subscribestar.adult"
self.subcategory += "-adult"
Extractor.__init__(self, match)
@@ -49,12 +49,12 @@ class SubscribestarExtractor(Extractor):
"""Yield HTML content of all relevant posts"""
def login(self):
- if self._check_cookies(self.cookienames):
+ if self.cookies_check(self.cookies_names):
return
+
username, password = self._get_auth_info()
if username:
- cookies = self._login_impl(username, password)
- self._update_cookies(cookies)
+ self.cookies_update(self._login_impl(username, password))
@cache(maxage=28*24*3600, keyarg=1)
def _login_impl(self, username, password):
@@ -84,16 +84,16 @@ class SubscribestarExtractor(Extractor):
if cookie.name.startswith("auth")
}
- @staticmethod
- def _media_from_post(html):
+ def _media_from_post(self, html):
media = []
gallery = text.extr(html, 'data-gallery="', '"')
if gallery:
- media.extend(
- item for item in util.json_loads(text.unescape(gallery))
- if "/previews/" not in item["url"]
- )
+ for item in util.json_loads(text.unescape(gallery)):
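+                # items under "/previews" are preview images:
+                # warn about them and skip the download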
+ if "/previews" in item["url"]:
+ self._warn_preview()
+ else:
+ media.append(item)
attachments = text.extr(
html, 'class="uploads-docs"', 'data-role="post-edit_form"')
@@ -130,39 +130,16 @@ class SubscribestarExtractor(Extractor):
date = text.parse_datetime(dt, "%B %d, %Y %I:%M %p")
return date
+ def _warn_preview(self):
+ self.log.warning("Preview image detected")
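+        # replace this method with a no-op so the warning is only logged once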
+ self._warn_preview = util.noop
+
class SubscribestarUserExtractor(SubscribestarExtractor):
"""Extractor for media from a subscribestar user"""
subcategory = "user"
pattern = BASE_PATTERN + r"/(?!posts/)([^/?#]+)"
- test = (
- ("https://www.subscribestar.com/subscribestar", {
- "count": ">= 20",
- "pattern": r"https://\w+\.cloudfront\.net/uploads(_v2)?/users/11/",
- "keyword": {
- "author_id": 11,
- "author_name": "subscribestar",
- "author_nick": "SubscribeStar",
- "content": str,
- "date" : "type:datetime",
- "id" : int,
- "num" : int,
- "post_id": int,
- "type" : "re:image|video|attachment",
- "url" : str,
- "?pinned": bool,
- },
- }),
- ("https://www.subscribestar.com/subscribestar", {
- "options": (("metadata", True),),
- "keyword": {"date": "type:datetime"},
- "range": "1",
- }),
- ("https://subscribestar.adult/kanashiipanda", {
- "range": "1-10",
- "count": 10,
- }),
- )
+ example = "https://www.subscribestar.com/USER"
def posts(self):
needle_next_page = 'data-role="infinite_scroll-next_page" href="'
@@ -184,32 +161,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor):
"""Extractor for media from a single subscribestar post"""
subcategory = "post"
pattern = BASE_PATTERN + r"/posts/(\d+)"
- test = (
- ("https://www.subscribestar.com/posts/102468", {
- "count": 1,
- "keyword": {
- "author_id": 11,
- "author_name": "subscribestar",
- "author_nick": "SubscribeStar",
- "content": "re:<h1>Brand Guidelines and Assets</h1>",
- "date": "dt:2020-05-07 12:33:00",
- "extension": "jpg",
- "filename": "8ff61299-b249-47dc-880a-cdacc9081c62",
- "group": "imgs_and_videos",
- "height": 291,
- "id": 203885,
- "num": 1,
- "pinned": False,
- "post_id": 102468,
- "type": "image",
- "width": 700,
- },
- }),
- ("https://subscribestar.adult/posts/22950", {
- "count": 1,
- "keyword": {"date": "dt:2019-04-28 07:32:00"},
- }),
- )
+ example = "https://www.subscribestar.com/posts/12345"
def posts(self):
url = "{}/posts/{}".format(self.root, self.item)
diff --git a/gallery_dl/extractor/szurubooru.py b/gallery_dl/extractor/szurubooru.py
index 4b15b14..5415bf3 100644
--- a/gallery_dl/extractor/szurubooru.py
+++ b/gallery_dl/extractor/szurubooru.py
@@ -20,8 +20,7 @@ class SzurubooruExtractor(booru.BooruExtractor):
filename_fmt = "{id}_{version}_{checksumMD5}.{extension}"
per_page = 100
- def __init__(self, match):
- booru.BooruExtractor.__init__(self, match)
+ def _init(self):
self.headers = {
"Accept": "application/json",
"Content-Type": "application/json",
@@ -96,15 +95,7 @@ class SzurubooruTagExtractor(SzurubooruExtractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}_{version}"
pattern = BASE_PATTERN + r"/posts/query=([^/?#]+)"
- test = (
- ("https://booru.foalcon.com/posts/query=simple_background", {
- "pattern": r"https://booru\.foalcon\.com/data/posts"
- r"/\d+_[0-9a-f]{16}\.\w+",
- "range": "1-150",
- "count": 150,
- }),
- ("https://booru.bcbnsfw.space/posts/query=simple_background"),
- )
+ example = "https://booru.foalcon.com/posts/query=TAG"
def __init__(self, match):
SzurubooruExtractor.__init__(self, match)
@@ -122,19 +113,7 @@ class SzurubooruPostExtractor(SzurubooruExtractor):
subcategory = "post"
archive_fmt = "{id}_{version}"
pattern = BASE_PATTERN + r"/post/(\d+)"
- test = (
- ("https://booru.foalcon.com/post/30092", {
- "pattern": r"https://booru\.foalcon\.com/data/posts"
- r"/30092_b7d56e941888b624\.png",
- "url": "dad4d4c67d87cd9a4ac429b3414747c27a95d5cb",
- "content": "86d1514c0ca8197950cc4b74e7a59b2dc76ebf9c",
- }),
- ("https://booru.bcbnsfw.space/post/1599", {
- "pattern": r"https://booru\.bcbnsfw\.space/data/posts"
- r"/1599_53784518e92086bd\.png",
- "content": "0c38fc612ba1f03950fad31c4f80a1fccdab1096",
- }),
- )
+ example = "https://booru.foalcon.com/post/12345"
def __init__(self, match):
SzurubooruExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py
index 545a95b..bfca7a6 100644
--- a/gallery_dl/extractor/tapas.py
+++ b/gallery_dl/extractor/tapas.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -22,12 +22,11 @@ class TapasExtractor(Extractor):
directory_fmt = ("{category}", "{series[title]}", "{id} {title}")
filename_fmt = "{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
- cookiedomain = ".tapas.io"
- cookienames = ("_cpc_",)
+ cookies_domain = ".tapas.io"
+ cookies_names = ("_cpc_",)
_cache = None
- def __init__(self, match):
- Extractor.__init__(self, match)
+ def _init(self):
if self._cache is None:
TapasExtractor._cache = {}
@@ -70,14 +69,17 @@ class TapasExtractor(Extractor):
yield Message.Url, url, text.nameext_from_url(url, episode)
def login(self):
- if not self._check_cookies(self.cookienames):
- username, password = self._get_auth_info()
- if username:
- self._update_cookies(self._login_impl(username, password))
- else:
- sc = self.session.cookies.set
- sc("birthDate" , "1981-02-03", domain=self.cookiedomain)
- sc("adjustedBirthDate", "1981-02-03", domain=self.cookiedomain)
+ if self.cookies_check(self.cookies_names):
+ return
+
+ username, password = self._get_auth_info()
+ if username:
+ return self.cookies_update(self._login_impl(username, password))
+
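+        # without an account, set birth-date cookies so age-restricted series stay accessible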
+ self.cookies.set(
+ "birthDate" , "1981-02-03", domain=self.cookies_domain)
+ self.cookies.set(
+ "adjustedBirthDate", "1981-02-03", domain=self.cookies_domain)
@cache(maxage=14*24*3600, keyarg=1)
def _login_impl(self, username, password):
@@ -105,15 +107,7 @@ class TapasExtractor(Extractor):
class TapasSeriesExtractor(TapasExtractor):
subcategory = "series"
pattern = BASE_PATTERN + r"/series/([^/?#]+)"
- test = (
- ("https://tapas.io/series/just-leave-me-be", {
- "pattern": r"https://\w+\.cloudfront\.net/pc/\w\w/[0-9a-f-]+\.jpg",
- "count": 132,
- }),
- ("https://tapas.io/series/yona", { # mature
- "count": 26,
- }),
- )
+ example = "https://tapas.io/series/TITLE"
def __init__(self, match):
TapasExtractor.__init__(self, match)
@@ -149,53 +143,7 @@ class TapasSeriesExtractor(TapasExtractor):
class TapasEpisodeExtractor(TapasExtractor):
subcategory = "episode"
pattern = BASE_PATTERN + r"/episode/(\d+)"
- test = ("https://tapas.io/episode/2068651", {
- "url": "0b53644c864a0a097f65accea6bb620be9671078",
- "pattern": "^text:",
- "keyword": {
- "book": True,
- "comment_cnt": int,
- "date": "dt:2021-02-23 16:02:07",
- "early_access": False,
- "escape_title": "You are a Tomb Raider (2)",
- "free": True,
- "id": 2068651,
- "like_cnt": int,
- "liked": bool,
- "mature": False,
- "next_ep_id": 2068652,
- "nsfw": False,
- "nu": False,
- "num": 1,
- "open_comments": True,
- "pending_scene": 2,
- "prev_ep_id": 2068650,
- "publish_date": "2021-02-23T16:02:07Z",
- "read": bool,
- "related_ep_id": None,
- "relative_publish_date": "Feb 23, 2021",
- "scene": 2,
- "scheduled": False,
- "title": "You are a Tomb Raider (2)",
- "unlock_cnt": 0,
- "unlocked": False,
- "view_cnt": int,
-
- "series": {
- "genre": dict,
- "has_book_cover": True,
- "has_top_banner": True,
- "id": 199931,
- "premium": True,
- "sale_type": "PAID",
- "subscribed": bool,
- "thumbsup_cnt": int,
- "title": "Tomb Raider King",
- "type": "BOOKS",
- "url": "tomb-raider-king-novel",
- },
- },
- })
+ example = "https://tapas.io/episode/12345"
def __init__(self, match):
TapasExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/tcbscans.py b/gallery_dl/extractor/tcbscans.py
index b5a730a..a3ef26c 100644
--- a/gallery_dl/extractor/tcbscans.py
+++ b/gallery_dl/extractor/tcbscans.py
@@ -16,35 +16,7 @@ class TcbscansChapterExtractor(ChapterExtractor):
category = "tcbscans"
root = "https://tcbscans.com"
pattern = BASE_PATTERN + r"(/chapters/\d+/[^/?#]+)"
- test = (
- (("https://tcbscans.com"
- "/chapters/4708/chainsaw-man-chapter-108"), {
- "pattern": (r"https://cdn\.[^/]+"
- r"/(file|attachments/[^/]+)/[^/]+/[^.]+\.\w+"),
- "count" : 17,
- "keyword": {
- "manga": "Chainsaw Man",
- "chapter": 108,
- "chapter_minor": "",
- "lang": "en",
- "language": "English",
- },
- }),
- ("https://onepiecechapters.com/chapters/4716/one-piece-chapter-1065", {
- "pattern": (r"https://cdn\.[^/]+"
- r"/(file|attachments/[^/]+)/[^/]+/[^.]+\.\w+"),
- "count" : 18,
- "keyword": {
- "manga": "One Piece",
- "chapter": 1065,
- "chapter_minor": "",
- "lang": "en",
- "language": "English",
- },
- }),
- (("https://onepiecechapters.com/"
- "chapters/44/ace-novel-manga-adaptation-chapter-1")),
- )
+ example = "https://tcbscans.com/chapters/12345/MANGA-chapter-123"
def images(self, page):
return [
@@ -70,19 +42,7 @@ class TcbscansMangaExtractor(MangaExtractor):
root = "https://tcbscans.com"
chapterclass = TcbscansChapterExtractor
pattern = BASE_PATTERN + r"(/mangas/\d+/[^/?#]+)"
- test = (
- ("https://tcbscans.com/mangas/13/chainsaw-man", {
- "pattern": TcbscansChapterExtractor.pattern,
- "range" : "1-50",
- "count" : 50,
- }),
- ("https://onepiecechapters.com/mangas/4/jujutsu-kaisen", {
- "pattern": TcbscansChapterExtractor.pattern,
- "range" : "1-50",
- "count" : 50,
- }),
- ("https://onepiecechapters.com/mangas/15/hunter-x-hunter"),
- )
+ example = "https://tcbscans.com/mangas/123/MANGA"
def chapters(self, page):
data = {
diff --git a/gallery_dl/extractor/telegraph.py b/gallery_dl/extractor/telegraph.py
index 116f3af..dd5988f 100644
--- a/gallery_dl/extractor/telegraph.py
+++ b/gallery_dl/extractor/telegraph.py
@@ -18,72 +18,7 @@ class TelegraphGalleryExtractor(GalleryExtractor):
filename_fmt = "{num_formatted}_{filename}.{extension}"
archive_fmt = "{slug}_{num}"
pattern = r"(?:https?://)(?:www\.)??telegra\.ph(/[^/?#]+)"
- test = (
- ("https://telegra.ph/Telegraph-Test-03-28", {
- "pattern": r"https://telegra\.ph/file/[0-9a-f]+\.png",
- "keyword": {
- "author": "mikf",
- "caption": r"re:test|",
- "count": 2,
- "date": "dt:2022-03-28 16:01:36",
- "description": "Just a test",
- "post_url": "https://telegra.ph/Telegraph-Test-03-28",
- "slug": "Telegraph-Test-03-28",
- "title": "Telegra.ph Test",
- },
- }),
- ("https://telegra.ph/森-03-28", {
- "pattern": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg",
- "count": 1,
- "keyword": {
- "author": "&",
- "caption": "kokiri",
- "count": 1,
- "date": "dt:2022-03-28 16:31:26",
- "description": "コキリの森",
- "extension": "jpg",
- "filename": "3ea79d23b0dd0889f215a",
- "num": 1,
- "num_formatted": "1",
- "post_url": "https://telegra.ph/森-03-28",
- "slug": "森-03-28",
- "title": '"森"',
- "url": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg",
- },
- }),
- ("https://telegra.ph/Vsyo-o-druzyah-moej-sestricy-05-27", {
- "url": "c1f3048e5d94bee53af30a8c27f70b0d3b15438e",
- "pattern": r"^https://pith1\.ru/uploads"
- r"/posts/2019-12/\d+_\d+\.jpg$",
- "keyword": {
- "author": "Shotacon - заходи сюда",
- "caption": "",
- "count": 19,
- "date": "dt:2022-05-27 16:17:27",
- "description": "",
- "num_formatted": r"re:^\d{2}$",
- "post_url": "https://telegra.ph"
- "/Vsyo-o-druzyah-moej-sestricy-05-27",
- "slug": "Vsyo-o-druzyah-moej-sestricy-05-27",
- "title": "Всё о друзьях моей сестрицы",
- },
- }),
- ("https://telegra.ph/Disharmonica---Saber-Nero-02-21", {
- "pattern": r"https://telegra\.ph/file/[0-9a-f]+\.(jpg|png)",
- "keyword": {
- "author": "cosmos",
- "caption": "",
- "count": 89,
- "date": "dt:2022-02-21 05:57:39",
- "description": "",
- "num_formatted": r"re:^\d{2}$",
- "post_url": "https://telegra.ph"
- "/Disharmonica---Saber-Nero-02-21",
- "slug": "Disharmonica---Saber-Nero-02-21",
- "title": "Disharmonica - Saber Nero",
- },
- }),
- )
+ example = "https://telegra.ph/TITLE"
def metadata(self, page):
extr = text.extract_from(page)
diff --git a/gallery_dl/extractor/test.py b/gallery_dl/extractor/test.py
index 5d81ed5..e3f9f74 100644
--- a/gallery_dl/extractor/test.py
+++ b/gallery_dl/extractor/test.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2017 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -33,11 +33,7 @@ class TestExtractor(Extractor):
"""
category = "test"
pattern = r"t(?:est)?:([^:]*)(?::([^:]*)(?::(\*|[\d,]*))?)?$"
- test = (
- ("test:pixiv"),
- ("test:pixiv:user,favorite:0"),
- ("test:"),
- )
+ example = "test:CATEGORY"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/toyhouse.py b/gallery_dl/extractor/toyhouse.py
index c6be38d..78ff265 100644
--- a/gallery_dl/extractor/toyhouse.py
+++ b/gallery_dl/extractor/toyhouse.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -99,28 +99,7 @@ class ToyhouseArtExtractor(ToyhouseExtractor):
"""Extractor for artworks of a toyhouse user"""
subcategory = "art"
pattern = BASE_PATTERN + r"/([^/?#]+)/art"
-
- test = (
- ("https://www.toyhou.se/d-floe/art", {
- "range": "1-30",
- "count": 30,
- "pattern": r"https://f\d+\.toyhou\.se/file/f\d+-toyhou-se"
- r"/images/\d+_\w+\.\w+$",
- "keyword": {
- "artists": list,
- "characters": list,
- "date": "type:datetime",
- "hash": r"re:\w+",
- "id": r"re:\d+",
- "url": str,
- "user": "d-floe",
- },
- }),
- # protected by Content Warning
- ("https://www.toyhou.se/kroksoc/art", {
- "count": ">= 19",
- }),
- )
+ example = "https://www.toyhou.se/USER/art"
def posts(self):
return self._pagination("/{}/art".format(self.user))
@@ -136,37 +115,7 @@ class ToyhouseImageExtractor(ToyhouseExtractor):
r"(?:www\.)?toyhou\.se/~images|"
r"f\d+\.toyhou\.se/file/[^/?#]+/(?:image|watermark)s"
r")/(\d+)")
- test = (
- ("https://toyhou.se/~images/40587320", {
- "content": "058ec8427977ab432c4cc5be5a6dd39ce18713ef",
- "keyword": {
- "artists": ["d-floe"],
- "characters": ["Sumi"],
- "date": "dt:2021-10-08 01:32:47",
- "extension": "png",
- "filename": "40587320_TT1NaBUr3FLkS1p",
- "hash": "TT1NaBUr3FLkS1p",
- "id": "40587320",
- "url": "https://f2.toyhou.se/file/f2-toyhou-se/images"
- "/40587320_TT1NaBUr3FLkS1p.png",
- },
- }),
- # direct link, multiple artists
- (("https://f2.toyhou.se/file/f2-toyhou-se"
- "/watermarks/36817425_bqhGcwcnU.png?1625561467"), {
- "keyword": {
- "artists": [
- "http://aminoapps.com/p/92sf3z",
- "kroksoc (Color)"],
- "characters": ["❀Reiichi❀"],
- "date": "dt:2021-07-03 20:02:02",
- "hash": "bqhGcwcnU",
- "id": "36817425",
- },
- }),
- ("https://f2.toyhou.se/file/f2-toyhou-se"
- "/images/40587320_TT1NaBUr3FLkS1p.png"),
- )
+ example = "https://toyhou.se/~images/12345"
def posts(self):
url = "{}/~images/{}".format(self.root, self.user)
diff --git a/gallery_dl/extractor/tsumino.py b/gallery_dl/extractor/tsumino.py
index 92bd634..de7cdfc 100644
--- a/gallery_dl/extractor/tsumino.py
+++ b/gallery_dl/extractor/tsumino.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2020 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -16,15 +16,15 @@ from ..cache import cache
class TsuminoBase():
"""Base class for tsumino extractors"""
category = "tsumino"
- cookiedomain = "www.tsumino.com"
+ cookies_domain = "www.tsumino.com"
root = "https://www.tsumino.com"
def login(self):
username, password = self._get_auth_info()
if username:
- self._update_cookies(self._login_impl(username, password))
+ self.cookies_update(self._login_impl(username, password))
else:
- self.session.cookies.setdefault(
+ self.cookies.setdefault(
"ASP.NET_SessionId", "x1drgggilez4cpkttneukrc5")
@cache(maxage=14*24*3600, keyarg=1)
@@ -37,41 +37,14 @@ class TsuminoBase():
response = self.request(url, method="POST", headers=headers, data=data)
if not response.history:
raise exception.AuthenticationError()
- return self.session.cookies
+ return self.cookies
class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor):
"""Extractor for image galleries on tsumino.com"""
pattern = (r"(?i)(?:https?://)?(?:www\.)?tsumino\.com"
r"/(?:entry|Book/Info|Read/(?:Index|View))/(\d+)")
- test = (
- ("https://www.tsumino.com/entry/40996", {
- "pattern": r"https://content.tsumino.com/parts/40996/\d+\?key=\w+",
- "keyword": {
- "title" : r"re:Shikoshiko Daisuki Nightingale \+ Kaijou",
- "title_en" : r"re:Shikoshiko Daisuki Nightingale \+ Kaijou",
- "title_jp" : "シコシコ大好きナイチンゲール + 会場限定おまけ本",
- "gallery_id": 40996,
- "date" : "dt:2018-06-29 00:00:00",
- "count" : 42,
- "collection": "",
- "artist" : ["Itou Life"],
- "group" : ["Itou Life"],
- "parody" : list,
- "characters": list,
- "tags" : list,
- "type" : "Doujinshi",
- "rating" : float,
- "uploader" : "sehki",
- "lang" : "en",
- "language" : "English",
- "thumbnail" : "https://content.tsumino.com/thumbs/40996/1",
- },
- }),
- ("https://www.tsumino.com/Book/Info/40996"),
- ("https://www.tsumino.com/Read/View/45834"),
- ("https://www.tsumino.com/Read/Index/45834"),
- )
+ example = "https://www.tsumino.com/entry/12345"
def __init__(self, match):
self.gallery_id = match.group(1)
@@ -131,21 +104,8 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor):
class TsuminoSearchExtractor(TsuminoBase, Extractor):
"""Extractor for search results on tsumino.com"""
subcategory = "search"
- pattern = (r"(?i)(?:https?://)?(?:www\.)?tsumino\.com"
- r"/(?:Books/?)?#(.+)")
- test = (
- ("https://www.tsumino.com/Books#?Character=Reimu+Hakurei", {
- "pattern": TsuminoGalleryExtractor.pattern,
- "range": "1-40",
- "count": 40,
- }),
- (("http://www.tsumino.com/Books#~(Tags~(~"
- "(Type~7~Text~'Reimu*20Hakurei~Exclude~false)~"
- "(Type~'1~Text~'Pantyhose~Exclude~false)))#"), {
- "pattern": TsuminoGalleryExtractor.pattern,
- "count": ">= 3",
- }),
- )
+ pattern = r"(?i)(?:https?://)?(?:www\.)?tsumino\.com/(?:Books/?)?#(.+)"
+ example = "https://www.tsumino.com/Books#QUERY"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index b45609d..3dab16e 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -31,7 +31,7 @@ class TumblrExtractor(Extractor):
directory_fmt = ("{category}", "{blog_name}")
filename_fmt = "{category}_{blog_name}_{id}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
- cookiedomain = None
+ cookies_domain = None
def __init__(self, match):
Extractor.__init__(self, match)
@@ -42,6 +42,7 @@ class TumblrExtractor(Extractor):
else:
self.blog = match.group(1) or match.group(3)
+ def _init(self):
self.api = TumblrAPI(self)
self.types = self._setup_posttypes()
self.avatar = self.config("avatar", False)
@@ -272,59 +273,7 @@ class TumblrUserExtractor(TumblrExtractor):
"""Extractor for a Tumblr user's posts"""
subcategory = "user"
pattern = BASE_PATTERN + r"(?:/page/\d+|/archive)?/?$"
- test = (
- ("http://demo.tumblr.com/", {
- "pattern": r"https://\d+\.media\.tumblr\.com"
- r"/tumblr_[^/_]+_\d+\.jpg",
- "count": 1,
- "options": (("posts", "photo"),),
- }),
- ("http://demo.tumblr.com/", {
- "pattern": (r"https?://(?:$|"
- r"\d+\.media\.tumblr\.com/.+_1280\.jpg|"
- r"a\.tumblr\.com/tumblr_\w+)"),
- "count": 3,
- "options": (("posts", "all"), ("external", True))
- }),
- ("https://mikf123-hidden.tumblr.com/", { # dashboard-only
- "options": (("access-token", None),),
- "exception": exception.AuthorizationError,
- }),
- ("https://mikf123-hidden.tumblr.com/", { # dashboard-only
- "count": 2,
- "keyword": {"tags": ["test", "hidden"]},
- }),
- ("https://mikf123-private.tumblr.com/", { # password protected
- "count": 2,
- "keyword": {"tags": ["test", "private"]},
- }),
- ("https://mikf123-private-hidden.tumblr.com/", { # both
- "count": 2,
- "keyword": {"tags": ["test", "private", "hidden"]},
- }),
- ("https://mikf123.tumblr.com/", { # date-min/-max/-format (#337)
- "count": 4,
- "options": (("date-min", "201804"), ("date-max", "201805"),
- ("date-format", "%Y%m"))
- }),
- # pagination with 'date-max' (#2191) and 'api-key'
- ("https://donttrustthetits.tumblr.com/", {
- "options": (
- ("access-token", None),
- ("original", False),
- ("date-max", "2015-04-25T00:00:00"),
- ("date-min", "2015-04-01T00:00:00"),
- ),
- "count": 316,
- }),
- ("https://demo.tumblr.com/page/2"),
- ("https://demo.tumblr.com/archive"),
- ("tumblr:http://www.b-authentique.com/"),
- ("tumblr:www.b-authentique.com"),
- ("https://www.tumblr.com/blog/view/smarties-art"),
- ("https://www.tumblr.com/blog/smarties-art"),
- ("https://www.tumblr.com/smarties-art"),
- )
+ example = "https://www.tumblr.com/BLOG"
def posts(self):
return self.api.posts(self.blog, {})
@@ -334,55 +283,7 @@ class TumblrPostExtractor(TumblrExtractor):
"""Extractor for a single Tumblr post"""
subcategory = "post"
pattern = BASE_PATTERN + r"/(?:post/|image/)?(\d+)"
- test = (
- ("http://demo.tumblr.com/post/459265350", {
- "pattern": (r"https://\d+\.media\.tumblr\.com"
- r"/tumblr_[^/_]+_1280.jpg"),
- "count": 1,
- }),
- ("https://mikf123.tumblr.com/post/167770226574/text-post", {
- "count": 2,
- }),
- ("https://mikf123.tumblr.com/post/181022561719/quote-post", {
- "count": 1,
- }),
- ("https://mikf123.tumblr.com/post/167623351559/link-post", {
- "count": 2,
- }),
- ("https://mikf123.tumblr.com/post/167633596145/video-post", {
- "count": 2,
- }),
- ("https://mikf123.tumblr.com/post/167770026604/audio-post", {
- "count": 2,
- }),
- ("https://mikf123.tumblr.com/post/172687798174/photo-post", {
- "count": 4,
- }),
- ("https://mikf123.tumblr.com/post/181022380064/chat-post", {
- "count": 0,
- }),
- ("https://kichatundk.tumblr.com/post/654953419288821760", {
- "count": 2, # high-quality images (#1846)
- "content": "d6fcc7b6f750d835d55c7f31fa3b63be26c9f89b",
- }),
- ("https://hameru-is-cool.tumblr.com/post/639261855227002880", {
- "count": 2, # high-quality images (#1344)
- "content": "6bc19a42787e46e1bba2ef4aeef5ca28fcd3cd34",
- }),
- ("https://mikf123.tumblr.com/image/689860196535762944", {
- "pattern": r"^https://\d+\.media\.tumblr\.com"
- r"/134791621559a79793563b636b5fe2c6"
- r"/8f1131551cef6e74-bc/s99999x99999"
- r"/188cf9b8915b0d0911c6c743d152fc62e8f38491\.png$",
- }),
- ("http://ziemniax.tumblr.com/post/109697912859/", {
- "exception": exception.NotFoundError, # HTML response (#297)
- }),
- ("http://demo.tumblr.com/image/459265350"),
- ("https://www.tumblr.com/blog/view/smarties-art/686047436641353728"),
- ("https://www.tumblr.com/blog/smarties-art/686047436641353728"),
- ("https://www.tumblr.com/smarties-art/686047436641353728"),
- )
+ example = "https://www.tumblr.com/BLOG/12345"
def __init__(self, match):
TumblrExtractor.__init__(self, match)
@@ -402,16 +303,7 @@ class TumblrTagExtractor(TumblrExtractor):
"""Extractor for Tumblr user's posts by tag"""
subcategory = "tag"
pattern = BASE_PATTERN + r"/tagged/([^/?#]+)"
- test = (
- ("http://demo.tumblr.com/tagged/Times%20Square", {
- "pattern": r"https://\d+\.media\.tumblr\.com"
- r"/tumblr_[^/_]+_1280.jpg",
- "count": 1,
- }),
- ("https://www.tumblr.com/blog/view/smarties-art/tagged/undertale"),
- ("https://www.tumblr.com/blog/smarties-art/tagged/undertale"),
- ("https://www.tumblr.com/smarties-art/tagged/undertale"),
- )
+ example = "https://www.tumblr.com/BLOG/tagged/TAG"
def __init__(self, match):
TumblrExtractor.__init__(self, match)
@@ -425,26 +317,18 @@ class TumblrDayExtractor(TumblrExtractor):
"""Extractor for Tumblr user's posts by day"""
subcategory = "day"
pattern = BASE_PATTERN + r"/day/(\d\d\d\d/\d\d/\d\d)"
- test = (
- ("https://mikf123.tumblr.com/day/2018/01/05", {
- "pattern": r"https://64\.media\.tumblr\.com"
- r"/1a2be8c63f1df58abd2622861696c72a"
- r"/tumblr_ozm9nqst9t1wgha4yo1_1280\.jpg",
- "keyword": {"id": 169341068404},
- "count": 1,
- }),
- ("https://www.tumblr.com/blog/view/mikf123/day/2018/01/05"),
- ("https://www.tumblr.com/blog/mikf123/day/2018/01/05"),
- ("https://www.tumblr.com/mikf123/day/2018/01/05"),
- )
+ example = "https://www.tumblr.com/BLOG/day/1970/01/01"
def __init__(self, match):
TumblrExtractor.__init__(self, match)
year, month, day = match.group(4).split("/")
- self.date_min = ts = (
+ self.date_min = (
# 719163 == date(1970, 1, 1).toordinal()
date(int(year), int(month), int(day)).toordinal() - 719163) * 86400
- self.api.before = ts + 86400
+
+ def _init(self):
+ TumblrExtractor._init(self)
+ self.api.before = self.date_min + 86400
def posts(self):
return self.api.posts(self.blog, {})
@@ -456,14 +340,7 @@ class TumblrLikesExtractor(TumblrExtractor):
directory_fmt = ("{category}", "{blog_name}", "likes")
archive_fmt = "f_{blog[name]}_{id}_{num}"
pattern = BASE_PATTERN + r"/likes"
- test = (
- ("http://mikf123.tumblr.com/likes", {
- "count": 1,
- }),
- ("https://www.tumblr.com/blog/view/mikf123/likes"),
- ("https://www.tumblr.com/blog/mikf123/likes"),
- ("https://www.tumblr.com/mikf123/likes"),
- )
+ example = "https://www.tumblr.com/BLOG/likes"
def posts(self):
return self.api.likes(self.blog)
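
The second recurring change visible in the tumblr hunks above is the new _init() hook: __init__() keeps only the cheap URL parsing, while anything that needs configuration or a prepared session (the TumblrAPI client, config lookups, api.before in TumblrDayExtractor) moves into _init(), which the base Extractor presumably calls once before extraction begins. A rough sketch of the split, with hypothetical names:

    # Rough sketch of the __init__/_init split; ExampleAPI and the config
    # key are placeholders, only the shape of the split is taken from the
    # tumblr hunks above.
    from gallery_dl.extractor.common import Extractor

    class ExampleUserExtractor(Extractor):
        def __init__(self, match):
            Extractor.__init__(self, match)
            self.user = match.group(1)      # cheap URL parsing only

        def _init(self):
            # setup that needs self.session or config values happens here
            self.api = ExampleAPI(self)
            self.original = self.config("original", True)
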
diff --git a/gallery_dl/extractor/tumblrgallery.py b/gallery_dl/extractor/tumblrgallery.py
index 6940f3e..27cc9d0 100644
--- a/gallery_dl/extractor/tumblrgallery.py
+++ b/gallery_dl/extractor/tumblrgallery.py
@@ -38,7 +38,7 @@ class TumblrgalleryTumblrblogExtractor(TumblrgalleryExtractor):
"""Extractor for Tumblrblog on tumblrgallery.xyz"""
subcategory = "tumblrblog"
pattern = BASE_PATTERN + r"(/tumblrblog/gallery/(\d+)\.html)"
- test = ("https://tumblrgallery.xyz/tumblrblog/gallery/103975.html",)
+ example = "https://tumblrgallery.xyz/tumblrblog/gallery/12345.html"
def __init__(self, match):
TumblrgalleryExtractor.__init__(self, match)
@@ -69,11 +69,7 @@ class TumblrgalleryPostExtractor(TumblrgalleryExtractor):
"""Extractor for Posts on tumblrgallery.xyz"""
subcategory = "post"
pattern = BASE_PATTERN + r"(/post/(\d+)\.html)"
- test = ("https://tumblrgallery.xyz/post/405674.html", {
- "pattern": r"https://78\.media\.tumblr\.com/bec67072219c1f3bc04fd9711d"
- r"ec42ef/tumblr_p51qq1XCHS1txhgk3o1_1280\.jpg",
- "count": 3,
- })
+ example = "https://tumblrgallery.xyz/post/12345.html"
def __init__(self, match):
TumblrgalleryExtractor.__init__(self, match)
@@ -98,10 +94,7 @@ class TumblrgallerySearchExtractor(TumblrgalleryExtractor):
filename_fmt = "{category}_{num:>03}_{gallery_id}_{id}_{title}.{extension}"
directory_fmt = ("{category}", "{search_term}")
pattern = BASE_PATTERN + r"(/s\.php\?q=([^&#]+))"
- test = ("https://tumblrgallery.xyz/s.php?q=everyday-life", {
- "pattern": r"https://\d+\.media\.tumblr\.com/.+",
- "count": "< 1000",
- })
+ example = "https://tumblrgallery.xyz/s.php?q=QUERY"
def __init__(self, match):
TumblrgalleryExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py
index a8acd31..49c8419 100644
--- a/gallery_dl/extractor/twibooru.py
+++ b/gallery_dl/extractor/twibooru.py
@@ -19,15 +19,14 @@ class TwibooruExtractor(BooruExtractor):
"""Base class for twibooru extractors"""
category = "twibooru"
basecategory = "philomena"
+ root = "https://twibooru.org"
filename_fmt = "{id}_{filename}.{extension}"
archive_fmt = "{id}"
request_interval = 6.05
page_start = 1
per_page = 50
- root = "https://twibooru.org"
- def __init__(self, match):
- BooruExtractor.__init__(self, match)
+ def _init(self):
self.api = TwibooruAPI(self)
_file_url = operator.itemgetter("view_url")
@@ -47,50 +46,7 @@ class TwibooruPostExtractor(TwibooruExtractor):
subcategory = "post"
request_interval = 1.0
pattern = BASE_PATTERN + r"/(\d+)"
- test = ("https://twibooru.org/1", {
- "pattern": r"https://cdn.twibooru.org/img/2020/7/8/1/full.png",
- "content": "aac4d1dba611883ac701aaa8f0b2b322590517ae",
- "keyword": {
- "animated": False,
- "aspect_ratio": 1.0,
- "comment_count": int,
- "created_at": "2020-07-08T22:26:55.743Z",
- "date": "dt:2020-07-08 22:26:55",
- "description": "Why have I done this?",
- "downvotes": 0,
- "duration": 0.0,
- "faves": int,
- "first_seen_at": "2020-07-08T22:26:55.743Z",
- "format": "png",
- "height": 576,
- "hidden_from_users": False,
- "id": 1,
- "intensities": dict,
- "locations": [],
- "media_type": "image",
- "mime_type": "image/png",
- "name": "1676547__safe_artist-colon-scraggleman_oc_oc-colon-"
- "floor+bored_oc+only_bags+under+eyes_bust_earth+pony_"
- "female_goggles_helmet_mare_meme_neet_neet+home+g.png",
- "orig_sha512_hash": "re:8b4c00d2[0-9a-f]{120}",
- "processed": True,
- "representations": dict,
- "score": int,
- "sha512_hash": "8b4c00d2eff52d51ad9647e14738944ab306fd1d8e1bf6"
- "34fbb181b32f44070aa588938e26c4eb072b1eb61489aa"
- "f3062fb644a76c79f936b97723a2c3e0e5d3",
- "size": 70910,
- "source_url": "",
- "tag_ids": list,
- "tags": list,
- "thumbnails_generated": True,
- "updated_at": "2022-11-27T00:34:50.483Z",
- "upvotes": int,
- "view_url": "https://cdn.twibooru.org/img/2020/7/8/1/full.png",
- "width": 576,
- "wilson_score": float,
- },
- })
+ example = "https://twibooru.org/12345"
def __init__(self, match):
TwibooruExtractor.__init__(self, match)
@@ -105,16 +61,7 @@ class TwibooruSearchExtractor(TwibooruExtractor):
subcategory = "search"
directory_fmt = ("{category}", "{search_tags}")
pattern = BASE_PATTERN + r"/(?:search/?\?([^#]+)|tags/([^/?#]+))"
- test = (
- ("https://twibooru.org/search?q=cute", {
- "range": "40-60",
- "count": 21,
- }),
- ("https://twibooru.org/tags/cute", {
- "range": "1-20",
- "count": 20,
- }),
- )
+ example = "https://twibooru.org/search?q=TAG"
def __init__(self, match):
TwibooruExtractor.__init__(self, match)
@@ -148,19 +95,7 @@ class TwibooruGalleryExtractor(TwibooruExtractor):
directory_fmt = ("{category}", "galleries",
"{gallery[id]} {gallery[title]}")
pattern = BASE_PATTERN + r"/galleries/(\d+)"
- test = ("https://twibooru.org/galleries/1", {
- "range": "1-20",
- "keyword": {
- "gallery": {
- "description": "Best nation pone and "
- "russian related pics.",
- "id": 1,
- "spoiler_warning": "Russia",
- "thumbnail_id": 694923,
- "title": "Marussiaverse",
- },
- },
- })
+ example = "https://twibooru.org/galleries/12345"
def __init__(self, match):
TwibooruExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 7b9a2e4..3895c74 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -15,7 +15,7 @@ import itertools
import json
import re
-BASE_PATTERN = r"(?:https?://)?(?:www\.|mobile\.)?(?:[fv]x)?twitter\.com"
+BASE_PATTERN = r"(?:https?://)?(?:www\.|mobile\.)?(?:(?:[fv]x)?twitter|x)\.com"
class TwitterExtractor(Extractor):
@@ -24,14 +24,16 @@ class TwitterExtractor(Extractor):
directory_fmt = ("{category}", "{user[name]}")
filename_fmt = "{tweet_id}_{num}.{extension}"
archive_fmt = "{tweet_id}_{retweet_id}_{num}"
- cookiedomain = ".twitter.com"
- cookienames = ("auth_token",)
+ cookies_domain = ".twitter.com"
+ cookies_names = ("auth_token",)
root = "https://twitter.com"
browser = "firefox"
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
+
+ def _init(self):
self.textonly = self.config("text-tweets", False)
self.retweets = self.config("retweets", False)
self.replies = self.config("replies", True)
@@ -275,6 +277,8 @@ class TwitterExtractor(Extractor):
else:
note = None
+ source = tweet["source"]
+
if "legacy" in tweet:
tweet = tweet["legacy"]
@@ -301,6 +305,7 @@ class TwitterExtractor(Extractor):
"author" : author,
"user" : self._user or author,
"lang" : tweet["lang"],
+ "source" : text.extr(source, ">", "<"),
"favorite_count": tget("favorite_count"),
"quote_count" : tget("quote_count"),
"reply_count" : tget("reply_count"),
@@ -334,11 +339,18 @@ class TwitterExtractor(Extractor):
tdata["reply_to"] = tweet["in_reply_to_screen_name"]
if "quoted_by" in tweet:
tdata["quote_by"] = tweet["quoted_by"]
+ if tdata["retweet_id"]:
+ tdata["date_original"] = text.parse_timestamp(
+ ((tdata["retweet_id"] >> 22) + 1288834974657) // 1000)
return tdata
def _transform_user(self, user):
- uid = user.get("rest_id") or user["id_str"]
+ try:
+ uid = user.get("rest_id") or user["id_str"]
+ except KeyError:
+ # private/invalid user (#4349)
+ return {}
try:
return self._user_cache[uid]
@@ -394,9 +406,12 @@ class TwitterExtractor(Extractor):
def _users_result(self, users):
userfmt = self.config("users")
- if not userfmt or userfmt == "timeline":
- cls = TwitterTimelineExtractor
+ if not userfmt or userfmt == "user":
+ cls = TwitterUserExtractor
fmt = (self.root + "/i/user/{rest_id}").format_map
+ elif userfmt == "timeline":
+ cls = TwitterTimelineExtractor
+ fmt = (self.root + "/id:{rest_id}/timeline").format_map
elif userfmt == "media":
cls = TwitterMediaExtractor
fmt = (self.root + "/id:{rest_id}/media").format_map
@@ -455,37 +470,20 @@ class TwitterExtractor(Extractor):
"""Yield all relevant tweet objects"""
def login(self):
- if not self._check_cookies(self.cookienames):
- username, password = self._get_auth_info()
- if username:
- self._update_cookies(_login_impl(self, username, password))
+ if self.cookies_check(self.cookies_names):
+ return
+ username, password = self._get_auth_info()
+ if username:
+ self.cookies_update(_login_impl(self, username, password))
-class TwitterTimelineExtractor(TwitterExtractor):
- """Extractor for a Twitter user timeline"""
- subcategory = "timeline"
+
+class TwitterUserExtractor(TwitterExtractor):
+ """Extractor for a Twitter user"""
+ subcategory = "user"
pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
- test = (
- ("https://twitter.com/supernaturepics", {
- "range": "1-40",
- "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
- }),
- # suspended account (#2216)
- ("https://twitter.com/OptionalTypo", {
- "exception": exception.NotFoundError,
- }),
- # suspended account user ID
- ("https://twitter.com/id:772949683521978368", {
- "exception": exception.NotFoundError,
- }),
- ("https://mobile.twitter.com/supernaturepics?p=i"),
- ("https://www.twitter.com/id:2976459548"),
- ("https://twitter.com/i/user/2976459548"),
- ("https://twitter.com/intent/user?user_id=2976459548"),
- ("https://fxtwitter.com/supernaturepics"),
- ("https://vxtwitter.com/supernaturepics"),
- )
+ example = "https://twitter.com/USER"
def __init__(self, match):
TwitterExtractor.__init__(self, match)
@@ -493,6 +491,28 @@ class TwitterTimelineExtractor(TwitterExtractor):
if user_id:
self.user = "id:" + user_id
+ def initialize(self):
+ pass
+
+ def items(self):
+ base = "{}/{}/".format(self.root, self.user)
+ return self._dispatch_extractors((
+ (TwitterAvatarExtractor , base + "photo"),
+ (TwitterBackgroundExtractor, base + "header_photo"),
+ (TwitterTimelineExtractor , base + "timeline"),
+ (TwitterTweetsExtractor , base + "tweets"),
+ (TwitterMediaExtractor , base + "media"),
+ (TwitterRepliesExtractor , base + "with_replies"),
+ (TwitterLikesExtractor , base + "likes"),
+ ), ("timeline",))
+
+
+class TwitterTimelineExtractor(TwitterExtractor):
+ """Extractor for a Twitter user timeline"""
+ subcategory = "timeline"
+ pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/timeline(?!\w)"
+ example = "https://twitter.com/USER/timeline"
+
def tweets(self):
# yield initial batch of (media) tweets
tweet = None
@@ -536,14 +556,7 @@ class TwitterTweetsExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's Tweets timeline"""
subcategory = "tweets"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/tweets(?!\w)"
- test = (
- ("https://twitter.com/supernaturepics/tweets", {
- "range": "1-40",
- "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
- }),
- ("https://mobile.twitter.com/supernaturepics/tweets#t"),
- ("https://www.twitter.com/id:2976459548/tweets"),
- )
+ example = "https://twitter.com/USER/tweets"
def tweets(self):
return self.api.user_tweets(self.user)
@@ -553,14 +566,7 @@ class TwitterRepliesExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's timeline including replies"""
subcategory = "replies"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/with_replies(?!\w)"
- test = (
- ("https://twitter.com/supernaturepics/with_replies", {
- "range": "1-40",
- "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
- }),
- ("https://mobile.twitter.com/supernaturepics/with_replies#t"),
- ("https://www.twitter.com/id:2976459548/with_replies"),
- )
+ example = "https://twitter.com/USER/with_replies"
def tweets(self):
return self.api.user_tweets_and_replies(self.user)
@@ -570,14 +576,7 @@ class TwitterMediaExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's Media timeline"""
subcategory = "media"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/media(?!\w)"
- test = (
- ("https://twitter.com/supernaturepics/media", {
- "range": "1-40",
- "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
- }),
- ("https://mobile.twitter.com/supernaturepics/media#t"),
- ("https://www.twitter.com/id:2976459548/media"),
- )
+ example = "https://twitter.com/USER/media"
def tweets(self):
return self.api.user_media(self.user)
@@ -587,7 +586,7 @@ class TwitterLikesExtractor(TwitterExtractor):
"""Extractor for liked tweets"""
subcategory = "likes"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/likes(?!\w)"
- test = ("https://twitter.com/supernaturepics/likes",)
+ example = "https://twitter.com/USER/likes"
def metadata(self):
return {"user_likes": self.user}
@@ -598,7 +597,7 @@ class TwitterLikesExtractor(TwitterExtractor):
def _transform_tweet(self, tweet):
tdata = TwitterExtractor._transform_tweet(self, tweet)
tdata["date_liked"] = text.parse_timestamp(
- (int(tweet["sortIndex"]) >> 20) // 1000)
+ (int(tweet["sortIndex"] or 0) >> 20) // 1000)
return tdata
@@ -606,7 +605,7 @@ class TwitterBookmarkExtractor(TwitterExtractor):
"""Extractor for bookmarked tweets"""
subcategory = "bookmark"
pattern = BASE_PATTERN + r"/i/bookmarks()"
- test = ("https://twitter.com/i/bookmarks",)
+ example = "https://twitter.com/i/bookmarks"
def tweets(self):
return self.api.user_bookmarks()
@@ -614,7 +613,7 @@ class TwitterBookmarkExtractor(TwitterExtractor):
def _transform_tweet(self, tweet):
tdata = TwitterExtractor._transform_tweet(self, tweet)
tdata["date_bookmarked"] = text.parse_timestamp(
- (int(tweet["sortIndex"]) >> 20) // 1000)
+ (int(tweet["sortIndex"] or 0) >> 20) // 1000)
return tdata
@@ -622,11 +621,7 @@ class TwitterListExtractor(TwitterExtractor):
"""Extractor for Twitter lists"""
subcategory = "list"
pattern = BASE_PATTERN + r"/i/lists/(\d+)/?$"
- test = ("https://twitter.com/i/lists/784214683683127296", {
- "range": "1-40",
- "count": 40,
- "archive": False,
- })
+ example = "https://twitter.com/i/lists/12345"
def tweets(self):
return self.api.list_latest_tweets_timeline(self.user)
@@ -636,11 +631,7 @@ class TwitterListMembersExtractor(TwitterExtractor):
"""Extractor for members of a Twitter list"""
subcategory = "list-members"
pattern = BASE_PATTERN + r"/i/lists/(\d+)/members"
- test = ("https://twitter.com/i/lists/784214683683127296/members", {
- "pattern": TwitterTimelineExtractor.pattern,
- "range": "1-40",
- "count": 40,
- })
+ example = "https://twitter.com/i/lists/12345/members"
def items(self):
self.login()
@@ -651,10 +642,7 @@ class TwitterFollowingExtractor(TwitterExtractor):
"""Extractor for followed users"""
subcategory = "following"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/following(?!\w)"
- test = (
- ("https://twitter.com/supernaturepics/following"),
- ("https://www.twitter.com/id:2976459548/following"),
- )
+ example = "https://twitter.com/USER/following"
def items(self):
self.login()
@@ -665,11 +653,7 @@ class TwitterSearchExtractor(TwitterExtractor):
"""Extractor for Twitter search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)"
- test = ("https://twitter.com/search?q=nature", {
- "range": "1-20",
- "count": 20,
- "archive": False,
- })
+ example = "https://twitter.com/search?q=QUERY"
def metadata(self):
return {"search": text.unquote(self.user)}
@@ -700,10 +684,7 @@ class TwitterHashtagExtractor(TwitterExtractor):
"""Extractor for Twitter hashtags"""
subcategory = "hashtag"
pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)"
- test = ("https://twitter.com/hashtag/nature", {
- "pattern": TwitterSearchExtractor.pattern,
- "url": "3571c3a53b7647ea35517041fdc17f77ec5b2cb9",
- })
+ example = "https://twitter.com/hashtag/NAME"
def items(self):
url = "{}/search?q=%23{}".format(self.root, self.user)
@@ -717,10 +698,7 @@ class TwitterEventExtractor(TwitterExtractor):
directory_fmt = ("{category}", "Events",
"{event[id]} {event[short_title]}")
pattern = BASE_PATTERN + r"/i/events/(\d+)"
- test = ("https://twitter.com/i/events/1484669206993903616", {
- "range": "1-20",
- "count": ">=1",
- })
+ example = "https://twitter.com/i/events/12345"
def metadata(self):
return {"event": self.api.live_event(self.user)}
@@ -733,186 +711,7 @@ class TwitterTweetExtractor(TwitterExtractor):
"""Extractor for images from individual tweets"""
subcategory = "tweet"
pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)"
- test = (
- ("https://twitter.com/supernaturepics/status/604341487988576256", {
- "url": "88a40f7d25529c2501c46f2218f9e0de9aa634b4",
- "content": "ab05e1d8d21f8d43496df284d31e8b362cd3bcab",
- }),
- # 4 images
- ("https://twitter.com/perrypumas/status/894001459754180609", {
- "url": "3a2a43dc5fb79dd5432c701d8e55e87c4e551f47",
- }),
- # video
- ("https://twitter.com/perrypumas/status/1065692031626829824", {
- "pattern": r"https://video.twimg.com/ext_tw_video/.+\.mp4\?tag=5",
- }),
- # content with emoji, newlines, hashtags (#338)
- ("https://twitter.com/playpokemon/status/1263832915173048321", {
- "keyword": {"content": (
- r"re:Gear up for #PokemonSwordShieldEX with special Mystery "
- "Gifts! \n\nYou’ll be able to receive four Galarian form "
- "Pokémon with Hidden Abilities, plus some very useful items. "
- "It’s our \\(Mystery\\) Gift to you, Trainers! \n\n❓🎁➡️ "
- )},
- }),
- # Reply to deleted tweet (#403, #838)
- ("https://twitter.com/i/web/status/1170041925560258560", {
- "pattern": r"https://pbs.twimg.com/media/EDzS7VrU0AAFL4_",
- }),
- # 'replies' option (#705)
- ("https://twitter.com/i/web/status/1170041925560258560", {
- "options": (("replies", False),),
- "count": 0,
- }),
- # 'replies' to self (#1254)
- ("https://twitter.com/i/web/status/1424882930803908612", {
- "options": (("replies", "self"),),
- "count": 4,
- "keyword": {"user": {
- "description": "re:business email-- rhettaro.bloom@gmail.com "
- "patreon- http://patreon.com/Princecanary",
- "url": "http://princecanary.tumblr.com",
- }},
- }),
- ("https://twitter.com/i/web/status/1424898916156284928", {
- "options": (("replies", "self"),),
- "count": 1,
- }),
- # "quoted" option (#854)
- ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
- "options": (("quoted", True),),
- "pattern": r"https://pbs\.twimg\.com/media/Ea[KG].+=jpg",
- "count": 8,
- }),
- # quoted tweet (#526, #854)
- ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
- "pattern": r"https://pbs\.twimg\.com/media/EaK.+=jpg",
- "count": 4,
- }),
- # different 'user' and 'author' in quoted Tweet (#3922)
- ("https://twitter.com/web/status/1644907989109751810", {
- "keyword": {
- "author": {"id": 321629993 , "name": "Cakes_Comics"},
- "user" : {"id": 718928225360080897, "name": "StobiesGalaxy"},
- },
- }),
- # TwitPic embeds (#579)
- ("https://twitter.com/i/web/status/112900228289540096", {
- "options": (("twitpic", True), ("cards", False)),
- "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg",
- "count": 2, # 1 duplicate
- }),
- # TwitPic URL not in 'urls' (#3792)
- ("https://twitter.com/shimoigusaP/status/8138669971", {
- "options": (("twitpic", True),),
- "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.png",
- "count": 1,
- }),
- # Twitter card (#1005)
- ("https://twitter.com/billboard/status/1306599586602135555", {
- "options": (("cards", True),),
- "pattern": r"https://pbs.twimg.com/card_img/\d+/",
- }),
- # unified_card image_website (#2875)
- ("https://twitter.com/i/web/status/1561674543323910144", {
- "options": (("cards", True),),
- "pattern": r"https://pbs\.twimg\.com/media/F.+=jpg",
- }),
- # unified_card image_carousel_website
- ("https://twitter.com/doax_vv_staff/status/1479438945662685184", {
- "options": (("cards", True),),
- "pattern": r"https://pbs\.twimg\.com/media/F.+=png",
- "count": 6,
- }),
- # unified_card video_website (#2875)
- ("https://twitter.com/bang_dream_1242/status/1561548715348746241", {
- "options": (("cards", True),),
- "pattern": r"https://video\.twimg\.com/amplify_video"
- r"/1560607284333449216/vid/720x720/\w+\.mp4",
- }),
- # unified_card without type
- ("https://twitter.com/i/web/status/1466183847628865544", {
- "count": 0,
- }),
- # 'cards-blacklist' option
- ("https://twitter.com/i/web/status/1571141912295243776", {
- "options": (("cards", "ytdl"),
- ("cards-blacklist", ("twitch.tv",))),
- "count": 0,
- }),
- # retweet
- ("https://twitter.com/jessica_3978/status/1296304589591810048", {
- "options": (("retweets", True),),
- "count": 2,
- "keyword": {
- "tweet_id" : 1296304589591810048,
- "retweet_id": 1296296016002547713,
- "date" : "dt:2020-08-20 04:34:32",
- },
- }),
- # original retweets (#1026)
- ("https://twitter.com/jessica_3978/status/1296304589591810048", {
- "options": (("retweets", "original"),),
- "count": 2,
- "keyword": {
- "tweet_id" : 1296296016002547713,
- "retweet_id": 1296296016002547713,
- "date" : "dt:2020-08-20 04:00:28",
- },
- }),
- # all Tweets from a 'conversation' (#1319)
- ("https://twitter.com/supernaturepics/status/604341487988576256", {
- "options": (("conversations", True),),
- "count": 5,
- }),
- # retweet with missing media entities (#1555)
- ("https://twitter.com/morino_ya/status/1392763691599237121", {
- "options": (("retweets", True),),
- "count": 0, # private
- }),
- # deleted quote tweet (#2225)
- ("https://twitter.com/i/web/status/1460044411165888515", {
- "count": 0,
- }),
- # "Misleading" content
- ("https://twitter.com/i/web/status/1486373748911575046", {
- "count": 4,
- }),
- # age-restricted (#2354)
- ("https://twitter.com/mightbecursed/status/1492954264909479936", {
- "options": (("syndication", True),),
- "keyword": {"date": "dt:2022-02-13 20:10:09"},
- "count": 1,
- }),
- # media alt texts / descriptions (#2617)
- ("https://twitter.com/my0nruri/status/1528379296041299968", {
- "keyword": {"description": "oc"}
- }),
- # '?format=...&name=...'-style URLs
- ("https://twitter.com/poco_dandy/status/1150646424461176832", {
- "options": (("cards", True),),
- "pattern": r"https://pbs.twimg.com/card_img/157\d+/[\w-]+"
- r"\?format=(jpg|png)&name=orig$",
- "range": "1-2",
- }),
- # note tweet with long 'content'
- ("https://twitter.com/i/web/status/1629193457112686592", {
- "keyword": {
- "content": """\
-BREAKING - DEADLY LIES: Independent researchers at Texas A&M University have \
-just contradicted federal government regulators, saying that toxic air \
-pollutants in East Palestine, Ohio, could pose long-term risks. \n\nThe \
-Washington Post writes, "Three weeks after the toxic train derailment in \
-Ohio, an analysis of Environmental Protection Agency data has found nine air \
-pollutants at levels that could raise long-term health concerns in and around \
-East Palestine, according to an independent analysis. \n\n\"The analysis by \
-Texas A&M University seems to contradict statements by state and federal \
-regulators that air near the crash site is completely safe, despite residents \
-complaining about rashes, breathing problems and other health effects." \
-Your reaction.""",
- },
- }),
- )
+ example = "https://twitter.com/USER/status/12345"
def __init__(self, match):
TwitterExtractor.__init__(self, match)
@@ -923,21 +722,49 @@ Your reaction.""",
if conversations:
self._accessible = (conversations == "accessible")
return self._tweets_conversation(self.tweet_id)
- else:
- return self._tweets_single(self.tweet_id)
- def _tweets_single(self, tweet_id):
- tweets = []
+ endpoint = self.config("tweet-endpoint")
+ if endpoint == "detail" or endpoint in (None, "auto") and \
+ self.api.headers["x-twitter-auth-type"]:
+ return self._tweets_detail(self.tweet_id)
+ return self._tweets_single(self.tweet_id)
+
+ def _tweets_single(self, tweet_id):
tweet = self.api.tweet_result_by_rest_id(tweet_id)
- self._assign_user(tweet["core"]["user_results"]["result"])
+
+ try:
+ self._assign_user(tweet["core"]["user_results"]["result"])
+ except KeyError:
+ raise exception.StopExtraction(
+ "'%s'", tweet.get("reason") or "Unavailable")
+
+ yield tweet
+
+ if not self.quoted:
+ return
while True:
- tweets.append(tweet)
tweet_id = tweet["legacy"].get("quoted_status_id_str")
if not tweet_id:
break
tweet = self.api.tweet_result_by_rest_id(tweet_id)
+ tweet["legacy"]["quoted_by_id_str"] = tweet_id
+ yield tweet
+
+ def _tweets_detail(self, tweet_id):
+ tweets = []
+
+ for tweet in self.api.tweet_detail(tweet_id):
+ if tweet["rest_id"] == tweet_id or \
+ tweet.get("_retweet_id_str") == tweet_id:
+ if self._user_obj is None:
+ self._assign_user(tweet["core"]["user_results"]["result"])
+ tweets.append(tweet)
+
+ tweet_id = tweet["legacy"].get("quoted_status_id_str")
+ if not tweet_id:
+ break
return tweets
@@ -965,21 +792,7 @@ class TwitterAvatarExtractor(TwitterExtractor):
filename_fmt = "avatar {date}.{extension}"
archive_fmt = "AV_{user[id]}_{date}"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/photo"
- test = (
- ("https://twitter.com/supernaturepics/photo", {
- "pattern": r"https://pbs\.twimg\.com/profile_images"
- r"/554585280938659841/FLVAlX18\.jpeg",
- "keyword": {
- "date": "dt:2015-01-12 10:26:49",
- "extension": "jpeg",
- "filename": "FLVAlX18",
- "tweet_id": 554585280938659841,
- },
- }),
- ("https://twitter.com/User16/photo", {
- "count": 0,
- }),
- )
+ example = "https://twitter.com/USER/photo"
def tweets(self):
self.api._user_id_by_screen_name(self.user)
@@ -1001,20 +814,7 @@ class TwitterBackgroundExtractor(TwitterExtractor):
filename_fmt = "background {date}.{extension}"
archive_fmt = "BG_{user[id]}_{date}"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/header_photo"
- test = (
- ("https://twitter.com/supernaturepics/header_photo", {
- "pattern": r"https://pbs\.twimg\.com/profile_banners"
- r"/2976459548/1421058583",
- "keyword": {
- "date": "dt:2015-01-12 10:29:43",
- "filename": "1421058583",
- "tweet_id": 554586009367478272,
- },
- }),
- ("https://twitter.com/User16/header_photo", {
- "count": 0,
- }),
- )
+ example = "https://twitter.com/USER/header_photo"
def tweets(self):
self.api._user_id_by_screen_name(self.user)
@@ -1034,13 +834,7 @@ class TwitterImageExtractor(Extractor):
category = "twitter"
subcategory = "image"
pattern = r"https?://pbs\.twimg\.com/media/([\w-]+)(?:\?format=|\.)(\w+)"
- test = (
- ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG?format=jpg&name=orig", {
- "options": (("size", "4096x4096,orig"),),
- "url": "cb3042a6f6826923da98f0d2b66c427e9385114c",
- }),
- ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG.jpg:orig"),
- )
+ example = "https://pbs.twimg.com/media/ABCDE?format=jpg&name=orig"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -1071,23 +865,19 @@ class TwitterAPI():
self._syndication = self.extractor.syndication
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
- cookies = extractor.session.cookies
- cookiedomain = extractor.cookiedomain
+ cookies = extractor.cookies
+ cookies_domain = extractor.cookies_domain
csrf = extractor.config("csrf")
if csrf is None or csrf == "cookies":
- csrf_token = cookies.get("ct0", domain=cookiedomain)
+ csrf_token = cookies.get("ct0", domain=cookies_domain)
else:
csrf_token = None
if not csrf_token:
csrf_token = util.generate_token()
- cookies.set("ct0", csrf_token, domain=cookiedomain)
+ cookies.set("ct0", csrf_token, domain=cookies_domain)
- auth_token = cookies.get("auth_token", domain=cookiedomain)
-
- search = extractor.config("search-endpoint")
- if search == "rest":
- self.search_timeline = self.search_adaptive
+ auth_token = cookies.get("auth_token", domain=cookies_domain)
self.headers = {
"Accept": "*/*",
@@ -1216,7 +1006,19 @@ class TwitterAPI():
"withArticleRichContentState": False,
}),
}
- return self._call(endpoint, params)["data"]["tweetResult"]["result"]
+ tweet = self._call(endpoint, params)["data"]["tweetResult"]["result"]
+ if "tweet" in tweet:
+ tweet = tweet["tweet"]
+
+ if tweet.get("__typename") == "TweetUnavailable":
+ reason = tweet.get("reason")
+ if reason == "NsfwLoggedOut":
+ raise exception.AuthorizationError("NSFW Tweet")
+ if reason == "Protected":
+ raise exception.AuthorizationError("Protected Tweet")
+ raise exception.StopExtraction("Tweet unavailable ('%s')", reason)
+
+ return tweet
def tweet_detail(self, tweet_id):
endpoint = "/graphql/JlLZj42Ltr2qwjasw-l5lQ/TweetDetail"
@@ -1324,16 +1126,6 @@ class TwitterAPI():
return self._pagination_tweets(
endpoint, variables, ("list", "tweets_timeline", "timeline"))
- def search_adaptive(self, query):
- endpoint = "/2/search/adaptive.json"
- params = self.params.copy()
- params["q"] = query
- params["tweet_search_mode"] = "live"
- params["query_source"] = "typed_query"
- params["pc"] = "1"
- params["spelling_corrections"] = "1"
- return self._pagination_legacy(endpoint, params)
-
def search_timeline(self, query):
endpoint = "/graphql/7jT5GT59P8IFjgxwqnEdQw/SearchTimeline"
variables = {
@@ -1453,8 +1245,8 @@ class TwitterAPI():
guest_token = self._guest_token()
if guest_token != self.headers["x-guest-token"]:
self.headers["x-guest-token"] = guest_token
- self.extractor.session.cookies.set(
- "gt", guest_token, domain=self.extractor.cookiedomain)
+ self.extractor.cookies.set(
+ "gt", guest_token, domain=self.extractor.cookies_domain)
def _call(self, endpoint, params, method="GET", auth=True, root=None):
url = (root or self.root) + endpoint
@@ -1647,8 +1439,8 @@ class TwitterAPI():
if user.get("blocked_by"):
if self.headers["x-twitter-auth-type"] and \
extr.config("logout"):
- extr._cookiefile = None
- del extr.session.cookies["auth_token"]
+ extr.cookies_file = None
+ del extr.cookies["auth_token"]
self.headers["x-twitter-auth-type"] = None
extr.log.info("Retrying API request as guest")
continue
@@ -1902,7 +1694,7 @@ def _login_impl(extr, username, password):
extr.log.debug(response.text)
raise exception.AuthenticationError(", ".join(errors))
- extr.session.cookies.clear()
+ extr.cookies.clear()
api = TwitterAPI(extr)
api._authenticate_guest()
headers = api.headers
@@ -2042,5 +1834,5 @@ def _login_impl(extr, username, password):
return {
cookie.name: cookie.value
- for cookie in extr.session.cookies
+ for cookie in extr.cookies
}
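
Several of the new metadata fields in the twitter hunks above are derived from snowflake IDs rather than from API-supplied timestamps: date_original from the retweeted status ID, and date_liked / date_bookmarked from sortIndex (now guarded with "or 0"). A small worked example of that ID arithmetic, using the retweet ID from the removed test case, reproduces the date expected there:

    # Worked example of the snowflake-to-timestamp arithmetic used for
    # "date_original" above; the ID is taken from the removed test case.
    from datetime import datetime, timezone

    retweet_id = 1296296016002547713
    # the upper bits of a snowflake ID encode milliseconds since the
    # Twitter epoch (2010-11-04, i.e. 1288834974657 ms after the Unix epoch)
    ts = ((retweet_id >> 22) + 1288834974657) // 1000
    print(datetime.fromtimestamp(ts, timezone.utc))
    # 2020-08-20 04:00:28+00:00
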
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index b298c27..a1b87b9 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -76,66 +76,7 @@ class UnsplashImageExtractor(UnsplashExtractor):
"""Extractor for a single unsplash photo"""
subcategory = "image"
pattern = BASE_PATTERN + r"/photos/([^/?#]+)"
- test = ("https://unsplash.com/photos/lsoogGC_5dg", {
- "pattern": r"https://images\.unsplash\.com/photo-1586348943529-"
- r"beaae6c28db9\?ixid=\w+&ixlib=rb-4.0.3",
- "keyword": {
- "alt_description": "re:silhouette of trees near body of water ",
- "blur_hash": "LZP4uQS4jboe%#o0WCa}2doJNaaz",
- "? categories": list,
- "color": "#f3c08c",
- "created_at": "2020-04-08T12:29:42Z",
- "date": "dt:2020-04-08 12:29:42",
- "description": "The Island",
- "downloads": int,
- "exif": {
- "aperture": "11",
- "exposure_time": "30",
- "focal_length": "70.0",
- "iso": 200,
- "make": "Canon",
- "model": "Canon EOS 5D Mark IV"
- },
- "extension": "jpg",
- "filename": "photo-1586348943529-beaae6c28db9",
- "height": 6272,
- "id": "lsoogGC_5dg",
- "liked_by_user": False,
- "likes": int,
- "location": {
- "city": "Beaver Dam",
- "country": "United States",
- "name": "Beaver Dam, WI 53916, USA",
- "position": {
- "latitude": 43.457769,
- "longitude": -88.837329,
- },
- },
- "promoted_at": "2020-04-08T15:12:03Z",
- "sponsorship": None,
- "tags": list,
- "updated_at": str,
- "user": {
- "accepted_tos": True,
- "bio": str,
- "first_name": "Dave",
- "id": "uMJXuywXLiU",
- "instagram_username": "just_midwest_rock",
- "last_name": "Hoefler",
- "location": None,
- "name": "Dave Hoefler",
- "portfolio_url": None,
- "total_collections": int,
- "total_likes": int,
- "total_photos": int,
- "twitter_username": None,
- "updated_at": str,
- "username": "davehoefler",
- },
- "views": int,
- "width": 4480,
- },
- })
+ example = "https://unsplash.com/photos/ID"
def photos(self):
url = "{}/napi/photos/{}".format(self.root, self.item)
@@ -146,12 +87,7 @@ class UnsplashUserExtractor(UnsplashExtractor):
"""Extractor for all photos of an unsplash user"""
subcategory = "user"
pattern = BASE_PATTERN + r"/@(\w+)/?$"
- test = ("https://unsplash.com/@davehoefler", {
- "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
- r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
- "range": "1-30",
- "count": 30,
- })
+ example = "https://unsplash.com/@USER"
def photos(self):
url = "{}/napi/users/{}/photos".format(self.root, self.item)
@@ -163,12 +99,7 @@ class UnsplashFavoriteExtractor(UnsplashExtractor):
"""Extractor for all likes of an unsplash user"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/@(\w+)/likes"
- test = ("https://unsplash.com/@davehoefler/likes", {
- "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
- r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
- "range": "1-30",
- "count": 30,
- })
+ example = "https://unsplash.com/@USER/likes"
def photos(self):
url = "{}/napi/users/{}/likes".format(self.root, self.item)
@@ -180,18 +111,7 @@ class UnsplashCollectionExtractor(UnsplashExtractor):
"""Extractor for an unsplash collection"""
subcategory = "collection"
pattern = BASE_PATTERN + r"/collections/([^/?#]+)(?:/([^/?#]+))?"
- test = (
- ("https://unsplash.com/collections/3178572/winter", {
- "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
- r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
- "keyword": {"collection_id": "3178572",
- "collection_title": "winter"},
- "range": "1-30",
- "count": 30,
- }),
- ("https://unsplash.com/collections/3178572/"),
- ("https://unsplash.com/collections/_8qJQ2bCMWE/2021.05"),
- )
+ example = "https://unsplash.com/collections/12345/TITLE"
def __init__(self, match):
UnsplashExtractor.__init__(self, match)
@@ -210,13 +130,7 @@ class UnsplashSearchExtractor(UnsplashExtractor):
"""Extractor for unsplash search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^#]+))?"
- test = ("https://unsplash.com/s/photos/hair-style", {
- "pattern": r"https://(images|plus)\.unsplash\.com"
- r"/((flagged/|premium_)?photo-\d+-\w+"
- r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
- "range": "1-30",
- "count": 30,
- })
+ example = "https://unsplash.com/s/photos/QUERY"
def __init__(self, match):
UnsplashExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/uploadir.py b/gallery_dl/extractor/uploadir.py
index bd18c0a..ce34e7d 100644
--- a/gallery_dl/extractor/uploadir.py
+++ b/gallery_dl/extractor/uploadir.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -20,40 +20,7 @@ class UploadirFileExtractor(Extractor):
filename_fmt = "{filename} ({id}).{extension}"
archive_fmt = "{id}"
pattern = r"(?:https?://)?uploadir\.com/(?:user/)?u(?:ploads)?/([^/?#]+)"
- test = (
- # image
- ("https://uploadir.com/u/rd3t46ry", {
- "pattern": r"https://uploadir\.com/u/rd3t46ry",
- "count": 1,
- "keyword": {
- "extension": "jpg",
- "filename": "Chloe and Rachel 4K jpg",
- "id": "rd3t46ry",
- },
- }),
- # archive
- ("https://uploadir.com/uploads/gxe8ti9v/downloads/new", {
- "pattern": r"https://uploadir\.com/uploads/gxe8ti9v/downloads",
- "count": 1,
- "keyword": {
- "extension": "zip",
- "filename": "NYAN-Mods-Pack#1",
- "id": "gxe8ti9v",
- },
- }),
- # utf-8 filename
- ("https://uploadir.com/u/fllda6xl", {
- "pattern": r"https://uploadir\.com/u/fllda6xl",
- "count": 1,
- "keyword": {
- "extension": "png",
- "filename": "_圖片_🖼_image_",
- "id": "fllda6xl",
- },
- }),
- ("https://uploadir.com/uploads/rd3t46ry"),
- ("https://uploadir.com/user/uploads/rd3t46ry"),
- )
+ example = "https://uploadir.com/u/ID"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py
index 972b508..f2e6521 100644
--- a/gallery_dl/extractor/urlshortener.py
+++ b/gallery_dl/extractor/urlshortener.py
@@ -35,25 +35,13 @@ class UrlshortenerLinkExtractor(UrlshortenerExtractor):
"""Extractor for general-purpose URL shorteners"""
subcategory = "link"
pattern = BASE_PATTERN + r"/([^/?#]+)"
- test = (
- ("https://bit.ly/3cWIUgq", {
- "count": 1,
- "pattern": "^https://gumroad.com/l/storm_b1",
- }),
- ("https://t.co/bCgBY8Iv5n", {
- "count": 1,
- "pattern": "^https://twitter.com/elonmusk/status/"
- "1421395561324896257/photo/1",
- }),
- ("https://t.co/abcdefghij", {
- "exception": exception.NotFoundError,
- }),
- )
+ example = "https://bit.ly/abcde"
def __init__(self, match):
UrlshortenerExtractor.__init__(self, match)
self.id = match.group(match.lastindex)
+ def _init(self):
try:
self.headers = INSTANCES[self.category]["headers"]
except Exception:
diff --git a/gallery_dl/extractor/vanillarock.py b/gallery_dl/extractor/vanillarock.py
index 6b1178e..1ce969f 100644
--- a/gallery_dl/extractor/vanillarock.py
+++ b/gallery_dl/extractor/vanillarock.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -30,10 +30,7 @@ class VanillarockPostExtractor(VanillarockExtractor):
archive_fmt = "{filename}"
pattern = (r"(?:https?://)?(?:www\.)?vanilla-rock\.com"
r"(/(?!category/|tag/)[^/?#]+)/?$")
- test = ("https://vanilla-rock.com/mizuhashi_parsee-5", {
- "url": "7fb9a4d18d9fa22d7295fee8d94ab5a7a52265dd",
- "keyword": "b91df99b714e1958d9636748b1c81a07c3ef52c9",
- })
+ example = "https://vanilla-rock.com/TITLE"
def items(self):
extr = text.extract_from(self.request(self.root + self.path).text)
@@ -66,18 +63,7 @@ class VanillarockTagExtractor(VanillarockExtractor):
subcategory = "tag"
pattern = (r"(?:https?://)?(?:www\.)?vanilla-rock\.com"
r"(/(?:tag|category)/[^?#]+)")
- test = (
- ("https://vanilla-rock.com/tag/%e5%b0%84%e5%91%bd%e4%b8%b8%e6%96%87", {
- "pattern": VanillarockPostExtractor.pattern,
- "count": ">= 12",
- }),
- (("https://vanilla-rock.com/category/%e4%ba%8c%e6%ac%a1%e3%82%a8%e3%83"
- "%ad%e7%94%bb%e5%83%8f/%e8%90%8c%e3%81%88%e3%83%bb%e3%82%bd%e3%83%95"
- "%e3%83%88%e3%82%a8%e3%83%ad"), {
- "pattern": VanillarockPostExtractor.pattern,
- "count": ">= 5",
- }),
- )
+ example = "https://vanilla-rock.com/tag/TAG"
def items(self):
url = self.root + self.path
diff --git a/gallery_dl/extractor/vichan.py b/gallery_dl/extractor/vichan.py
index 2fafb56..79d7916 100644
--- a/gallery_dl/extractor/vichan.py
+++ b/gallery_dl/extractor/vichan.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -40,33 +40,7 @@ class VichanThreadExtractor(VichanExtractor):
filename_fmt = "{time}{num:?-//} {filename}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
- test = (
- ("https://8kun.top/test/res/65248.html", {
- "pattern": r"https://media\.128ducks\.com/file_store/\w{64}\.\w+",
- "count": ">= 8",
- }),
- # old-style file URLs (#1101)
- # ("https://8kun.top/d/res/13258.html", {
- # "pattern": r"https://media\.128ducks\.com/d/src/\d+(-\d)?\.\w+",
- # "range": "1-20",
- # }),
-
- ("https://wikieat.club/cel/res/25321.html", {
- "pattern": r"https://wikieat\.club/cel/src/\d+(-\d)?\.\w+",
- "count": ">= 200",
- }),
-
- ("https://smuglo.li/a/res/1154380.html", {
- "pattern": r"https://smug.+/a/src/\d+(-\d)?\.\w+",
- "count": ">= 18",
- "keyword": {
- "board": "a",
- "thread": "1154380",
- "title": "Mob Psycho 100 Season 3",
- },
- }),
- ("https://smugloli.net/a/res/1145409.html"),
- )
+ example = "https://8kun.top/a/res/12345.html"
def __init__(self, match):
VichanExtractor.__init__(self, match)
@@ -123,28 +97,7 @@ class VichanBoardExtractor(VichanExtractor):
"""Extractor for vichan boards"""
subcategory = "board"
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
- test = (
- ("https://8kun.top/v/index.html", {
- "pattern": VichanThreadExtractor.pattern,
- "count": ">= 100",
- }),
- ("https://8kun.top/v/2.html"),
- ("https://8kun.top/v/index.html?PageSpeed=noscript"),
-
- ("https://wikieat.club/cel/index.html", {
- "pattern": VichanThreadExtractor.pattern,
- "count": ">= 100",
- }),
- ("https://wikieat.club/cel/catalog.html"),
- ("https://wikieat.club/cel/2.html"),
-
- ("https://smuglo.li/a", {
- "pattern": VichanThreadExtractor.pattern,
- "count": ">= 100",
- }),
- ("https://smuglo.li/a/1.html"),
- ("https://smugloli.net/cute/catalog.html"),
- )
+ example = "https://8kun.top/a/"
def __init__(self, match):
VichanExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py
index 6dff01c..4ee252e 100644
--- a/gallery_dl/extractor/vipergirls.py
+++ b/gallery_dl/extractor/vipergirls.py
@@ -23,12 +23,8 @@ class VipergirlsExtractor(Extractor):
root = "https://vipergirls.to"
request_interval = 0.5
request_interval_min = 0.2
- cookiedomain = ".vipergirls.to"
- cookienames = ("vg_userid", "vg_password")
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.session.headers["Referer"] = self.root
+ cookies_domain = ".vipergirls.to"
+ cookies_names = ("vg_userid", "vg_password")
def items(self):
self.login()
@@ -42,10 +38,12 @@ class VipergirlsExtractor(Extractor):
yield Message.Queue, image.attrib["main_url"], data
def login(self):
- if not self._check_cookies(self.cookienames):
- username, password = self._get_auth_info()
- if username:
- self._update_cookies(self._login_impl(username, password))
+ if self.cookies_check(self.cookies_names):
+ return
+
+ username, password = self._get_auth_info()
+ if username:
+ self.cookies_update(self._login_impl(username, password))
@cache(maxage=90*24*3600, keyarg=1)
def _login_impl(self, username, password):
@@ -71,17 +69,7 @@ class VipergirlsThreadExtractor(VipergirlsExtractor):
"""Extractor for vipergirls threads"""
subcategory = "thread"
pattern = BASE_PATTERN + r"/threads/(\d+)(?:-[^/?#]+)?(/page\d+)?$"
- test = (
- (("https://vipergirls.to/threads/4328304"
- "-2011-05-28-Danica-Simply-Beautiful-x112-4500x3000"), {
- "url": "0d75cb42777f5bebc0d284d1d38cb90c750c61d9",
- "count": 225,
- }),
- ("https://vipergirls.to/threads/6858916-Karina/page4", {
- "count": 1279,
- }),
- ("https://vipergirls.to/threads/4328304"),
- )
+ example = "https://vipergirls.to/threads/12345-TITLE"
def __init__(self, match):
VipergirlsExtractor.__init__(self, match)
@@ -102,21 +90,7 @@ class VipergirlsPostExtractor(VipergirlsExtractor):
subcategory = "post"
pattern = (BASE_PATTERN +
r"/threads/(\d+)(?:-[^/?#]+)?\?p=\d+[^#]*#post(\d+)")
- test = (
- (("https://vipergirls.to/threads/4328304-2011-05-28-Danica-Simply-"
- "Beautiful-x112-4500x3000?p=116038081&viewfull=1#post116038081"), {
- "pattern": r"https://vipr\.im/\w{12}$",
- "range": "2-113",
- "count": 112,
- "keyword": {
- "id": "116038081",
- "imagecount": "113",
- "number": "116038081",
- "thread_id": "4328304",
- "title": "FemJoy Danica - Simply Beautiful (x112) 3000x4500",
- },
- }),
- )
+ example = "https://vipergirls.to/threads/12345-TITLE?p=23456#post23456"
def __init__(self, match):
VipergirlsExtractor.__init__(self, match)
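
The rewritten login() above follows the same early-return idiom as the tsumino and twitter modules earlier in this diff. A minimal end-to-end sketch of that flow; the site, domain, cookie names, and form fields are placeholders, not vipergirls specifics:

    # Minimal sketch of the post-rename login flow (cookies_check,
    # cookies_update, cookies_domain/cookies_names); details are placeholders.
    from gallery_dl import exception
    from gallery_dl.cache import cache
    from gallery_dl.extractor.common import Extractor

    class ExampleForumExtractor(Extractor):
        cookies_domain = ".example.org"
        cookies_names = ("userid", "password_hash")

        def login(self):
            if self.cookies_check(self.cookies_names):
                return      # cookies already present (e.g. from a browser)
            username, password = self._get_auth_info()
            if username:
                self.cookies_update(self._login_impl(username, password))

        @cache(maxage=90*24*3600, keyarg=1)
        def _login_impl(self, username, password):
            url = "https://example.org/login"
            data = {"username": username, "password": password}
            response = self.request(url, method="POST", data=data)
            if not response.history:
                raise exception.AuthenticationError()
            # cookies_update() accepts a mapping of cookie names to values
            return {c.name: c.value for c in response.cookies}
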
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 5692452..c9cd02f 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -105,43 +105,7 @@ class VkPhotosExtractor(VkExtractor):
pattern = (BASE_PATTERN + r"/(?:"
r"(?:albums|photos|id)(-?\d+)"
r"|(?!(?:album|tag)-?\d+_?)([^/?#]+))")
- test = (
- ("https://vk.com/id398982326", {
- "pattern": r"https://sun\d+-\d+\.userapi\.com/s/v1/if1"
- r"/[\w-]+\.jpg\?size=\d+x\d+&quality=96&type=album",
- "count": ">= 35",
- "keyword": {
- "id": r"re:\d+",
- "user": {
- "id": "398982326",
- "info": "Мы за Движуху! – m1ni SounD #4 [EROmusic]",
- "name": "",
- "nick": "Dobrov Kurva",
- },
- },
- }),
- ("https://vk.com/cosplayinrussia", {
- "range": "15-25",
- "keyword": {
- "id": r"re:\d+",
- "user": {
- "id" : "-165740836",
- "info": str,
- "name": "cosplayinrussia",
- "nick": "Косплей | Cosplay 18+",
- },
- },
- }),
- # photos without width/height (#2535)
- ("https://vk.com/id76957806", {
- "pattern": r"https://sun\d+-\d+\.userapi\.com/",
- "range": "1-9",
- "count": 9,
- }),
- ("https://m.vk.com/albums398982326"),
- ("https://www.vk.com/id398982326?profile=1"),
- ("https://vk.com/albums-165740836"),
- )
+ example = "https://vk.com/id12345"
def __init__(self, match):
VkExtractor.__init__(self, match)
@@ -181,18 +145,7 @@ class VkAlbumExtractor(VkExtractor):
subcategory = "album"
directory_fmt = ("{category}", "{user[id]}", "{album[id]}")
pattern = BASE_PATTERN + r"/album(-?\d+)_(\d+)$"
- test = (
- ("https://vk.com/album-165740836_281339889", {
- "count": 12,
- }),
- # "Access denied" (#2556)
- ("https://vk.com/album-53775183_00", {
- "exception": exception.AuthorizationError,
- }),
- ("https://vk.com/album232175027_00", {
- "exception": exception.AuthorizationError,
- }),
- )
+ example = "https://vk.com/album12345_00"
def __init__(self, match):
VkExtractor.__init__(self, match)
@@ -214,11 +167,7 @@ class VkTaggedExtractor(VkExtractor):
subcategory = "tagged"
directory_fmt = ("{category}", "{user[id]}", "tags")
pattern = BASE_PATTERN + r"/tag(-?\d+)$"
- test = (
- ("https://vk.com/tag304303884", {
- "count": 44,
- }),
- )
+ example = "https://vk.com/tag12345"
def __init__(self, match):
VkExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 053a799..14e3c7b 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -116,16 +116,7 @@ class VscoUserExtractor(VscoExtractor):
"""Extractor for images from a user on vsco.co"""
subcategory = "user"
pattern = BASE_PATTERN + r"(?:/gallery|/images(?:/\d+)?)?/?(?:$|[?#])"
- test = (
- ("https://vsco.co/missuri/gallery", {
- "pattern": r"https://image(-aws.+)?\.vsco\.co"
- r"/[0-9a-f/]+/[\w-]+\.\w+",
- "range": "1-80",
- "count": 80,
- }),
- ("https://vsco.co/missuri/images/1"),
- ("https://vsco.co/missuri"),
- )
+ example = "https://vsco.co/USER/gallery"
def images(self):
url = "{}/{}/gallery".format(self.root, self.user)
@@ -149,11 +140,7 @@ class VscoCollectionExtractor(VscoExtractor):
directory_fmt = ("{category}", "{user}", "collection")
archive_fmt = "c_{user}_{id}"
pattern = BASE_PATTERN + r"/collection/"
- test = ("https://vsco.co/vsco/collection/1", {
- "pattern": r"https://image(-aws.+)?\.vsco\.co/[0-9a-f/]+/[\w-]+\.\w+",
- "range": "1-80",
- "count": 80,
- })
+ example = "https://vsco.co/USER/collection/12345"
def images(self):
url = "{}/{}/collection/1".format(self.root, self.user)
@@ -176,29 +163,7 @@ class VscoImageExtractor(VscoExtractor):
"""Extractor for individual images on vsco.co"""
subcategory = "image"
pattern = BASE_PATTERN + r"/media/([0-9a-fA-F]+)"
- test = (
- ("https://vsco.co/erenyildiz/media/5d34b93ef632433030707ce2", {
- "url": "a45f9712325b42742324b330c348b72477996031",
- "content": "1394d070828d82078035f19a92f404557b56b83f",
- "keyword": {
- "id" : "5d34b93ef632433030707ce2",
- "user" : "erenyildiz",
- "grid" : "erenyildiz",
- "meta" : dict,
- "tags" : list,
- "date" : "dt:2019-07-21 19:12:11",
- "video" : False,
- "width" : 1537,
- "height": 1537,
- "description": "re:Ni seviyorum. #vsco #vscox #vscochallenges",
- },
- }),
- ("https://vsco.co/jimenalazof/media/5b4feec558f6c45c18c040fd", {
- "url": "08e7eef3301756ce81206c0b47c1e9373756a74a",
- "content": "e739f058d726ee42c51c180a505747972a7dfa47",
- "keyword": {"video" : True},
- }),
- )
+ example = "https://vsco.co/USER/media/0123456789abcdef"
def __init__(self, match):
VscoExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py
index 0ba0d91..479e8a8 100644
--- a/gallery_dl/extractor/wallhaven.py
+++ b/gallery_dl/extractor/wallhaven.py
@@ -20,8 +20,7 @@ class WallhavenExtractor(Extractor):
archive_fmt = "{id}"
request_interval = 1.4
- def __init__(self, match):
- Extractor.__init__(self, match)
+ def _init(self):
self.api = WallhavenAPI(self)
def items(self):
@@ -58,15 +57,7 @@ class WallhavenSearchExtractor(WallhavenExtractor):
directory_fmt = ("{category}", "{search[q]}")
archive_fmt = "s_{search[q]}_{id}"
pattern = r"(?:https?://)?wallhaven\.cc/search(?:/?\?([^#]+))?"
- test = (
- ("https://wallhaven.cc/search?q=touhou"),
- (("https://wallhaven.cc/search?q=id%3A87"
- "&categories=111&purity=100&sorting=date_added&order=asc&page=3"), {
- "pattern": (r"https://w\.wallhaven\.cc"
- r"/full/\w\w/wallhaven-\w+\.\w+"),
- "count": "<= 30",
- }),
- )
+ example = "https://wallhaven.cc/search?q=QUERY"
def __init__(self, match):
WallhavenExtractor.__init__(self, match)
@@ -84,9 +75,7 @@ class WallhavenCollectionExtractor(WallhavenExtractor):
subcategory = "collection"
directory_fmt = ("{category}", "{username}", "{collection_id}")
pattern = r"(?:https?://)?wallhaven\.cc/user/([^/?#]+)/favorites/(\d+)"
- test = ("https://wallhaven.cc/user/AksumkA/favorites/74", {
- "count": ">= 50",
- })
+ example = "https://wallhaven.cc/user/USER/favorites/12345"
def __init__(self, match):
WallhavenExtractor.__init__(self, match)
@@ -103,12 +92,15 @@ class WallhavenUserExtractor(WallhavenExtractor):
"""Extractor for a wallhaven user"""
subcategory = "user"
pattern = r"(?:https?://)?wallhaven\.cc/user/([^/?#]+)/?$"
- test = ("https://wallhaven.cc/user/AksumkA/",)
+ example = "https://wallhaven.cc/user/USER"
def __init__(self, match):
WallhavenExtractor.__init__(self, match)
self.username = match.group(1)
+ def initialize(self):
+ pass
+
def items(self):
base = "{}/user/{}/".format(self.root, self.username)
return self._dispatch_extractors((
@@ -121,10 +113,7 @@ class WallhavenCollectionsExtractor(WallhavenExtractor):
"""Extractor for all collections of a wallhaven user"""
subcategory = "collections"
pattern = r"(?:https?://)?wallhaven\.cc/user/([^/?#]+)/favorites/?$"
- test = ("https://wallhaven.cc/user/AksumkA/favorites", {
- "pattern": WallhavenCollectionExtractor.pattern,
- "count": 4,
- })
+ example = "https://wallhaven.cc/user/USER/favorites"
def __init__(self, match):
WallhavenExtractor.__init__(self, match)
@@ -144,12 +133,7 @@ class WallhavenUploadsExtractor(WallhavenExtractor):
directory_fmt = ("{category}", "{username}")
archive_fmt = "u_{username}_{id}"
pattern = r"(?:https?://)?wallhaven\.cc/user/([^/?#]+)/uploads"
- test = ("https://wallhaven.cc/user/AksumkA/uploads", {
- "pattern": (r"https://[^.]+\.wallhaven\.cc"
- r"/full/\w\w/wallhaven-\w+\.\w+"),
- "range": "1-100",
- "count": 100,
- })
+ example = "https://wallhaven.cc/user/USER/uploads"
def __init__(self, match):
WallhavenExtractor.__init__(self, match)
@@ -168,41 +152,7 @@ class WallhavenImageExtractor(WallhavenExtractor):
subcategory = "image"
pattern = (r"(?:https?://)?(?:wallhaven\.cc/w/|whvn\.cc/"
r"|w\.wallhaven\.cc/[a-z]+/\w\w/wallhaven-)(\w+)")
- test = (
- ("https://wallhaven.cc/w/01w334", {
- "pattern": (r"https://[^.]+\.wallhaven\.cc"
- r"/full/01/wallhaven-01w334\.jpg"),
- "content": "497212679383a465da1e35bd75873240435085a2",
- "keyword": {
- "id" : "01w334",
- "width" : 1920,
- "height" : 1200,
- "resolution" : "1920x1200",
- "ratio" : "1.6",
- "colors" : list,
- "tags" : list,
- "file_size" : 278799,
- "file_type" : "image/jpeg",
- "purity" : "sfw",
- "short_url" : "https://whvn.cc/01w334",
- "source" : str,
- "uploader" : {
- "group" : "Owner/Developer",
- "username" : "AksumkA",
- },
- "date" : "dt:2014-08-31 06:17:19",
- "wh_category": "anime",
- "views" : int,
- "favorites" : int,
- },
- }),
- # NSFW
- ("https://wallhaven.cc/w/dge6v3", {
- "url": "e4b802e70483f659d790ad5d0bd316245badf2ec",
- }),
- ("https://whvn.cc/01w334"),
- ("https://w.wallhaven.cc/full/01/wallhaven-01w334.jpg"),
- )
+ example = "https://wallhaven.cc/w/ID"
def __init__(self, match):
WallhavenExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/wallpapercave.py b/gallery_dl/extractor/wallpapercave.py
index 6c3af76..bce1026 100644
--- a/gallery_dl/extractor/wallpapercave.py
+++ b/gallery_dl/extractor/wallpapercave.py
@@ -18,9 +18,7 @@ class WallpapercaveImageExtractor(Extractor):
subcategory = "image"
root = "https://wallpapercave.com"
pattern = r"(?:https?://)?(?:www\.)?wallpapercave\.com"
- test = ("https://wallpapercave.com/w/wp10270355", {
- "content": "58b088aaa1cf1a60e347015019eb0c5a22b263a6",
- })
+ example = "https://wallpapercave.com/w/wp12345"
def items(self):
page = self.request(text.ensure_http_scheme(self.url)).text
diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py
index bdedfcb..6f152ed 100644
--- a/gallery_dl/extractor/warosu.py
+++ b/gallery_dl/extractor/warosu.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2022 Mike Fährmann
+# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,17 +21,7 @@ class WarosuThreadExtractor(Extractor):
filename_fmt = "{tim}-{filename}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
pattern = r"(?:https?://)?(?:www\.)?warosu\.org/([^/]+)/thread/(\d+)"
- test = (
- ("https://warosu.org/jp/thread/16656025", {
- "url": "889d57246ed67e491e5b8f7f124e50ea7991e770",
- "keyword": "c00ea4c5460c5986994f17bb8416826d42ca57c0",
- }),
- ("https://warosu.org/jp/thread/16658073", {
- "url": "4500cf3184b067424fd9883249bd543c905fbecd",
- "keyword": "7534edf4ec51891dbf44d775b73fbbefd52eec71",
- "content": "d48df0a701e6599312bfff8674f4aa5d4fb8db1c",
- }),
- )
+ example = "https://warosu.org/a/thread/12345"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py
index eca4f1a..ddbfaa0 100644
--- a/gallery_dl/extractor/weasyl.py
+++ b/gallery_dl/extractor/weasyl.py
@@ -30,8 +30,7 @@ class WeasylExtractor(Extractor):
return True
return False
- def __init__(self, match):
- Extractor.__init__(self, match)
+ def _init(self):
self.session.headers['X-Weasyl-API-Key'] = self.config("api-key")
def request_submission(self, submitid):
@@ -73,32 +72,7 @@ class WeasylExtractor(Extractor):
class WeasylSubmissionExtractor(WeasylExtractor):
subcategory = "submission"
pattern = BASE_PATTERN + r"(?:~[\w~-]+/submissions|submission)/(\d+)"
- test = (
- ("https://www.weasyl.com/~fiz/submissions/2031/a-wesley", {
- "pattern": "https://cdn.weasyl.com/~fiz/submissions/2031/41ebc1c29"
- "40be928532785dfbf35c37622664d2fbb8114c3b063df969562fc5"
- "1/fiz-a-wesley.png",
- "keyword": {
- "comments" : int,
- "date" : "dt:2012-04-20 00:38:04",
- "description" : "<p>(flex)</p>\n",
- "favorites" : int,
- "folder_name" : "Wesley Stuff",
- "folderid" : 2081,
- "friends_only": False,
- "owner" : "Fiz",
- "owner_login" : "fiz",
- "rating" : "general",
- "submitid" : 2031,
- "subtype" : "visual",
- "tags" : list,
- "title" : "A Wesley!",
- "type" : "submission",
- "views" : int,
- },
- }),
- ("https://www.weasyl.com/submission/2031/a-wesley"),
- )
+ example = "https://www.weasyl.com/~USER/submissions/12345/TITLE"
def __init__(self, match):
WeasylExtractor.__init__(self, match)
@@ -114,13 +88,7 @@ class WeasylSubmissionExtractor(WeasylExtractor):
class WeasylSubmissionsExtractor(WeasylExtractor):
subcategory = "submissions"
pattern = BASE_PATTERN + r"(?:~|submissions/)([\w~-]+)/?$"
- test = (
- ("https://www.weasyl.com/~tanidareal", {
- "count": ">= 200"
- }),
- ("https://www.weasyl.com/submissions/tanidareal"),
- ("https://www.weasyl.com/~aro~so")
- )
+ example = "https://www.weasyl.com/submissions/USER"
def __init__(self, match):
WeasylExtractor.__init__(self, match)
@@ -135,9 +103,7 @@ class WeasylFolderExtractor(WeasylExtractor):
subcategory = "folder"
directory_fmt = ("{category}", "{owner_login}", "{folder_name}")
pattern = BASE_PATTERN + r"submissions/([\w~-]+)\?folderid=(\d+)"
- test = ("https://www.weasyl.com/submissions/tanidareal?folderid=7403", {
- "count": ">= 12"
- })
+ example = "https://www.weasyl.com/submissions/USER?folderid=12345"
def __init__(self, match):
WeasylExtractor.__init__(self, match)
@@ -158,14 +124,7 @@ class WeasylJournalExtractor(WeasylExtractor):
filename_fmt = "{journalid} {title}.{extension}"
archive_fmt = "{journalid}"
pattern = BASE_PATTERN + r"journal/(\d+)"
- test = ("https://www.weasyl.com/journal/17647/bbcode", {
- "keyword": {
- "title" : "BBCode",
- "date" : "dt:2013-09-19 23:11:23",
- "content": "<p><a>javascript:alert(42);</a></p>\n\n"
- "<p>No more of that!</p>\n",
- },
- })
+ example = "https://www.weasyl.com/journal/12345"
def __init__(self, match):
WeasylExtractor.__init__(self, match)
@@ -182,9 +141,7 @@ class WeasylJournalsExtractor(WeasylExtractor):
filename_fmt = "{journalid} {title}.{extension}"
archive_fmt = "{journalid}"
pattern = BASE_PATTERN + r"journals/([\w~-]+)"
- test = ("https://www.weasyl.com/journals/charmander", {
- "count": ">= 2",
- })
+ example = "https://www.weasyl.com/journals/USER"
def __init__(self, match):
WeasylExtractor.__init__(self, match)
@@ -204,9 +161,7 @@ class WeasylFavoriteExtractor(WeasylExtractor):
subcategory = "favorite"
directory_fmt = ("{category}", "{owner_login}", "Favorites")
pattern = BASE_PATTERN + r"favorites\?userid=(\d+)"
- test = ("https://www.weasyl.com/favorites?userid=184616&feature=submit", {
- "count": ">= 5",
- })
+ example = "https://www.weasyl.com/favorites?userid=12345"
def __init__(self, match):
WeasylExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/webmshare.py b/gallery_dl/extractor/webmshare.py
index b038425..7e2b5ea 100644
--- a/gallery_dl/extractor/webmshare.py
+++ b/gallery_dl/extractor/webmshare.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,36 +21,7 @@ class WebmshareVideoExtractor(Extractor):
archive_fmt = "{id}"
pattern = (r"(?:https?://)?(?:s\d+\.)?webmshare\.com"
r"/(?:play/|download-webm/)?(\w{3,})")
- test = (
- ("https://webmshare.com/O9mWY", {
- "keyword": {
- "date": "dt:2022-12-04 00:00:00",
- "extension": "webm",
- "filename": "O9mWY",
- "height": 568,
- "id": "O9mWY",
- "thumb": "https://s1.webmshare.com/t/O9mWY.jpg",
- "title": "Yeah buddy over here",
- "url": "https://s1.webmshare.com/O9mWY.webm",
- "views": int,
- "width": 320,
- },
- }),
- ("https://s1.webmshare.com/zBGAg.webm", {
- "keyword": {
- "date": "dt:2018-12-07 00:00:00",
- "height": 1080,
- "id": "zBGAg",
- "thumb": "https://s1.webmshare.com/t/zBGAg.jpg",
- "title": "",
- "url": "https://s1.webmshare.com/zBGAg.webm",
- "views": int,
- "width": 1920,
- },
- }),
- ("https://webmshare.com/play/zBGAg"),
- ("https://webmshare.com/download-webm/zBGAg"),
- )
+ example = "https://webmshare.com/_ID_"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index 21f7c21..dc9a4f1 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Leonardo Taccari
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -18,10 +18,10 @@ BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com/(([^/?#]+)"
class WebtoonsBase():
category = "webtoons"
root = "https://www.webtoons.com"
- cookiedomain = ".webtoons.com"
+ cookies_domain = ".webtoons.com"
def setup_agegate_cookies(self):
- self._update_cookies({
+ self.cookies_update({
"atGDPR" : "AD_CONSENT",
"needCCPA" : "false",
"needCOPPA" : "false",
@@ -46,6 +46,8 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
archive_fmt = "{title_no}_{episode_no}_{num}"
pattern = (BASE_PATTERN + r"/([^/?#]+)/([^/?#]+)/(?:[^/?#]+))"
r"/viewer(?:\?([^#'\"]+))")
+ example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer"
+ "?title_no=123&episode_no=12345")
test = (
(("https://www.webtoons.com/en/comedy/safely-endangered"
"/ep-572-earth/viewer?title_no=352&episode_no=572"), {
@@ -71,15 +73,18 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
)
def __init__(self, match):
- self.path, self.lang, self.genre, self.comic, query = match.groups()
+ self.path, self.lang, self.genre, self.comic, self.query = \
+ match.groups()
- url = "{}/{}/viewer?{}".format(self.root, self.path, query)
+ url = "{}/{}/viewer?{}".format(self.root, self.path, self.query)
GalleryExtractor.__init__(self, match, url)
+
+ def _init(self):
self.setup_agegate_cookies()
- query = text.parse_query(query)
- self.title_no = query.get("title_no")
- self.episode_no = query.get("episode_no")
+ params = text.parse_query(self.query)
+ self.title_no = params.get("title_no")
+ self.episode_no = params.get("episode_no")
def metadata(self, page):
keywords, pos = text.extract(
@@ -116,37 +121,19 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
categorytransfer = True
pattern = (BASE_PATTERN + r"/([^/?#]+)/([^/?#]+))"
r"/list(?:\?([^#]+))")
- test = (
- # english
- (("https://www.webtoons.com/en/comedy/live-with-yourself/"
- "list?title_no=919"), {
- "pattern": WebtoonsEpisodeExtractor.pattern,
- "range": "1-15",
- "count": ">= 15",
- }),
- # french
- (("https://www.webtoons.com/fr/romance/subzero/"
- "list?title_no=1845&page=3"), {
- "count": ">= 15",
- }),
- # (#820)
- (("https://www.webtoons.com/en/challenge/scoob-and-shag/"
- "list?title_no=210827&page=9"), {
- "count": ">= 18",
- }),
- # (#1643)
- ("https://www.webtoons.com/es/romance/lore-olympus/"
- "list?title_no=1725"),
- )
+ example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123"
def __init__(self, match):
Extractor.__init__(self, match)
+ self.path, self.lang, self.genre, self.comic, self.query = \
+ match.groups()
+
+ def _init(self):
self.setup_agegate_cookies()
- self.path, self.lang, self.genre, self.comic, query = match.groups()
- query = text.parse_query(query)
- self.title_no = query.get("title_no")
- self.page_no = text.parse_int(query.get("page"), 1)
+ params = text.parse_query(self.query)
+ self.title_no = params.get("title_no")
+ self.page_no = text.parse_int(params.get("page"), 1)
def items(self):
page = None
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 5a3adc8..168d5a0 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -28,14 +28,15 @@ class WeiboExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self._prefix, self.user = match.groups()
+
+ def _init(self):
self.retweets = self.config("retweets", True)
self.videos = self.config("videos", True)
self.livephoto = self.config("livephoto", True)
cookies = _cookie_cache()
if cookies is not None:
- self.session.cookies.update(cookies)
- self.session.headers["Referer"] = self.root + "/"
+ self.cookies.update(cookies)
def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs)
@@ -217,16 +218,10 @@ class WeiboUserExtractor(WeiboExtractor):
"""Extractor for weibo user profiles"""
subcategory = "user"
pattern = USER_PATTERN + r"(?:$|#)"
- test = (
- ("https://weibo.com/1758989602", {
- "pattern": r"^https://weibo\.com/u/1758989602\?tabtype=feed$",
- }),
- ("https://weibo.com/u/1758989602"),
- ("https://weibo.com/p/1758989602"),
- ("https://m.weibo.cn/profile/2314621010"),
- ("https://m.weibo.cn/p/2304132314621010_-_WEIBO_SECOND_PROFILE_WEIBO"),
- ("https://www.weibo.com/p/1003062314621010/home"),
- )
+ example = "https://weibo.com/USER"
+
+ def initialize(self):
+ pass
def items(self):
base = "{}/u/{}?tabtype=".format(self.root, self._user_id())
@@ -243,10 +238,7 @@ class WeiboHomeExtractor(WeiboExtractor):
"""Extractor for weibo 'home' listings"""
subcategory = "home"
pattern = USER_PATTERN + r"\?tabtype=home"
- test = ("https://weibo.com/1758989602?tabtype=home", {
- "range": "1-30",
- "count": 30,
- })
+ example = "https://weibo.com/USER?tabtype=home"
def statuses(self):
endpoint = "/profile/myhot"
@@ -258,24 +250,7 @@ class WeiboFeedExtractor(WeiboExtractor):
"""Extractor for weibo user feeds"""
subcategory = "feed"
pattern = USER_PATTERN + r"\?tabtype=feed"
- test = (
- ("https://weibo.com/1758989602?tabtype=feed", {
- "range": "1-30",
- "count": 30,
- }),
- ("https://weibo.com/zhouyuxi77?tabtype=feed", {
- "keyword": {"status": {"user": {"id": 7488709788}}},
- "range": "1",
- }),
- ("https://www.weibo.com/n/周于希Sally?tabtype=feed", {
- "keyword": {"status": {"user": {"id": 7488709788}}},
- "range": "1",
- }),
- # deleted (#2521)
- ("https://weibo.com/u/7500315942?tabtype=feed", {
- "count": 0,
- }),
- )
+ example = "https://weibo.com/USER?tabtype=feed"
def statuses(self):
endpoint = "/statuses/mymblog"
@@ -287,12 +262,7 @@ class WeiboVideosExtractor(WeiboExtractor):
"""Extractor for weibo 'video' listings"""
subcategory = "videos"
pattern = USER_PATTERN + r"\?tabtype=video"
- test = ("https://weibo.com/1758989602?tabtype=video", {
- "pattern": r"https://f\.(video\.weibocdn\.com|us\.sinaimg\.cn)"
- r"/(../)?\w+\.mp4\?label=mp",
- "range": "1-30",
- "count": 30,
- })
+ example = "https://weibo.com/USER?tabtype=video"
def statuses(self):
endpoint = "/profile/getprofilevideolist"
@@ -306,11 +276,7 @@ class WeiboNewvideoExtractor(WeiboExtractor):
"""Extractor for weibo 'newVideo' listings"""
subcategory = "newvideo"
pattern = USER_PATTERN + r"\?tabtype=newVideo"
- test = ("https://weibo.com/1758989602?tabtype=newVideo", {
- "pattern": r"https://f\.video\.weibocdn\.com/(../)?\w+\.mp4\?label=mp",
- "range": "1-30",
- "count": 30,
- })
+ example = "https://weibo.com/USER?tabtype=newVideo"
def statuses(self):
endpoint = "/profile/getWaterFallContent"
@@ -322,9 +288,7 @@ class WeiboArticleExtractor(WeiboExtractor):
"""Extractor for weibo 'article' listings"""
subcategory = "article"
pattern = USER_PATTERN + r"\?tabtype=article"
- test = ("https://weibo.com/1758989602?tabtype=article", {
- "count": 0,
- })
+ example = "https://weibo.com/USER?tabtype=article"
def statuses(self):
endpoint = "/statuses/mymblog"
@@ -336,12 +300,7 @@ class WeiboAlbumExtractor(WeiboExtractor):
"""Extractor for weibo 'album' listings"""
subcategory = "album"
pattern = USER_PATTERN + r"\?tabtype=album"
- test = ("https://weibo.com/1758989602?tabtype=album", {
- "pattern": r"https://(wx\d+\.sinaimg\.cn/large/\w{32}\.(jpg|png|gif)"
- r"|g\.us\.sinaimg\.cn/../\w+\.mp4)",
- "range": "1-3",
- "count": 3,
- })
+ example = "https://weibo.com/USER?tabtype=album"
def statuses(self):
endpoint = "/profile/getImageWall"
@@ -363,57 +322,7 @@ class WeiboStatusExtractor(WeiboExtractor):
"""Extractor for images from a status on weibo.cn"""
subcategory = "status"
pattern = BASE_PATTERN + r"/(detail|status|\d+)/(\w+)"
- test = (
- ("https://m.weibo.cn/detail/4323047042991618", {
- "pattern": r"https?://wx\d+.sinaimg.cn/large/\w+.jpg",
- "keyword": {"status": {
- "count": 1,
- "date": "dt:2018-12-30 13:56:36",
- }},
- }),
- ("https://m.weibo.cn/detail/4339748116375525", {
- "pattern": r"https?://f.us.sinaimg.cn/\w+\.mp4\?label=mp4_1080p",
- }),
- # unavailable video (#427)
- ("https://m.weibo.cn/status/4268682979207023", {
- "exception": exception.NotFoundError,
- }),
- # non-numeric status ID (#664)
- ("https://weibo.com/3314883543/Iy7fj4qVg"),
- # original retweets (#1542)
- ("https://m.weibo.cn/detail/4600272267522211", {
- "options": (("retweets", "original"),),
- "keyword": {"status": {"id": 4600167083287033}},
- }),
- # type == livephoto (#2146)
- ("https://weibo.com/5643044717/KkuDZ4jAA", {
- "range": "2,4,6",
- "pattern": r"https://video\.weibo\.com/media/play\?livephoto="
- r"https%3A%2F%2Fus.sinaimg.cn%2F\w+\.mov",
- }),
- # type == gif
- ("https://weibo.com/1758989602/LvBhm5DiP", {
- "pattern": r"https://g\.us\.sinaimg.cn/o0/qNZcaAAglx07Wuf921CM0104"
- r"120005tc0E010\.mp4\?label=gif_mp4",
- }),
- # missing 'playback_list' (#2792)
- ("https://weibo.com/2909128931/4409545658754086", {
- "count": 10,
- }),
- # empty 'playback_list' (#3301)
- ("https://weibo.com/1501933722/4142890299009993", {
- "pattern": r"https://f\.us\.sinaimg\.cn/004zstGKlx07dAHg4ZVu010f01"
- r"000OOl0k01\.mp4\?label=mp4_hd&template=template_7&ori"
- r"=0&ps=1CwnkDw1GXwCQx.+&KID=unistore,video",
- "count": 1,
- }),
- # mix_media_info (#3793)
- ("https://weibo.com/2427303621/MxojLlLgQ", {
- "count": 9,
- }),
- ("https://m.weibo.cn/status/4339748116375525"),
- ("https://m.weibo.cn/5746766133/4339748116375525"),
- )
+ example = "https://weibo.com/detail/12345"
def statuses(self):
status = self._status_by_id(self.user)
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py
index 0e06858..938c048 100644
--- a/gallery_dl/extractor/wikiart.py
+++ b/gallery_dl/extractor/wikiart.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -69,10 +69,7 @@ class WikiartArtistExtractor(WikiartExtractor):
subcategory = "artist"
directory_fmt = ("{category}", "{artist[artistName]}")
pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)/?$"
- test = ("https://www.wikiart.org/en/thomas-cole", {
- "url": "6844f207a5063c499fc1d5651b03127bc4fe2f73",
- "keyword": "09230b5f504697119e267349bf92487e657a7384",
- })
+ example = "https://www.wikiart.org/en/ARTIST"
def __init__(self, match):
WikiartExtractor.__init__(self, match)
@@ -94,16 +91,7 @@ class WikiartImageExtractor(WikiartArtistExtractor):
"""Extractor for individual paintings on wikiart.org"""
subcategory = "image"
pattern = BASE_PATTERN + r"/(?!(?:paintings|artists)-by-)([\w-]+)/([\w-]+)"
- test = (
- ("https://www.wikiart.org/en/thomas-cole/the-departure-1838", {
- "url": "976cc2545f308a650b5dbb35c29d3cee0f4673b3",
- "keyword": "8e80cdcb01c1fedb934633d1c4c3ab0419cfbedf",
- }),
- # no year or '-' in slug
- ("https://www.wikiart.org/en/huang-shen/summer", {
- "url": "d7f60118c34067b2b37d9577e412dc1477b94207",
- }),
- )
+ example = "https://www.wikiart.org/en/ARTIST/TITLE"
def __init__(self, match):
WikiartArtistExtractor.__init__(self, match)
@@ -125,9 +113,7 @@ class WikiartArtworksExtractor(WikiartExtractor):
subcategory = "artworks"
directory_fmt = ("{category}", "Artworks by {group!c}", "{type}")
pattern = BASE_PATTERN + r"/paintings-by-([\w-]+)/([\w-]+)"
- test = ("https://www.wikiart.org/en/paintings-by-media/grisaille", {
- "url": "36e054fcb3363b7f085c81f4778e6db3994e56a3",
- })
+ example = "https://www.wikiart.org/en/paintings-by-GROUP/TYPE"
def __init__(self, match):
WikiartExtractor.__init__(self, match)
@@ -147,10 +133,7 @@ class WikiartArtistsExtractor(WikiartExtractor):
"""Extractor for artist collections on wikiart.org"""
subcategory = "artists"
pattern = (BASE_PATTERN + r"/artists-by-([\w-]+)/([\w-]+)")
- test = ("https://www.wikiart.org/en/artists-by-century/12", {
- "pattern": WikiartArtistExtractor.pattern,
- "count": ">= 8",
- })
+ example = "https://www.wikiart.org/en/artists-by-GROUP/TYPE"
def __init__(self, match):
WikiartExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/wikifeet.py b/gallery_dl/extractor/wikifeet.py
index 5f02e94..d3586c0 100644
--- a/gallery_dl/extractor/wikifeet.py
+++ b/gallery_dl/extractor/wikifeet.py
@@ -18,62 +18,7 @@ class WikifeetGalleryExtractor(GalleryExtractor):
archive_fmt = "{type}_{celeb}_{pid}"
pattern = (r"(?:https?://)(?:(?:www\.)?wikifeetx?|"
r"men\.wikifeet)\.com/([^/?#]+)")
- test = (
- ("https://www.wikifeet.com/Madison_Beer", {
- "pattern": (r"https://pics\.wikifeet\.com/Madison_Beer"
- r"-Feet-\d+\.jpg"),
- "count" : ">= 352",
- "keyword": {
- "celeb" : "Madison_Beer",
- "celebrity" : "Madison Beer",
- "birthday" : "dt:1999-03-05 00:00:00",
- "birthplace": "United States",
- "rating" : float,
- "pid" : int,
- "width" : int,
- "height" : int,
- "shoesize" : "9 US",
- "type" : "women",
- "tags" : list,
- },
- }),
- ("https://www.wikifeetx.com/Tifa_Quinn", {
- "pattern": (r"https://pics\.wikifeet\.com/Tifa_Quinn"
- r"-Feet-\d+\.jpg"),
- "count" : ">= 9",
- "keyword": {
- "celeb" : "Tifa_Quinn",
- "celebrity" : "Tifa Quinn",
- "birthday" : "[NOT SET]",
- "birthplace": "United States",
- "rating" : float,
- "pid" : int,
- "width" : int,
- "height" : int,
- "shoesize" : "4 US",
- "type" : "women",
- "tags" : list,
- },
- }),
- ("https://men.wikifeet.com/Chris_Hemsworth", {
- "pattern": (r"https://pics\.wikifeet\.com/Chris_Hemsworth"
- r"-Feet-\d+\.jpg"),
- "count" : ">= 860",
- "keyword": {
- "celeb" : "Chris_Hemsworth",
- "celebrity" : "Chris Hemsworth",
- "birthday" : "dt:1983-08-11 00:00:00",
- "birthplace": "Australia",
- "rating" : float,
- "pid" : int,
- "width" : int,
- "height" : int,
- "shoesize" : "12.5 US",
- "type" : "men",
- "tags" : list,
- },
- }),
- )
+ example = "https://www.wikifeet.com/CELEB"
def __init__(self, match):
self.root = text.root_from_url(match.group(0))
diff --git a/gallery_dl/extractor/xhamster.py b/gallery_dl/extractor/xhamster.py
index b308e74..6dc9362 100644
--- a/gallery_dl/extractor/xhamster.py
+++ b/gallery_dl/extractor/xhamster.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2020 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -32,54 +32,7 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
filename_fmt = "{num:>03}_{id}.{extension}"
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"(/photos/gallery/[^/?#]+)"
- test = (
- ("https://xhamster.com/photos/gallery/11748968", {
- "pattern": r"https://thumb-p\d+.xhcdn.com/./[\w/-]+_1000.jpg$",
- "count": ">= 144",
- "keyword": {
- "comments": int,
- "count": int,
- "favorite": bool,
- "id": int,
- "num": int,
- "height": int,
- "width": int,
- "imageURL": str,
- "pageURL": str,
- "thumbURL": str,
- "gallery": {
- "date": "dt:2019-04-16 00:07:31",
- "description": "",
- "dislikes": int,
- "id": 11748968,
- "likes": int,
- "tags": ["NON-Porn"],
- "thumbnail": str,
- "title": "Make the world better.",
- "views": int,
- },
- "user": {
- "id": 16874672,
- "name": "Anonymousrants",
- "retired": bool,
- "subscribers": int,
- "url": "https://xhamster.com/users/anonymousrants",
- "verified": bool,
- },
- },
- }),
- ("https://jp.xhamster2.com/photos/gallery/11748968", {
- "pattern": r"https://thumb-p\d+.xhcdn.com/./[\w/-]+_1000.jpg$",
- "count": ">= 144",
- }),
- ("https://xhamster.com/photos/gallery/make-the-world-better-11748968"),
- ("https://xhamster.com/photos/gallery/11748968"),
- ("https://xhamster.one/photos/gallery/11748968"),
- ("https://xhamster.desi/photos/gallery/11748968"),
- ("https://xhamster2.com/photos/gallery/11748968"),
- ("https://en.xhamster.com/photos/gallery/11748968"),
- ("https://xhamster.porncache.net/photos/gallery/11748968"),
- )
+ example = "https://xhamster.com/photos/gallery/12345"
def __init__(self, match):
XhamsterExtractor.__init__(self, match)
@@ -150,14 +103,7 @@ class XhamsterUserExtractor(XhamsterExtractor):
"""Extractor for all galleries of an xhamster user"""
subcategory = "user"
pattern = BASE_PATTERN + r"/users/([^/?#]+)(?:/photos)?/?(?:$|[?#])"
- test = (
- ("https://xhamster.com/users/goldenpalomino/photos", {
- "pattern": XhamsterGalleryExtractor.pattern,
- "count": 50,
- "range": "1-50",
- }),
- ("https://xhamster.com/users/nickname68"),
- )
+ example = "https://xhamster.com/users/USER/photos"
def __init__(self, match):
XhamsterExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index 46ea074..a28d8f5 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2019 Mike Fährmann
+# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -28,30 +28,7 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor):
pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com"
r"/(?:profiles|amateur-channels|model-channels)"
r"/([^/?#]+)/photos/(\d+)")
- test = (
- ("https://www.xvideos.com/profiles/pervertedcouple/photos/751031", {
- "count": 8,
- "pattern": r"https://profile-pics-cdn\d+\.xvideos-cdn\.com"
- r"/[^/]+\,\d+/videos/profiles/galleries/84/ca/37"
- r"/pervertedcouple/gal751031/pic_\d+_big\.jpg",
- "keyword": {
- "gallery": {
- "id" : 751031,
- "title": "Random Stuff",
- "tags" : list,
- },
- "user": {
- "id" : 20245371,
- "name" : "pervertedcouple",
- "display" : "Pervertedcouple",
- "sex" : "Woman",
- "description": str,
- },
- },
- }),
- ("https://www.xvideos.com/amateur-channels/pervertedcouple/photos/12"),
- ("https://www.xvideos.com/model-channels/pervertedcouple/photos/12"),
- )
+ example = "https://www.xvideos.com/profiles/USER/photos/12345"
def __init__(self, match):
self.user, self.gallery_id = match.groups()
@@ -97,13 +74,7 @@ class XvideosUserExtractor(XvideosBase, Extractor):
categorytransfer = True
pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com"
r"/profiles/([^/?#]+)/?(?:#.*)?$")
- test = (
- ("https://www.xvideos.com/profiles/pervertedcouple", {
- "url": "a413f3e60d6d3a2de79bd44fa3b7a9c03db4336e",
- "keyword": "335a3304941ff2e666c0201e9122819b61b34adb",
- }),
- ("https://www.xvideos.com/profiles/pervertedcouple#_tabPhotos"),
- )
+ example = "https://www.xvideos.com/profiles/USER"
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/ytdl.py b/gallery_dl/extractor/ytdl.py
index b3a1652..cb3c74c 100644
--- a/gallery_dl/extractor/ytdl.py
+++ b/gallery_dl/extractor/ytdl.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -19,7 +19,7 @@ class YoutubeDLExtractor(Extractor):
filename_fmt = "{title}-{id}.{extension}"
archive_fmt = "{extractor_key} {id}"
pattern = r"ytdl:(.*)"
- test = ("ytdl:https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9",)
+ example = "ytdl:https://www.youtube.com/watch?v=abcdefghijk"
def __init__(self, match):
# import main youtube_dl module
@@ -76,7 +76,7 @@ class YoutubeDLExtractor(Extractor):
ytdl_module, self, user_opts, extr_opts)
# transfer cookies to ytdl
- cookies = self.session.cookies
+ cookies = self.cookies
if cookies:
set_cookie = ytdl_instance.cookiejar.set_cookie
for cookie in cookies:
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index 03fd909..5fe1943 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -21,17 +21,19 @@ class ZerochanExtractor(BooruExtractor):
root = "https://www.zerochan.net"
filename_fmt = "{id}.{extension}"
archive_fmt = "{id}"
- cookiedomain = ".zerochan.net"
- cookienames = ("z_id", "z_hash")
+ cookies_domain = ".zerochan.net"
+ cookies_names = ("z_id", "z_hash")
def login(self):
self._logged_in = True
- if not self._check_cookies(self.cookienames):
- username, password = self._get_auth_info()
- if username:
- self._update_cookies(self._login_impl(username, password))
- else:
- self._logged_in = False
+ if self.cookies_check(self.cookies_names):
+ return
+
+ username, password = self._get_auth_info()
+ if username:
+ return self.cookies_update(self._login_impl(username, password))
+
+ self._logged_in = False
@cache(maxage=90*86400, keyarg=1)
def _login_impl(self, username, password):
@@ -78,7 +80,8 @@ class ZerochanExtractor(BooruExtractor):
html = data["tags"]
tags = data["tags"] = []
for tag in html.split("<li class=")[1:]:
- category, _, name = text.extr(tag, 'alt="', '<').partition('">')
+ category = text.extr(tag, 'alt="', '"')
+ name = text.extr(tag, ">-->", "</a>")
tags.append(category + ":" + name.strip())
return data
@@ -108,23 +111,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
pattern = BASE_PATTERN + r"/(?!\d+$)([^/?#]+)/?(?:\?([^#]+))?"
- test = ("https://www.zerochan.net/Perth+%28Kantai+Collection%29", {
- "pattern": r"https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)",
- "count": "> 24",
- "keyword": {
- "extension": r"re:jpg|png",
- "file_url": r"re:https://static\.zerochan\.net"
- r"/.+\.full\.\d+\.(jpg|png)",
- "filename": r"re:(Perth\.\(Kantai\.Collection\)"
- r"|Kantai\.Collection)\.full\.\d+",
- "height": r"re:^\d+$",
- "id": r"re:^\d+$",
- "name": r"re:(Perth \(Kantai Collection\)|Kantai Collection)",
- "search_tags": "Perth (Kantai Collection)",
- "size": r"re:^\d+k$",
- "width": r"re:^\d+$",
- },
- })
+ example = "https://www.zerochan.net/TAG"
def __init__(self, match):
ZerochanExtractor.__init__(self, match)
@@ -174,40 +161,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
class ZerochanImageExtractor(ZerochanExtractor):
subcategory = "image"
pattern = BASE_PATTERN + r"/(\d+)"
- test = ("https://www.zerochan.net/2920445", {
- "pattern": r"https://static\.zerochan\.net/"
- r"Perth\.%28Kantai\.Collection%29\.full.2920445\.jpg",
- "keyword": {
- "author": "YeFan 葉凡",
- "date": "dt:2020-04-24 21:33:44",
- "file_url": "https://static.zerochan.net"
- "/Perth.%28Kantai.Collection%29.full.2920445.jpg",
- "filename": "Perth.(Kantai.Collection).full.2920445",
- "height": 1366,
- "id": 2920445,
- "path": ["Kantai Collection", "Perth (Kantai Collection)"],
- "size": 1975296,
- "tags": [
- "Mangaka:YeFan 葉凡",
- "Game:Kantai Collection",
- "Character:Perth (Kantai Collection)",
- "Theme:Blonde Hair",
- "Theme:Braids",
- "Theme:Coat",
- "Theme:Female",
- "Theme:Firefighter Outfit",
- "Theme:Group",
- "Theme:Long Sleeves",
- "Theme:Personification",
- "Theme:Pins",
- "Theme:Ribbon",
- "Theme:Shirt",
- "Theme:Short Hair",
- ],
- "uploader": "YukinoTokisaki",
- "width": 1920,
- },
- })
+ example = "https://www.zerochan.net/12345"
def __init__(self, match):
ZerochanExtractor.__init__(self, match)