author    Unit 193 <unit193@unit193.net>  2024-01-23 23:35:05 -0500
committer Unit 193 <unit193@unit193.net>  2024-01-23 23:35:05 -0500
commit    a82132b82b6855e0ff258abc53e9d2726b1855f2 (patch)
tree      967ee98356978382816464463d191ac97251631e
parent    6cdb45219881dce3e185f3c3952f0c55985faed7 (diff)
parent    12e23f1195164dcb740d6d4a4287e762c9e5e534 (diff)
Update upstream source from tag 'upstream/1.26.7'
Update to upstream version '1.26.7' with Debian dir 8c7ba9fcd1b4536a45368487934f15dbf2b21a74
-rw-r--r--  CHANGELOG.md                           |  43
-rw-r--r--  PKG-INFO                               |   6
-rw-r--r--  README.rst                             |   4
-rw-r--r--  data/man/gallery-dl.1                  |   2
-rw-r--r--  data/man/gallery-dl.conf.5             | 240
-rw-r--r--  gallery_dl.egg-info/PKG-INFO           |   8
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt        |   5
-rw-r--r--  gallery_dl/__init__.py                 |   2
-rw-r--r--  gallery_dl/extractor/2ch.py            |  91
-rw-r--r--  gallery_dl/extractor/__init__.py       |   5
-rw-r--r--  gallery_dl/extractor/batoto.py         |  15
-rw-r--r--  gallery_dl/extractor/blogger.py        |   2
-rw-r--r--  gallery_dl/extractor/bunkr.py          |  14
-rw-r--r--  gallery_dl/extractor/chevereto.py      |   4
-rw-r--r--  gallery_dl/extractor/common.py         |   8
-rw-r--r--  gallery_dl/extractor/deviantart.py     |  63
-rw-r--r--  gallery_dl/extractor/erome.py          |  20
-rw-r--r--  gallery_dl/extractor/fuskator.py       |  15
-rw-r--r--  gallery_dl/extractor/gelbooru.py       |  13
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py   |  40
-rw-r--r--  gallery_dl/extractor/hatenablog.py     | 167
-rw-r--r--  gallery_dl/extractor/hbrowse.py        |  92
-rw-r--r--  gallery_dl/extractor/issuu.py          |   3
-rw-r--r--  gallery_dl/extractor/kemonoparty.py    |  26
-rw-r--r--  gallery_dl/extractor/mangadex.py       |  32
-rw-r--r--  gallery_dl/extractor/mastodon.py       |  12
-rw-r--r--  gallery_dl/extractor/nijie.py          |   3
-rw-r--r--  gallery_dl/extractor/nitter.py         |   4
-rw-r--r--  gallery_dl/extractor/oauth.py          |   4
-rw-r--r--  gallery_dl/extractor/paheal.py         |   2
-rw-r--r--  gallery_dl/extractor/patreon.py        |  22
-rw-r--r--  gallery_dl/extractor/philomena.py      |  11
-rw-r--r--  gallery_dl/extractor/pixiv.py          |   6
-rw-r--r--  gallery_dl/extractor/sankaku.py        |   2
-rw-r--r--  gallery_dl/extractor/shimmie2.py       |  39
-rw-r--r--  gallery_dl/extractor/steamgriddb.py    | 211
-rw-r--r--  gallery_dl/extractor/twitter.py        |   2
-rw-r--r--  gallery_dl/extractor/urlshortener.py   |  11
-rw-r--r--  gallery_dl/extractor/vk.py             |   8
-rw-r--r--  gallery_dl/extractor/webtoons.py       |  48
-rw-r--r--  gallery_dl/extractor/wikimedia.py      | 181
-rw-r--r--  gallery_dl/version.py                  |   2
-rw-r--r--  test/test_extractor.py                 |  29
-rw-r--r--  test/test_results.py                   | 103
44 files changed, 1275 insertions(+), 345 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7b135b7..277250d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,48 @@
# Changelog
+## 1.26.7 - 2024-01-21
+### Extractors
+#### Additions
+- [2ch] add support ([#1009](https://github.com/mikf/gallery-dl/issues/1009), [#3540](https://github.com/mikf/gallery-dl/issues/3540), [#4444](https://github.com/mikf/gallery-dl/issues/4444))
+- [deviantart:avatar] add `formats` option ([#4995](https://github.com/mikf/gallery-dl/issues/4995))
+- [hatenablog] add support ([#5036](https://github.com/mikf/gallery-dl/issues/5036), [#5037](https://github.com/mikf/gallery-dl/issues/5037))
+- [mangadex] add `list` extractor ([#5025](https://github.com/mikf/gallery-dl/issues/5025))
+- [steamgriddb] add support ([#5033](https://github.com/mikf/gallery-dl/issues/5033), [#5041](https://github.com/mikf/gallery-dl/issues/5041))
+- [wikimedia] add support ([#1443](https://github.com/mikf/gallery-dl/issues/1443), [#2906](https://github.com/mikf/gallery-dl/issues/2906), [#3660](https://github.com/mikf/gallery-dl/issues/3660), [#2340](https://github.com/mikf/gallery-dl/issues/2340))
+- [wikimedia] support `fandom` wikis ([#2677](https://github.com/mikf/gallery-dl/issues/2677), [#3378](https://github.com/mikf/gallery-dl/issues/3378))
+#### Fixes
+- [blogger] fix `lh-*.googleusercontent.com` URLs ([#5091](https://github.com/mikf/gallery-dl/issues/5091))
+- [bunkr] update domain ([#5088](https://github.com/mikf/gallery-dl/issues/5088))
+- [deviantart] fix AttributeError for URLs without username ([#5065](https://github.com/mikf/gallery-dl/issues/5065))
+- [deviantart] fix `KeyError: 'premium_folder_data'` ([#5063](https://github.com/mikf/gallery-dl/issues/5063))
+- [deviantart:avatar] fix exception when `comments` are enabled ([#4995](https://github.com/mikf/gallery-dl/issues/4995))
+- [fuskator] make metadata extraction non-fatal ([#5039](https://github.com/mikf/gallery-dl/issues/5039))
+- [gelbooru] only log "Incomplete API response" for favorites ([#5045](https://github.com/mikf/gallery-dl/issues/5045))
+- [giantessbooru] update domain
+- [issuu] fix extraction
+- [nijie] fix download URLs of single image posts ([#5049](https://github.com/mikf/gallery-dl/issues/5049))
+- [patreon] fix `KeyError: 'name'` ([#5048](https://github.com/mikf/gallery-dl/issues/5048), [#5069](https://github.com/mikf/gallery-dl/issues/5069), [#5093](https://github.com/mikf/gallery-dl/issues/5093))
+- [pixiv] update API headers ([#5029](https://github.com/mikf/gallery-dl/issues/5029))
+- [realbooru] fix download URLs of older posts
+- [twitter] revert to using `media` timeline by default ([#4953](https://github.com/mikf/gallery-dl/issues/4953))
+- [vk] transform image URLs to non-blurred versions ([#5017](https://github.com/mikf/gallery-dl/issues/5017))
+#### Improvements
+- [batoto] support more mirror domains ([#5042](https://github.com/mikf/gallery-dl/issues/5042))
+- [batoto] improve v2 manga URL pattern
+- [gelbooru] support `all` tag and URLs with empty tags ([#5076](https://github.com/mikf/gallery-dl/issues/5076))
+- [patreon] download `m3u8` manifests with ytdl
+- [sankaku] support post URLs with alphanumeric IDs ([#5073](https://github.com/mikf/gallery-dl/issues/5073))
+#### Metadata
+- [batoto] improve `manga_id` extraction ([#5042](https://github.com/mikf/gallery-dl/issues/5042))
+- [erome] fix `count` metadata
+- [kemonoparty] add `revision_hash` metadata ([#4706](https://github.com/mikf/gallery-dl/issues/4706), [#4727](https://github.com/mikf/gallery-dl/issues/4727), [#5013](https://github.com/mikf/gallery-dl/issues/5013))
+- [paheal] fix `source` metadata
+- [webtoons] extract more metadata ([#5061](https://github.com/mikf/gallery-dl/issues/5061), [#5094](https://github.com/mikf/gallery-dl/issues/5094))
+#### Removals
+- [chevereto] remove `pixl.li`
+- [hbrowse] remove module
+- [nitter] remove `nitter.lacontrevoie.fr`
+
## 1.26.6 - 2024-01-06
### Extractors
#### Additions
diff --git a/PKG-INFO b/PKG-INFO
index e7550e9..e9a8b02 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.26.6
+Version: 1.26.7
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -112,9 +112,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.6/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.7/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.6/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.7/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index ee165e5..490b54c 100644
--- a/README.rst
+++ b/README.rst
@@ -72,9 +72,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.6/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.7/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.6/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.7/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index ad6fd4a..b779e1e 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2024-01-06" "1.26.6" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2024-01-21" "1.26.7" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index a57d39b..7fec8ae 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2024-01-06" "1.26.6" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2024-01-21" "1.26.7" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1803,6 +1803,22 @@ or whenever your \f[I]cache file\f[] is deleted or cleared.
Minimum wait time in seconds before API requests.
+.SS extractor.deviantart.avatar.formats
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Example:" 4
+["original.jpg", "big.jpg", "big.gif", ".png"]
+
+.IP "Description:" 4
+Avatar URL formats to return.
+
+Each format is parsed as \f[I]SIZE.EXT\f[].
+.br
+Leave \f[I]SIZE\f[] empty to download the regular, small avatar format.
+.br
+
+
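For illustration, a minimal sketch of how each SIZE.EXT entry expands into an avatar URL. The account name and index below are hypothetical; the URL scheme mirrors the deviantart.py change later in this commit.

```python
# Sketch only: split a "formats" entry into SIZE and EXT, then build the
# avatar URL the same way the new DeviantartAvatarExtractor code does.
name, index = "someuser", "3"   # hypothetical account name and usericon index

for fmt in ("original.jpg", "big.gif", ".png"):
    size, _, ext = fmt.rpartition(".")
    size = "-" + size if size else ""   # empty SIZE -> regular small avatar
    url = "https://a.deviantart.net/avatars{}/{}/{}/{}.{}?{}".format(
        size, name[0], name[1], name, ext, index)
    print(url)
# https://a.deviantart.net/avatars-original/s/o/someuser.jpg?3
# https://a.deviantart.net/avatars-big/s/o/someuser.gif?3
# https://a.deviantart.net/avatars/s/o/someuser.png?3
```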
.SS extractor.[E621].metadata
.IP "Type:" 6
.br
@@ -3551,6 +3567,226 @@ Filters used during searches.
Download video files.
+.SS extractor.steamgriddb.animated
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Include animated assets when downloading from a list of assets.
+
+
+.SS extractor.steamgriddb.epilepsy
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Include assets tagged with epilepsy when downloading from a list of assets.
+
+
+.SS extractor.steamgriddb.dimensions
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]"all"\f[]
+
+.IP "Examples:" 4
+.br
+* \f[I]"1024x512,512x512"\f[]
+.br
+* \f[I]["460x215", "920x430"]\f[]
+
+.IP "Description:" 4
+Only include assets with the specified dimensions. \f[I]all\f[] can be
+used to specify all dimensions. Valid values are:
+
+.br
+* Grids: \f[I]460x215\f[], \f[I]920x430\f[], \f[I]600x900\f[], \f[I]342x482\f[], \f[I]660x930\f[],
+\f[I]512x512\f[], \f[I]1024x1024\f[]
+.br
+* Heroes: \f[I]1920x620\f[], \f[I]3840x1240\f[], \f[I]1600x650\f[]
+.br
+* Logos: N/A (will be ignored)
+.br
+* Icons: \f[I]8x8\f[], \f[I]10x10\f[], \f[I]14x14\f[], \f[I]16x16\f[], \f[I]20x20\f[], \f[I]24x24\f[],
+\f[I]28x28\f[], \f[I]32x32\f[], \f[I]35x35\f[], \f[I]40x40\f[], \f[I]48x48\f[], \f[I]54x54\f[],
+\f[I]56x56\f[], \f[I]57x57\f[], \f[I]60x60\f[], \f[I]64x64\f[], \f[I]72x72\f[], \f[I]76x76\f[],
+\f[I]80x80\f[], \f[I]90x90\f[], \f[I]96x96\f[], \f[I]100x100\f[], \f[I]114x114\f[], \f[I]120x120\f[],
+\f[I]128x128\f[], \f[I]144x144\f[], \f[I]150x150\f[], \f[I]152x152\f[], \f[I]160x160\f[],
+\f[I]180x180\f[], \f[I]192x192\f[], \f[I]194x194\f[], \f[I]256x256\f[], \f[I]310x310\f[],
+\f[I]512x512\f[], \f[I]768x768\f[], \f[I]1024x1024\f[]
+
+
+.SS extractor.steamgriddb.file-types
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]"all"\f[]
+
+.IP "Examples:" 4
+.br
+* \f[I]"png,jpeg"\f[]
+.br
+* \f[I]["jpeg", "webp"]\f[]
+
+.IP "Description:" 4
+Only include assets of the specified file types. \f[I]all\f[] can be
+used to specify all file types. Valid values are:
+
+.br
+* Grids: \f[I]png\f[], \f[I]jpeg\f[], \f[I]jpg\f[], \f[I]webp\f[]
+.br
+* Heroes: \f[I]png\f[], \f[I]jpeg\f[], \f[I]jpg\f[], \f[I]webp\f[]
+.br
+* Logos: \f[I]png\f[], \f[I]webp\f[]
+.br
+* Icons: \f[I]png\f[], \f[I]ico\f[]
+
+
+.SS extractor.steamgriddb.download-fake-png
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download fake PNGs alongside the real file.
+
+
+.SS extractor.steamgriddb.humor
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Include assets tagged with humor when downloading from a list of assets.
+
+
+.SS extractor.steamgriddb.languages
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]"all"\f[]
+
+.IP "Examples:" 4
+.br
+* \f[I]"en,km"\f[]
+.br
+* \f[I]["fr", "it"]\f[]
+
+.IP "Description:" 4
+Only include assets that are in the specified languages. \f[I]all\f[] can be
+used to specify all languages. Valid values are \f[I]ISO 639-1\f[]
+language codes.
+
+
+.SS extractor.steamgriddb.nsfw
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Include assets tagged with adult content when downloading from a list of assets.
+
+
+.SS extractor.steamgriddb.sort
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"score_desc"\f[]
+
+.IP "Description:" 4
+Set the sorting method used when downloading from a list of assets. Can be one of:
+
+.br
+* \f[I]score_desc\f[] (Highest Score (Beta))
+.br
+* \f[I]score_asc\f[] (Lowest Score (Beta))
+.br
+* \f[I]score_old_desc\f[] (Highest Score (Old))
+.br
+* \f[I]score_old_asc\f[] (Lowest Score (Old))
+.br
+* \f[I]age_desc\f[] (Newest First)
+.br
+* \f[I]age_asc\f[] (Oldest First)
+
+
+.SS extractor.steamgriddb.static
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Include static assets when downloading from a list of assets.
+
+
+.SS extractor.steamgriddb.styles
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]"all"\f[]
+
+.IP "Examples:" 4
+.br
+* \f[I]"white,black"\f[]
+.br
+* \f[I]["no_logo", "white_logo"]\f[]
+
+.IP "Description:" 4
+Only include assets that are in the specified styles. \f[I]all\f[] can be used
+to specify all styles. Valid values are:
+
+.br
+* Grids: \f[I]alternate\f[], \f[I]blurred\f[], \f[I]no_logo\f[], \f[I]material\f[], \f[I]white_logo\f[]
+.br
+* Heroes: \f[I]alternate\f[], \f[I]blurred\f[], \f[I]material\f[]
+.br
+* Logos: \f[I]official\f[], \f[I]white\f[], \f[I]black\f[], \f[I]custom\f[]
+.br
+* Icons: \f[I]official\f[], \f[I]custom\f[]
+
+
+.SS extractor.steamgriddb.untagged
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Include untagged assets when downloading from a list of assets.
+
+
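Taken together, an extractor.steamgriddb section combining these options might look like the following sketch, shown as a Python dict for illustration; gallery-dl reads the equivalent structure from its JSON configuration file, and all values here are examples only.

```python
# Sketch of a steamgriddb configuration using the options documented above.
STEAMGRIDDB_CONFIG = {
    "extractor": {
        "steamgriddb": {
            "animated": True,
            "static": True,
            "epilepsy": False,
            "nsfw": False,
            "humor": True,
            "untagged": True,
            "download-fake-png": False,
            "dimensions": ["460x215", "920x430"],
            "file-types": ["png", "jpeg"],
            "languages": "en",
            "styles": ["alternate", "no_logo"],
            "sort": "score_desc",
        }
    }
}
```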
.SS extractor.[szurubooru].username & .token
.IP "Type:" 6
\f[I]string\f[]
@@ -4039,7 +4275,7 @@ Controls the strategy / tweet source used for timeline URLs
.br
* \f[I]"with_replies"\f[]: \f[I]/with_replies\f[] timeline + search
.br
-* \f[I]"auto"\f[]: \f[I]"tweets"\f[] or \f[I]"media"\f[], depending on \f[I]retweets\f[], \f[I]replies\f[], and \f[I]text-tweets\f[] settings
+* \f[I]"auto"\f[]: \f[I]"tweets"\f[] or \f[I]"media"\f[], depending on \f[I]retweets\f[] and \f[I]text-tweets\f[] settings
.SS extractor.twitter.text-tweets
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index d695df9..e9a8b02 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
-Name: gallery-dl
-Version: 1.26.6
+Name: gallery_dl
+Version: 1.26.7
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -112,9 +112,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.6/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.7/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.6/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.7/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 271b4a9..ff16efd 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -40,6 +40,7 @@ gallery_dl/downloader/common.py
gallery_dl/downloader/http.py
gallery_dl/downloader/text.py
gallery_dl/downloader/ytdl.py
+gallery_dl/extractor/2ch.py
gallery_dl/extractor/2chan.py
gallery_dl/extractor/2chen.py
gallery_dl/extractor/35photo.py
@@ -90,7 +91,7 @@ gallery_dl/extractor/gelbooru_v01.py
gallery_dl/extractor/gelbooru_v02.py
gallery_dl/extractor/generic.py
gallery_dl/extractor/gofile.py
-gallery_dl/extractor/hbrowse.py
+gallery_dl/extractor/hatenablog.py
gallery_dl/extractor/hentai2read.py
gallery_dl/extractor/hentaicosplays.py
gallery_dl/extractor/hentaifoundry.py
@@ -190,6 +191,7 @@ gallery_dl/extractor/slideshare.py
gallery_dl/extractor/smugmug.py
gallery_dl/extractor/soundgasm.py
gallery_dl/extractor/speakerdeck.py
+gallery_dl/extractor/steamgriddb.py
gallery_dl/extractor/subscribestar.py
gallery_dl/extractor/szurubooru.py
gallery_dl/extractor/tapas.py
@@ -221,6 +223,7 @@ gallery_dl/extractor/webtoons.py
gallery_dl/extractor/weibo.py
gallery_dl/extractor/wikiart.py
gallery_dl/extractor/wikifeet.py
+gallery_dl/extractor/wikimedia.py
gallery_dl/extractor/xhamster.py
gallery_dl/extractor/xvideos.py
gallery_dl/extractor/ytdl.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index fff53eb..19ea77b 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -45,7 +45,7 @@ def main():
elif filename.startswith("\\f"):
filename = "\f" + filename[2:]
config.set((), "filename", filename)
- if args.directory:
+ if args.directory is not None:
config.set((), "base-directory", args.directory)
config.set((), "directory", ())
if args.postprocessors:
diff --git a/gallery_dl/extractor/2ch.py b/gallery_dl/extractor/2ch.py
new file mode 100644
index 0000000..dbbf21b
--- /dev/null
+++ b/gallery_dl/extractor/2ch.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://2ch.hk/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+
+class _2chThreadExtractor(Extractor):
+ """Extractor for 2ch threads"""
+ category = "2ch"
+ subcategory = "thread"
+ root = "https://2ch.hk"
+ directory_fmt = ("{category}", "{board}", "{thread} {title}")
+ filename_fmt = "{tim}{filename:? //}.{extension}"
+ archive_fmt = "{board}_{thread}_{tim}"
+ pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/res/(\d+)"
+ example = "https://2ch.hk/a/res/12345.html"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.board, self.thread = match.groups()
+
+ def items(self):
+ url = "{}/{}/res/{}.json".format(self.root, self.board, self.thread)
+ posts = self.request(url).json()["threads"][0]["posts"]
+
+ op = posts[0]
+ title = op.get("subject") or text.remove_html(op["comment"])
+
+ thread = {
+ "board" : self.board,
+ "thread": self.thread,
+ "title" : text.unescape(title)[:50],
+ }
+
+ yield Message.Directory, thread
+ for post in posts:
+ files = post.get("files")
+ if files:
+ post["post_name"] = post["name"]
+ post["date"] = text.parse_timestamp(post["timestamp"])
+ del post["files"]
+ del post["name"]
+
+ for file in files:
+ file.update(thread)
+ file.update(post)
+
+ file["filename"] = file["fullname"].rpartition(".")[0]
+ file["tim"], _, file["extension"] = \
+ file["name"].rpartition(".")
+
+ yield Message.Url, self.root + file["path"], file
+
+
+class _2chBoardExtractor(Extractor):
+ """Extractor for 2ch boards"""
+ category = "2ch"
+ subcategory = "board"
+ root = "https://2ch.hk"
+ pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/?$"
+ example = "https://2ch.hk/a/"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.board = match.group(1)
+
+ def items(self):
+ # index page
+ url = "{}/{}/index.json".format(self.root, self.board)
+ index = self.request(url).json()
+ index["_extractor"] = _2chThreadExtractor
+ for thread in index["threads"]:
+ url = "{}/{}/res/{}.html".format(
+ self.root, self.board, thread["thread_num"])
+ yield Message.Queue, url, index
+
+ # pages 1..n
+ for n in util.advance(index["pages"], 1):
+ url = "{}/{}/{}.json".format(self.root, self.board, n)
+ page = self.request(url).json()
+ page["_extractor"] = _2chThreadExtractor
+ for thread in page["threads"]:
+ url = "{}/{}/res/{}.html".format(
+ self.root, self.board, thread["thread_num"])
+ yield Message.Queue, url, page
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 9e33f2c..d624736 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -10,6 +10,7 @@ import sys
import re
modules = [
+ "2ch",
"2chan",
"2chen",
"35photo",
@@ -53,7 +54,7 @@ modules = [
"gelbooru_v01",
"gelbooru_v02",
"gofile",
- "hbrowse",
+ "hatenablog",
"hentai2read",
"hentaicosplays",
"hentaifoundry",
@@ -145,6 +146,7 @@ modules = [
"smugmug",
"soundgasm",
"speakerdeck",
+ "steamgriddb",
"subscribestar",
"szurubooru",
"tapas",
@@ -175,6 +177,7 @@ modules = [
"weibo",
"wikiart",
"wikifeet",
+ "wikimedia",
"xhamster",
"xvideos",
"zerochan",
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index cd6302e..e82cd09 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -10,8 +10,11 @@ from .common import Extractor, ChapterExtractor, MangaExtractor
from .. import text, exception
import re
-BASE_PATTERN = (r"(?:https?://)?"
- r"(?:(?:ba|d|w)to\.to|\.to|(?:batotoo|mangatoto)\.com)")
+BASE_PATTERN = (r"(?:https?://)?(?:"
+ r"(?:ba|d|h|m|w)to\.to|"
+ r"(?:(?:manga|read)toto|batocomic|[xz]bato)\.(?:com|net|org)|"
+ r"comiko\.(?:net|org)|"
+ r"bat(?:otoo|o?two)\.com)")
class BatotoBase():
@@ -38,7 +41,8 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
def metadata(self, page):
extr = text.extract_from(page)
manga, info, _ = extr("<title>", "<").rsplit(" - ", 3)
- manga_id = extr("/title/", "/")
+ manga_id = text.extr(
+ extr('rel="canonical" href="', '"'), "/title/", "/")
match = re.match(
r"(?:Volume\s+(\d+) )?"
@@ -76,12 +80,13 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
"""Extractor for bato.to manga"""
reverse = False
chapterclass = BatotoChapterExtractor
- pattern = BASE_PATTERN + r"/(?:title|series)/(\d+)[^/?#]*/?$"
+ pattern = (BASE_PATTERN +
+ r"/(?:title/(\d+)[^/?#]*|series/(\d+)(?:/[^/?#]*)?)/?$")
example = "https://bato.to/title/12345-MANGA/"
def __init__(self, match):
self.root = text.root_from_url(match.group(0))
- self.manga_id = match.group(1)
+ self.manga_id = match.group(1) or match.group(2)
url = "{}/title/{}".format(self.root, self.manga_id)
MangaExtractor.__init__(self, match, url)
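As a quick sanity check of the widened pattern, the standalone sketch below uses the regex from this hunk to match a few of the newly supported mirror URLs.

```python
import re

# The BASE_PATTERN from the batoto.py change above; all URLs below match.
BASE_PATTERN = (r"(?:https?://)?(?:"
                r"(?:ba|d|h|m|w)to\.to|"
                r"(?:(?:manga|read)toto|batocomic|[xz]bato)\.(?:com|net|org)|"
                r"comiko\.(?:net|org)|"
                r"bat(?:otoo|o?two)\.com)")

for url in ("https://bato.to/title/12345-manga",
            "https://xbato.org/title/12345-manga",
            "https://comiko.net/title/12345-manga",
            "https://battwo.com/series/12345"):
    assert re.match(BASE_PATTERN, url), url
```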
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 58ae59d..402408e 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -37,7 +37,7 @@ class BloggerExtractor(BaseExtractor):
findall_image = re.compile(
r'src="(https?://(?:'
r'blogger\.googleusercontent\.com/img|'
- r'lh\d+\.googleusercontent\.com/|'
+ r'lh\d+(?:-\w+)?\.googleusercontent\.com|'
r'\d+\.bp\.blogspot\.com)/[^"]+)').findall
findall_video = re.compile(
r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 26123b8..e7fc14b 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -6,13 +6,13 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://bunkrr.su/"""
+"""Extractors for https://bunkrr.ru/"""
from .lolisafe import LolisafeAlbumExtractor
from .. import text
from urllib.parse import urlsplit, urlunsplit
-BASE_PATTERN = r"(?:https?://)?(?:app\.)?bunkr+\.(?:la|[sr]u|is|to)"
+BASE_PATTERN = r"(?:https?://)?(?:app\.)?bunkr+\.(?:[rs]u|la|is|to)"
MEDIA_DOMAIN_OVERRIDES = {
"cdn9.bunkr.ru" : "c9.bunkr.ru",
@@ -27,11 +27,11 @@ CDN_HOSTED_EXTENSIONS = (
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
- """Extractor for bunkrr.su albums"""
+ """Extractor for bunkrr.ru albums"""
category = "bunkr"
- root = "https://bunkrr.su"
+ root = "https://bunkrr.ru"
pattern = BASE_PATTERN + r"/a/([^/?#]+)"
- example = "https://bunkrr.su/a/ID"
+ example = "https://bunkrr.ru/a/ID"
def fetch_album(self, album_id):
# album metadata
@@ -84,11 +84,11 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
class BunkrMediaExtractor(BunkrAlbumExtractor):
- """Extractor for bunkrr.su media links"""
+ """Extractor for bunkrr.ru media links"""
subcategory = "media"
directory_fmt = ("{category}",)
pattern = BASE_PATTERN + r"/[vid]/([^/?#]+)"
- example = "https://bunkrr.su/v/FILENAME"
+ example = "https://bunkrr.ru/v/FILENAME"
def fetch_album(self, album_id):
try:
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index 2bf200b..ef5a44c 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -38,10 +38,6 @@ BASE_PATTERN = CheveretoExtractor.update({
"root": "https://jpg4.su",
"pattern": r"jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)",
},
- "pixl": {
- "root": "https://pixl.li",
- "pattern": r"pixl\.(?:li|is)",
- },
"imgkiwi": {
"root": "https://img.kiwi",
"pattern": r"img\.kiwi",
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 0dd05ef..cf0f8c9 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -102,6 +102,9 @@ class Extractor():
def config_accumulate(self, key):
return config.accumulate(self._cfgpath, key)
+ def config_instance(self, key, default=None):
+ return default
+
def _config_shared(self, key, default=None):
return config.interpolate_common(
("extractor",), self._cfgpath, key, default)
@@ -735,9 +738,10 @@ class BaseExtractor(Extractor):
for index, group in enumerate(match.groups()):
if group is not None:
if index:
- self.category, self.root = self.instances[index-1]
+ self.category, self.root, info = self.instances[index-1]
if not self.root:
self.root = text.root_from_url(match.group(0))
+ self.config_instance = info.get
else:
self.root = group
self.category = group.partition("://")[2]
@@ -757,7 +761,7 @@ class BaseExtractor(Extractor):
root = info["root"]
if root:
root = root.rstrip("/")
- instance_list.append((category, root))
+ instance_list.append((category, root, info))
pattern = info.get("pattern")
if not pattern:
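A condensed sketch of the new lookup mechanism (instance data here is illustrative): each instances entry now carries its whole info dict, and a successful match rebinds config_instance to that dict's .get, so subclasses such as gelbooru_v02, mastodon, philomena, and shimmie2 below can query per-instance keys without a module-level INSTANCES table.

```python
# Sketch only, not the real class: per-instance settings via rebinding.
class Extractor:
    def config_instance(self, key, default=None):
        return default                    # base case: no per-instance data

instances = [
    ("realbooru", "https://realbooru.com",
     {"root": "https://realbooru.com", "pattern": r"realbooru\.com"}),
]

extr = Extractor()
category, root, info = instances[0]       # what a pattern match selects
extr.config_instance = info.get           # per-instance lookup from now on

print(extr.config_instance("api_root"))   # None -> caller falls back ("or self.root")
print(extr.config_instance("pattern"))    # realbooru\.com
```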
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 4b5f1d7..bcfbe73 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -38,7 +38,7 @@ class DeviantartExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.user = (match.group(1) or match.group(2)).lower()
+ self.user = (match.group(1) or match.group(2) or "").lower()
self.offset = 0
def _init(self):
@@ -452,9 +452,11 @@ class DeviantartExtractor(Extractor):
return None
dev = self.api.deviation(deviation["deviationid"], False)
- folder = dev["premium_folder_data"]
+ folder = deviation["premium_folder_data"]
username = dev["author"]["username"]
- has_access = folder["has_access"]
+
+ # premium_folder_data is no longer present when user has access (#5063)
+ has_access = ("premium_folder_data" not in dev) or folder["has_access"]
if not has_access and folder["type"] == "watchers" and \
self.config("auto-watch"):
@@ -547,22 +549,45 @@ class DeviantartAvatarExtractor(DeviantartExtractor):
example = "https://www.deviantart.com/USER/avatar/"
def deviations(self):
- profile = self.api.user_profile(self.user.lower())
- if profile:
- url = profile["user"]["usericon"]
- return ({
- "author" : profile["user"],
- "category" : "avatar",
- "index" : text.parse_int(url.rpartition("?")[2]),
- "is_deleted" : False,
- "is_downloadable": False,
- "published_time" : 0,
- "title" : "avatar",
- "content" : {
- "src": url.replace("/avatars/", "/avatars-big/", 1),
- },
- },)
- return ()
+ name = self.user.lower()
+ profile = self.api.user_profile(name)
+ if not profile:
+ return ()
+
+ user = profile["user"]
+ icon = user["usericon"]
+ index = icon.rpartition("?")[2]
+
+ formats = self.config("formats")
+ if not formats:
+ url = icon.replace("/avatars/", "/avatars-big/", 1)
+ return (self._make_deviation(url, user, index, ""),)
+
+ if isinstance(formats, str):
+ formats = formats.replace(" ", "").split(",")
+
+ results = []
+ for fmt in formats:
+ fmt, _, ext = fmt.rpartition(".")
+ if fmt:
+ fmt = "-" + fmt
+ url = "https://a.deviantart.net/avatars{}/{}/{}/{}.{}?{}".format(
+ fmt, name[0], name[1], name, ext, index)
+ results.append(self._make_deviation(url, user, index, fmt))
+ return results
+
+ def _make_deviation(self, url, user, index, fmt):
+ return {
+ "author" : user,
+ "category" : "avatar",
+ "index" : text.parse_int(index),
+ "is_deleted" : False,
+ "is_downloadable": False,
+ "published_time" : 0,
+ "title" : "avatar" + fmt,
+ "stats" : {"comments": 0},
+ "content" : {"src": url},
+ }
class DeviantartBackgroundExtractor(DeviantartExtractor):
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index 6a0e069..8c9da2f 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -44,24 +44,26 @@ class EromeExtractor(Extractor):
pos = page.index('<div class="user-profile', pos)
user, pos = text.extract(
page, 'href="https://www.erome.com/', '"', pos)
- count, pos = text.extract(
- page, 'fa-camera"></i>', '</span>', pos)
+
+ urls = []
+ groups = page.split('<div class="media-group"')
+ for group in util.advance(groups, 1):
+ url = (text.extr(group, '<source src="', '"') or
+ text.extr(group, 'data-src="', '"'))
+ if url:
+ urls.append(url)
data = {
"album_id" : album_id,
"title" : text.unescape(title),
"user" : text.unquote(user),
+ "count" : len(urls),
"_http_headers": {"Referer": url},
- "count" : text.parse_int(count),
}
yield Message.Directory, data
- groups = page.split('<div class="media-group"')
- for data["num"], group in enumerate(util.advance(groups, 1), 1):
- url = (text.extr(group, '<source src="', '"') or
- text.extr(group, 'data-src="', '"'))
- if url:
- yield Message.Url, url, text.nameext_from_url(url, data)
+ for data["num"], url in enumerate(urls, 1):
+ yield Message.Url, url, text.nameext_from_url(url, data)
def albums(self):
return ()
diff --git a/gallery_dl/extractor/fuskator.py b/gallery_dl/extractor/fuskator.py
index 20afb5a..beecbff 100644
--- a/gallery_dl/extractor/fuskator.py
+++ b/gallery_dl/extractor/fuskator.py
@@ -22,7 +22,7 @@ class FuskatorGalleryExtractor(GalleryExtractor):
def __init__(self, match):
self.gallery_hash = match.group(1)
- url = "{}/thumbs/{}/".format(self.root, self.gallery_hash)
+ url = "{}/thumbs/{}/index.html".format(self.root, self.gallery_hash)
GalleryExtractor.__init__(self, match, url)
def metadata(self, page):
@@ -50,15 +50,16 @@ class FuskatorGalleryExtractor(GalleryExtractor):
"gallery_id" : text.parse_int(gallery_id),
"gallery_hash": self.gallery_hash,
"title" : text.unescape(title[:-15]),
- "views" : data["hits"],
- "score" : data["rating"],
- "tags" : data["tags"].split(","),
- "count" : len(data["images"]),
+ "views" : data.get("hits"),
+ "score" : data.get("rating"),
+ "tags" : (data.get("tags") or "").split(","),
}
def images(self, page):
- for image in self.data["images"]:
- yield "https:" + image["imageUrl"], image
+ return [
+ ("https:" + image["imageUrl"], image)
+ for image in self.data["images"]
+ ]
class FuskatorSearchExtractor(Extractor):
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index eba1539..83f1392 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -23,7 +23,7 @@ class GelbooruBase():
root = "https://gelbooru.com"
offset = 0
- def _api_request(self, params, key="post"):
+ def _api_request(self, params, key="post", log=False):
if "s" not in params:
params["s"] = "post"
params["api_key"] = self.api_key
@@ -35,8 +35,9 @@ class GelbooruBase():
try:
posts = data[key]
except KeyError:
- self.log.error("Incomplete API response (missing '%s')", key)
- self.log.debug("%s", data)
+ if log:
+ self.log.error("Incomplete API response (missing '%s')", key)
+ self.log.debug("%s", data)
return []
if not isinstance(posts, list):
@@ -117,7 +118,7 @@ class GelbooruBase():
class GelbooruTagExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02TagExtractor):
"""Extractor for images from gelbooru.com based on search-tags"""
- pattern = BASE_PATTERN + r"page=post&s=list&tags=([^&#]+)"
+ pattern = BASE_PATTERN + r"page=post&s=list&tags=([^&#]*)"
example = "https://gelbooru.com/index.php?page=post&s=list&tags=TAG"
@@ -169,7 +170,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
"limit": "1",
}
- count = self._api_request(params, "@attributes")[0]["count"]
+ count = self._api_request(params, "@attributes", True)[0]["count"]
if count <= self.offset:
return
@@ -186,7 +187,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
params["limit"] = self.per_page
while True:
- favs = self._api_request(params, "favorite")
+ favs = self._api_request(params, "favorite", True)
favs.reverse()
if skip:
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 0c8af3d..7ab6d02 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -22,14 +22,10 @@ class GelbooruV02Extractor(booru.BooruExtractor):
def _init(self):
self.api_key = self.config("api-key")
self.user_id = self.config("user-id")
-
- try:
- self.api_root = INSTANCES[self.category]["api_root"]
- except KeyError:
- self.api_root = self.root
+ self.api_root = self.config_instance("api_root") or self.root
if self.category == "realbooru":
- self.items = self._items_realbooru
+ self._file_url = self._file_url_realbooru
self._tags = self._tags_realbooru
def _api_request(self, params):
@@ -128,28 +124,6 @@ class GelbooruV02Extractor(booru.BooruExtractor):
self.root, md5[0:2], md5[2:4], md5, url.rpartition(".")[2])
return url
- def _items_realbooru(self):
- from .common import Message
- data = self.metadata()
-
- for post in self.posts():
- try:
- html = self._html(post)
- url = post["file_url"] = text.rextract(
- html, 'href="', '"', html.index(">Original<"))[0]
- except Exception:
- self.log.debug("Unable to fetch download URL for post %s "
- "(md5: %s)", post.get("id"), post.get("md5"))
- continue
-
- text.nameext_from_url(url, post)
- post.update(data)
- self._prepare(post)
- self._tags(post, html)
-
- yield Message.Directory, post
- yield Message.Url, url, post
-
def _tags_realbooru(self, post, page):
tag_container = text.extr(page, 'id="tagLink"', '</div>')
tags = collections.defaultdict(list)
@@ -161,7 +135,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
post["tags_" + key] = " ".join(value)
-INSTANCES = {
+BASE_PATTERN = GelbooruV02Extractor.update({
"realbooru": {
"root": "https://realbooru.com",
"pattern": r"realbooru\.com",
@@ -187,16 +161,14 @@ INSTANCES = {
"root": "https://xbooru.com",
"pattern": r"xbooru\.com",
},
-}
-
-BASE_PATTERN = GelbooruV02Extractor.update(INSTANCES)
+})
class GelbooruV02TagExtractor(GelbooruV02Extractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
+ pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]*)"
example = "https://safebooru.org/index.php?page=post&s=list&tags=TAG"
def __init__(self, match):
@@ -208,6 +180,8 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
return {"search_tags": self.tags}
def posts(self):
+ if self.tags == "all":
+ self.tags = ""
return self._pagination({"tags": self.tags})
diff --git a/gallery_dl/extractor/hatenablog.py b/gallery_dl/extractor/hatenablog.py
new file mode 100644
index 0000000..792f666
--- /dev/null
+++ b/gallery_dl/extractor/hatenablog.py
@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://hatenablog.com"""
+
+import re
+from .common import Extractor, Message
+from .. import text
+
+
+BASE_PATTERN = (
+ r"(?:hatenablog:https?://([^/?#]+)|(?:https?://)?"
+ r"([\w-]+\.(?:hatenablog\.(?:com|jp)"
+ r"|hatenadiary\.com|hateblo\.jp)))"
+)
+QUERY_RE = r"(?:\?([^#]*))?(?:#.*)?$"
+
+
+class HatenablogExtractor(Extractor):
+ """Base class for HatenaBlog extractors"""
+ category = "hatenablog"
+ directory_fmt = ("{category}", "{domain}")
+ filename_fmt = "{category}_{domain}_{entry}_{num:>02}.{extension}"
+ archive_fmt = "{filename}"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.domain = match.group(1) or match.group(2)
+
+ def _init(self):
+ self._find_img = re.compile(r'<img +([^>]+)').finditer
+
+ def _handle_article(self, article: str):
+ extr = text.extract_from(article)
+ date = text.parse_datetime(extr('<time datetime="', '"'))
+ entry_link = text.unescape(extr('<a href="', '"'))
+ entry = entry_link.partition("/entry/")[2]
+ title = text.unescape(extr('>', '<'))
+ content = extr(
+ '<div class="entry-content hatenablog-entry">', '</div>')
+
+ images = []
+ for i in self._find_img(content):
+ attributes = i.group(1)
+ if 'class="hatena-fotolife"' not in attributes:
+ continue
+ image = text.unescape(text.extr(attributes, 'src="', '"'))
+ images.append(image)
+
+ data = {
+ "domain": self.domain,
+ "date": date,
+ "entry": entry,
+ "title": title,
+ "count": len(images),
+ }
+ yield Message.Directory, data
+ for data["num"], url in enumerate(images, 1):
+ yield Message.Url, url, text.nameext_from_url(url, data)
+
+
+class HatenablogEntriesExtractor(HatenablogExtractor):
+ """Base class for a list of entries"""
+ allowed_parameters = ()
+
+ def __init__(self, match):
+ HatenablogExtractor.__init__(self, match)
+ self.path = match.group(3)
+ self.query = {key: value for key, value in text.parse_query(
+ match.group(4)).items() if self._acceptable_query(key)}
+
+ def _init(self):
+ HatenablogExtractor._init(self)
+ self._find_pager_url = re.compile(
+ r' class="pager-next">\s*<a href="([^"]+)').search
+
+ def items(self):
+ url = "https://" + self.domain + self.path
+ query = self.query
+
+ while url:
+ page = self.request(url, params=query).text
+
+ extr = text.extract_from(page)
+ attributes = extr('<body ', '>')
+ if "page-archive" in attributes:
+ yield from self._handle_partial_articles(extr)
+ else:
+ yield from self._handle_full_articles(extr)
+
+ match = self._find_pager_url(page)
+ url = text.unescape(match.group(1)) if match else None
+ query = None
+
+ def _handle_partial_articles(self, extr):
+ while True:
+ section = extr('<section class="archive-entry', '</section>')
+ if not section:
+ break
+
+ url = "hatenablog:" + text.unescape(text.extr(
+ section, '<a class="entry-title-link" href="', '"'))
+ data = {"_extractor": HatenablogEntryExtractor}
+ yield Message.Queue, url, data
+
+ def _handle_full_articles(self, extr):
+ while True:
+ attributes = extr('<article ', '>')
+ if not attributes:
+ break
+ if "no-entry" in attributes:
+ continue
+
+ article = extr('', '</article>')
+ yield from self._handle_article(article)
+
+ def _acceptable_query(self, key):
+ return key == "page" or key in self.allowed_parameters
+
+
+class HatenablogEntryExtractor(HatenablogExtractor):
+ """Extractor for a single entry URL"""
+ subcategory = "entry"
+ pattern = BASE_PATTERN + r"/entry/([^?#]+)" + QUERY_RE
+ example = "https://BLOG.hatenablog.com/entry/PATH"
+
+ def __init__(self, match):
+ HatenablogExtractor.__init__(self, match)
+ self.path = match.group(3)
+
+ def items(self):
+ url = "https://" + self.domain + "/entry/" + self.path
+ page = self.request(url).text
+
+ extr = text.extract_from(page)
+ while True:
+ attributes = extr('<article ', '>')
+ if "no-entry" in attributes:
+ continue
+ article = extr('', '</article>')
+ return self._handle_article(article)
+
+
+class HatenablogHomeExtractor(HatenablogEntriesExtractor):
+ """Extractor for a blog's home page"""
+ subcategory = "home"
+ pattern = BASE_PATTERN + r"(/?)" + QUERY_RE
+ example = "https://BLOG.hatenablog.com"
+
+
+class HatenablogArchiveExtractor(HatenablogEntriesExtractor):
+ """Extractor for a blog's archive page"""
+ subcategory = "archive"
+ pattern = (BASE_PATTERN + r"(/archive(?:/\d+(?:/\d+(?:/\d+)?)?"
+ r"|/category/[^?#]+)?)" + QUERY_RE)
+ example = "https://BLOG.hatenablog.com/archive/2024"
+
+
+class HatenablogSearchExtractor(HatenablogEntriesExtractor):
+ """Extractor for a blog's search results"""
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"(/search)" + QUERY_RE
+ example = "https://BLOG.hatenablog.com/search?q=QUERY"
+ allowed_parameters = ("q",)
diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py
deleted file mode 100644
index a522140..0000000
--- a/gallery_dl/extractor/hbrowse.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2015-2023 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://www.hbrowse.com/"""
-
-from .common import ChapterExtractor, MangaExtractor
-from .. import text, util, exception
-
-
-class HbrowseBase():
- """Base class for hbrowse extractors"""
- category = "hbrowse"
- root = "https://www.hbrowse.com"
-
- def parse_page(self, page, data):
- """Parse metadata on 'page' and add it to 'data'"""
- data, pos = text.extract_all(page, (
- ('manga' , '<td class="listLong">', '</td>'),
- ('artist', '<td class="listLong">', '</td>'),
- ('total' , '<td class="listLong">', ' '),
- ('origin', '<td class="listLong">', '</td>'),
- ), values=data)
-
- if not data["manga"] and "<b>Warning</b>" in page:
- msg = page.rpartition(">")[2].strip()
- raise exception.StopExtraction("Site is not accessible: '%s'", msg)
-
- tags = text.extract(page, 'class="listTable"', '</table>', pos)[0]
-
- data["manga"] = text.unescape(data["manga"])
- data["total"] = text.parse_int(data["total"])
- data["artist"] = text.remove_html(data["artist"])
- data["origin"] = text.remove_html(data["origin"])
- data["tags"] = list(text.extract_iter(tags, 'href="/browse/', '"'))
- return data
-
-
-class HbrowseChapterExtractor(HbrowseBase, ChapterExtractor):
- """Extractor for manga-chapters from hbrowse.com"""
- directory_fmt = ("{category}", "{manga_id} {manga}", "c{chapter:>05}")
- filename_fmt = ("{category}_{manga_id}_{chapter:>05}_"
- "{page:>03}.{extension}")
- archive_fmt = "{manga_id}_{chapter}_{page}"
- pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/(\d+)/c(\d+))"
- example = "https://www.hbrowse.com/12345/c00000"
-
- def __init__(self, match):
- self.path, self.gid, self.chapter = match.groups()
- self.path += "/"
- ChapterExtractor.__init__(self, match)
-
- def metadata(self, page):
- return self.parse_page(page, {
- "manga_id": text.parse_int(self.gid),
- "chapter": text.parse_int(self.chapter)
- })
-
- def images(self, page):
- base = self.root + "/data" + self.path
- json_data = text.extract(page, ';list = ', ',"zzz"')[0] + "]"
- return [(base + name, None) for name in util.json_loads(json_data)]
-
-
-class HbrowseMangaExtractor(HbrowseBase, MangaExtractor):
- """Extractor for manga from hbrowse.com"""
- chapterclass = HbrowseChapterExtractor
- reverse = False
- pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/\d+)/?$"
- example = "https://www.hbrowse.com/12345"
-
- def chapters(self, page):
- results = []
- data = self.parse_page(page, {
- "manga_id": text.parse_int(
- self.manga_url.rstrip("/").rpartition("/")[2])
- })
-
- pos = 0
- needle = '<td class="listMiddle">\n<a class="listLink" href="'
- while True:
- url, pos = text.extract(page, needle, '"', pos)
- if not url:
- return results
- title, pos = text.extract(page, '>View ', '<', pos)
- data["chapter"] = text.parse_int(url.rpartition("/")[2][1:])
- data["title"] = title
- results.append((text.urljoin(self.root, url), data.copy()))
diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py
index f6170c2..54c6539 100644
--- a/gallery_dl/extractor/issuu.py
+++ b/gallery_dl/extractor/issuu.py
@@ -29,8 +29,9 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
example = "https://issuu.com/issuu/docs/TITLE/"
def metadata(self, page):
+ pos = page.rindex('id="initial-data"')
data = util.json_loads(text.rextract(
- page, '<script data-json="', '"')[0].replace("&quot;", '"'))
+ page, '<script data-json="', '"', pos)[0].replace("&quot;", '"'))
doc = data["initialDocumentData"]["document"]
doc["date"] = text.parse_datetime(
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index c24e57d..10228b5 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -9,9 +9,10 @@
"""Extractors for https://kemono.party/"""
from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
from ..cache import cache, memcache
import itertools
+import json
import re
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.(party|su)"
@@ -37,10 +38,14 @@ class KemonopartyExtractor(Extractor):
Extractor.__init__(self, match)
def _init(self):
+ self.revisions = self.config("revisions")
self._prepare_ddosguard_cookies()
self._find_inline = re.compile(
r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+'
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
+ self._json_dumps = json.JSONEncoder(
+ ensure_ascii=False, check_circular=False,
+ sort_keys=True, separators=(",", ":")).encode
def items(self):
find_hash = re.compile(HASH_PATTERN).match
@@ -223,11 +228,23 @@ class KemonopartyExtractor(Extractor):
idx = len(revs)
for rev in revs:
+ rev["revision_hash"] = self._revision_hash(rev)
rev["revision_index"] = idx
idx -= 1
return revs
+ def _revision_hash(self, revision):
+ rev = revision.copy()
+ rev.pop("revision_id", None)
+ rev.pop("added", None)
+ rev.pop("next", None)
+ rev.pop("prev", None)
+ rev["file"].pop("name", None)
+ for a in rev["attachments"]:
+ a.pop("name", None)
+ return util.sha1(self._json_dumps(rev))
+
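The idea behind the revision hash, sketched standalone below: fields that vary per revision are stripped, and the remainder is hashed over a canonical JSON serialization, so two revisions with identical content map to the same hash. util.sha1 is assumed to return a hex digest of its input; the encoder settings mirror _json_dumps above, and the real method additionally drops the "name" of the post file and attachments.

```python
import hashlib
import json

# Canonical serialization: sorted keys, compact separators, no ASCII escaping.
dumps = json.JSONEncoder(ensure_ascii=False, check_circular=False,
                         sort_keys=True, separators=(",", ":")).encode

def revision_hash(revision):
    rev = revision.copy()
    for key in ("revision_id", "added", "next", "prev"):
        rev.pop(key, None)        # strip fields that differ per revision
    return hashlib.sha1(dumps(rev).encode()).hexdigest()

a = {"id": "1", "title": "post", "revision_id": 5, "added": "2024-01-01"}
b = {"id": "1", "title": "post", "revision_id": 9, "added": "2024-01-21"}
print(revision_hash(a) == revision_hash(b))   # True: identical content
```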
def _validate(response):
return (response.headers["content-length"] != "9" or
@@ -252,13 +269,13 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
url = self.api_url
params = text.parse_query(self.query)
params["o"] = text.parse_int(params.get("o"))
- revisions = self.config("revisions")
while True:
posts = self.request(url, params=params).json()
- if revisions:
+ if self.revisions:
for post in posts:
+ post["revision_hash"] = self._revision_hash(post)
post["revision_id"] = 0
post_url = "{}/post/{}".format(self.api_url, post["id"])
try:
@@ -296,7 +313,8 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
def posts(self):
if not self.revision:
post = self.request(self.api_url).json()
- if self.config("revisions"):
+ if self.revisions:
+ post["revision_hash"] = self._revision_hash(post)
post["revision_id"] = 0
try:
revs = self._post_revisions(self.api_url)
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 94bea57..bca7e4d 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -148,6 +148,32 @@ class MangadexFeedExtractor(MangadexExtractor):
return self.api.user_follows_manga_feed()
+class MangadexListExtractor(MangadexExtractor):
+ """Extractor for mangadex lists"""
+ subcategory = "list"
+ pattern = (BASE_PATTERN +
+ r"/list/([0-9a-f-]+)(?:/[^/?#]*)?(?:\?tab=(\w+))?")
+ example = ("https://mangadex.org/list"
+ "/01234567-89ab-cdef-0123-456789abcdef/NAME")
+
+ def __init__(self, match):
+ MangadexExtractor.__init__(self, match)
+ if match.group(2) == "feed":
+ self.subcategory = "list-feed"
+ else:
+ self.items = self._items_titles
+
+ def chapters(self):
+ return self.api.list_feed(self.uuid)
+
+ def _items_titles(self):
+ data = {"_extractor": MangadexMangaExtractor}
+ for item in self.api.list(self.uuid)["relationships"]:
+ if item["type"] == "manga":
+ url = "{}/title/{}".format(self.root, item["id"])
+ yield Message.Queue, url, data
+
+
class MangadexAPI():
"""Interface for the MangaDex API v5
@@ -173,6 +199,12 @@ class MangadexAPI():
params = {"includes[]": ("scanlation_group",)}
return self._call("/chapter/" + uuid, params)["data"]
+ def list(self, uuid):
+ return self._call("/list/" + uuid)["data"]
+
+ def list_feed(self, uuid):
+ return self._pagination("/list/" + uuid + "/feed")
+
@memcache(keyarg=1)
def manga(self, uuid):
params = {"includes[]": ("artist", "author")}
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 0b63d6c..68b4196 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -75,7 +75,7 @@ class MastodonExtractor(BaseExtractor):
account["acct"], account["moved"]["acct"])
-INSTANCES = {
+BASE_PATTERN = MastodonExtractor.update({
"mastodon.social": {
"root" : "https://mastodon.social",
"pattern" : r"mastodon\.social",
@@ -100,9 +100,7 @@ INSTANCES = {
"client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o",
"client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY",
}
-}
-
-BASE_PATTERN = MastodonExtractor.update(INSTANCES) + "(?:/web)?"
+}) + "(?:/web)?"
class MastodonUserExtractor(MastodonExtractor):
@@ -174,10 +172,8 @@ class MastodonAPI():
if access_token is None or access_token == "cache":
access_token = _access_token_cache(extractor.instance)
if not access_token:
- try:
- access_token = INSTANCES[extractor.category]["access-token"]
- except (KeyError, TypeError):
- pass
+ access_token = extractor.config_instance("access-token")
+
if access_token:
self.headers = {"Authorization": "Bearer " + access_token}
else:
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index b991705..9614513 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -116,7 +116,8 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
yield from text.extract_iter(
page, 'href="javascript:void(0);"><img src="', '"')
else:
- yield text.extr(page, 'itemprop="image" src="', '"')
+ pos = page.find('id="view-center"') + 1
+ yield text.extract(page, 'itemprop="image" src="', '"', pos)[0]
@staticmethod
def _extract_user_name(page):
diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py
index bc7b308..d36f509 100644
--- a/gallery_dl/extractor/nitter.py
+++ b/gallery_dl/extractor/nitter.py
@@ -235,10 +235,6 @@ BASE_PATTERN = NitterExtractor.update({
"root": "https://nitter.net",
"pattern": r"nitter\.net",
},
- "nitter.lacontrevoie.fr": {
- "root": "https://nitter.lacontrevoie.fr",
- "pattern": r"nitter\.lacontrevoie\.fr",
- },
"nitter.1d4.us": {
"root": "https://nitter.1d4.us",
"pattern": r"nitter\.1d4\.us",
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 1690160..8c8a5a9 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -358,8 +358,8 @@ class OAuthMastodon(OAuthBase):
yield Message.Version, 1
from . import mastodon
- for application in mastodon.INSTANCES.values():
- if self.instance == application["root"].partition("://")[2]:
+ for _, root, application in mastodon.MastodonExtractor.instances:
+ if self.instance == root.partition("://")[2]:
break
else:
application = self._register(self.instance)
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 89c0d2f..5226724 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -56,7 +56,7 @@ class PahealExtractor(Extractor):
"date" : text.parse_datetime(
extr("datetime='", "'"), "%Y-%m-%dT%H:%M:%S%z"),
"source" : text.unescape(text.extr(
- extr(">Source&nbsp;Link<", "</td>"), "href='", "'")),
+ extr(">Source Link<", "</td>"), "href='", "'")),
}
dimensions, size, ext = extr("Info</th><td>", "<").split(" // ")
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 6c2f39d..62d11f2 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -52,19 +52,29 @@ class PatreonExtractor(Extractor):
post["hash"] = fhash
post["type"] = kind
post["num"] += 1
- yield Message.Url, url, text.nameext_from_url(name, post)
+ text.nameext_from_url(name, post)
+ if text.ext_from_url(url) == "m3u8":
+ url = "ytdl:" + url
+ post["extension"] = "mp4"
+ yield Message.Url, url, post
else:
self.log.debug("skipping %s (%s %s)", url, fhash, kind)
- @staticmethod
- def _postfile(post):
+ def _postfile(self, post):
postfile = post.get("post_file")
if postfile:
- return (("postfile", postfile["url"], postfile["name"]),)
+ url = postfile["url"]
+ name = postfile.get("name")
+ if not name:
+ if url.startswith("https://stream.mux.com/"):
+ name = url
+ else:
+ name = self._filename(url) or url
+ return (("postfile", url, name),)
return ()
def _images(self, post):
- for image in post["images"]:
+ for image in post.get("images") or ():
url = image.get("download_url")
if url:
name = image.get("file_name") or self._filename(url) or url
@@ -80,7 +90,7 @@ class PatreonExtractor(Extractor):
return ()
def _attachments(self, post):
- for attachment in post["attachments"]:
+ for attachment in post.get("attachments") or ():
url = self.request(
attachment["url"], method="HEAD",
allow_redirects=False, fatal=False,
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index ac6a391..339646f 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -32,7 +32,7 @@ class PhilomenaExtractor(BooruExtractor):
post["date"] = text.parse_datetime(post["created_at"])
-INSTANCES = {
+BASE_PATTERN = PhilomenaExtractor.update({
"derpibooru": {
"root": "https://derpibooru.org",
"pattern": r"(?:www\.)?derpibooru\.org",
@@ -48,9 +48,7 @@ INSTANCES = {
"pattern": r"furbooru\.org",
"filter_id": "2",
},
-}
-
-BASE_PATTERN = PhilomenaExtractor.update(INSTANCES)
+})
class PhilomenaPostExtractor(PhilomenaExtractor):
@@ -176,10 +174,7 @@ class PhilomenaAPI():
if filter_id:
params["filter_id"] = filter_id
elif not api_key:
- try:
- params["filter_id"] = INSTANCES[extr.category]["filter_id"]
- except (KeyError, TypeError):
- params["filter_id"] = "2"
+ params["filter_id"] = extr.config_instance("filter_id") or "2"
params["page"] = extr.page_start
params["per_page"] = extr.per_page
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 4414c71..b9821f2 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -826,9 +826,9 @@ class PixivAppAPI():
extractor.session.headers.update({
"App-OS" : "ios",
- "App-OS-Version": "13.1.2",
- "App-Version" : "7.7.6",
- "User-Agent" : "PixivIOSApp/7.7.6 (iOS 13.1.2; iPhone11,8)",
+ "App-OS-Version": "16.7.2",
+ "App-Version" : "7.19.1",
+ "User-Agent" : "PixivIOSApp/7.19.1 (iOS 16.7.2; iPhone12,8)",
"Referer" : "https://app-api.pixiv.net/",
})
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 602895c..b3b7a9c 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -143,7 +143,7 @@ class SankakuPostExtractor(SankakuExtractor):
"""Extractor for single posts from sankaku.app"""
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/posts?(?:/show)?/([0-9a-f]+)"
+ pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)"
example = "https://sankaku.app/post/show/12345"
def __init__(self, match):
diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py
index 8a08fab..67f38c4 100644
--- a/gallery_dl/extractor/shimmie2.py
+++ b/gallery_dl/extractor/shimmie2.py
@@ -19,17 +19,12 @@ class Shimmie2Extractor(BaseExtractor):
archive_fmt = "{id}"
def _init(self):
- try:
- instance = INSTANCES[self.category]
- except KeyError:
- return
-
- cookies = instance.get("cookies")
+ cookies = self.config_instance("cookies")
if cookies:
domain = self.root.rpartition("/")[2]
self.cookies_update_dict(cookies, domain=domain)
- file_url = instance.get("file_url")
+ file_url = self.config_instance("file_url")
if file_url:
self.file_url_fmt = file_url
@@ -73,15 +68,15 @@ class Shimmie2Extractor(BaseExtractor):
return "'"
-INSTANCES = {
+BASE_PATTERN = Shimmie2Extractor.update({
"loudbooru": {
"root": "https://loudbooru.com",
"pattern": r"loudbooru\.com",
"cookies": {"ui-tnc-agreed": "true"},
},
"giantessbooru": {
- "root": "https://giantessbooru.com",
- "pattern": r"giantessbooru\.com",
+ "root": "https://sizechangebooru.com",
+ "pattern": r"(?:sizechange|giantess)booru\.com",
"cookies": {"agreed": "true"},
},
"tentaclerape": {
@@ -97,9 +92,7 @@ INSTANCES = {
"root": "https://rule34hentai.net",
"pattern": r"rule34hentai\.net",
},
-}
-
-BASE_PATTERN = Shimmie2Extractor.update(INSTANCES) + r"/(?:index\.php\?q=/?)?"
+}) + r"/(?:index\.php\?q=/?)?"
class Shimmie2TagExtractor(Shimmie2Extractor):
@@ -183,25 +176,25 @@ class Shimmie2TagExtractor(Shimmie2Extractor):
extr = text.extract_from(self.request(url).text)
while True:
- pid = extr('href="./index.php?q=/post/view/', '&')
+ pid = extr("href='./index.php?q=/post/view/", "&")
if not pid:
break
- tags, dimensions, size = extr('title="', '"').split(" // ")
+ tags, dimensions, size = extr("title='", "'").split(" // ")
width, _, height = dimensions.partition("x")
yield {
"file_url": file_url_fmt(pid),
- "id": pid,
- "md5": "",
- "tags": tags,
- "width": width,
- "height": height,
- "size": text.parse_bytes(size[:-1]),
+ "id" : pid,
+ "md5" : "",
+ "tags" : tags,
+ "width" : width,
+ "height" : height,
+ "size" : text.parse_bytes(size[:-1]),
}
pnum += 1
- if not extr('/{}">{}<'.format(pnum, pnum), ">"):
+ if not extr("/{0}'>{0}<".format(pnum), ">"):
return
@@ -248,7 +241,7 @@ class Shimmie2PostExtractor(Shimmie2Extractor):
"id" : self.post_id,
"tags" : extr(": ", "<").partition(" - ")[0].rstrip(")"),
"md5" : "",
- "file_url": self.root + extr('id="main_image" src=".', '"'),
+ "file_url": self.root + extr("id='main_image' src='.", "'"),
"width" : extr("orig_width =", ";"),
"height" : 0,
"size" : 0,
diff --git a/gallery_dl/extractor/steamgriddb.py b/gallery_dl/extractor/steamgriddb.py
new file mode 100644
index 0000000..9d46fd6
--- /dev/null
+++ b/gallery_dl/extractor/steamgriddb.py
@@ -0,0 +1,211 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.steamgriddb.com"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?steamgriddb\.com"
+LANGUAGE_CODES = (
+ "aa", "ab", "ae", "af", "ak", "am", "an", "ar", "as", "av", "ay", "az",
+ "ba", "be", "bg", "bh", "bi", "bm", "bn", "bo", "br", "bs", "ca", "ce",
+ "ch", "co", "cr", "cs", "cu", "cv", "cy", "da", "de", "dv", "dz", "ee",
+ "el", "en", "eo", "es", "et", "eu", "fa", "ff", "fi", "fj", "fo", "fr",
+ "fy", "ga", "gd", "gl", "gn", "gu", "gv", "ha", "he", "hi", "ho", "hr",
+ "ht", "hu", "hy", "hz", "ia", "id", "ie", "ig", "ii", "ik", "io", "is",
+ "it", "iu", "ja", "jv", "ka", "kg", "ki", "kj", "kk", "kl", "km", "kn",
+ "ko", "kr", "ks", "ku", "kv", "kw", "ky", "la", "lb", "lg", "li", "ln",
+ "lo", "lt", "lu", "lv", "mg", "mh", "mi", "mk", "ml", "mn", "mr", "ms",
+ "mt", "my", "na", "nb", "nd", "ne", "ng", "nl", "nn", "no", "nr", "nv",
+ "ny", "oc", "oj", "om", "or", "os", "pa", "pi", "pl", "ps", "pt", "qu",
+ "rm", "rn", "ro", "ru", "rw", "sa", "sc", "sd", "se", "sg", "si", "sk",
+ "sl", "sm", "sn", "so", "sq", "sr", "ss", "st", "su", "sv", "sw", "ta",
+ "te", "tg", "th", "ti", "tk", "tl", "tn", "to", "tr", "ts", "tt", "tw",
+ "ty", "ug", "uk", "ur", "uz", "ve", "vi", "vo", "wa", "wo", "xh", "yi",
+ "yo", "za", "zh", "zu",
+)
+FILE_EXT_TO_MIME = {
+ "png": "image/png",
+ "jpeg": "image/jpeg",
+ "jpg": "image/jpeg",
+ "webp": "image/webp",
+ "ico": "image/vnd.microsoft.icon",
+ "all": "all",
+}
+
+
+class SteamgriddbExtractor(Extractor):
+ """Base class for SteamGridDB"""
+ category = "steamgriddb"
+ directory_fmt = ("{category}", "{subcategory}", "{game[id]}")
+ filename_fmt = "{game[id]}_{id}_{num:>02}.{extension}"
+ archive_fmt = "{filename}"
+ root = "https://www.steamgriddb.com"
+
+ def _init(self):
+ self.cookies_update({
+ "userprefs": "%7B%22adult%22%3Afalse%7D",
+ })
+
+ def items(self):
+ download_fake_png = self.config("download-fake-png", True)
+
+ for asset in self.assets():
+ if download_fake_png and asset.get("fake_png"):
+ urls = (asset["url"], asset["fake_png"])
+ else:
+ urls = (asset["url"],)
+
+ asset["count"] = len(urls)
+ yield Message.Directory, asset
+ for asset["num"], url in enumerate(urls, 1):
+ yield Message.Url, url, text.nameext_from_url(url, asset)
+
+ def _call(self, endpoint, **kwargs):
+ data = self.request(self.root + endpoint, **kwargs).json()
+ if not data["success"]:
+ raise exception.StopExtraction(data["error"])
+ return data["data"]
+
+
+class SteamgriddbAssetsExtractor(SteamgriddbExtractor):
+ """Base class for extracting a list of assets"""
+
+ def __init__(self, match):
+ SteamgriddbExtractor.__init__(self, match)
+ list_type = match.group(1)
+ num = int(match.group(2))
+ self.game_id = num if list_type == "game" else None
+ self.collection_id = num if list_type == "collection" else None
+ self.page = int(match.group(3) or 1)
+
+ def assets(self):
+ limit = 48
+ page = max(self.page - 1, 0)
+
+ sort = self.config("sort", "score_desc")
+ if sort not in ("score_desc", "score_asc", "score_old_desc",
+ "score_old_asc", "age_desc", "age_asc"):
+ raise exception.StopExtraction("Invalid sort '%s'", sort)
+
+ json = {
+ "static" : self.config("static", True),
+ "animated": self.config("animated", True),
+ "humor" : self.config("humor", True),
+ "nsfw" : self.config("nsfw", True),
+ "epilepsy": self.config("epilepsy", True),
+ "untagged": self.config("untagged", True),
+
+ "asset_type": self.asset_type,
+ "limit": limit,
+ "order": sort,
+ }
+ if self.valid_dimensions:
+ json["dimensions"] = self.config_list(
+ "dimensions", "dimension", self.valid_dimensions)
+ json["styles"] = self.config_list("styles", "style", self.valid_styles)
+ json["languages"] = self.config_list(
+ "languages", "language", LANGUAGE_CODES)
+ file_types = self.config_list(
+ "file-types", "file type", self.valid_file_types)
+ json["mime"] = [FILE_EXT_TO_MIME[i] for i in file_types]
+
+ if self.game_id:
+ json["game_id"] = [self.game_id]
+ else:
+ json["collection_id"] = self.collection_id
+
+ while True:
+ json["page"] = page
+
+ data = self._call(
+ "/api/public/search/assets", method="POST", json=json)
+ for asset in data["assets"]:
+ if not asset.get("game"):
+ asset["game"] = data["game"]
+ yield asset
+
+ if data["total"] <= limit * (page + 1):
+ break
+ page += 1
+
+ def config_list(self, key, type_name, valid_values):
+ value = self.config(key)
+ if isinstance(value, str):
+ value = value.split(",")
+
+ if value is None or "all" in value:
+ return ["all"]
+
+ for i in value:
+ if i not in valid_values:
+ raise exception.StopExtraction("Invalid %s '%s'", type_name, i)
+
+ return value
+
+
+class SteamgriddbAssetExtractor(SteamgriddbExtractor):
+ """Extractor for a single asset"""
+ subcategory = "asset"
+ pattern = BASE_PATTERN + r"/(grid|hero|logo|icon)/(\d+)"
+ example = "https://www.steamgriddb.com/grid/1234"
+
+ def __init__(self, match):
+ SteamgriddbExtractor.__init__(self, match)
+ self.asset_type = match.group(1)
+ self.asset_id = match.group(2)
+
+ def assets(self):
+ endpoint = "/api/public/asset/" + self.asset_type + "/" + self.asset_id
+ asset = self._call(endpoint)["asset"]
+ return (asset,)
+
+
+class SteamgriddbGridsExtractor(SteamgriddbAssetsExtractor):
+ subcategory = "grids"
+ asset_type = "grid"
+ pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/grids(?:/(\d+))?"
+ example = "https://www.steamgriddb.com/game/1234/grids"
+ valid_dimensions = ("460x215", "920x430", "600x900", "342x482", "660x930",
+ "512x512", "1024x1024")
+ valid_styles = ("alternate", "blurred", "no_logo", "material",
+ "white_logo")
+ valid_file_types = ("png", "jpeg", "jpg", "webp")
+
+
+class SteamgriddbHeroesExtractor(SteamgriddbAssetsExtractor):
+ subcategory = "heroes"
+ asset_type = "hero"
+ pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/heroes(?:/(\d+))?"
+ example = "https://www.steamgriddb.com/game/1234/heroes"
+ valid_dimensions = ("1920x620", "3840x1240", "1600x650")
+ valid_styles = ("alternate", "blurred", "material")
+ valid_file_types = ("png", "jpeg", "jpg", "webp")
+
+
+class SteamgriddbLogosExtractor(SteamgriddbAssetsExtractor):
+ subcategory = "logos"
+ asset_type = "logo"
+ pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/logos(?:/(\d+))?"
+ example = "https://www.steamgriddb.com/game/1234/logos"
+ valid_dimensions = None
+ valid_styles = ("official", "white", "black", "custom")
+ valid_file_types = ("png", "webp")
+
+
+class SteamgriddbIconsExtractor(SteamgriddbAssetsExtractor):
+ subcategory = "icons"
+ asset_type = "icon"
+ pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/icons(?:/(\d+))?"
+ example = "https://www.steamgriddb.com/game/1234/icons"
+ valid_dimensions = ["{0}x{0}".format(i) for i in (8, 10, 14, 16, 20, 24,
+ 28, 32, 35, 40, 48, 54, 56, 57, 60, 64, 72, 76, 80, 90,
+ 96, 100, 114, 120, 128, 144, 150, 152, 160, 180, 192,
+ 194, 256, 310, 512, 768, 1024)]
+ valid_styles = ("official", "custom")
+ valid_file_types = ("png", "ico")
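
For reference, the search request built in SteamgriddbAssetsExtractor.assets() corresponds to a plain POST like the one below. The field names and the endpoint path are taken from the code above; everything else about the endpoint's behavior is an assumption:

import requests

payload = {
    "static": True, "animated": True, "humor": True,
    "nsfw": True, "epilepsy": True, "untagged": True,
    "asset_type": "grid",
    "limit": 48,
    "order": "score_desc",
    "styles": ["all"],
    "languages": ["all"],
    "mime": ["all"],
    "game_id": [1234],
    "page": 0,
}
data = requests.post(
    "https://www.steamgriddb.com/api/public/search/assets",
    json=payload).json()
if data["success"]:
    for asset in data["data"]["assets"]:
        print(asset["url"])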
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index aa9ab9f..cf759e0 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -546,7 +546,7 @@ class TwitterTimelineExtractor(TwitterExtractor):
def _select_tweet_source(self):
strategy = self.config("strategy")
if strategy is None or strategy == "auto":
- if self.retweets or self.replies or self.textonly:
+ if self.retweets or self.textonly:
return self.api.user_tweets
else:
return self.api.user_media
diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py
index f2e6521..49a3deb 100644
--- a/gallery_dl/extractor/urlshortener.py
+++ b/gallery_dl/extractor/urlshortener.py
@@ -15,7 +15,7 @@ class UrlshortenerExtractor(BaseExtractor):
basecategory = "urlshortener"
-INSTANCES = {
+BASE_PATTERN = UrlshortenerExtractor.update({
"bitly": {
"root": "https://bit.ly",
"pattern": r"bit\.ly",
@@ -26,9 +26,7 @@ INSTANCES = {
"root": "https://t.co",
"pattern": r"t\.co",
},
-}
-
-BASE_PATTERN = UrlshortenerExtractor.update(INSTANCES)
+})
class UrlshortenerLinkExtractor(UrlshortenerExtractor):
@@ -42,10 +40,7 @@ class UrlshortenerLinkExtractor(UrlshortenerExtractor):
self.id = match.group(match.lastindex)
def _init(self):
- try:
- self.headers = INSTANCES[self.category]["headers"]
- except Exception:
- self.headers = None
+ self.headers = self.config_instance("headers")
def items(self):
response = self.request(
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index c22e67e..95eeafe 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -10,6 +10,7 @@
from .common import Extractor, Message
from .. import text, exception
+import re
BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
@@ -24,6 +25,7 @@ class VkExtractor(Extractor):
request_interval = (0.5, 1.5)
def items(self):
+ sub = re.compile(r"/imp[fg]/").sub
sizes = "wzyxrqpo"
data = self.metadata()
@@ -40,11 +42,15 @@ class VkExtractor(Extractor):
continue
try:
- photo["url"] = photo[size + "src"]
+ url = photo[size + "src"]
except KeyError:
self.log.warning("no photo URL found (%s)", photo.get("id"))
continue
+ photo["url"] = sub("/", url.partition("?")[0])
+ # the original URL is kept as a download fallback below
+ photo["_fallback"] = (url,)
+
try:
_, photo["width"], photo["height"] = photo[size]
except ValueError:
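
Effect of the substitution above on a vk photo URL (the URL shape is illustrative, not taken from the site): the /impf/ or /impg/ proxy segment and the query string are stripped to get a direct image URL.

import re

sub = re.compile(r"/imp[fg]/").sub
url = "https://sun9-42.userapi.com/impg/abc123/photo.jpg?size=1280x960"
print(sub("/", url.partition("?")[0]))
# https://sun9-42.userapi.com/abc123/photo.jpg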
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index 3f2f410..949c7cb 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -87,23 +87,41 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
self.episode_no = params.get("episode_no")
def metadata(self, page):
- keywords, pos = text.extract(
- page, '<meta name="keywords" content="', '"')
- title, pos = text.extract(
- page, '<meta property="og:title" content="', '"', pos)
- descr, pos = text.extract(
- page, '<meta property="og:description" content="', '"', pos)
+ extr = text.extract_from(page)
+ title = extr('<meta property="og:title" content="', '"')
+ descr = extr('<meta property="og:description" content="', '"')
+
+ if extr('<div class="subj_info"', '\n'):
+ comic_name = extr('>', '<')
+ episode_name = extr('<h1 class="subj_episode" title="', '"')
+ else:
+ comic_name = episode_name = ""
+
+ if extr('<span class="tx _btnOpenEpisodeList ', '"'):
+ episode = extr('>#', '<')
+ else:
+ episode = ""
+
+ if extr('<div class="author_area"', '\n'):
+ username = extr('/creator/', '"')
+ author_name = extr('<span>', '</span>')
+ else:
+ username = author_name = ""
return {
- "genre" : self.genre,
- "comic" : self.comic,
- "title_no" : self.title_no,
- "episode_no" : self.episode_no,
- "title" : text.unescape(title),
- "episode" : keywords.split(", ")[1],
- "description": text.unescape(descr),
- "lang" : self.lang,
- "language" : util.code_to_language(self.lang),
+ "genre" : self.genre,
+ "comic" : self.comic,
+ "title_no" : self.title_no,
+ "episode_no" : self.episode_no,
+ "title" : text.unescape(title),
+ "episode" : episode,
+ "comic_name" : text.unescape(comic_name),
+ "episode_name": text.unescape(episode_name),
+ "username" : username,
+ "author_name" : text.unescape(author_name),
+ "description" : text.unescape(descr),
+ "lang" : self.lang,
+ "language" : util.code_to_language(self.lang),
}
@staticmethod
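
The rewrite above relies on text.extract_from(), which returns a closure that scans forward from wherever the previous extraction stopped, so lookups must be issued in page order. A minimal reimplementation of the idea (the real helper lives in gallery_dl.text and is slightly more general):

def extract_from(txt, pos=0, default=""):
    def extr(begin, end):
        nonlocal pos
        try:
            first = txt.index(begin, pos) + len(begin)
            last = txt.index(end, first)
        except ValueError:
            return default
        pos = last + len(end)   # next call resumes here
        return txt[first:last]
    return extr

extr = extract_from('<h1 class="subj_episode" title="Ep. 1">x</h1>')
print(extr('title="', '"'))  # Ep. 1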
diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py
new file mode 100644
index 0000000..1eafc29
--- /dev/null
+++ b/gallery_dl/extractor/wikimedia.py
@@ -0,0 +1,181 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Ailothaen
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for Wikimedia sites"""
+
+from .common import BaseExtractor, Message
+from .. import text
+
+
+class WikimediaExtractor(BaseExtractor):
+ """Base class for wikimedia extractors"""
+ basecategory = "wikimedia"
+ filename_fmt = "{filename} ({sha1[:8]}).{extension}"
+ directory_fmt = ("{category}", "{page}")
+ archive_fmt = "{sha1}"
+ request_interval = (1.0, 2.0)
+
+ def __init__(self, match):
+ BaseExtractor.__init__(self, match)
+ path = match.group(match.lastindex)
+
+ if self.category == "fandom":
+ self.category = \
+ "fandom-" + self.root.partition(".")[0].rpartition("/")[2]
+
+ if path.startswith("wiki/"):
+ path = path[5:]
+ self.api_path = "/w/api.php"
+ else:
+ self.api_path = "/api.php"
+
+ pre, sep, _ = path.partition(":")
+ prefix = pre.lower() if sep else None
+
+ self.title = path = text.unquote(path)
+ if prefix:
+ self.subcategory = prefix
+
+ if prefix == "category":
+ self.params = {
+ "generator": "categorymembers",
+ "gcmtitle" : path,
+ "gcmtype" : "file",
+ }
+ elif prefix == "file":
+ self.params = {
+ "titles" : path,
+ }
+ else:
+ self.params = {
+ "generator": "images",
+ "titles" : path,
+ }
+
+ def _init(self):
+ api_path = self.config_instance("api-path")
+ if api_path:
+ if api_path[0] == "/":
+ self.api_url = self.root + api_path
+ else:
+ self.api_url = api_path
+ else:
+ self.api_url = self.root + self.api_path
+
+ def items(self):
+ for info in self._pagination(self.params):
+ image = info["imageinfo"][0]
+
+ image["metadata"] = {
+ m["name"]: m["value"]
+ for m in image["metadata"]}
+ image["commonmetadata"] = {
+ m["name"]: m["value"]
+ for m in image["commonmetadata"]}
+
+ filename = image["canonicaltitle"]
+ image["filename"], _, image["extension"] = \
+ filename.partition(":")[2].rpartition(".")
+ image["date"] = text.parse_datetime(
+ image["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
+ image["page"] = self.title
+
+ yield Message.Directory, image
+ yield Message.Url, image["url"], image
+
+ def _pagination(self, params):
+ """
+ https://www.mediawiki.org/wiki/API:Query
+ https://opendata.stackexchange.com/questions/13381
+ """
+
+ url = self.api_url
+ params["action"] = "query"
+ params["format"] = "json"
+ params["prop"] = "imageinfo"
+ params["iiprop"] = (
+ "timestamp|user|userid|comment|canonicaltitle|url|size|"
+ "sha1|mime|metadata|commonmetadata|extmetadata|bitdepth"
+ )
+
+ while True:
+ data = self.request(url, params=params).json()
+
+ try:
+ pages = data["query"]["pages"]
+ except KeyError:
+ pass
+ else:
+ yield from pages.values()
+
+ try:
+ continuation = data["continue"]
+ except KeyError:
+ break
+ params.update(continuation)
+
+
+BASE_PATTERN = WikimediaExtractor.update({
+ "wikipedia": {
+ "root": None,
+ "pattern": r"[a-z]{2,}\.wikipedia\.org",
+ },
+ "wiktionary": {
+ "root": None,
+ "pattern": r"[a-z]{2,}\.wiktionary\.org",
+ },
+ "wikiquote": {
+ "root": None,
+ "pattern": r"[a-z]{2,}\.wikiquote\.org",
+ },
+ "wikibooks": {
+ "root": None,
+ "pattern": r"[a-z]{2,}\.wikibooks\.org",
+ },
+ "wikisource": {
+ "root": None,
+ "pattern": r"[a-z]{2,}\.wikisource\.org",
+ },
+ "wikinews": {
+ "root": None,
+ "pattern": r"[a-z]{2,}\.wikinews\.org",
+ },
+ "wikiversity": {
+ "root": None,
+ "pattern": r"[a-z]{2,}\.wikiversity\.org",
+ },
+ "wikispecies": {
+ "root": "https://species.wikimedia.org",
+ "pattern": r"species\.wikimedia\.org",
+ },
+ "wikimediacommons": {
+ "root": "https://commons.wikimedia.org",
+ "pattern": r"commons\.wikimedia\.org",
+ },
+ "mediawiki": {
+ "root": "https://www.mediawiki.org",
+ "pattern": r"(?:www\.)?mediawiki\.org",
+ },
+ "fandom": {
+ "root": None,
+ "pattern": r"[\w-]+\.fandom\.com",
+ "api-path": "/api.php",
+ },
+ "mariowiki": {
+ "root": "https://www.mariowiki.com",
+ "pattern": r"(?:www\.)?mariowiki\.com",
+ },
+})
+
+
+class WikimediaArticleExtractor(WikimediaExtractor):
+ """Extractor for wikimedia articles"""
+ subcategory = "article"
+ pattern = BASE_PATTERN + r"/(?!static/)([^?#]+)"
+ example = "https://en.wikipedia.org/wiki/TITLE"
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 15905d6..f99beaa 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.26.6"
+__version__ = "1.26.7"
diff --git a/test/test_extractor.py b/test/test_extractor.py
index d2dd643..75a0b87 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -24,6 +24,11 @@ from gallery_dl.extractor.directlink import DirectlinkExtractor # noqa E402
_list_classes = extractor._list_classes
+try:
+ from test import results
+except ImportError:
+ results = None
+
class FakeExtractor(Extractor):
category = "fake"
@@ -92,17 +97,29 @@ class TestExtractorModule(unittest.TestCase):
with self.assertRaises(TypeError):
FakeExtractor.from_url(invalid)
- def test_unique_pattern_matches(self):
- try:
- import test.results
- except ImportError:
- raise unittest.SkipTest("no test data")
+ @unittest.skipIf(not results, "no test data")
+ def test_categories(self):
+ for result in results.all():
+ url = result["#url"]
+ base, cat, sub = result["#category"]
+ try:
+ extr = result["#class"].from_url(url)
+ except ImportError as exc:
+ if exc.name in ("youtube_dl", "yt_dlp"):
+ print("Skipping '{}' category checks".format(cat))
+ continue
+ raise
+ self.assertEqual(extr.category, cat, url)
+ self.assertEqual(extr.subcategory, sub, url)
+ self.assertEqual(extr.basecategory, base, url)
+ @unittest.skipIf(not results, "no test data")
+ def test_unique_pattern_matches(self):
# collect testcase URLs
test_urls = []
append = test_urls.append
- for result in test.results.all():
+ for result in results.all():
append((result["#url"], result["#class"]))
# iterate over all testcase URLs
diff --git a/test/test_results.py b/test/test_results.py
index c7a5001..680b0f9 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -28,6 +28,27 @@ BROKEN = {
"photobucket",
}
+CONFIG = {
+ "cache": {
+ "file": None,
+ },
+ "downloader": {
+ "adjust-extensions": False,
+ "part": False,
+ },
+}
+
+AUTH = {
+ "pixiv",
+ "nijie",
+ "horne",
+ "reddit",
+ "seiga",
+ "fantia",
+ "instagram",
+ "twitter",
+}
+
class TestExtractorResults(unittest.TestCase):
@@ -66,6 +87,19 @@ class TestExtractorResults(unittest.TestCase):
for key, value in result["#options"].items():
key = key.split(".")
config.set(key[:-1], key[-1], value)
+
+ requires_auth = result.get("#auth")
+ if requires_auth is None:
+ requires_auth = (result["#category"][1] in AUTH)
+ if requires_auth:
+ extr = result["#class"].from_url(result["#url"])
+ if not any(extr.config(key) for key in (
+ "username", "cookies", "api-key", "client-id",
+ "refresh-token")):
+ msg = "no auth"
+ self._skipped.append((result["#url"], msg))
+ self.skipTest(msg)
+
if "#range" in result:
config.set((), "image-range" , result["#range"])
config.set((), "chapter-range", result["#range"])
@@ -348,56 +382,21 @@ class TestFormatter(formatter.StringFormatter):
def setup_test_config():
- name = "gallerydl"
- email = "gallerydl@openaliasbox.org"
- email2 = "gallerydl@protonmail.com"
-
- config.clear()
- config.set(("cache",), "file", None)
- config.set(("downloader",), "part", False)
- config.set(("downloader",), "adjust-extensions", False)
- config.set(("extractor" ,), "timeout" , 60)
- config.set(("extractor" ,), "username", name)
- config.set(("extractor" ,), "password", name)
-
- config.set(("extractor", "nijie") , "username", email)
- config.set(("extractor", "seiga") , "username", email)
- config.set(("extractor", "horne") , "username", email2)
- config.set(("extractor", "pinterest") , "username", email2)
- config.set(("extractor", "pinterest") , "username", None) # login broken
-
- config.set(("extractor", "newgrounds"), "username", "d1618111")
- config.set(("extractor", "newgrounds"), "password", "d1618111")
-
- config.set(("extractor", "mangoxo") , "username", "LiQiang3")
- config.set(("extractor", "mangoxo") , "password", "5zbQF10_5u25259Ma")
-
- for category in ("danbooru", "atfbooru", "aibooru", "booruvar",
- "e621", "e926", "e6ai",
- "instagram", "twitter", "subscribestar", "deviantart",
- "inkbunny", "tapas", "pillowfort", "mangadex",
- "vipergirls"):
- config.set(("extractor", category), "username", None)
-
- config.set(("extractor", "mastodon.social"), "access-token",
- "Blf9gVqG7GytDTfVMiyYQjwVMQaNACgf3Ds3IxxVDUQ")
-
- config.set(("extractor", "nana"), "favkey",
- "9237ddb82019558ea7d179e805100805"
- "ea6aa1c53ca6885cd4c179f9fb22ead2")
-
- config.set(("extractor", "deviantart"), "client-id", "7777")
- config.set(("extractor", "deviantart"), "client-secret",
- "ff14994c744d9208e5caeec7aab4a026")
-
- config.set(("extractor", "tumblr"), "api-key",
- "0cXoHfIqVzMQcc3HESZSNsVlulGxEXGDTTZCDrRrjaa0jmuTc6")
- config.set(("extractor", "tumblr"), "api-secret",
- "6wxAK2HwrXdedn7VIoZWxGqVhZ8JdYKDLjiQjL46MLqGuEtyVj")
- config.set(("extractor", "tumblr"), "access-token",
- "N613fPV6tOZQnyn0ERTuoEZn0mEqG8m2K8M3ClSJdEHZJuqFdG")
- config.set(("extractor", "tumblr"), "access-token-secret",
- "sgOA7ZTT4FBXdOGGVV331sSp0jHYp4yMDRslbhaQf7CaS71i4O")
+ config._config.update(CONFIG)
+
+
+def load_test_config():
+ try:
+ path = os.path.join(
+ os.path.dirname(os.path.dirname(__file__)),
+ "archive", "config.json")
+ with open(path) as fp:
+ CONFIG.update(json.loads(fp.read()))
+ except FileNotFoundError:
+ pass
+ except Exception as exc:
+ sys.exit("Error when loading {}: {}: {}".format(
+ path, exc.__class__.__name__, exc))
def generate_tests():
@@ -414,7 +413,7 @@ def generate_tests():
if v in ("f", "fail"):
self.fail("manual test failure")
else:
- self._skipped.append((result["#url"], exc))
+ self._skipped.append((result["#url"], "manual skip"))
self.skipTest(exc)
return test
@@ -442,10 +441,12 @@ def generate_tests():
enum[name] += 1
method = _generate_method(result)
+ method.__doc__ = result["#url"]
method.__name__ = "test_{}_{}".format(name, enum[name])
setattr(TestExtractorResults, method.__name__, method)
generate_tests()
if __name__ == "__main__":
+ load_test_config()
unittest.main(warnings="ignore")
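
load_test_config() merges an optional archive/config.json into the CONFIG defaults before the suite runs. A hypothetical minimal file, following gallery-dl's normal configuration layout with placeholder values:

{
    "extractor": {
        "timeout": 60,
        "twitter": {"username": "REDACTED", "password": "REDACTED"},
        "tumblr": {"api-key": "REDACTED"}
    }
}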