author     Unit 193 <unit193@unit193.net>   2024-01-08 03:22:30 -0500
committer  Unit 193 <unit193@unit193.net>   2024-01-08 03:22:30 -0500
commit  8646342be01c70ec97eacb16f6b88e4360aa585e (patch)
tree    7e39f7daad35df3cae62e51073d3a79e79411c92
parent  0e6f612882a06d191e8d56bbe2bc05020009e379 (diff)
parent  e949aaf6f6ac93896947d5b736e48e7911926efb (diff)
Update upstream source from tag 'upstream/1.26.6'
Update to upstream version '1.26.6' with Debian dir 5409f55ee0db2818bae3ba157a2f17afdb377c21
-rw-r--r--  CHANGELOG.md                          |  32
-rw-r--r--  PKG-INFO                              |   6
-rw-r--r--  README.rst                            |   4
-rw-r--r--  data/completion/_gallery-dl           |  26
-rw-r--r--  data/man/gallery-dl.1                 |   2
-rw-r--r--  data/man/gallery-dl.conf.5            |  74
-rw-r--r--  docs/gallery-dl-example.conf          |  19
-rw-r--r--  gallery_dl.egg-info/PKG-INFO          |   6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt       |   4
-rw-r--r--  gallery_dl/cookies.py                 |   6
-rw-r--r--  gallery_dl/extractor/__init__.py      |   4
-rw-r--r--  gallery_dl/extractor/batoto.py        | 118
-rw-r--r--  gallery_dl/extractor/chevereto.py     |   2
-rw-r--r--  gallery_dl/extractor/common.py        |   7
-rw-r--r--  gallery_dl/extractor/deviantart.py    |  56
-rw-r--r--  gallery_dl/extractor/fanbox.py        |  59
-rw-r--r--  gallery_dl/extractor/gelbooru.py      |  20
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py  |   2
-rw-r--r--  gallery_dl/extractor/idolcomplex.py   |  17
-rw-r--r--  gallery_dl/extractor/imagechest.py    |   2
-rw-r--r--  gallery_dl/extractor/komikcast.py     |  20
-rw-r--r--  gallery_dl/extractor/lynxchan.py      |   8
-rw-r--r--  gallery_dl/extractor/manganelo.py     |   9
-rw-r--r--  gallery_dl/extractor/nijie.py         |   7
-rw-r--r--  gallery_dl/extractor/nudecollect.py   |  87
-rw-r--r--  gallery_dl/extractor/paheal.py        |  13
-rw-r--r--  gallery_dl/extractor/pinterest.py     |   3
-rw-r--r--  gallery_dl/extractor/poringa.py       | 138
-rw-r--r--  gallery_dl/extractor/rule34us.py      |   6
-rw-r--r--  gallery_dl/extractor/szurubooru.py    |   4
-rw-r--r--  gallery_dl/extractor/twitter.py       |   4
-rw-r--r--  gallery_dl/extractor/weibo.py         |   3
-rw-r--r--  gallery_dl/extractor/zzup.py          |  40
-rw-r--r--  gallery_dl/version.py                 |   2
-rw-r--r--  setup.cfg                             |   3
35 files changed, 623 insertions(+), 190 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8907e07..7b135b7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,37 @@
# Changelog
+## 1.26.6 - 2024-01-06
+### Extractors
+#### Additions
+- [batoto] add `chapter` and `manga` extractors ([#1434](https://github.com/mikf/gallery-dl/issues/1434), [#2111](https://github.com/mikf/gallery-dl/issues/2111), [#4979](https://github.com/mikf/gallery-dl/issues/4979))
+- [deviantart] add `avatar` and `background` extractors ([#4995](https://github.com/mikf/gallery-dl/issues/4995))
+- [poringa] add support ([#4675](https://github.com/mikf/gallery-dl/issues/4675), [#4962](https://github.com/mikf/gallery-dl/issues/4962))
+- [szurubooru] support `snootbooru.com` ([#5023](https://github.com/mikf/gallery-dl/issues/5023))
+- [zzup] add `gallery` extractor ([#4517](https://github.com/mikf/gallery-dl/issues/4517), [#4604](https://github.com/mikf/gallery-dl/issues/4604), [#4659](https://github.com/mikf/gallery-dl/issues/4659), [#4863](https://github.com/mikf/gallery-dl/issues/4863), [#5016](https://github.com/mikf/gallery-dl/issues/5016))
+#### Fixes
+- [gelbooru] fix `favorite` extractor ([#4903](https://github.com/mikf/gallery-dl/issues/4903))
+- [idolcomplex] fix extraction & update URL patterns ([#5002](https://github.com/mikf/gallery-dl/issues/5002))
+- [imagechest] fix loading more than 10 images in a gallery ([#4469](https://github.com/mikf/gallery-dl/issues/4469))
+- [jpgfish] update domain
+- [komikcast] fix `manga` extractor ([#5027](https://github.com/mikf/gallery-dl/issues/5027))
+- [komikcast] update domain ([#5027](https://github.com/mikf/gallery-dl/issues/5027))
+- [lynxchan] update `bbw-chan` domain ([#4970](https://github.com/mikf/gallery-dl/issues/4970))
+- [manganelo] fix extraction & recognize `.to` TLDs ([#5005](https://github.com/mikf/gallery-dl/issues/5005))
+- [paheal] restore `extension` metadata ([#4976](https://github.com/mikf/gallery-dl/issues/4976))
+- [rule34us] add fallback for `video-cdn1` videos ([#4985](https://github.com/mikf/gallery-dl/issues/4985))
+- [weibo] fix AttributeError in `user` extractor ([#5022](https://github.com/mikf/gallery-dl/issues/5022))
+#### Improvements
+- [gelbooru] show error for invalid API responses ([#4903](https://github.com/mikf/gallery-dl/issues/4903))
+- [rule34] recognize URLs with `www` subdomain ([#4984](https://github.com/mikf/gallery-dl/issues/4984))
+- [twitter] raise error for invalid `strategy` values ([#4953](https://github.com/mikf/gallery-dl/issues/4953))
+#### Metadata
+- [fanbox] add `metadata` option ([#4921](https://github.com/mikf/gallery-dl/issues/4921))
+- [nijie] add `count` metadata ([#146](https://github.com/mikf/gallery-dl/issues/146))
+- [pinterest] add `count` metadata ([#4981](https://github.com/mikf/gallery-dl/issues/4981))
+### Miscellaneous
+- fix and update zsh completion ([#4972](https://github.com/mikf/gallery-dl/issues/4972))
+- fix `--cookies-from-browser` macOS Firefox profile path
+
## 1.26.5 - 2023-12-23
### Extractors
#### Additions
diff --git a/PKG-INFO b/PKG-INFO
index bdd0025..e7550e9 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.26.5
+Version: 1.26.6
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -112,9 +112,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.6/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.6/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 782c8de..ee165e5 100644
--- a/README.rst
+++ b/README.rst
@@ -72,9 +72,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.6/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.6/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 39fabcc..e5153f5 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -4,10 +4,10 @@ local curcontext="$curcontext"
typeset -A opt_args
local rc=1
-_arguments -C -S \
+_arguments -s -S \
{-h,--help}'[Print this help message and exit]' \
--version'[Print program version and exit]' \
-{-f,--filename}'[Filename format string for downloaded files ("/O" for "original" filenames)]':'<format>' \
+{-f,--filename}'[Filename format string for downloaded files ('\''/O'\'' for "original" filenames)]':'<format>' \
{-d,--destination}'[Target location for file downloads]':'<path>' \
{-D,--directory}'[Exact location for file downloads]':'<path>' \
{-X,--extractors}'[Load external extractors from PATH]':'<path>' \
@@ -15,7 +15,7 @@ _arguments -C -S \
--source-address'[Client-side IP address to bind to]':'<ip>' \
--user-agent'[User-Agent request header]':'<ua>' \
--clear-cache'[Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)]':'<module>' \
-{-i,--input-file}'[Download URLs found in FILE ("-" for stdin). More than one --input-file can be specified]':'<file>':_files \
+{-i,--input-file}'[Download URLs found in FILE ('\''-'\'' for stdin). More than one --input-file can be specified]':'<file>':_files \
{-I,--input-file-comment}'[Download URLs found in FILE. Comment them out after they were downloaded successfully.]':'<file>':_files \
{-x,--input-file-delete}'[Download URLs found in FILE. Delete them after they were downloaded successfully.]':'<file>':_files \
{-q,--quiet}'[Activate quiet mode]' \
@@ -53,34 +53,28 @@ _arguments -C -S \
--config-toml'[Additional configuration files in TOML format]':'<file>':_files \
--config-create'[Create a basic configuration file]' \
--config-ignore'[Do not read default configuration files]' \
---ignore-config'[==SUPPRESS==]' \
{-u,--username}'[Username to login with]':'<user>' \
{-p,--password}'[Password belonging to the given username]':'<pass>' \
--netrc'[Enable .netrc authentication data]' \
{-C,--cookies}'[File to load additional cookies from]':'<file>':_files \
--cookies-export'[Export session cookies to FILE]':'<file>':_files \
---cookies-from-browser'[Name of the browser to load cookies from, with optional domain prefixed with "/", keyring name prefixed with "+", profile prefixed with ":", and container prefixed with "::" ("none" for no container)]':'<browser[/domain][+keyring][:profile][::container]>' \
+--cookies-from-browser'[Name of the browser to load cookies from, with optional domain prefixed with '\''/'\'', keyring name prefixed with '\''+'\'', profile prefixed with '\'':'\'', and container prefixed with '\''::'\'' ('\''none'\'' for no container)]':'<browser[/domain][+keyring][:profile][::container]>' \
--download-archive'[Record all downloaded or skipped files in FILE and skip downloading any file already in it]':'<file>':_files \
{-A,--abort}'[Stop current extractor run after N consecutive file downloads were skipped]':'<n>' \
{-T,--terminate}'[Stop current and parent extractor runs after N consecutive file downloads were skipped]':'<n>' \
---range'[Index range(s) specifying which files to download. These can be either a constant value, range, or slice (e.g. "5", "8-20", or "1:24:3")]':'<range>' \
---chapter-range'[Like "--range", but applies to manga chapters and other delegated URLs]':'<range>' \
---filter'[Python expression controlling which files to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K". Example: --filter "image_width >= 1000 and rating in ("s", "q")"]':'<expr>' \
---chapter-filter'[Like "--filter", but applies to manga chapters and other delegated URLs]':'<expr>' \
+--range'[Index range(s) specifying which files to download. These can be either a constant value, range, or slice (e.g. '\''5'\'', '\''8-20'\'', or '\''1:24:3'\'')]':'<range>' \
+--chapter-range'[Like '\''--range'\'', but applies to manga chapters and other delegated URLs]':'<range>' \
+--filter'[Python expression controlling which files to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by '\''-K'\''. Example: --filter "image_width >= 1000 and rating in ('\''s'\'', '\''q'\'')"]':'<expr>' \
+--chapter-filter'[Like '\''--filter'\'', but applies to manga chapters and other delegated URLs]':'<expr>' \
{-P,--postprocessor}'[Activate the specified post processor]':'<name>' \
{-O,--postprocessor-option}'[Additional post processor options]':'<key=value>' \
--write-metadata'[Write metadata to separate JSON files]' \
--write-info-json'[Write gallery metadata to a info.json file]' \
---write-infojson'[==SUPPRESS==]' \
--write-tags'[Write image tags to separate text files]' \
--zip'[Store downloaded files in a ZIP archive]' \
--cbz'[Store downloaded files in a CBZ archive]' \
---mtime'[Set file modification times according to metadata selected by NAME. Examples: "date" or "status[date]"]':'<name>' \
---mtime-from-date'[==SUPPRESS==]' \
---ugoira'[Convert Pixiv Ugoira to FORMAT using FFmpeg. Supported formats are "webm", "mp4", "gif", "vp8", "vp9", "vp9-lossless", "copy".]':'<format>' \
---ugoira-conv'[==SUPPRESS==]' \
---ugoira-conv-lossless'[==SUPPRESS==]' \
---ugoira-conv-copy'[==SUPPRESS==]' \
+--mtime'[Set file modification times according to metadata selected by NAME. Examples: '\''date'\'' or '\''status\[date\]'\'']':'<name>' \
+--ugoira'[Convert Pixiv Ugoira to FORMAT using FFmpeg. Supported formats are '\''webm'\'', '\''mp4'\'', '\''gif'\'', '\''vp8'\'', '\''vp9'\'', '\''vp9-lossless'\'', '\''copy'\''.]':'<format>' \
--exec'[Execute CMD for each downloaded file. Supported replacement fields are {} or {_path}, {_directory}, {_filename}. Example: --exec "convert {} {}.png && rm {}"]':'<cmd>' \
--exec-after'[Execute CMD after all files were downloaded. Example: --exec-after "cd {_directory} && convert * ../doc.pdf"]':'<cmd>' && rc=0
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index caa0d4a..ad6fd4a 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-12-23" "1.26.5" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2024-01-06" "1.26.6" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index b641f29..a57d39b 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-12-23" "1.26.5" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2024-01-06" "1.26.6" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -761,6 +761,23 @@ to be passed to
\f[I]ssl.SSLContext.set_ciphers()\f[]
+.SS extractor.*.tls12
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+.br
+* \f[I]true\f[]
+.br
+* \f[I]false\f[] for \f[I]patreon\f[], \f[I]pixiv:series\f[]
+
+.IP "Description:" 4
+Allow selecting TLS 1.2 cipher suites.
+
+Can be disabled to alter TLS fingerprints
+and potentially bypass Cloudflare blocks.
+
+
.SS extractor.*.keywords
.IP "Type:" 6
\f[I]object\f[] (name -> value)
@@ -1243,6 +1260,29 @@ To use a different formatting for those values other than the default
after a colon \f[I]:\f[], for example \f[I]{date:%Y%m%d}\f[].
+.SS extractor.*.write-pages
+.IP "Type:" 6
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+During data extraction,
+write received HTTP request data
+to enumerated files in the current working directory.
+
+Special values:
+
+.br
+* \f[I]"all"\f[]: Include HTTP request and response headers. Hide \f[I]Authorization\f[], \f[I]Cookie\f[], and \f[I]Set-Cookie\f[] values.
+.br
+* \f[I]"ALL"\f[]: Include all HTTP request and response headers.
+
+
.SH EXTRACTOR-SPECIFIC OPTIONS
.SS extractor.artstation.external
.IP "Type:" 6
@@ -1587,7 +1627,13 @@ A (comma-separated) list of subcategories to include
when processing a user profile.
Possible values are
-\f[I]"gallery"\f[], \f[I]"scraps"\f[], \f[I]"journal"\f[], \f[I]"favorite"\f[], \f[I]"status"\f[].
+\f[I]"avatar"\f[],
+\f[I]"background"\f[],
+\f[I]"gallery"\f[],
+\f[I]"scraps"\f[],
+\f[I]"journal"\f[],
+\f[I]"favorite"\f[],
+\f[I]"status"\f[].
It is possible to use \f[I]"all"\f[] instead of listing all values separately.
@@ -1773,7 +1819,7 @@ Minimum wait time in seconds before API requests.
.br
* notes,pools
.br
-* ["notes", "pools"
+* ["notes", "pools"]
.IP "Description:" 4
Extract additional metadata (notes, pool metadata) if available.
@@ -1940,6 +1986,28 @@ extraction and download for YouTube, Vimeo and SoundCloud embeds.
* \f[I]false\f[]: Ignore embeds.
+.SS extractor.fanbox.metadata
+.IP "Type:" 6
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Example:" 4
+.br
+* user,plan
+.br
+* ["user", "plan"]
+
+.IP "Description:" 4
+Extract \f[I]plan\f[] and extended \f[I]user\f[] metadata.
+
+
.SS extractor.flickr.access-token & .access-token-secret
.IP "Type:" 6
\f[I]string\f[]
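The three options documented in this man-page diff (extractor.*.tls12, extractor.*.write-pages, and extractor.fanbox.metadata) are normally set in gallery-dl.conf. A hedged sketch of setting them programmatically instead, assuming the config.set(path, key, value) helper from gallery_dl/config.py:

    from gallery_dl import config

    # config.set(path, key, value) is assumed from gallery_dl/config.py
    config.set(("extractor", "patreon"), "tls12", False)      # alter TLS fingerprint
    config.set(("extractor",), "write-pages", "all")          # dump pages, redact auth headers
    config.set(("extractor", "fanbox"), "metadata", "user,plan")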
diff --git a/docs/gallery-dl-example.conf b/docs/gallery-dl-example.conf
index c3f8049..cda584e 100644
--- a/docs/gallery-dl-example.conf
+++ b/docs/gallery-dl-example.conf
@@ -176,16 +176,15 @@
"imgur":
{
- "#": "use different directory and filename formats when coming from a reddit post",
- "directory":
- {
- "'_reddit' in locals()": []
- },
- "filename":
- {
- "'_reddit' in locals()": "{_reddit[id]} {id}.{extension}",
- "" : "{id}.{extension}"
- }
+ "#": "general imgur settings",
+ "filename": "{id}.{extension}"
+ },
+
+ "reddit>imgur":
+ {
+ "#": "special settings for imgur URLs found in reddit posts",
+ "directory": [],
+ "filename": "{_reddit[id]} {_reddit[title]} {id}.{extension}"
},
"tumblr":
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 934609a..d695df9 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.26.5
+Version: 1.26.6
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -112,9 +112,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.6/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.6/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 30cda54..271b4a9 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -55,6 +55,7 @@ gallery_dl/extractor/adultempire.py
gallery_dl/extractor/architizer.py
gallery_dl/extractor/artstation.py
gallery_dl/extractor/aryion.py
+gallery_dl/extractor/batoto.py
gallery_dl/extractor/bbc.py
gallery_dl/extractor/behance.py
gallery_dl/extractor/blogger.py
@@ -149,7 +150,6 @@ gallery_dl/extractor/nijie.py
gallery_dl/extractor/nitter.py
gallery_dl/extractor/nozomi.py
gallery_dl/extractor/nsfwalbum.py
-gallery_dl/extractor/nudecollect.py
gallery_dl/extractor/oauth.py
gallery_dl/extractor/paheal.py
gallery_dl/extractor/patreon.py
@@ -165,6 +165,7 @@ gallery_dl/extractor/pixiv.py
gallery_dl/extractor/pixnet.py
gallery_dl/extractor/plurk.py
gallery_dl/extractor/poipiku.py
+gallery_dl/extractor/poringa.py
gallery_dl/extractor/pornhub.py
gallery_dl/extractor/pornpics.py
gallery_dl/extractor/postmill.py
@@ -224,6 +225,7 @@ gallery_dl/extractor/xhamster.py
gallery_dl/extractor/xvideos.py
gallery_dl/extractor/ytdl.py
gallery_dl/extractor/zerochan.py
+gallery_dl/extractor/zzup.py
gallery_dl/postprocessor/__init__.py
gallery_dl/postprocessor/classify.py
gallery_dl/postprocessor/common.py
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index 416cc9a..478abb6 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -215,9 +215,11 @@ def _firefox_cookies_database(profile=None, container=None):
def _firefox_browser_directory():
if sys.platform in ("win32", "cygwin"):
- return os.path.expandvars(r"%APPDATA%\Mozilla\Firefox\Profiles")
+ return os.path.expandvars(
+ r"%APPDATA%\Mozilla\Firefox\Profiles")
if sys.platform == "darwin":
- return os.path.expanduser("~/Library/Application Support/Firefox")
+ return os.path.expanduser(
+ "~/Library/Application Support/Firefox/Profiles")
return os.path.expanduser("~/.mozilla/firefox")
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 695b8b2..9e33f2c 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -24,6 +24,7 @@ modules = [
"architizer",
"artstation",
"aryion",
+ "batoto",
"bbc",
"behance",
"blogger",
@@ -107,7 +108,6 @@ modules = [
"nitter",
"nozomi",
"nsfwalbum",
- "nudecollect",
"paheal",
"patreon",
"philomena",
@@ -122,6 +122,7 @@ modules = [
"pixnet",
"plurk",
"poipiku",
+ "poringa",
"pornhub",
"pornpics",
"postmill",
@@ -177,6 +178,7 @@ modules = [
"xhamster",
"xvideos",
"zerochan",
+ "zzup",
"booru",
"moebooru",
"foolfuuka",
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
new file mode 100644
index 0000000..cd6302e
--- /dev/null
+++ b/gallery_dl/extractor/batoto.py
@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://bato.to/"""
+
+from .common import Extractor, ChapterExtractor, MangaExtractor
+from .. import text, exception
+import re
+
+BASE_PATTERN = (r"(?:https?://)?"
+ r"(?:(?:ba|d|w)to\.to|\.to|(?:batotoo|mangatoto)\.com)")
+
+
+class BatotoBase():
+ """Base class for batoto extractors"""
+ category = "batoto"
+ root = "https://bato.to"
+
+ def request(self, url, **kwargs):
+ kwargs["encoding"] = "utf-8"
+ return Extractor.request(self, url, **kwargs)
+
+
+class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
+ """Extractor for bato.to manga chapters"""
+ pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)"
+ example = "https://bato.to/title/12345-MANGA/54321"
+
+ def __init__(self, match):
+ self.root = text.root_from_url(match.group(0))
+ self.chapter_id = match.group(1)
+ url = "{}/title/0/{}".format(self.root, self.chapter_id)
+ ChapterExtractor.__init__(self, match, url)
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ manga, info, _ = extr("<title>", "<").rsplit(" - ", 3)
+ manga_id = extr("/title/", "/")
+
+ match = re.match(
+ r"(?:Volume\s+(\d+) )?"
+ r"\w+\s+(\d+)(.*)", info)
+ if match:
+ volume, chapter, minor = match.groups()
+ title = text.remove_html(extr(
+ "selected>", "</option")).partition(" : ")[2]
+ else:
+ volume = chapter = 0
+ minor = ""
+ title = info
+
+ return {
+ "manga" : text.unescape(manga),
+ "manga_id" : text.parse_int(manga_id),
+ "title" : text.unescape(title),
+ "volume" : text.parse_int(volume),
+ "chapter" : text.parse_int(chapter),
+ "chapter_minor": minor,
+ "chapter_id" : text.parse_int(self.chapter_id),
+ "date" : text.parse_timestamp(extr(' time="', '"')[:-3]),
+ }
+
+ def images(self, page):
+ images_container = text.extr(page, 'pageOpts', ':[0,0]}"')
+ images_container = text.unescape(images_container)
+ return [
+ (url, None)
+ for url in text.extract_iter(images_container, r"\"", r"\"")
+ ]
+
+
+class BatotoMangaExtractor(BatotoBase, MangaExtractor):
+ """Extractor for bato.to manga"""
+ reverse = False
+ chapterclass = BatotoChapterExtractor
+ pattern = BASE_PATTERN + r"/(?:title|series)/(\d+)[^/?#]*/?$"
+ example = "https://bato.to/title/12345-MANGA/"
+
+ def __init__(self, match):
+ self.root = text.root_from_url(match.group(0))
+ self.manga_id = match.group(1)
+ url = "{}/title/{}".format(self.root, self.manga_id)
+ MangaExtractor.__init__(self, match, url)
+
+ def chapters(self, page):
+ extr = text.extract_from(page)
+
+ warning = extr(' class="alert alert-warning">', "</div><")
+ if warning:
+ raise exception.StopExtraction("'%s'", text.remove_html(warning))
+
+ data = {
+ "manga_id": text.parse_int(self.manga_id),
+ "manga" : text.unescape(extr(
+ "<title>", "<").rpartition(" - ")[0]),
+ }
+
+ extr('<div data-hk="0-0-0-0"', "")
+ results = []
+ while True:
+ href = extr('<a href="/title/', '"')
+ if not href:
+ break
+
+ chapter = href.rpartition("-ch_")[2]
+ chapter, sep, minor = chapter.partition(".")
+
+ data["chapter"] = text.parse_int(chapter)
+ data["chapter_minor"] = sep + minor
+ data["date"] = text.parse_datetime(
+ extr('time="', '"'), "%Y-%m-%dT%H:%M:%S.%fZ")
+
+ url = "{}/title/{}".format(self.root, href)
+ results.append((url, data.copy()))
+ return results
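A note on the chapter-info parsing above: the regex in BatotoChapterExtractor.metadata() splits a title fragment like "Volume 2 Chapter 13.5" into volume, chapter, and a minor suffix. A minimal standalone sketch of that behavior (the sample string is made up):

    import re

    # same pattern as in metadata() above
    info = "Volume 2 Chapter 13.5"
    match = re.match(r"(?:Volume\s+(\d+) )?\w+\s+(\d+)(.*)", info)
    print(match.groups())  # ('2', '13', '.5')

Without a "Volume" prefix, group 1 is None, and text.parse_int() turns that into the 0 fallback.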
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index 21166bd..2bf200b 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -35,7 +35,7 @@ class CheveretoExtractor(BaseExtractor):
BASE_PATTERN = CheveretoExtractor.update({
"jpgfish": {
- "root": "https://jpg2.su",
+ "root": "https://jpg4.su",
"pattern": r"jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)",
},
"pixl": {
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 9b010c5..0dd05ef 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -526,12 +526,15 @@ class Extractor():
if include == "all":
include = extractors
elif isinstance(include, str):
- include = include.split(",")
+ include = include.replace(" ", "").split(",")
result = [(Message.Version, 1)]
for category in include:
- if category in extractors:
+ try:
extr, url = extractors[category]
+ except KeyError:
+ self.log.warning("Invalid include '%s'", category)
+ else:
result.append((Message.Queue, url, {"_extractor": extr}))
return iter(result)
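The include-handling change above does two things: spaces in a comma-separated include string are now ignored, and unknown categories produce a warning instead of being silently skipped. The string handling in isolation (sample value made up):

    include = "avatar, gallery ,scraps"
    print(include.replace(" ", "").split(","))
    # ['avatar', 'gallery', 'scraps']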
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 2ba47e1..4b5f1d7 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -38,7 +38,7 @@ class DeviantartExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.user = match.group(1) or match.group(2)
+ self.user = (match.group(1) or match.group(2)).lower()
self.offset = 0
def _init(self):
@@ -104,7 +104,6 @@ class DeviantartExtractor(Extractor):
raise exception.StopExtraction()
else:
self.subcategory = "group-" + self.subcategory
- self.user = self.user.lower()
self.group = True
for deviation in self.deviations():
@@ -513,11 +512,13 @@ class DeviantartUserExtractor(DeviantartExtractor):
def items(self):
base = "{}/{}/".format(self.root, self.user)
return self._dispatch_extractors((
- (DeviantartGalleryExtractor , base + "gallery"),
- (DeviantartScrapsExtractor , base + "gallery/scraps"),
- (DeviantartJournalExtractor , base + "posts"),
- (DeviantartStatusExtractor , base + "posts/statuses"),
- (DeviantartFavoriteExtractor, base + "favourites"),
+ (DeviantartAvatarExtractor , base + "avatar"),
+ (DeviantartBackgroundExtractor, base + "banner"),
+ (DeviantartGalleryExtractor , base + "gallery"),
+ (DeviantartScrapsExtractor , base + "gallery/scraps"),
+ (DeviantartJournalExtractor , base + "posts"),
+ (DeviantartStatusExtractor , base + "posts/statuses"),
+ (DeviantartFavoriteExtractor , base + "favourites"),
), ("gallery",))
@@ -538,6 +539,47 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
return self._folder_urls(folders, "gallery", DeviantartFolderExtractor)
+class DeviantartAvatarExtractor(DeviantartExtractor):
+ """Extractor for an artist's avatar"""
+ subcategory = "avatar"
+ archive_fmt = "a_{_username}_{index}"
+ pattern = BASE_PATTERN + r"/avatar"
+ example = "https://www.deviantart.com/USER/avatar/"
+
+ def deviations(self):
+ profile = self.api.user_profile(self.user.lower())
+ if profile:
+ url = profile["user"]["usericon"]
+ return ({
+ "author" : profile["user"],
+ "category" : "avatar",
+ "index" : text.parse_int(url.rpartition("?")[2]),
+ "is_deleted" : False,
+ "is_downloadable": False,
+ "published_time" : 0,
+ "title" : "avatar",
+ "content" : {
+ "src": url.replace("/avatars/", "/avatars-big/", 1),
+ },
+ },)
+ return ()
+
+
+class DeviantartBackgroundExtractor(DeviantartExtractor):
+ """Extractor for an artist's banner"""
+ subcategory = "background"
+ archive_fmt = "b_{index}"
+ pattern = BASE_PATTERN + r"/ba(?:nner|ckground)"
+ example = "https://www.deviantart.com/USER/banner/"
+
+ def deviations(self):
+ try:
+ return (self.api.user_profile(self.user.lower())
+ ["cover_deviation"]["cover_deviation"],)
+ except Exception:
+ return ()
+
+
class DeviantartFolderExtractor(DeviantartExtractor):
"""Extractor for deviations inside an artist's gallery folder"""
subcategory = "folder"
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index 4572bea..61a3928 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -8,6 +8,7 @@
from .common import Extractor, Message
from .. import text
+from ..cache import memcache
import re
BASE_PATTERN = (
@@ -27,8 +28,20 @@ class FanboxExtractor(Extractor):
_warning = True
def _init(self):
+ self.headers = {"Origin": self.root}
self.embeds = self.config("embeds", True)
+ includes = self.config("metadata")
+ if includes:
+ if isinstance(includes, str):
+ includes = includes.split(",")
+ elif not isinstance(includes, (list, tuple)):
+ includes = ("user", "plan")
+ self._meta_user = ("user" in includes)
+ self._meta_plan = ("plan" in includes)
+ else:
+ self._meta_user = self._meta_plan = False
+
if self._warning:
if not self.cookies_check(("FANBOXSESSID",)):
self.log.warning("no 'FANBOXSESSID' cookie set")
@@ -43,11 +56,9 @@ class FanboxExtractor(Extractor):
"""Return all relevant post objects"""
def _pagination(self, url):
- headers = {"Origin": self.root}
-
while url:
url = text.ensure_http_scheme(url)
- body = self.request(url, headers=headers).json()["body"]
+ body = self.request(url, headers=self.headers).json()["body"]
for item in body["items"]:
try:
yield self._get_post_data(item["id"])
@@ -58,9 +69,8 @@ class FanboxExtractor(Extractor):
def _get_post_data(self, post_id):
"""Fetch and process post data"""
- headers = {"Origin": self.root}
url = "https://api.fanbox.cc/post.info?postId="+post_id
- post = self.request(url, headers=headers).json()["body"]
+ post = self.request(url, headers=self.headers).json()["body"]
content_body = post.pop("body", None)
if content_body:
@@ -98,8 +108,47 @@ class FanboxExtractor(Extractor):
post["text"] = content_body.get("text") if content_body else None
post["isCoverImage"] = False
+ if self._meta_user:
+ post["user"] = self._get_user_data(post["creatorId"])
+ if self._meta_plan:
+ plans = self._get_plan_data(post["creatorId"])
+ post["plan"] = plans[post["feeRequired"]]
+
return content_body, post
+ @memcache(keyarg=1)
+ def _get_user_data(self, creator_id):
+ url = "https://api.fanbox.cc/creator.get"
+ params = {"creatorId": creator_id}
+ data = self.request(url, params=params, headers=self.headers).json()
+
+ user = data["body"]
+ user.update(user.pop("user"))
+
+ return user
+
+ @memcache(keyarg=1)
+ def _get_plan_data(self, creator_id):
+ url = "https://api.fanbox.cc/plan.listCreator"
+ params = {"creatorId": creator_id}
+ data = self.request(url, params=params, headers=self.headers).json()
+
+ plans = {0: {
+ "id" : "",
+ "title" : "",
+ "fee" : 0,
+ "description" : "",
+ "coverImageUrl" : "",
+ "creatorId" : creator_id,
+ "hasAdultContent": None,
+ "paymentMethod" : None,
+ }}
+ for plan in data["body"]:
+ del plan["user"]
+ plans[plan["fee"]] = plan
+
+ return plans
+
def _get_urls_from_post(self, content_body, post):
num = 0
cover_image = post.get("coverImageUrl")
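The new fanbox metadata option above keys each plan by its fee, so a post's feeRequired value selects the matching tier directly, with a synthetic zero-fee plan standing in for public posts. A reduced sketch with dummy data:

    # dummy API body in the shape consumed by _get_plan_data() above
    body = [{"id": "a1", "title": "Supporter", "fee": 500, "user": {"name": "x"}}]

    plans = {0: {"id": "", "title": "", "fee": 0}}  # fallback for free posts
    for plan in body:
        del plan["user"]            # nested user object is dropped, as above
        plans[plan["fee"]] = plan

    fee_required = 500              # post["feeRequired"]
    print(plans[fee_required]["title"])  # Supporter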
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index b62ff78..eba1539 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -32,10 +32,13 @@ class GelbooruBase():
url = self.root + "/index.php?page=dapi&q=index&json=1"
data = self.request(url, params=params).json()
- if key not in data:
- return ()
+ try:
+ posts = data[key]
+ except KeyError:
+ self.log.error("Incomplete API response (missing '%s')", key)
+ self.log.debug("%s", data)
+ return []
- posts = data[key]
if not isinstance(posts, list):
return (posts,)
return posts
@@ -165,15 +168,16 @@ class GelbooruFavoriteExtractor(GelbooruBase,
"id" : self.favorite_id,
"limit": "1",
}
- count = self._api_request(params, "@attributes")[0]["count"]
+ count = self._api_request(params, "@attributes")[0]["count"]
if count <= self.offset:
return
- pnum, last = divmod(count + 1, self.per_page)
- if self.offset >= last:
+ pnum, last = divmod(count-1, self.per_page)
+ if self.offset > last:
+ # page number change
self.offset -= last
- diff, self.offset = divmod(self.offset, self.per_page)
+ diff, self.offset = divmod(self.offset-1, self.per_page)
pnum -= diff + 1
skip = self.offset
@@ -183,8 +187,8 @@ class GelbooruFavoriteExtractor(GelbooruBase,
while True:
favs = self._api_request(params, "favorite")
-
favs.reverse()
+
if skip:
favs = favs[skip:]
skip = 0
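The corrected favorite pagination above appears to count pages from the end (note the favs.reverse() call), so the global offset has to be translated into a starting page number plus an in-page skip. The arithmetic in isolation, with made-up numbers:

    per_page, count, offset = 100, 250, 60    # made-up values

    pnum, last = divmod(count - 1, per_page)  # (2, 49): zero-based page and
                                              # position of the final favorite
    if offset > last:
        offset -= last                        # consume the partial last page
        diff, offset = divmod(offset - 1, per_page)
        pnum -= diff + 1

    print(pnum, offset)  # 1 10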
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 0864b9f..0c8af3d 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -168,7 +168,7 @@ INSTANCES = {
},
"rule34": {
"root": "https://rule34.xxx",
- "pattern": r"rule34\.xxx",
+ "pattern": r"(?:www\.)?rule34\.xxx",
"api_root": "https://api.rule34.xxx",
},
"safebooru": {
diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py
index b9e2c3d..f70a948 100644
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -34,8 +34,11 @@ class IdolcomplexExtractor(SankakuExtractor):
self.start_post = 0
def _init(self):
+ self.find_pids = re.compile(
+ r" href=[\"#]/\w\w/posts/([0-9a-f]+)"
+ ).findall
self.find_tags = re.compile(
- r'tag-type-([^"]+)">\s*<div [^>]+>\s*<a href="/\?tags=([^"]+)'
+ r'tag-type-([^"]+)">\s*<a [^>]*?href="/[^?]*\?tags=([^"]+)'
).findall
def items(self):
@@ -149,8 +152,8 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)"
- example = "https://idol.sankakucomplex.com/?tags=TAGS"
+ pattern = BASE_PATTERN + r"/(?:posts/?)?\?([^#]*)"
+ example = "https://idol.sankakucomplex.com/en/posts?tags=TAGS"
per_page = 20
def __init__(self, match):
@@ -196,7 +199,8 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
page = self.request(self.root, params=params, retries=10).text
pos = ((page.find('id="more-popular-posts-link"') + 1) or
(page.find('<span class="thumb') + 1))
- yield from text.extract_iter(page, ' href="/posts/', '"', pos)
+
+ yield from self.find_pids(page, pos)
next_url = text.extract(page, 'next-page-url="', '"', pos)[0]
if not next_url:
@@ -218,7 +222,7 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
- pattern = r"(?:https?://)?idol\.sankakucomplex\.com/pools?/show/(\d+)"
+ pattern = BASE_PATTERN + r"/pools?/show/(\d+)"
example = "https://idol.sankakucomplex.com/pools/show/12345"
per_page = 24
@@ -242,8 +246,7 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
while True:
page = self.request(url, params=params, retries=10).text
pos = page.find('id="pool-show"') + 1
- post_ids = list(text.extract_iter(
- page, ' href="/posts/', '"', pos))
+ post_ids = self.find_pids(page, pos)
yield from post_ids
if len(post_ids) < self.per_page:
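The new find_pids regex above picks hexadecimal post IDs out of both quoted and fragment-style links. A standalone check against made-up markup:

    import re

    # same pattern as in _init() above
    find_pids = re.compile(r" href=[\"#]/\w\w/posts/([0-9a-f]+)").findall
    html = '<a href="/en/posts/0abc12"> <a href=#/ja/posts/ffee99>'
    print(find_pids(html))  # ['0abc12', 'ffee99']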
diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py
index 9aa0332..9199d12 100644
--- a/gallery_dl/extractor/imagechest.py
+++ b/gallery_dl/extractor/imagechest.py
@@ -44,7 +44,7 @@ class ImagechestGalleryExtractor(GalleryExtractor):
}
def images(self, page):
- if " More Files</button>" in page:
+ if ' load-all">' in page:
url = "{}/p/{}/loadAll".format(self.root, self.gallery_id)
headers = {
"X-Requested-With": "XMLHttpRequest",
diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py
index a3e0130..7a19be5 100644
--- a/gallery_dl/extractor/komikcast.py
+++ b/gallery_dl/extractor/komikcast.py
@@ -6,19 +6,19 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://komikcast.site/"""
+"""Extractors for https://komikcast.lol/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
import re
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?komikcast\.(?:site|me|com)"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?komikcast\.(?:lol|site|me|com)"
class KomikcastBase():
"""Base class for komikcast extractors"""
category = "komikcast"
- root = "https://komikcast.site"
+ root = "https://komikcast.lol"
@staticmethod
def parse_chapter_string(chapter_string, data=None):
@@ -46,9 +46,9 @@ class KomikcastBase():
class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
- """Extractor for manga-chapters from komikcast.site"""
+ """Extractor for manga-chapters from komikcast.lol"""
pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/)"
- example = "https://komikcast.site/chapter/TITLE/"
+ example = "https://komikcast.lol/chapter/TITLE/"
def metadata(self, page):
info = text.extr(page, "<title>", " - Komikcast<")
@@ -65,10 +65,10 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
- """Extractor for manga from komikcast.site"""
+ """Extractor for manga from komikcast.lol"""
chapterclass = KomikcastChapterExtractor
pattern = BASE_PATTERN + r"(/(?:komik/)?[^/?#]+)/?$"
- example = "https://komikcast.site/komik/TITLE"
+ example = "https://komikcast.lol/komik/TITLE"
def chapters(self, page):
results = []
@@ -76,8 +76,10 @@ class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
for item in text.extract_iter(
page, '<a class="chapter-link-item" href="', '</a'):
- url, _, chapter_string = item.rpartition('">Chapter ')
- self.parse_chapter_string(chapter_string, data)
+ url, _, chapter = item.rpartition('">Chapter')
+ chapter, sep, minor = chapter.strip().partition(".")
+ data["chapter"] = text.parse_int(chapter)
+ data["chapter_minor"] = sep + minor
results.append((url, data.copy()))
return results
diff --git a/gallery_dl/extractor/lynxchan.py b/gallery_dl/extractor/lynxchan.py
index 0edd5c1..85b3fef 100644
--- a/gallery_dl/extractor/lynxchan.py
+++ b/gallery_dl/extractor/lynxchan.py
@@ -18,8 +18,8 @@ class LynxchanExtractor(BaseExtractor):
BASE_PATTERN = LynxchanExtractor.update({
"bbw-chan": {
- "root": "https://bbw-chan.nl",
- "pattern": r"bbw-chan\.nl",
+ "root": "https://bbw-chan.link",
+ "pattern": r"bbw-chan\.(?:link|nl)",
},
"kohlchan": {
"root": "https://kohlchan.net",
@@ -40,7 +40,7 @@ class LynxchanThreadExtractor(LynxchanExtractor):
filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
archive_fmt = "{boardUri}_{postId}_{num}"
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
- example = "https://bbw-chan.nl/a/res/12345.html"
+ example = "https://endchan.org/a/res/12345.html"
def __init__(self, match):
LynxchanExtractor.__init__(self, match)
@@ -71,7 +71,7 @@ class LynxchanBoardExtractor(LynxchanExtractor):
"""Extractor for LynxChan boards"""
subcategory = "board"
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
- example = "https://bbw-chan.nl/a/"
+ example = "https://endchan.org/a/"
def __init__(self, match):
LynxchanExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py
index 46019ad..232b98d 100644
--- a/gallery_dl/extractor/manganelo.py
+++ b/gallery_dl/extractor/manganelo.py
@@ -10,7 +10,11 @@ from .common import ChapterExtractor, MangaExtractor
from .. import text
import re
-BASE_PATTERN = r"(?:https?://)?((?:chap|read|www\.|m\.)?mangan(?:at|el)o\.com)"
+BASE_PATTERN = (
+ r"(?:https?://)?"
+ r"((?:chap|read|www\.|m\.)?mangan(?:at|el)o"
+ r"\.(?:to|com))"
+)
class ManganeloBase():
@@ -67,10 +71,11 @@ class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
def images(self, page):
page = text.extr(
- page, 'class="container-chapter-reader', '\n<div')
+ page, 'class="container-chapter-reader', 'class="container')
return [
(url, None)
for url in text.extract_iter(page, '<img src="', '"')
+ if not url.endswith("/gohome.png")
] or [
(url, None)
for url in text.extract_iter(
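The rewritten BASE_PATTERN above now also recognizes the .to domains mentioned in the changelog. A quick standalone check of what the capture group yields (sample URLs are illustrative):

    import re

    BASE_PATTERN = (
        r"(?:https?://)?"
        r"((?:chap|read|www\.|m\.)?mangan(?:at|el)o"
        r"\.(?:to|com))"
    )

    for url in ("https://chapmanganato.to/manga-abc",
                "https://manganelo.com/manga/xyz"):
        print(re.match(BASE_PATTERN, url).group(1))
    # chapmanganato.to
    # manganelo.com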
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 57c3118..b991705 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -55,9 +55,12 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
else:
data["user_id"] = data["artist_id"]
data["user_name"] = data["artist_name"]
- yield Message.Directory, data
- for num, url in enumerate(self._extract_images(image_id, page)):
+ urls = list(self._extract_images(image_id, page))
+ data["count"] = len(urls)
+
+ yield Message.Directory, data
+ for num, url in enumerate(urls):
image = text.nameext_from_url(url, {
"num": num,
"url": "https:" + url,
diff --git a/gallery_dl/extractor/nudecollect.py b/gallery_dl/extractor/nudecollect.py
deleted file mode 100644
index bda5d77..0000000
--- a/gallery_dl/extractor/nudecollect.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://nudecollect.com/"""
-
-from .common import GalleryExtractor
-from .. import text
-
-
-class NudecollectExtractor(GalleryExtractor):
- """Base class for Nudecollect extractors"""
- category = "nudecollect"
- directory_fmt = ("{category}", "{title}")
- filename_fmt = "{slug}_{num:>03}.{extension}"
- archive_fmt = "{slug}_{num}"
- root = "https://www.nudecollect.com"
-
- def request(self, url, **kwargs):
- kwargs["allow_redirects"] = False
- return GalleryExtractor.request(self, url, **kwargs)
-
- @staticmethod
- def get_title(page):
- return text.unescape(text.extr(page, "<title>", "</title>"))[31:]
-
- @staticmethod
- def get_image(page):
- return text.extr(page, '<img src="', '"')
-
-
-class NudecollectImageExtractor(NudecollectExtractor):
- """Extractor for individual images from nudecollect.com"""
- subcategory = "image"
- pattern = (r"(?:https?://)?(?:www\.)?nudecollect\.com"
- r"(/content/([^/?#]+)/image-(\d+)-pics-(\d+)"
- r"-mirror-(\d+)\.html)")
- example = ("https://www.nudecollect.com/content/12345_TITLE"
- "/image-1-pics-108-mirror-1.html")
-
- def __init__(self, match):
- NudecollectExtractor.__init__(self, match)
- _, self.slug, self.num, self.count, self.mirror = match.groups()
-
- def metadata(self, page):
- return {
- "slug" : self.slug,
- "title" : self.get_title(page),
- "count" : text.parse_int(self.count),
- "mirror": text.parse_int(self.mirror),
- }
-
- def images(self, page):
- return ((self.get_image(page), {"num": text.parse_int(self.num)}),)
-
-
-class NudecollectAlbumExtractor(NudecollectExtractor):
- """Extractor for image albums on nudecollect.com"""
- subcategory = "album"
- pattern = (r"(?:https?://)?(?:www\.)?nudecollect\.com"
- r"/content/([^/?#]+)/(?:index-mirror-(\d+)-(\d+)"
- r"|page-\d+-pics-(\d+)-mirror-(\d+))\.html")
- example = ("https://www.nudecollect.com/content/12345_TITLE"
- "/index-mirror-01-123.html")
-
- def __init__(self, match):
- self.slug = match.group(1)
- self.mirror = match.group(2) or match.group(5)
- self.count = text.parse_int(match.group(3) or match.group(4))
- url = "{}/content/{}/image-1-pics-{}-mirror-{}.html".format(
- self.root, self.slug, self.count, self.mirror)
- NudecollectExtractor.__init__(self, match, url)
-
- def metadata(self, page):
- return {
- "slug" : self.slug,
- "title" : self.get_title(page),
- "mirror": text.parse_int(self.mirror),
- }
-
- def images(self, page):
- url = self.get_image(page)
- p1, _, p2 = url.partition("/image0")
- ufmt = p1 + "/image{:>05}" + p2[4:]
- return [(ufmt.format(num), None) for num in range(1, self.count + 1)]
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 0389ead..89c0d2f 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -32,7 +32,7 @@ class PahealExtractor(Extractor):
post["tags"] = text.unquote(post["tags"])
post.update(data)
yield Message.Directory, post
- yield Message.Url, url, text.nameext_from_url(url, post)
+ yield Message.Url, url, post
def get_metadata(self):
"""Return general metadata"""
@@ -59,11 +59,13 @@ class PahealExtractor(Extractor):
extr(">Source&nbsp;Link<", "</td>"), "href='", "'")),
}
- dimensions, size, ext = extr("Info</th><td>", ">").split(" // ")
- post["width"], _, height = dimensions.partition("x")
+ dimensions, size, ext = extr("Info</th><td>", "<").split(" // ")
post["size"] = text.parse_bytes(size[:-1])
+ post["width"], _, height = dimensions.partition("x")
post["height"], _, duration = height.partition(", ")
post["duration"] = text.parse_float(duration[:-1])
+ post["filename"] = "{} - {}".format(post_id, post["tags"])
+ post["extension"] = ext
return post
@@ -112,6 +114,7 @@ class PahealTagExtractor(PahealExtractor):
tags, data, date = data.split("\n")
dimensions, size, ext = data.split(" // ")
+ tags = text.unescape(tags)
width, _, height = dimensions.partition("x")
height, _, duration = height.partition(", ")
@@ -119,9 +122,11 @@ class PahealTagExtractor(PahealExtractor):
"id": pid, "md5": md5, "file_url": url,
"width": width, "height": height,
"duration": text.parse_float(duration[:-1]),
- "tags": text.unescape(tags),
+ "tags": tags,
"size": text.parse_bytes(size[:-1]),
"date": text.parse_datetime(date, "%B %d, %Y; %H:%M"),
+ "filename" : "{} - {}".format(pid, tags),
+ "extension": ext,
}
def _extract_data_ex(self, post):
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 4b26393..c46a587 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -47,6 +47,7 @@ class PinterestExtractor(Extractor):
carousel_data = pin.get("carousel_data")
if carousel_data:
+ pin["count"] = len(carousel_data["carousel_slots"])
for num, slot in enumerate(carousel_data["carousel_slots"], 1):
slot["media_id"] = slot.pop("id")
pin.update(slot)
@@ -65,7 +66,7 @@ class PinterestExtractor(Extractor):
if videos or media.get("duration") is None:
pin.update(media)
- pin["num"] = 0
+ pin["num"] = pin["count"] = 1
pin["media_id"] = ""
url = media["url"]
diff --git a/gallery_dl/extractor/poringa.py b/gallery_dl/extractor/poringa.py
new file mode 100644
index 0000000..0149d06
--- /dev/null
+++ b/gallery_dl/extractor/poringa.py
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for http://www.poringa.net/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+from ..cache import cache
+import itertools
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?poringa\.net"
+
+
+class PoringaExtractor(Extractor):
+ category = "poringa"
+ directory_fmt = ("{category}", "{user}", "{post_id}")
+ filename_fmt = "{post_id}_{title}_{num:>03}_{filename}.{extension}"
+ archive_fmt = "{post_id}_{num}"
+ root = "http://www.poringa.net"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.item = match.group(1)
+ self.__cookies = True
+
+ def items(self):
+ for post_id in self.posts():
+ url = "{}/posts/imagenes/{}".format(self.root, post_id)
+
+ try:
+ response = self.request(url)
+ except exception.HttpError as exc:
+ self.log.warning(
+ "Unable to fetch posts for '%s' (%s)", post_id, exc)
+ continue
+
+ if "/registro-login?" in response.url:
+ self.log.warning("Private post '%s'", post_id)
+ continue
+
+ page = response.text
+ title, pos = text.extract(
+ page, 'property="og:title" content="', '"')
+
+ try:
+ pos = page.index('<div class="main-info', pos)
+ user, pos = text.extract(
+ page, 'href="http://www.poringa.net/', '"', pos)
+ except ValueError:
+ user = None
+
+ if not user:
+ user = "poringa"
+
+ data = {
+ "post_id" : post_id,
+ "title" : text.unescape(title),
+ "user" : text.unquote(user),
+ "_http_headers": {"Referer": url},
+ }
+
+ main_post = text.extr(
+ page, 'property="dc:content" role="main">', '</div>')
+ urls = list(text.extract_iter(
+ main_post, '<img class="imagen" border="0" src="', '"'))
+ data["count"] = len(urls)
+
+ yield Message.Directory, data
+ for data["num"], url in enumerate(urls, 1):
+ yield Message.Url, url, text.nameext_from_url(url, data)
+
+ def posts(self):
+ return ()
+
+ def request(self, url, **kwargs):
+ if self.__cookies:
+ self.__cookies = False
+ self.cookies_update(_cookie_cache())
+
+ for _ in range(5):
+ response = Extractor.request(self, url, **kwargs)
+ if response.cookies:
+ _cookie_cache.update("", response.cookies)
+ if response.content.find(
+ b"<title>Please wait a few moments</title>", 0, 600) < 0:
+ return response
+ self.sleep(5.0, "check")
+
+ def _pagination(self, url, params):
+ for params["p"] in itertools.count(1):
+ page = self.request(url, params=params).text
+
+ posts_ids = PoringaPostExtractor.pattern.findall(page)
+ posts_ids = list(dict.fromkeys(posts_ids))
+ yield from posts_ids
+
+ if len(posts_ids) < 19:
+ return
+
+
+class PoringaPostExtractor(PoringaExtractor):
+ """Extractor for posts on poringa.net"""
+ subcategory = "post"
+ pattern = BASE_PATTERN + r"/posts/imagenes/(\d+)"
+ example = "http://www.poringa.net/posts/imagenes/12345/TITLE.html"
+
+ def posts(self):
+ return (self.item,)
+
+
+class PoringaUserExtractor(PoringaExtractor):
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/(\w+)$"
+ example = "http://www.poringa.net/USER"
+
+ def posts(self):
+ url = self.root + "/buscar/"
+ params = {"q": self.item}
+ return self._pagination(url, params)
+
+
+class PoringaSearchExtractor(PoringaExtractor):
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"/buscar/\?&?q=([^&#]+)"
+ example = "http://www.poringa.net/buscar/?q=QUERY"
+
+ def posts(self):
+ url = self.root + "/buscar/"
+ params = {"q": self.item}
+ return self._pagination(url, params)
+
+
+@cache()
+def _cookie_cache():
+ return ()
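Two details of the pagination above: dict.fromkeys() de-duplicates the post IDs found on a search page while preserving their order, and a page yielding fewer than 19 IDs is treated as the last one. The de-duplication step in isolation (IDs made up):

    posts_ids = ["123", "456", "123", "789"]
    print(list(dict.fromkeys(posts_ids)))  # ['123', '456', '789']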
diff --git a/gallery_dl/extractor/rule34us.py b/gallery_dl/extractor/rule34us.py
index 6439a22..cf70ccc 100644
--- a/gallery_dl/extractor/rule34us.py
+++ b/gallery_dl/extractor/rule34us.py
@@ -38,7 +38,11 @@ class Rule34usExtractor(BooruExtractor):
"height" : extr(' x ', 'h'),
"file_url": extr(' src="', '"'),
}
- post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
+
+ url = post["file_url"]
+ if "//video-cdn1." in url:
+ post["_fallback"] = (url.replace("//video-cdn1.", "//video."),)
+ post["md5"] = url.rpartition("/")[2].partition(".")[0]
tags = collections.defaultdict(list)
for tag_type, tag_name in self._find_tags(page):
diff --git a/gallery_dl/extractor/szurubooru.py b/gallery_dl/extractor/szurubooru.py
index 5415bf3..08cccab 100644
--- a/gallery_dl/extractor/szurubooru.py
+++ b/gallery_dl/extractor/szurubooru.py
@@ -87,6 +87,10 @@ BASE_PATTERN = SzurubooruExtractor.update({
"root": "https://booru.bcbnsfw.space",
"pattern": r"booru\.bcbnsfw\.space",
},
+ "snootbooru": {
+ "root": "https://snootbooru.com",
+ "pattern": r"snootbooru\.com",
+ },
})
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index fdcefdd..aa9ab9f 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -552,9 +552,11 @@ class TwitterTimelineExtractor(TwitterExtractor):
return self.api.user_media
if strategy == "tweets":
return self.api.user_tweets
+ if strategy == "media":
+ return self.api.user_media
if strategy == "with_replies":
return self.api.user_tweets_and_replies
- return self.api.user_media
+ raise exception.StopExtraction("Invalid strategy '%s'", strategy)
class TwitterTweetsExtractor(TwitterExtractor):
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 7413b5a..3bd0648 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -225,9 +225,6 @@ class WeiboUserExtractor(WeiboExtractor):
pattern = USER_PATTERN + r"(?:$|#)"
example = "https://weibo.com/USER"
- def initialize(self):
- pass
-
def items(self):
base = "{}/u/{}?tabtype=".format(self.root, self._user_id())
return self._dispatch_extractors((
diff --git a/gallery_dl/extractor/zzup.py b/gallery_dl/extractor/zzup.py
new file mode 100644
index 0000000..45b0cd8
--- /dev/null
+++ b/gallery_dl/extractor/zzup.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class ZzupGalleryExtractor(GalleryExtractor):
+ category = "zzup"
+ directory_fmt = ("{category}", "{title}")
+ filename_fmt = "{slug}_{num:>03}.{extension}"
+ archive_fmt = "{slug}_{num}"
+ root = "https://zzup.com"
+ pattern = (r"(?:https?://)?(?:www\.)?zzup\.com(/content"
+ r"/[\w=]+/([^/?#]+)/[\w=]+)/(?:index|page-\d+)\.html")
+ example = "https://zzup.com/content/xyz=/12345_TITLE/123=/index.html"
+
+ def __init__(self, match):
+ url = "{}/{}/index.html".format(self.root, match.group(1))
+ GalleryExtractor.__init__(self, match, url)
+ self.slug = match.group(2)
+
+ def metadata(self, page):
+ return {
+ "slug" : self.slug,
+ "title": text.unescape(text.extr(
+ page, "<title>", "</title>"))[:-11],
+ }
+
+ def images(self, page):
+ path = text.extr(page, 'class="picbox"><a target="_blank" href="', '"')
+ count = text.parse_int(text.extr(path, "-pics-", "-mirror"))
+ page = self.request(self.root + path).text
+ url = self.root + text.extr(page, '\n<a href="', '"')
+ p1, _, p2 = url.partition("/image0")
+ ufmt = p1 + "/image{:>05}" + p2[4:]
+ return [(ufmt.format(num), None) for num in range(1, count + 1)]
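The images() method above derives every page URL from the first one by re-writing a five-digit counter. A sketch of that string surgery with a made-up URL:

    url = "https://zzup.com/showimage/abc/image00001-9876.jpg"  # hypothetical
    count = 3

    p1, _, p2 = url.partition("/image0")  # p2 = "0001-9876.jpg"
    ufmt = p1 + "/image{:>05}" + p2[4:]   # drop "0001", re-add a 5-digit field
    print([ufmt.format(num) for num in range(1, count + 1)])
    # ['.../image00001-9876.jpg', '.../image00002-9876.jpg', '.../image00003-9876.jpg']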
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index b74d977..15905d6 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.26.5"
+__version__ = "1.26.6"
diff --git a/setup.cfg b/setup.cfg
index 235ebd5..013f39f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,10 +1,11 @@
[flake8]
-exclude = .git,__pycache__,build,dist,archive,results
+exclude = .git,__pycache__,build,dist,archive
ignore = E203,E226,W504
per-file-ignores =
setup.py: E501
gallery_dl/extractor/500px.py: E501
gallery_dl/extractor/mangapark.py: E501
+ test/results/*.py: E122,E241,E402,E501
[egg_info]
tag_build =