aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG.md20
-rw-r--r--PKG-INFO7
-rw-r--r--README.rst4
-rw-r--r--data/completion/_gallery-dl1
-rw-r--r--data/completion/gallery-dl4
-rw-r--r--data/completion/gallery-dl.fish1
-rw-r--r--data/man/gallery-dl.15
-rw-r--r--data/man/gallery-dl.conf.540
-rw-r--r--gallery_dl.egg-info/PKG-INFO7
-rw-r--r--gallery_dl.egg-info/SOURCES.txt1
-rw-r--r--gallery_dl/__init__.py76
-rw-r--r--gallery_dl/extractor/__init__.py1
-rw-r--r--gallery_dl/extractor/common.py14
-rw-r--r--gallery_dl/extractor/danbooru.py2
-rw-r--r--gallery_dl/extractor/deviantart.py2
-rw-r--r--gallery_dl/extractor/exhentai.py73
-rw-r--r--gallery_dl/extractor/nijie.py34
-rw-r--r--gallery_dl/extractor/oauth.py2
-rw-r--r--gallery_dl/extractor/patreon.py7
-rw-r--r--gallery_dl/extractor/pixeldrain.py4
-rw-r--r--gallery_dl/extractor/reddit.py3
-rw-r--r--gallery_dl/extractor/twitter.py37
-rw-r--r--gallery_dl/extractor/urlgalleries.py55
-rw-r--r--gallery_dl/option.py5
-rw-r--r--gallery_dl/output.py4
-rw-r--r--gallery_dl/util.py29
-rw-r--r--gallery_dl/version.py2
-rw-r--r--setup.py1
28 files changed, 344 insertions, 97 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 39f5884..88dbc44 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,25 @@
# Changelog
+## 1.26.4 - 2023-12-10
+### Extractors
+#### Additions
+- [exhentai] add `fallback-retries` option ([#4792](https://github.com/mikf/gallery-dl/issues/4792))
+- [urlgalleries] add `gallery` extractor ([#919](https://github.com/mikf/gallery-dl/issues/919), [#1184](https://github.com/mikf/gallery-dl/issues/1184), [#2905](https://github.com/mikf/gallery-dl/issues/2905), [#4886](https://github.com/mikf/gallery-dl/issues/4886))
+#### Fixes
+- [nijie] fix image URLs of multi-image posts ([#4876](https://github.com/mikf/gallery-dl/issues/4876))
+- [patreon] fix bootstrap data extraction ([#4904](https://github.com/mikf/gallery-dl/issues/4904), [#4906](https://github.com/mikf/gallery-dl/issues/4906))
+- [twitter] fix `/media` timelines ([#4898](https://github.com/mikf/gallery-dl/issues/4898), [#4899](https://github.com/mikf/gallery-dl/issues/4899))
+- [twitter] retry API requests when response contains incomplete results ([#4811](https://github.com/mikf/gallery-dl/issues/4811))
+#### Improvements
+- [exhentai] store more cookies when logging in with username & password ([#4881](https://github.com/mikf/gallery-dl/issues/4881))
+- [twitter] generalize "Login Required" errors ([#4734](https://github.com/mikf/gallery-dl/issues/4734), [#4324](https://github.com/mikf/gallery-dl/issues/4324))
+### Options
+- add `-e/--error-file` command-line and `output.errorfile` config option ([#4732](https://github.com/mikf/gallery-dl/issues/4732))
+### Miscellaneous
+- automatically build and push Docker images
+- prompt for passwords on login when necessary
+- fix `util.dump_response()` to work with `bytes` header values
+
## 1.26.3 - 2023-11-27
### Extractors
#### Additions
diff --git a/PKG-INFO b/PKG-INFO
index 8c77174..e75fd05 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.26.3
+Version: 1.26.4
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -26,6 +26,7 @@ Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Internet :: WWW/HTTP
@@ -111,9 +112,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.3/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.3/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 776ba43..5603929 100644
--- a/README.rst
+++ b/README.rst
@@ -72,9 +72,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.3/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.3/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index f3b17a2..39fabcc 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -26,6 +26,7 @@ _arguments -C -S \
{-s,--simulate}'[Simulate data extraction; do not download anything]' \
{-E,--extractor-info}'[Print extractor defaults and settings]' \
{-K,--list-keywords}'[Print a list of available keywords and example values for the given URLs]' \
+{-e,--error-file}'[Add input URLs which returned an error to FILE]':'<file>':_files \
--list-modules'[Print a list of available extractor modules]' \
--list-extractors'[Print a list of extractor classes with description, (sub)category and example URL]' \
--write-log'[Write logging output to FILE]':'<file>':_files \
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index e5bc6c1..d280ab4 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -5,12 +5,12 @@ _gallery_dl()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
- if [[ "${prev}" =~ ^(-i|--input-file|-I|--input-file-comment|-x|--input-file-delete|--write-log|--write-unsupported|-c|--config|--config-yaml|--config-toml|-C|--cookies|--cookies-export|--download-archive)$ ]]; then
+ if [[ "${prev}" =~ ^(-i|--input-file|-I|--input-file-comment|-x|--input-file-delete|-e|--error-file|--write-log|--write-unsupported|-c|--config|--config-yaml|--config-toml|-C|--cookies|--cookies-export|--download-archive)$ ]]; then
COMPREPLY=( $(compgen -f -- "${cur}") )
elif [[ "${prev}" =~ ^()$ ]]; then
COMPREPLY=( $(compgen -d -- "${cur}") )
else
- COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --proxy --source-address --user-agent --clear-cache --input-file --input-file-comment --input-file-delete --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --chunk-size --no-part --no-skip --no-mtime --no-download --no-postprocessors --no-check-certificate --option --config --config-yaml --config-toml --config-create --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --postprocessor --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime --mtime-from-date --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --proxy --source-address --user-agent --clear-cache --input-file --input-file-comment --input-file-delete --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --error-file --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --chunk-size --no-part --no-skip --no-mtime --no-download --no-postprocessors --no-check-certificate --option --config --config-yaml --config-toml --config-create --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --postprocessor --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime --mtime-from-date --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") )
fi
}
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index 5cb60d4..135dfb7 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -20,6 +20,7 @@ complete -c gallery-dl -s 'j' -l 'dump-json' -d 'Print JSON information'
complete -c gallery-dl -s 's' -l 'simulate' -d 'Simulate data extraction; do not download anything'
complete -c gallery-dl -s 'E' -l 'extractor-info' -d 'Print extractor defaults and settings'
complete -c gallery-dl -s 'K' -l 'list-keywords' -d 'Print a list of available keywords and example values for the given URLs'
+complete -c gallery-dl -r -F -s 'e' -l 'error-file' -d 'Add input URLs which returned an error to FILE'
complete -c gallery-dl -l 'list-modules' -d 'Print a list of available extractor modules'
complete -c gallery-dl -l 'list-extractors' -d 'Print a list of extractor classes with description, (sub)category and example URL'
complete -c gallery-dl -r -F -l 'write-log' -d 'Write logging output to FILE'
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 2789154..277b227 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-11-27" "1.26.3" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-12-10" "1.26.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -80,6 +80,9 @@ Print extractor defaults and settings
.B "\-K, \-\-list\-keywords"
Print a list of available keywords and example values for the given URLs
.TP
+.B "\-e, \-\-error\-file" \f[I]FILE\f[]
+Add input URLs which returned an error to FILE
+.TP
.B "\-\-list\-modules"
Print a list of available extractor modules
.TP
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index adf9885..95e9627 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-11-27" "1.26.3" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-12-10" "1.26.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -450,7 +450,7 @@ during data extraction.
The username and password to use when attempting to log in to
another site.
-Specifying a username and password is required for
+Specifying username and password is required for
.br
* \f[I]nijie\f[]
@@ -509,6 +509,10 @@ by using a \f[I].netrc\f[] file. (see Authentication_)
(*) The password value for these sites should be
the API key found in your user profile, not the actual account password.
+Note: Leave the \f[I]password\f[] value empty or undefined
+to get prompted for a password when performing a login
+(see \f[I]getpass()\f[]).
+
.SS extractor.*.netrc
.IP "Type:" 6
@@ -1802,6 +1806,18 @@ depending on the input URL
* \f[I]"exhentai.org"\f[]: Use \f[I]exhentai.org\f[] for all URLs
+.SS extractor.exhentai.fallback-retries
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]2\f[]
+
+.IP "Description:" 4
+Number of times a failed image gets retried.
+Use \f[I]-1\f[] for infinite retries
+
+
.SS extractor.exhentai.fav
.IP "Type:" 6
\f[I]string\f[]
@@ -5070,6 +5086,26 @@ File to write external URLs unsupported by *gallery-dl* to.
The default format string here is \f[I]"{message}"\f[].
+.SS output.errorfile
+.IP "Type:" 6
+.br
+* \f[I]Path\f[]
+.br
+* \f[I]Logging Configuration\f[]
+
+.IP "Description:" 4
+File to write input URLs which returned an error to.
+
+The default format string here is also \f[I]"{message}"\f[].
+
+When combined with
+\f[I]-I\f[]/\f[I]--input-file-comment\f[] or
+\f[I]-x\f[]/\f[I]--input-file-delete\f[],
+this option will cause *all* input URLs from these files
+to be commented/deleted after processing them
+and not just successful ones.
+
+
.SS output.num-to-str
.IP "Type:" 6
\f[I]bool\f[]
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 5fc537b..e1b709b 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.26.3
+Version: 1.26.4
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -26,6 +26,7 @@ Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Internet :: WWW/HTTP
@@ -111,9 +112,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.3/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.3/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.4/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 255edc7..9bcf0b2 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -203,6 +203,7 @@ gallery_dl/extractor/twibooru.py
gallery_dl/extractor/twitter.py
gallery_dl/extractor/unsplash.py
gallery_dl/extractor/uploadir.py
+gallery_dl/extractor/urlgalleries.py
gallery_dl/extractor/urlshortener.py
gallery_dl/extractor/vanillarock.py
gallery_dl/extractor/vichan.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 287faf1..fff53eb 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -226,18 +226,26 @@ def main():
else:
jobtype = args.jobtype or job.DownloadJob
+ input_manager = InputManager()
+ input_manager.log = input_log = logging.getLogger("inputfile")
+
# unsupported file logging handler
handler = output.setup_logging_handler(
"unsupportedfile", fmt="{message}")
if handler:
- ulog = logging.getLogger("unsupported")
+ ulog = job.Job.ulog = logging.getLogger("unsupported")
ulog.addHandler(handler)
ulog.propagate = False
- job.Job.ulog = ulog
+
+ # error file logging handler
+ handler = output.setup_logging_handler(
+ "errorfile", fmt="{message}", mode="a")
+ if handler:
+ elog = input_manager.err = logging.getLogger("errorfile")
+ elog.addHandler(handler)
+ elog.propagate = False
# collect input URLs
- input_manager = InputManager()
- input_manager.log = input_log = logging.getLogger("inputfile")
input_manager.add_list(args.urls)
if args.input_files:
@@ -270,6 +278,7 @@ def main():
if status:
retval |= status
+ input_manager.error()
else:
input_manager.success()
@@ -281,6 +290,7 @@ def main():
except exception.NoExtractorError:
log.error("Unsupported URL '%s'", url)
retval |= 64
+ input_manager.error()
input_manager.next()
return retval
@@ -301,8 +311,11 @@ class InputManager():
def __init__(self):
self.urls = []
self.files = ()
+ self.log = self.err = None
+
+ self._url = ""
+ self._item = None
self._index = 0
- self._current = None
self._pformat = None
def add_url(self, url):
@@ -439,17 +452,33 @@ class InputManager():
self._index += 1
def success(self):
- if self._current:
- url, path, action, indicies = self._current
- lines = self.files[path]
- action(lines, indicies)
- try:
- with open(path, "w", encoding="utf-8") as fp:
- fp.writelines(lines)
- except Exception as exc:
- self.log.warning(
- "Unable to update '%s' (%s: %s)",
- path, exc.__class__.__name__, exc)
+ if self._item:
+ self._rewrite()
+
+ def error(self):
+ if self.err:
+ if self._item:
+ url, path, action, indicies = self._item
+ lines = self.files[path]
+ out = "".join(lines[i] for i in indicies)
+ if out and out[-1] == "\n":
+ out = out[:-1]
+ self._rewrite()
+ else:
+ out = str(self._url)
+ self.err.info(out)
+
+ def _rewrite(self):
+ url, path, action, indicies = self._item
+ lines = self.files[path]
+ action(lines, indicies)
+ try:
+ with open(path, "w", encoding="utf-8") as fp:
+ fp.writelines(lines)
+ except Exception as exc:
+ self.log.warning(
+ "Unable to update '%s' (%s: %s)",
+ path, exc.__class__.__name__, exc)
@staticmethod
def _action_comment(lines, indicies):
@@ -467,23 +496,24 @@ class InputManager():
def __next__(self):
try:
- item = self.urls[self._index]
+ url = self.urls[self._index]
except IndexError:
raise StopIteration
- if isinstance(item, tuple):
- self._current = item
- item = item[0]
+ if isinstance(url, tuple):
+ self._item = url
+ url = url[0]
else:
- self._current = None
+ self._item = None
+ self._url = url
if self._pformat:
output.stderr_write(self._pformat({
"total" : len(self.urls),
"current": self._index + 1,
- "url" : item,
+ "url" : url,
}))
- return item
+ return url
class ExtendedUrl():
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 72239d5..d074de2 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -155,6 +155,7 @@ modules = [
"tumblrgallery",
"twibooru",
"twitter",
+ "urlgalleries",
"unsplash",
"uploadir",
"urlshortener",
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index f378427..9b010c5 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -136,6 +136,18 @@ class Extractor():
kwargs["timeout"] = self._timeout
if "verify" not in kwargs:
kwargs["verify"] = self._verify
+
+ if "json" in kwargs:
+ json = kwargs["json"]
+ if json is not None:
+ kwargs["data"] = util.json_dumps(json).encode()
+ del kwargs["json"]
+ headers = kwargs.get("headers")
+ if headers:
+ headers["Content-Type"] = "application/json"
+ else:
+ kwargs["headers"] = {"Content-Type": "application/json"}
+
response = None
tries = 1
@@ -233,7 +245,7 @@ class Extractor():
password = None
if username:
- password = self.config("password")
+ password = self.config("password") or util.LazyPrompt()
elif self.config("netrc", False):
try:
info = netrc.netrc().authenticators(self.category)
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 56d81e5..9e6516e 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -36,7 +36,7 @@ class DanbooruExtractor(BaseExtractor):
username, api_key = self._get_auth_info()
if username:
self.log.debug("Using HTTP Basic Auth for user '%s'", username)
- self.session.auth = (username, api_key)
+ self.session.auth = util.HTTPBasicAuth(username, api_key)
def skip(self, num):
pages = num // self.per_page
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 2c37ef1..1852dc1 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1239,7 +1239,7 @@ class DeviantartOAuthAPI():
self.log.info("Requesting public access token")
data = {"grant_type": "client_credentials"}
- auth = (self.client_id, self.client_secret)
+ auth = util.HTTPBasicAuth(self.client_id, self.client_secret)
response = self.extractor.request(
url, method="POST", data=data, auth=auth, fatal=False)
data = response.json()
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 5dc498f..a479d00 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -47,14 +47,6 @@ class ExhentaiExtractor(Extractor):
if self.version != "ex":
self.cookies.set("nw", "1", domain=self.cookies_domain)
- self.original = self.config("original", True)
-
- limits = self.config("limits", False)
- if limits and limits.__class__ is int:
- self.limits = limits
- self._remaining = 0
- else:
- self.limits = False
def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs)
@@ -85,6 +77,7 @@ class ExhentaiExtractor(Extractor):
@cache(maxage=90*24*3600, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
+
url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01"
headers = {
"Referer": "https://e-hentai.org/bounce_login.php?b=d&bt=1-1",
@@ -98,10 +91,19 @@ class ExhentaiExtractor(Extractor):
"ipb_login_submit": "Login!",
}
+ self.cookies.clear()
+
response = self.request(url, method="POST", headers=headers, data=data)
if b"You are now logged in as:" not in response.content:
raise exception.AuthenticationError()
- return {c: response.cookies[c] for c in self.cookies_names}
+
+ # collect more cookies
+ url = self.root + "/favorites.php"
+ response = self.request(url)
+ if response.history:
+ self.request(url)
+
+ return self.cookies
class ExhentaiGalleryExtractor(ExhentaiExtractor):
@@ -128,6 +130,19 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
if source == "hitomi":
self.items = self._items_hitomi
+ limits = self.config("limits", False)
+ if limits and limits.__class__ is int:
+ self.limits = limits
+ self._remaining = 0
+ else:
+ self.limits = False
+
+ self.fallback_retries = self.config("fallback-retries", 2)
+ if self.fallback_retries < 0:
+ self.fallback_retries = float("inf")
+
+ self.original = self.config("original", True)
+
def favorite(self, slot="0"):
url = self.root + "/gallerypopups.php"
params = {
@@ -301,12 +316,11 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
if self.original and orig:
url = self.root + "/fullimg" + text.unescape(orig)
data = self._parse_original_info(extr('ownload original', '<'))
- data["_fallback"] = ("{}?nl={}".format(url, nl),)
+ data["_fallback"] = self._fallback_original(nl, url)
else:
url = iurl
data = self._parse_image_info(url)
- data["_fallback"] = self._fallback(
- None, self.image_num, nl)
+ data["_fallback"] = self._fallback_1280(nl, self.image_num)
except IndexError:
self.log.debug("Page content:\n%s", page)
raise exception.StopExtraction(
@@ -315,6 +329,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
data["num"] = self.image_num
data["image_token"] = self.key_start = extr('var startkey="', '";')
data["_url_1280"] = iurl
+ data["_nl"] = nl
self.key_show = extr('var showkey="', '";')
self._check_509(iurl, data)
@@ -351,12 +366,12 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
url = text.unescape(origurl)
data = self._parse_original_info(text.extract(
i6, "ownload original", "<", pos)[0])
- data["_fallback"] = ("{}?nl={}".format(url, nl),)
+ data["_fallback"] = self._fallback_original(nl, url)
else:
url = imgurl
data = self._parse_image_info(url)
- data["_fallback"] = self._fallback(
- imgkey, request["page"], nl)
+ data["_fallback"] = self._fallback_1280(
+ nl, request["page"], imgkey)
except IndexError:
self.log.debug("Page content:\n%s", page)
raise exception.StopExtraction(
@@ -365,6 +380,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
data["num"] = request["page"]
data["image_token"] = imgkey
data["_url_1280"] = imgurl
+ data["_nl"] = nl
self._check_509(imgurl, data)
yield url, text.nameext_from_url(url, data)
@@ -431,13 +447,26 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
raise exception.NotFoundError("image page")
return page
- def _fallback(self, imgkey, num, nl):
- url = "{}/s/{}/{}-{}?nl={}".format(
- self.root, imgkey or self.key_start, self.gallery_id, num, nl)
- page = self.request(url, fatal=False).text
- if page.startswith(("Invalid page", "Keep trying")):
- return
- yield self.image_from_page(page)[0]
+ def _fallback_original(self, nl, fullimg):
+ url = "{}?nl={}".format(fullimg, nl)
+ for _ in range(self.fallback_retries):
+ yield url
+
+ def _fallback_1280(self, nl, num, token=None):
+ if not token:
+ token = self.key_start
+
+ for _ in range(self.fallback_retries):
+ url = "{}/s/{}/{}-{}?nl={}".format(
+ self.root, token, self.gallery_id, num, nl)
+
+ page = self.request(url, fatal=False).text
+ if page.startswith(("Invalid page", "Keep trying")):
+ return
+ url, data = self.image_from_page(page)
+ yield url
+
+ nl = data["_nl"]
@staticmethod
def _parse_image_info(url):
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 76c5404..54f2942 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -57,7 +57,11 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
data["user_name"] = data["artist_name"]
yield Message.Directory, data
- for image in self._extract_images(page):
+ for num, url in enumerate(self._extract_images(image_id, page)):
+ image = text.nameext_from_url(url, {
+ "num": num,
+ "url": "https:" + url,
+ })
image.update(data)
if not image["extension"]:
image["extension"] = "jpg"
@@ -72,7 +76,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
extr = text.extract_from(page)
keywords = text.unescape(extr(
'name="keywords" content="', '" />')).split(",")
- data = {
+ return {
"title" : keywords[0].strip(),
"description": text.unescape(extr(
'"description": "', '"').replace("&amp;", "&")),
@@ -82,7 +86,6 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
"artist_name": keywords[1],
"tags" : keywords[2:-1],
}
- return data
@staticmethod
def _extract_data_horne(page):
@@ -90,7 +93,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
extr = text.extract_from(page)
keywords = text.unescape(extr(
'name="keywords" content="', '" />')).split(",")
- data = {
+ return {
"title" : keywords[0].strip(),
"description": text.unescape(extr(
'property="og:description" content="', '"')),
@@ -101,21 +104,16 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
"itemprop='datePublished' content=", "<").rpartition(">")[2],
"%Y-%m-%d %H:%M:%S", 9),
}
- return data
- @staticmethod
- def _extract_images(page):
- """Extract image URLs from 'page'"""
- images = text.extract_iter(page, "/view_popup.php", "</a>")
- for num, image in enumerate(images):
- src = text.extr(image, 'src="', '"')
- if not src:
- continue
- url = ("https:" + src).replace("/__rs_l120x120/", "/")
- yield text.nameext_from_url(url, {
- "num": num,
- "url": url,
- })
+ def _extract_images(self, image_id, page):
+ if '&#diff_1" ' in page:
+ # multiple images
+ url = "{}/view_popup.php?id={}".format(self.root, image_id)
+ page = self.request(url).text
+ yield from text.extract_iter(
+ page, 'href="javascript:void(0);"><img src="', '"')
+ else:
+ yield text.extr(page, 'itemprop="image" src="', '"')
@staticmethod
def _extract_user_name(page):
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index d1f135d..65db94d 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -183,7 +183,7 @@ class OAuthBase(Extractor):
}
if auth:
- auth = (client_id, client_secret)
+ auth = util.HTTPBasicAuth(client_id, client_secret)
else:
auth = None
data["client_id"] = client_id
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 6aef9cb..fb560e9 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -254,6 +254,13 @@ class PatreonExtractor(Extractor):
if bootstrap:
return util.json_loads(bootstrap + "}")
+ bootstrap = text.extr(
+ page,
+ 'window.patreon = wrapInProxy({"bootstrap":',
+ '},"apiServer"')
+ if bootstrap:
+ return util.json_loads(bootstrap + "}")
+
bootstrap = text.extr(page, "window.patreon.bootstrap,", "});")
if bootstrap:
return util.json_loads(bootstrap + "}")
diff --git a/gallery_dl/extractor/pixeldrain.py b/gallery_dl/extractor/pixeldrain.py
index 34b4ebf..5cfdc43 100644
--- a/gallery_dl/extractor/pixeldrain.py
+++ b/gallery_dl/extractor/pixeldrain.py
@@ -9,7 +9,7 @@
"""Extractors for https://pixeldrain.com/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, util
BASE_PATTERN = r"(?:https?://)?pixeldrain\.com"
@@ -23,7 +23,7 @@ class PixeldrainExtractor(Extractor):
def _init(self):
api_key = self.config("api-key")
if api_key:
- self.session.auth = ("", api_key)
+ self.session.auth = util.HTTPBasicAuth("", api_key)
def parse_datetime(self, date_string):
return text.parse_datetime(
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index c0bf5b3..feb6d1f 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -423,9 +423,10 @@ class RedditAPI():
"grants/installed_client"),
"device_id": "DO_NOT_TRACK_THIS_DEVICE"}
+ auth = util.HTTPBasicAuth(self.client_id, "")
response = self.extractor.request(
url, method="POST", headers=self.headers,
- data=data, auth=(self.client_id, ""), fatal=False)
+ data=data, auth=auth, fatal=False)
data = response.json()
if response.status_code != 200:
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ca1e906..f874f12 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1276,8 +1276,21 @@ class TwitterAPI():
self.headers["x-csrf-token"] = csrf_token
if response.status_code < 400:
- # success
- return response.json()
+ data = response.json()
+ if not data.get("errors") or not any(
+ (e.get("message") or "").lower().startswith("timeout")
+ for e in data["errors"]):
+ return data # success or non-timeout errors
+
+ msg = data["errors"][0].get("message") or "Unspecified"
+ self.extractor.log.debug("Internal Twitter error: '%s'", msg)
+
+ if self.headers["x-twitter-auth-type"]:
+ self.extractor.log.debug("Retrying API request")
+ continue # retry
+
+ # fall through to "Login Required"
+ response.status_code = 404
if response.status_code == 429:
# rate limit exceeded
@@ -1289,11 +1302,9 @@ class TwitterAPI():
self.extractor.wait(until=until, seconds=seconds)
continue
- if response.status_code == 403 and \
- not self.headers["x-twitter-auth-type"] and \
- endpoint == "/2/search/adaptive.json":
- raise exception.AuthorizationError(
- "Login required to access search results")
+ if response.status_code in (403, 404) and \
+ not self.headers["x-twitter-auth-type"]:
+ raise exception.AuthorizationError("Login required")
# error
try:
@@ -1431,7 +1442,12 @@ class TwitterAPI():
for instr in instructions:
instr_type = instr.get("type")
if instr_type == "TimelineAddEntries":
- entries = instr["entries"]
+ if entries:
+ entries.extend(instr["entries"])
+ else:
+ entries = instr["entries"]
+ elif instr_type == "TimelineAddToModule":
+ entries = instr["moduleItems"]
elif instr_type == "TimelineReplaceEntry":
entry = instr["entry"]
if entry["entryId"].startswith("cursor-bottom-"):
@@ -1479,6 +1495,11 @@ class TwitterAPI():
if esw("tweet-"):
tweets.append(entry)
+ elif esw("profile-grid-"):
+ if "content" in entry:
+ tweets.extend(entry["content"]["items"])
+ else:
+ tweets.append(entry)
elif esw(("homeConversation-",
"profile-conversation-",
"conversationthread-")):
diff --git a/gallery_dl/extractor/urlgalleries.py b/gallery_dl/extractor/urlgalleries.py
new file mode 100644
index 0000000..b21709a
--- /dev/null
+++ b/gallery_dl/extractor/urlgalleries.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://urlgalleries.net/"""
+
+from .common import GalleryExtractor, Message
+from .. import text
+
+
+class UrlgalleriesGalleryExtractor(GalleryExtractor):
+ """Extractor for gallery albums on urlgalleries.net"""
+ category = "urlgalleries"
+ root = "urlgalleries.net"
+ request_interval = (0.5, 1.0)
+ pattern = r"(?:https?://)(?:(\w+)\.)?urlgalleries\.net/(?:[\w-]+-)?(\d+)"
+ example = "https://blog.urlgalleries.net/gallery-12345/TITLE"
+
+ def __init__(self, match):
+ self.blog, self.gallery_id = match.groups()
+ url = "https://{}.urlgalleries.net/porn-gallery-{}/?a=10000".format(
+ self.blog, self.gallery_id)
+ GalleryExtractor.__init__(self, match, url)
+
+ def items(self):
+ page = self.request(self.gallery_url).text
+ imgs = self.images(page)
+ data = self.metadata(page)
+ data["count"] = len(imgs)
+ del page
+
+ root = "https://{}.urlgalleries.net".format(self.blog)
+ yield Message.Directory, data
+ for data["num"], img in enumerate(imgs, 1):
+ response = self.request(
+ root + img, method="HEAD", allow_redirects=False)
+ yield Message.Queue, response.headers["Location"], data
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ return {
+ "gallery_id": self.gallery_id,
+ "_site": extr(' title="', '"'), # site name
+ "blog" : text.unescape(extr(' title="', '"')),
+ "_rprt": extr(' title="', '"'), # report button
+ "title": text.unescape(extr(' title="', '"').strip()),
+ "date" : text.parse_datetime(
+ extr(" images in gallery | ", "<"), "%B %d, %Y %H:%M"),
+ }
+
+ def images(self, page):
+ imgs = text.extr(page, 'id="wtf"', "</div>")
+ return list(text.extract_iter(imgs, " href='", "'"))
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 255d9f2..72a602f 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -287,6 +287,11 @@ def build_parser():
"for the given URLs"),
)
output.add_argument(
+ "-e", "--error-file",
+ dest="errorfile", metavar="FILE", action=ConfigAction,
+ help="Add input URLs which returned an error to FILE",
+ )
+ output.add_argument(
"--list-modules",
dest="list_modules", action="store_true",
help="Print a list of available extractor modules",
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index 9508ff3..c0971f0 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -210,7 +210,7 @@ def configure_logging(loglevel):
root.setLevel(minlevel)
-def setup_logging_handler(key, fmt=LOG_FORMAT, lvl=LOG_LEVEL):
+def setup_logging_handler(key, fmt=LOG_FORMAT, lvl=LOG_LEVEL, mode="w"):
"""Setup a new logging handler"""
opts = config.interpolate(("output",), key)
if not opts:
@@ -219,7 +219,7 @@ def setup_logging_handler(key, fmt=LOG_FORMAT, lvl=LOG_LEVEL):
opts = {"path": opts}
path = opts.get("path")
- mode = opts.get("mode", "w")
+ mode = opts.get("mode", mode)
encoding = opts.get("encoding", "utf-8")
try:
path = util.expand_path(path)
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 62aa12d..53502ef 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -14,6 +14,7 @@ import sys
import json
import time
import random
+import getpass
import hashlib
import sqlite3
import binascii
@@ -274,7 +275,7 @@ Response Headers
if hide_auth:
authorization = req_headers.get("Authorization")
if authorization:
- atype, sep, _ = authorization.partition(" ")
+ atype, sep, _ = str(authorization).partition(" ")
req_headers["Authorization"] = atype + " ***" if sep else "***"
cookie = req_headers.get("Cookie")
@@ -290,15 +291,17 @@ Response Headers
r"(^|, )([^ =]+)=[^,;]*", r"\1\2=***", set_cookie,
)
+ fmt_nv = "{}: {}".format
+
fp.write(outfmt.format(
request=request,
response=response,
request_headers="\n".join(
- name + ": " + value
+ fmt_nv(name, value)
for name, value in req_headers.items()
),
response_headers="\n".join(
- name + ": " + value
+ fmt_nv(name, value)
for name, value in res_headers.items()
),
).encode())
@@ -487,6 +490,26 @@ CODES = {
}
+class HTTPBasicAuth():
+ __slots__ = ("authorization",)
+
+ def __init__(self, username, password):
+ self.authorization = b"Basic " + binascii.b2a_base64(
+ username.encode("latin1") + b":" + str(password).encode("latin1")
+ )[:-1]
+
+ def __call__(self, request):
+ request.headers["Authorization"] = self.authorization
+ return request
+
+
+class LazyPrompt():
+ __slots__ = ()
+
+ def __str__(self):
+ return getpass.getpass()
+
+
class CustomNone():
"""None-style type that supports more operations than regular None"""
__slots__ = ()
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 5034fb2..f0d55f6 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.26.3"
+__version__ = "1.26.4"
diff --git a/setup.py b/setup.py
index c91549a..ee66f5f 100644
--- a/setup.py
+++ b/setup.py
@@ -135,6 +135,7 @@ def build_setuptools():
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
"Topic :: Internet :: WWW/HTTP",