aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatarUnit 193 <unit193@unit193.net>2021-09-09 01:58:06 -0400
committerLibravatarUnit 193 <unit193@unit193.net>2021-09-09 01:58:06 -0400
commit302a512573e19208c01662a4021112b307ccd564 (patch)
treec9bdcb9c6a2ed5fb77cb9ba78eb7e39828e34869
parentd89b7125fe8077cbf9ffc267118813917e2bf197 (diff)
parent3f5483df9075ae526f4c54f4cbe80edeabf6d4cc (diff)
downloadgallery-dl-302a512573e19208c01662a4021112b307ccd564.tar.bz2
gallery-dl-302a512573e19208c01662a4021112b307ccd564.tar.xz
gallery-dl-302a512573e19208c01662a4021112b307ccd564.tar.zst
Update upstream source from tag 'upstream/1.18.4'
Update to upstream version '1.18.4' with Debian dir 7028ff2f6c45bfb1e6332887005f654a9fdc8a48
-rw-r--r--CHANGELOG.md35
-rw-r--r--PKG-INFO6
-rw-r--r--README.rst4
-rw-r--r--data/man/gallery-dl.12
-rw-r--r--data/man/gallery-dl.conf.591
-rw-r--r--docs/gallery-dl.conf10
-rw-r--r--gallery_dl.egg-info/PKG-INFO6
-rw-r--r--gallery_dl.egg-info/SOURCES.txt1
-rw-r--r--gallery_dl/extractor/2chan.py1
-rw-r--r--gallery_dl/extractor/35photo.py1
-rw-r--r--gallery_dl/extractor/420chan.py76
-rw-r--r--gallery_dl/extractor/4chan.py1
-rw-r--r--gallery_dl/extractor/500px.py1
-rw-r--r--gallery_dl/extractor/8kun.py1
-rw-r--r--gallery_dl/extractor/__init__.py1
-rw-r--r--gallery_dl/extractor/behance.py2
-rw-r--r--gallery_dl/extractor/blogger.py1
-rw-r--r--gallery_dl/extractor/common.py19
-rw-r--r--gallery_dl/extractor/deviantart.py192
-rw-r--r--gallery_dl/extractor/directlink.py1
-rw-r--r--gallery_dl/extractor/dynastyscans.py3
-rw-r--r--gallery_dl/extractor/exhentai.py4
-rw-r--r--gallery_dl/extractor/fanbox.py1
-rw-r--r--gallery_dl/extractor/fantia.py1
-rw-r--r--gallery_dl/extractor/flickr.py1
-rw-r--r--gallery_dl/extractor/foolfuuka.py44
-rw-r--r--gallery_dl/extractor/foolslide.py13
-rw-r--r--gallery_dl/extractor/furaffinity.py27
-rw-r--r--gallery_dl/extractor/gfycat.py2
-rw-r--r--gallery_dl/extractor/hentaifox.py1
-rw-r--r--gallery_dl/extractor/hitomi.py6
-rw-r--r--gallery_dl/extractor/imagefap.py14
-rw-r--r--gallery_dl/extractor/imagehosts.py1
-rw-r--r--gallery_dl/extractor/imgbb.py2
-rw-r--r--gallery_dl/extractor/imgbox.py1
-rw-r--r--gallery_dl/extractor/imgth.py1
-rw-r--r--gallery_dl/extractor/imgur.py3
-rw-r--r--gallery_dl/extractor/inkbunny.py11
-rw-r--r--gallery_dl/extractor/issuu.py1
-rw-r--r--gallery_dl/extractor/keenspot.py1
-rw-r--r--gallery_dl/extractor/kemonoparty.py6
-rw-r--r--gallery_dl/extractor/khinsider.py1
-rw-r--r--gallery_dl/extractor/livedoor.py1
-rw-r--r--gallery_dl/extractor/luscious.py2
-rw-r--r--gallery_dl/extractor/mangadex.py3
-rw-r--r--gallery_dl/extractor/mangoxo.py3
-rw-r--r--gallery_dl/extractor/naver.py1
-rw-r--r--gallery_dl/extractor/nhentai.py44
-rw-r--r--gallery_dl/extractor/nijie.py1
-rw-r--r--gallery_dl/extractor/nozomi.py1
-rw-r--r--gallery_dl/extractor/oauth.py6
-rw-r--r--gallery_dl/extractor/patreon.py1
-rw-r--r--gallery_dl/extractor/photobucket.py2
-rw-r--r--gallery_dl/extractor/piczel.py1
-rw-r--r--gallery_dl/extractor/pixiv.py3
-rw-r--r--gallery_dl/extractor/pixnet.py2
-rw-r--r--gallery_dl/extractor/pornhub.py6
-rw-r--r--gallery_dl/extractor/reactor.py2
-rw-r--r--gallery_dl/extractor/reddit.py42
-rw-r--r--gallery_dl/extractor/seiga.py1
-rw-r--r--gallery_dl/extractor/seisoparty.py8
-rw-r--r--gallery_dl/extractor/senmanga.py1
-rw-r--r--gallery_dl/extractor/sexcom.py1
-rw-r--r--gallery_dl/extractor/shopify.py64
-rw-r--r--gallery_dl/extractor/simplyhentai.py2
-rw-r--r--gallery_dl/extractor/slickpic.py2
-rw-r--r--gallery_dl/extractor/slideshare.py1
-rw-r--r--gallery_dl/extractor/smugmug.py3
-rw-r--r--gallery_dl/extractor/speakerdeck.py1
-rw-r--r--gallery_dl/extractor/test.py1
-rw-r--r--gallery_dl/extractor/tsumino.py1
-rw-r--r--gallery_dl/extractor/twitter.py75
-rw-r--r--gallery_dl/extractor/unsplash.py4
-rw-r--r--gallery_dl/extractor/vanillarock.py2
-rw-r--r--gallery_dl/extractor/vsco.py1
-rw-r--r--gallery_dl/extractor/warosu.py1
-rw-r--r--gallery_dl/extractor/weasyl.py4
-rw-r--r--gallery_dl/extractor/wikiart.py1
-rw-r--r--gallery_dl/extractor/wikieat.py1
-rw-r--r--gallery_dl/extractor/xhamster.py2
-rw-r--r--gallery_dl/extractor/xvideos.py1
-rw-r--r--gallery_dl/job.py28
-rw-r--r--gallery_dl/util.py21
-rw-r--r--gallery_dl/version.py2
-rw-r--r--test/test_results.py5
-rw-r--r--test/test_util.py1
86 files changed, 607 insertions, 346 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1cfd97d..84ffc1e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,40 @@
# Changelog
+## 1.18.4 - 2021-09-04
+### Additions
+- [420chan] add `thread` and `board` extractors ([#1773](https://github.com/mikf/gallery-dl/issues/1773))
+- [deviantart] add `tag` extractor ([#1803](https://github.com/mikf/gallery-dl/issues/1803))
+- [deviantart] add `comments` option ([#1800](https://github.com/mikf/gallery-dl/issues/1800))
+- [deviantart] implement an `auto-watch` option ([#1466](https://github.com/mikf/gallery-dl/issues/1466), [#1757](https://github.com/mikf/gallery-dl/issues/1757))
+- [foolfuuka] add `gallery` extractor ([#1785](https://github.com/mikf/gallery-dl/issues/1785))
+- [furaffinity] expand URL pattern for searches ([#1780](https://github.com/mikf/gallery-dl/issues/1780))
+- [kemonoparty] automatically generate required DDoS-GUARD cookies ([#1779](https://github.com/mikf/gallery-dl/issues/1779))
+- [nhentai] add `favorite` extractor ([#1814](https://github.com/mikf/gallery-dl/issues/1814))
+- [shopify] support windsorstore.com ([#1793](https://github.com/mikf/gallery-dl/issues/1793))
+- [twitter] add `url` to user objects ([#1787](https://github.com/mikf/gallery-dl/issues/1787), [#1532](https://github.com/mikf/gallery-dl/issues/1532))
+- [twitter] expand t.co links in user descriptions ([#1787](https://github.com/mikf/gallery-dl/issues/1787), [#1532](https://github.com/mikf/gallery-dl/issues/1532))
+- show a warning if an extractor doesn't yield any results ([#1428](https://github.com/mikf/gallery-dl/issues/1428), [#1759](https://github.com/mikf/gallery-dl/issues/1759))
+- add a `j` format string conversion
+- implement a `fallback` option ([#1770](https://github.com/mikf/gallery-dl/issues/1770))
+- implement a `path-strip` option
+### Changes
+- [shopify] use API for product listings ([#1793](https://github.com/mikf/gallery-dl/issues/1793))
+- update default User-Agent headers
+### Fixes
+- [deviantart] prevent exceptions for "empty" videos ([#1796](https://github.com/mikf/gallery-dl/issues/1796))
+- [exhentai] improve image limits check ([#1808](https://github.com/mikf/gallery-dl/issues/1808))
+- [inkbunny] fix extraction ([#1816](https://github.com/mikf/gallery-dl/issues/1816))
+- [mangadex] prevent exceptions for manga without English title ([#1815](https://github.com/mikf/gallery-dl/issues/1815))
+- [oauth] use defaults when config values are set to `null` ([#1778](https://github.com/mikf/gallery-dl/issues/1778))
+- [pixiv] fix pixivision title extraction
+- [reddit] delay RedditAPI initialization ([#1813](https://github.com/mikf/gallery-dl/issues/1813))
+- [twitter] improve error reporting ([#1759](https://github.com/mikf/gallery-dl/issues/1759))
+- [twitter] fix issue when filtering quote tweets ([#1792](https://github.com/mikf/gallery-dl/issues/1792))
+- [twitter] fix `logout` option ([#1719](https://github.com/mikf/gallery-dl/issues/1719))
+### Removals
+- [deviantart] remove the "you need session cookies to download mature scraps" warning ([#1777](https://github.com/mikf/gallery-dl/issues/1777), [#1776](https://github.com/mikf/gallery-dl/issues/1776))
+- [foolslide] remove entry for kobato.hologfx.com
+
## 1.18.3 - 2021-08-13
### Additions
- [bbc] add `width` option ([#1706](https://github.com/mikf/gallery-dl/issues/1706))
diff --git a/PKG-INFO b/PKG-INFO
index 58f985a..1dfa877 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.18.3
+Version: 1.18.4
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.4/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.4/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/README.rst b/README.rst
index dcf0337..5c09275 100644
--- a/README.rst
+++ b/README.rst
@@ -64,8 +64,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.4/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.4/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 363ff0a..edf1068 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2021-08-13" "1.18.3" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2021-09-04" "1.18.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 008129f..869d605 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2021-08-13" "1.18.3" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2021-09-04" "1.18.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -224,17 +224,6 @@ For example with \f[I]"parent-metadata": "_p_"\f[]:
Share number of skipped downloads between parent and child extractors.
-.SS extractor.*.url-metadata
-.IP "Type:" 6
-\f[I]string\f[]
-
-.IP "Default:" 9
-\f[I]null\f[]
-
-.IP "Description:" 4
-Insert a file's download URL into its metadata dictionary as the given name.
-
-
.SS extractor.*.path-restrict
.IP "Type:" 6
\f[I]string\f[] or \f[I]object\f[]
@@ -299,6 +288,28 @@ Note: In a string with 2 or more characters, \f[I][]^-\\\f[] need to be
escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[]
+.SS extractor.*.path-strip
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"auto"\f[]
+
+.IP "Description:" 4
+Set of characters to remove from the end of generated path segment names
+using \f[I]str.rstrip()\f[]
+
+Special values:
+
+.br
+* \f[I]"auto"\f[]: Use characters from \f[I]"unix"\f[] or \f[I]"windows"\f[]
+depending on the local operating system
+.br
+* \f[I]"unix"\f[]: \f[I]""\f[]
+.br
+* \f[I]"windows"\f[]: \f[I]". "\f[]
+
+
.SS extractor.*.extension-map
.IP "Type:" 6
\f[I]object\f[]
@@ -599,6 +610,17 @@ Default value used for missing or undefined keyword names in
\f[I]format strings\f[].
+.SS extractor.*.url-metadata
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Insert a file's download URL into its metadata dictionary as the given name.
+
+
.SS extractor.*.category-transfer
.IP "Type:" 6
\f[I]bool\f[]
@@ -777,6 +799,17 @@ functions (\f[I]postprocessors\f[], \f[I]download archive\f[], etc.)
will be executed as normal.
+.SS extractor.*.fallback
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Use fallback download URLs when a download fails.
+
+
.SS extractor.*.image-range
.IP "Type:" 6
\f[I]string\f[]
@@ -997,6 +1030,17 @@ to access 18+ content without \f[I]API Key\f[].
See \f[I]Filters\f[] for details.
+.SS extractor.deviantart.comments
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Extract \f[I]comments\f[] metadata.
+
+
.SS extractor.deviantart.extra
.IP "Type:" 6
\f[I]bool\f[]
@@ -1160,6 +1204,18 @@ Note: The \f[I]refresh-token\f[] becomes invalid
or whenever your \f[I]cache file\f[] is deleted or cleared.
+.SS extractor.deviantart.auto-watch
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Automatically watch users when encountering "Watchers-Only Deviations"
+(requires a \f[I]refresh-token\f[]).
+
+
.SS extractor.deviantart.wait-min
.IP "Type:" 6
\f[I]integer\f[]
@@ -2155,6 +2211,17 @@ Fetch media from all Tweets and replies in a \f[I]conversation
<https://help.twitter.com/en/using-twitter/twitter-conversations>\f[].
+.SS extractor.twitter.logout
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Logout and retry as guest when access to another user's Tweets is blocked.
+
+
.SS extractor.twitter.quoted
.IP "Type:" 6
\f[I]bool\f[]
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 020b802..f8b6c36 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -10,10 +10,11 @@
"proxy": null,
"skip": true,
- "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0",
+ "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0",
"retries": 4,
"timeout": 30.0,
"verify": true,
+ "fallback": true,
"sleep": 0,
"sleep-request": 0,
@@ -22,6 +23,7 @@
"path-restrict": "auto",
"path-replace": "_",
"path-remove": "\\u0000-\\u001f\\u007f",
+ "path-strip": "auto",
"extension-map": {
"jpeg": "jpg",
"jpe" : "jpg",
@@ -65,6 +67,7 @@
{
"client-id": null,
"client-secret": null,
+ "comments": false,
"extra": false,
"flat": true,
"folders": false,
@@ -140,7 +143,7 @@
"username": null,
"password": null,
"include": "posts",
- "sleep-request": 5.0,
+ "sleep-request": 8.0,
"videos": true
},
"khinsider":
@@ -215,8 +218,7 @@
"id-min": "0",
"id-max": "zik0zj",
"recursion": 0,
- "videos": true,
- "user-agent": "Python:gallery-dl:0.8.4 (by /u/mikf1)"
+ "videos": true
},
"redgifs":
{
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 3e6ca0e..30c9c75 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.18.3
+Version: 1.18.4
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.3/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.4/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.4/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index c10b36d..5c427d6 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -36,6 +36,7 @@ gallery_dl/downloader/ytdl.py
gallery_dl/extractor/2chan.py
gallery_dl/extractor/35photo.py
gallery_dl/extractor/3dbooru.py
+gallery_dl/extractor/420chan.py
gallery_dl/extractor/4chan.py
gallery_dl/extractor/500px.py
gallery_dl/extractor/8kun.py
diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py
index f5d2a4c..c92969b 100644
--- a/gallery_dl/extractor/2chan.py
+++ b/gallery_dl/extractor/2chan.py
@@ -35,7 +35,6 @@ class _2chanThreadExtractor(Extractor):
self.server, self.board, self.thread)
page = self.request(url).text
data = self.metadata(page)
- yield Message.Version, 1
yield Message.Directory, data
for post in self.posts(page):
if "filename" not in post:
diff --git a/gallery_dl/extractor/35photo.py b/gallery_dl/extractor/35photo.py
index 27634de..6a40d41 100644
--- a/gallery_dl/extractor/35photo.py
+++ b/gallery_dl/extractor/35photo.py
@@ -22,7 +22,6 @@ class _35photoExtractor(Extractor):
def items(self):
first = True
data = self.metadata()
- yield Message.Version, 1
for photo_id in self.photos():
for photo in self._photo_data(photo_id):
diff --git a/gallery_dl/extractor/420chan.py b/gallery_dl/extractor/420chan.py
new file mode 100644
index 0000000..fd0172e
--- /dev/null
+++ b/gallery_dl/extractor/420chan.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://420chan.org/"""
+
+from .common import Extractor, Message
+
+
+class _420chanThreadExtractor(Extractor):
+ """Extractor for 420chan threads"""
+ category = "420chan"
+ subcategory = "thread"
+ directory_fmt = ("{category}", "{board}", "{thread} {title}")
+ archive_fmt = "{board}_{thread}_{filename}"
+ pattern = r"(?:https?://)?boards\.420chan\.org/([^/?#]+)/thread/(\d+)"
+ test = ("https://boards.420chan.org/ani/thread/33251/chow-chows", {
+ "pattern": r"https://boards\.420chan\.org/ani/src/\d+\.jpg",
+ "content": "b07c803b0da78de159709da923e54e883c100934",
+ "count": 2,
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.board, self.thread = match.groups()
+
+ def items(self):
+ url = "https://api.420chan.org/{}/res/{}.json".format(
+ self.board, self.thread)
+ posts = self.request(url).json()["posts"]
+
+ data = {
+ "board" : self.board,
+ "thread": self.thread,
+ "title" : posts[0].get("sub") or posts[0]["com"][:50],
+ }
+
+ yield Message.Directory, data
+ for post in posts:
+ if "filename" in post:
+ post.update(data)
+ post["extension"] = post["ext"][1:]
+ url = "https://boards.420chan.org/{}/src/{}{}".format(
+ post["board"], post["filename"], post["ext"])
+ yield Message.Url, url, post
+
+
+class _420chanBoardExtractor(Extractor):
+ """Extractor for 420chan boards"""
+ category = "420chan"
+ subcategory = "board"
+ pattern = r"(?:https?://)?boards\.420chan\.org/([^/?#]+)/\d*$"
+ test = ("https://boards.420chan.org/po/", {
+ "pattern": _420chanThreadExtractor.pattern,
+ "count": ">= 100",
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.board = match.group(1)
+
+ def items(self):
+ url = "https://api.420chan.org/{}/threads.json".format(self.board)
+ threads = self.request(url).json()
+
+ for page in threads:
+ for thread in page["threads"]:
+ url = "https://boards.420chan.org/{}/thread/{}/".format(
+ self.board, thread["no"])
+ thread["page"] = page["page"]
+ thread["_extractor"] = _420chanThreadExtractor
+ yield Message.Queue, url, thread
diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py
index bed30b1..bf9615d 100644
--- a/gallery_dl/extractor/4chan.py
+++ b/gallery_dl/extractor/4chan.py
@@ -49,7 +49,6 @@ class _4chanThreadExtractor(Extractor):
"title" : text.unescape(title)[:50],
}
- yield Message.Version, 1
yield Message.Directory, data
for post in posts:
if "filename" in post:
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index 696b370..8c6fa09 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -29,7 +29,6 @@ class _500pxExtractor(Extractor):
def items(self):
first = True
data = self.metadata()
- yield Message.Version, 1
for photo in self.photos():
url = photo["images"][-1]["url"]
diff --git a/gallery_dl/extractor/8kun.py b/gallery_dl/extractor/8kun.py
index e55bb08..9232f88 100644
--- a/gallery_dl/extractor/8kun.py
+++ b/gallery_dl/extractor/8kun.py
@@ -49,7 +49,6 @@ class _8kunThreadExtractor(Extractor):
"num" : 0,
}
- yield Message.Version, 1
yield Message.Directory, data
for post in posts:
if "filename" in post:
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index e130db2..f68ea9f 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -12,6 +12,7 @@ modules = [
"2chan",
"35photo",
"3dbooru",
+ "420chan",
"4chan",
"500px",
"8kun",
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index f13edf7..71d3320 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -19,7 +19,6 @@ class BehanceExtractor(Extractor):
root = "https://www.behance.net"
def items(self):
- yield Message.Version, 1
for gallery in self.galleries():
gallery["_extractor"] = BehanceGalleryExtractor
yield Message.Queue, gallery["url"], self._update(gallery)
@@ -99,7 +98,6 @@ class BehanceGalleryExtractor(BehanceExtractor):
imgs = self.get_images(data)
data["count"] = len(imgs)
- yield Message.Version, 1
yield Message.Directory, data
for data["num"], (url, module) in enumerate(imgs, 1):
data["module"] = module
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 60170dc..7e7c282 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -34,7 +34,6 @@ class BloggerExtractor(Extractor):
self.api = BloggerAPI(self)
def items(self):
- yield Message.Version, 1
blog = self.api.blog_by_url("http://" + self.blog)
blog["pages"] = blog["pages"]["totalItems"]
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 2533ae5..d9f69ab 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -226,7 +226,7 @@ class Extractor():
elif platform == "linux":
platform = "X11; Linux x86_64"
elif platform == "macos":
- platform = "Macintosh; Intel Mac OS X 11.2"
+ platform = "Macintosh; Intel Mac OS X 11.5"
if browser == "chrome":
_emulate_browser_chrome(session, platform)
@@ -235,7 +235,7 @@ class Extractor():
else:
headers["User-Agent"] = self.config("user-agent", (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
- "rv:78.0) Gecko/20100101 Firefox/78.0"))
+ "rv:91.0) Gecko/20100101 Firefox/91.0"))
headers["Accept"] = "*/*"
headers["Accept-Language"] = "en-US,en;q=0.5"
headers["Accept-Encoding"] = "gzip, deflate"
@@ -339,6 +339,11 @@ class Extractor():
return True
return False
+ def _prepare_ddosguard_cookies(self):
+ if not self._cookiejar.get("__ddg2", domain=self.cookiedomain):
+ self._cookiejar.set(
+ "__ddg2", util.generate_token(), domain=self.cookiedomain)
+
def _get_date_min_max(self, dmin=None, dmax=None):
"""Retrieve and parse 'date-min' and 'date-max' config values"""
def get(key, default):
@@ -448,7 +453,6 @@ class GalleryExtractor(Extractor):
pass
images = enumerate(imgs, 1)
- yield Message.Version, 1
yield Message.Directory, data
for data[self.enum], (url, imgdata) in images:
if imgdata:
@@ -504,7 +508,6 @@ class MangaExtractor(Extractor):
if self.reverse:
chapters.reverse()
- yield Message.Version, 1
for chapter, data in chapters:
data["_extractor"] = self.chapterclass
yield Message.Queue, chapter, data
@@ -602,8 +605,8 @@ class HTTPSAdapter(HTTPAdapter):
def _emulate_browser_firefox(session, platform):
headers = session.headers
- headers["User-Agent"] = ("Mozilla/5.0 (" + platform + "; rv:78.0) "
- "Gecko/20100101 Firefox/78.0")
+ headers["User-Agent"] = ("Mozilla/5.0 (" + platform + "; rv:91.0) "
+ "Gecko/20100101 Firefox/91.0")
headers["Accept"] = ("text/html,application/xhtml+xml,"
"application/xml;q=0.9,image/webp,*/*;q=0.8")
headers["Accept-Language"] = "en-US,en;q=0.5"
@@ -636,13 +639,13 @@ def _emulate_browser_firefox(session, platform):
def _emulate_browser_chrome(session, platform):
if platform.startswith("Macintosh"):
- platform = platform.replace(".", "_") + "_0"
+ platform = platform.replace(".", "_") + "_2"
headers = session.headers
headers["Upgrade-Insecure-Requests"] = "1"
headers["User-Agent"] = (
"Mozilla/5.0 (" + platform + ") AppleWebKit/537.36 "
- "(KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36")
+ "(KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36")
headers["Accept"] = ("text/html,application/xhtml+xml,application/xml;"
"q=0.9,image/webp,image/apng,*/*;q=0.8")
headers["Referer"] = None
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 900fde8..b4ac742 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -41,6 +41,7 @@ class DeviantartExtractor(Extractor):
self.extra = self.config("extra", False)
self.quality = self.config("quality", "100")
self.original = self.config("original", True)
+ self.comments = self.config("comments", False)
self.user = match.group(1) or match.group(2)
self.group = False
self.api = None
@@ -66,8 +67,6 @@ class DeviantartExtractor(Extractor):
def items(self):
self.api = DeviantartOAuthAPI(self)
- if not self.api.refresh_token_key:
- self._fetch_premium = self._fetch_premium_notoken
if self.user:
profile = self.api.user_profile(self.user)
@@ -78,7 +77,6 @@ class DeviantartExtractor(Extractor):
else:
self.user = profile["user"]["username"]
- yield Message.Version, 1
for deviation in self.deviations():
if isinstance(deviation, tuple):
url, data = deviation
@@ -86,8 +84,10 @@ class DeviantartExtractor(Extractor):
continue
if "premium_folder_data" in deviation:
- if not self._fetch_premium(deviation):
+ data = self._fetch_premium(deviation)
+ if not data:
continue
+ deviation.update(data)
self.prepare(deviation)
yield Message.Directory, deviation
@@ -117,7 +117,7 @@ class DeviantartExtractor(Extractor):
content = self.api.deviation_download(deviation["deviationid"])
yield self.commit(deviation, content)
- if "videos" in deviation:
+ if "videos" in deviation and deviation["videos"]:
video = max(deviation["videos"],
key=lambda x: text.parse_int(x["quality"][:-1]))
yield self.commit(deviation, video)
@@ -163,6 +163,12 @@ class DeviantartExtractor(Extractor):
deviation["date"] = text.parse_timestamp(
deviation["published_time"])
+ if self.comments:
+ deviation["comments"] = (
+ self.api.comments_deviation(deviation["deviationid"])
+ if deviation["stats"]["comments"] else ()
+ )
+
# filename metadata
alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
deviation["index_base36"] = util.bencode(deviation["index"], alphabet)
@@ -307,39 +313,48 @@ class DeviantartExtractor(Extractor):
self.wait(seconds=180)
def _fetch_premium(self, deviation):
- cache = self._premium_cache
-
- if deviation["deviationid"] not in cache:
+ try:
+ return self._premium_cache[deviation["deviationid"]]
+ except KeyError:
+ pass
- # check accessibility
+ # check accessibility
+ if self.api.refresh_token_key:
dev = self.api.deviation(deviation["deviationid"], False)
has_access = dev["premium_folder_data"]["has_access"]
-
- if has_access:
- self.log.info("Fetching premium folder data")
- else:
- self.log.warning("Unable to access premium content (type: %s)",
- dev["premium_folder_data"]["type"])
- # fill cache
- for dev in self.api.gallery(
- deviation["author"]["username"],
- deviation["premium_folder_data"]["gallery_id"],
- public=False,
- ):
- cache[dev["deviationid"]] = dev if has_access else None
-
- data = cache[deviation["deviationid"]]
- if data:
- deviation.update(data)
- return True
- return False
-
- def _fetch_premium_notoken(self, deviation):
- if not self._premium_cache:
+ username = dev["author"]["username"]
+ folder = dev["premium_folder_data"]
+
+ if not has_access and folder["type"] == "watchers" and \
+ self.config("auto-watch"):
+ if self.api.user_friends_watch(username):
+ has_access = True
+ self.log.info(
+ "Watching %s for premium folder access", username)
+ else:
+ self.log.warning(
+ "Error when trying to watch %s. "
+ "Try again with a new refresh-token", username)
+ else:
self.log.warning(
"Unable to access premium content (no refresh-token)")
- self._premium_cache = True
- return False
+ self._fetch_premium = lambda _: None
+ return None
+
+ if has_access:
+ self.log.info("Fetching premium folder data")
+ else:
+ self.log.warning("Unable to access premium content (type: %s)",
+ folder["type"])
+ self._fetch_premium = lambda _: None
+ return None
+
+ # fill cache
+ cache = self._premium_cache
+ for dev in self.api.gallery(
+ username, folder["gallery_id"], public=False):
+ cache[dev["deviationid"]] = dev
+ return cache[deviation["deviationid"]]
class DeviantartUserExtractor(DeviantartExtractor):
@@ -754,6 +769,30 @@ class DeviantartPopularExtractor(DeviantartExtractor):
deviation["popular"] = self.popular
+class DeviantartTagExtractor(DeviantartExtractor):
+ """Extractor for deviations from tag searches"""
+ subcategory = "tag"
+ directory_fmt = ("{category}", "Tags", "{search_tags}")
+ archive_fmt = "T_{search_tags}_{index}.{extension}"
+ pattern = r"(?:https?://)?www\.deviantart\.com/tag/([^/?#]+)"
+ test = ("https://www.deviantart.com/tag/nature", {
+ "options": (("original", False),),
+ "range": "1-30",
+ "count": 30,
+ })
+
+ def __init__(self, match):
+ DeviantartExtractor.__init__(self, match)
+ self.tag = text.unquote(match.group(1))
+
+ def deviations(self):
+ return self.api.browse_tags(self.tag, self.offset)
+
+ def prepare(self, deviation):
+ DeviantartExtractor.prepare(self, deviation)
+ deviation["search_tags"] = self.tag
+
+
class DeviantartWatchExtractor(DeviantartExtractor):
"""Extractor for Deviations from watched users"""
subcategory = "watch"
@@ -795,7 +834,9 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
"exception": exception.NotFoundError,
}),
(("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), {
+ "options": (("comments", True),),
"pattern": r"https://api-da\.wixmp\.com/_api/download/file",
+ "keyword": {"comments": list},
}),
# wixmp URL rewrite
(("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), {
@@ -890,15 +931,9 @@ class DeviantartScrapsExtractor(DeviantartExtractor):
)
cookiedomain = ".deviantart.com"
cookienames = ("auth", "auth_secure", "userinfo")
- _warning = True
def deviations(self):
eclipse_api = DeviantartEclipseAPI(self)
- if self._warning:
- DeviantartScrapsExtractor._warning = False
- if not self._check_cookies(self.cookienames):
- self.log.warning(
- "No session cookies set: Unable to fetch mature scraps.")
for obj in eclipse_api.gallery_scraps(self.user, self.offset):
deviation = obj["deviation"]
@@ -924,7 +959,6 @@ class DeviantartFollowingExtractor(DeviantartExtractor):
def items(self):
eclipse_api = DeviantartEclipseAPI(self)
- yield Message.Version, 1
for user in eclipse_api.user_watching(self.user, self.offset):
url = "{}/{}".format(self.root, user["username"])
user["_extractor"] = DeviantartUserExtractor
@@ -1003,6 +1037,17 @@ class DeviantartOAuthAPI():
}
return self._pagination(endpoint, params)
+ def browse_tags(self, tag, offset=0):
+ """ Browse a tag """
+ endpoint = "browse/tags"
+ params = {
+ "tag" : tag,
+ "offset" : offset,
+ "limit" : 50,
+ "mature_content": self.mature,
+ }
+ return self._pagination(endpoint, params)
+
def browse_user_journals(self, username, offset=0):
"""Yield all journal entries of a specific user"""
endpoint = "browse/user/journals"
@@ -1023,7 +1068,14 @@ class DeviantartOAuthAPI():
endpoint = "collections/folders"
params = {"username": username, "offset": offset, "limit": 50,
"mature_content": self.mature}
- return self._pagination_folders(endpoint, params)
+ return self._pagination_list(endpoint, params)
+
+ def comments_deviation(self, deviation_id, offset=0):
+ """Fetch comments posted on a deviation"""
+ endpoint = "comments/deviation/" + deviation_id
+ params = {"maxdepth": "5", "offset": offset, "limit": 50,
+ "mature_content": self.mature}
+ return self._pagination_list(endpoint, params=params, key="thread")
def deviation(self, deviation_id, public=True):
"""Query and return info about a single Deviation"""
@@ -1039,13 +1091,13 @@ class DeviantartOAuthAPI():
"""Get extended content of a single Deviation"""
endpoint = "deviation/content"
params = {"deviationid": deviation_id}
- return self._call(endpoint, params, public=public)
+ return self._call(endpoint, params=params, public=public)
def deviation_download(self, deviation_id, public=True):
"""Get the original file download (if allowed)"""
endpoint = "deviation/download/" + deviation_id
params = {"mature_content": self.mature}
- return self._call(endpoint, params, public=public)
+ return self._call(endpoint, params=params, public=public)
def deviation_metadata(self, deviations):
""" Fetch deviation metadata for a set of deviations"""
@@ -1056,7 +1108,7 @@ class DeviantartOAuthAPI():
for num, deviation in enumerate(deviations)
)
params = {"mature_content": self.mature}
- return self._call(endpoint, params)["metadata"]
+ return self._call(endpoint, params=params)["metadata"]
def gallery(self, username, folder_id, offset=0, extend=True, public=True):
"""Yield all Deviation-objects contained in a gallery folder"""
@@ -1078,7 +1130,7 @@ class DeviantartOAuthAPI():
endpoint = "gallery/folders"
params = {"username": username, "offset": offset, "limit": 50,
"mature_content": self.mature}
- return self._pagination_folders(endpoint, params)
+ return self._pagination_list(endpoint, params)
@memcache(keyarg=1)
def user_profile(self, username):
@@ -1086,6 +1138,29 @@ class DeviantartOAuthAPI():
endpoint = "user/profile/" + username
return self._call(endpoint, fatal=False)
+ def user_friends_watch(self, username):
+ """Watch a user"""
+ endpoint = "user/friends/watch/" + username
+ data = {
+ "watch[friend]" : "0",
+ "watch[deviations]" : "0",
+ "watch[journals]" : "0",
+ "watch[forum_threads]": "0",
+ "watch[critiques]" : "0",
+ "watch[scraps]" : "0",
+ "watch[activity]" : "0",
+ "watch[collections]" : "0",
+ "mature_content" : self.mature,
+ }
+ return self._call(
+ endpoint, method="POST", data=data, public=False, fatal=False)
+
+ def user_friends_unwatch(self, username):
+ """Unwatch a user"""
+ endpoint = "user/friends/unwatch/" + username
+ return self._call(
+ endpoint, method="POST", public=False, fatal=False)
+
def authenticate(self, refresh_token_key):
"""Authenticate the application by requesting an access token"""
self.headers["Authorization"] = \
@@ -1117,16 +1192,18 @@ class DeviantartOAuthAPI():
refresh_token_key, data["refresh_token"])
return "Bearer " + data["access_token"]
- def _call(self, endpoint, params=None, fatal=True, public=True):
+ def _call(self, endpoint, fatal=True, public=True, **kwargs):
"""Call an API endpoint"""
url = "https://www.deviantart.com/api/v1/oauth2/" + endpoint
+ kwargs["fatal"] = None
+
while True:
if self.delay:
time.sleep(self.delay)
self.authenticate(None if public else self.refresh_token_key)
- response = self.extractor.request(
- url, headers=self.headers, params=params, fatal=None)
+ kwargs["headers"] = self.headers
+ response = self.extractor.request(url, **kwargs)
data = response.json()
status = response.status_code
@@ -1151,14 +1228,14 @@ class DeviantartOAuthAPI():
return data
def _pagination(self, endpoint, params,
- extend=True, public=True, unpack=False):
+ extend=True, public=True, unpack=False, key="results"):
warn = True
while True:
- data = self._call(endpoint, params, public=public)
- if "results" not in data:
+ data = self._call(endpoint, params=params, public=public)
+ if key not in data:
self.log.error("Unexpected API response: %s", data)
return
- results = data["results"]
+ results = data[key]
if unpack:
results = [item["journal"] for item in results
@@ -1183,11 +1260,16 @@ class DeviantartOAuthAPI():
if not data["has_more"]:
return
- params["offset"] = data["next_offset"]
+ if "next_cursor" in data:
+ params["offset"] = None
+ params["cursor"] = data["next_cursor"]
+ else:
+ params["offset"] = data["next_offset"]
+ params["cursor"] = None
- def _pagination_folders(self, endpoint, params):
+ def _pagination_list(self, endpoint, params, key="results"):
result = []
- result.extend(self._pagination(endpoint, params, False))
+ result.extend(self._pagination(endpoint, params, False, key=key))
return result
def _metadata(self, deviations):
diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py
index 8505b0b..6ddf2ec 100644
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@@ -62,6 +62,5 @@ class DirectlinkExtractor(Extractor):
data["_http_headers"] = {
"Referer": self.url.encode("latin-1", "ignore")}
- yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, self.url, data
diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py
index 67051c9..4541d25 100644
--- a/gallery_dl/extractor/dynastyscans.py
+++ b/gallery_dl/extractor/dynastyscans.py
@@ -112,7 +112,6 @@ class DynastyscansSearchExtractor(DynastyscansBase, Extractor):
self.query = match.group(1) or ""
def items(self):
- yield Message.Version, 1
yield Message.Directory, {}
for image_id in self.images():
image = self._parse_image_page(image_id)
@@ -137,7 +136,7 @@ class DynastyscansImageExtractor(DynastyscansSearchExtractor):
pattern = BASE_PATTERN + r"/images/(\d+)"
test = ("https://dynasty-scans.com/images/1245", {
"url": "15e54bd94148a07ed037f387d046c27befa043b2",
- "keyword": "3b630c6139e5ff06e141541d57960f8a2957efbb",
+ "keyword": "0d8976c2d6fbc9ed6aa712642631b96e456dc37f",
})
def images(self):
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index bccd6c8..aabfe6b 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -203,8 +203,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
def _validate_response(response):
# declared inside 'items()' to be able to access 'data'
- if not response.history and \
- response.headers.get("content-length") == "137":
+ if not response.history and response.headers.get(
+ "content-type", "").startswith("text/html"):
self._report_limits(data)
return True
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index 06054b2..cc6ee97 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -31,7 +31,6 @@ class FanboxExtractor(Extractor):
self.embeds = self.config("embeds", True)
def items(self):
- yield Message.Version, 1
if self._warning:
if "FANBOXSESSID" not in self.session.cookies:
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index 16fed4e..9df2bef 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -20,7 +20,6 @@ class FantiaExtractor(Extractor):
_warning = True
def items(self):
- yield Message.Version, 1
if self._warning:
if "_session_id" not in self.session.cookies:
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index cf4c033..6c5c7df 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -155,7 +155,6 @@ class FlickrAlbumExtractor(FlickrExtractor):
return self._album_items()
def _album_items(self):
- yield Message.Version, 1
data = FlickrExtractor.metadata(self)
data["_extractor"] = FlickrAlbumExtractor
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index b82160f..9b89999 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -273,3 +273,47 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
if len(posts) <= 3:
return
params["page"] += 1
+
+
+class FoolfuukaGalleryExtractor(FoolfuukaExtractor):
+ """Base extractor for FoolFuuka galleries"""
+ subcategory = "gallery"
+ directory_fmt = ("{category}", "{board}", "gallery")
+ pattern = BASE_PATTERN + r"/([^/?#]+)/gallery(?:/(\d+))?"
+ test = (
+ ("https://archive.4plebs.org/tg/gallery/1"),
+ ("https://archived.moe/gd/gallery/2"),
+ ("https://archiveofsins.com/h/gallery/3"),
+ ("https://arch.b4k.co/meta/gallery/"),
+ ("https://desuarchive.org/a/gallery/5"),
+ ("https://boards.fireden.net/sci/gallery/6"),
+ ("https://archive.nyafuu.org/c/gallery/7"),
+ ("https://rbt.asia/g/gallery/8"),
+ ("https://thebarchive.com/b/gallery/9"),
+ ("https://archive.wakarimasen.moe/a/gallery/10"),
+ )
+
+ def __init__(self, match):
+ FoolfuukaExtractor.__init__(self, match)
+
+ board = match.group(match.lastindex)
+ if board.isdecimal():
+ self.board = match.group(match.lastindex-1)
+ self.pages = (board,)
+ else:
+ self.board = board
+ self.pages = map(format, itertools.count(1))
+
+ def metadata(self):
+ return {"board": self.board}
+
+ def posts(self):
+ base = "{}/_/api/chan/gallery/?board={}&page=".format(
+ self.root, self.board)
+
+ for page in self.pages:
+ with self.request(base + page) as response:
+ posts = response.json()
+ if not posts:
+ return
+ yield from posts
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index f8664e7..a1470dc 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -39,9 +39,6 @@ class FoolslideExtractor(BaseExtractor):
BASE_PATTERN = FoolslideExtractor.update({
- "dokireader": {
- "root": "https://kobato.hologfx.com/reader",
- },
"kireicake": {
"root": "https://reader.kireicake.com",
},
@@ -66,10 +63,6 @@ class FoolslideChapterExtractor(FoolslideExtractor):
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
test = (
- (("https://kobato.hologfx.com/reader/read/"
- "hitoribocchi_no_oo_seikatsu/en/3/34"), {
- "keyword": "6e719ac86f0c6dab89390dd7e507e678459e0dbc",
- }),
("https://reader.kireicake.com/read/wonderland/en/1/1/", {
"url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e",
"keyword": "9f80947920a325e33aea7f5cd69ea669171903b6",
@@ -94,7 +87,6 @@ class FoolslideChapterExtractor(FoolslideExtractor):
data["count"] = len(imgs)
data["chapter_id"] = text.parse_int(imgs[0]["chapter_id"])
- yield Message.Version, 1
yield Message.Directory, data
for data["page"], image in enumerate(imgs, 1):
try:
@@ -128,11 +120,6 @@ class FoolslideMangaExtractor(FoolslideExtractor):
categorytransfer = True
pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
test = (
- (("https://kobato.hologfx.com/reader/series/"
- "boku_ha_ohimesama_ni_narenai/"), {
- "url": "1c1f5a7258ce4f631f5fc32be548d78a6a57990d",
- "keyword": "614d89a6045b85c822cbd3e67578ea7577dfc995",
- }),
("https://reader.kireicake.com/series/wonderland/", {
"url": "d067b649af1cc88fa8c8b698fde04a10909fd169",
"keyword": "268f43772fb239888ca5c5f6a4f65f99ffb3eefb",
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 8c2887e..9516dfa 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -232,16 +232,27 @@ class FuraffinitySearchExtractor(FuraffinityExtractor):
"""Extractor for furaffinity search results"""
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
- pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
- test = ("https://www.furaffinity.net/search/?q=cute", {
- "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
- r"/art/[^/]+/\d+/\d+.\w+\.\w+",
- "range": "45-50",
- "count": 6,
- })
+ pattern = BASE_PATTERN + r"/search(?:/([^/?#]+))?/?[?&]([^#]+)"
+ test = (
+ ("https://www.furaffinity.net/search/?q=cute", {
+ "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
+ r"/art/[^/]+/\d+/\d+.\w+\.\w+",
+ "range": "45-50",
+ "count": 6,
+ }),
+ ("https://www.furaffinity.net/search/cute&rating-general=0", {
+ "range": "1",
+ "count": 1,
+ }),
+ )
+
+ def __init__(self, match):
+ FuraffinityExtractor.__init__(self, match)
+ self.query = text.parse_query(match.group(2))
+ if self.user and "q" not in self.query:
+ self.query["q"] = text.unescape(self.user)
def metadata(self):
- self.query = text.parse_query(self.user)
return {"search": self.query.get("q")}
def posts(self):
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index 5732816..6d31f7d 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -102,7 +102,7 @@ class GfycatImageExtractor(GfycatExtractor):
"keyword": {
"gfyId": "graygenerouscowrie",
"gfyName": "GrayGenerousCowrie",
- "gfyNumber": "755075459",
+ "gfyNumber": 755075459,
"title": "Bottom's up",
"username": "jackson3oh3",
"createDate": 1495884169,
diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py
index a5bebdd..0327f56 100644
--- a/gallery_dl/extractor/hentaifox.py
+++ b/gallery_dl/extractor/hentaifox.py
@@ -129,7 +129,6 @@ class HentaifoxSearchExtractor(HentaifoxBase, Extractor):
self.path = match.group(1)
def items(self):
- yield Message.Version, 1
for gallery in self.galleries():
yield Message.Queue, gallery["url"], gallery
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 7c338a8..01e5629 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2015-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -35,12 +35,12 @@ class HitomiGalleryExtractor(GalleryExtractor):
}),
# Game CG with scenes (#321)
("https://hitomi.la/galleries/733697.html", {
- "url": "1de8510bd4c3048a1cbbf242505d8449e93ba5a4",
+ "url": "e057652b40629d3d72b0ef059c6ec7556417139c",
"count": 210,
}),
# fallback for galleries only available through /reader/ URLs
("https://hitomi.la/galleries/1045954.html", {
- "url": "681bb07d8ce4d0c4d0592e47b239b6e42d566386",
+ "url": "779b94b47d5f0c2341db03499270d2b5370196f6",
"count": 1413,
}),
# gallery with "broken" redirect
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index 8785f65..7922e84 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2020 Mike Fährmann
+# Copyright 2016-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -36,12 +36,14 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
test = (
("https://www.imagefap.com/pictures/7102714", {
- "pattern": r"https://cdn.imagefap.com/images/full/\d+/\d+/\d+.jpg",
+ "pattern": r"https://cdnh\.imagefap\.com"
+ r"/images/full/\d+/\d+/\d+\.jpg",
"keyword": "2ba96e84c2952c4750e9fa94a3f2b1f965cec2f3",
"content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab",
}),
("https://www.imagefap.com/gallery/5486966", {
- "pattern": r"https://cdn.imagefap.com/images/full/\d+/\d+/\d+.jpg",
+ "pattern": r"https://cdnh\.imagefap\.com"
+ r"/images/full/\d+/\d+/\d+\.jpg",
"keyword": "3e24eace5b09639b881ebd393165862feb46adde",
}),
("https://www.imagefap.com/gallery.php?gid=7102714"),
@@ -57,7 +59,6 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
url = "{}/pictures/{}/".format(self.root, self.gid)
page = self.request(url).text
data = self.get_job_metadata(page)
- yield Message.Version, 1
yield Message.Directory, data
for url, image in self.get_images():
data.update(image)
@@ -106,7 +107,8 @@ class ImagefapImageExtractor(ImagefapExtractor):
pattern = BASE_PATTERN + r"/photo/(\d+)"
test = (
("https://www.imagefap.com/photo/1369341772/", {
- "pattern": r"https://cdn.imagefap.com/images/full/\d+/\d+/\d+.jpg",
+ "pattern": r"https://cdnh\.imagefap\.com"
+ r"/images/full/\d+/\d+/\d+\.jpg",
"keyword": "8894e45f7262020d8d66ce59917315def1fc475b",
}),
("https://beta.imagefap.com/photo/1369341772/"),
@@ -118,7 +120,6 @@ class ImagefapImageExtractor(ImagefapExtractor):
def items(self):
url, data = self.get_image()
- yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, url, data
@@ -169,7 +170,6 @@ class ImagefapUserExtractor(ImagefapExtractor):
self.user, self.user_id = match.groups()
def items(self):
- yield Message.Version, 1
for gid, name in self.get_gallery_data():
url = "{}/gallery/{}".format(self.root, gid)
data = {
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 9328437..13996d0 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -57,7 +57,6 @@ class ImagehostImageExtractor(Extractor):
if self.https and url.startswith("http:"):
url = "https:" + url[5:]
- yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, url, data
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
index 5dcca62..1e875f0 100644
--- a/gallery_dl/extractor/imgbb.py
+++ b/gallery_dl/extractor/imgbb.py
@@ -43,7 +43,6 @@ class ImgbbExtractor(Extractor):
data = self.metadata(page)
first = True
- yield Message.Version, 1
for img in self.images(page):
image = {
"id" : img["url_viewer"].rpartition("/")[2],
@@ -230,6 +229,5 @@ class ImgbbImageExtractor(ImgbbExtractor):
}
image["extension"] = text.ext_from_url(image["url"])
- yield Message.Version, 1
yield Message.Directory, image
yield Message.Url, image["url"], image
diff --git a/gallery_dl/extractor/imgbox.py b/gallery_dl/extractor/imgbox.py
index 516ef18..7ae39c0 100644
--- a/gallery_dl/extractor/imgbox.py
+++ b/gallery_dl/extractor/imgbox.py
@@ -20,7 +20,6 @@ class ImgboxExtractor(Extractor):
def items(self):
data = self.get_job_metadata()
- yield Message.Version, 1
yield Message.Directory, data
for image_key in self.get_image_keys():
diff --git a/gallery_dl/extractor/imgth.py b/gallery_dl/extractor/imgth.py
index 8a6fe1c..6b424ad 100644
--- a/gallery_dl/extractor/imgth.py
+++ b/gallery_dl/extractor/imgth.py
@@ -33,7 +33,6 @@ class ImgthGalleryExtractor(Extractor):
def items(self):
page = self.request(self.url_base + "0").text
data = self.metadata(page)
- yield Message.Version, 1
yield Message.Directory, data
for data["num"], url in enumerate(self.images(page), 1):
yield Message.Url, url, text.nameext_from_url(url, data)
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index f925c9e..2035655 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -46,7 +46,6 @@ class ImgurExtractor(Extractor):
album_ex = ImgurAlbumExtractor
image_ex = ImgurImageExtractor
- yield Message.Version, 1
for item in items:
item["_extractor"] = album_ex if item["is_album"] else image_ex
yield Message.Queue, item["link"], item
@@ -133,7 +132,6 @@ class ImgurImageExtractor(ImgurExtractor):
image.update(image["media"][0])
del image["media"]
url = self._prepare(image)
- yield Message.Version, 1
yield Message.Directory, image
yield Message.Url, url, image
@@ -221,7 +219,6 @@ class ImgurAlbumExtractor(ImgurExtractor):
except KeyError:
pass
- yield Message.Version, 1
for num, image in enumerate(images, 1):
url = self._prepare(image)
image["num"] = num
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index 2f7935b..cbe0f43 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -30,9 +30,8 @@ class InkbunnyExtractor(Extractor):
def items(self):
self.api.authenticate()
- to_bool = ("deleted", "digitalsales", "favorite", "forsale",
- "friends_only", "guest_block", "hidden", "printsales",
- "public", "scraps")
+ to_bool = ("deleted", "favorite", "friends_only", "guest_block",
+ "hidden", "public", "scraps")
for post in self.posts():
post["date"] = text.parse_datetime(
@@ -42,7 +41,8 @@ class InkbunnyExtractor(Extractor):
files = post["files"]
for key in to_bool:
- post[key] = (post[key] == "t")
+ if key in post:
+ post[key] = (post[key] == "t")
del post["keywords"]
del post["files"]
@@ -81,17 +81,14 @@ class InkbunnyUserExtractor(InkbunnyExtractor):
"user_id" : "20969",
"comments_count" : "re:[0-9]+",
"deleted" : bool,
- "digitalsales" : bool,
"favorite" : bool,
"favorites_count": "re:[0-9]+",
- "forsale" : bool,
"friends_only" : bool,
"guest_block" : bool,
"hidden" : bool,
"pagecount" : "re:[0-9]+",
"pools" : list,
"pools_count" : int,
- "printsales" : bool,
"public" : bool,
"rating_id" : "re:[0-9]+",
"rating_name" : str,
diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py
index 6266e5f..88d57e5 100644
--- a/gallery_dl/extractor/issuu.py
+++ b/gallery_dl/extractor/issuu.py
@@ -94,7 +94,6 @@ class IssuuUserExtractor(IssuuBase, Extractor):
url = "{}/call/profile/v1/documents/{}".format(self.root, self.user)
params = {"offset": 0, "limit": "25"}
- yield Message.Version, 1
while True:
data = self.request(url, params=params).json()
diff --git a/gallery_dl/extractor/keenspot.py b/gallery_dl/extractor/keenspot.py
index 0cbea67..4012760 100644
--- a/gallery_dl/extractor/keenspot.py
+++ b/gallery_dl/extractor/keenspot.py
@@ -55,7 +55,6 @@ class KeenspotComicExtractor(Extractor):
def items(self):
data = {"comic": self.comic}
- yield Message.Version, 1
yield Message.Directory, data
with self.request(self.root + "/") as response:
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 972316b..a911d35 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -24,13 +24,9 @@ class KemonopartyExtractor(Extractor):
filename_fmt = "{id}_{title}_{num:>02}_{filename}.{extension}"
archive_fmt = "{service}_{user}_{id}_{num}"
cookiedomain = ".kemono.party"
- _warning = True
def items(self):
- if self._warning:
- if not self._check_cookies(("__ddg1", "__ddg2")):
- self.log.warning("no DDoS-GUARD cookies set (__ddg1, __ddg2)")
- KemonopartyExtractor._warning = False
+ self._prepare_ddosguard_cookies()
find_inline = re.compile(r'src="(/inline/[^"]+)').findall
skip_service = \
diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py
index 679b5a0..67a1a95 100644
--- a/gallery_dl/extractor/khinsider.py
+++ b/gallery_dl/extractor/khinsider.py
@@ -40,7 +40,6 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
raise exception.NotFoundError("soundtrack")
data = self.metadata(page)
- yield Message.Version, 1
yield Message.Directory, data
for track in self.tracks(page):
track.update(data)
diff --git a/gallery_dl/extractor/livedoor.py b/gallery_dl/extractor/livedoor.py
index feffdfd..cffbc10 100644
--- a/gallery_dl/extractor/livedoor.py
+++ b/gallery_dl/extractor/livedoor.py
@@ -25,7 +25,6 @@ class LivedoorExtractor(Extractor):
self.user = match.group(1)
def items(self):
- yield Message.Version, 1
for post in self.posts():
images = self._images(post)
if images:
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index c296102..665d257 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -120,7 +120,6 @@ class LusciousAlbumExtractor(LusciousExtractor):
def items(self):
album = self.metadata()
- yield Message.Version, 1
yield Message.Directory, {"album": album}
for num, image in enumerate(self.images(), 1):
image["num"] = num
@@ -443,7 +442,6 @@ fragment AlbumMinimal on Album {
}
}
"""
- yield Message.Version, 1
while True:
data = self._graphql("AlbumListWithPeek", variables, query)
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index a8241dc..53ae76a 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -61,7 +61,8 @@ class MangadexExtractor(Extractor):
chnum, sep, minor = 0, "", ""
data = {
- "manga" : mattributes["title"]["en"],
+ "manga" : (mattributes["title"].get("en") or
+ next(iter(mattributes["title"].values()))),
"manga_id": manga["data"]["id"],
"title" : cattributes["title"],
"volume" : text.parse_int(cattributes["volume"]),
diff --git a/gallery_dl/extractor/mangoxo.py b/gallery_dl/extractor/mangoxo.py
index ad2947a..5d240d4 100644
--- a/gallery_dl/extractor/mangoxo.py
+++ b/gallery_dl/extractor/mangoxo.py
@@ -106,7 +106,6 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
data = self.metadata(page)
imgs = self.images(url, page)
- yield Message.Version, 1
yield Message.Directory, data
data["extension"] = None
@@ -175,8 +174,6 @@ class MangoxoChannelExtractor(MangoxoExtractor):
url = "{}/channel/{}/album/".format(self.root, self.channel_id)
data = {"_extractor": MangoxoAlbumExtractor}
- yield Message.Version, 1
-
while True:
page = self.request(url + str(num)).text
diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py
index 413a58a..7e94b1c 100644
--- a/gallery_dl/extractor/naver.py
+++ b/gallery_dl/extractor/naver.py
@@ -102,7 +102,6 @@ class NaverBlogExtractor(NaverBase, Extractor):
self.blog_id = match.group(1) or match.group(2)
def items(self):
- yield Message.Version, 1
# fetch first post number
url = "{}/PostList.nhn?blogId={}".format(self.root, self.blog_id)
diff --git a/gallery_dl/extractor/nhentai.py b/gallery_dl/extractor/nhentai.py
index fd83328..20b716b 100644
--- a/gallery_dl/extractor/nhentai.py
+++ b/gallery_dl/extractor/nhentai.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://nhentai.net/"""
+"""Extractors for https://nhentai.net/"""
from .common import GalleryExtractor, Extractor, Message
from .. import text, util
@@ -23,7 +23,7 @@ class NhentaiBase():
class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
"""Extractor for image galleries from nhentai.net"""
- pattern = r"(?:https?://)?nhentai\.net(/g/(\d+))"
+ pattern = r"(?:https?://)?nhentai\.net/g/(\d+)"
test = ("https://nhentai.net/g/147850/", {
"url": "5179dbf0f96af44005a0ff705a0ad64ac26547d0",
"keyword": {
@@ -49,13 +49,11 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
})
def __init__(self, match):
- GalleryExtractor.__init__(self, match)
- self.gallery_id = match.group(2)
- self.data = None
+ url = self.root + "/api/gallery/" + match.group(1)
+ GalleryExtractor.__init__(self, match, url)
def metadata(self, page):
- self.data = data = json.loads(text.parse_unicode_escapes(text.extract(
- page, 'JSON.parse("', '");')[0]))
+ self.data = data = json.loads(page)
title_en = data["title"].get("english", "")
title_ja = data["title"].get("japanese", "")
@@ -103,7 +101,6 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
class NhentaiSearchExtractor(NhentaiBase, Extractor):
"""Extractor for nhentai search results"""
- category = "nhentai"
subcategory = "search"
pattern = r"(?:https?://)?nhentai\.net/search/?\?([^#]+)"
test = ("https://nhentai.net/search/?q=touhou", {
@@ -117,7 +114,6 @@ class NhentaiSearchExtractor(NhentaiBase, Extractor):
self.params = text.parse_query(match.group(1))
def items(self):
- yield Message.Version, 1
data = {"_extractor": NhentaiGalleryExtractor}
for gallery_id in self._pagination(self.params):
url = "{}/g/{}/".format(self.root, gallery_id)
@@ -133,3 +129,31 @@ class NhentaiSearchExtractor(NhentaiBase, Extractor):
if 'class="next"' not in page:
return
params["page"] += 1
+
+
+class NhentaiFavoriteExtractor(NhentaiBase, Extractor):
+ """Extractor for nhentai favorites"""
+ subcategory = "favorite"
+ pattern = r"(?:https?://)?nhentai\.net/favorites/?(?:\?([^#]+))?"
+ test = ("https://nhentai.net/favorites/",)
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.params = text.parse_query(match.group(1))
+
+ def items(self):
+ data = {"_extractor": NhentaiGalleryExtractor}
+ for gallery_id in self._pagination(self.params):
+ url = "{}/g/{}/".format(self.root, gallery_id)
+ yield Message.Queue, url, data
+
+ def _pagination(self, params):
+ url = "{}/favorites/".format(self.root)
+ params["page"] = text.parse_int(params.get("page"), 1)
+
+ while True:
+ page = self.request(url, params=params).text
+ yield from text.extract_iter(page, 'href="/g/', '/')
+ if 'class="next"' not in page:
+ return
+ params["page"] += 1
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index e558513..6cb7c05 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -36,7 +36,6 @@ class NijieExtractor(AsynchronousMixin, Extractor):
def items(self):
self.login()
- yield Message.Version, 1
for image_id in self.image_ids():
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index b74355d..44411c8 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -25,7 +25,6 @@ class NozomiExtractor(Extractor):
archive_fmt = "{dataid}"
def items(self):
- yield Message.Version, 1
data = self.metadata()
self.session.headers["Origin"] = self.root
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index c798ad0..4dc1e43 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -31,8 +31,8 @@ class OAuthBase(Extractor):
self.cache = config.get(("extractor", self.category), "cache", True)
def oauth_config(self, key, default=None):
- return config.interpolate(
- ("extractor", self.subcategory), key, default)
+ value = config.interpolate(("extractor", self.subcategory), key)
+ return value if value is not None else default
def recv(self):
"""Open local HTTP server and recv callback parameters"""
@@ -220,7 +220,7 @@ class OAuthDeviantart(OAuthBase):
"client-secret", deviantart.DeviantartOAuthAPI.CLIENT_SECRET),
"https://www.deviantart.com/oauth2/authorize",
"https://www.deviantart.com/oauth2/token",
- scope="browse",
+ scope="browse user.manage",
cache=deviantart._refresh_token_cache,
)
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 9c32d7a..547465b 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -27,7 +27,6 @@ class PatreonExtractor(Extractor):
_warning = True
def items(self):
- yield Message.Version, 1
if self._warning:
if "session_id" not in self.session.cookies:
diff --git a/gallery_dl/extractor/photobucket.py b/gallery_dl/extractor/photobucket.py
index 5e2120a..bea0276 100644
--- a/gallery_dl/extractor/photobucket.py
+++ b/gallery_dl/extractor/photobucket.py
@@ -54,7 +54,6 @@ class PhotobucketAlbumExtractor(Extractor):
self.session.headers["Referer"] = self.url
def items(self):
- yield Message.Version, 1
for image in self.images():
image["titleOrFilename"] = text.unescape(image["titleOrFilename"])
image["title"] = text.unescape(image["title"])
@@ -172,6 +171,5 @@ class PhotobucketImageExtractor(Extractor):
mtype, _, mid = base64.b64decode(image["id"]).partition(b":")
image["pictureId"] = mid.decode() if mtype == b"mediaId" else ""
- yield Message.Version, 1
yield Message.Directory, image
yield Message.Url, image["fileUrl"], image
diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py
index 45ce7f8..94de983 100644
--- a/gallery_dl/extractor/piczel.py
+++ b/gallery_dl/extractor/piczel.py
@@ -22,7 +22,6 @@ class PiczelExtractor(Extractor):
api_root = "https://tombstone.piczel.tv"
def items(self):
- yield Message.Version, 1
for post in self.posts():
post["tags"] = [t["title"] for t in post["tags"] if t["title"]]
post["date"] = text.parse_datetime(
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index aefe644..e21a82c 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -213,7 +213,6 @@ class PixivMeExtractor(PixivExtractor):
data = {"_extractor": PixivUserExtractor}
response = self.request(
url, method="HEAD", allow_redirects=False, notfound="user")
- yield Message.Version, 1
yield Message.Queue, response.headers["Location"], data
@@ -552,7 +551,7 @@ class PixivPixivisionExtractor(PixivExtractor):
headers = {"User-Agent": "Mozilla/5.0"}
self.page = self.request(url, headers=headers).text
- title = text.extract(self.page, '<title>', ' - pixivision<')[0]
+ title = text.extract(self.page, '<title>', '<')[0]
return {
"pixivision_id" : self.pixivision_id,
"pixivision_title": text.unescape(title),
diff --git a/gallery_dl/extractor/pixnet.py b/gallery_dl/extractor/pixnet.py
index 342f4fa..98928d6 100644
--- a/gallery_dl/extractor/pixnet.py
+++ b/gallery_dl/extractor/pixnet.py
@@ -89,7 +89,6 @@ class PixnetImageExtractor(PixnetExtractor):
data["blog"] = self.blog
data["user"] = data.pop("author_name")
- yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, data["url"], data
@@ -120,7 +119,6 @@ class PixnetSetExtractor(PixnetExtractor):
page = self.request(url, encoding="utf-8").text
data = self.metadata(page)
- yield Message.Version, 1
yield Message.Directory, data
for num, info in enumerate(self._pagination(page), 1):
url, pos = text.extract(info, ' href="', '"')
diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py
index 61e3d41..f976e82 100644
--- a/gallery_dl/extractor/pornhub.py
+++ b/gallery_dl/extractor/pornhub.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -31,7 +31,7 @@ class PornhubGalleryExtractor(PornhubExtractor):
test = (
("https://www.pornhub.com/album/19289801", {
"pattern": r"https://\w+.phncdn.com/pics/albums/\d+/\d+/\d+/\d+/",
- "count": 308,
+ "count": ">= 300",
"keyword": {
"id" : int,
"num" : int,
@@ -60,7 +60,6 @@ class PornhubGalleryExtractor(PornhubExtractor):
def items(self):
data = self.metadata()
- yield Message.Version, 1
yield Message.Directory, data
for num, image in enumerate(self.images(), 1):
url = image["url"]
@@ -146,7 +145,6 @@ class PornhubUserExtractor(PornhubExtractor):
}
data = {"_extractor": PornhubGalleryExtractor}
- yield Message.Version, 1
while True:
page = self.request(
url, method="POST", headers=headers, params=params).text
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index bbbdd3f..04fe581 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -37,7 +37,6 @@ class ReactorExtractor(Extractor):
def items(self):
data = self.metadata()
- yield Message.Version, 1
yield Message.Directory, data
for post in self.posts():
for image in self._parse_post(post):
@@ -197,7 +196,6 @@ class ReactorPostExtractor(ReactorExtractor):
self.post_id = match.group(2)
def items(self):
- yield Message.Version, 1
post = self.request(self.url).text
pos = post.find('class="uhead">')
for image in self._parse_post(post[pos:]):
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 273ac05..8953edd 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2020 Mike Fährmann
+# Copyright 2017-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,25 +21,20 @@ class RedditExtractor(Extractor):
archive_fmt = "{filename}"
cookiedomain = None
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.api = RedditAPI(self)
- self.max_depth = self.config("recursion", 0)
-
def items(self):
+ self.api = RedditAPI(self)
match_submission = RedditSubmissionExtractor.pattern.match
match_subreddit = RedditSubredditExtractor.pattern.match
match_user = RedditUserExtractor.pattern.match
parentdir = self.config("parent-directory")
+ max_depth = self.config("recursion", 0)
videos = self.config("videos", True)
submissions = self.submissions()
visited = set()
depth = 0
- yield Message.Version, 1
-
while True:
extra = []
@@ -105,7 +100,7 @@ class RedditExtractor(Extractor):
elif not match_user(url) and not match_subreddit(url):
yield Message.Queue, text.unescape(url), data
- if not extra or depth == self.max_depth:
+ if not extra or depth == max_depth:
return
depth += 1
submissions = (
@@ -257,34 +252,35 @@ class RedditImageExtractor(Extractor):
def items(self):
data = text.nameext_from_url(self.url)
- yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, self.url, data
class RedditAPI():
- """Minimal interface for the reddit API"""
+ """Interface for the Reddit API
+
+ Ref: https://www.reddit.com/dev/api/
+ """
CLIENT_ID = "6N9uN0krSDE-ig"
USER_AGENT = "Python:gallery-dl:0.8.4 (by /u/mikf1)"
def __init__(self, extractor):
self.extractor = extractor
- self.comments = text.parse_int(extractor.config("comments", 0))
- self.morecomments = extractor.config("morecomments", False)
self.log = extractor.log
- client_id = extractor.config("client-id", self.CLIENT_ID)
- user_agent = extractor.config("user-agent", self.USER_AGENT)
+ config = extractor.config
+ self.comments = text.parse_int(config("comments", 0))
+ self.morecomments = config("morecomments", False)
- if (client_id == self.CLIENT_ID) ^ (user_agent == self.USER_AGENT):
- raise exception.StopExtraction(
- "Conflicting values for 'client-id' and 'user-agent': "
- "overwrite either both or none of them.")
-
- self.client_id = client_id
- self.headers = {"User-Agent": user_agent}
+ client_id = config("client-id")
+ if client_id is None:
+ self.client_id = self.CLIENT_ID
+ self.headers = {"User-Agent": self.USER_AGENT}
+ else:
+ self.client_id = client_id
+ self.headers = {"User-Agent": config("user-agent")}
- token = extractor.config("refresh-token")
+ token = config("refresh-token")
if token is None or token == "cache":
key = "#" + self.client_id
self.refresh_token = _refresh_token_cache(key)
diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py
index 7f9130d..bf38a77 100644
--- a/gallery_dl/extractor/seiga.py
+++ b/gallery_dl/extractor/seiga.py
@@ -29,7 +29,6 @@ class SeigaExtractor(Extractor):
images = iter(self.get_images())
data = next(images)
- yield Message.Version, 1
yield Message.Directory, data
for image in util.advance(images, self.start_image):
data.update(image)
diff --git a/gallery_dl/extractor/seisoparty.py b/gallery_dl/extractor/seisoparty.py
index b736b4b..28e049b 100644
--- a/gallery_dl/extractor/seisoparty.py
+++ b/gallery_dl/extractor/seisoparty.py
@@ -21,7 +21,6 @@ class SeisopartyExtractor(Extractor):
filename_fmt = "{id}_{title}_{num:>02}_{filename}.{extension}"
archive_fmt = "{service}_{user}_{id}_{num}"
cookiedomain = ".seiso.party"
- _warning = True
def __init__(self, match):
Extractor.__init__(self, match)
@@ -30,10 +29,7 @@ class SeisopartyExtractor(Extractor):
r'href="(https://cdn(?:-\d)?\.seiso\.party/files/[^"]+)').findall
def items(self):
- if self._warning:
- if not self._check_cookies(("__ddg1", "__ddg2")):
- self.log.warning("no DDoS-GUARD cookies set (__ddg1, __ddg2)")
- SeisopartyExtractor._warning = False
+ self._prepare_ddosguard_cookies()
for post in self.posts():
files = post.pop("files")
@@ -109,7 +105,7 @@ class SeisopartyPostExtractor(SeisopartyExtractor):
"url": "75f13b92de0ce399b6163c3de18f1f36011c2366",
"count": 2,
"keyword": {
- "content": "この前描いためぐるちゃんのPSDファイルです。\n"
+ "content": "この前描いためぐるちゃんのPSDファイルです。<br/>"
"どうぞよろしくお願いします。",
"date": "dt:2021-05-06 12:38:31",
"extension": "re:psd|jpg",
diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py
index bde0b5d..34177b4 100644
--- a/gallery_dl/extractor/senmanga.py
+++ b/gallery_dl/extractor/senmanga.py
@@ -42,7 +42,6 @@ class SenmangaChapterExtractor(Extractor):
def items(self):
data = self.metadata()
- yield Message.Version, 1
yield Message.Directory, data
for data["page"] in range(1, data["count"]+1):
data["extension"] = None
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index 41d2e67..ccedff3 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -21,7 +21,6 @@ class SexcomExtractor(Extractor):
root = "https://www.sex.com"
def items(self):
- yield Message.Version, 1
yield Message.Directory, self.metadata()
for pin in map(self._parse_pin, self.pins()):
if pin:
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index 1bc353a..6d924de 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -10,7 +10,6 @@
from .common import BaseExtractor, Message
from .. import text
-import re
class ShopifyExtractor(BaseExtractor):
@@ -27,17 +26,7 @@ class ShopifyExtractor(BaseExtractor):
data = self.metadata()
yield Message.Directory, data
- headers = {"X-Requested-With": "XMLHttpRequest"}
- for url in self.products():
- response = self.request(
- url + ".json", headers=headers, fatal=False)
- if response.status_code >= 400:
- self.log.warning('Skipping %s ("%s: %s")',
- url, response.status_code, response.reason)
- continue
- product = response.json()["product"]
- del product["image"]
-
+ for product in self.products():
for num, image in enumerate(product.pop("images"), 1):
text.nameext_from_url(image["src"], image)
image.update(data)
@@ -59,7 +48,10 @@ BASE_PATTERN = ShopifyExtractor.update({
"pattern": r"(?:www\.)?fashionnova\.com",
},
"omgmiamiswimwear": {
- "root": "https://www.omgmiamiswimwear.com"
+ "root": "https://www.omgmiamiswimwear.com",
+ },
+ "windsorstore": {
+ "root": "https://www.windsorstore.com",
},
})
@@ -73,45 +65,27 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
("https://www.fashionnova.com/collections/mini-dresses", {
"range": "1-20",
"count": 20,
- "archive": False,
}),
("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
("https://www.fashionnova.com/collections/mini-dresses#1"),
("https://www.omgmiamiswimwear.com/collections/fajas"),
+ ("https://www.windsorstore.com/collections/dresses-ball-gowns"),
)
def metadata(self):
return self.request(self.item_url + ".json").json()
def products(self):
- params = {"page": 1}
- fetch = True
- last = None
-
- for pattern in (
- r"/collections/[\w-]+/products/[\w-]+",
- r"href=[\"'](/products/[\w-]+)",
- ):
- search_re = re.compile(pattern)
-
- while True:
- if fetch:
- page = self.request(self.item_url, params=params).text
- urls = search_re.findall(page)
-
- if len(urls) < 3:
- if last:
- return
- fetch = False
- break
- fetch = True
-
- for path in urls:
- if last == path:
- continue
- last = path
- yield self.root + path
- params["page"] += 1
+ url = self.item_url + "/products.json"
+
+ while url:
+ response = self.request(url)
+ yield from response.json()["products"]
+
+ url = response.links.get("next")
+ if not url:
+ return
+ url = url["url"]
class ShopifyProductExtractor(ShopifyExtractor):
@@ -129,7 +103,11 @@ class ShopifyProductExtractor(ShopifyExtractor):
"count": 5,
}),
("https://www.fashionnova.com/collections/flats/products/name"),
+ ("https://www.windsorstore.com/collections/accessories-belts/products"
+ "/rhine-buckle-dbl-o-ring-pu-strap-belt-073010158001"),
)
def products(self):
- return (self.item_url,)
+ product = self.request(self.item_url + ".json").json()["product"]
+ del product["image"]
+ return (product,)
diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py
index e1b14ef..580e917 100644
--- a/gallery_dl/extractor/simplyhentai.py
+++ b/gallery_dl/extractor/simplyhentai.py
@@ -128,7 +128,6 @@ class SimplyhentaiImageExtractor(Extractor):
})
data["token"] = data["filename"].rpartition("_")[2]
- yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, url, data
@@ -192,6 +191,5 @@ class SimplyhentaiVideoExtractor(Extractor):
date), "%B %d, %Y %H:%M"),
})
- yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, video_url, data
diff --git a/gallery_dl/extractor/slickpic.py b/gallery_dl/extractor/slickpic.py
index ddd45ce..b5fbdc2 100644
--- a/gallery_dl/extractor/slickpic.py
+++ b/gallery_dl/extractor/slickpic.py
@@ -68,7 +68,6 @@ class SlickpicAlbumExtractor(SlickpicExtractor):
"count": len(imgs),
}
- yield Message.Version, 1
yield Message.Directory, data
for num, img in enumerate(imgs, 1):
url = img["url_rsz"] + "/o/" + img["fname"]
@@ -137,6 +136,5 @@ class SlickpicUserExtractor(SlickpicExtractor):
data = {"_extractor": SlickpicAlbumExtractor}
base = self.root + "/albums/"
- yield Message.Version, 1
for album in text.extract_iter(page, 'href="' + base, '"'):
yield Message.Queue, base + album, data
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index 15dbb85..a3c77e8 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -49,7 +49,6 @@ class SlidesharePresentationExtractor(Extractor):
data = self.get_job_metadata(page)
imgs = self.get_image_urls(page)
data["count"] = len(imgs)
- yield Message.Version, 1
yield Message.Directory, data
for data["num"], url in enumerate(imgs, 1):
yield Message.Url, url, text.nameext_from_url(url, data)
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index 02cf832..f7408e6 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -95,7 +95,6 @@ class SmugmugAlbumExtractor(SmugmugExtractor):
del album["Uris"]
data = {"Album": album, "User": user}
- yield Message.Version, 1
yield Message.Directory, data
for image in self.api.album_images(self.album_id, "ImageSizeDetails"):
@@ -133,7 +132,6 @@ class SmugmugImageExtractor(SmugmugExtractor):
data = {"Image": image}
text.nameext_from_url(url, data)
- yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, url, data
@@ -170,7 +168,6 @@ class SmugmugPathExtractor(SmugmugExtractor):
self.domain, self.user, self.path = match.groups()
def items(self):
- yield Message.Version, 1
if not self.user:
self.user = self.api.site_user(self.domain)["NickName"]
diff --git a/gallery_dl/extractor/speakerdeck.py b/gallery_dl/extractor/speakerdeck.py
index f5b9171..9eb8761 100644
--- a/gallery_dl/extractor/speakerdeck.py
+++ b/gallery_dl/extractor/speakerdeck.py
@@ -39,7 +39,6 @@ class SpeakerdeckPresentationExtractor(Extractor):
data = self.get_job_metadata()
imgs = self.get_image_urls()
data["count"] = len(imgs)
- yield Message.Version, 1
yield Message.Directory, data
for data["num"], url in enumerate(imgs, 1):
yield Message.Url, url, text.nameext_from_url(url, data)
diff --git a/gallery_dl/extractor/test.py b/gallery_dl/extractor/test.py
index 2f4992c..5d81ed5 100644
--- a/gallery_dl/extractor/test.py
+++ b/gallery_dl/extractor/test.py
@@ -71,7 +71,6 @@ class TestExtractor(Extractor):
if not tests:
raise exception.NotFoundError("test")
- yield Message.Version, 1
for test in tests:
yield Message.Queue, test[0], {}
diff --git a/gallery_dl/extractor/tsumino.py b/gallery_dl/extractor/tsumino.py
index 5809463..92bd634 100644
--- a/gallery_dl/extractor/tsumino.py
+++ b/gallery_dl/extractor/tsumino.py
@@ -152,7 +152,6 @@ class TsuminoSearchExtractor(TsuminoBase, Extractor):
self.query = match.group(1)
def items(self):
- yield Message.Version, 1
for gallery in self.galleries():
url = "{}/entry/{}".format(self.root, gallery["id"])
gallery["_extractor"] = TsuminoGalleryExtractor
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 7e78941..2dfcb55 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -208,30 +208,45 @@ class TwitterExtractor(Extractor):
return tdata
def _transform_user(self, user):
+ try:
+ return self._user_cache[user["id_str"]]
+ except KeyError:
+ pass
+
uid = user["id_str"]
- cache = self._user_cache
-
- if uid not in cache:
- cache[uid] = {
- "id" : text.parse_int(uid),
- "name" : user["screen_name"],
- "nick" : user["name"],
- "description" : user["description"],
- "location" : user["location"],
- "date" : text.parse_datetime(
- user["created_at"], "%a %b %d %H:%M:%S %z %Y"),
- "verified" : user.get("verified", False),
- "profile_banner" : user.get("profile_banner_url", ""),
- "profile_image" : user.get(
- "profile_image_url_https", "").replace("_normal.", "."),
- "favourites_count": user["favourites_count"],
- "followers_count" : user["followers_count"],
- "friends_count" : user["friends_count"],
- "listed_count" : user["listed_count"],
- "media_count" : user["media_count"],
- "statuses_count" : user["statuses_count"],
- }
- return cache[uid]
+ entities = user["entities"]
+
+ self._user_cache[uid] = udata = {
+ "id" : text.parse_int(uid),
+ "name" : user["screen_name"],
+ "nick" : user["name"],
+ "location" : user["location"],
+ "date" : text.parse_datetime(
+ user["created_at"], "%a %b %d %H:%M:%S %z %Y"),
+ "verified" : user.get("verified", False),
+ "profile_banner" : user.get("profile_banner_url", ""),
+ "profile_image" : user.get(
+ "profile_image_url_https", "").replace("_normal.", "."),
+ "favourites_count": user["favourites_count"],
+ "followers_count" : user["followers_count"],
+ "friends_count" : user["friends_count"],
+ "listed_count" : user["listed_count"],
+ "media_count" : user["media_count"],
+ "statuses_count" : user["statuses_count"],
+ }
+
+ descr = user["description"]
+ urls = entities["description"].get("urls")
+ if urls:
+ for url in urls:
+ descr = descr.replace(url["url"], url["expanded_url"])
+ udata["description"] = descr
+
+ if "url" in entities:
+ url = entities["url"]["urls"][0]
+ udata["url"] = url.get("expanded_url") or url.get("url")
+
+ return udata
def _users_result(self, users):
userfmt = self.config("users")
@@ -461,6 +476,11 @@ class TwitterTweetExtractor(TwitterExtractor):
("https://twitter.com/i/web/status/1424882930803908612", {
"options": (("replies", "self"),),
"count": 4,
+ "keyword": {"user": {
+ "description": "re:business email-- rhettaro.bloom@gmail.com "
+ "patreon- http://patreon.com/Princecanary",
+ "url": "http://princecanary.tumblr.com",
+ }},
}),
("https://twitter.com/i/web/status/1424898916156284928", {
"options": (("replies", "self"),),
@@ -749,8 +769,8 @@ class TwitterAPI():
)
except Exception:
msg = data["errors"]
- if response.status_code < 400:
- self.extractor.log.warning(msg)
+ if msg and response.status_code < 400:
+ raise exception.StopExtraction(msg)
else:
msg = ""
@@ -768,12 +788,14 @@ class TwitterAPI():
if response.status_code == 401 and \
"have been blocked from viewing" in msg:
# account blocked
- extr = extr = self.extractor
+ extr = self.extractor
if self.headers["x-twitter-auth-type"] and \
extr.config("logout"):
guest_token = self._guest_token()
extr.session.cookies.set(
"gt", guest_token, domain=extr.cookiedomain)
+ extr._cookiefile = None
+ del extr.session.cookies["auth_token"]
self.headers["x-guest-token"] = guest_token
self.headers["x-twitter-auth-type"] = None
extr.log.info("Retrying API request as guest")
@@ -861,6 +883,7 @@ class TwitterAPI():
if "quoted_status_id_str" in tweet:
quoted = tweets.get(tweet["quoted_status_id_str"])
if quoted:
+ quoted = quoted.copy()
quoted["author"] = users[quoted["user_id_str"]]
quoted["user"] = tweet["user"]
quoted["quoted"] = True
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index 6cfc69e..1677929 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -115,13 +115,13 @@ class UnsplashImageExtractor(UnsplashExtractor):
"id": "uMJXuywXLiU",
"instagram_username": "just_midwest_rock",
"last_name": "Hoefler",
- "location": None,
+ "location": "Madison, WI",
"name": "Dave Hoefler",
"portfolio_url": str,
"total_collections": int,
"total_likes": int,
"total_photos": int,
- "twitter_username": None,
+ "twitter_username": "dave_hoefler",
"updated_at": str,
"username": "davehoefler",
},
diff --git a/gallery_dl/extractor/vanillarock.py b/gallery_dl/extractor/vanillarock.py
index 32eaa36..3d934b2 100644
--- a/gallery_dl/extractor/vanillarock.py
+++ b/gallery_dl/extractor/vanillarock.py
@@ -56,7 +56,6 @@ class VanillarockPostExtractor(VanillarockExtractor):
'<div class="cat-tag">', '</div>'))[::2],
}
- yield Message.Version, 1
yield Message.Directory, data
for data["num"], url in enumerate(imgs, 1):
yield Message.Url, url, text.nameext_from_url(url, data)
@@ -84,7 +83,6 @@ class VanillarockTagExtractor(VanillarockExtractor):
url = self.root + self.path
data = {"_extractor": VanillarockPostExtractor}
- yield Message.Version, 1
while url:
extr = text.extract_from(self.request(url).text)
while True:
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 76e4e3d..9278242 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -30,7 +30,6 @@ class VscoExtractor(Extractor):
def items(self):
videos = self.config("videos", True)
- yield Message.Version, 1
yield Message.Directory, {"user": self.user}
for img in self.images():
diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py
index d353144..7f51732 100644
--- a/gallery_dl/extractor/warosu.py
+++ b/gallery_dl/extractor/warosu.py
@@ -47,7 +47,6 @@ class WarosuThreadExtractor(Extractor):
title = text.remove_html(posts[0]["com"])
data["title"] = text.unescape(title)[:50]
- yield Message.Version, 1
yield Message.Directory, data
for post in posts:
if "image" in post:
diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py
index 711d3fa..75b78c5 100644
--- a/gallery_dl/extractor/weasyl.py
+++ b/gallery_dl/extractor/weasyl.py
@@ -123,7 +123,6 @@ class WeasylSubmissionsExtractor(WeasylExtractor):
self.owner_login = match.group(1)
def items(self):
- yield Message.Version, 1
yield Message.Directory, {"owner_login": self.owner_login}
yield from self.submissions(self.owner_login)
@@ -141,7 +140,6 @@ class WeasylFolderExtractor(WeasylExtractor):
self.owner_login, self.folderid = match.groups()
def items(self):
- yield Message.Version, 1
iter = self.submissions(self.owner_login, self.folderid)
# Folder names are only on single submission api calls
msg, url, data = next(iter)
@@ -171,7 +169,6 @@ class WeasylJournalExtractor(WeasylExtractor):
def items(self):
data = self.retrieve_journal(self.journalid)
- yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, data["html"], data
@@ -190,7 +187,6 @@ class WeasylJournalsExtractor(WeasylExtractor):
self.owner_login = match.group(1)
def items(self):
- yield Message.Version, 1
yield Message.Directory, {"owner_login": self.owner_login}
url = "{}/journals/{}".format(self.root, self.owner_login)
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py
index 9f95e14..f68cb85 100644
--- a/gallery_dl/extractor/wikiart.py
+++ b/gallery_dl/extractor/wikiart.py
@@ -27,7 +27,6 @@ class WikiartExtractor(Extractor):
def items(self):
data = self.metadata()
- yield Message.Version, 1
yield Message.Directory, data
for painting in self.paintings():
url = painting["image"]
diff --git a/gallery_dl/extractor/wikieat.py b/gallery_dl/extractor/wikieat.py
index f544bcb..c7b1958 100644
--- a/gallery_dl/extractor/wikieat.py
+++ b/gallery_dl/extractor/wikieat.py
@@ -43,7 +43,6 @@ class WikieatThreadExtractor(Extractor):
"num" : 0,
}
- yield Message.Version, 1
yield Message.Directory, data
for post in posts:
if "filename" in post:
diff --git a/gallery_dl/extractor/xhamster.py b/gallery_dl/extractor/xhamster.py
index 258e89c..f7a0a7e 100644
--- a/gallery_dl/extractor/xhamster.py
+++ b/gallery_dl/extractor/xhamster.py
@@ -90,7 +90,6 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
def items(self):
data = self.metadata()
- yield Message.Version, 1
yield Message.Directory, data
for num, image in enumerate(self.images(), 1):
url = image["imageURL"]
@@ -167,7 +166,6 @@ class XhamsterUserExtractor(XhamsterExtractor):
self.user = match.group(2)
def items(self):
- yield Message.Version, 1
url = "{}/users/{}/photos".format(self.root, self.user)
data = {"_extractor": XhamsterGalleryExtractor}
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index 9fdc5aa..59649a0 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -129,7 +129,6 @@ class XvideosUserExtractor(XvideosBase, Extractor):
]
galleries.sort(key=lambda x: x["id"])
- yield Message.Version, 1
for gallery in galleries:
url = "https://www.xvideos.com/profiles/{}/photos/{}".format(
self.user, gallery["id"])
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 953d9c3..32e9bb5 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -68,12 +68,16 @@ class Job():
def run(self):
"""Execute or run the job"""
- sleep = self.extractor.config("sleep-extractor")
+ extractor = self.extractor
+ log = extractor.log
+ msg = None
+
+ sleep = extractor.config("sleep-extractor")
if sleep:
time.sleep(sleep)
+
try:
- log = self.extractor.log
- for msg in self.extractor:
+ for msg in extractor:
self.dispatch(msg)
except exception.StopExtraction as exc:
if exc.message:
@@ -100,8 +104,12 @@ class Job():
except BaseException:
self.status |= 1
raise
+ else:
+ if msg is None:
+ log.info("No results for %s", extractor.url)
finally:
self.handle_finalize()
+
return self.status
def dispatch(self, msg):
@@ -125,13 +133,6 @@ class Job():
if self.pred_queue(url, kwdict):
self.handle_queue(url, kwdict)
- elif msg[0] == Message.Version:
- if msg[1] != 1:
- raise "unsupported message-version ({}, {})".format(
- self.extractor.category, msg[1]
- )
- # TODO: support for multiple message versions
-
def handle_url(self, url, kwdict):
"""Handle Message.Url"""
@@ -199,6 +200,7 @@ class DownloadJob(Job):
Job.__init__(self, url, parent)
self.log = self.get_logger("download")
self.blacklist = None
+ self.fallback = None
self.archive = None
self.sleep = None
self.hooks = ()
@@ -237,8 +239,9 @@ class DownloadJob(Job):
# download from URL
if not self.download(url):
- # use fallback URLs if available
- for num, url in enumerate(kwdict.get("_fallback", ()), 1):
+ # use fallback URLs if available/enabled
+ fallback = kwdict.get("_fallback", ()) if self.fallback else ()
+ for num, url in enumerate(fallback, 1):
util.remove_file(pathfmt.temppath)
self.log.info("Trying fallback URL #%d", num)
if self.download(url):
@@ -394,6 +397,7 @@ class DownloadJob(Job):
pathfmt.set_directory(kwdict)
self.sleep = cfg("sleep")
+ self.fallback = cfg("fallback", True)
if not cfg("download", True):
# monkey-patch method to do nothing and always return True
self.download = pathfmt.fix_extension
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 2c0fae6..935bf99 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -547,6 +547,7 @@ class Formatter():
- "u": calls str.upper
- "c": calls str.capitalize
- "C": calls string.capwords
+ - "j": calls json.dumps
- "t": calls str.strip
- "d": calls text.parse_timestamp
- "U": calls urllib.parse.unquote
@@ -581,6 +582,7 @@ class Formatter():
"u": str.upper,
"c": str.capitalize,
"C": string.capwords,
+ "j": json.dumps,
"t": str.strip,
"T": to_timestamp,
"d": text.parse_timestamp,
@@ -849,6 +851,15 @@ class PathFormat():
remove = config("path-remove", "\x00-\x1f\x7f")
self.clean_path = self._build_cleanfunc(remove, "")
+ strip = config("path-strip", "auto")
+ if strip == "auto":
+ strip = ". " if WINDOWS else ""
+ elif strip == "unix":
+ strip = ""
+ elif strip == "windows":
+ strip = ". "
+ self.strip = strip
+
basedir = extractor._parentdir
if not basedir:
basedir = config("base-directory")
@@ -982,13 +993,14 @@ class PathFormat():
"""Apply 'kwdict' to directory format strings"""
segments = []
append = segments.append
+ strip = self.strip
try:
for formatter in self.directory_formatters:
segment = formatter(kwdict).strip()
- if WINDOWS:
+ if strip:
# remove trailing dots and spaces (#647)
- segment = segment.rstrip(". ")
+ segment = segment.rstrip(strip)
if segment:
append(self.clean_segment(segment))
return segments
@@ -998,6 +1010,7 @@ class PathFormat():
def build_directory_conditional(self, kwdict):
segments = []
append = segments.append
+ strip = self.strip
try:
for condition, formatters in self.directory_conditions:
@@ -1007,8 +1020,8 @@ class PathFormat():
formatters = self.directory_formatters
for formatter in formatters:
segment = formatter(kwdict).strip()
- if WINDOWS:
- segment = segment.rstrip(". ")
+ if strip:
+ segment = segment.rstrip(strip)
if segment:
append(self.clean_segment(segment))
return segments
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 566159d..7e6458f 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.18.3"
+__version__ = "1.18.4"
diff --git a/test/test_results.py b/test/test_results.py
index c36b6dd..8a20e6b 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -314,11 +314,6 @@ def setup_test_config():
"e621", "inkbunny", "tapas", "pillowfort", "mangadex"):
config.set(("extractor", category), "username", None)
- config.set(("extractor", "kemonoparty"), "cookies", {
- "__ddg1": "0gBDGpJ3KZQmA4B9QH25", "__ddg2": "lmj5s1jnJOvhPXCX"})
- config.set(("extractor", "seisoparty"), "cookies", {
- "__ddg1": "Y8rBxSDHO5UCEtQvzyI9", "__ddg2": "lmj5s1jnJOvhPXCX"})
-
config.set(("extractor", "mastodon.social"), "access-token",
"Blf9gVqG7GytDTfVMiyYQjwVMQaNACgf3Ds3IxxVDUQ")
diff --git a/test/test_util.py b/test/test_util.py
index 7a31ebb..1aa66d1 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -297,6 +297,7 @@ class TestFormatter(unittest.TestCase):
self._run_test("{t!d}", datetime.datetime(2010, 1, 1))
self._run_test("{t!d:%Y-%m-%d}", "2010-01-01")
self._run_test("{dt!T}", "1262304000")
+ self._run_test("{l!j}", '["a", "b", "c"]')
with self.assertRaises(KeyError):
self._run_test("{a!q}", "hello world")