aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatarUnit 193 <unit193@ubuntu.com>2019-08-26 19:34:45 -0400
committerLibravatarUnit 193 <unit193@ubuntu.com>2019-08-26 19:34:45 -0400
commitb75d158d014d6c43d7d785c46c9372a9cf84d144 (patch)
tree7dca4a7e61fe8b6e2bff2142fc19891e783a7d6d
parent64ad8e7bd15df71ab1116eede414558631bcad32 (diff)
New upstream version 1.10.2upstream/1.10.2
-rw-r--r--.travis.yml14
-rw-r--r--CHANGELOG.md24
-rw-r--r--README.rst8
-rw-r--r--docs/configuration.rst60
-rw-r--r--docs/gallery-dl.conf5
-rw-r--r--docs/supportedsites.rst12
-rw-r--r--gallery_dl/downloader/http.py18
-rw-r--r--gallery_dl/downloader/ytdl.py4
-rw-r--r--gallery_dl/extractor/adultempire.py8
-rw-r--r--gallery_dl/extractor/artstation.py1
-rw-r--r--gallery_dl/extractor/booru.py8
-rw-r--r--gallery_dl/extractor/common.py3
-rw-r--r--gallery_dl/extractor/deviantart.py394
-rw-r--r--gallery_dl/extractor/gelbooru.py8
-rw-r--r--gallery_dl/extractor/hitomi.py2
-rw-r--r--gallery_dl/extractor/imagebam.py6
-rw-r--r--gallery_dl/extractor/imgbb.py33
-rw-r--r--gallery_dl/extractor/imgur.py67
-rw-r--r--gallery_dl/extractor/instagram.py169
-rw-r--r--gallery_dl/extractor/luscious.py2
-rw-r--r--gallery_dl/extractor/newgrounds.py2
-rw-r--r--gallery_dl/extractor/patreon.py130
-rw-r--r--gallery_dl/extractor/pixiv.py20
-rw-r--r--gallery_dl/extractor/pururin.py2
-rw-r--r--gallery_dl/extractor/reactor.py6
-rw-r--r--gallery_dl/extractor/reddit.py2
-rw-r--r--gallery_dl/extractor/sankaku.py7
-rw-r--r--gallery_dl/extractor/sexcom.py1
-rw-r--r--gallery_dl/extractor/simplyhentai.py162
-rw-r--r--gallery_dl/extractor/twitter.py1
-rw-r--r--gallery_dl/extractor/wikiart.py2
-rw-r--r--gallery_dl/extractor/xhamster.py16
-rw-r--r--gallery_dl/job.py20
-rw-r--r--gallery_dl/oauth.py2
-rw-r--r--gallery_dl/option.py3
-rw-r--r--gallery_dl/postprocessor/classify.py23
-rw-r--r--gallery_dl/postprocessor/common.py5
-rw-r--r--gallery_dl/postprocessor/metadata.py15
-rw-r--r--gallery_dl/postprocessor/mtime.py4
-rw-r--r--gallery_dl/postprocessor/ugoira.py10
-rw-r--r--gallery_dl/util.py179
-rw-r--r--gallery_dl/version.py2
-rwxr-xr-xscripts/run_tests.sh2
-rwxr-xr-xscripts/supportedsites.py13
-rw-r--r--test/test_downloader.py4
-rw-r--r--test/test_postprocessor.py294
-rw-r--r--test/test_results.py17
47 files changed, 1261 insertions, 529 deletions
diff --git a/.travis.yml b/.travis.yml
index 6158941..4b3a2cd 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,11 +16,18 @@ matrix:
env: GALLERYDL_TESTS=results
- language: minimal
dist: xenial
+ env: GALLERYDL_TESTS=snap
addons:
snaps:
- name: snapcraft
classic: true
- env: SNAP_TESTS=true
+ install:
+ - true
+ script:
+ - sudo apt update
+ - snapcraft --destructive-mode
+ - sudo snap try
+ - snap run gallery-dl --verbose https://twitter.com/ubuntu/status/1121001597092364288
git:
depth: 3
@@ -31,6 +38,7 @@ branches:
- /^v\d+\.\d+\.\d+(-\S*)?$/
- /^test(-\w+)+$/
+install:
+ - pip install -r requirements.txt pyOpenSSL
script:
- - 'if test "${SNAP_TESTS}" != true; then ./scripts/run_tests.sh; else true; fi'
- - 'if test "${SNAP_TESTS}" = true; then sudo apt update && snapcraft --destructive-mode && sudo snap try && snap run gallery-dl --verbose https://twitter.com/ubuntu/status/1121001597092364288; else true; fi'
+ - ./scripts/run_tests.sh
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 58e295c..99df78a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,8 +1,30 @@
# Changelog
+## 1.10.2 - 2019-08-23
+### Additions
+- Support for `instagram` stories and IGTV ([#371](https://github.com/mikf/gallery-dl/issues/371), [#373](https://github.com/mikf/gallery-dl/issues/373))
+- Support for individual `imgbb` images ([#363](https://github.com/mikf/gallery-dl/issues/363))
+- `deviantart.quality` option to set the JPEG compression quality for newer images ([#369](https://github.com/mikf/gallery-dl/issues/369))
+- `enumerate` option for `extractor.skip` ([#306](https://github.com/mikf/gallery-dl/issues/306))
+- `adjust-extensions` option to control filename extension adjustments
+- `path-remove` option to remove control characters etc. from filesystem paths
+### Changes
+- Rename `restrict-filenames` to `path-restrict`
+- Adjust `pixiv` metadata and default filename format ([#366](https://github.com/mikf/gallery-dl/issues/366))
+ - Set `filename` to `"{category}_{user[id]}_{id}{suffix}.{extension}"` to restore the old default
+- Improve and optimize directory and filename generation
+### Fixes
+- Allow the `classify` post-processor to handle files with unknown filename extension ([#138](https://github.com/mikf/gallery-dl/issues/138))
+- Fix rate limit handling for OAuth APIs ([#368](https://github.com/mikf/gallery-dl/issues/368))
+- Fix artwork and scraps extraction on `deviantart` ([#376](https://github.com/mikf/gallery-dl/issues/376), [#392](https://github.com/mikf/gallery-dl/issues/392))
+- Distinguish between `imgur` album and gallery URLs ([#380](https://github.com/mikf/gallery-dl/issues/380))
+- Prevent crash when using `--ugoira-conv` ([#382](https://github.com/mikf/gallery-dl/issues/382))
+- Handle multi-image posts on `patreon` ([#383](https://github.com/mikf/gallery-dl/issues/383))
+- Miscellaneous fixes for `*reactor`, `simplyhentai`
+
## 1.10.1 - 2019-08-02
## Fixes
-- Restore functionality of both domains for `exhentai` extractors
+- Use the correct domain for exhentai.org input URLs
## 1.10.0 - 2019-08-01
### Warning
diff --git a/README.rst b/README.rst
index 3bca007..e62a7ec 100644
--- a/README.rst
+++ b/README.rst
@@ -78,8 +78,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.1/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.1/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.2/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.2/gallery-dl.bin>`__
These executables include a Python 3.7 interpreter
and all required Python packages.
@@ -224,13 +224,13 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.1.zip
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.2.zip
.. _dev: https://github.com/mikf/gallery-dl/archive/master.zip
.. _Python: https://www.python.org/downloads/
.. _PyPI: https://pypi.org/
.. _pip: https://pip.pypa.io/en/stable/
-.. _Requests: http://docs.python-requests.org/en/master/
+.. _Requests: https://2.python-requests.org/en/master/#requests-http-for-humans
.. _FFmpeg: https://www.ffmpeg.org/
.. _youtube-dl: https://ytdl-org.github.io/youtube-dl/
.. _pyOpenSSL: https://pyopenssl.org/
diff --git a/docs/configuration.rst b/docs/configuration.rst
index c6f757d..0e2e355 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -108,21 +108,36 @@ Description Directory path used as the base for all download destinations.
=========== =====
-extractor.*.restrict-filenames
-------------------------------
+extractor.*.path-restrict
+-------------------------
=========== =====
Type ``string``
Default ``"auto"``
-Example ``"/!? ()[]{}"``
-Description Characters to replace with underscores (``_``) when generating
- directory and file names.
+Example ``"/!? (){}"``
+Description Set of characters to replace with underscores (``_``)
+ in generated path segment names.
Special values:
* ``"auto"``: Use characters from ``"unix"`` or ``"windows"``
depending on the local operating system
* ``"unix"``: ``"/"``
- * ``"windows"``: ``"<>:\"\\|/?*"``
+ * ``"windows"``: ``"\\\\|/<>:\"?*"``
+
+ Note: In a set with 2 or more characters, ``[]^-\`` need to be
+ escaped with backslashes, e.g. ``"\\[\\]"``
+=========== =====
+
+
+extractor.*.path-remove
+-----------------------
+=========== =====
+Type ``string``
+Default ``"\\u0000-\\u001f\\u007f"`` (ASCII control characters)
+Description Set of characters to remove from generated path names.
+
+ Note: In a set with 2 or more characters, ``[]^-\`` need to be
+ escaped with backslashes, e.g. ``"\\[\\]"``
=========== =====
@@ -131,8 +146,11 @@ extractor.*.skip
=========== =====
Type ``bool`` or ``string``
Default ``true``
-Description Controls the behavior when downloading files whose filename
- already exists.
+Description Controls the behavior when downloading files that have been
+ downloaded before, i.e. a file with the same filename already
+ exists or its ID is in a `download archive`__.
+
+ __ `extractor.*.archive`_
* ``true``: Skip downloads
* ``false``: Overwrite already existing files
@@ -144,6 +162,9 @@ Description Controls the behavior when downloading files whose filename
* ``"exit"``: Exit the program altogether
* ``"exit:N"``: Skip downloads and exit the program
after ``N`` consecutive skips
+
+ * ``"enumerate"``: Append a numeric suffix to the end of the
+ original filename (``file.ext.1``, ``file.ext.2``, etc)
=========== =====
@@ -555,6 +576,15 @@ Description Download original files if available.
=========== =====
+extractor.deviantart.quality
+----------------------------
+=========== =====
+Type ``integer``
+Default ``100``
+Description JPEG compression quality for newer images hosted on wixmp servers.
+=========== =====
+
+
extractor.deviantart.refresh-token
----------------------------------
=========== =====
@@ -1098,6 +1128,16 @@ Description Certificate validation during file downloads.
=========== =====
+downloader.http.adjust-extensions
+---------------------------------
+=========== =====
+Type ``bool``
+Default ``true``
+Description Check the file headers of ``jpg``, ``png``, and ``gif`` files
+ and adjust their filename extensions if they do not match.
+=========== =====
+
+
downloader.ytdl.format
----------------------
=========== =====
@@ -1772,7 +1812,7 @@ Description An object with the ``name`` of a post-processor and its options.
.. _timeout: https://docs.python-requests.org/en/latest/user/advanced/#timeouts
.. _verify: https://docs.python-requests.org/en/master/user/advanced/#ssl-cert-verification
.. _Last-Modified: https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.29
-.. _`Requests' proxy documentation`: http://docs.python-requests.org/en/master/user/advanced/#proxies
+.. _`Requests' proxy documentation`: https://2.python-requests.org/en/master/user/advanced/#proxies
.. _format string: https://docs.python.org/3/library/string.html#formatstrings
.. _format strings: https://docs.python.org/3/library/string.html#formatstrings
.. _strptime: https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior
@@ -1780,5 +1820,5 @@ Description An object with the ``name`` of a post-processor and its options.
.. _webbrowser.open(): https://docs.python.org/3/library/webbrowser.html
.. _datetime: https://docs.python.org/3/library/datetime.html#datetime-objects
.. _datetime.max: https://docs.python.org/3/library/datetime.html#datetime.datetime.max
-.. _Authentication: https://github.com/mikf/gallery-dl#5authentication
+.. _Authentication: https://github.com/mikf/gallery-dl#authentication
.. _youtube-dl: https://github.com/ytdl-org/youtube-dl
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index a4a9ee0..b9ff32d 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -8,8 +8,9 @@
"proxy": null,
"skip": true,
"sleep": 0,
+ "path-restrict": "auto",
+ "path-remove": "\\u0000-\\u001f\\u007f",
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0",
- "restrict-filenames": "auto",
"artstation":
{
@@ -30,6 +31,7 @@
"mature": true,
"metadata": false,
"original": true,
+ "quality": 100,
"wait-min": 0
},
"exhentai":
@@ -154,6 +156,7 @@
"http":
{
+ "adjust-extensions": true,
"mtime": true,
"rate": null,
"retries": 4,
diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst
index d2fb4ea..05c8555 100644
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@@ -28,6 +28,7 @@ Dynasty Reader https://dynasty-scans.com/ Chapters, individual Im
E-Hentai https://e-hentai.org/ Favorites, Galleries, Search Results Optional
e621 https://e621.net/ Pools, Popular Images, Posts, Tag-Searches
EroLord.com http://erolord.com/ Galleries
+ExHentai https://exhentai.org/ Favorites, Galleries, Search Results Optional
Fallen Angels Scans https://www.fascans.com/ Chapters, Manga
Fashion Nova https://www.fashionnova.com/ Collections, Products
Fireden https://boards.fireden.net/ Threads
@@ -47,11 +48,11 @@ Hypnohub https://hypnohub.net/ Pools, Popular Images,
Idol Complex https://idol.sankakucomplex.com/ Pools, Posts, Tag-Searches Optional
ImageBam http://www.imagebam.com/ Galleries, individual Images
ImageFap https://imagefap.com/ Images from Users, Galleries, individual Images
-ImgBB https://imgbb.com/ Images from Users, Albums Optional
+ImgBB https://imgbb.com/ Images from Users, Albums, individual Images Optional
imgbox https://imgbox.com/ Galleries, individual Images
imgth https://imgth.com/ Galleries
-imgur https://imgur.com/ Albums, individual Images
-Instagram https://www.instagram.com/ Images from Users, individual Images, Tag-Searches Optional
+imgur https://imgur.com/ Albums, Galleries, individual Images
+Instagram https://www.instagram.com/ |instagram-C| Optional
Jaimini's Box https://jaiminisbox.com/reader/ Chapters, Manga
Joyreactor http://joyreactor.cc/ |joyreactor-C|
Keenspot http://www.keenspot.com/ Comics
@@ -77,7 +78,7 @@ Niconico Seiga https://seiga.nicovideo.jp/ Images from Users, indi
nijie https://nijie.info/ |nijie-C| Required
NSFWalbum.com https://nsfwalbum.com/ Albums
Nyafuu Archive https://archive.nyafuu.org/ Threads
-Patreon https://www.patreon.com/ Images from Users, Creators
+Patreon https://www.patreon.com/ Images from Users, Creators, Posts
Pawoo https://pawoo.net/ Images from Users, Images from Statuses
Photobucket https://photobucket.com/ Albums, individual Images
Piczel https://piczel.tv/ Images from Users, Folders, individual Images
@@ -100,7 +101,7 @@ Sankaku Complex https://www.sankakucomplex.com/ Articles, Tag-Searches
Sen Manga https://raw.senmanga.com/ Chapters
Sense-Scans http://sensescans.com/reader/ Chapters, Manga
Sex.com https://www.sex.com/ Boards, Pins, related Pins, Search Results
-Simply Hentai https://www.simply-hentai.com/ Galleries, individual Images, Videos
+Simply Hentai https://www.simply-hentai.com/ Galleries
SlickPic https://www.slickpic.com/ Images from Users, Albums
SlideShare https://www.slideshare.net/ Presentations
SmugMug https://www.smugmug.com/ |smugmug-C| Optional (OAuth)
@@ -134,6 +135,7 @@ Turboimagehost https://www.turboimagehost.com/ individual Images
.. |deviantart-C| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Scraps, Sta.sh
.. |flickr-C| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results
.. |hentaifoundry-C| replace:: Images from Users, Favorites, individual Images, Popular Images, Recent Images, Scraps
+.. |instagram-C| replace:: Images from Users, Channels, individual Images, Stories, Tag-Searches
.. |joyreactor-C| replace:: Images from Users, Posts, Search Results, Tag-Searches
.. |nijie-C| replace:: Images from Users, Doujin, Favorites, individual Images
.. |pixiv-C| replace:: Images from Users, Favorites, Follows, pixiv.me Links, Rankings, Search Results, Individual Images
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 7a95191..e3229eb 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -26,6 +26,7 @@ class HttpDownloader(DownloaderBase):
def __init__(self, extractor, output):
DownloaderBase.__init__(self, extractor, output)
+ self.adjust_extension = self.config("adjust-extensions", True)
self.retries = self.config("retries", extractor._retries)
self.timeout = self.config("timeout", extractor._timeout)
self.verify = self.config("verify", extractor._verify)
@@ -59,7 +60,6 @@ class HttpDownloader(DownloaderBase):
def _download_impl(self, url, pathfmt):
response = None
- adj_ext = None
tries = 0
msg = ""
@@ -103,7 +103,7 @@ class HttpDownloader(DownloaderBase):
elif code == 206: # Partial Content
offset = filesize
size = response.headers["Content-Range"].rpartition("/")[2]
- elif code == 416: # Requested Range Not Satisfiable
+ elif code == 416 and filesize: # Requested Range Not Satisfiable
break
else:
msg = "{}: {} for url: {}".format(code, response.reason, url)
@@ -114,7 +114,7 @@ class HttpDownloader(DownloaderBase):
size = text.parse_int(size)
# set missing filename extension
- if not pathfmt.has_extension:
+ if not pathfmt.extension:
pathfmt.set_extension(self.get_extension(response))
if pathfmt.exists():
pathfmt.temppath = ""
@@ -152,15 +152,16 @@ class HttpDownloader(DownloaderBase):
continue
# check filename extension
- adj_ext = self.check_extension(file, pathfmt)
+ if self.adjust_extension:
+ adj_ext = self.check_extension(file, pathfmt.extension)
+ if adj_ext:
+ pathfmt.set_extension(adj_ext)
break
self.downloading = False
- if adj_ext:
- pathfmt.set_extension(adj_ext)
if self.mtime:
- pathfmt.keywords["_mtime"] = response.headers.get("Last-Modified")
+ pathfmt.kwdict["_mtime"] = response.headers.get("Last-Modified")
return True
def receive(self, response, file):
@@ -196,9 +197,8 @@ class HttpDownloader(DownloaderBase):
return "txt"
@staticmethod
- def check_extension(file, pathfmt):
+ def check_extension(file, extension):
"""Check filename extension against fileheader"""
- extension = pathfmt.keywords["extension"]
if extension in FILETYPE_CHECK:
file.seek(0)
header = file.read(8)
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index a233487..7d8b905 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -50,7 +50,7 @@ class YoutubeDLDownloader(DownloaderBase):
return False
if "entries" in info_dict:
- index = pathfmt.keywords.get("_ytdl_index")
+ index = pathfmt.kwdict.get("_ytdl_index")
if index is None:
return self._download_playlist(pathfmt, info_dict)
else:
@@ -59,7 +59,7 @@ class YoutubeDLDownloader(DownloaderBase):
def _download_video(self, pathfmt, info_dict):
if "url" in info_dict:
- text.nameext_from_url(info_dict["url"], pathfmt.keywords)
+ text.nameext_from_url(info_dict["url"], pathfmt.kwdict)
pathfmt.set_extension(info_dict["ext"])
if pathfmt.exists():
pathfmt.temppath = ""
diff --git a/gallery_dl/extractor/adultempire.py b/gallery_dl/extractor/adultempire.py
index 5ea835f..5e2480a 100644
--- a/gallery_dl/extractor/adultempire.py
+++ b/gallery_dl/extractor/adultempire.py
@@ -21,12 +21,12 @@ class AdultempireGalleryExtractor(GalleryExtractor):
test = (
("https://www.adultempire.com/5998/gallery.html", {
"range": "1",
- "keyword": "0533ef1184892be8ac02b17286797c95f389ba63",
+ "keyword": "25c8171f5623678491a0d7bdf38a7a6ebfa4a361",
"content": "5c6beb31e5e3cdc90ee5910d5c30f9aaec977b9e",
}),
("https://www.adultdvdempire.com/5683/gallery.html", {
"url": "b12cd1a65cae8019d837505adb4d6a2c1ed4d70d",
- "keyword": "59fe5d95929efc5040a819a5f77aba7a022bb85a",
+ "keyword": "0fe9a6e3f0a331b95ba77f66a643705ca86e8ec5",
}),
)
@@ -42,8 +42,8 @@ class AdultempireGalleryExtractor(GalleryExtractor):
"studio" : extr(">studio</small>", "<").strip(),
"date" : text.parse_datetime(extr(
">released</small>", "<").strip(), "%m/%d/%Y"),
- "actors" : text.split_html(extr(
- '<ul class="item-details item-cast-list ', '</ul>'))[1:],
+ "actors" : sorted(text.split_html(extr(
+ '<ul class="item-details item-cast-list ', '</ul>'))[1:]),
}
def images(self, page):
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index f7b3bc1..2892bd4 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -41,6 +41,7 @@ class ArtstationExtractor(Extractor):
player = adict["player_embedded"]
url = text.extract(player, 'src="', '"')[0]
if not url.startswith(self.root):
+ asset["extension"] = None
yield Message.Url, "ytdl:" + url, asset
continue
diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py
index c63085a..54a8878 100644
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -41,10 +41,8 @@ class BooruExtractor(SharedConfigMixin, Extractor):
return pages * self.per_page
def items(self):
- data = self.get_metadata()
-
yield Message.Version, 1
- yield Message.Directory, data
+ data = self.get_metadata()
self.reset_page()
while True:
@@ -59,9 +57,11 @@ class BooruExtractor(SharedConfigMixin, Extractor):
if url.startswith("/"):
url = text.urljoin(self.api_url, url)
image.update(data)
+ text.nameext_from_url(url, image)
if self.extags:
self.extended_tags(image)
- yield Message.Url, url, text.nameext_from_url(url, image)
+ yield Message.Directory, image
+ yield Message.Url, url, image
if len(images) < self.per_page:
return
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 5c40e2a..a90af1c 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -87,7 +87,8 @@ class Extractor():
raise exception.HttpError(exc)
else:
code = response.status_code
- if 200 <= code < 400 or not fatal and \
+ if 200 <= code < 400 or fatal is None and \
+ (400 <= code < 500) or not fatal and \
(400 <= code < 429 or 431 <= code < 500):
if encoding:
response.encoding = encoding
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 63e2913..bd1299b 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -27,7 +27,7 @@ BASE_PATTERN = (
class DeviantartExtractor(Extractor):
- """Base class for deviantart extractors"""
+ """Base class for deviantart extractors using the OAuth API"""
category = "deviantart"
directory_fmt = ("{category}", "{author[username]!l}")
filename_fmt = "{category}_{index}_{title}.{extension}"
@@ -38,11 +38,15 @@ class DeviantartExtractor(Extractor):
self.offset = 0
self.flat = self.config("flat", True)
self.extra = self.config("extra", False)
+ self.quality = self.config("quality", "100")
self.original = self.config("original", True)
self.user = match.group(1) or match.group(2)
self.group = False
self.api = DeviantartAPI(self)
+ if self.quality:
+ self.quality = "q_{}".format(self.quality)
+
if self.original != "image":
self._update_content = self._update_content_default
else:
@@ -81,12 +85,15 @@ class DeviantartExtractor(Extractor):
text.ext_from_url(content["src"]) != "gif":
self._update_content(deviation, content)
- if deviation["index"] <= 790677560 and \
- content["src"].startswith("https://images-wixmp-"):
- # https://github.com/r888888888/danbooru/issues/4069
- content["src"] = re.sub(
- r"(/f/[^/]+/[^/]+)/v\d+/.*",
- r"/intermediary\1", content["src"])
+ if content["src"].startswith("https://images-wixmp-"):
+ if deviation["index"] <= 790677560:
+ # https://github.com/r888888888/danbooru/issues/4069
+ content["src"] = re.sub(
+ r"(/f/[^/]+/[^/]+)/v\d+/.*",
+ r"/intermediary\1", content["src"])
+ if self.quality:
+ content["src"] = re.sub(
+ r"q_\d+", self.quality, content["src"])
yield self.commit(deviation, content)
@@ -133,8 +140,16 @@ class DeviantartExtractor(Extractor):
@staticmethod
def commit(deviation, target):
url = target["src"]
- deviation["target"] = text.nameext_from_url(url, target.copy())
- deviation["extension"] = deviation["target"]["extension"]
+ thumb = deviation["thumbs"][0]["src"] if "thumbs" in deviation else url
+ target = text.nameext_from_url(thumb, target.copy())
+ if target["filename"].endswith("-150"):
+ target["filename"] = target["filename"][:-4]
+ if not target["filename"].count("-"):
+ name, _, hid = target["filename"].rpartition("_")
+ target["filename"] = name + "-" + hid
+ deviation["target"] = target
+ deviation["filename"] = target["filename"]
+ deviation["extension"] = target["extension"] = text.ext_from_url(url)
return Message.Url, url, deviation
def _commit_journal_html(self, deviation, journal):
@@ -225,14 +240,6 @@ class DeviantartExtractor(Extractor):
if mtype and mtype.startswith("image/"):
content.update(data)
- def _html_request(self, url, **kwargs):
- cookies = {"userinfo": (
- '__167217c8e6aac1a3331f;{"username":"","uniqueid":"ab2e8b184471bf0'
- 'e3f8ed3ee7a3220aa","vd":"Bc7vEx,BdC7Fy,A,J,A,,B,A,B,BdC7Fy,BdC7XU'
- ',J,J,A,BdC7XU,13,A,B,A,,A,A,B,A,A,,A","attr":56}'
- )}
- return self.request(url, cookies=cookies, **kwargs)
-
class DeviantartGalleryExtractor(DeviantartExtractor):
"""Extractor for all deviations from an artist's gallery"""
@@ -360,68 +367,6 @@ class DeviantartFolderExtractor(DeviantartExtractor):
deviation["folder"] = self.folder
-class DeviantartDeviationExtractor(DeviantartExtractor):
- """Extractor for single deviations"""
- subcategory = "deviation"
- archive_fmt = "{index}.{extension}"
- pattern = BASE_PATTERN + r"/((?:art|journal)/[^/?&#]+-\d+)"
- test = (
- (("https://www.deviantart.com/shimoda7/art/"
- "For-the-sake-of-a-memory-10073852"), {
- "options": (("original", 0),),
- "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
- }),
- ("https://www.deviantart.com/zzz/art/zzz-1234567890", {
- "exception": exception.NotFoundError,
- }),
- (("https://www.deviantart.com/myria-moon/art/"
- "Aime-Moi-part-en-vadrouille-261986576"), {
- "pattern": (r"https?://s3\.amazonaws\.com/origin-orig\."
- r"deviantart\.net/a383/f/2013/135/e/7/[^.]+\.jpg\?"),
- }),
- # wixmp URL rewrite
- (("https://www.deviantart.com/citizenfresh/art/"
- "Hverarond-14-the-beauty-of-the-earth-789295466"), {
- "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
- r"/intermediary/f/[^/]+/[^.]+\.jpg$")
- }),
- # non-download URL for GIFs (#242)
- (("https://www.deviantart.com/skatergators/art/"
- "COM-Monique-Model-781571783"), {
- "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
- r"/f/[^/]+/[^.]+\.gif\?token="),
- }),
- # external URLs from description (#302)
- (("https://www.deviantart.com/uotapo/art/"
- "INANAKI-Memorial-Humane7-590297498"), {
- "options": (("extra", 1), ("original", 0)),
- "pattern": r"https?://sta\.sh/\w+$",
- "range": "2-",
- "count": 4,
- }),
- # old-style URLs
- ("https://shimoda7.deviantart.com"
- "/art/For-the-sake-of-a-memory-10073852"),
- ("https://myria-moon.deviantart.com"
- "/art/Aime-Moi-part-en-vadrouille-261986576"),
- ("https://zzz.deviantart.com/art/zzz-1234567890"),
- )
-
- skip = Extractor.skip
-
- def __init__(self, match):
- DeviantartExtractor.__init__(self, match)
- self.path = match.group(3)
-
- def deviations(self):
- url = "{}/{}/{}".format(self.root, self.user, self.path)
- response = self._html_request(url, fatal=False)
- deviation_id = text.extract(response.text, '//deviation/', '"')[0]
- if response.status_code >= 400 or not deviation_id:
- raise exception.NotFoundError("image")
- return (self.api.deviation(deviation_id),)
-
-
class DeviantartStashExtractor(DeviantartExtractor):
"""Extractor for sta.sh-ed deviations"""
subcategory = "stash"
@@ -558,54 +503,6 @@ class DeviantartJournalExtractor(DeviantartExtractor):
return self.api.browse_user_journals(self.user, self.offset)
-class DeviantartScrapsExtractor(DeviantartExtractor):
- """Extractor for an artist's scraps"""
- subcategory = "scraps"
- directory_fmt = ("{category}", "{username}", "Scraps")
- archive_fmt = "s_{username}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/gallery/\?catpath=scraps\b"
- test = (
- ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps", {
- "count": 12,
- "options": (("original", False),),
- }),
- ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"),
- )
-
- def deviations(self):
- url = "{}/{}/gallery/?catpath=scraps".format(self.root, self.user)
- page = self._html_request(url).text
- csrf, pos = text.extract(page, '"csrf":"', '"')
- iid , pos = text.extract(page, '"requestid":"', '"', pos)
-
- url = "https://www.deviantart.com/dapi/v1/gallery/0"
- data = {
- "username": self.user,
- "offset": self.offset,
- "limit": "24",
- "catpath": "scraps",
- "_csrf": csrf,
- "dapiIid": iid + "-jsok7403-1.1"
- }
-
- while True:
- content = self.request(
- url, method="POST", data=data).json()["content"]
-
- for item in content["results"]:
- if item["html"].startswith('<div class="ad-container'):
- continue
- deviation_url = text.extract(item["html"], 'href="', '"')[0]
- page = self._html_request(deviation_url).text
- deviation_id = text.extract(page, '//deviation/', '"')[0]
- if deviation_id:
- yield self.api.deviation(deviation_id)
-
- if not content["has_more"]:
- return
- data["offset"] = content["next_offset"]
-
-
class DeviantartPopularExtractor(DeviantartExtractor):
"""Extractor for popular deviations"""
subcategory = "popular"
@@ -649,6 +546,247 @@ class DeviantartPopularExtractor(DeviantartExtractor):
deviation["popular"] = self.popular
+class DeviantartExtractorV2(Extractor):
+ """Base class for deviantart extractors using the NAPI"""
+ category = "deviantart"
+ directory_fmt = ("{category}", "{author[username]!l}")
+ filename_fmt = "{category}_{index}_{title}.{extension}"
+ root = "https://www.deviantart.com"
+
+ def __init__(self, match=None):
+ Extractor.__init__(self, match)
+ self.offset = 0
+ self.extra = self.config("extra", False)
+ self.quality = self.config("quality", "100")
+ self.user = match.group(1) or match.group(2)
+
+ if self.quality:
+ self.quality = "q_{}".format(self.quality)
+
+ def items(self):
+ url = (
+ self.root + "/_napi/da-browse/shared_api/deviation/extended_fetch"
+ )
+ params = {
+ "deviationid" : None,
+ "username" : None,
+ "type" : None,
+ "include_session": "false",
+ }
+ headers = {
+ "Referer": self.root,
+ }
+
+ yield Message.Version, 1
+ for deviation in self.deviations():
+ params["deviationid"] = deviation["deviationId"]
+ params["username"] = deviation["author"]["username"]
+ params["type"] = "journal" if deviation["isJournal"] else "art"
+ data = self.request(url, params=params, headers=headers).json()
+
+ if "deviation" not in data:
+ self.log.warning("Skipping %s", params["deviationid"])
+ continue
+ deviation = self._extract(data)
+
+ yield Message.Directory, deviation
+ yield Message.Url, deviation["target"]["src"], deviation
+ if self.extra:
+ for match in DeviantartStashExtractor.pattern.finditer(
+ deviation["description"]):
+ deviation["_extractor"] = DeviantartStashExtractor
+ yield Message.Queue, match.group(0), deviation
+
+ def _extract(self, data):
+ deviation = data["deviation"]
+ extended = deviation["extended"]
+ files = deviation["files"]
+ del deviation["extended"]
+ del deviation["files"]
+
+ # prepare deviation metadata
+ deviation["description"] = extended.get("description", "")
+ deviation["username"] = self.user.lower()
+ deviation["stats"] = extended["stats"]
+ deviation["stats"]["comments"] = data["comments"]["total"]
+ deviation["index"] = deviation["deviationId"]
+ deviation["tags"] = [t["name"] for t in extended.get("tags") or ()]
+ deviation["date"] = text.parse_datetime(
+ deviation["publishedTime"])
+ deviation["category_path"] = "/".join(
+ extended[key]["displayNameEn"]
+ for key in ("typeFacet", "contentFacet", "categoryFacet")
+ if key in extended
+ )
+
+ # extract download target
+ target = files[-1]
+ name = files[0]["src"]
+
+ if target["type"] == "gif":
+ pass
+ elif target["type"] == "video":
+ # select largest video
+ target = max(
+ files, key=lambda x: text.parse_int(x.get("quality", "")[:-1]))
+ name = target["src"]
+ elif target["type"] == "flash":
+ if target["src"].startswith("https://sandbox.deviantart.com"):
+ # extract SWF file from "sandbox"
+ target["src"] = text.extract(
+ self.request(target["src"]).text,
+ 'id="sandboxembed" src="', '"',
+ )[0]
+ elif "download" in extended:
+ target = extended["download"]
+ target["src"] = target["url"]
+ del target["url"]
+
+ # url rewrites
+ if target["src"].startswith("https://images-wixmp-"):
+ if deviation["index"] <= 790677560:
+ # https://github.com/r888888888/danbooru/issues/4069
+ target["src"] = re.sub(
+ r"(/f/[^/]+/[^/]+)/v\d+/.*",
+ r"/intermediary\1", target["src"])
+ if self.quality:
+ target["src"] = re.sub(
+ r"q_\d+", self.quality, target["src"])
+
+ text.nameext_from_url(name, target)
+ if target["filename"].endswith("-150"):
+ target["filename"] = target["filename"][:-4]
+ if not target["filename"].count("-"):
+ name, _, hid = target["filename"].rpartition("_")
+ target["filename"] = name + "-" + hid
+ deviation["target"] = target
+ deviation["filename"] = target["filename"]
+ deviation["extension"] = target["extension"] = (
+ text.ext_from_url(target["src"]))
+ return deviation
+
+
+class DeviantartDeviationExtractor(DeviantartExtractorV2):
+ """Extractor for single deviations"""
+ subcategory = "deviation"
+ archive_fmt = "{index}.{extension}"
+ pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?&#]+-)?(\d+)"
+ test = (
+ (("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
+ "options": (("original", 0),),
+ "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
+ }),
+ ("https://www.deviantart.com/zzz/art/zzz-1234567890", {
+ "count": 0,
+ }),
+ (("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), {
+ "pattern": (r"https://www.deviantart.com/download/261986576"
+ r"/[\w-]+\.jpg\?token=\w+&ts=\d+"),
+ }),
+ # wixmp URL rewrite
+ (("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), {
+ "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
+ r"/intermediary/f/[^/]+/[^.]+\.jpg$")
+ }),
+ # wixmp URL rewrite v2 (#369)
+ (("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104"), {
+ "pattern": r"https://images-wixmp-\w+\.wixmp\.com/.*,q_100,"
+ }),
+ # non-download URL for GIFs (#242)
+ (("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), {
+ "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
+ r"/f/[^/]+/[^.]+\.gif\?token="),
+ }),
+ # external URLs from description (#302)
+ (("https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498"), {
+ "options": (("extra", 1), ("original", 0)),
+ "pattern": r"https?://sta\.sh/\w+$",
+ "range": "2-",
+ "count": 4,
+ }),
+ # video
+ ("https://www.deviantart.com/chi-u/art/-VIDEO-Brushes-330774593", {
+ "url": "3b6e6e761d2d393fa61a4dc3ed6e7db51b14d07b",
+ "keyword": {
+ "target": {
+ "duration": 306,
+ "extension": "mp4",
+ "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5",
+ "filesize": 9963639,
+ "quality": "1080p",
+ "src": str,
+ "type": "video",
+ },
+ }
+ }),
+ # archive
+ ("https://www.deviantart.com/itsvenue/art/-brush-pngs-14-763300948", {
+ "pattern": r"https://.+deviantart.com/download/763300948/.*\.rar",
+ }),
+ # swf
+ ("https://www.deviantart.com/ikatxfruti/art/Bang-Bang-528130222", {
+ "pattern": r"https://images-wixmp-.*wixmp.com/f/.*\.swf",
+ }),
+ # old-style URLs
+ ("https://shimoda7.deviantart.com"
+ "/art/For-the-sake-of-a-memory-10073852"),
+ ("https://myria-moon.deviantart.com"
+ "/art/Aime-Moi-part-en-vadrouille-261986576"),
+ ("https://zzz.deviantart.com/art/zzz-1234567890"),
+ )
+
+ skip = Extractor.skip
+
+ def __init__(self, match):
+ DeviantartExtractorV2.__init__(self, match)
+ self.type = match.group(3)
+ self.deviation_id = match.group(4)
+
+ def deviations(self):
+ return ({
+ "deviationId": self.deviation_id,
+ "author" : {"username": self.user},
+ "isJournal" : self.type == "journal",
+ },)
+
+
+class DeviantartScrapsExtractor(DeviantartExtractorV2):
+ """Extractor for an artist's scraps"""
+ subcategory = "scraps"
+ directory_fmt = ("{category}", "{username}", "Scraps")
+ archive_fmt = "s_{username}_{index}.{extension}"
+ pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
+ test = (
+ ("https://www.deviantart.com/shimoda7/gallery/scraps", {
+ "count": 12,
+ }),
+ ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps"),
+ ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"),
+ )
+
+ def deviations(self):
+ url = self.root + "/_napi/da-user-profile/api/gallery/contents"
+ params = {
+ "username" : self.user,
+ "offset" : self.offset,
+ "limit" : "24",
+ "scraps_folder": "true",
+ }
+ headers = {
+ "Referer": "{}/{}/gallery/scraps".format(self.root, self.user),
+ }
+
+ while True:
+ data = self.request(url, params=params, headers=headers).json()
+
+ for obj in data["results"]:
+ yield obj["deviation"]
+
+ if not data["hasMore"]:
+ return
+ params["offset"] = data["nextOffset"]
+
+
class DeviantartAPI():
"""Minimal interface for the DeviantArt API
@@ -805,7 +943,7 @@ class DeviantartAPI():
self.authenticate(None if public else self.refresh_token)
response = self.extractor.request(
- url, headers=self.headers, params=params, fatal=False)
+ url, headers=self.headers, params=params, fatal=None)
data = response.json()
status = response.status_code
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index ce2e83b..4ec7f00 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -33,16 +33,16 @@ class GelbooruExtractor(booru.XmlParserMixin,
self.session.cookies["fringeBenefits"] = "yup"
def items_noapi(self):
- data = self.get_metadata()
-
yield Message.Version, 1
- yield Message.Directory, data
+ data = self.get_metadata()
for post in self.get_posts():
post = self.get_post_data(post)
url = post["file_url"]
post.update(data)
- yield Message.Url, url, text.nameext_from_url(url, post)
+ text.nameext_from_url(url, post)
+ yield Message.Directory, post
+ yield Message.Url, url, post
def get_posts(self):
"""Return an iterable containing all relevant post objects"""
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index c112465..e4f18b3 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -21,7 +21,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
test = (
("https://hitomi.la/galleries/867789.html", {
"url": "cb759868d090fe0e2655c3e29ebf146054322b6d",
- "keyword": "067b5d9b9c0f98530cd5dd2444e0f5a5b4b00d38",
+ "keyword": "d097a8db8e810045131b4510c41714004f9eff3a",
}),
("https://hitomi.la/galleries/1036181.html", {
# "aa" subdomain for gallery-id ending in 1 (#142)
diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py
index 6980185..76b2c38 100644
--- a/gallery_dl/extractor/imagebam.py
+++ b/gallery_dl/extractor/imagebam.py
@@ -41,14 +41,14 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
pattern = r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([0-9a-z]+)"
test = (
("http://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
- "url": "fb01925129a1ff1941762eaa3a2783a66de6847f",
+ "url": "76d976788ae2757ac81694736b07b72356f5c4c8",
"keyword": "9e25b8827474ac93c54855e798d60aa3cbecbd7a",
"content": "596e6bfa157f2c7169805d50075c2986549973a8",
}),
("http://www.imagebam.com/gallery/op9dwcklwdrrguibnkoe7jxgvig30o5p", {
# more than 100 images; see issue #219
"count": 107,
- "url": "f92ce5b17676b6ea69288f0aef26f4cdbea7fd8d",
+ "url": "32ae6fe5dc3e4ca73ff6252e522d16473595d1d1",
}),
("http://www.imagebam.com/gallery/gsl8teckymt4vbvx1stjkyk37j70va2c", {
"exception": exception.NotFoundError,
@@ -108,7 +108,7 @@ class ImagebamImageExtractor(ImagebamExtractor):
r"/(?:image/|(?:[0-9a-f]{2}/){3})([0-9a-f]+)")
test = (
("http://www.imagebam.com/image/94d56c502511890", {
- "url": "b384893c35a01a09c58018db71ddc4cf2480be95",
+ "url": "5e9ba3b1451f8ded0ae3a1b84402888893915d4a",
"keyword": "4263d4840007524129792b8587a562b5d20c2687",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
}),
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
index 442634b..4aa670b 100644
--- a/gallery_dl/extractor/imgbb.py
+++ b/gallery_dl/extractor/imgbb.py
@@ -17,6 +17,7 @@ import json
class ImgbbExtractor(Extractor):
"""Base class for imgbb extractors"""
category = "imgbb"
+ directory_fmt = ("{category}", "{user}")
filename_fmt = "{title} {id}.{extension}"
archive_fmt = "{id}"
root = "https://imgbb.com"
@@ -145,7 +146,6 @@ class ImgbbAlbumExtractor(ImgbbExtractor):
class ImgbbUserExtractor(ImgbbExtractor):
"""Extractor for user profiles in imgbb.com"""
subcategory = "user"
- directory_fmt = ("{category}", "{user}")
pattern = r"(?:https?://)?([^.]+)\.imgbb\.com/?(?:\?([^#]+))?$"
test = ("https://folkie.imgbb.com", {
"range": "1-80",
@@ -177,3 +177,34 @@ class ImgbbUserExtractor(ImgbbExtractor):
"params_hidden[userid]": user,
"params_hidden[from]" : "user",
})
+
+
+class ImgbbImageExtractor(ImgbbExtractor):
+ subcategory = "image"
+ pattern = r"(?:https?://)?ibb\.co/(?!album/)([^/?&#]+)"
+ test = ("https://ibb.co/NLZHgqS", {
+ "url": "fbca86bac09de6fc0304054b2170b423ca1e84fa",
+ "keyword": "5d70e779bad03b2dc5273b627638045168671157",
+ })
+
+ def __init__(self, match):
+ ImgbbExtractor.__init__(self, match)
+ self.image_id = match.group(1)
+
+ def items(self):
+ url = "https://ibb.co/" + self.image_id
+ extr = text.extract_from(self.request(url).text)
+
+ image = {
+ "id" : self.image_id,
+ "title" : text.unescape(extr('"og:title" content="', '"')),
+ "url" : extr('"og:image" content="', '"'),
+ "width" : text.parse_int(extr('"og:image:width" content="', '"')),
+ "height": text.parse_int(extr('"og:image:height" content="', '"')),
+ "user" : extr('rel="author">', '<').lower(),
+ }
+ image["extension"] = text.ext_from_url(image["url"])
+
+ yield Message.Version, 1
+ yield Message.Directory, image
+ yield Message.Url, image["url"], image
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index c5e3d17..8523523 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -20,13 +20,19 @@ class ImgurExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.item_id = match.group(1)
+ self.key = match.group(1)
self.mp4 = self.config("mp4", True)
- def _get_data(self, path):
+ def _extract_data(self, path):
response = self.request(self.root + path, notfound=self.subcategory)
- data = text.extract(response.text, "image : ", ",\n")[0]
- return self._clean(json.loads(data))
+ data = json.loads(text.extract(
+ response.text, "image : ", ",\n")[0])
+ try:
+ del data["adConfig"]
+ del data["isAd"]
+ except KeyError:
+ pass
+ return data
def _prepare(self, image):
image["ext"] = image["ext"].partition("?")[0]
@@ -37,18 +43,9 @@ class ImgurExtractor(Extractor):
image["extension"] = image["ext"][1:]
return url
- @staticmethod
- def _clean(data):
- try:
- del data["adConfig"]
- del data["isAd"]
- except KeyError:
- pass
- return data
-
class ImgurImageExtractor(ImgurExtractor):
- """Extractor for individual images from imgur.com"""
+ """Extractor for individual images on imgur.com"""
subcategory = "image"
filename_fmt = "{category}_{hash}{title:?_//}.{extension}"
archive_fmt = "{hash}"
@@ -101,22 +98,21 @@ class ImgurImageExtractor(ImgurExtractor):
)
def items(self):
- image = self._get_data("/" + self.item_id)
+ image = self._extract_data("/" + self.key)
url = self._prepare(image)
-
yield Message.Version, 1
yield Message.Directory, image
yield Message.Url, url, image
class ImgurAlbumExtractor(ImgurExtractor):
- """Extractor for image albums from imgur.com"""
+ """Extractor for imgur albums"""
subcategory = "album"
directory_fmt = ("{category}", "{album[hash]}{album[title]:? - //}")
filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}"
archive_fmt = "{album[hash]}_{hash}"
pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
- r"/(?:a|gallery|t/unmuted)/(\w{7}|\w{5})")
+ r"/(?:a|t/unmuted)/(\w{7}|\w{5})")
test = (
("https://imgur.com/a/TcBmP", {
"url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
@@ -147,7 +143,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
"width": int,
},
}),
- ("https://imgur.com/gallery/eD9CT", { # large album
+ ("https://imgur.com/a/eD9CT", { # large album
"url": "4ee94de31ff26be416271bc0b1ea27b9349c9937",
}),
("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash
@@ -164,13 +160,13 @@ class ImgurAlbumExtractor(ImgurExtractor):
)
def items(self):
- album = self._get_data("/a/" + self.item_id + "/all")
+ album = self._extract_data("/a/" + self.key + "/all")
images = album["album_images"]["images"]
del album["album_images"]
if int(album["num_images"]) > len(images):
url = "{}/ajaxalbums/getimages/{}/hit.json".format(
- self.root, self.item_id)
+ self.root, self.key)
images = self.request(url).json()["data"]["images"]
yield Message.Version, 1
@@ -180,3 +176,32 @@ class ImgurAlbumExtractor(ImgurExtractor):
image["num"] = num
image["album"] = album
yield Message.Url, url, image
+
+
+class ImgurGalleryExtractor(ImgurExtractor):
+ """Extractor for imgur galleries"""
+ subcategory = "gallery"
+ pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
+ r"/gallery/(\w{7}|\w{5})")
+ test = (
+ ("https://imgur.com/gallery/zf2fIms", { # non-album gallery (#380)
+ "pattern": "https://imgur.com/zf2fIms",
+ }),
+ ("https://imgur.com/gallery/eD9CT", {
+ "pattern": "https://imgur.com/a/eD9CT",
+ }),
+ )
+
+ def items(self):
+ url = self.root + "/a/" + self.key
+ with self.request(url, method="HEAD", fatal=False) as response:
+ code = response.status_code
+
+ if code < 400:
+ extr = ImgurAlbumExtractor
+ else:
+ extr = ImgurImageExtractor
+ url = self.root + "/" + self.key
+
+ yield Message.Version, 1
+ yield Message.Queue, url, {"_extractor": extr}
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 475e24b..e5cfe8b 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -8,11 +8,10 @@
"""Extract images from https://www.instagram.com/"""
-import hashlib
-import json
from .common import Extractor, Message
from .. import text, exception
from ..cache import cache
+import json
class InstagramExtractor(Extractor):
@@ -37,10 +36,11 @@ class InstagramExtractor(Extractor):
data.update(metadata)
yield Message.Directory, data
- if data['typename'] == 'GraphImage':
+ if data['typename'] in ('GraphImage', 'GraphStoryImage', 'GraphStoryVideo'):
yield Message.Url, data['display_url'], \
text.nameext_from_url(data['display_url'], data)
elif data['typename'] == 'GraphVideo':
+ data["extension"] = None
yield Message.Url, \
'ytdl:{}/p/{}/'.format(self.root, data['shortcode']), data
@@ -140,33 +140,113 @@ class InstagramExtractor(Extractor):
return medias
+ def _extract_stories(self, url):
+ if self.highlight_id:
+ user_id = ''
+ highlight_id = '"{}"'.format(self.highlight_id)
+ query_hash = '30a89afdd826d78a5376008a7b81c205'
+ else:
+ page = self.request(url).text
+ shared_data = self._extract_shared_data(page)
+
+ # If no stories are present the URL redirects to `ProfilePage'
+ if 'StoriesPage' not in shared_data['entry_data']:
+ return []
+
+ user_id = '"{}"'.format(
+ shared_data['entry_data']['StoriesPage'][0]['user']['id'])
+ highlight_id = ''
+ query_hash = 'cda12de4f7fd3719c0569ce03589f4c4'
+
+ variables = (
+ '{{'
+ '"reel_ids":[{}],"tag_names":[],"location_ids":[],'
+ '"highlight_reel_ids":[{}],"precomposed_overlay":true,'
+ '"show_story_viewer_list":true,'
+ '"story_viewer_fetch_count":50,"story_viewer_cursor":"",'
+ '"stories_video_dash_manifest":false}}'
+ ).format(user_id, highlight_id)
+ headers = {
+ "X-Requested-With": "XMLHttpRequest",
+ }
+ url = '{}/graphql/query/?query_hash={}&variables={}'.format(
+ self.root,
+ query_hash,
+ variables,
+ )
+ shared_data = self.request(url, headers=headers).json()
+
+ # If there are stories present but the user is not authenticated or
+ # does not have permissions no stories are returned.
+ if not shared_data['data']['reels_media']:
+ return [] # no stories present
+
+ medias = []
+ for media in shared_data['data']['reels_media'][0]['items']:
+ media_data = {
+ 'owner_id': media['owner']['id'],
+ 'username': media['owner']['username'],
+ 'date': text.parse_timestamp(media['taken_at_timestamp']),
+ 'expires': text.parse_timestamp(media['expiring_at_timestamp']),
+ 'media_id': media['id'],
+ 'typename': media['__typename'],
+ }
+ if media['__typename'] == 'GraphStoryImage':
+ media_data.update({
+ 'display_url': media['display_url'],
+ 'height': text.parse_int(media['dimensions']['height']),
+ 'width': text.parse_int(media['dimensions']['width']),
+ })
+ elif media['__typename'] == 'GraphStoryVideo':
+ vr = media['video_resources'][0]
+ media_data.update({
+ 'duration': text.parse_float(media['video_duration']),
+ 'display_url': vr['src'],
+ 'height': text.parse_int(vr['config_height']),
+ 'width': text.parse_int(vr['config_width']),
+ })
+ medias.append(media_data)
+
+ return medias
+
def _extract_page(self, url, page_type):
shared_data_fields = {
'ProfilePage': {
+ 'page': 'ProfilePage',
'node': 'user',
'node_id': 'id',
'edge_to_medias': 'edge_owner_to_timeline_media',
'variables_id': 'id',
- 'query_hash': '66eb9403e44cc12e5b5ecda48b667d41',
+ 'query_hash': 'f2405b236d85e8296cf30347c9f08c2a',
+ },
+ 'ProfileChannelPage': {
+ 'page': 'ProfilePage',
+ 'node': 'user',
+ 'node_id': 'id',
+ 'edge_to_medias': 'edge_felix_video_timeline',
+ 'variables_id': 'id',
+ 'query_hash': 'bc78b344a68ed16dd5d7f264681c4c76',
},
'TagPage': {
+ 'page': 'TagPage',
'node': 'hashtag',
'node_id': 'name',
'edge_to_medias': 'edge_hashtag_to_media',
'variables_id': 'tag_name',
- 'query_hash': 'f92f56d47dc7a55b606908374b43a314',
+ 'query_hash': 'f12c9ec5e46a3173b2969c712ad84744',
},
}
page = self.request(url).text
shared_data = self._extract_shared_data(page)
psdf = shared_data_fields[page_type]
+ csrf = shared_data["config"]["csrf_token"]
while True:
# Deal with different structure of pages: the first page
# has interesting data in `entry_data', next pages in `data'.
if 'entry_data' in shared_data:
- base_shared_data = shared_data['entry_data'][page_type][0]['graphql']
+ base_shared_data = shared_data['entry_data'][psdf['page']][0]['graphql']
# variables_id is available only in the first page
variables_id = base_shared_data[psdf['node']][psdf['node_id']]
@@ -192,7 +272,8 @@ class InstagramExtractor(Extractor):
)
headers = {
"X-Requested-With": "XMLHttpRequest",
- "X-Instagram-GIS": hashlib.md5(variables.encode()).hexdigest(),
+ "X-CSRFToken": csrf,
+ "X-IG-App-ID": "936619743392459",
}
url = '{}/graphql/query/?query_hash={}&variables={}'.format(
self.root,
@@ -204,14 +285,20 @@ class InstagramExtractor(Extractor):
def _extract_profilepage(self, url):
yield from self._extract_page(url, 'ProfilePage')
+ def _extract_profilechannelpage(self, url):
+ yield from self._extract_page(url, 'ProfileChannelPage')
+
def _extract_tagpage(self, url):
yield from self._extract_page(url, 'TagPage')
+ def _extract_storiespage(self, url):
+ yield from self._extract_stories(url)
+
class InstagramImageExtractor(InstagramExtractor):
"""Extractor for PostPage"""
subcategory = "image"
- pattern = r"(?:https?://)?(?:www\.)?instagram\.com/p/([^/?&#]+)"
+ pattern = r"(?:https?://)?(?:www\.)?instagram\.com/(?:p|tv)/([^/?&#]+)"
test = (
# GraphImage
("https://www.instagram.com/p/BqvsDleB3lV/", {
@@ -258,6 +345,22 @@ class InstagramImageExtractor(InstagramExtractor):
}
}),
+ # GraphVideo (IGTV)
+ ("https://www.instagram.com/tv/BkQjCfsBIzi/", {
+ "url": "64208f408e11cbbca86c2df4488e90262ae9d9ec",
+ "keyword": {
+ "date": "type:datetime",
+ "description": str,
+ "height": int,
+ "likes": int,
+ "media_id": "1806097553666903266",
+ "shortcode": "BkQjCfsBIzi",
+ "typename": "GraphVideo",
+ "username": "instagram",
+ "width": int,
+ }
+ }),
+
# GraphSidecar with 2 embedded GraphVideo objects
("https://www.instagram.com/p/BtOvDOfhvRr/", {
"count": 2,
@@ -283,10 +386,11 @@ class InstagramUserExtractor(InstagramExtractor):
"""Extractor for ProfilePage"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
- r"/(?!p/|explore/|directory/|accounts/)([^/?&#]+)")
+ r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)"
+ r"([^/?&#]+)/?$")
test = ("https://www.instagram.com/instagram/", {
- "range": "1-12",
- "count": ">= 12",
+ "range": "1-16",
+ "count": ">= 16",
})
def __init__(self, match):
@@ -298,6 +402,26 @@ class InstagramUserExtractor(InstagramExtractor):
return self._extract_profilepage(url)
+class InstagramChannelExtractor(InstagramExtractor):
+ """Extractor for ProfilePage channel"""
+ subcategory = "channel"
+ pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
+ r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)"
+ r"([^/?&#]+)/channel")
+ test = ("https://www.instagram.com/instagram/channel/", {
+ "range": "1-16",
+ "count": ">= 16",
+ })
+
+ def __init__(self, match):
+ InstagramExtractor.__init__(self, match)
+ self.username = match.group(1)
+
+ def instagrams(self):
+ url = '{}/{}/channel/'.format(self.root, self.username)
+ return self._extract_profilechannelpage(url)
+
+
class InstagramTagExtractor(InstagramExtractor):
"""Extractor for TagPage"""
subcategory = "tag"
@@ -305,8 +429,8 @@ class InstagramTagExtractor(InstagramExtractor):
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
r"/explore/tags/([^/?&#]+)")
test = ("https://www.instagram.com/explore/tags/instagram/", {
- "range": "1-12",
- "count": ">= 12",
+ "range": "1-16",
+ "count": ">= 16",
})
def __init__(self, match):
@@ -319,3 +443,22 @@ class InstagramTagExtractor(InstagramExtractor):
def instagrams(self):
url = '{}/explore/tags/{}/'.format(self.root, self.tag)
return self._extract_tagpage(url)
+
+
+class InstagramStoriesExtractor(InstagramExtractor):
+ """Extractor for StoriesPage"""
+ subcategory = "stories"
+ pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
+ r"/stories/([^/?&#]+)(?:/(\d+))?")
+ test = (
+ ("https://www.instagram.com/stories/instagram/"),
+ ("https://www.instagram.com/stories/highlights/18042509488170095/"),
+ )
+
+ def __init__(self, match):
+ InstagramExtractor.__init__(self, match)
+ self.username, self.highlight_id = match.groups()
+
+ def instagrams(self):
+ url = '{}/stories/{}/'.format(self.root, self.username)
+ return self._extract_storiespage(url)
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index 879d38b..a73eb86 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -62,7 +62,7 @@ class LusciousAlbumExtractor(LusciousBase, GalleryExtractor):
test = (
("https://luscious.net/albums/okinami-no-koigokoro_277031/", {
"url": "7e4984a271a1072ac6483e4228a045895aff86f3",
- "keyword": "ab4e5b71583fd439b4c8012a642aa8b58d8d0758",
+ "keyword": "07c0b915f2ab1cc3bbf28b76e7950fccee1213f3",
"content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
}),
("https://luscious.net/albums/virgin-killer-sweater_282582/", {
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 282c389..1ca1073 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -93,7 +93,7 @@ class NewgroundsUserExtractor(NewgroundsExtractor):
test = (
("https://blitzwuff.newgrounds.com/art", {
"url": "24b19c4a135a09889fac7b46a74e427e4308d02b",
- "keyword": "98566e0c8096a8099b8d71962fea7e31c8b098d4",
+ "keyword": "62981f7bdd66e1f1c72ab1d9b932423c156bc9a1",
}),
("https://blitzwuff.newgrounds.com/"),
)
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 4884497..ab5932d 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -11,6 +11,8 @@
from .common import Extractor, Message
from .. import text
from ..cache import memcache
+import collections
+import json
class PatreonExtractor(Extractor):
@@ -33,70 +35,92 @@ class PatreonExtractor(Extractor):
for post in self.posts():
yield Message.Directory, post
+ ids = set()
post["num"] = 0
content = post.get("content")
postfile = post.get("post_file")
- for url in text.extract_iter(content or "", 'src="', '"'):
+ for image in post["images"]:
+ url = image.get("download_url")
+ if not url:
+ continue
+ ids.add(url.split("/")[-2])
+ name = image.get("file_name") or self._filename(url) or url
+
post["num"] += 1
- yield Message.Url, url, text.nameext_from_url(url, post)
+ post["type"] = "image"
+ yield Message.Url, url, text.nameext_from_url(name, post)
- if postfile:
+ if postfile and postfile["url"].split("/")[-2] not in ids:
post["num"] += 1
+ post["type"] = "postfile"
text.nameext_from_url(postfile["name"], post)
yield Message.Url, postfile["url"], post
for attachment in post["attachments"]:
post["num"] += 1
+ post["type"] = "attachment"
text.nameext_from_url(attachment["name"], post)
yield Message.Url, attachment["url"], post
+ if content:
+ for url in text.extract_iter(content, 'src="', '"'):
+ post["num"] += 1
+ post["type"] = "content"
+ yield Message.Url, url, text.nameext_from_url(url, post)
+
def posts(self):
"""Return all relevant post objects"""
def _pagination(self, url):
headers = {"Referer": self.root}
- empty = []
while url:
posts = self.request(url, headers=headers).json()
- if "included" not in posts:
- return
-
- # collect attachments
- attachments = {}
- for inc in posts["included"]:
- if inc["type"] == "attachment":
- attachments[inc["id"]] = inc["attributes"]
-
- # update posts
- for post in posts["data"]:
- attr = post["attributes"]
- attr["id"] = text.parse_int(post["id"])
- attr["date"] = text.parse_datetime(
- attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
- attr["creator"] = self._user(
- post["relationships"]["user"]["links"]["related"])
-
- # add attachments to post attributes
- files = post["relationships"].get("attachments")
- if files:
- attr["attachments"] = [
- attachments[f["id"]]
- for f in files["data"]
- ]
- else:
- attr["attachments"] = empty
-
- yield attr
+ if "included" in posts:
+ included = self._transform(posts["included"])
+ for post in posts["data"]:
+ yield self._process(post, included)
if "links" not in posts:
return
url = posts["links"].get("next")
+ def _process(self, post, included):
+ """Process and extend a 'post' object"""
+ attr = post["attributes"]
+ attr["id"] = text.parse_int(post["id"])
+ attr["images"] = self._files(post, included, "images")
+ attr["attachments"] = self._files(post, included, "attachments")
+ attr["date"] = text.parse_datetime(
+ attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ attr["creator"] = self._user(
+ post["relationships"]["user"]["links"]["related"])
+ return attr
+
+ @staticmethod
+ def _transform(included):
+ """Transform 'included' into an easier to handle format"""
+ result = collections.defaultdict(dict)
+ for inc in included:
+ result[inc["type"]][inc["id"]] = inc["attributes"]
+ return result
+
+ @staticmethod
+ def _files(post, included, key):
+ """Build a list of files"""
+ files = post["relationships"].get(key)
+ if files and files.get("data"):
+ return [
+ included[file["type"]][file["id"]]
+ for file in files["data"]
+ ]
+ return []
+
@memcache(keyarg=1)
def _user(self, url):
+ """Fetch user information"""
user = self.request(url).json()["data"]
attr = user["attributes"]
attr["id"] = user["id"]
@@ -104,14 +128,21 @@ class PatreonExtractor(Extractor):
attr["created"], "%Y-%m-%dT%H:%M:%S.%f%z")
return attr
+ def _filename(self, url):
+ """Fetch filename from its Content-Disposition header"""
+ response = self.request(url, method="HEAD", fatal=False)
+ cd = response.headers.get("Content-Disposition")
+ return text.extract(cd, 'filename="', '"')[0]
+
@staticmethod
def _build_url(endpoint, query):
return (
"https://www.patreon.com/api/" + endpoint +
- "?include=user,attachments,user_defined_tags,campaign,poll.choices"
- ",poll.current_user_responses.user,poll.current_user_responses.cho"
- "ice,poll.current_user_responses.poll,access_rules.tier.null"
+ "?include=user,images,attachments,user_defined_tags,campaign,poll."
+ "choices,poll.current_user_responses.user,poll.current_user_respon"
+ "ses.choice,poll.current_user_responses.poll,access_rules.tier.nul"
+ "l"
"&fields[post]=change_visibility_at,comment_count,content,current_"
"user_can_delete,current_user_can_view,current_user_has_liked,embe"
@@ -133,7 +164,8 @@ class PatreonCreatorExtractor(PatreonExtractor):
"""Extractor for a creator's works"""
subcategory = "creator"
pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
- r"/(?!(?:home|join|login|signup)(?:$|[/?&#]))([^/?&#]+)/?")
+ r"/(?!(?:home|join|posts|login|signup)(?:$|[/?&#]))"
+ r"([^/?&#]+)/?")
test = ("https://www.patreon.com/koveliana", {
"range": "1-25",
"count": ">= 25",
@@ -144,6 +176,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
"creator": dict,
"date": "type:datetime",
"id": int,
+ "images": list,
"like_count": int,
"post_type": str,
"published_at": str,
@@ -181,3 +214,26 @@ class PatreonUserExtractor(PatreonExtractor):
"&filter[is_following]=true"
))
return self._pagination(url)
+
+
+class PatreonPostExtractor(PatreonExtractor):
+ """Extractor for media from a single post"""
+ subcategory = "post"
+ pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
+ r"/posts/[^/?&#]*?(\d+)")
+ test = ("https://www.patreon.com/posts/precious-metal-23563293", {
+ "count": 4,
+ })
+
+ def __init__(self, match):
+ PatreonExtractor.__init__(self, match)
+ self.post_id = match.group(1)
+
+ def posts(self):
+ url = "{}/posts/{}".format(self.root, self.post_id)
+ page = self.request(url).text
+ data = text.extract(page, "window.patreon.bootstrap,", "\n});")[0]
+ post = json.loads(data + "}")["post"]
+
+ included = self._transform(post["included"])
+ return (self._process(post["data"], included),)
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 76d4dc4..4f8ee9c 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -18,8 +18,8 @@ class PixivExtractor(Extractor):
"""Base class for pixiv extractors"""
category = "pixiv"
directory_fmt = ("{category}", "{user[id]} {user[account]}")
- filename_fmt = "{category}_{user[id]}_{id}{num}.{extension}"
- archive_fmt = "{id}{num}.{extension}"
+ filename_fmt = "{id}_p{num}.{extension}"
+ archive_fmt = "{id}{suffix}.{extension}"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -40,9 +40,10 @@ class PixivExtractor(Extractor):
del work["meta_single_page"]
del work["image_urls"]
del work["meta_pages"]
- work["num"] = ""
+ work["num"] = 0
work["tags"] = [tag["name"] for tag in work["tags"]]
work["date"] = text.parse_datetime(work["create_date"])
+ work["suffix"] = ""
work.update(metadata)
yield Message.Directory, work
@@ -55,20 +56,17 @@ class PixivExtractor(Extractor):
url = ugoira["zip_urls"]["medium"].replace(
"_ugoira600x600", "_ugoira1920x1080")
work["frames"] = ugoira["frames"]
- work["extension"] = "zip"
- yield Message.Url, url, work
+ yield Message.Url, url, text.nameext_from_url(url, work)
elif work["page_count"] == 1:
url = meta_single_page["original_image_url"]
- work["extension"] = url.rpartition(".")[2]
- yield Message.Url, url, work
+ yield Message.Url, url, text.nameext_from_url(url, work)
else:
- for num, img in enumerate(meta_pages):
+ for work["num"], img in enumerate(meta_pages):
url = img["image_urls"]["original"]
- work["num"] = "_p{:02}".format(num)
- work["extension"] = url.rpartition(".")[2]
- yield Message.Url, url, work
+ work["suffix"] = "_p{:02}".format(work["num"])
+ yield Message.Url, url, text.nameext_from_url(url, work)
def works(self):
"""Return an iterable containing all relevant 'work'-objects"""
diff --git a/gallery_dl/extractor/pururin.py b/gallery_dl/extractor/pururin.py
index fa4eb81..aa5c9c6 100644
--- a/gallery_dl/extractor/pururin.py
+++ b/gallery_dl/extractor/pururin.py
@@ -29,7 +29,7 @@ class PururinGalleryExtractor(GalleryExtractor):
"artist" : ["Shoda Norihiro"],
"group" : ["Obsidian Order"],
"parody" : ["Kantai Collection"],
- "characters": ["Iowa", "Teitoku"],
+ "characters": ["Admiral", "Iowa"],
"tags" : list,
"type" : "Doujinshi",
"collection": "",
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index 59d502a..f97454b 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -117,6 +117,8 @@ class ReactorExtractor(SharedConfigMixin, Extractor):
url = text.extract(image, ' src="', '"')[0]
if not url:
continue
+ if url.startswith("//"):
+ url = "http:" + url
width = text.extract(image, ' width="', '"')[0]
height = text.extract(image, ' height="', '"')[0]
image_id = url.rpartition("-")[2].partition(".")[0]
@@ -268,8 +270,8 @@ class JoyreactorPostExtractor(ReactorPostExtractor):
"keyword": "dbe148d576f2fc9431020c557ddb78f449e48c47",
}),
("http://joyreactor.com/post/3668724", { # youtube embed
- "url": "be2589e2e8f3ffcaf41b34bc28bfad850ccea34a",
- "keyword": "da61b9e2887db95759950df5fb89c9d32f8e7651",
+ "url": "bf1666eddcff10c9b58f6be63fa94e4e13074214",
+ "keyword": "989112c7888e9cc80fd35870180c6c98165d953b",
}),
("http://joyreactor.cc/post/1299", { # "malformed" JSON
"url": "ac900743ed7cf1baf3db3b531c3bc414bf1ffcde",
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 2ba4b99..94e95e8 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -234,7 +234,7 @@ class RedditAPI():
url = "https://oauth.reddit.com" + endpoint
params["raw_json"] = 1
self.authenticate()
- response = self.extractor.request(url, params=params, fatal=False)
+ response = self.extractor.request(url, params=params, fatal=None)
remaining = response.headers.get("x-ratelimit-remaining")
if remaining and float(remaining) < 2:
wait = int(response.headers["x-ratelimit-reset"])
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index da9735e..bb8a2ae 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -40,17 +40,18 @@ class SankakuExtractor(SharedConfigMixin, Extractor):
def items(self):
self.login()
- data = self.get_metadata()
yield Message.Version, 1
- yield Message.Directory, data
+ data = self.get_metadata()
for post_id in util.advance(self.get_posts(), self.start_post):
self.wait()
post = self.get_post_data(post_id)
url = post["file_url"]
post.update(data)
- yield Message.Url, url, text.nameext_from_url(url, post)
+ text.nameext_from_url(url, post)
+ yield Message.Directory, post
+ yield Message.Url, url, post
def skip(self, num):
self.start_post += num
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index afd4eaa..38b7813 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -78,6 +78,7 @@ class SexcomExtractor(Extractor):
path += "/hd"
data["url"] = self.root + path
else:
+ data["extension"] = None
data["url"] = "ytdl:" + text.extract(
extr('<iframe', '>'), ' src="', '"')[0]
else:
diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py
index 5ad372d..8567155 100644
--- a/gallery_dl/extractor/simplyhentai.py
+++ b/gallery_dl/extractor/simplyhentai.py
@@ -8,14 +8,16 @@
"""Extract hentai-manga from https://www.simply-hentai.com/"""
-from .common import GalleryExtractor, Extractor, Message
+from .common import GalleryExtractor
from .. import text, util, exception
+import json
class SimplyhentaiGalleryExtractor(GalleryExtractor):
"""Extractor for image galleries from simply-hentai.com"""
category = "simplyhentai"
archive_fmt = "{image_id}"
+ root = "https://www.simply-hentai.com"
pattern = (r"(?:https?://)?(?!videos\.)([\w-]+\.simply-hentai\.com"
r"(?!/(?:album|gifs?|images?|series)(?:/|$))"
r"(?:/(?!(?:page|all-pages)(?:/|\.|$))[^/?&#]+)+)")
@@ -23,7 +25,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
(("https://original-work.simply-hentai.com"
"/amazon-no-hiyaku-amazon-elixir"), {
"url": "258289249990502c3138719cb89e995a60861e49",
- "keyword": "eba83ccdbab3022a2280c77aa747f9458196138b",
+ "keyword": "8b2400e4b466e8f46802fa5a6b917d2788bb7e8e",
}),
("https://www.simply-hentai.com/notfound", {
"exception": exception.GalleryDLException,
@@ -40,144 +42,30 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
self.session.headers["Referer"] = url
def metadata(self, page):
- extr = text.extract_from(page)
- split = text.split_html
-
- title = extr('<meta property="og:title" content="', '"')
- if not title:
+ path = text.extract(page, '<a class="preview" href="', '"')[0]
+ if not path:
raise exception.NotFoundError("gallery")
- data = {
- "title" : text.unescape(title),
- "gallery_id": text.parse_int(extr('/Album/', '/')),
- "parody" : split(extr('box-title">Series</div>', '</div>')),
- "language" : text.remove_html(extr(
- 'box-title">Language</div>', '</div>')) or None,
- "characters": split(extr('box-title">Characters</div>', '</div>')),
- "tags" : split(extr('box-title">Tags</div>', '</div>')),
- "artist" : split(extr('box-title">Artists</div>', '</div>')),
- "date" : text.parse_datetime(text.remove_html(
- extr('Uploaded', '</div>')), "%d.%m.%Y"),
+ page = self.request(self.root + path).text
+ data = json.loads(text.unescape(text.extract(
+ page, 'data-react-class="Reader" data-react-props="', '"')[0]))
+ self.manga = manga = data["manga"]
+
+ return {
+ "title" : manga["title"],
+ "parody" : manga["series"]["title"],
+ "language" : manga["language"]["name"],
+ "lang" : util.language_to_code(manga["language"]["name"]),
+ "characters": [x["name"] for x in manga["characters"]],
+ "tags" : [x["name"] for x in manga["tags"]],
+ "artist" : [x["name"] for x in manga["artists"]],
+ "gallery_id": text.parse_int(text.extract(
+ manga["images"][0]["sizes"]["full"], "/Album/", "/")[0]),
+ "date" : text.parse_datetime(
+ manga["publish_date"], "%Y-%m-%dT%H:%M:%S.%f%z"),
}
- data["lang"] = util.language_to_code(data["language"])
- return data
def images(self, _):
- url = self.chapter_url + "/all-pages"
- headers = {"Accept": "application/json"}
- images = self.request(url, headers=headers).json()
return [
- (urls["full"], {"image_id": text.parse_int(image_id)})
- for image_id, urls in sorted(images.items())
+ (image["sizes"]["full"], {"image_id": image["id"]})
+ for image in self.manga["images"]
]
-
-
-class SimplyhentaiImageExtractor(Extractor):
- """Extractor for individual images from simply-hentai.com"""
- category = "simplyhentai"
- subcategory = "image"
- directory_fmt = ("{category}", "{type}s")
- filename_fmt = "{category}_{token}{title:?_//}.{extension}"
- archive_fmt = "{token}"
- pattern = (r"(?:https?://)?(?:www\.)?(simply-hentai\.com"
- r"/(image|gif)/[^/?&#]+)")
- test = (
- (("https://www.simply-hentai.com/image"
- "/pheromomania-vol-1-kanzenban-isao-3949d8b3-400c-4b6"), {
- "url": "0338eb137830ab6f81e5f410d3936ef785d063d9",
- "keyword": "e10e5588481cab68329ef6ec1e5325206b2079a2",
- }),
- ("https://www.simply-hentai.com/gif/8915dfcf-0b6a-47c", {
- "url": "11c060d7ec4dfd0bd105300b6e1fd454674a5af1",
- "keyword": "dd97a4bb449c397d6fec9f43a1303c0fb168ae65",
- }),
- )
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.page_url = "https://www." + match.group(1)
- self.type = match.group(2)
-
- def items(self):
- extr = text.extract_from(self.request(self.page_url).text)
- title = extr('"og:title" content="' , '"')
- descr = extr('"og:description" content="', '"')
- url = extr('&quot;image&quot;:&quot;' , '&')
- url = extr("&quot;content&quot;:&quot;", "&") or url
-
- tags = text.extract(descr, " tagged with ", " online for free ")[0]
- if tags:
- tags = tags.split(", ")
- tags[-1] = tags[-1].partition(" ")[2]
- else:
- tags = []
-
- data = text.nameext_from_url(url, {
- "title": text.unescape(title) if title else "",
- "tags": tags,
- "type": self.type,
- })
- data["token"] = data["filename"].rpartition("_")[2]
-
- yield Message.Version, 1
- yield Message.Directory, data
- yield Message.Url, url, data
-
-
-class SimplyhentaiVideoExtractor(Extractor):
- """Extractor for hentai videos from simply-hentai.com"""
- category = "simplyhentai"
- subcategory = "video"
- directory_fmt = ("{category}", "{type}s")
- filename_fmt = "{title}{episode:?_//>02}.{extension}"
- archive_fmt = "{title}_{episode}"
- pattern = r"(?:https?://)?(videos\.simply-hentai\.com/[^/?&#]+)"
- test = (
- ("https://videos.simply-hentai.com/creamy-pie-episode-02", {
- "pattern": r"https://www\.googleapis\.com/drive/v3/files"
- r"/0B1ecQ8ZVLm3JcHZzQzBnVy1ZUmc\?alt=media&key=[\w-]+",
- "keyword": "706790708b14773efc1e075ddd3b738a375348a5",
- "count": 1,
- }),
- (("https://videos.simply-hentai.com"
- "/1715-tifa-in-hentai-gang-bang-3d-movie"), {
- "url": "ad9a36ae06c601b6490e3c401834b4949d947eb0",
- "keyword": "f9dad94fbde9c95859e631ff4f07297a9567b874",
- }),
- )
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.page_url = "https://" + match.group(1)
-
- def items(self):
- page = self.request(self.page_url).text
-
- title, pos = text.extract(page, "<title>", "</title>")
- tags , pos = text.extract(page, ">Tags</div>", "</div>", pos)
- date , pos = text.extract(page, ">Upload Date</div>", "</div>", pos)
- title = title.rpartition(" - ")[0]
-
- if "<video" in page:
- video_url = text.extract(page, '<source src="', '"', pos)[0]
- episode = 0
- else:
- # video url from myhentai.tv embed
- pos = page.index('<div class="video-frame-container">', pos)
- embed_url = text.extract(page, 'src="', '"', pos)[0].replace(
- "embedplayer.php?link=", "embed.php?name=")
- embed_page = self.request(embed_url).text
- video_url = text.extract(embed_page, '"file":"', '"')[0]
- title, _, episode = title.rpartition(" Episode ")
-
- data = text.nameext_from_url(video_url, {
- "title": text.unescape(title),
- "episode": text.parse_int(episode),
- "tags": text.split_html(tags)[::2],
- "type": "video",
- "date": text.parse_datetime(text.remove_html(
- date), "%B %d, %Y %H:%M"),
- })
-
- yield Message.Version, 1
- yield Message.Directory, data
- yield Message.Url, video_url, data
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ccba640..3672a6d 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -54,6 +54,7 @@ class TwitterExtractor(Extractor):
if self.videos and "-videoContainer" in tweet:
data["num"] = 1
+ data["extension"] = None
url = "ytdl:{}/{}/status/{}".format(
self.root, data["user"], data["tweet_id"])
yield Message.Url, url, data
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py
index b9c223c..463733f 100644
--- a/gallery_dl/extractor/wikiart.py
+++ b/gallery_dl/extractor/wikiart.py
@@ -70,7 +70,7 @@ class WikiartArtistExtractor(WikiartExtractor):
pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)"
test = ("https://www.wikiart.org/en/thomas-cole", {
"url": "f1eee8158f5b8b7380382ab730a8f53884715c8b",
- "keyword": "b62678394ce645815963883d5c9642255307225f",
+ "keyword": "c61f5a4774b977106000e9554d19cfb9438a7032",
})
def __init__(self, match):
diff --git a/gallery_dl/extractor/xhamster.py b/gallery_dl/extractor/xhamster.py
index 9699806..23750db 100644
--- a/gallery_dl/extractor/xhamster.py
+++ b/gallery_dl/extractor/xhamster.py
@@ -13,13 +13,16 @@ from .. import text
import json
-BASE_PATTERN = r"(?:https?://)?(?:[^.]+\.)?xhamster\.(?:com|one|desi)"
+BASE_PATTERN = r"(?:https?://)?((?:[^.]+\.)?xhamster\d?\.(?:com|one|desi))"
class XhamsterExtractor(Extractor):
"""Base class for xhamster extractors"""
category = "xhamster"
- root = "https://xhamster.com"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.root = "https://" + match.group(1)
class XhamsterGalleryExtractor(XhamsterExtractor):
@@ -66,16 +69,21 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
},
},
}),
+ ("https://jp.xhamster2.com/photos/gallery/11748968", {
+ "pattern": r"https://thumb-p\d+.xhcdn.com/./[\w/-]+_1000.jpg$",
+ "count": ">= 144",
+ }),
("https://xhamster.com/photos/gallery/make-the-world-better-11748968"),
("https://xhamster.com/photos/gallery/11748968"),
("https://xhamster.one/photos/gallery/11748968"),
("https://xhamster.desi/photos/gallery/11748968"),
+ ("https://xhamster2.com/photos/gallery/11748968"),
("https://en.xhamster.com/photos/gallery/11748968"),
)
def __init__(self, match):
XhamsterExtractor.__init__(self, match)
- self.path = match.group(1)
+ self.path = match.group(2)
self.data = None
def items(self):
@@ -154,7 +162,7 @@ class XhamsterUserExtractor(XhamsterExtractor):
def __init__(self, match):
XhamsterExtractor.__init__(self, match)
- self.user = match.group(1)
+ self.user = match.group(2)
def items(self):
yield Message.Version, 1
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 637561a..6d81e66 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -196,7 +196,7 @@ class DownloadJob(Job):
archive = self.archive
# prepare download
- pathfmt.set_keywords(keywords)
+ pathfmt.set_filename(keywords)
if postprocessors:
for pp in postprocessors:
@@ -316,7 +316,9 @@ class DownloadJob(Job):
skip = self.extractor.config("skip", True)
if skip:
self._skipexc = None
- if isinstance(skip, str):
+ if skip == "enumerate":
+ self.pathfmt.check_file = self.pathfmt._enum_file
+ elif isinstance(skip, str):
skip, _, smax = skip.partition(":")
if skip == "abort":
self._skipexc = exception.StopExtraction
@@ -334,7 +336,8 @@ class DownloadJob(Job):
postprocessors = self.extractor.config("postprocessors")
if postprocessors:
- self.postprocessors = []
+ pp_list = []
+
for pp_dict in postprocessors:
whitelist = pp_dict.get("whitelist")
blacklist = pp_dict.get("blacklist")
@@ -353,16 +356,19 @@ class DownloadJob(Job):
"'%s' initialization failed: %s: %s",
name, exc.__class__.__name__, exc)
else:
- self.postprocessors.append(pp_obj)
- self.extractor.log.debug(
- "Active postprocessor modules: %s", self.postprocessors)
+ pp_list.append(pp_obj)
+
+ if pp_list:
+ self.postprocessors = pp_list
+ self.extractor.log.debug(
+ "Active postprocessor modules: %s", pp_list)
class SimulationJob(DownloadJob):
"""Simulate the extraction process without downloading anything"""
def handle_url(self, url, keywords, fallback=None):
- self.pathfmt.set_keywords(keywords)
+ self.pathfmt.set_filename(keywords)
self.out.skip(self.pathfmt.path)
if self.sleep:
time.sleep(self.sleep)
diff --git a/gallery_dl/oauth.py b/gallery_dl/oauth.py
index 8a12755..69ab4f6 100644
--- a/gallery_dl/oauth.py
+++ b/gallery_dl/oauth.py
@@ -127,6 +127,6 @@ class OAuth1API():
self.api_key = api_key
def request(self, url, method="GET", **kwargs):
- kwargs["fatal"] = False
+ kwargs["fatal"] = None
kwargs["session"] = self.session
return self.extractor.request(url, method, **kwargs)
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index af70fc8..ecc2ee3 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -307,7 +307,8 @@ def build_parser():
"--ugoira-conv",
dest="postprocessors",
action="append_const", const={"name": "ugoira", "ffmpeg-args": (
- "-c:v", "libvpx", "-crf", "4", "-b:v", "5000k", "-an")},
+ "-c:v", "libvpx", "-crf", "4", "-b:v", "5000k", "-an"),
+ "whitelist": ("pixiv", "danbooru")},
help="Convert Pixiv Ugoira to WebM (requires FFmpeg)",
)
postprocessor.add_argument(
diff --git a/gallery_dl/postprocessor/classify.py b/gallery_dl/postprocessor/classify.py
index 62460d3..4a9bde9 100644
--- a/gallery_dl/postprocessor/classify.py
+++ b/gallery_dl/postprocessor/classify.py
@@ -33,17 +33,24 @@ class ClassifyPP(PostProcessor):
}
def prepare(self, pathfmt):
- ext = pathfmt.keywords.get("extension")
-
+ ext = pathfmt.extension
if ext in self.mapping:
- self._dir = pathfmt.realdirectory + os.sep + self.mapping[ext]
- pathfmt.realpath = self._dir + os.sep + pathfmt.filename
- else:
- self._dir = None
+ # set initial paths to enable download skips
+ self._build_paths(pathfmt, self.mapping[ext])
def run(self, pathfmt):
- if self._dir:
- os.makedirs(self._dir, exist_ok=True)
+ ext = pathfmt.extension
+ if ext in self.mapping:
+ # rebuild paths in case the filename extension changed
+ path = self._build_paths(pathfmt, self.mapping[ext])
+ os.makedirs(path, exist_ok=True)
+
+ @staticmethod
+ def _build_paths(pathfmt, extra):
+ path = pathfmt.realdirectory + extra
+ pathfmt.realpath = path + os.sep + pathfmt.filename
+ pathfmt.path = pathfmt.directory + extra + os.sep + pathfmt.filename
+ return path
__postprocessor__ = ClassifyPP
diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py
index c642f0f..b967cf6 100644
--- a/gallery_dl/postprocessor/common.py
+++ b/gallery_dl/postprocessor/common.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018 Mike Fährmann
+# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -23,3 +23,6 @@ class PostProcessor():
def finalize(self):
"""Cleanup"""
+
+ def __repr__(self):
+ return self.__class__.__name__
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 77be9c7..467ef11 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -36,15 +36,14 @@ class MetadataPP(PostProcessor):
def run(self, pathfmt):
path = "{}.{}".format(pathfmt.realpath, self.extension)
with open(path, "w", encoding="utf-8") as file:
- self.write(file, pathfmt)
+ self.write(file, pathfmt.kwdict)
- def _write_custom(self, file, pathfmt):
- output = self.formatter.format_map(pathfmt.keywords)
+ def _write_custom(self, file, kwdict):
+ output = self.formatter.format_map(kwdict)
file.write(output)
- def _write_tags(self, file, pathfmt):
- kwds = pathfmt.keywords
- tags = kwds.get("tags") or kwds.get("tag_string")
+ def _write_tags(self, file, kwdict):
+ tags = kwdict.get("tags") or kwdict.get("tag_string")
if not tags:
return
@@ -58,8 +57,8 @@ class MetadataPP(PostProcessor):
file.write("\n".join(tags))
file.write("\n")
- def _write_json(self, file, pathfmt):
- util.dump_json(pathfmt.keywords, file, self.ascii, self.indent)
+ def _write_json(self, file, kwdict):
+ util.dump_json(kwdict, file, self.ascii, self.indent)
__postprocessor__ = MetadataPP
diff --git a/gallery_dl/postprocessor/mtime.py b/gallery_dl/postprocessor/mtime.py
index 03d2f11..7065428 100644
--- a/gallery_dl/postprocessor/mtime.py
+++ b/gallery_dl/postprocessor/mtime.py
@@ -19,9 +19,9 @@ class MtimePP(PostProcessor):
self.key = options.get("key", "date")
def run(self, pathfmt):
- mtime = pathfmt.keywords.get(self.key)
+ mtime = pathfmt.kwdict.get(self.key)
ts = getattr(mtime, "timestamp", None)
- pathfmt.keywords["_mtime"] = ts() if ts else parse_int(mtime)
+ pathfmt.kwdict["_mtime"] = ts() if ts else parse_int(mtime)
__postprocessor__ = MtimePP
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index bd8c5ad..0dbb796 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -52,13 +52,13 @@ class UgoiraPP(PostProcessor):
def prepare(self, pathfmt):
self._frames = None
- if pathfmt.keywords["extension"] != "zip":
+ if pathfmt.extension != "zip":
return
- if "frames" in pathfmt.keywords:
- self._frames = pathfmt.keywords["frames"]
- elif "pixiv_ugoira_frame_data" in pathfmt.keywords:
- self._frames = pathfmt.keywords["pixiv_ugoira_frame_data"]["data"]
+ if "frames" in pathfmt.kwdict:
+ self._frames = pathfmt.kwdict["frames"]
+ elif "pixiv_ugoira_frame_data" in pathfmt.kwdict:
+ self._frames = pathfmt.kwdict["pixiv_ugoira_frame_data"]["data"]
else:
return
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 02d998d..79fa175 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -391,10 +391,18 @@ class Formatter():
if field_name:
self.fields.append((
len(self.result),
- self._field_access(field_name, format_spec, conversion)
+ self._field_access(field_name, format_spec, conversion),
))
self.result.append("")
+ if len(self.result) == 1:
+ if self.fields:
+ self.format_map = self.fields[0][1]
+ else:
+ self.format_map = lambda _: format_string
+ del self.result
+ del self.fields
+
def format_map(self, kwargs):
"""Apply 'kwargs' to the initial format_string and return its result"""
for index, func in self.fields:
@@ -512,48 +520,63 @@ class Formatter():
class PathFormat():
def __init__(self, extractor):
- self.filename_fmt = extractor.config(
- "filename", extractor.filename_fmt)
- self.directory_fmt = extractor.config(
- "directory", extractor.directory_fmt)
- self.kwdefault = extractor.config("keywords-default")
+ filename_fmt = extractor.config("filename", extractor.filename_fmt)
+ directory_fmt = extractor.config("directory", extractor.directory_fmt)
+ kwdefault = extractor.config("keywords-default")
try:
- self.formatter = Formatter(self.filename_fmt, self.kwdefault)
+ self.filename_formatter = Formatter(
+ filename_fmt, kwdefault).format_map
except Exception as exc:
raise exception.FormatError(exc, "filename")
- self.delete = False
- self.has_extension = False
- self.keywords = {}
- self.filename = ""
+ try:
+ self.directory_formatters = [
+ Formatter(dirfmt, kwdefault).format_map
+ for dirfmt in directory_fmt
+ ]
+ except Exception as exc:
+ raise exception.FormatError(exc, "directory")
+
self.directory = self.realdirectory = ""
+ self.filename = ""
+ self.extension = ""
+ self.prefix = ""
+ self.kwdict = {}
+ self.delete = False
self.path = self.realpath = self.temppath = ""
- self.basedirectory = expand_path(
+ basedir = expand_path(
extractor.config("base-directory", (".", "gallery-dl")))
- if os.altsep and os.altsep in self.basedirectory:
- self.basedirectory = self.basedirectory.replace(os.altsep, os.sep)
+ if os.altsep and os.altsep in basedir:
+ basedir = basedir.replace(os.altsep, os.sep)
+ if basedir[-1] != os.sep:
+ basedir += os.sep
+ self.basedirectory = basedir
- restrict = extractor.config("restrict-filenames", "auto")
+ restrict = extractor.config("path-restrict", "auto")
if restrict == "auto":
- restrict = "<>:\"\\/|?*" if os.name == "nt" else "/"
+ restrict = "\\\\|/<>:\"?*" if os.name == "nt" else "/"
elif restrict == "unix":
restrict = "/"
elif restrict == "windows":
- restrict = "<>:\"\\/|?*"
- self.clean_path = self._build_cleanfunc(restrict)
+ restrict = "\\\\|/<>:\"?*"
+
+ remove = extractor.config("path-remove", "\x00-\x1f\x7f")
+
+ self.clean_segment = self._build_cleanfunc(restrict, "_")
+ self.clean_path = self._build_cleanfunc(remove, "")
@staticmethod
- def _build_cleanfunc(repl):
- if not repl:
+ def _build_cleanfunc(chars, repl):
+ if not chars:
return lambda x: x
- elif len(repl) == 1:
- def func(x, r=repl):
- return x.replace(r, "_")
+ elif len(chars) == 1:
+ def func(x, c=chars, r=repl):
+ return x.replace(c, r)
else:
- def func(x, sub=re.compile("[" + re.escape(repl) + "]").sub):
- return sub("_", x)
+ def func(x, sub=re.compile("[" + chars + "]").sub, r=repl):
+ return sub(r, x)
return func
def open(self, mode="wb"):
@@ -562,68 +585,91 @@ class PathFormat():
def exists(self, archive=None):
"""Return True if the file exists on disk or in 'archive'"""
- if archive and archive.check(self.keywords):
+ if archive and self.kwdict in archive:
return self.fix_extension()
- if self.has_extension and os.path.exists(self.realpath):
- return True
+ if self.extension and os.path.exists(self.realpath):
+ return self.check_file()
return False
- def set_directory(self, keywords):
+ @staticmethod
+ def check_file():
+ return True
+
+ def _enum_file(self):
+ num = 1
+ while True:
+ self.prefix = str(num) + "."
+ self.set_extension(self.extension, False)
+ if not os.path.exists(self.realpath):
+ return False
+ num += 1
+
+ def set_directory(self, kwdict):
"""Build directory path and create it if necessary"""
+
+ # Build path segments by applying 'kwdict' to directory format strings
try:
segments = [
- self.clean_path(
- Formatter(segment, self.kwdefault)
- .format_map(keywords).strip())
- for segment in self.directory_fmt
+ self.clean_segment(format_map(kwdict).strip())
+ for format_map in self.directory_formatters
]
except Exception as exc:
raise exception.FormatError(exc, "directory")
- self.directory = os.path.join(
- self.basedirectory,
- *segments
- )
+ # Join path segments
+ sep = os.sep
+ directory = self.clean_path(self.basedirectory + sep.join(segments))
- # remove trailing path separator;
- # occurs if the last argument to os.path.join() is an empty string
- if self.directory[-1] == os.sep:
- self.directory = self.directory[:-1]
+ # Ensure directory ends with a path separator
+ if directory[-1] != sep:
+ directory += sep
+ self.directory = directory
- self.realdirectory = self.adjust_path(self.directory)
+ # Enable longer-than-260-character paths on Windows
+ if os.name == "nt":
+ self.realdirectory = "\\\\?\\" + os.path.abspath(directory) + sep
+ else:
+ self.realdirectory = directory
+
+ # Create directory tree
os.makedirs(self.realdirectory, exist_ok=True)
- def set_keywords(self, keywords):
- """Set filename keywords"""
- self.keywords = keywords
- self.temppath = ""
- self.has_extension = bool(keywords.get("extension"))
- if self.has_extension:
+ def set_filename(self, kwdict):
+ """Set general filename data"""
+ self.kwdict = kwdict
+ self.temppath = self.prefix = ""
+ self.extension = kwdict["extension"]
+
+ if self.extension:
self.build_path()
def set_extension(self, extension, real=True):
- """Set the 'extension' keyword"""
- self.has_extension = real
- self.keywords["extension"] = extension
+ """Set filename extension"""
+ if real:
+ self.extension = extension
+ self.kwdict["extension"] = self.prefix + extension
self.build_path()
def fix_extension(self, _=None):
- if not self.has_extension:
- self.set_extension("")
+ """Fix filenames without a given filename extension"""
+ if not self.extension:
+ self.set_extension("", False)
if self.path[-1] == ".":
self.path = self.path[:-1]
self.temppath = self.realpath = self.realpath[:-1]
return True
def build_path(self):
- """Use filename-keywords and directory to build a full path"""
+ """Use filename metadata and directory to build a full path"""
+
+ # Apply 'kwdict' to filename format string
try:
- self.filename = self.clean_path(
- self.formatter.format_map(self.keywords))
+ self.filename = filename = self.clean_path(self.clean_segment(
+ self.filename_formatter(self.kwdict)))
except Exception as exc:
raise exception.FormatError(exc, "filename")
- filename = os.sep + self.filename
+ # Combine directory and filename to full paths
self.path = self.directory + filename
self.realpath = self.realdirectory + filename
if not self.temppath:
@@ -631,7 +677,7 @@ class PathFormat():
def part_enable(self, part_directory=None):
"""Enable .part file usage"""
- if self.has_extension:
+ if self.extension:
self.temppath += ".part"
else:
self.set_extension("part", False)
@@ -657,16 +703,16 @@ class PathFormat():
return
if self.temppath != self.realpath:
- # move temp file to its actual location
+ # Move temp file to its actual location
try:
os.replace(self.temppath, self.realpath)
except OSError:
shutil.copyfile(self.temppath, self.realpath)
os.unlink(self.temppath)
- if "_mtime" in self.keywords:
- # set file modification time
- mtime = self.keywords["_mtime"]
+ if "_mtime" in self.kwdict:
+ # Set file modification time
+ mtime = self.kwdict["_mtime"]
if mtime:
try:
if isinstance(mtime, str):
@@ -675,11 +721,6 @@ class PathFormat():
except Exception:
pass
- @staticmethod
- def adjust_path(path):
- """Enable longer-than-260-character paths on windows"""
- return "\\\\?\\" + os.path.abspath(path) if os.name == "nt" else path
-
class DownloadArchive():
@@ -693,8 +734,8 @@ class DownloadArchive():
"archive-format", extractor.archive_fmt)
).format_map
- def check(self, kwdict):
- """Return True if item described by 'kwdict' exists in archive"""
+ def __contains__(self, kwdict):
+ """Return True if the item described by 'kwdict' exists in archive"""
key = self.keygen(kwdict)
self.cursor.execute(
"SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d9cc3d6..911939d 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.10.1"
+__version__ = "1.10.2"
diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh
index 334671e..d8c8a03 100755
--- a/scripts/run_tests.sh
+++ b/scripts/run_tests.sh
@@ -2,7 +2,7 @@
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-TESTS_CORE=(config cookies downloader extractor oauth text util)
+TESTS_CORE=(config cookies downloader extractor oauth postprocessor text util)
TESTS_RESULTS=(results)
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 498e3fc..78963aa 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -24,7 +24,8 @@ CATEGORY_MAP = {
"dynastyscans" : "Dynasty Reader",
"e621" : "e621",
"erolord" : "EroLord.com",
- "exhentai" : "E-Hentai",
+ "e-hentai" : "E-Hentai",
+ "exhentai" : "ExHentai",
"fallenangels" : "Fallen Angels Scans",
"fashionnova" : "Fashion Nova",
"hbrowse" : "HBrowse",
@@ -109,6 +110,7 @@ SUBCATEGORY_MAP = {
AUTH_MAP = {
"danbooru" : "Optional",
"deviantart" : "Optional (OAuth)",
+ "e-hentai" : "Optional",
"exhentai" : "Optional",
"flickr" : "Optional (OAuth)",
"idolcomplex": "Optional",
@@ -203,6 +205,15 @@ def build_extractor_list():
for extrlist in extractors.values():
extrlist.sort(key=subcategory_key)
+ # ugly hack to add e-hentai.org
+ eh = []
+ for extr in extractors["exhentai"]:
+ class eh_extr(extr):
+ category = "e-hentai"
+ root = "https://e-hentai.org"
+ eh.append(eh_extr)
+ extractors["e-hentai"] = eh
+
# sort lists by category
return sorted(
extractors.values(),
diff --git a/test/test_downloader.py b/test/test_downloader.py
index caed983..0f58d4e 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -120,7 +120,7 @@ class TestDownloaderBase(unittest.TestCase):
}
pathfmt = PathFormat(cls.extractor)
pathfmt.set_directory(kwdict)
- pathfmt.set_keywords(kwdict)
+ pathfmt.set_filename(kwdict)
if content:
mode = "w" + ("b" if isinstance(content, bytes) else "")
@@ -145,7 +145,7 @@ class TestDownloaderBase(unittest.TestCase):
# test filename extension
self.assertEqual(
- pathfmt.keywords["extension"],
+ pathfmt.extension,
expected_extension,
)
self.assertEqual(
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
new file mode 100644
index 0000000..786dc46
--- /dev/null
+++ b/test/test_postprocessor.py
@@ -0,0 +1,294 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+import os.path
+import zipfile
+import tempfile
+from datetime import datetime, timezone as tz
+
+import unittest
+from unittest.mock import Mock, mock_open, patch
+
+from gallery_dl import postprocessor, extractor, util, config
+from gallery_dl.postprocessor.common import PostProcessor
+
+
+class MockPostprocessorModule(Mock):
+ __postprocessor__ = "mock"
+
+
+class TestPostprocessorModule(unittest.TestCase):
+
+ def setUp(self):
+ postprocessor._cache.clear()
+
+ def test_find(self):
+        for name in postprocessor.modules:
+ cls = postprocessor.find(name)
+ self.assertEqual(cls.__name__, name.capitalize() + "PP")
+ self.assertIs(cls.__base__, PostProcessor)
+
+ self.assertEqual(postprocessor.find("foo"), None)
+ self.assertEqual(postprocessor.find(1234) , None)
+ self.assertEqual(postprocessor.find(None) , None)
+
+ @patch("importlib.import_module")
+ def test_cache(self, import_module):
+ import_module.return_value = MockPostprocessorModule()
+
+        for name in postprocessor.modules:
+ postprocessor.find(name)
+ self.assertEqual(import_module.call_count, len(postprocessor.modules))
+
+ # no new calls to import_module
+        for name in postprocessor.modules:
+ postprocessor.find(name)
+ self.assertEqual(import_module.call_count, len(postprocessor.modules))
+
+
+class BasePostprocessorTest(unittest.TestCase):
+
+ @classmethod
+ def setUpClass(cls):
+ cls.extractor = extractor.find("test:")
+ cls.dir = tempfile.TemporaryDirectory()
+ cls.fnum = 0
+ config.set(("base-directory",), cls.dir.name)
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.dir.cleanup()
+ config.clear()
+
+ def _create(self, options=None, data=None):
+ kwdict = {"category": "test", "filename": "file", "extension": "ext"}
+ if options is None:
+ options = {}
+ if data is not None:
+ kwdict.update(data)
+
+ self.pathfmt = util.PathFormat(self.extractor)
+ self.pathfmt.set_directory(kwdict)
+ self.pathfmt.set_filename(kwdict)
+
+ pp = postprocessor.find(self.__class__.__name__[:-4].lower())
+ return pp(self.pathfmt, options)
+
+
+class ClassifyTest(BasePostprocessorTest):
+
+ def test_classify_default(self):
+ pp = self._create()
+
+ self.assertEqual(pp.mapping, {
+ ext: directory
+ for directory, exts in pp.DEFAULT_MAPPING.items()
+ for ext in exts
+ })
+ self.pathfmt.set_extension("jpg")
+
+ pp.prepare(self.pathfmt)
+ path = os.path.join(self.dir.name, "test", "Pictures")
+ self.assertEqual(self.pathfmt.path, path + "/file.jpg")
+ self.assertEqual(self.pathfmt.realpath, path + "/file.jpg")
+
+ with patch("os.makedirs") as mkdirs:
+ pp.run(self.pathfmt)
+ mkdirs.assert_called_once_with(path, exist_ok=True)
+
+ def test_classify_noop(self):
+ pp = self._create()
+ rp = self.pathfmt.realpath
+
+ pp.prepare(self.pathfmt)
+ self.assertEqual(self.pathfmt.path, rp)
+ self.assertEqual(self.pathfmt.realpath, rp)
+
+ with patch("os.makedirs") as mkdirs:
+ pp.run(self.pathfmt)
+ self.assertEqual(mkdirs.call_count, 0)
+
+ def test_classify_custom(self):
+ pp = self._create({"mapping": {
+ "foo/bar": ["foo", "bar"],
+ }})
+
+ self.assertEqual(pp.mapping, {
+ "foo": "foo/bar",
+ "bar": "foo/bar",
+ })
+ self.pathfmt.set_extension("foo")
+
+ pp.prepare(self.pathfmt)
+ path = os.path.join(self.dir.name, "test", "foo", "bar")
+ self.assertEqual(self.pathfmt.path, path + "/file.foo")
+ self.assertEqual(self.pathfmt.realpath, path + "/file.foo")
+
+ with patch("os.makedirs") as mkdirs:
+ pp.run(self.pathfmt)
+ mkdirs.assert_called_once_with(path, exist_ok=True)
+
+
+class MetadataTest(BasePostprocessorTest):
+
+ def test_metadata_default(self):
+ pp = self._create()
+
+ # default arguments
+ self.assertEqual(pp.write , pp._write_json)
+ self.assertEqual(pp.ascii , False)
+ self.assertEqual(pp.indent , 4)
+ self.assertEqual(pp.extension, "json")
+
+ def test_metadata_json(self):
+ pp = self._create({
+ "mode" : "json",
+ "ascii" : True,
+ "indent" : 2,
+ "extension": "JSON",
+ })
+
+ self.assertEqual(pp.write , pp._write_json)
+ self.assertEqual(pp.ascii , True)
+ self.assertEqual(pp.indent , 2)
+ self.assertEqual(pp.extension, "JSON")
+
+ with patch("builtins.open", mock_open()) as m:
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+
+ path = self.pathfmt.realpath + ".JSON"
+ m.assert_called_once_with(path, "w", encoding="utf-8")
+ self.assertEqual(self._output(m), """{
+ "category": "test",
+ "extension": "ext",
+ "filename": "file"
+}
+""")
+
+ def test_metadata_tags(self):
+ pp = self._create({"mode": "tags"}, {"tags": ["foo", "bar", "baz"]})
+ self.assertEqual(pp.write, pp._write_tags)
+ self.assertEqual(pp.extension, "txt")
+
+ with patch("builtins.open", mock_open()) as m:
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+
+ path = self.pathfmt.realpath + ".txt"
+ m.assert_called_once_with(path, "w", encoding="utf-8")
+ self.assertEqual(self._output(m), "foo\nbar\nbaz\n")
+
+ def test_metadata_tags_split_1(self):
+ pp = self._create({"mode": "tags"}, {"tags": "foo, bar, baz"})
+ with patch("builtins.open", mock_open()) as m:
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+ self.assertEqual(self._output(m), "foo\nbar\nbaz\n")
+
+ def test_metadata_tags_split_2(self):
+ pp = self._create(
+ {"mode": "tags"},
+ {"tags": "foobar1 foobar2 foobarbaz"},
+ )
+ with patch("builtins.open", mock_open()) as m:
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+ self.assertEqual(self._output(m), "foobar1\nfoobar2\nfoobarbaz\n")
+
+ def test_metadata_tags_tagstring(self):
+ pp = self._create({"mode": "tags"}, {"tag_string": "foo, bar, baz"})
+ with patch("builtins.open", mock_open()) as m:
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+ self.assertEqual(self._output(m), "foo\nbar\nbaz\n")
+
+ def test_metadata_custom(self):
+ pp = self._create(
+ {"mode": "custom", "format": "{foo}\n{missing}\n"},
+ {"foo": "bar"},
+ )
+ self.assertEqual(pp.write, pp._write_custom)
+ self.assertEqual(pp.extension, "txt")
+ self.assertTrue(pp.formatter)
+
+ with patch("builtins.open", mock_open()) as m:
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+ self.assertEqual(self._output(m), "bar\nNone\n")
+
+ @staticmethod
+ def _output(mock):
+ return "".join(
+ call[1][0]
+ for call in mock.mock_calls
+ if call[0] == "().write"
+ )
+
+
+class MtimeTest(BasePostprocessorTest):
+
+ def test_mtime_default(self):
+ pp = self._create()
+ self.assertEqual(pp.key, "date")
+
+ def test_mtime_datetime(self):
+ pp = self._create(None, {"date": datetime(1980, 1, 1, tzinfo=tz.utc)})
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+ self.assertEqual(self.pathfmt.kwdict["_mtime"], 315532800)
+
+ def test_mtime_timestamp(self):
+ pp = self._create(None, {"date": 315532800})
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+ self.assertEqual(self.pathfmt.kwdict["_mtime"], 315532800)
+
+ def test_mtime_custom(self):
+ pp = self._create({"key": "foo"}, {"foo": 315532800})
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+ self.assertEqual(self.pathfmt.kwdict["_mtime"], 315532800)
+
+
+class ZipTest(BasePostprocessorTest):
+
+ def test_zip_default(self):
+ pp = self._create()
+ self.assertEqual(pp.path, self.pathfmt.realdirectory)
+ self.assertEqual(pp.run, pp._write)
+ self.assertEqual(pp.delete, True)
+ self.assertFalse(hasattr(pp, "args"))
+ self.assertEqual(pp.zfile.compression, zipfile.ZIP_STORED)
+ self.assertEqual(
+ pp.zfile.filename, self.pathfmt.realdirectory + ".zip")
+
+ def test_zip_options(self):
+ pp = self._create({
+ "keep-files": True,
+ "compression": "zip",
+ "extension": "cbz",
+ })
+ self.assertEqual(pp.delete, False)
+ self.assertEqual(pp.zfile.compression, zipfile.ZIP_DEFLATED)
+ self.assertEqual(
+ pp.zfile.filename, self.pathfmt.realdirectory + ".cbz")
+
+ def test_zip_safe(self):
+ pp = self._create({"mode": "safe"})
+ self.assertEqual(pp.delete, True)
+ self.assertEqual(pp.path, self.pathfmt.realdirectory)
+ self.assertEqual(pp.run, pp._write_safe)
+ self.assertEqual(pp.args, (
+ pp.path + ".zip", "a", zipfile.ZIP_STORED, True
+ ))
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/test/test_results.py b/test/test_results.py
index 839a75c..12f2416 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -26,12 +26,9 @@ TRAVIS_SKIP = {
# temporary issues, etc.
BROKEN = {
- "adultempire",
- "flickr",
+ "8chan",
"imgth",
- "mangafox",
"mangapark",
- "pixnet",
}
@@ -90,13 +87,17 @@ class TestExtractorResults(unittest.TestCase):
# test archive-id uniqueness
self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive))
- # test '_extractor' entries
if tjob.queue:
+ # test '_extractor' entries
for url, kwdict in zip(tjob.list_url, tjob.list_keyword):
if "_extractor" in kwdict:
extr = kwdict["_extractor"].from_url(url)
self.assertIsInstance(extr, kwdict["_extractor"])
self.assertEqual(extr.url, url)
+ else:
+ # test 'extension' entries
+ for kwdict in tjob.list_keyword:
+ self.assertIn("extension", kwdict)
# test extraction results
if "url" in result:
@@ -168,7 +169,6 @@ class ResultJob(job.DownloadJob):
if content:
self.fileobj = TestPathfmt(self.hash_content)
- self.get_downloader("http").check_extension = lambda a, b: None
self.format_directory = TestFormatter(
"".join(self.extractor.directory_fmt))
@@ -222,8 +222,8 @@ class TestPathfmt():
self.hashobj = hashobj
self.path = ""
self.size = 0
- self.keywords = {}
- self.has_extension = True
+ self.kwdict = {}
+ self.extension = "jpg"
def __enter__(self):
return self
@@ -280,6 +280,7 @@ def setup_test_config():
config.clear()
config.set(("cache", "file"), ":memory:")
config.set(("downloader", "part"), False)
+ config.set(("downloader", "adjust-extensions"), False)
config.set(("extractor", "timeout"), 60)
config.set(("extractor", "username"), name)
config.set(("extractor", "password"), name)