From a26df18796ff4e506b16bf32fcec9336233b9e2e Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Tue, 28 Jan 2025 19:12:09 -0500 Subject: New upstream version 1.28.5. --- CHANGELOG.md | 59 +++++++-- MANIFEST.in | 2 +- PKG-INFO | 49 ++++++- README.rst | 47 ++++++- data/man/gallery-dl.1 | 2 +- data/man/gallery-dl.conf.5 | 80 ++--------- docs/gallery-dl.conf | 16 +-- gallery_dl.egg-info/PKG-INFO | 49 ++++++- gallery_dl.egg-info/SOURCES.txt | 5 +- gallery_dl/extractor/4archive.py | 2 +- gallery_dl/extractor/__init__.py | 4 +- gallery_dl/extractor/adultempire.py | 3 + gallery_dl/extractor/architizer.py | 10 +- gallery_dl/extractor/artstation.py | 12 +- gallery_dl/extractor/batoto.py | 1 + gallery_dl/extractor/bunkr.py | 10 +- gallery_dl/extractor/cohost.py | 250 ----------------------------------- gallery_dl/extractor/danbooru.py | 2 +- gallery_dl/extractor/deviantart.py | 2 +- gallery_dl/extractor/e621.py | 19 ++- gallery_dl/extractor/facebook.py | 35 ++--- gallery_dl/extractor/fanleaks.py | 87 ------------ gallery_dl/extractor/fapachi.py | 3 +- gallery_dl/extractor/hiperdex.py | 12 +- gallery_dl/extractor/imagehosts.py | 28 ++++ gallery_dl/extractor/issuu.py | 32 +++-- gallery_dl/extractor/kemonoparty.py | 51 ++++--- gallery_dl/extractor/khinsider.py | 26 +++- gallery_dl/extractor/komikcast.py | 11 +- gallery_dl/extractor/lofter.py | 8 ++ gallery_dl/extractor/lolisafe.py | 9 +- gallery_dl/extractor/mangafox.py | 6 +- gallery_dl/extractor/mangahere.py | 6 +- gallery_dl/extractor/mangaread.py | 6 +- gallery_dl/extractor/nekohouse.py | 122 +++++++++++++++++ gallery_dl/extractor/pixiv.py | 42 ++++-- gallery_dl/extractor/pornpics.py | 22 ++- gallery_dl/extractor/rule34xyz.py | 8 +- gallery_dl/extractor/saint.py | 1 + gallery_dl/extractor/shimmie2.py | 4 - gallery_dl/extractor/szurubooru.py | 8 +- gallery_dl/extractor/toyhouse.py | 10 +- gallery_dl/extractor/twitter.py | 44 ++++-- gallery_dl/extractor/urlgalleries.py | 13 +- gallery_dl/extractor/vsco.py | 3 +- gallery_dl/extractor/webtoons.py | 4 +- gallery_dl/extractor/weebcentral.py | 6 +- gallery_dl/extractor/xfolio.py | 146 ++++++++++++++++++++ gallery_dl/extractor/xhamster.py | 74 +++++------ gallery_dl/path.py | 13 +- gallery_dl/version.py | 2 +- scripts/run_tests.py | 46 +++++++ test/test_results.py | 2 +- 53 files changed, 870 insertions(+), 644 deletions(-) delete mode 100644 gallery_dl/extractor/cohost.py delete mode 100644 gallery_dl/extractor/fanleaks.py create mode 100644 gallery_dl/extractor/nekohouse.py create mode 100644 gallery_dl/extractor/xfolio.py create mode 100755 scripts/run_tests.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c7e627..8856682 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,16 +1,53 @@ -## 1.28.4 - 2025-01-12 +## 1.28.5 - 2025-01-28 ### Extractors #### Additions -- [pexels] add support ([#2286](https://github.com/mikf/gallery-dl/issues/2286), [#4214](https://github.com/mikf/gallery-dl/issues/4214), [#6769](https://github.com/mikf/gallery-dl/issues/6769)) -- [weebcentral] add support ([#6778](https://github.com/mikf/gallery-dl/issues/6778)) +- [nekohouse] add support ([#5241](https://github.com/mikf/gallery-dl/issues/5241), [#6738](https://github.com/mikf/gallery-dl/issues/6738)) +- [turboimagehost] add support for galleries ([#6855](https://github.com/mikf/gallery-dl/issues/6855)) +- [xfolio] add support ([#5514](https://github.com/mikf/gallery-dl/issues/5514), [#6351](https://github.com/mikf/gallery-dl/issues/6351), [#6837](https://github.com/mikf/gallery-dl/issues/6837)) #### Fixes -- [bunkr] update to new site layout ([#6798](https://github.com/mikf/gallery-dl/issues/6798), [#6805](https://github.com/mikf/gallery-dl/issues/6805)) -- [bunkr] fix `ValueError` on relative redirects ([#6790](https://github.com/mikf/gallery-dl/issues/6790)) -- [plurk] fix `user` data extraction and make it non-fatal ([#6742](https://github.com/mikf/gallery-dl/issues/6742)) +- [4archive] fix `TypeError` +- [adultempire] bypass age confirmation check +- [architizer] fix extraction +- [artstation] avoid Cloudflare challenges ([#5817](https://github.com/mikf/gallery-dl/issues/5817), [#5658](https://github.com/mikf/gallery-dl/issues/5658), [#5564](https://github.com/mikf/gallery-dl/issues/5564), [#5554](https://github.com/mikf/gallery-dl/issues/5554)) +- [deviantart] prevent crash when accessing `premium_folder` data ([#6873](https://github.com/mikf/gallery-dl/issues/6873)) +- [fapachi] fix extraction ([#6881](https://github.com/mikf/gallery-dl/issues/6881)) +- [issuu] fix `user` extractor +- [kemonoparty] fix `username` metadata and filtering by `tag` for `/posts` URLs ([#6833](https://github.com/mikf/gallery-dl/issues/6833)) +- [mangafox] fix chapter extraction +- [mangahere] fix chapter extraction +- [pixiv] fix `sanity_level` workaround ([#4327](https://github.com/mikf/gallery-dl/issues/4327)) +- [pornpics] fix pagination results from HTML pages +- [twitter] handle exceptions during file extraction ([#6647](https://github.com/mikf/gallery-dl/issues/6647)) +- [vsco] fix `JSONDecodeError` ([#6887](https://github.com/mikf/gallery-dl/issues/6887), [#6891](https://github.com/mikf/gallery-dl/issues/6891)) +- [weebcentral] fix extraction ([#6860](https://github.com/mikf/gallery-dl/issues/6860)) +- [xhamster] fix `gallery` extractor ([#6818](https://github.com/mikf/gallery-dl/issues/6818), [#6876](https://github.com/mikf/gallery-dl/issues/6876)) #### Improvements -- [bunkr] support `/f/` media URLs -- [e621] accept `tag` search URLs with empty tag ([#6783](https://github.com/mikf/gallery-dl/issues/6783)) -- [pixiv] provide fallback URLs ([#6762](https://github.com/mikf/gallery-dl/issues/6762)) -- [wallhaven] extract `search[tags]` and `search[tag_id]` metadata ([#6772](https://github.com/mikf/gallery-dl/issues/6772)) +- [batoto] use `chapter_id` in default archive IDs ([#6835](https://github.com/mikf/gallery-dl/issues/6835)) +- [e621] support `e621.cc` and `e621.anthro.fr` frontend URLs ([#6809](https://github.com/mikf/gallery-dl/issues/6809)) +- [e621] prevent premature pagination end ([#6886](https://github.com/mikf/gallery-dl/issues/6886)) +- [facebook] allow accessing all metadata in `directory` format strings ([#6874](https://github.com/mikf/gallery-dl/issues/6874)) +- [hiperdex] update domain to `hiperdex.com` +- [kemonoparty] enable filtering creator posts by tag ([#6833](https://github.com/mikf/gallery-dl/issues/6833)) +- [khinsider] add `covers` option ([#6844](https://github.com/mikf/gallery-dl/issues/6844)) +- [komikcast] update domain to `komikcast.la` +- [lofter] improve error handling ([#6865](https://github.com/mikf/gallery-dl/issues/6865)) +- [pornpics] avoid redirect when retrieving a gallery page +- [urlgalleries] support new URL format +#### Metadata +- [bunkr] extract better `filename` metadata ([#6824](https://github.com/mikf/gallery-dl/issues/6824)) +- [hiperdex] fix `description` metadata +- [khinsider] extract more `album` metadata ([#6844](https://github.com/mikf/gallery-dl/issues/6844)) +- [mangaread] fix manga metadata extraction +- [rule34xyz] fix `date` and `tags` metadata +- [saint] fix metadata of `/d/` URLs +- [toyhouse] fix `date`, `artists`, and `characters` metadata +- [webtoons] fix `username` and `author_name` metadata +#### Removals +- [cohost] remove module +- [fanleaks] remove module +- [shimmie2] remove `tentaclerape.net` +- [szurubooru] remove `booru.foalcon.com` ### Miscellaneous -- [util] support not splitting `value` argument when calling `contains()` ([#6773](https://github.com/mikf/gallery-dl/issues/6773)) +- [docs] add `nix` docs to README ([#6606](https://github.com/mikf/gallery-dl/issues/6606)) +- [path] fix exception when using `--rename-to` + `--no-download` ([#6861](https://github.com/mikf/gallery-dl/issues/6861)) +- [release] include `scripts/run_tests.py` in release tarball ([#6856](https://github.com/mikf/gallery-dl/issues/6856)) diff --git a/MANIFEST.in b/MANIFEST.in index 71172df..68120ec 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,2 @@ -include README.rst CHANGELOG.md LICENSE +include README.rst CHANGELOG.md LICENSE scripts/run_tests.py recursive-include docs *.conf diff --git a/PKG-INFO b/PKG-INFO index 2d2156a..6db2d05 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.2 Name: gallery_dl -Version: 1.28.4 +Version: 1.28.5 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -131,9 +131,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds @@ -172,7 +172,6 @@ Scoop scoop install gallery-dl - Homebrew -------- @@ -229,6 +228,48 @@ This will remove the container after every use so you will always have a fresh e You can also add an alias to your shell for "gallery-dl" or create a simple bash script and drop it somewhere in your $PATH to act as a shim for this command. +Nix and Home Manager +-------------------------- + +Adding *gallery-dl* to your system environment: + +.. code:: nix + + environment.systemPackages = with pkgs; [ + gallery-dl + ]; + +Using :code:`nix-shell` + +.. code:: bash + + nix-shell -p gallery-dl + +.. code:: bash + + nix-shell -p gallery-dl --run "gallery-dl " + +For Home Manager users, you can manage *gallery-dl* declaratively: + +.. code:: nix + + programs.gallery-dl = { + enable = true; + settings = { + extractor.base-directory = "~/Downloads"; + }; + }; + +Alternatively, you can just add it to :code:`home.packages` if you don't want to manage it declaratively: + +.. code:: nix + + home.packages = with pkgs; [ + gallery-dl + ]; + +After making these changes, simply rebuild your configuration and open a new shell to have *gallery-dl* available. + Usage ===== diff --git a/README.rst b/README.rst index 2a1a3c2..4033183 100644 --- a/README.rst +++ b/README.rst @@ -76,9 +76,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds @@ -117,7 +117,6 @@ Scoop scoop install gallery-dl - Homebrew -------- @@ -174,6 +173,48 @@ This will remove the container after every use so you will always have a fresh e You can also add an alias to your shell for "gallery-dl" or create a simple bash script and drop it somewhere in your $PATH to act as a shim for this command. +Nix and Home Manager +-------------------------- + +Adding *gallery-dl* to your system environment: + +.. code:: nix + + environment.systemPackages = with pkgs; [ + gallery-dl + ]; + +Using :code:`nix-shell` + +.. code:: bash + + nix-shell -p gallery-dl + +.. code:: bash + + nix-shell -p gallery-dl --run "gallery-dl " + +For Home Manager users, you can manage *gallery-dl* declaratively: + +.. code:: nix + + programs.gallery-dl = { + enable = true; + settings = { + extractor.base-directory = "~/Downloads"; + }; + }; + +Alternatively, you can just add it to :code:`home.packages` if you don't want to manage it declaratively: + +.. code:: nix + + home.packages = with pkgs; [ + gallery-dl + ]; + +After making these changes, simply rebuild your configuration and open a new shell to have *gallery-dl* available. + Usage ===== diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index ff83690..b172453 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2025-01-12" "1.28.4" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2025-01-28" "1.28.5" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 9ed6d97..343188a 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2025-01-12" "1.28.4" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2025-01-28" "1.28.5" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -472,6 +472,7 @@ response before \f[I]retrying\f[] the request. \f[I]urlgalleries\f[], \f[I]vk\f[], \f[I]weebcentral\f[], +\f[I]xfolio\f[], \f[I]zerochan\f[] .br * \f[I]"1.0-2.0"\f[] @@ -2082,72 +2083,6 @@ Note: Set this option to an arbitrary letter, e.g., \f[I]"w"\f[], to download images in JPEG format at their original resolution. -.SS extractor.cohost.asks -.IP "Type:" 6 -\f[I]bool\f[] - -.IP "Default:" 9 -\f[I]true\f[] - -.IP "Description:" 4 -Extract \f[I]ask\f[] posts. - - -.SS extractor.cohost.avatar -.IP "Type:" 6 -\f[I]bool\f[] - -.IP "Default:" 9 -\f[I]false\f[] - -.IP "Description:" 4 -Download \f[I]avatar\f[] images. - - -.SS extractor.cohost.background -.IP "Type:" 6 -\f[I]bool\f[] - -.IP "Default:" 9 -\f[I]false\f[] - -.IP "Description:" 4 -Download \f[I]background\f[]/\f[I]banner\f[]/\f[I]header\f[] images. - - -.SS extractor.cohost.pinned -.IP "Type:" 6 -\f[I]bool\f[] - -.IP "Default:" 9 -\f[I]false\f[] - -.IP "Description:" 4 -Extract pinned posts. - - -.SS extractor.cohost.replies -.IP "Type:" 6 -\f[I]bool\f[] - -.IP "Default:" 9 -\f[I]true\f[] - -.IP "Description:" 4 -Extract reply posts. - - -.SS extractor.cohost.shares -.IP "Type:" 6 -\f[I]bool\f[] - -.IP "Default:" 9 -\f[I]false\f[] - -.IP "Description:" 4 -Extract shared posts. - - .SS extractor.cyberdrop.domain .IP "Type:" 6 \f[I]string\f[] @@ -3573,6 +3508,17 @@ are returned. * \f[I]"reverse"\f[]: Same as \f[I]"asc"\f[] +.SS extractor.khinsider.covers +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Download album cover images. + + .SS extractor.khinsider.format .IP "Type:" 6 \f[I]string\f[] diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 0d0c412..f3c9fdb 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -173,15 +173,6 @@ "nsfw" : true, "quality" : "original=true" }, - "cohost": - { - "asks" : true, - "avatar" : false, - "background": false, - "pinned" : false, - "replies": true, - "shares" : true - }, "coomerparty": { "username": "", @@ -375,6 +366,7 @@ }, "khinsider": { + "covers": false, "format": "mp3" }, "koharu": @@ -710,6 +702,10 @@ { "sleep-request": "0.5-1.5" }, + "xfolio": + { + "sleep-request": "0.5-1.5" + }, "weibo": { "sleep-request": "1.0-2.0", @@ -931,7 +927,7 @@ "config-file" : null, "enabled" : true, "format" : null, - "forward-cookies": false, + "forward-cookies": true, "logging" : true, "module" : null, "outtmpl" : null, diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 2d2156a..6db2d05 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.2 Name: gallery_dl -Version: 1.28.4 +Version: 1.28.5 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -131,9 +131,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds @@ -172,7 +172,6 @@ Scoop scoop install gallery-dl - Homebrew -------- @@ -229,6 +228,48 @@ This will remove the container after every use so you will always have a fresh e You can also add an alias to your shell for "gallery-dl" or create a simple bash script and drop it somewhere in your $PATH to act as a shim for this command. +Nix and Home Manager +-------------------------- + +Adding *gallery-dl* to your system environment: + +.. code:: nix + + environment.systemPackages = with pkgs; [ + gallery-dl + ]; + +Using :code:`nix-shell` + +.. code:: bash + + nix-shell -p gallery-dl + +.. code:: bash + + nix-shell -p gallery-dl --run "gallery-dl " + +For Home Manager users, you can manage *gallery-dl* declaratively: + +.. code:: nix + + programs.gallery-dl = { + enable = true; + settings = { + extractor.base-directory = "~/Downloads"; + }; + }; + +Alternatively, you can just add it to :code:`home.packages` if you don't want to manage it declaratively: + +.. code:: nix + + home.packages = with pkgs; [ + gallery-dl + ]; + +After making these changes, simply rebuild your configuration and open a new shell to have *gallery-dl* available. + Usage ===== diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 2656948..c5f560b 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -74,7 +74,6 @@ gallery_dl/extractor/catbox.py gallery_dl/extractor/chevereto.py gallery_dl/extractor/cien.py gallery_dl/extractor/civitai.py -gallery_dl/extractor/cohost.py gallery_dl/extractor/comicvine.py gallery_dl/extractor/common.py gallery_dl/extractor/cyberdrop.py @@ -89,7 +88,6 @@ gallery_dl/extractor/everia.py gallery_dl/extractor/exhentai.py gallery_dl/extractor/facebook.py gallery_dl/extractor/fanbox.py -gallery_dl/extractor/fanleaks.py gallery_dl/extractor/fantia.py gallery_dl/extractor/fapachi.py gallery_dl/extractor/fapello.py @@ -161,6 +159,7 @@ gallery_dl/extractor/myhentaigallery.py gallery_dl/extractor/myportfolio.py gallery_dl/extractor/naver.py gallery_dl/extractor/naverwebtoon.py +gallery_dl/extractor/nekohouse.py gallery_dl/extractor/newgrounds.py gallery_dl/extractor/nhentai.py gallery_dl/extractor/nijie.py @@ -245,6 +244,7 @@ gallery_dl/extractor/weibo.py gallery_dl/extractor/wikiart.py gallery_dl/extractor/wikifeet.py gallery_dl/extractor/wikimedia.py +gallery_dl/extractor/xfolio.py gallery_dl/extractor/xhamster.py gallery_dl/extractor/xvideos.py gallery_dl/extractor/yiffverse.py @@ -263,6 +263,7 @@ gallery_dl/postprocessor/python.py gallery_dl/postprocessor/rename.py gallery_dl/postprocessor/ugoira.py gallery_dl/postprocessor/zip.py +scripts/run_tests.py test/test_cache.py test/test_config.py test/test_cookies.py diff --git a/gallery_dl/extractor/4archive.py b/gallery_dl/extractor/4archive.py index 948a605..d198369 100644 --- a/gallery_dl/extractor/4archive.py +++ b/gallery_dl/extractor/4archive.py @@ -64,7 +64,7 @@ class _4archiveThreadExtractor(Extractor): data = { "name": extr('class="name">', ""), "date": text.parse_datetime( - extr('class="dateTime postNum">', "<").strip(), + extr('class="dateTime postNum" >', "<").strip(), "%Y-%m-%d %H:%M:%S"), "no" : text.parse_int(extr('href="#p', '"')), } diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index b582c99..fc8d7b2 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -39,7 +39,6 @@ modules = [ "chevereto", "cien", "civitai", - "cohost", "comicvine", "cyberdrop", "danbooru", @@ -52,7 +51,6 @@ modules = [ "exhentai", "facebook", "fanbox", - "fanleaks", "fantia", "fapello", "fapachi", @@ -116,6 +114,7 @@ modules = [ "myportfolio", "naver", "naverwebtoon", + "nekohouse", "newgrounds", "nhentai", "nijie", @@ -196,6 +195,7 @@ modules = [ "wikiart", "wikifeet", "wikimedia", + "xfolio", "xhamster", "xvideos", "yiffverse", diff --git a/gallery_dl/extractor/adultempire.py b/gallery_dl/extractor/adultempire.py index 1617414..c891b17 100644 --- a/gallery_dl/extractor/adultempire.py +++ b/gallery_dl/extractor/adultempire.py @@ -24,6 +24,9 @@ class AdultempireGalleryExtractor(GalleryExtractor): GalleryExtractor.__init__(self, match) self.gallery_id = match.group(2) + def _init(self): + self.cookies.set("ageConfirmed", "true", domain="www.adultempire.com") + def metadata(self, page): extr = text.extract_from(page, page.index('
')) return { diff --git a/gallery_dl/extractor/architizer.py b/gallery_dl/extractor/architizer.py index 8064e78..0268224 100644 --- a/gallery_dl/extractor/architizer.py +++ b/gallery_dl/extractor/architizer.py @@ -32,10 +32,10 @@ class ArchitizerProjectExtractor(GalleryExtractor): extr('id="Pages"', "") return { - "title" : extr('data-name="', '"'), - "slug" : extr('data-slug="', '"'), - "gid" : extr('data-gid="', '"').rpartition(".")[2], - "firm" : extr('data-firm-leaders-str="', '"'), + "title" : extr("data-name='", "'"), + "slug" : extr("data-slug='", "'"), + "gid" : extr("data-gid='", "'").rpartition(".")[2], + "firm" : extr("data-firm-leaders-str='", "'"), "location" : extr("

", "<").strip(), "type" : text.unescape(text.remove_html(extr( '
Type
', '", " | Bunkr<")) if not file_url: webpage_url = text.unescape(text.rextract( @@ -166,6 +169,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): return { "file" : text.unescape(file_url), + "name" : text.unescape(file_name), "_http_headers" : {"Referer": response.url}, "_http_validate": self._validate, } diff --git a/gallery_dl/extractor/cohost.py b/gallery_dl/extractor/cohost.py deleted file mode 100644 index 6a43224..0000000 --- a/gallery_dl/extractor/cohost.py +++ /dev/null @@ -1,250 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2024 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://cohost.org/""" - -from .common import Extractor, Message -from .. import text, util - -BASE_PATTERN = r"(?:https?://)?(?:www\.)?cohost\.org" - - -class CohostExtractor(Extractor): - """Base class for cohost extractors""" - category = "cohost" - root = "https://cohost.org" - directory_fmt = ("{category}", "{postingProject[handle]}") - filename_fmt = ("{postId}{headline:?_//[b:200]}{num:?_//}.{extension}") - archive_fmt = "{postId}_{num}" - - def _init(self): - self.replies = self.config("replies", True) - self.pinned = self.config("pinned", False) - self.shares = self.config("shares", False) - self.asks = self.config("asks", True) - - self.avatar = self.config("avatar", False) - if self.avatar: - self._urls_avatar = {None, ""} - - self.background = self.config("background", False) - if self.background: - self._urls_background = {None, ""} - - def items(self): - for post in self.posts(): - reason = post.get("limitedVisibilityReason") - if reason and reason != "none": - if reason == "log-in-first": - reason = ("This page's posts are visible only to users " - "who are logged in.") - self.log.warning('%s: "%s"', post["postId"], reason) - - files = self._extract_files(post) - post["count"] = len(files) - post["date"] = text.parse_datetime( - post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ") - - yield Message.Directory, post - - project = post["postingProject"] - if self.avatar: - url = project.get("avatarURL") - if url not in self._urls_avatar: - self._urls_avatar.add(url) - p = post.copy() - p["postId"] = p["kind"] = "avatar" - p["headline"] = p["num"] = "" - yield Message.Url, url, text.nameext_from_url(url, p) - - if self.background: - url = project.get("headerURL") - if url not in self._urls_background: - self._urls_background.add(url) - p = post.copy() - p["postId"] = p["kind"] = "background" - p["headline"] = p["num"] = "" - yield Message.Url, url, text.nameext_from_url(url, p) - - for post["num"], file in enumerate(files, 1): - url = file["fileURL"] - post.update(file) - text.nameext_from_url(url, post) - yield Message.Url, url, post - - def posts(self): - return () - - def _request_api(self, endpoint, input): - url = "{}/api/v1/trpc/{}".format(self.root, endpoint) - params = {"batch": "1", "input": util.json_dumps({"0": input})} - headers = {"content-type": "application/json"} - - data = self.request(url, params=params, headers=headers).json() - return data[0]["result"]["data"] - - def _extract_files(self, post): - files = [] - - self._extract_blocks(post, files) - if self.shares and post.get("shareTree"): - for share in post["shareTree"]: - self._extract_blocks(share, files, share) - del post["shareTree"] - - return files - - def _extract_blocks(self, post, files, shared=None): - post["content"] = content = [] - - for block in post.pop("blocks") or (): - try: - type = block["type"] - if type == "attachment": - file = block["attachment"].copy() - file["shared"] = shared - files.append(file) - elif type == "attachment-row": - for att in block["attachments"]: - file = att["attachment"].copy() - file["shared"] = shared - files.append(file) - elif type == "markdown": - content.append(block["markdown"]["content"]) - elif type == "ask": - post["ask"] = block["ask"] - else: - self.log.debug("%s: Unsupported block type '%s'", - post["postId"], type) - except Exception as exc: - self.log.debug("%s: %s", exc.__class__.__name__, exc) - - -class CohostUserExtractor(CohostExtractor): - """Extractor for media from a cohost user""" - subcategory = "user" - pattern = BASE_PATTERN + r"/([^/?#]+)/?(?:$|\?|#)" - example = "https://cohost.org/USER" - - def posts(self): - empty = 0 - params = { - "projectHandle": self.groups[0], - "page": 0, - "options": { - "pinnedPostsAtTop" : True if self.pinned else False, - "hideReplies" : not self.replies, - "hideShares" : not self.shares, - "hideAsks" : not self.asks, - "viewingOnProjectPage": True, - }, - } - - while True: - data = self._request_api("posts.profilePosts", params) - - posts = data["posts"] - if posts: - empty = 0 - yield from posts - else: - empty += 1 - - pagination = data["pagination"] - if not pagination.get("morePagesForward"): - return - if empty >= 3: - return self.log.debug("Empty API results") - params["page"] = pagination["nextPage"] - - -class CohostPostExtractor(CohostExtractor): - """Extractor for media from a single cohost post""" - subcategory = "post" - pattern = BASE_PATTERN + r"/([^/?#]+)/post/(\d+)" - example = "https://cohost.org/USER/post/12345" - - def posts(self): - endpoint = "posts.singlePost" - params = { - "handle": self.groups[0], - "postId": int(self.groups[1]), - } - - data = self._request_api(endpoint, params) - post = data["post"] - - try: - post["comments"] = data["comments"][self.groups[1]] - except LookupError: - post["comments"] = () - - return (post,) - - -class CohostTagExtractor(CohostExtractor): - """Extractor for tagged posts""" - subcategory = "tag" - pattern = BASE_PATTERN + r"/([^/?#]+)/tagged/([^/?#]+)(?:\?([^#]+))?" - example = "https://cohost.org/USER/tagged/TAG" - - def posts(self): - user, tag, query = self.groups - url = "{}/{}/tagged/{}".format(self.root, user, tag) - params = text.parse_query(query) - post_feed_key = ("tagged-post-feed" if user == "rc" else - "project-tagged-post-feed") - - while True: - page = self.request(url, params=params).text - data = util.json_loads(text.extr( - page, 'id="__COHOST_LOADER_STATE__">', '')) - - try: - feed = data[post_feed_key] - except KeyError: - feed = data.popitem()[1] - - yield from feed["posts"] - - pagination = feed["paginationMode"] - if not pagination.get("morePagesForward"): - return - params["refTimestamp"] = pagination["refTimestamp"] - params["skipPosts"] = \ - pagination["currentSkip"] + pagination["idealPageStride"] - - -class CohostLikesExtractor(CohostExtractor): - """Extractor for liked posts""" - subcategory = "likes" - pattern = BASE_PATTERN + r"/rc/liked-posts" - example = "https://cohost.org/rc/liked-posts" - - def posts(self): - url = "{}/rc/liked-posts".format(self.root) - params = {} - - while True: - page = self.request(url, params=params).text - data = util.json_loads(text.extr( - page, 'id="__COHOST_LOADER_STATE__">', '')) - - try: - feed = data["liked-posts-feed"] - except KeyError: - feed = data.popitem()[1] - - yield from feed["posts"] - - pagination = feed["paginationMode"] - if not pagination.get("morePagesForward"): - return - params["refTimestamp"] = pagination["refTimestamp"] - params["skipPosts"] = \ - pagination["currentSkip"] + pagination["idealPageStride"] diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 37b6747..d0a9397 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -32,7 +32,7 @@ class DanbooruExtractor(BaseExtractor): if isinstance(threshold, int): self.threshold = 1 if threshold < 1 else threshold else: - self.threshold = self.per_page + self.threshold = self.per_page - 20 username, api_key = self._get_auth_info() if username: diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 8172f62..59b2d6d 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -822,7 +822,7 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\ username, folder["gallery_id"], public=False): cache[dev["deviationid"]] = dev if has_access else None - return cache[deviation["deviationid"]] + return cache.get(deviation["deviationid"]) def _unwatch_premium(self): for username in self.unwatch: diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py index 33e6ba8..eddcb12 100644 --- a/gallery_dl/extractor/e621.py +++ b/gallery_dl/extractor/e621.py @@ -8,7 +8,7 @@ """Extractors for https://e621.net/ and other e621 instances""" -from .common import Message +from .common import Extractor, Message from . import danbooru from ..cache import memcache from .. import text, util @@ -156,3 +156,20 @@ class E621FavoriteExtractor(E621Extractor): def posts(self): return self._pagination("/favorites.json", self.query) + + +class E621FrontendExtractor(Extractor): + """Extractor for alternative e621 frontends""" + basecategory = "E621" + category = "e621" + subcategory = "frontend" + pattern = r"(?:https?://)?e621\.(?:cc/\?tags|anthro\.fr/\?q)=([^&#]*)" + example = "https://e621.cc/?tags=TAG" + + def initialize(self): + pass + + def items(self): + url = "https://e621.net/posts?tags=" + self.groups[0] + data = {"_extractor": E621TagExtractor} + yield Message.Queue, url, data diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py index 2f3fdbf..1ec6adc 100644 --- a/gallery_dl/extractor/facebook.py +++ b/gallery_dl/extractor/facebook.py @@ -238,8 +238,9 @@ class FacebookExtractor(Extractor): return res - def extract_set(self, first_photo_id, set_id): - all_photo_ids = [first_photo_id] + def extract_set(self, set_data): + set_id = set_data["set_id"] + all_photo_ids = [set_data["first_photo_id"]] retries = 0 i = 0 @@ -252,7 +253,6 @@ class FacebookExtractor(Extractor): photo_page = self.photo_page_request_wrapper(photo_url).text photo = self.parse_photo_page(photo_page) - photo["set_id"] = set_id photo["num"] = i + 1 if self.author_followups: @@ -281,9 +281,11 @@ class FacebookExtractor(Extractor): retries = 0 else: retries = 0 + photo.update(set_data) + yield Message.Directory, photo yield Message.Url, photo["url"], photo - if photo["next_photo_id"] == "": + if not photo["next_photo_id"]: self.log.debug( "Can't find next image in the set. " "Extraction is over." @@ -322,15 +324,11 @@ class FacebookSetExtractor(FacebookExtractor): set_url = self.set_url_fmt.format(set_id=set_id) set_page = self.request(set_url).text + set_data = self.parse_set_page(set_page) + if self.groups[2]: + set_data["first_photo_id"] = self.groups[2] - directory = self.parse_set_page(set_page) - - yield Message.Directory, directory - - yield from self.extract_set( - self.groups[2] or directory["first_photo_id"], - directory["set_id"] - ) + return self.extract_set(set_data) class FacebookPhotoExtractor(FacebookExtractor): @@ -436,13 +434,8 @@ class FacebookProfileExtractor(FacebookExtractor): if set_id: set_url = self.set_url_fmt.format(set_id=set_id) set_page = self.request(set_url).text + set_data = self.parse_set_page(set_page) + return self.extract_set(set_data) - directory = self.parse_set_page(set_page) - - yield Message.Directory, directory - - yield from self.extract_set( - directory["first_photo_id"], directory["set_id"] - ) - else: - self.log.debug("Profile photos set ID not found.") + self.log.debug("Profile photos set ID not found.") + return iter(()) diff --git a/gallery_dl/extractor/fanleaks.py b/gallery_dl/extractor/fanleaks.py deleted file mode 100644 index 886e893..0000000 --- a/gallery_dl/extractor/fanleaks.py +++ /dev/null @@ -1,87 +0,0 @@ -# -*- coding: utf-8 -*- - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://fanleaks.club/""" - -from .common import Extractor, Message -from .. import text - - -class FanleaksExtractor(Extractor): - """Base class for Fanleaks extractors""" - category = "fanleaks" - directory_fmt = ("{category}", "{model}") - filename_fmt = "{model_id}_{id}.{extension}" - archive_fmt = "{model_id}_{id}" - root = "https://fanleaks.club" - - def __init__(self, match): - Extractor.__init__(self, match) - self.model_id = match.group(1) - - def extract_post(self, url): - extr = text.extract_from(self.request(url, notfound="post").text) - data = { - "model_id": self.model_id, - "model" : text.unescape(extr('text-lg">', "")), - "id" : text.parse_int(self.id), - "type" : extr('type="', '"')[:5] or "photo", - } - url = extr('src="', '"') - yield Message.Directory, data - yield Message.Url, url, text.nameext_from_url(url, data) - - -class FanleaksPostExtractor(FanleaksExtractor): - """Extractor for individual posts on fanleaks.club""" - subcategory = "post" - pattern = r"(?:https?://)?(?:www\.)?fanleaks\.club/([^/?#]+)/(\d+)" - example = "https://fanleaks.club/MODEL/12345" - - def __init__(self, match): - FanleaksExtractor.__init__(self, match) - self.id = match.group(2) - - def items(self): - url = "{}/{}/{}".format(self.root, self.model_id, self.id) - return self.extract_post(url) - - -class FanleaksModelExtractor(FanleaksExtractor): - """Extractor for all posts from a fanleaks model""" - subcategory = "model" - pattern = (r"(?:https?://)?(?:www\.)?fanleaks\.club" - r"/(?!latest/?$)([^/?#]+)/?$") - example = "https://fanleaks.club/MODEL" - - def items(self): - page_num = 1 - page = self.request( - self.root + "/" + self.model_id, notfound="model").text - data = { - "model_id": self.model_id, - "model" : text.unescape(text.extr(page, 'mt-4">', "

")), - "type" : "photo", - } - page_url = text.extr(page, "url: '", "'") - while True: - page = self.request("{}{}".format(page_url, page_num)).text - if not page: - return - - for item in text.extract_iter(page, '"): - self.id = id = text.extr(item, "/", '"') - if "/icon-play.svg" in item: - url = "{}/{}/{}".format(self.root, self.model_id, id) - yield from self.extract_post(url) - continue - - data["id"] = text.parse_int(id) - url = text.extr(item, 'src="', '"').replace( - "/thumbs/", "/", 1) - yield Message.Directory, data - yield Message.Url, url, text.nameext_from_url(url, data) - page_num += 1 diff --git a/gallery_dl/extractor/fapachi.py b/gallery_dl/extractor/fapachi.py index 80478ca..43627e2 100644 --- a/gallery_dl/extractor/fapachi.py +++ b/gallery_dl/extractor/fapachi.py @@ -33,7 +33,8 @@ class FapachiPostExtractor(Extractor): } page = self.request("{}/{}/media/{}".format( self.root, self.user, self.id)).text - url = self.root + text.extr(page, 'd-block" src="', '"') + url = self.root + text.extract( + page, 'data-src="', '"', page.index('class="media-img'))[0] yield Message.Directory, data yield Message.Url, url, text.nameext_from_url(url, data) diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py index c939a3c..f15aab7 100644 --- a/gallery_dl/extractor/hiperdex.py +++ b/gallery_dl/extractor/hiperdex.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://hipertoon.com/""" +"""Extractors for https://hiperdex.com/""" from .common import ChapterExtractor, MangaExtractor from .. import text @@ -20,7 +20,7 @@ BASE_PATTERN = (r"((?:https?://)?(?:www\.)?" class HiperdexBase(): """Base class for hiperdex extractors""" category = "hiperdex" - root = "https://hipertoon.com" + root = "https://hiperdex.com" @memcache(keyarg=1) def manga_data(self, manga, page=None): @@ -49,7 +49,7 @@ class HiperdexBase(): "status" : extr( 'class="summary-content">', '<').strip(), "description": text.remove_html(text.unescape(extr( - "Summary ", "
"))), + '
', "
"))), "language": "English", "lang" : "en", } @@ -69,7 +69,7 @@ class HiperdexBase(): class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): """Extractor for hiperdex manga chapters""" pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+)/([^/?#]+))" - example = "https://hipertoon.com/manga/MANGA/CHAPTER/" + example = "https://hiperdex.com/manga/MANGA/CHAPTER/" def __init__(self, match): root, path, self.manga, self.chapter = match.groups() @@ -91,7 +91,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor): """Extractor for hiperdex manga""" chapterclass = HiperdexChapterExtractor pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+))/?$" - example = "https://hipertoon.com/manga/MANGA/" + example = "https://hiperdex.com/manga/MANGA/" def __init__(self, match): root, path, self.manga = match.groups() @@ -127,7 +127,7 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor): chapterclass = HiperdexMangaExtractor reverse = False pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/(?:[^/?#]+))" - example = "https://hipertoon.com/manga-artist/NAME/" + example = "https://hiperdex.com/manga-artist/NAME/" def __init__(self, match): self.root = text.ensure_http_scheme(match.group(1)) diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py index 5f1e0f4..d6b36cb 100644 --- a/gallery_dl/extractor/imagehosts.py +++ b/gallery_dl/extractor/imagehosts.py @@ -286,6 +286,34 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor): return url, url +class TurboimagehostGalleryExtractor(ImagehostImageExtractor): + """Extractor for image galleries from turboimagehost.com""" + category = "turboimagehost" + subcategory = "gallery" + pattern = (r"(?:https?://)?((?:www\.)?turboimagehost\.com" + r"/album/(\d+)/([^/?#]*))") + example = "https://www.turboimagehost.com/album/12345/GALLERY_NAME" + + def items(self): + data = {"_extractor": TurboimagehostImageExtractor} + params = {"p": 1} + + while True: + page = self.request(self.page_url, params=params).text + + if params["p"] == 1 and \ + "Requested gallery don`t exist on our website." in page: + raise exception.NotFoundError("gallery") + + thumb_url = None + for thumb_url in text.extract_iter(page, '">
1 else base + try: + html = self.request(url).text + data = util.json_loads(text.unescape(text.extr( + html, '").rstrip("\n\r;")) @@ -105,12 +105,8 @@ class XhamsterUserExtractor(XhamsterExtractor): pattern = BASE_PATTERN + r"/users/([^/?#]+)(?:/photos)?/?(?:$|[?#])" example = "https://xhamster.com/users/USER/photos" - def __init__(self, match): - XhamsterExtractor.__init__(self, match) - self.user = match.group(2) - def items(self): - url = "{}/users/{}/photos".format(self.root, self.user) + url = "{}/users/{}/photos".format(self.root, self.groups[1]) data = {"_extractor": XhamsterGalleryExtractor} while url: diff --git a/gallery_dl/path.py b/gallery_dl/path.py index f57b02e..21e1aa0 100644 --- a/gallery_dl/path.py +++ b/gallery_dl/path.py @@ -342,15 +342,22 @@ class PathFormat(): try: os.replace(self.temppath, self.realpath) except FileNotFoundError: - # delayed directory creation - os.makedirs(self.realdirectory) + try: + # delayed directory creation + os.makedirs(self.realdirectory) + except FileExistsError: + # file at self.temppath does not exist + return False continue except OSError: # move across different filesystems try: shutil.copyfile(self.temppath, self.realpath) except FileNotFoundError: - os.makedirs(self.realdirectory) + try: + os.makedirs(self.realdirectory) + except FileExistsError: + return False shutil.copyfile(self.temppath, self.realpath) os.unlink(self.temppath) break diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 6bceebd..d252bed 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.28.4" +__version__ = "1.28.5" __variant__ = None diff --git a/scripts/run_tests.py b/scripts/run_tests.py new file mode 100755 index 0000000..d1fd1f1 --- /dev/null +++ b/scripts/run_tests.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +import os +import sys +import unittest + +TEST_DIRECTORY = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "test") + +sys.path.insert(0, TEST_DIRECTORY) + +if len(sys.argv) <= 1: + TESTS = [ + file.rpartition(".")[0] + for file in os.listdir(TEST_DIRECTORY) + if file.startswith("test_") and file != "test_results.py" + ] +else: + TESTS = [ + name if name.startswith("test_") else "test_" + name + for name in sys.argv[1:] + ] + + +suite = unittest.TestSuite() + +for test in TESTS: + try: + module = __import__(test) + except ImportError: + print("unable to import", test) + else: + tests = unittest.defaultTestLoader.loadTestsFromModule(module) + suite.addTests(tests) + +if __name__ == "__main__": + result = unittest.TextTestRunner(verbosity=2).run(suite) + if result.errors or result.failures: + sys.exit(1) diff --git a/test/test_results.py b/test/test_results.py index f36f798..c3b9b2d 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -237,7 +237,7 @@ class TestExtractorResults(unittest.TestCase): elif isinstance(test, range): self.assertRange(value, test, msg=path) elif isinstance(test, set): - self.assertIn(value, test, msg=path) + self.assertTrue(value in test or type(value) in test, msg=path) elif isinstance(test, list): subtest = False for idx, item in enumerate(test): -- cgit v1.2.3