diff options
| -rw-r--r-- | CHANGELOG.md | 93 | ||||
| -rw-r--r-- | PKG-INFO | 6 | ||||
| -rw-r--r-- | README.rst | 4 | ||||
| -rw-r--r-- | data/man/gallery-dl.1 | 2 | ||||
| -rw-r--r-- | data/man/gallery-dl.conf.5 | 53 | ||||
| -rw-r--r-- | gallery_dl.egg-info/PKG-INFO | 6 | ||||
| -rw-r--r-- | gallery_dl/extractor/behance.py | 15 | ||||
| -rw-r--r-- | gallery_dl/extractor/bunkr.py | 10 | ||||
| -rw-r--r-- | gallery_dl/extractor/cien.py | 2 | ||||
| -rw-r--r-- | gallery_dl/extractor/deviantart.py | 17 | ||||
| -rw-r--r-- | gallery_dl/extractor/fanbox.py | 16 | ||||
| -rw-r--r-- | gallery_dl/extractor/furaffinity.py | 40 | ||||
| -rw-r--r-- | gallery_dl/extractor/hentaicosplays.py | 10 | ||||
| -rw-r--r-- | gallery_dl/extractor/hotleak.py | 2 | ||||
| -rw-r--r-- | gallery_dl/extractor/instagram.py | 8 | ||||
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 5 | ||||
| -rw-r--r-- | gallery_dl/extractor/zerochan.py | 8 | ||||
| -rw-r--r-- | gallery_dl/postprocessor/metadata.py | 23 | ||||
| -rw-r--r-- | gallery_dl/version.py | 2 | ||||
| -rw-r--r-- | test/test_postprocessor.py | 21 |
20 files changed, 206 insertions, 137 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ca8647..70a33e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,83 +1,20 @@ -## 1.27.2 - 2024-08-03 +## 1.27.3 - 2024-08-10 ### Extractors #### Additions -- [agnph] add `tag` and `post` extractors ([#5284](https://github.com/mikf/gallery-dl/issues/5284), [#5890](https://github.com/mikf/gallery-dl/issues/5890)) -- [aryion] add `favorite` extractor ([#4511](https://github.com/mikf/gallery-dl/issues/4511), [#5870](https://github.com/mikf/gallery-dl/issues/5870)) -- [cien] add support ([#2885](https://github.com/mikf/gallery-dl/issues/2885), [#4103](https://github.com/mikf/gallery-dl/issues/4103), [#5240](https://github.com/mikf/gallery-dl/issues/5240)) -- [instagram] add `info` extractor ([#5262](https://github.com/mikf/gallery-dl/issues/5262)) -- [koharu] add `gallery`, `search`, and `favorite` extractors ([#5893](https://github.com/mikf/gallery-dl/issues/5893), [#4707](https://github.com/mikf/gallery-dl/issues/4707)) -- [twitter] add `info` extractor ([#3623](https://github.com/mikf/gallery-dl/issues/3623)) +- [bunkr] support `bunkr.ci` and `bunkrrr.org` ([#5970](https://github.com/mikf/gallery-dl/issues/5970)) +- [furaffinity] add `submissions` extractor ([#5954](https://github.com/mikf/gallery-dl/issues/5954)) +- [hentaicosplays] support `hentai-cosplay-xxx.com` ([#5959](https://github.com/mikf/gallery-dl/issues/5959)) #### Fixes -- [8chan] update `TOS` cookie name ([#5868](https://github.com/mikf/gallery-dl/issues/5868)) -- [behance] fix image extraction ([#5873](https://github.com/mikf/gallery-dl/issues/5873), [#5926](https://github.com/mikf/gallery-dl/issues/5926)) -- [booru] prevent crash when file URL is empty ([#5859](https://github.com/mikf/gallery-dl/issues/5859)) -- [deviantart] try to work around journal/status API changes ([#5916](https://github.com/mikf/gallery-dl/issues/5916)) -- [hentainexus] fix error with spread pages ([#5827](https://github.com/mikf/gallery-dl/issues/5827)) -- [hotleak] fix faulty image URLs ([#5915](https://github.com/mikf/gallery-dl/issues/5915)) -- [inkbunny:following] fix potentially infinite loop -- [nijie] fix image URLs of single image posts ([#5842](https://github.com/mikf/gallery-dl/issues/5842)) -- [readcomiconline] fix extraction ([#5866](https://github.com/mikf/gallery-dl/issues/5866)) -- [toyhouse] fix Content Warning bypass ([#5820](https://github.com/mikf/gallery-dl/issues/5820)) -- [tumblr] revert to `offset` pagination, implement `pagination` option ([#5880](https://github.com/mikf/gallery-dl/issues/5880)) -- [twitter] fix `username-alt` option name ([#5715](https://github.com/mikf/gallery-dl/issues/5715)) -- [warosu] fix extraction -- [zerochan] handle `KeyError - 'items'` ([#5826](https://github.com/mikf/gallery-dl/issues/5826)) -- [zerochan] fix error on tag redirections ([#5891](https://github.com/mikf/gallery-dl/issues/5891)) -- [zerochan] fix `Invalid control character` errors ([#5892](https://github.com/mikf/gallery-dl/issues/5892)) +- [behance] fix `KeyError: 'fields'` ([#5965](https://github.com/mikf/gallery-dl/issues/5965)) +- [behance] fix video extraction ([#5965](https://github.com/mikf/gallery-dl/issues/5965)) +- [cien] extract all files when authenticated ([#5934](https://github.com/mikf/gallery-dl/issues/5934)) +- [deviantart] fix `KeyError - 'category'` ([#5960](https://github.com/mikf/gallery-dl/issues/5960), [#5961](https://github.com/mikf/gallery-dl/issues/5961), [#5969](https://github.com/mikf/gallery-dl/issues/5969), [#5971](https://github.com/mikf/gallery-dl/issues/5971), [#5976](https://github.com/mikf/gallery-dl/issues/5976), [#5978](https://github.com/mikf/gallery-dl/issues/5978)) +- [fanbox] update pagination logic ([#5949](https://github.com/mikf/gallery-dl/issues/5949), [#5951](https://github.com/mikf/gallery-dl/issues/5951), [#5956](https://github.com/mikf/gallery-dl/issues/5956)) +- [hotleak] fix AttributeError ([#5950](https://github.com/mikf/gallery-dl/issues/5950)) +- [instagram] restore GraphQL API functionality ([#5920](https://github.com/mikf/gallery-dl/issues/5920)) +- [twitter] update `x-csrf-token` header during login ([#5945](https://github.com/mikf/gallery-dl/issues/5945)) #### Improvements -- [bunkr] support `bunkr.fi` domain ([#5872](https://github.com/mikf/gallery-dl/issues/5872)) -- [deviantart:following] use OAuth API endpoint ([#2511](https://github.com/mikf/gallery-dl/issues/2511)) -- [directlink] extend recognized file extensions ([#5924](https://github.com/mikf/gallery-dl/issues/5924)) -- [exhentai] improve error message when temporarily banned ([#5845](https://github.com/mikf/gallery-dl/issues/5845)) -- [gelbooru_v02] use total number of posts as pagination end marker ([#5830](https://github.com/mikf/gallery-dl/issues/5830)) -- [imagefap] add enumeration index to default filenames ([#1746](https://github.com/mikf/gallery-dl/issues/1746), [#5887](https://github.com/mikf/gallery-dl/issues/5887)) -- [paheal] implement fast `--range` support ([#5905](https://github.com/mikf/gallery-dl/issues/5905)) -- [redgifs] support URLs with numeric IDs ([#5898](https://github.com/mikf/gallery-dl/issues/5898), [#5899](https://github.com/mikf/gallery-dl/issues/5899)) -- [sankaku] match URLs with `www` subdomain ([#5907](https://github.com/mikf/gallery-dl/issues/5907)) -- [sankakucomplex] update domain to `news.sankakucomplex.com` -- [twitter] implement `cursor` support ([#5753](https://github.com/mikf/gallery-dl/issues/5753)) -- [vipergirls] improve `thread` URL pattern -- [wallpapercave] support `album` listings ([#5925](https://github.com/mikf/gallery-dl/issues/5925)) -#### Metadata -- [dynastyscans] extract chapter `tags` ([#5904](https://github.com/mikf/gallery-dl/issues/5904)) -- [erome] extract `date` metadata ([#5796](https://github.com/mikf/gallery-dl/issues/5796)) -- [furaffinity] extract `folders` and `thumbnail` metadata ([#1284](https://github.com/mikf/gallery-dl/issues/1284), [#5824](https://github.com/mikf/gallery-dl/issues/5824)) -- [sankaku] implement `notes` extraction ([#5865](https://github.com/mikf/gallery-dl/issues/5865)) -- [subscribestar] fix `date` parsing in updated posts ([#5783](https://github.com/mikf/gallery-dl/issues/5783)) -- [twitter] extract `bookmark_count` and `view_count` metadata ([#5802](https://github.com/mikf/gallery-dl/issues/5802)) -- [zerochan] fix `source` metadata -- [zerochan] fix tag category extraction ([#5874](https://github.com/mikf/gallery-dl/issues/5874)) -- [zerochan] delay fetching extended metadata ([#5869](https://github.com/mikf/gallery-dl/issues/5869)) -#### Options -- [agnph] implement `tags` option ([#5284](https://github.com/mikf/gallery-dl/issues/5284)) -- [booru] allow multiple `url` keys ([#5859](https://github.com/mikf/gallery-dl/issues/5859)) -- [cien] add `files` option ([#2885](https://github.com/mikf/gallery-dl/issues/2885)) -- [koharu] add `cbz` and `format` options ([#5893](https://github.com/mikf/gallery-dl/issues/5893)) -- [vsco] add `include` option ([#5911](https://github.com/mikf/gallery-dl/issues/5911)) -- [zerochan] implement `tags` option ([#5874](https://github.com/mikf/gallery-dl/issues/5874)) -#### Removals -- [fallenangels] remove module +- [bunkr] fail downloads for `maintenance` files ([#5952](https://github.com/mikf/gallery-dl/issues/5952)) +- [zerochan] improve tag redirect handling, add `redirects` option ([#5891](https://github.com/mikf/gallery-dl/issues/5891)) ### Post Processors -- [metadata] allow using format strings for `directory` ([#5728](https://github.com/mikf/gallery-dl/issues/5728)) -### Options -- add `--print-traffic` command-line option -- add `-J/--resolve-json` command-line option ([#5864](https://github.com/mikf/gallery-dl/issues/5864)) -- add `filters-environment` option -- implement `archive-event` option ([#5784](https://github.com/mikf/gallery-dl/issues/5784)) -### Actions -- [actions] support multiple actions per pattern -- [actions] add `exec` action ([#5619](https://github.com/mikf/gallery-dl/issues/5619)) -- [actions] add `abort` and `terminate` actions ([#5778](https://github.com/mikf/gallery-dl/issues/5778)) -- [actions] allow setting a duration for `wait` -- [actions] emit logging messages before waiting/exiting/etc -### Tests -- [tests] enable test results for external extractors ([#5262](https://github.com/mikf/gallery-dl/issues/5262)) -- [tests] load results from `${GDL_TEST_RESULTS}` ([#5262](https://github.com/mikf/gallery-dl/issues/5262)) -### Miscellaneous -- [cookies] add `thorium` support ([#5781](https://github.com/mikf/gallery-dl/issues/5781)) -- [job] add `resolve` argument to DataJob ([#5864](https://github.com/mikf/gallery-dl/issues/5864)) -- [path] fix moving temporary files across drives on Windows ([#5807](https://github.com/mikf/gallery-dl/issues/5807)) -- [ytdl] fix `--cookies-from-browser` option parsing ([#5885](https://github.com/mikf/gallery-dl/issues/5885)) -- make exceptions in filters/conditionals non-fatal -- update default User-Agent header to Firefox 128 ESR -- include `zstd` in Accept-Encoding header when supported +- [metadata] add `base-directory` option ([#5262](https://github.com/mikf/gallery-dl/issues/5262), [#5728](https://github.com/mikf/gallery-dl/issues/5728)) @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.27.2 +Version: 1.27.3 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -114,9 +114,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.2/gallery-dl.exe>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.3/gallery-dl.exe>`__ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__) -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.2/gallery-dl.bin>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.3/gallery-dl.bin>`__ Nightly Builds @@ -74,9 +74,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.2/gallery-dl.exe>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.3/gallery-dl.exe>`__ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__) -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.2/gallery-dl.bin>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.3/gallery-dl.bin>`__ Nightly Builds diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index d1eddd6..97af9f9 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2024-08-03" "1.27.2" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2024-08-10" "1.27.3" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 8f75284..49c3ec3 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2024-08-03" "1.27.2" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2024-08-10" "1.27.3" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -186,8 +186,7 @@ Directory path used as base for all download destinations. .IP "Description:" 4 Use an extractor's current target directory as -\f[I]base-directory\f[] -for any spawned child extractors. +\f[I]base-directory\f[] for any spawned child extractors. .SS extractor.*.metadata-parent @@ -1594,7 +1593,17 @@ Supported module types are \f[I]image\f[], \f[I]video\f[], \f[I]mediacollection\f[], \f[I]embed\f[], \f[I]text\f[]. -.SS extractor.blogger.videos +.SS extractor.[blogger].api-key +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Description:" 4 +Custom Blogger API key. + +https://developers.google.com/blogger/docs/3.0/using#APIKey + + +.SS extractor.[blogger].videos .IP "Type:" 6 \f[I]bool\f[] @@ -5396,6 +5405,17 @@ Controls how to paginate over tag search results. (limited to 100 pages * 24 posts) +.SS extractor.zerochan.redirects +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Automatically follow tag redirects. + + .SS extractor.[booru].tags .IP "Type:" 6 \f[I]bool\f[] @@ -6415,8 +6435,29 @@ If this option is set, \f[I]metadata.extension\f[] and * ["..", "metadata", "\\fF {id // 500 * 500}"] .IP "Description:" 4 -Directory where metadata files are stored in relative to the -current target location for file downloads. +Directory where metadata files are stored in +relative to \f[I]metadata.base-directory\f[]. + + +.SS metadata.base-directory +.IP "Type:" 6 +.br +* \f[I]bool\f[] +.br +* \f[I]Path\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Selects the relative location for metadata files. + +.br +* \f[I]false\f[]: current target location for file downloads (\f[I]base-directory\f[] + directory_) +.br +* \f[I]true\f[]: current \f[I]base-directory\f[] location +.br +* any \f[I]Path\f[]: custom location .SS metadata.extension diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index eec2e32..5d32a2a 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.27.2 +Version: 1.27.3 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -114,9 +114,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.2/gallery-dl.exe>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.3/gallery-dl.exe>`__ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__) -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.2/gallery-dl.bin>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.3/gallery-dl.bin>`__ Nightly Builds diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index f24059f..72f9195 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -49,7 +49,7 @@ class BehanceExtractor(Extractor): def _update(self, data): # compress data to simple lists - if data["fields"] and isinstance(data["fields"][0], dict): + if data.get("fields") and isinstance(data["fields"][0], dict): data["fields"] = [ field.get("name") or field.get("label") for field in data["fields"] @@ -165,6 +165,19 @@ class BehanceGalleryExtractor(BehanceExtractor): elif mtype == "video": try: + url = text.extr(module["embed"], 'src="', '"') + page = self.request(text.unescape(url)).text + + url = text.extr(page, '<source src="', '"') + if text.ext_from_url(url) == "m3u8": + url = "ytdl:" + url + module["extension"] = "mp4" + append((url, module)) + continue + except Exception as exc: + self.log.debug("%s: %s", exc.__class__.__name__, exc) + + try: renditions = module["videoData"]["renditions"] except Exception: self.log.warning("No download URLs for video %s", diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py index 77f0de6..240bbd3 100644 --- a/gallery_dl/extractor/bunkr.py +++ b/gallery_dl/extractor/bunkr.py @@ -13,7 +13,7 @@ from .. import text BASE_PATTERN = ( r"(?:https?://)?(?:app\.)?(bunkr+" - r"\.(?:s[kiu]|fi|ru|la|is|to|ac|black|cat|media|red|site|ws))" + r"\.(?:s[kiu]|[cf]i|ru|la|is|to|ac|black|cat|media|red|site|ws|org))" ) LEGACY_DOMAINS = { @@ -55,6 +55,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): "album_name" : text.unescape(info[0]), "album_size" : size[1:-1], "count" : len(urls), + "_http_validate": self._validate, } def _extract_files(self, urls): @@ -74,6 +75,12 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): text.rextract(page, ' href="', '"', page.rindex("Download"))[0] ) + def _validate(self, response): + if response.history and response.url.endswith("/maintenance-vid.mp4"): + self.log.warning("File server in maintenance mode") + return False + return True + class BunkrMediaExtractor(BunkrAlbumExtractor): """Extractor for bunkr.sk media links""" @@ -95,4 +102,5 @@ class BunkrMediaExtractor(BunkrAlbumExtractor): "album_size" : -1, "description": "", "count" : 1, + "_http_validate": self._validate, } diff --git a/gallery_dl/extractor/cien.py b/gallery_dl/extractor/cien.py index bae86d0..378365e 100644 --- a/gallery_dl/extractor/cien.py +++ b/gallery_dl/extractor/cien.py @@ -59,7 +59,7 @@ class CienArticleExtractor(CienExtractor): post = util.json_loads(text.extr( page, '<script type="application/ld+json">', '</script>'))[0] - files = self._extract_files(post.get("articleBody") or page) + files = self._extract_files(page) post["post_url"] = url post["post_id"] = text.parse_int(self.groups[1]) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index a70710c..f3ea4e7 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -12,7 +12,6 @@ from .common import Extractor, Message from .. import text, util, exception from ..cache import cache, memcache import collections -import itertools import mimetypes import binascii import time @@ -246,7 +245,6 @@ class DeviantartExtractor(Extractor): deviation["username"] = deviation["author"]["username"] deviation["_username"] = deviation["username"].lower() - deviation["da_category"] = deviation["category"] deviation["published_time"] = text.parse_int( deviation["published_time"]) deviation["date"] = text.parse_timestamp( @@ -301,15 +299,6 @@ class DeviantartExtractor(Extractor): ) else: needle = '<div usr class="gr">' - catlist = deviation["category_path"].split("/") - categories = " / ".join( - ('<span class="crumb"><a href="{}/{}/"><span>{}</span></a>' - '</span>').format(self.root, cpath, cat.capitalize()) - for cat, cpath in zip( - catlist, - itertools.accumulate(catlist, lambda t, c: t + "/" + c) - ) - ) username = deviation["author"]["username"] urlname = deviation.get("username") or username.lower() header = HEADER_TEMPLATE.format( @@ -318,7 +307,6 @@ class DeviantartExtractor(Extractor): userurl="{}/{}/".format(self.root, urlname), username=username, date=deviation["date"], - categories=categories, ) if needle in html: @@ -624,7 +612,7 @@ class DeviantartAvatarExtractor(DeviantartExtractor): def _make_deviation(self, url, user, index, fmt): return { "author" : user, - "category" : "avatar", + "da_category" : "avatar", "index" : text.parse_int(index), "is_deleted" : False, "is_downloadable": False, @@ -1773,9 +1761,6 @@ HEADER_TEMPLATE = """<div usr class="gr"> <span class="user-symbol regular"></span></span></span>, <span>{date}</span> </li> - <li class="category"> - {categories} - </li> </ul> </div> """ diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py index d81fd0b..d8337b6 100644 --- a/gallery_dl/extractor/fanbox.py +++ b/gallery_dl/extractor/fanbox.py @@ -309,8 +309,20 @@ class FanboxCreatorExtractor(FanboxExtractor): self.creator_id = match.group(1) or match.group(2) def posts(self): - url = "https://api.fanbox.cc/post.listCreator?creatorId={}&limit=10" - return self._pagination(url.format(self.creator_id)) + url = "https://api.fanbox.cc/post.paginateCreator?creatorId=" + return self._pagination_creator(url + self.creator_id) + + def _pagination_creator(self, url): + urls = self.request(url, headers=self.headers).json()["body"] + for url in urls: + url = text.ensure_http_scheme(url) + body = self.request(url, headers=self.headers).json()["body"] + for item in body: + try: + yield self._get_post_data(item["id"]) + except Exception as exc: + self.log.warning("Skipping post %s (%s: %s)", + item["id"], exc.__class__.__name__, exc) class FanboxPostExtractor(FanboxExtractor): diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index f48a984..3055426 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -78,14 +78,12 @@ class FuraffinityExtractor(Extractor): path = extr('href="//d', '"') if not path: - self.log.warning( - "Unable to download post %s (\"%s\")", - post_id, text.remove_html( - extr('System Message', '</section>') or - extr('System Message', '</table>') - ) - ) - return None + msg = text.remove_html( + extr('System Message', '</section>') or + extr('System Message', '</table>') + ).partition(" . Continue ")[0] + return self.log.warning( + "Unable to download post %s (\"%s\")", post_id, msg) pi = text.parse_int rh = text.remove_html @@ -335,3 +333,29 @@ class FuraffinityFollowingExtractor(FuraffinityExtractor): if url.endswith(path): return url = self.root + path + + +class FuraffinitySubmissionsExtractor(FuraffinityExtractor): + """Extractor for new furaffinity submissions""" + subcategory = "submissions" + pattern = BASE_PATTERN + r"(/msg/submissions(?:/[^/?#]+)?)" + example = "https://www.furaffinity.net/msg/submissions" + + def posts(self): + self.user = None + url = self.root + self.groups[0] + return self._pagination_submissions(url) + + def _pagination_submissions(self, url): + while True: + page = self.request(url).text + + for post_id in text.extract_iter(page, 'id="sid-', '"'): + yield post_id + + path = (text.extr(page, '<a class="button standard more" href="', '"') or # noqa 501 + text.extr(page, '<a class="more-half" href="', '"') or + text.extr(page, '<a class="more" href="', '"')) + if not path: + return + url = self.root + text.unescape(path) diff --git a/gallery_dl/extractor/hentaicosplays.py b/gallery_dl/extractor/hentaicosplays.py index d5ff8c8..fbbae16 100644 --- a/gallery_dl/extractor/hentaicosplays.py +++ b/gallery_dl/extractor/hentaicosplays.py @@ -4,7 +4,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://hentai-cosplays.com/ +"""Extractors for https://hentai-cosplay-xxx.com/ (also works for hentai-img.com and porn-images-xxx.com)""" from .common import GalleryExtractor @@ -13,19 +13,21 @@ from .. import text class HentaicosplaysGalleryExtractor(GalleryExtractor): """Extractor for image galleries from - hentai-cosplays.com, hentai-img.com, and porn-images-xxx.com""" + hentai-cosplay-xxx.com, hentai-img.com, and porn-images-xxx.com""" category = "hentaicosplays" directory_fmt = ("{site}", "{title}") filename_fmt = "{filename}.{extension}" archive_fmt = "{title}_{filename}" pattern = r"((?:https?://)?(?:\w{2}\.)?" \ - r"(hentai-cosplays|hentai-img|porn-images-xxx)\.com)/" \ + r"(hentai-cosplay(?:s|-xxx)|hentai-img|porn-images-xxx)\.com)/" \ r"(?:image|story)/([\w-]+)" - example = "https://hentai-cosplays.com/image/TITLE/" + example = "https://hentai-cosplay-xxx.com/image/TITLE/" def __init__(self, match): root, self.site, self.slug = match.groups() self.root = text.ensure_http_scheme(root) + if self.root == "https://hentai-cosplays.com": + self.root = "https://hentai-cosplay-xxx.com" url = "{}/story/{}/".format(self.root, self.slug) GalleryExtractor.__init__(self, match, url) diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py index 34fbabd..ddfc54b 100644 --- a/gallery_dl/extractor/hotleak.py +++ b/gallery_dl/extractor/hotleak.py @@ -23,7 +23,7 @@ class HotleakExtractor(Extractor): def items(self): for post in self.posts(): - if self.type == "photo": + if not post["url"].startswith("ytdl:"): post["url"] = ( post["url"] .replace("/storage/storage/", "/storage/") diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index dbe2df3..c05fe72 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -936,23 +936,23 @@ class InstagramGraphqlAPI(): def tags_media(self, tag): query_hash = "9b498c08113f1e09617a1703c22b2f32" - variables = {"tag_name": text.unescape(tag), "first": 50} + variables = {"tag_name": text.unescape(tag), "first": 24} return self._pagination(query_hash, variables, "hashtag", "edge_hashtag_to_media") def user_clips(self, user_id): query_hash = "bc78b344a68ed16dd5d7f264681c4c76" - variables = {"id": user_id, "first": 50} + variables = {"id": user_id, "first": 24} return self._pagination(query_hash, variables) def user_feed(self, user_id): query_hash = "69cba40317214236af40e7efa697781d" - variables = {"id": user_id, "first": 50} + variables = {"id": user_id, "first": 24} return self._pagination(query_hash, variables) def user_tagged(self, user_id): query_hash = "be13233562af2d229b008d2976b998b5" - variables = {"id": user_id, "first": 50} + variables = {"id": user_id, "first": 24} return self._pagination(query_hash, variables) def _call(self, query_hash, variables): diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 9fa5b3f..ea57d76 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -1849,6 +1849,11 @@ def _login_impl(extr, username, password): url, params=params, headers=headers, json=data, method="POST", fatal=None) + # update 'x-csrf-token' header (#5945) + csrf_token = response.cookies.get("ct0") + if csrf_token: + headers["x-csrf-token"] = csrf_token + try: data = response.json() except ValueError: diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py index 126ef49..f9b1a7f 100644 --- a/gallery_dl/extractor/zerochan.py +++ b/gallery_dl/extractor/zerochan.py @@ -198,11 +198,15 @@ class ZerochanTagExtractor(ZerochanExtractor): while True: response = self.request(url, params=params, allow_redirects=False) + if response.status_code >= 300: url = text.urljoin(self.root, response.headers["location"]) - response = self.request(url, params=params) - data = response.json() + self.log.warning("HTTP redirect to %s", url) + if self.config("redirects"): + continue + raise exception.StopExtraction() + data = response.json() try: posts = data["items"] except Exception: diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py index a520a34..e89b170 100644 --- a/gallery_dl/postprocessor/metadata.py +++ b/gallery_dl/postprocessor/metadata.py @@ -55,6 +55,20 @@ class MetadataPP(PostProcessor): self._json_encode = self._make_encoder(options, 4).encode ext = "json" + base_directory = options.get("base-directory") + if base_directory: + if base_directory is True: + self._base = lambda p: p.basedirectory + else: + sep = os.sep + altsep = os.altsep + base_directory = util.expand_path(base_directory) + if altsep and altsep in base_directory: + base_directory = base_directory.replace(altsep, sep) + if base_directory[-1] != sep: + base_directory += sep + self._base = lambda p: base_directory + directory = options.get("directory") if isinstance(directory, list): self._directory = self._directory_format @@ -147,11 +161,14 @@ class MetadataPP(PostProcessor): except Exception: pass - def _directory(self, pathfmt): + def _base(self, pathfmt): return pathfmt.realdirectory + def _directory(self, pathfmt): + return self._base(pathfmt) + def _directory_custom(self, pathfmt): - return os.path.join(pathfmt.realdirectory, self._metadir) + return os.path.join(self._base(pathfmt), self._metadir) def _directory_format(self, pathfmt): formatters = pathfmt.directory_formatters @@ -161,7 +178,7 @@ class MetadataPP(PostProcessor): pathfmt.directory_conditions = () segments = pathfmt.build_directory(pathfmt.kwdict) directory = pathfmt.clean_path(os.sep.join(segments) + os.sep) - return os.path.join(pathfmt.realdirectory, directory) + return os.path.join(self._base(pathfmt), directory) finally: pathfmt.directory_conditions = conditions pathfmt.directory_formatters = formatters diff --git a/gallery_dl/version.py b/gallery_dl/version.py index f234af1..f2462ee 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.27.2" +__version__ = "1.27.3" __variant__ = None diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py index 3e6d1df..edd8575 100644 --- a/test/test_postprocessor.py +++ b/test/test_postprocessor.py @@ -452,6 +452,27 @@ class MetadataTest(BasePostprocessorTest): path = self.pathfmt.realdirectory + "../json/12500/file.ext.json" m.assert_called_once_with(path, "w", encoding="utf-8") + def test_metadata_basedirectory(self): + self._create({"base-directory": True}) + + with patch("builtins.open", mock_open()) as m: + self._trigger() + + path = self.pathfmt.basedirectory + "file.ext.json" + m.assert_called_once_with(path, "w", encoding="utf-8") + + def test_metadata_basedirectory_custom(self): + self._create({ + "base-directory": "/home/test", + "directory": "meta", + }) + + with patch("builtins.open", mock_open()) as m: + self._trigger() + + path = "/home/test/meta/file.ext.json" + m.assert_called_once_with(path, "w", encoding="utf-8") + def test_metadata_filename(self): self._create({ "filename" : "{category}_{filename}_/meta/\n\r.data", |
