| author | 2019-10-01 19:12:47 -0400 |
|---|---|
| committer | 2019-10-01 19:12:47 -0400 |
| commit | 639d9ea4a667733aadc3ff83a1df2cc9f0add3a9 |
| tree | 5761b58d6fc3e8bbb99b39b8e4417673bccb0b86 |
| parent | c09a9f00dd83017d486cd77650347bc2a397ad55 |
New upstream version 1.10.5 (tag: upstream/1.10.5)
29 files changed, 586 insertions, 325 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4cde46b..c72f971 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,21 @@
 # Changelog

+## 1.10.5 - 2019-09-28
+### Additions
+- `instagram.highlights` option to include highlighted stories when downloading user profiles ([#329](https://github.com/mikf/gallery-dl/issues/329))
+- Support for `/user/` URLs on `reddit` ([#350](https://github.com/mikf/gallery-dl/issues/350))
+- Support for `imgur` user profiles and favorites ([#420](https://github.com/mikf/gallery-dl/issues/420))
+- Additional metadata fields on `nijie` ([#423](https://github.com/mikf/gallery-dl/issues/423))
+### Fixes
+- Improve handling of private `deviantart` artworks ([#414](https://github.com/mikf/gallery-dl/issues/414)) and 429 status codes ([#424](https://github.com/mikf/gallery-dl/issues/424))
+- Prevent fatal errors when trying to open download-archive files ([#417](https://github.com/mikf/gallery-dl/issues/417))
+- Detect and ignore unavailable videos on `weibo` ([#427](https://github.com/mikf/gallery-dl/issues/427))
+- Update the `scope` of new `reddit` refresh-tokens ([#428](https://github.com/mikf/gallery-dl/issues/428))
+- Fix inconsistencies with the `reddit.comments` option ([#429](https://github.com/mikf/gallery-dl/issues/429))
+- Extend URL patterns for `hentaicafe` manga and `pixiv` artworks
+- Improve detection of unavailable albums on `luscious` and `imgbb`
+- Miscellaneous fixes for `tsumino`
+
 ## 1.10.4 - 2019-09-08
 ### Additions
 - Support for

diff --git a/README.rst b/README.rst
@@ -78,8 +78,8 @@
 Download a standalone executable file, put it into your
 `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
 and run it inside a command prompt (like ``cmd.exe``).

-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.4/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.4/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.5/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.5/gallery-dl.bin>`__

 These executables include a Python 3.7 interpreter
 and all required Python packages.

@@ -224,7 +224,7 @@
 access to *gallery-dl*. Authorize it and you will be shown one or more

 .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
 .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
 .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.4.zip
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.5.zip
 .. _dev: https://github.com/mikf/gallery-dl/archive/master.zip
 .. _Python: https://www.python.org/downloads/

diff --git a/docs/configuration.rst b/docs/configuration.rst
index e384f2c..6b4055e 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -603,8 +603,8 @@ extractor.deviantart.refresh-token
 =========== =====
 Type        ``string``
 Default     ``null``
-Description The ``refresh_token`` value you get from linking your
-            DeviantArt account to *gallery-dl*.
+Description The ``refresh_token`` value you get from
+            `linking your DeviantArt account to gallery-dl <OAuth_>`__.

             Using a ``refresh_token`` allows you to access private or
             otherwise not publicly available deviations.
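For reference, the ``refresh_token`` described above lives in gallery-dl's JSON configuration file. A minimal sketch of the relevant section — the nesting follows docs/gallery-dl.conf as diffed later in this patch, and the token value is a placeholder for whatever ``gallery-dl oauth:deviantart`` prints:

```json
{
    "extractor": {
        "deviantart": {
            "refresh-token": "0123456789abcdef0123456789abcdef"
        }
    }
}
```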
@@ -662,7 +662,7 @@ extractor.flickr.access-token & .access-token-secret Type ``string`` Default ``null`` Description The ``access_token`` and ``access_token_secret`` values you get - from linking your Flickr account to *gallery-dl*. + from `linking your Flickr account to gallery-dl <OAuth_>`__. =========== ===== @@ -730,6 +730,16 @@ Description Controls whether to choose the GIF or MP4 version of an animation. =========== ===== +extractor.instagram.highlights +------------------------------ +=========== ===== +Type ``bool`` +Default ``false`` +Description Include *Story Highlights* when downloading a user profile. + (requires authentication) +=========== ===== + + extractor.kissmanga.captcha --------------------------- =========== ===== @@ -820,7 +830,7 @@ Description A list of extractor categories which should be ignored when using extractor.reddit.comments ------------------------- =========== ===== -Type ``integer`` or ``string`` +Type ``integer`` Default ``500`` Description The value of the ``limit`` parameter when loading a submission and its comments. @@ -830,7 +840,7 @@ Description The value of the ``limit`` parameter when loading Reddit's internal default and maximum values for this parameter appear to be 200 and 500 respectively. - The value `0` ignores all comments and significantly reduces the + The value ``0`` ignores all comments and significantly reduces the time required when scanning a subreddit. =========== ===== @@ -887,8 +897,8 @@ extractor.reddit.refresh-token =========== ===== Type ``string`` Default ``null`` -Description The ``refresh_token`` value you get from linking your - Reddit account to *gallery-dl*. +Description The ``refresh_token`` value you get from + `linking your Reddit account to gallery-dl <OAuth_>`__. Using a ``refresh_token`` allows you to access private or otherwise not publicly available subreddits, given that your account is @@ -1853,4 +1863,5 @@ Description An object with the ``name`` of a post-processor and its options. .. _datetime: https://docs.python.org/3/library/datetime.html#datetime-objects .. _datetime.max: https://docs.python.org/3/library/datetime.html#datetime.datetime.max .. _Authentication: https://github.com/mikf/gallery-dl#authentication +.. _OAuth: https://github.com/mikf/gallery-dl#oauth .. 
_youtube-dl: https://github.com/ytdl-org/youtube-dl diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index eff6da1..ebf47ff 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -69,6 +69,10 @@ { "mp4": true }, + "instagram": + { + "highlights": false + }, "kissmanga": { "captcha": "stop" diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 925185c..b0d6eba 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -5,11 +5,11 @@ Unless otherwise known, assume all sites to be NSFW ==================== =================================== ================================================== ================ Site URL Capabilities Authentication ==================== =================================== ================================================== ================ -35PHOTO https://35photo.pro/ Images from Users, Genres, individual Images +35PHOTO https://35photo.pro/ Genres, individual Images, User Profiles 3dbooru http://behoimi.org/ Pools, Popular Images, Posts, Tag-Searches 4chan https://www.4chan.org/ Threads 4plebs https://archive.4plebs.org/ Threads -500px https://500px.com/ Images from Users, Galleries, individual Images +500px https://500px.com/ Galleries, individual Images, User Profiles 8chan https://8ch.net/ Threads 8muses https://www.8muses.com/ Albums Adobe Portfolio https://www.myportfolio.com/ Galleries @@ -18,7 +18,7 @@ arch.b4k.co https://arch.b4k.co/ Threads Archive of Sins https://archiveofsins.com/ Threads Archived.Moe https://archived.moe/ Threads ArtStation https://www.artstation.com/ |artstation-C| -Behance https://www.behance.net/ Images from Users, Collections, Galleries +Behance https://www.behance.net/ Collections, Galleries, User Profiles BobX http://www.bobx.com/dark/ Galleries, Idols Danbooru https://danbooru.donmai.us/ Pools, Popular Images, Posts, Tag-Searches Optional Desuarchive https://desuarchive.org/ Threads @@ -48,14 +48,14 @@ Hitomi.la https://hitomi.la/ Galleries Hypnohub https://hypnohub.net/ Pools, Popular Images, Posts, Tag-Searches Idol Complex https://idol.sankakucomplex.com/ Pools, Posts, Tag-Searches Optional ImageBam http://www.imagebam.com/ Galleries, individual Images -ImageFap https://imagefap.com/ Images from Users, Galleries, individual Images -ImgBB https://imgbb.com/ Images from Users, Albums, individual Images Optional +ImageFap https://imagefap.com/ Galleries, individual Images, User Profiles +ImgBB https://imgbb.com/ Albums, individual Images, User Profiles Optional imgbox https://imgbox.com/ Galleries, individual Images imgth https://imgth.com/ Galleries -imgur https://imgur.com/ Albums, Galleries, individual Images +imgur https://imgur.com/ |imgur-C| Instagram https://www.instagram.com/ |instagram-C| Optional Jaimini's Box https://jaiminisbox.com/reader/ Chapters, Manga -Joyreactor http://joyreactor.cc/ |joyreactor-C| +Joyreactor http://joyreactor.com/ Posts, Search Results, Tag-Searches, User Profiles Keenspot http://www.keenspot.com/ Comics Khinsider https://downloads.khinsider.com/ Soundtracks Kirei Cake https://reader.kireicake.com/ Chapters, Manga @@ -73,28 +73,28 @@ Mangapanda https://www.mangapanda.com/ Chapters, Manga MangaPark https://mangapark.me/ Chapters, Manga Mangareader https://www.mangareader.net/ Chapters, Manga Mangoxo https://www.mangoxo.com/ Albums, Channels Optional -Newgrounds https://www.newgrounds.com/ Images from Users, individual Images, Videos +Newgrounds https://www.newgrounds.com/ individual Images, User Profiles, Videos Ngomik http://ngomik.in/ Chapters 
nhentai https://nhentai.net/ Galleries, Search Results -Niconico Seiga https://seiga.nicovideo.jp/ Images from Users, individual Images Required +Niconico Seiga https://seiga.nicovideo.jp/ individual Images, User Profiles Required nijie https://nijie.info/ |nijie-C| Required NSFWalbum.com https://nsfwalbum.com/ Albums Nyafuu Archive https://archive.nyafuu.org/ Threads -Patreon https://www.patreon.com/ Images from Users, Creators, Posts -Pawoo https://pawoo.net/ Images from Users, Images from Statuses +Patreon https://www.patreon.com/ Creators, Posts, User Profiles +Pawoo https://pawoo.net/ Images from Statuses, User Profiles Photobucket https://photobucket.com/ Albums, individual Images -Piczel https://piczel.tv/ Images from Users, Folders, individual Images +Piczel https://piczel.tv/ Folders, individual Images, User Profiles Pinterest https://www.pinterest.com/ Boards, Pins, pin.it Links, related Pins Pixiv https://www.pixiv.net/ |pixiv-C| Required -Pixnet https://www.pixnet.net/ |pixnet-C| +Pixnet https://www.pixnet.net/ Folders, individual Images, Sets, User Profiles Plurk https://www.plurk.com/ Posts, Timelines -Pornhub https://www.pornhub.com/ Images from Users, Galleries -Pornreactor http://pornreactor.cc/ |pornreactor-C| +Pornhub https://www.pornhub.com/ Galleries, User Profiles +Pornreactor http://pornreactor.cc/ Posts, Search Results, Tag-Searches, User Profiles PowerManga https://read.powermanga.org/ Chapters, Manga Pururin https://pururin.io/ Galleries Read Comic Online https://readcomiconline.to/ Comic-Issues, Comics RebeccaBlackTech https://rbt.asia/ Threads -Reddit https://www.reddit.com/ individual Images, Submissions, Subreddits Optional (OAuth) +Reddit https://www.reddit.com/ |reddit-C| Optional (OAuth) rule #34 https://rule34.paheal.net/ Posts, Tag-Searches Rule 34 https://rule34.xxx/ Pools, Posts, Tag-Searches Safebooru https://safebooru.org/ Pools, Posts, Tag-Searches @@ -104,21 +104,21 @@ Sen Manga https://raw.senmanga.com/ Chapters Sense-Scans http://sensescans.com/reader/ Chapters, Manga Sex.com https://www.sex.com/ Boards, Pins, related Pins, Search Results Simply Hentai https://www.simply-hentai.com/ Galleries, individual Images, Videos -SlickPic https://www.slickpic.com/ Images from Users, Albums +SlickPic https://www.slickpic.com/ Albums, User Profiles SlideShare https://www.slideshare.net/ Presentations SmugMug https://www.smugmug.com/ |smugmug-C| Optional (OAuth) The /b/ Archive https://thebarchive.com/ Threads Tsumino https://www.tsumino.com/ Galleries, Search Results Optional -Tumblr https://www.tumblr.com/ Images from Users, Likes, Posts, Tag-Searches Optional (OAuth) +Tumblr https://www.tumblr.com/ Likes, Posts, Tag-Searches, User Profiles Optional (OAuth) Twitter https://twitter.com/ Media Timelines, Timelines, Tweets Optional -VSCO https://vsco.co/ Images from Users, Collections, individual Images +VSCO https://vsco.co/ Collections, individual Images, User Profiles Wallhaven https://wallhaven.cc/ individual Images, Search Results |wallhaven-A| Warosu https://warosu.org/ Threads -Weibo https://www.weibo.com/ Images from Users, Images from Statuses +Weibo https://www.weibo.com/ Images from Statuses, User Profiles WikiArt.org https://www.wikiart.org/ Artists, Artworks World Three http://www.slide.world-three.org/ Chapters, Manga -xHamster https://xhamster.com/ Images from Users, Galleries -XVideos https://www.xvideos.com/ Images from Users, Galleries +xHamster https://xhamster.com/ Galleries, User Profiles +XVideos https://www.xvideos.com/ Galleries, User 
Profiles Yandere https://yande.re/ Pools, Popular Images, Posts, Tag-Searches yaplog! https://yaplog.jp/ Blogs, Posts |yuki-S| https://yuki.la/ Threads @@ -133,16 +133,15 @@ Turboimagehost https://www.turboimagehost.com/ individual Images もえぴりあ https://vanilla-rock.com/ Posts, Tag-Searches ==================== =================================== ================================================== ================ -.. |artstation-C| replace:: Images from Users, Albums, Artwork Listings, Challenges, individual Images, Likes, Search Results +.. |artstation-C| replace:: Albums, Artwork Listings, Challenges, individual Images, Likes, Search Results, User Profiles .. |deviantart-C| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Scraps, Sta.sh -.. |flickr-C| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results -.. |hentaifoundry-C| replace:: Images from Users, Favorites, individual Images, Popular Images, Recent Images, Scraps -.. |instagram-C| replace:: Images from Users, Channels, individual Images, Stories, Tag-Searches -.. |joyreactor-C| replace:: Images from Users, Posts, Search Results, Tag-Searches -.. |nijie-C| replace:: Images from Users, Doujin, Favorites, individual Images -.. |pixiv-C| replace:: Images from Users, Favorites, Follows, pixiv.me Links, Rankings, Search Results, Individual Images -.. |pixnet-C| replace:: Images from Users, Folders, individual Images, Sets -.. |pornreactor-C| replace:: Images from Users, Posts, Search Results, Tag-Searches +.. |flickr-C| replace:: Albums, Favorites, Galleries, Groups, individual Images, Search Results, User Profiles +.. |hentaifoundry-C| replace:: Favorites, individual Images, Popular Images, Recent Images, Scraps, User Profiles +.. |imgur-C| replace:: Albums, Favorites, Galleries, individual Images, User Profiles +.. |instagram-C| replace:: Channels, individual Images, Stories, Tag-Searches, User Profiles +.. |nijie-C| replace:: Doujin, Favorites, individual Images, User Profiles +.. |pixiv-C| replace:: Favorites, Follows, pixiv.me Links, Rankings, Search Results, User Profiles, Individual Images +.. |reddit-C| replace:: individual Images, Submissions, Subreddits, User Profiles .. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders .. |wallhaven-A| replace:: Optional (`API Key <configuration.rst#extractorwallhavenapi-key>`__) .. 
|yuki-S| replace:: yuki.la 4chan archive diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 806b229..94a445a 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -248,7 +248,7 @@ def main(): log.error("No suitable extractor found for '%s'", url) except KeyboardInterrupt: - print("\nKeyboardInterrupt", file=sys.stderr) + sys.exit("\nKeyboardInterrupt") except BrokenPipeError: pass except IOError as exc: diff --git a/gallery_dl/extractor/3dbooru.py b/gallery_dl/extractor/3dbooru.py index d0e59ad..15f4207 100644 --- a/gallery_dl/extractor/3dbooru.py +++ b/gallery_dl/extractor/3dbooru.py @@ -71,7 +71,7 @@ class ThreedeebooruPopularExtractor(booru.MoebooruPopularMixin, r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)" r"(?:\?(?P<query>[^#]*))?") test = ("http://behoimi.org/post/popular_by_month?month=2&year=2013", { - "url": "c70268dce441a9ccc3383c244ec15edb059f494f", + "url": "f5a26c624da9a3d1dbc610e4a614bc57df6251c5", "count": 20, }) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 6614755..525cc84 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -620,7 +620,7 @@ class DeviantartExtractorV2(DeviantartExtractor): # extract download target target = files[-1] - if deviation["isJournal"] and self.commit_journal: + if "textContent" in deviation and self.commit_journal: journal = deviation["textContent"] journal["html"] = journal["html"]["markup"] target["src"] = self.commit_journal(deviation, journal)[1] @@ -729,6 +729,16 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2): ("https://www.deviantart.com/ikatxfruti/art/Bang-Bang-528130222", { "pattern": r"https://images-wixmp-.*wixmp.com/f/.*\.swf", }), + # journal + ("https://www.deviantart.com/shimoda7/journal/ARTility-583755752", { + "url": "f33f8127ab71819be7de849175b6d5f8b37bb629", + "pattern": "text:<!DOCTYPE html>\n", + }), + # journal-like post with isJournal == False (#419) + ("https://www.deviantart.com/gliitchlord/art/brashstrokes-812942668", { + "url": "1534d6ea0561247ab921d07505e57a9d663a833b", + "pattern": "text:<!DOCTYPE html>\n", + }), # old-style URLs ("https://shimoda7.deviantart.com" "/art/For-the-sake-of-a-memory-10073852"), @@ -818,6 +828,12 @@ class DeviantartAPI(): self.client_secret = extractor.config( "client-secret", self.CLIENT_SECRET) + self.log.debug( + "Using %s API credentials (client-id %s)", + "default" if self.client_id == self.CLIENT_ID else "custom", + self.client_id, + ) + def browse_popular(self, query=None, timerange=None, category_path=None, offset=0): """Yield popular deviations""" @@ -873,6 +889,8 @@ class DeviantartAPI(): def deviation_metadata(self, deviations): """ Fetch deviation metadata for a set of deviations""" + if not deviations: + return [] endpoint = "deviation/metadata?" 
+ "&".join( "deviationids[{}]={}".format(num, deviation["deviationid"]) for num, deviation in enumerate(deviations) @@ -953,7 +971,7 @@ class DeviantartAPI(): if self.delay > self.delay_min: self.delay -= 1 return data - if not fatal: + if not fatal and status != 429: return None if data.get("error_description") == "User not found.": raise exception.NotFoundError("user or group") @@ -975,13 +993,18 @@ class DeviantartAPI(): if "results" not in data: self.log.error("Unexpected API response: %s", data) return - if (public and self.refresh_token and - len(data["results"]) < params["limit"]): - self.log.debug("Switching to private access token") - public = False - continue if extend: + if public and len(data["results"]) < params["limit"]: + if self.refresh_token: + self.log.debug("Switching to private access token") + public = False + continue + elif data["has_more"]: + self.log.warning( + "Private deviations detected! Run 'gallery-dl " + "oauth:deviantart' and follow the instructions to " + "be able to access them.") if self.metadata: self._metadata(data["results"]) if self.folders: @@ -1003,7 +1026,6 @@ class DeviantartAPI(): deviations, self.deviation_metadata(deviations)): deviation.update(metadata) deviation["tags"] = [t["tag_name"] for t in deviation["tags"]] - return deviations def _folders(self, deviations): """Add a list of all containing folders to each deviation object""" diff --git a/gallery_dl/extractor/hentaicafe.py b/gallery_dl/extractor/hentaicafe.py index 679b3ad..161073b 100644 --- a/gallery_dl/extractor/hentaicafe.py +++ b/gallery_dl/extractor/hentaicafe.py @@ -45,7 +45,7 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor): """Extractor for manga from hentai.cafe""" category = "hentaicafe" pattern = (r"(?:https?://)?" 
+ r"(?:www\.)?hentai\.cafe" - r"((?:/manga/series)?/[^/?&#]+)/?$") + r"(/hc\.fyi/\d+|(?:/manga/series)?/[^/?&#]+)/?$") test = ( # single chapter ("https://hentai.cafe/hazuki-yuuto-summer-blues/", { @@ -57,11 +57,17 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor): "url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076", "keyword": "28271062d7b4a2f99a0e1a894f69af8c5581a6bb", }), + # new-style URL + ("https://hentai.cafe/hc.fyi/2782", { + "url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076", + "keyword": "28271062d7b4a2f99a0e1a894f69af8c5581a6bb", + }), # foolslide URL ("https://hentai.cafe/manga/series/saitom-box/", { "url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076", "keyword": "f0ece32d958f889d8229ed4052716d398a0a875c", }), + ) root = "https://hentai.cafe" reverse = False diff --git a/gallery_dl/extractor/hypnohub.py b/gallery_dl/extractor/hypnohub.py index bf2db96..860cebd 100644 --- a/gallery_dl/extractor/hypnohub.py +++ b/gallery_dl/extractor/hypnohub.py @@ -23,7 +23,7 @@ class HypnohubTagExtractor(booru.TagMixin, HypnohubExtractor): pattern = (r"(?:https?://)?(?:www\.)?hypnohub\.net" r"/post\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)") test = ("https://hypnohub.net/post?tags=gonoike_biwa", { - "url": "6bebc4318489ee37e0c3b814352acd6783ba95d6", + "url": "0deaf1a2f832cfc4354c531259b949e850da1e7e", }) diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py index 4aa670b..2a8dcad 100644 --- a/gallery_dl/extractor/imgbb.py +++ b/gallery_dl/extractor/imgbb.py @@ -28,7 +28,10 @@ class ImgbbExtractor(Extractor): def items(self): self.login() - page = self.request(self.page_url, params={"sort": self.sort}).text + response = self.request(self.page_url, params={"sort": self.sort}) + if response.history and response.url.startswith(self.root): + raise exception.NotFoundError(self.subcategory) + page = response.text data = self.metadata(page) first = True @@ -97,16 +100,24 @@ class ImgbbAlbumExtractor(ImgbbExtractor): directory_fmt = ("{category}", "{user}", "{album_name} {album_id}") pattern = r"(?:https?://)?ibb\.co/album/([^/?&#]+)/?(?:\?([^#]+))?" 
test = ( - ("https://ibb.co/album/c6p5Yv", { + ("https://ibb.co/album/i5PggF", { "range": "1-80", - "url": "8adaf0f7dfc19ff8bc4712c97f534af8b1e06412", - "keyword": "155b665a53e83d359e914cab7c69d5b829444d64", + "url": "570872b6eb3e11cf10b618922b780fed204c3f09", + "keyword": "0f2fc956728c36540c577578bd168d2459d6ae4b", }), - ("https://ibb.co/album/c6p5Yv?sort=title_asc", { + ("https://ibb.co/album/i5PggF?sort=title_asc", { "range": "1-80", - "url": "d6c45041d5c8323c435b183a976f3fde2af7c547", - "keyword": "30c3262214e2044bbcf6bf2dee8e3ca7ebd62b71", + "url": "e2e387b8fdb3690bd75d804d0af2833112e385cd", + "keyword": "a307fc9d2085bdc0eb7c538c8d866c59198d460c", }), + # deleted + ("https://ibb.co/album/fDArrF", { + "exception": exception.NotFoundError, + }), + # private + ("https://ibb.co/album/hqgWrF", { + "exception": exception.HttpError, + }) ) def __init__(self, match): @@ -182,9 +193,18 @@ class ImgbbUserExtractor(ImgbbExtractor): class ImgbbImageExtractor(ImgbbExtractor): subcategory = "image" pattern = r"(?:https?://)?ibb\.co/(?!album/)([^/?&#]+)" - test = ("https://ibb.co/NLZHgqS", { - "url": "fbca86bac09de6fc0304054b2170b423ca1e84fa", - "keyword": "5d70e779bad03b2dc5273b627638045168671157", + test = ("https://ibb.co/fUqh5b", { + "pattern": "https://image.ibb.co/dY5FQb/Arundel-Ireeman-5.jpg", + "content": "c5a0965178a8b357acd8aa39660092918c63795e", + "keyword": { + "id" : "fUqh5b", + "title" : "Arundel Ireeman 5", + "url" : "https://image.ibb.co/dY5FQb/Arundel-Ireeman-5.jpg", + "width" : 960, + "height": 719, + "user" : "folkie", + "extension": "jpg", + }, }) def __init__(self, match): diff --git a/gallery_dl/extractor/imgth.py b/gallery_dl/extractor/imgth.py index a97f2e0..8a6fe1c 100644 --- a/gallery_dl/extractor/imgth.py +++ b/gallery_dl/extractor/imgth.py @@ -44,7 +44,7 @@ class ImgthGalleryExtractor(Extractor): while True: thumbs = text.extract(page, '<ul class="thumbnails">', '</ul>')[0] for url in text.extract_iter(thumbs, '<img src="', '"'): - yield "https://imgth.com/images/" + url[24:] + yield "https://imgth.com/images" + url[24:] if '<li class="next">' not in page: return pnum += 1 diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 8523523..cb36c30 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -10,13 +10,18 @@ from .common import Extractor, Message from .. 
import text, exception +import itertools import json +BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.com" + + class ImgurExtractor(Extractor): """Base class for imgur extractors""" category = "imgur" root = "https://imgur.com" + api_root = "https://api.imgur.com" def __init__(self, match): Extractor.__init__(self, match) @@ -43,14 +48,40 @@ class ImgurExtractor(Extractor): image["extension"] = image["ext"][1:] return url + def _items_apiv3(self, urlfmt): + album_ex = ImgurAlbumExtractor + image_ex = ImgurImageExtractor + + params = { + "IMGURPLATFORM" : "web", + "album_previews": "0", + "client_id" : "546c25a59c58ad7", + } + headers = { + "Origin" : self.root, + "Referer": self.root + "/", + } + + yield Message.Version, 1 + + for num in itertools.count(0): + url = urlfmt.format(num) + data = self.request(url, params=params, headers=headers).json() + + for item in data["data"]: + item["_extractor"] = album_ex if item["is_album"] else image_ex + yield Message.Queue, item["link"], item + + if len(data["data"]) < 60: + return + class ImgurImageExtractor(ImgurExtractor): """Extractor for individual images on imgur.com""" subcategory = "image" filename_fmt = "{category}_{hash}{title:?_//}.{extension}" archive_fmt = "{hash}" - pattern = (r"(?:https?://)?(?:www\.|[im]\.|)?imgur\.com" - r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?") + pattern = BASE_PATTERN + r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?" test = ( ("https://imgur.com/21yMxCS", { "url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2", @@ -111,8 +142,7 @@ class ImgurAlbumExtractor(ImgurExtractor): directory_fmt = ("{category}", "{album[hash]}{album[title]:? - //}") filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}" archive_fmt = "{album[hash]}_{hash}" - pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com" - r"/(?:a|t/unmuted)/(\w{7}|\w{5})") + pattern = BASE_PATTERN + r"/(?:a|t/unmuted)/(\w{7}|\w{5})" test = ( ("https://imgur.com/a/TcBmP", { "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563", @@ -181,8 +211,7 @@ class ImgurAlbumExtractor(ImgurExtractor): class ImgurGalleryExtractor(ImgurExtractor): """Extractor for imgur galleries""" subcategory = "gallery" - pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com" - r"/gallery/(\w{7}|\w{5})") + pattern = BASE_PATTERN + r"/gallery/(\w{7}|\w{5})" test = ( ("https://imgur.com/gallery/zf2fIms", { # non-album gallery (#380) "pattern": "https://imgur.com/zf2fIms", @@ -205,3 +234,39 @@ class ImgurGalleryExtractor(ImgurExtractor): yield Message.Version, 1 yield Message.Queue, url, {"_extractor": extr} + + +class ImgurUserExtractor(ImgurExtractor): + """Extractor for all images posted by a user""" + subcategory = "user" + pattern = BASE_PATTERN + r"/user/([^/?&#]+)(?:/posts|/submitted)?/?$" + test = ( + ("https://imgur.com/user/Miguenzo", { + "range": "1-100", + "count": 100, + "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+", + }), + ("https://imgur.com/user/Miguenzo/posts"), + ("https://imgur.com/user/Miguenzo/submitted"), + ) + + def items(self): + urlfmt = "{}/3/account/{}/submissions/{{}}/newest".format( + self.api_root, self.key) + return self._items_apiv3(urlfmt) + + +class ImgurFavoriteExtractor(ImgurExtractor): + """Extractor for a user's favorites""" + subcategory = "favorite" + pattern = BASE_PATTERN + r"/user/([^/?&#]+)/favorites" + test = ("https://imgur.com/user/Miguenzo/favorites", { + "range": "1-100", + "count": 100, + "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+", + }) + + def items(self): + urlfmt = 
"{}/3/account/{}/gallery_favorites/{{}}/newest".format( + self.api_root, self.key) + return self._items_apiv3(urlfmt) diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index e5cfe8b..8eee390 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -43,6 +43,10 @@ class InstagramExtractor(Extractor): data["extension"] = None yield Message.Url, \ 'ytdl:{}/p/{}/'.format(self.root, data['shortcode']), data + elif data['typename'] == 'GraphHighlightReel': + url = '{}/stories/highlights/{}/'.format(self.root, data['id']) + data['_extractor'] = InstagramStoriesExtractor + yield Message.Queue, url, data def login(self): if self._check_cookies(self.cookienames): @@ -84,13 +88,24 @@ class InstagramExtractor(Extractor): for key in ("sessionid", "mid", "csrftoken") } - def _extract_shared_data(self, page): - return json.loads(text.extract(page, - 'window._sharedData = ', ';</script>')[0]) + def _request_graphql(self, variables, query_hash, csrf=None): + headers = { + 'X-CSRFToken': csrf, + 'X-IG-App-ID': '936619743392459', + 'X-Requested-With': 'XMLHttpRequest', + } + url = '{}/graphql/query/?query_hash={}&variables={}'.format( + self.root, query_hash, variables, + ) + return self.request(url, headers=headers).json() - def _extract_postpage(self, url): + def _extract_shared_data(self, url): page = self.request(url).text - shared_data = self._extract_shared_data(page) + data = text.extract(page, 'window._sharedData = ', ';</script>')[0] + return json.loads(data) + + def _extract_postpage(self, url): + shared_data = self._extract_shared_data(url) media = shared_data['entry_data']['PostPage'][0]['graphql']['shortcode_media'] common = { @@ -121,7 +136,7 @@ class InstagramExtractor(Extractor): 'sidecar_shortcode': media['shortcode'], } if children['__typename'] == 'GraphVideo': - media_data["_ytdl_index"] = yi + media_data['_ytdl_index'] = yi yi += 1 media_data.update(common) medias.append(media_data) @@ -146,8 +161,7 @@ class InstagramExtractor(Extractor): highlight_id = '"{}"'.format(self.highlight_id) query_hash = '30a89afdd826d78a5376008a7b81c205' else: - page = self.request(url).text - shared_data = self._extract_shared_data(page) + shared_data = self._extract_shared_data(url) # If no stories are present the URL redirects to `ProfilePage' if 'StoriesPage' not in shared_data['entry_data']: @@ -164,17 +178,10 @@ class InstagramExtractor(Extractor): '"highlight_reel_ids":[{}],"precomposed_overlay":true,' '"show_story_viewer_list":true,' '"story_viewer_fetch_count":50,"story_viewer_cursor":"",' - '"stories_video_dash_manifest":false}}' + '"stories_video_dash_manifest":false' + '}}' ).format(user_id, highlight_id) - headers = { - "X-Requested-With": "XMLHttpRequest", - } - url = '{}/graphql/query/?query_hash={}&variables={}'.format( - self.root, - query_hash, - variables, - ) - shared_data = self.request(url, headers=headers).json() + shared_data = self._request_graphql(variables, query_hash) # If there are stories present but the user is not authenticated or # does not have permissions no stories are returned. 
@@ -209,38 +216,38 @@ class InstagramExtractor(Extractor): return medias - def _extract_page(self, url, page_type): - shared_data_fields = { - 'ProfilePage': { - 'page': 'ProfilePage', - 'node': 'user', - 'node_id': 'id', - 'edge_to_medias': 'edge_owner_to_timeline_media', - 'variables_id': 'id', - 'query_hash': 'f2405b236d85e8296cf30347c9f08c2a', - }, - 'ProfileChannelPage': { - 'page': 'ProfilePage', - 'node': 'user', - 'node_id': 'id', - 'edge_to_medias': 'edge_felix_video_timeline', - 'variables_id': 'id', - 'query_hash': 'bc78b344a68ed16dd5d7f264681c4c76', - }, - 'TagPage': { - 'page': 'TagPage', - 'node': 'hashtag', - 'node_id': 'name', - 'edge_to_medias': 'edge_hashtag_to_media', - 'variables_id': 'tag_name', - 'query_hash': 'f12c9ec5e46a3173b2969c712ad84744', - }, - } + def _extract_story_highlights(self, shared_data): + graphql = shared_data['entry_data']['ProfilePage'][0]['graphql'] + variables = ( + '{{' + '"user_id":"{}","include_chaining":true,' + '"include_reel":true,"include_suggested_users":false,' + '"include_logged_out_extras":false,' + '"include_highlight_reels":true' + '}}' + ).format(graphql['user']['id']) + + data = self._request_graphql( + variables, + 'aec5501414615eca36a9acf075655b1e', + shared_data['config']['csrf_token'], + ) - page = self.request(url).text - shared_data = self._extract_shared_data(page) - psdf = shared_data_fields[page_type] - csrf = shared_data["config"]["csrf_token"] + highlights = [] + for edge in data['data']['user']['edge_highlight_reels']['edges']: + story = edge['node'] + highlights.append({ + 'id' : story['id'], + 'title' : story['title'], + 'owner_id': story['owner']['id'], + 'username': story['owner']['username'], + 'typename': story['__typename'], + }) + + return highlights + + def _extract_page(self, shared_data, psdf): + csrf = shared_data['config']['csrf_token'] while True: # Deal with different structure of pages: the first page @@ -270,29 +277,9 @@ class InstagramExtractor(Extractor): variables_id, end_cursor, ) - headers = { - "X-Requested-With": "XMLHttpRequest", - "X-CSRFToken": csrf, - "X-IG-App-ID": "936619743392459", - } - url = '{}/graphql/query/?query_hash={}&variables={}'.format( - self.root, - psdf['query_hash'], - variables, + shared_data = self._request_graphql( + variables, psdf['query_hash'], csrf, ) - shared_data = self.request(url, headers=headers).json() - - def _extract_profilepage(self, url): - yield from self._extract_page(url, 'ProfilePage') - - def _extract_profilechannelpage(self, url): - yield from self._extract_page(url, 'ProfileChannelPage') - - def _extract_tagpage(self, url): - yield from self._extract_page(url, 'TagPage') - - def _extract_storiespage(self, url): - yield from self._extract_stories(url) class InstagramImageExtractor(InstagramExtractor): @@ -382,16 +369,43 @@ class InstagramImageExtractor(InstagramExtractor): return self._extract_postpage(url) +class InstagramStoriesExtractor(InstagramExtractor): + """Extractor for StoriesPage""" + subcategory = "stories" + pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" + r"/stories/([^/?&#]+)(?:/(\d+))?") + test = ( + ("https://www.instagram.com/stories/instagram/"), + ("https://www.instagram.com/stories/highlights/18042509488170095/"), + ) + + def __init__(self, match): + InstagramExtractor.__init__(self, match) + self.username, self.highlight_id = match.groups() + + def instagrams(self): + url = '{}/stories/{}/'.format(self.root, self.username) + return self._extract_stories(url) + + class InstagramUserExtractor(InstagramExtractor): """Extractor 
for ProfilePage""" subcategory = "user" pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)" r"([^/?&#]+)/?$") - test = ("https://www.instagram.com/instagram/", { - "range": "1-16", - "count": ">= 16", - }) + test = ( + ("https://www.instagram.com/instagram/", { + "range": "1-16", + "count": ">= 16", + }), + ("https://www.instagram.com/instagram/", { + "options": (("highlights", True),), + "pattern": InstagramStoriesExtractor.pattern, + "range": "1-2", + "count": 2, + }), + ) def __init__(self, match): InstagramExtractor.__init__(self, match) @@ -399,7 +413,19 @@ class InstagramUserExtractor(InstagramExtractor): def instagrams(self): url = '{}/{}/'.format(self.root, self.username) - return self._extract_profilepage(url) + shared_data = self._extract_shared_data(url) + + if self.config('highlights'): + yield from self._extract_story_highlights(shared_data) + + yield from self._extract_page(shared_data, { + 'page': 'ProfilePage', + 'node': 'user', + 'node_id': 'id', + 'variables_id': 'id', + 'edge_to_medias': 'edge_owner_to_timeline_media', + 'query_hash': 'f2405b236d85e8296cf30347c9f08c2a', + }) class InstagramChannelExtractor(InstagramExtractor): @@ -419,7 +445,16 @@ class InstagramChannelExtractor(InstagramExtractor): def instagrams(self): url = '{}/{}/channel/'.format(self.root, self.username) - return self._extract_profilechannelpage(url) + shared_data = self._extract_shared_data(url) + + return self._extract_page(shared_data, { + 'page': 'ProfilePage', + 'node': 'user', + 'node_id': 'id', + 'variables_id': 'id', + 'edge_to_medias': 'edge_felix_video_timeline', + 'query_hash': 'bc78b344a68ed16dd5d7f264681c4c76', + }) class InstagramTagExtractor(InstagramExtractor): @@ -442,23 +477,13 @@ class InstagramTagExtractor(InstagramExtractor): def instagrams(self): url = '{}/explore/tags/{}/'.format(self.root, self.tag) - return self._extract_tagpage(url) - - -class InstagramStoriesExtractor(InstagramExtractor): - """Extractor for StoriesPage""" - subcategory = "stories" - pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" - r"/stories/([^/?&#]+)(?:/(\d+))?") - test = ( - ("https://www.instagram.com/stories/instagram/"), - ("https://www.instagram.com/stories/highlights/18042509488170095/"), - ) - - def __init__(self, match): - InstagramExtractor.__init__(self, match) - self.username, self.highlight_id = match.groups() - - def instagrams(self): - url = '{}/stories/{}/'.format(self.root, self.username) - return self._extract_storiespage(url) + shared_data = self._extract_shared_data(url) + + return self._extract_page(shared_data, { + 'page': 'TagPage', + 'node': 'hashtag', + 'node_id': 'name', + 'variables_id': 'tag_name', + 'edge_to_medias': 'edge_hashtag_to_media', + 'query_hash': 'f12c9ec5e46a3173b2969c712ad84744', + }) diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index a73eb86..965daa0 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -74,7 +74,7 @@ class LusciousAlbumExtractor(LusciousBase, GalleryExtractor): }), ("https://members.luscious.net/albums/login-required_323871/", { "options": (("username", None),), - "exception": exception.AuthorizationError, + "exception": exception.HttpError, }), ("https://www.luscious.net/albums/okinami_277031/"), ("https://members.luscious.net/albums/okinami_277031/"), @@ -88,14 +88,14 @@ class LusciousAlbumExtractor(LusciousBase, GalleryExtractor): GalleryExtractor.__init__(self, match, url) def metadata(self, page): - pos = 
page.find("<h1>404 Not Found</h1>") - if pos >= 0: + title, pos = text.extract(page, '"og:title" content="', '"') + + if title is None: msg = text.extract(page, '<div class="content">', '</div>', pos)[0] - if msg and "content is not available" in msg: - raise exception.AuthorizationError() + if msg: + raise exception.AuthorizationError(msg) raise exception.NotFoundError("album") - title, pos = text.extract(page, '"og:title" content="', '"') info , pos = text.extract(page, '<li class="user_info">', "", pos) if info is None: count, pos = text.extract(page, '>Pages:', '<', pos) diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index 4c48d73..fdfad87 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -13,12 +13,15 @@ from .. import text, exception from ..cache import cache +BASE_PATTERN = r"(?:https?://)?(?:www\.)?nijie\.info" + + class NijieExtractor(AsynchronousMixin, Extractor): """Base class for nijie extractors""" category = "nijie" directory_fmt = ("{category}", "{user_id}") - filename_fmt = "{category}_{artist_id}_{image_id}_p{index:>02}.{extension}" - archive_fmt = "{image_id}_{index}" + filename_fmt = "{category}_{artist_id}_{image_id}_p{num:>02}.{extension}" + archive_fmt = "{image_id}_{num}" cookiedomain = "nijie.info" cookienames = ("nemail", "nlogin") root = "https://nijie.info" @@ -27,61 +30,66 @@ class NijieExtractor(AsynchronousMixin, Extractor): def __init__(self, match): Extractor.__init__(self, match) - self.user_id = match.group(1) + self.user_id = text.parse_int(match.group(1)) + self.user_name = None self.session.headers["Referer"] = self.root + "/" def items(self): self.login() - data = self.get_job_metadata() - yield Message.Version, 1 - yield Message.Directory, data - for image_id in self.get_image_ids(): - for image_url, image_data in self.get_image_data(image_id): - image_data.update(data) - if not image_data["extension"]: - image_data["extension"] = "jpg" - yield Message.Url, image_url, image_data + for image_id in self.image_ids(): + + response = self.request(self.view_url + image_id, fatal=False) + if response.status_code >= 400: + continue + page = response.text + + data = self._extract_data(page) + data["image_id"] = text.parse_int(image_id) + yield Message.Directory, data - def get_job_metadata(self): - """Collect metadata for extractor-job""" - return {"user_id": text.parse_int(self.user_id)} + for image in self._extract_images(page): + image.update(data) + if not image["extension"]: + image["extension"] = "jpg" + yield Message.Url, image["url"], image - def get_image_ids(self): + def image_ids(self): """Collect all relevant image-ids""" - def get_image_data(self, image_id): - """Get URL and metadata for images specified by 'image_id'""" - page = self.request(self.view_url + image_id).text - return self.extract_image_data(page, image_id) - - def extract_image_data(self, page, image_id): - """Get URL and metadata for images from 'page'""" - title, pos = text.extract( - page, '<meta property="og:title" content="', '"') - description, pos = text.extract( - page, '<meta property="og:description" content="', '"', pos) - artist_id, pos = text.extract( - page, '"sameAs": "https://nijie.info/members.php?id=', '"', pos) - images = list(text.extract_iter( - page, '<a href="./view_popup.php', '</a>', pos)) - - title = title.rpartition("|")[0].strip() - image_id = text.parse_int(image_id) - artist_id = text.parse_int(artist_id) - - for index, image in enumerate(images): + @staticmethod + def _extract_data(page): + 
"""Extract image metadata from 'page'""" + extr = text.extract_from(page) + keywords = text.unescape(extr( + 'name="keywords" content="', '" />')).split(",") + data = { + "title" : keywords[0].strip(), + "description": text.unescape(extr( + '"description": "', '"').replace("&", "&")), + "date" : text.parse_datetime(extr( + '"datePublished": "', '"')[:-4] + "+0900", + "%a %d %b %Y %I:%M:%S %p%z"), + "artist_id" : text.parse_int(extr( + '"sameAs": "https://nijie.info/members.php?id=', '"')), + "artist_name": keywords[1], + "tags" : keywords[2:-1], + } + data["user_id"] = data["artist_id"] + data["user_name"] = data["artist_name"] + return data + + @staticmethod + def _extract_images(page): + """Extract image URLs from 'page'""" + images = text.extract_iter(page, '<a href="./view_popup.php', '</a>') + for num, image in enumerate(images): url = "https:" + text.extract(image, 'src="', '"')[0] - url = url.replace("/__rs_l120x120/", "/", 1) - - yield url, text.nameext_from_url(url, { - "index": index, - "count": len(images), - "title": title, - "description": description, - "image_id": image_id, - "artist_id": artist_id, + url = url.replace("/__rs_l120x120/", "/") + yield text.nameext_from_url(url, { + "num": num, + "url": url, }) def login(self): @@ -107,6 +115,10 @@ class NijieExtractor(AsynchronousMixin, Extractor): while True: page = self.request(url, params=params, notfound="artist").text + + if not self.user_name: + self.user_name = text.unescape(text.extract( + page, '<br />', '<')[0] or "") yield from text.extract_iter(page, 'illust_id="', '"') if '<a rel="next"' not in page: @@ -117,12 +129,25 @@ class NijieExtractor(AsynchronousMixin, Extractor): class NijieUserExtractor(NijieExtractor): """Extractor for works of a nijie-user""" subcategory = "user" - pattern = (r"(?:https?://)?(?:www\.)?nijie\.info" - r"/members(?:_illust)?\.php\?id=(\d+)") + pattern = BASE_PATTERN + r"/members(?:_illust)?\.php\?id=(\d+)" test = ( ("https://nijie.info/members_illust.php?id=44", { "url": "66c4ff94c6e77c0765dd88f2d8c663055fda573e", - "keyword": "d629c69e3172db1d7e026145e8eb640ac31ac16a", + "keyword": { + "artist_id": 44, + "artist_name": "ED", + "date": "type:datetime", + "description": str, + "extension": "jpg", + "filename": str, + "image_id": int, + "num": int, + "tags": list, + "title": str, + "url": r"re:https://pic.nijie.net/\d+/nijie_picture/.*jpg$", + "user_id": 44, + "user_name": "ED", + }, }), ("https://nijie.info/members_illust.php?id=43", { "exception": exception.NotFoundError, @@ -130,20 +155,23 @@ class NijieUserExtractor(NijieExtractor): ("https://nijie.info/members.php?id=44"), ) - def get_image_ids(self): + def image_ids(self): return self._pagination("members_illust") class NijieDoujinExtractor(NijieExtractor): """Extractor for doujin entries of a nijie-user""" subcategory = "doujin" - pattern = (r"(?:https?://)?(?:www\.)?nijie\.info/" - r"members_dojin\.php\?id=(\d+)") + pattern = BASE_PATTERN + r"/members_dojin\.php\?id=(\d+)" test = ("https://nijie.info/members_dojin.php?id=6782", { "count": ">= 18", + "keyword": { + "user_id" : 6782, + "user_name": "ジョニー@アビオン村", + }, }) - def get_image_ids(self): + def image_ids(self): return self._pagination("members_dojin") @@ -151,30 +179,38 @@ class NijieFavoriteExtractor(NijieExtractor): """Extractor for all favorites/bookmarks of a nijie-user""" subcategory = "favorite" directory_fmt = ("{category}", "bookmarks", "{user_id}") - archive_fmt = "f_{user_id}_{image_id}_{index}" - pattern = (r"(?:https?://)?(?:www\.)?nijie\.info" - 
r"/user_like_illust_view\.php\?id=(\d+)") + archive_fmt = "f_{user_id}_{image_id}_{num}" + pattern = BASE_PATTERN + r"/user_like_illust_view\.php\?id=(\d+)" test = ("https://nijie.info/user_like_illust_view.php?id=44", { "count": ">= 16", + "keyword": { + "user_id" : 44, + "user_name": "ED", + }, }) - def get_image_ids(self): + def image_ids(self): return self._pagination("user_like_illust_view") + def _extract_data(self, page): + data = NijieExtractor._extract_data(page) + data["user_id"] = self.user_id + data["user_name"] = self.user_name + return data + class NijieImageExtractor(NijieExtractor): """Extractor for a work/image from nijie.info""" subcategory = "image" - pattern = (r"(?:https?://)?(?:www\.)?nijie\.info" - r"/view(?:_popup)?\.php\?id=(\d+)") + pattern = BASE_PATTERN + r"/view(?:_popup)?\.php\?id=(\d+)" test = ( ("https://nijie.info/view.php?id=70720", { "url": "5497f897311397dafa188521258624346a0af2a3", - "keyword": "408393d010307c76d52cbd0a4368d6d357805aea", + "keyword": "fd12bca6f4402a0c996315d28c65f7914ad70c51", "content": "d85e3ea896ed5e4da0bca2390ad310a4df716ca6", }), ("https://nijie.info/view.php?id=70724", { - "exception": exception.NotFoundError, + "count": 0, }), ("https://nijie.info/view_popup.php?id=70720"), ) @@ -182,17 +218,6 @@ class NijieImageExtractor(NijieExtractor): def __init__(self, match): NijieExtractor.__init__(self, match) self.image_id = match.group(1) - self.page = "" - def get_job_metadata(self): - self.page = self.request( - self.view_url + self.image_id, notfound="image").text - self.user_id = text.extract( - self.page, '"sameAs": "https://nijie.info/members.php?id=', '"')[0] - return NijieExtractor.get_job_metadata(self) - - def get_image_ids(self): + def image_ids(self): return (self.image_id,) - - def get_image_data(self, _): - return self.extract_image_data(self.page, self.image_id) diff --git a/gallery_dl/extractor/nsfwalbum.py b/gallery_dl/extractor/nsfwalbum.py index c55f80a..5005fb4 100644 --- a/gallery_dl/extractor/nsfwalbum.py +++ b/gallery_dl/extractor/nsfwalbum.py @@ -21,10 +21,10 @@ class NsfwalbumAlbumExtractor(GalleryExtractor): directory_fmt = ("{category}", "{album_id} {title}") archive_fmt = "{id}" pattern = r"(?:https?://)?(?:www\.)?nsfwalbum\.com(/album/(\d+))" - test = ("https://nsfwalbum.com/album/295201", { + test = ("https://nsfwalbum.com/album/401611", { "range": "1-5", - "url": "e60eced1873215f5deee1ca7226d60cb4dcc051c", - "keyword": "e0573ecb1966611e96d10172a3ca1db1078a7984", + "url": "b0481fc7fad5982da397b6359fbed8421b8ba284", + "keyword": "fc1ad4ebcd6d4cf32da15203120112b8bcf12eec", }) def __init__(self, match): diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index e26eae1..6c6dd0a 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -213,7 +213,7 @@ class OAuthReddit(OAuthBase): "", "https://www.reddit.com/api/v1/authorize", "https://www.reddit.com/api/v1/access_token", - scope="read", + scope="read history", ) diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 8e6a74e..d313daa 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -151,13 +151,13 @@ class PixivWorkExtractor(PixivExtractor): """Extractor for a single pixiv work/illustration""" subcategory = "work" pattern = (r"(?:https?://)?(?:(?:www\.|touch\.)?pixiv\.net" - r"/member(?:_illust)?\.php\?(?:[^&]+&)*illust_id=(\d+)" + r"/(?:(?:en/)?artworks/" + r"|member_illust\.php\?(?:[^&]+&)*illust_id=)(\d+)" r"|(?:i(?:\d+\.pixiv|\.pximg)\.net" 
r"/(?:(?:.*/)?img-[^/]+/img/\d{4}(?:/\d\d){5}|img\d+/img/[^/]+)" r"|img\d*\.pixiv\.net/img/[^/]+|(?:www\.)?pixiv\.net/i)/(\d+))") test = ( - (("http://www.pixiv.net/member_illust.php" - "?mode=medium&illust_id=966412"), { + ("https://www.pixiv.net/artworks/966412", { "url": "90c1715b07b0d1aad300bce256a0bc71f42540ba", "content": "69a8edfb717400d1c2e146ab2b30d2c235440c5a", }), @@ -171,6 +171,8 @@ class PixivWorkExtractor(PixivExtractor): "url": "7267695a985c4db8759bebcf8d21dbdd2d2317ef", "keywords": {"frames": list}, }), + ("https://www.pixiv.net/en/artworks/966412"), + ("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=96641"), ("http://i1.pixiv.net/c/600x600/img-master" "/img/2008/06/13/00/29/13/966412_p0_master1200.jpg"), ("https://i.pximg.net/img-original" diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index 94e95e8..9c283de 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from subreddits at https://www.reddit.com/""" +"""Extractors for https://www.reddit.com/""" from .common import Extractor, Message from .. import text, util, extractor, exception @@ -31,7 +31,8 @@ class RedditExtractor(Extractor): yield Message.Version, 1 with extractor.blacklist( - util.SPECIAL_EXTRACTORS, [RedditSubredditExtractor]): + util.SPECIAL_EXTRACTORS, + [RedditSubredditExtractor, RedditUserExtractor]): while True: extra = [] for url, data in self._urls(submissions): @@ -68,18 +69,18 @@ class RedditExtractor(Extractor): submission["selftext_html"] or "", ' href="', '"'): yield url, submission - for comment in comments: - for url in text.extract_iter( - comment["body_html"] or "", ' href="', '"'): - yield url, comment + if comments: + for comment in comments: + for url in text.extract_iter( + comment["body_html"] or "", ' href="', '"'): + yield url, comment class RedditSubredditExtractor(RedditExtractor): - """Extractor for images from subreddits on reddit.com""" + """Extractor for URLs from subreddits on reddit.com""" subcategory = "subreddit" - pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/r/([^/?&#]+)" - r"(/[a-z]+)?/?" 
- r"(?:\?.*?(?:\bt=([a-z]+))?)?$") + pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/r/" + r"([^/?&#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?(?:$|#)") test = ( ("https://www.reddit.com/r/lavaporn/"), ("https://www.reddit.com/r/lavaporn/top/?sort=top&t=month"), @@ -90,24 +91,53 @@ class RedditSubredditExtractor(RedditExtractor): def __init__(self, match): RedditExtractor.__init__(self, match) - self.subreddit, self.order, self.timeframe = match.groups() + self.subreddit = match.group(1) + self.params = text.parse_query(match.group(2)) def submissions(self): - subreddit = self.subreddit + (self.order or "") - params = {"t": self.timeframe} if self.timeframe else {} - return self.api.submissions_subreddit(subreddit, params) + return self.api.submissions_subreddit(self.subreddit, self.params) + + +class RedditUserExtractor(RedditExtractor): + """Extractor for URLs from posts by a reddit user""" + subcategory = "user" + pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/u(?:ser)?/" + r"([^/?&#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?") + test = ( + ("https://www.reddit.com/user/username/", { + "count": ">= 2", + }), + ("https://www.reddit.com/user/username/gilded/?sort=top&t=month"), + ("https://old.reddit.com/user/username/"), + ("https://www.reddit.com/u/username/"), + ) + + def __init__(self, match): + RedditExtractor.__init__(self, match) + self.user = match.group(1) + self.params = text.parse_query(match.group(2)) + + def submissions(self): + return self.api.submissions_user(self.user, self.params) class RedditSubmissionExtractor(RedditExtractor): - """Extractor for images from a submission on reddit.com""" + """Extractor for URLs from a submission on reddit.com""" subcategory = "submission" pattern = (r"(?:https?://)?(?:" r"(?:\w+\.)?reddit\.com/r/[^/?&#]+/comments|" r"redd\.it" r")/([a-z0-9]+)") test = ( - ("https://www.reddit.com/r/lavaporn/comments/2a00np/", { - "pattern": r"https?://i\.imgur\.com/AaAUCgy\.jpg", + ("https://www.reddit.com/r/lavaporn/comments/8cqhub/", { + "pattern": r"https://", + "count": 3, + }), + # ignore submission comments (#429) + ("https://www.reddit.com/r/lavaporn/comments/8cqhub/", { + "options": (("comments", 0),), + "pattern": r"https://c2.staticflickr.com/8/7272/\w+_k.jpg", + "count": 1, }), ("https://old.reddit.com/r/lavaporn/comments/2a00np/"), ("https://np.reddit.com/r/lavaporn/comments/2a00np/"), @@ -156,7 +186,7 @@ class RedditAPI(): def __init__(self, extractor): self.extractor = extractor - self.comments = extractor.config("comments", 500) + self.comments = text.parse_int(extractor.config("comments", 500)) self.morecomments = extractor.config("morecomments", False) self.refresh_token = extractor.config("refresh-token") self.log = extractor.log @@ -168,7 +198,7 @@ class RedditAPI(): self.client_id = None self.log.warning( "Conflicting values for 'client-id' and 'user-agent': " - "override either both or none of them.") + "overwrite either both or none of them.") else: self.client_id = client_id extractor.session.headers["User-Agent"] = user_agent @@ -179,7 +209,7 @@ class RedditAPI(): link_id = "t3_" + submission_id if self.morecomments else None submission, comments = self._call(endpoint, {"limit": self.comments}) return (submission["data"]["children"][0]["data"], - self._flatten(comments, link_id)) + self._flatten(comments, link_id) if self.comments else None) def submissions_subreddit(self, subreddit, params): """Collect all (submission, comments)-tuples of a subreddit""" @@ -187,6 +217,12 @@ class RedditAPI(): params["limit"] = 100 return self._pagination(endpoint, 
params) + def submissions_user(self, user, params): + """Collect all (submission, comments)-tuples posted by a user""" + endpoint = "/user/" + user + "/.json" + params["limit"] = 100 + return self._pagination(endpoint, params) + def morechildren(self, link_id, children): """Load additional comments from a submission""" endpoint = "/api/morechildren" @@ -249,7 +285,7 @@ class RedditAPI(): raise Exception(data["message"]) return data - def _pagination(self, endpoint, params, _empty=()): + def _pagination(self, endpoint, params): id_min = self._parse_id("id-min", 0) id_max = self._parse_id("id-max", 2147483647) date_min, date_max = self.extractor._get_date_min_max(0, 253402210800) @@ -267,7 +303,7 @@ class RedditAPI(): except exception.AuthorizationError: pass else: - yield submission, _empty + yield submission, None if not data["after"]: return diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py index a6a3da0..ba0fcf4 100644 --- a/gallery_dl/extractor/simplyhentai.py +++ b/gallery_dl/extractor/simplyhentai.py @@ -22,8 +22,8 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor): test = ( (("https://original-work.simply-hentai.com" "/amazon-no-hiyaku-amazon-elixir"), { - "url": "258289249990502c3138719cb89e995a60861e49", - "keyword": "eba83ccdbab3022a2280c77aa747f9458196138b", + "url": "21613585ae5ec2f69ea579e9713f536fceab5bd5", + "keyword": "bf75f9ff0fb60756b1b9b92403526a72d9178d23", }), ("https://www.simply-hentai.com/notfound", { "exception": exception.GalleryDLException, @@ -112,6 +112,9 @@ class SimplyhentaiImageExtractor(Extractor): else: tags = [] + if url.startswith("//"): + url = "https:" + url + data = text.nameext_from_url(url, { "title": text.unescape(title) if title else "", "tags": tags, @@ -170,6 +173,9 @@ class SimplyhentaiVideoExtractor(Extractor): video_url = text.extract(embed_page, '"file":"', '"')[0] title, _, episode = title.rpartition(" Episode ") + if video_url.startswith("//"): + video_url = "https:" + video_url + data = text.nameext_from_url(video_url, { "title": text.unescape(title), "episode": text.parse_int(episode), diff --git a/gallery_dl/extractor/tsumino.py b/gallery_dl/extractor/tsumino.py index 66ad431..cc0dc90 100644 --- a/gallery_dl/extractor/tsumino.py +++ b/gallery_dl/extractor/tsumino.py @@ -37,22 +37,22 @@ class TsuminoBase(): response = self.request(url, method="POST", headers=headers, data=data) if not response.history: raise exception.AuthenticationError() - return {".aotsumino": response.history[0].cookies[".aotsumino"]} + return self.session.cookies class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor): """Extractor for image galleries on tsumino.com""" pattern = (r"(?i)(?:https?://)?(?:www\.)?tsumino\.com" - r"/(?:Book/Info|Read/View)/(\d+)") + r"/(?:entry|Book/Info|Read/(?:Index|View))/(\d+)") test = ( - ("https://www.tsumino.com/Book/Info/40996", { - "url": "84bf30a86623039fc87855680fada884dc8a1ddd", + ("https://www.tsumino.com/entry/40996", { + "pattern": r"https://content.tsumino.com/parts/40996/\d+\?key=\w+", "keyword": { "title" : r"re:Shikoshiko Daisuki Nightingale \+ Kaijou", "title_en" : r"re:Shikoshiko Daisuki Nightingale \+ Kaijou", "title_jp" : "シコシコ大好きナイチンゲール + 会場限定おまけ本", "gallery_id": 40996, - "date" : "2018 June 29", + "date" : "type:datetime", "count" : 42, "collection": "", "artist" : ["Itou Life"], @@ -65,15 +65,17 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor): "uploader" : "sehki", "lang" : "en", "language" : "English", - "thumbnail" : 
"re:https?://www.tsumino.com/Image/Thumb/40996", + "thumbnail" : "https://content.tsumino.com/thumbs/40996/1", }, }), + ("https://www.tsumino.com/Book/Info/40996"), ("https://www.tsumino.com/Read/View/45834"), + ("https://www.tsumino.com/Read/Index/45834"), ) def __init__(self, match): self.gallery_id = match.group(1) - url = "{}/Book/Info/{}".format(self.root, self.gallery_id) + url = "{}/entry/{}".format(self.root, self.gallery_id) GalleryExtractor.__init__(self, match, url) def metadata(self, page): @@ -90,7 +92,8 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor): "title_jp" : title_jp, "thumbnail" : extr('"og:image" content="', '"'), "uploader" : text.remove_html(extr('id="Uploader">', '</div>')), - "date" : extr('id="Uploaded">', '</div>').strip(), + "date" : text.parse_datetime( + extr('id="Uploaded">', '</div>').strip(), "%Y %B %d"), "rating" : text.parse_float(extr( 'id="Rating">', '</div>').partition(" ")[0]), "type" : text.remove_html(extr('id="Category">' , '</div>')), @@ -105,21 +108,24 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor): } def images(self, page): - url = "{}/Read/Load/?q={}".format(self.root, self.gallery_id) + url = "{}/Read/Index/{}?page=1".format(self.root, self.gallery_id) headers = {"Referer": self.chapter_url} response = self.request(url, headers=headers, fatal=False) - if response.status_code >= 400: - url = "{}/Read/View/{}".format(self.root, self.gallery_id) + if "/Auth/" in response.url: self.log.error( "Failed to get gallery JSON data. Visit '%s' in a browser " - "and solve the CAPTCHA to continue.", url) + "and solve the CAPTCHA to continue.", response.url) raise exception.StopExtraction() - base = self.root + "/Image/Object?name=" + page = response.text + tpl, pos = text.extract(page, 'data-cdn="', '"') + cnt, pos = text.extract(page, '> of ', '<', pos) + base, _, params = text.unescape(tpl).partition("[PAGE]") + return [ - (base + text.quote(name), None) - for name in response.json()["reader_page_urls"] + (base + str(i) + params, None) + for i in range(1, text.parse_int(cnt)+1) ] @@ -149,13 +155,13 @@ class TsuminoSearchExtractor(TsuminoBase, Extractor): def items(self): yield Message.Version, 1 for gallery in self.galleries(): - url = "{}/Book/Info/{}".format(self.root, gallery["Id"]) + url = "{}/entry/{}".format(self.root, gallery["id"]) gallery["_extractor"] = TsuminoGalleryExtractor yield Message.Queue, url, gallery def galleries(self): """Return all gallery results matching 'self.query'""" - url = "{}/Books/Operate".format(self.root) + url = "{}/Search/Operate?type=Book".format(self.root) headers = { "Referer": "{}/".format(self.root), "X-Requested-With": "XMLHttpRequest", @@ -176,10 +182,10 @@ class TsuminoSearchExtractor(TsuminoBase, Extractor): info = self.request( url, method="POST", headers=headers, data=data).json() - for gallery in info["Data"]: - yield gallery["Entry"] + for gallery in info["data"]: + yield gallery["entry"] - if info["PageNumber"] >= info["PageCount"]: + if info["pageNumber"] >= info["pageCount"]: return data["PageNumber"] += 1 diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 2fa69d5..8105ede 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -190,7 +190,7 @@ class TwitterTweetExtractor(TwitterExtractor): """Extractor for images from individual tweets""" subcategory = "tweet" pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" - r"/([^/?&#]+)/status/(\d+)") + r"/([^/?&#]+|i/web)/status/(\d+)") test = ( 
("https://twitter.com/supernaturepics/status/604341487988576256", { "url": "0e801d2f98142dd87c3630ded9e4be4a4d63b580", @@ -217,6 +217,10 @@ class TwitterTweetExtractor(TwitterExtractor): "options": (("videos", True),), "pattern": r"ytdl:https://twitter.com/.*/1103767554424598528$", }), + # /i/web/ URL + ("https://twitter.com/i/web/status/1155074198240292865", { + "pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig", + }), ) def __init__(self, match): @@ -228,7 +232,7 @@ class TwitterTweetExtractor(TwitterExtractor): def tweets(self): self.session.cookies.clear() - url = "{}/{}/status/{}".format(self.root, self.user, self.tweet_id) + url = "{}/i/web/status/{}".format(self.root, self.tweet_id) page = self.request(url).text end = page.index('class="js-tweet-stats-container') beg = page.rindex('<div class="tweet ', 0, end) diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index 7a4ee8f..0f4ebd2 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -25,18 +25,14 @@ class WeiboExtractor(Extractor): self.retweets = self.config("retweets", True) def items(self): - first = True + yield Message.Version, 1 for status in self.statuses(): + yield Message.Directory, status obj = status num = 1 - if first: - yield Message.Version, 1 - yield Message.Directory, status - first = False - while True: if "pics" in obj: @@ -44,11 +40,13 @@ class WeiboExtractor(Extractor): pid = image["pid"] if "large" in image: image = image["large"] + geo = image.get("geo") or {} data = text.nameext_from_url(image["url"], { - "num": num, - "pid": pid, - "width": text.parse_int(image["geo"]["width"]), - "height": text.parse_int(image["geo"]["height"]), + "num" : num, + "pid" : pid, + "url" : image["url"], + "width" : text.parse_int(geo.get("width")), + "height": text.parse_int(geo.get("height")), "status": status, }) yield Message.Url, image["url"], data @@ -56,15 +54,18 @@ class WeiboExtractor(Extractor): if "page_info" in obj and "media_info" in obj["page_info"]: info = obj["page_info"]["media_info"] - url = info.get("stream_url_hd") or info["stream_url"] - data = text.nameext_from_url(url, { - "num": num, - "url": url, - "width": 0, - "height": 0, - "status": status, - }) - yield Message.Url, url, data + url = info.get("stream_url_hd") or info.get("stream_url") + + if url and not info.get("goto"): + data = text.nameext_from_url(url, { + "num" : num, + "pid" : 0, + "url" : url, + "width" : 0, + "height": 0, + "status": status, + }) + yield Message.Url, url, data if self.retweets and "retweeted_status" in obj: obj = obj["retweeted_status"] @@ -104,7 +105,7 @@ class WeiboUserExtractor(WeiboExtractor): if "mblog" in card: yield card["mblog"] - if len(data["data"]["cards"]) < 5: + if not data["data"]["cards"]: return params["page"] += 1 @@ -121,6 +122,10 @@ class WeiboStatusExtractor(WeiboExtractor): ("https://m.weibo.cn/detail/4339748116375525", { "pattern": r"https?://f.us.sinaimg.cn/\w+\.mp4\?label=mp4_hd", }), + # unavailable video (#427) + ("https://m.weibo.cn/status/4268682979207023", { + "count": 0, + }), ("https://m.weibo.cn/status/4339748116375525"), ("https://m.weibo.cn/5746766133/4339748116375525"), ) diff --git a/gallery_dl/job.py b/gallery_dl/job.py index b6b5a6f..d529705 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -266,6 +266,8 @@ class DownloadJob(Job): if self.postprocessors: for pp in self.postprocessors: pp.finalize() + if self.archive: + self.archive.close() def handle_skip(self): self.out.skip(self.pathfmt.path) @@ -332,7 +334,14 
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index b6b5a6f..d529705 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -266,6 +266,8 @@ class DownloadJob(Job):
         if self.postprocessors:
             for pp in self.postprocessors:
                 pp.finalize()
+        if self.archive:
+            self.archive.close()

     def handle_skip(self):
         self.out.skip(self.pathfmt.path)
@@ -332,7 +334,14 @@ class DownloadJob(Job):
         archive = self.extractor.config("archive")
         if archive:
             path = util.expand_path(archive)
-            self.archive = util.DownloadArchive(path, self.extractor)
+            try:
+                self.archive = util.DownloadArchive(path, self.extractor)
+            except Exception as exc:
+                self.extractor.log.warning(
+                    "Failed to open download archive at '%s' ('%s: %s')",
+                    path, exc.__class__.__name__, exc)
+            else:
+                self.extractor.log.debug("Using download archive '%s'", path)

         postprocessors = self.extractor.config("postprocessors")
         if postprocessors:
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 17cd73a..d87184d 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -733,6 +733,7 @@ class DownloadArchive():
     def __init__(self, path, extractor):
         con = sqlite3.connect(path)
         con.isolation_level = None
+        self.close = con.close
         self.cursor = con.cursor()
         self.cursor.execute("CREATE TABLE IF NOT EXISTS archive "
                             "(entry PRIMARY KEY) WITHOUT ROWID")
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 5209e95..6303fab 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-__version__ = "1.10.4"
+__version__ = "1.10.5"
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 2213ffa..96c11d6 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -102,7 +102,7 @@ SUBCATEGORY_MAP = {
     "stash" : "Sta.sh",
     "status" : "Images from Statuses",
     "tag" : "Tag-Searches",
-    "user" : "Images from Users",
+    "user" : "User Profiles",
     "work" : "Individual Images",
     "related-pin" : "related Pins",
     "related-board": "",
@@ -187,7 +187,7 @@ def category_key(cls):

 def subcategory_key(cls):
     """Generate sorting keys by subcategory"""
-    if cls.subcategory in ("user", "issue"):
+    if cls.subcategory == "issue":
         return "A"
     return cls.subcategory
diff --git a/test/test_results.py b/test/test_results.py
index fb29a87..bde3af5 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -21,13 +21,12 @@ TRAVIS_SKIP = {
     "exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie", "bobx",
     "archivedmoe", "archiveofsins", "thebarchive", "fireden", "4plebs",
     "sankaku", "idolcomplex", "mangahere", "readcomiconline", "mangadex",
-    "sankakucomplex", "warosu",
+    "sankakucomplex", "warosu", "fuskator",
 }

 # temporary issues, etc.
 BROKEN = {
     "8chan",
-    "imgth",
     "mangapark",
 }
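Taken together, the job.py and util.py hunks turn the download archive into a properly closed resource: DownloadArchive exposes its connection's close(), and DownloadJob calls it once a job finishes instead of leaving the handle to interpreter shutdown. A standalone sketch of the archive's storage model, with an example path and entry key:

    import sqlite3

    con = sqlite3.connect("archive.sqlite3")  # the configured "archive" path
    con.isolation_level = None                # autocommit, as in DownloadArchive
    cursor = con.cursor()
    cursor.execute("CREATE TABLE IF NOT EXISTS archive "
                   "(entry PRIMARY KEY) WITHOUT ROWID")

    entry = "reddit_t3_abc123"                # example archive id for one file
    cursor.execute("INSERT OR IGNORE INTO archive (entry) VALUES (?)", (entry,))

    # the lookup that lets a later run skip files it already downloaded
    print(cursor.execute("SELECT 1 FROM archive WHERE entry=?",
                         (entry,)).fetchone() is not None)

    con.close()                               # mirrors the new self.archive.close()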
