From 639d9ea4a667733aadc3ff83a1df2cc9f0add3a9 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Tue, 1 Oct 2019 19:12:47 -0400 Subject: New upstream version 1.10.5 --- CHANGELOG.md | 16 +++ README.rst | 6 +- docs/configuration.rst | 25 ++-- docs/gallery-dl.conf | 4 + docs/supportedsites.rst | 61 +++++----- gallery_dl/__init__.py | 2 +- gallery_dl/extractor/3dbooru.py | 2 +- gallery_dl/extractor/deviantart.py | 38 ++++-- gallery_dl/extractor/hentaicafe.py | 8 +- gallery_dl/extractor/hypnohub.py | 2 +- gallery_dl/extractor/imgbb.py | 40 +++++-- gallery_dl/extractor/imgth.py | 2 +- gallery_dl/extractor/imgur.py | 77 +++++++++++- gallery_dl/extractor/instagram.py | 219 +++++++++++++++++++---------------- gallery_dl/extractor/luscious.py | 12 +- gallery_dl/extractor/nijie.py | 171 +++++++++++++++------------ gallery_dl/extractor/nsfwalbum.py | 6 +- gallery_dl/extractor/oauth.py | 2 +- gallery_dl/extractor/pixiv.py | 8 +- gallery_dl/extractor/reddit.py | 80 +++++++++---- gallery_dl/extractor/simplyhentai.py | 10 +- gallery_dl/extractor/tsumino.py | 46 ++++---- gallery_dl/extractor/twitter.py | 8 +- gallery_dl/extractor/weibo.py | 45 +++---- gallery_dl/job.py | 11 +- gallery_dl/util.py | 1 + gallery_dl/version.py | 2 +- scripts/supportedsites.py | 4 +- test/test_results.py | 3 +- 29 files changed, 586 insertions(+), 325 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4cde46b..c72f971 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,21 @@ # Changelog +## 1.10.5 - 2019-09-28 +### Additions +- `instagram.highlights` option to include highlighted stories when downloading user profiles ([#329](https://github.com/mikf/gallery-dl/issues/329)) +- Support for `/user/` URLs on `reddit` ([#350](https://github.com/mikf/gallery-dl/issues/350)) +- Support for `imgur` user profiles and favorites ([#420](https://github.com/mikf/gallery-dl/issues/420)) +- Additional metadata fields on `nijie`([#423](https://github.com/mikf/gallery-dl/issues/423)) +### Fixes +- Improve 
handling of private `deviantart` artworks ([#414](https://github.com/mikf/gallery-dl/issues/414)) and 429 status codes ([#424](https://github.com/mikf/gallery-dl/issues/424)) +- Prevent fatal errors when trying to open download-archive files ([#417](https://github.com/mikf/gallery-dl/issues/417)) +- Detect and ignore unavailable videos on `weibo` ([#427](https://github.com/mikf/gallery-dl/issues/427)) +- Update the `scope` of new `reddit` refresh-tokens ([#428](https://github.com/mikf/gallery-dl/issues/428)) +- Fix inconsistencies with the `reddit.comments` option ([#429](https://github.com/mikf/gallery-dl/issues/429)) +- Extend URL patterns for `hentaicafe` manga and `pixiv` artworks +- Improve detection of unavailable albums on `luscious` and `imgbb` +- Miscellaneous fixes for `tsumino` + ## 1.10.4 - 2019-09-08 ### Additions - Support for diff --git a/README.rst b/README.rst index f9b3e87..27a99ca 100644 --- a/README.rst +++ b/README.rst @@ -78,8 +78,8 @@ Download a standalone executable file, put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ These executables include a Python 3.7 interpreter and all required Python packages. @@ -224,7 +224,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.4.zip +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.5.zip .. _dev: https://github.com/mikf/gallery-dl/archive/master.zip .. 
_Python: https://www.python.org/downloads/ diff --git a/docs/configuration.rst b/docs/configuration.rst index e384f2c..6b4055e 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -603,8 +603,8 @@ extractor.deviantart.refresh-token =========== ===== Type ``string`` Default ``null`` -Description The ``refresh_token`` value you get from linking your - DeviantArt account to *gallery-dl*. +Description The ``refresh_token`` value you get from + `linking your DeviantArt account to gallery-dl <OAuth_>`__. Using a ``refresh_token`` allows you to access private or otherwise not publicly available deviations. @@ -662,7 +662,7 @@ extractor.flickr.access-token & .access-token-secret Type ``string`` Default ``null`` Description The ``access_token`` and ``access_token_secret`` values you get - from linking your Flickr account to *gallery-dl*. + from `linking your Flickr account to gallery-dl <OAuth_>`__. =========== ===== @@ -730,6 +730,16 @@ Description Controls whether to choose the GIF or MP4 version of an animation. =========== ===== +extractor.instagram.highlights +------------------------------ +=========== ===== +Type ``bool`` +Default ``false`` +Description Include *Story Highlights* when downloading a user profile. + (requires authentication) +=========== ===== + + extractor.kissmanga.captcha --------------------------- =========== ===== @@ -820,7 +830,7 @@ Description A list of extractor categories which should be ignored when using extractor.reddit.comments ------------------------- =========== ===== -Type ``integer`` or ``string`` +Type ``integer`` Default ``500`` Description The value of the ``limit`` parameter when loading a submission and its comments. @@ -830,7 +840,7 @@ Description The value of the ``limit`` parameter when loading Reddit's internal default and maximum values for this parameter appear to be 200 and 500 respectively. 
- The value `0` ignores all comments and significantly reduces the + The value ``0`` ignores all comments and significantly reduces the time required when scanning a subreddit. =========== ===== @@ -887,8 +897,8 @@ extractor.reddit.refresh-token =========== ===== Type ``string`` Default ``null`` -Description The ``refresh_token`` value you get from linking your - Reddit account to *gallery-dl*. +Description The ``refresh_token`` value you get from + `linking your Reddit account to gallery-dl <OAuth_>`__. Using a ``refresh_token`` allows you to access private or otherwise not publicly available subreddits, given that your account is @@ -1853,4 +1863,5 @@ Description An object with the ``name`` of a post-processor and its options. .. _datetime: https://docs.python.org/3/library/datetime.html#datetime-objects .. _datetime.max: https://docs.python.org/3/library/datetime.html#datetime.datetime.max .. _Authentication: https://github.com/mikf/gallery-dl#authentication +.. _OAuth: https://github.com/mikf/gallery-dl#oauth .. 
_youtube-dl: https://github.com/ytdl-org/youtube-dl diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index eff6da1..ebf47ff 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -69,6 +69,10 @@ { "mp4": true }, + "instagram": + { + "highlights": false + }, "kissmanga": { "captcha": "stop" diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 925185c..b0d6eba 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -5,11 +5,11 @@ Unless otherwise known, assume all sites to be NSFW ==================== =================================== ================================================== ================ Site URL Capabilities Authentication ==================== =================================== ================================================== ================ -35PHOTO https://35photo.pro/ Images from Users, Genres, individual Images +35PHOTO https://35photo.pro/ Genres, individual Images, User Profiles 3dbooru http://behoimi.org/ Pools, Popular Images, Posts, Tag-Searches 4chan https://www.4chan.org/ Threads 4plebs https://archive.4plebs.org/ Threads -500px https://500px.com/ Images from Users, Galleries, individual Images +500px https://500px.com/ Galleries, individual Images, User Profiles 8chan https://8ch.net/ Threads 8muses https://www.8muses.com/ Albums Adobe Portfolio https://www.myportfolio.com/ Galleries @@ -18,7 +18,7 @@ arch.b4k.co https://arch.b4k.co/ Threads Archive of Sins https://archiveofsins.com/ Threads Archived.Moe https://archived.moe/ Threads ArtStation https://www.artstation.com/ |artstation-C| -Behance https://www.behance.net/ Images from Users, Collections, Galleries +Behance https://www.behance.net/ Collections, Galleries, User Profiles BobX http://www.bobx.com/dark/ Galleries, Idols Danbooru https://danbooru.donmai.us/ Pools, Popular Images, Posts, Tag-Searches Optional Desuarchive https://desuarchive.org/ Threads @@ -48,14 +48,14 @@ Hitomi.la https://hitomi.la/ Galleries Hypnohub 
https://hypnohub.net/ Pools, Popular Images, Posts, Tag-Searches Idol Complex https://idol.sankakucomplex.com/ Pools, Posts, Tag-Searches Optional ImageBam http://www.imagebam.com/ Galleries, individual Images -ImageFap https://imagefap.com/ Images from Users, Galleries, individual Images -ImgBB https://imgbb.com/ Images from Users, Albums, individual Images Optional +ImageFap https://imagefap.com/ Galleries, individual Images, User Profiles +ImgBB https://imgbb.com/ Albums, individual Images, User Profiles Optional imgbox https://imgbox.com/ Galleries, individual Images imgth https://imgth.com/ Galleries -imgur https://imgur.com/ Albums, Galleries, individual Images +imgur https://imgur.com/ |imgur-C| Instagram https://www.instagram.com/ |instagram-C| Optional Jaimini's Box https://jaiminisbox.com/reader/ Chapters, Manga -Joyreactor http://joyreactor.cc/ |joyreactor-C| +Joyreactor http://joyreactor.com/ Posts, Search Results, Tag-Searches, User Profiles Keenspot http://www.keenspot.com/ Comics Khinsider https://downloads.khinsider.com/ Soundtracks Kirei Cake https://reader.kireicake.com/ Chapters, Manga @@ -73,28 +73,28 @@ Mangapanda https://www.mangapanda.com/ Chapters, Manga MangaPark https://mangapark.me/ Chapters, Manga Mangareader https://www.mangareader.net/ Chapters, Manga Mangoxo https://www.mangoxo.com/ Albums, Channels Optional -Newgrounds https://www.newgrounds.com/ Images from Users, individual Images, Videos +Newgrounds https://www.newgrounds.com/ individual Images, User Profiles, Videos Ngomik http://ngomik.in/ Chapters nhentai https://nhentai.net/ Galleries, Search Results -Niconico Seiga https://seiga.nicovideo.jp/ Images from Users, individual Images Required +Niconico Seiga https://seiga.nicovideo.jp/ individual Images, User Profiles Required nijie https://nijie.info/ |nijie-C| Required NSFWalbum.com https://nsfwalbum.com/ Albums Nyafuu Archive https://archive.nyafuu.org/ Threads -Patreon https://www.patreon.com/ Images from Users, Creators, 
Posts -Pawoo https://pawoo.net/ Images from Users, Images from Statuses +Patreon https://www.patreon.com/ Creators, Posts, User Profiles +Pawoo https://pawoo.net/ Images from Statuses, User Profiles Photobucket https://photobucket.com/ Albums, individual Images -Piczel https://piczel.tv/ Images from Users, Folders, individual Images +Piczel https://piczel.tv/ Folders, individual Images, User Profiles Pinterest https://www.pinterest.com/ Boards, Pins, pin.it Links, related Pins Pixiv https://www.pixiv.net/ |pixiv-C| Required -Pixnet https://www.pixnet.net/ |pixnet-C| +Pixnet https://www.pixnet.net/ Folders, individual Images, Sets, User Profiles Plurk https://www.plurk.com/ Posts, Timelines -Pornhub https://www.pornhub.com/ Images from Users, Galleries -Pornreactor http://pornreactor.cc/ |pornreactor-C| +Pornhub https://www.pornhub.com/ Galleries, User Profiles +Pornreactor http://pornreactor.cc/ Posts, Search Results, Tag-Searches, User Profiles PowerManga https://read.powermanga.org/ Chapters, Manga Pururin https://pururin.io/ Galleries Read Comic Online https://readcomiconline.to/ Comic-Issues, Comics RebeccaBlackTech https://rbt.asia/ Threads -Reddit https://www.reddit.com/ individual Images, Submissions, Subreddits Optional (OAuth) +Reddit https://www.reddit.com/ |reddit-C| Optional (OAuth) rule #34 https://rule34.paheal.net/ Posts, Tag-Searches Rule 34 https://rule34.xxx/ Pools, Posts, Tag-Searches Safebooru https://safebooru.org/ Pools, Posts, Tag-Searches @@ -104,21 +104,21 @@ Sen Manga https://raw.senmanga.com/ Chapters Sense-Scans http://sensescans.com/reader/ Chapters, Manga Sex.com https://www.sex.com/ Boards, Pins, related Pins, Search Results Simply Hentai https://www.simply-hentai.com/ Galleries, individual Images, Videos -SlickPic https://www.slickpic.com/ Images from Users, Albums +SlickPic https://www.slickpic.com/ Albums, User Profiles SlideShare https://www.slideshare.net/ Presentations SmugMug https://www.smugmug.com/ |smugmug-C| Optional 
(OAuth) The /b/ Archive https://thebarchive.com/ Threads Tsumino https://www.tsumino.com/ Galleries, Search Results Optional -Tumblr https://www.tumblr.com/ Images from Users, Likes, Posts, Tag-Searches Optional (OAuth) +Tumblr https://www.tumblr.com/ Likes, Posts, Tag-Searches, User Profiles Optional (OAuth) Twitter https://twitter.com/ Media Timelines, Timelines, Tweets Optional -VSCO https://vsco.co/ Images from Users, Collections, individual Images +VSCO https://vsco.co/ Collections, individual Images, User Profiles Wallhaven https://wallhaven.cc/ individual Images, Search Results |wallhaven-A| Warosu https://warosu.org/ Threads -Weibo https://www.weibo.com/ Images from Users, Images from Statuses +Weibo https://www.weibo.com/ Images from Statuses, User Profiles WikiArt.org https://www.wikiart.org/ Artists, Artworks World Three http://www.slide.world-three.org/ Chapters, Manga -xHamster https://xhamster.com/ Images from Users, Galleries -XVideos https://www.xvideos.com/ Images from Users, Galleries +xHamster https://xhamster.com/ Galleries, User Profiles +XVideos https://www.xvideos.com/ Galleries, User Profiles Yandere https://yande.re/ Pools, Popular Images, Posts, Tag-Searches yaplog! https://yaplog.jp/ Blogs, Posts |yuki-S| https://yuki.la/ Threads @@ -133,16 +133,15 @@ Turboimagehost https://www.turboimagehost.com/ individual Images もえぴりあ https://vanilla-rock.com/ Posts, Tag-Searches ==================== =================================== ================================================== ================ -.. |artstation-C| replace:: Images from Users, Albums, Artwork Listings, Challenges, individual Images, Likes, Search Results +.. |artstation-C| replace:: Albums, Artwork Listings, Challenges, individual Images, Likes, Search Results, User Profiles .. |deviantart-C| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Scraps, Sta.sh -.. 
|flickr-C| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results -.. |hentaifoundry-C| replace:: Images from Users, Favorites, individual Images, Popular Images, Recent Images, Scraps -.. |instagram-C| replace:: Images from Users, Channels, individual Images, Stories, Tag-Searches -.. |joyreactor-C| replace:: Images from Users, Posts, Search Results, Tag-Searches -.. |nijie-C| replace:: Images from Users, Doujin, Favorites, individual Images -.. |pixiv-C| replace:: Images from Users, Favorites, Follows, pixiv.me Links, Rankings, Search Results, Individual Images -.. |pixnet-C| replace:: Images from Users, Folders, individual Images, Sets -.. |pornreactor-C| replace:: Images from Users, Posts, Search Results, Tag-Searches +.. |flickr-C| replace:: Albums, Favorites, Galleries, Groups, individual Images, Search Results, User Profiles +.. |hentaifoundry-C| replace:: Favorites, individual Images, Popular Images, Recent Images, Scraps, User Profiles +.. |imgur-C| replace:: Albums, Favorites, Galleries, individual Images, User Profiles +.. |instagram-C| replace:: Channels, individual Images, Stories, Tag-Searches, User Profiles +.. |nijie-C| replace:: Doujin, Favorites, individual Images, User Profiles +.. |pixiv-C| replace:: Favorites, Follows, pixiv.me Links, Rankings, Search Results, User Profiles, Individual Images +.. |reddit-C| replace:: individual Images, Submissions, Subreddits, User Profiles .. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders .. |wallhaven-A| replace:: Optional (`API Key `__) .. 
|yuki-S| replace:: yuki.la 4chan archive diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 806b229..94a445a 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -248,7 +248,7 @@ def main(): log.error("No suitable extractor found for '%s'", url) except KeyboardInterrupt: - print("\nKeyboardInterrupt", file=sys.stderr) + sys.exit("\nKeyboardInterrupt") except BrokenPipeError: pass except IOError as exc: diff --git a/gallery_dl/extractor/3dbooru.py b/gallery_dl/extractor/3dbooru.py index d0e59ad..15f4207 100644 --- a/gallery_dl/extractor/3dbooru.py +++ b/gallery_dl/extractor/3dbooru.py @@ -71,7 +71,7 @@ class ThreedeebooruPopularExtractor(booru.MoebooruPopularMixin, r"/post/popular_(?Pby_(?:day|week|month)|recent)" r"(?:\?(?P[^#]*))?") test = ("http://behoimi.org/post/popular_by_month?month=2&year=2013", { - "url": "c70268dce441a9ccc3383c244ec15edb059f494f", + "url": "f5a26c624da9a3d1dbc610e4a614bc57df6251c5", "count": 20, }) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 6614755..525cc84 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -620,7 +620,7 @@ class DeviantartExtractorV2(DeviantartExtractor): # extract download target target = files[-1] - if deviation["isJournal"] and self.commit_journal: + if "textContent" in deviation and self.commit_journal: journal = deviation["textContent"] journal["html"] = journal["html"]["markup"] target["src"] = self.commit_journal(deviation, journal)[1] @@ -729,6 +729,16 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2): ("https://www.deviantart.com/ikatxfruti/art/Bang-Bang-528130222", { "pattern": r"https://images-wixmp-.*wixmp.com/f/.*\.swf", }), + # journal + ("https://www.deviantart.com/shimoda7/journal/ARTility-583755752", { + "url": "f33f8127ab71819be7de849175b6d5f8b37bb629", + "pattern": "text:\n", + }), + # journal-like post with isJournal == False (#419) + 
("https://www.deviantart.com/gliitchlord/art/brashstrokes-812942668", { + "url": "1534d6ea0561247ab921d07505e57a9d663a833b", + "pattern": "text:\n", + }), # old-style URLs ("https://shimoda7.deviantart.com" "/art/For-the-sake-of-a-memory-10073852"), @@ -818,6 +828,12 @@ class DeviantartAPI(): self.client_secret = extractor.config( "client-secret", self.CLIENT_SECRET) + self.log.debug( + "Using %s API credentials (client-id %s)", + "default" if self.client_id == self.CLIENT_ID else "custom", + self.client_id, + ) + def browse_popular(self, query=None, timerange=None, category_path=None, offset=0): """Yield popular deviations""" @@ -873,6 +889,8 @@ class DeviantartAPI(): def deviation_metadata(self, deviations): """ Fetch deviation metadata for a set of deviations""" + if not deviations: + return [] endpoint = "deviation/metadata?" + "&".join( "deviationids[{}]={}".format(num, deviation["deviationid"]) for num, deviation in enumerate(deviations) @@ -953,7 +971,7 @@ class DeviantartAPI(): if self.delay > self.delay_min: self.delay -= 1 return data - if not fatal: + if not fatal and status != 429: return None if data.get("error_description") == "User not found.": raise exception.NotFoundError("user or group") @@ -975,13 +993,18 @@ class DeviantartAPI(): if "results" not in data: self.log.error("Unexpected API response: %s", data) return - if (public and self.refresh_token and - len(data["results"]) < params["limit"]): - self.log.debug("Switching to private access token") - public = False - continue if extend: + if public and len(data["results"]) < params["limit"]: + if self.refresh_token: + self.log.debug("Switching to private access token") + public = False + continue + elif data["has_more"]: + self.log.warning( + "Private deviations detected! 
Run 'gallery-dl " + "oauth:deviantart' and follow the instructions to " + "be able to access them.") if self.metadata: self._metadata(data["results"]) if self.folders: @@ -1003,7 +1026,6 @@ class DeviantartAPI(): deviations, self.deviation_metadata(deviations)): deviation.update(metadata) deviation["tags"] = [t["tag_name"] for t in deviation["tags"]] - return deviations def _folders(self, deviations): """Add a list of all containing folders to each deviation object""" diff --git a/gallery_dl/extractor/hentaicafe.py b/gallery_dl/extractor/hentaicafe.py index 679b3ad..161073b 100644 --- a/gallery_dl/extractor/hentaicafe.py +++ b/gallery_dl/extractor/hentaicafe.py @@ -45,7 +45,7 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor): """Extractor for manga from hentai.cafe""" category = "hentaicafe" pattern = (r"(?:https?://)?" + r"(?:www\.)?hentai\.cafe" - r"((?:/manga/series)?/[^/?&#]+)/?$") + r"(/hc\.fyi/\d+|(?:/manga/series)?/[^/?&#]+)/?$") test = ( # single chapter ("https://hentai.cafe/hazuki-yuuto-summer-blues/", { @@ -57,11 +57,17 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor): "url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076", "keyword": "28271062d7b4a2f99a0e1a894f69af8c5581a6bb", }), + # new-style URL + ("https://hentai.cafe/hc.fyi/2782", { + "url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076", + "keyword": "28271062d7b4a2f99a0e1a894f69af8c5581a6bb", + }), # foolslide URL ("https://hentai.cafe/manga/series/saitom-box/", { "url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076", "keyword": "f0ece32d958f889d8229ed4052716d398a0a875c", }), + ) root = "https://hentai.cafe" reverse = False diff --git a/gallery_dl/extractor/hypnohub.py b/gallery_dl/extractor/hypnohub.py index bf2db96..860cebd 100644 --- a/gallery_dl/extractor/hypnohub.py +++ b/gallery_dl/extractor/hypnohub.py @@ -23,7 +23,7 @@ class HypnohubTagExtractor(booru.TagMixin, HypnohubExtractor): pattern = (r"(?:https?://)?(?:www\.)?hypnohub\.net" 
r"/post\?(?:[^&#]*&)*tags=(?P[^&#]+)") test = ("https://hypnohub.net/post?tags=gonoike_biwa", { - "url": "6bebc4318489ee37e0c3b814352acd6783ba95d6", + "url": "0deaf1a2f832cfc4354c531259b949e850da1e7e", }) diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py index 4aa670b..2a8dcad 100644 --- a/gallery_dl/extractor/imgbb.py +++ b/gallery_dl/extractor/imgbb.py @@ -28,7 +28,10 @@ class ImgbbExtractor(Extractor): def items(self): self.login() - page = self.request(self.page_url, params={"sort": self.sort}).text + response = self.request(self.page_url, params={"sort": self.sort}) + if response.history and response.url.startswith(self.root): + raise exception.NotFoundError(self.subcategory) + page = response.text data = self.metadata(page) first = True @@ -97,16 +100,24 @@ class ImgbbAlbumExtractor(ImgbbExtractor): directory_fmt = ("{category}", "{user}", "{album_name} {album_id}") pattern = r"(?:https?://)?ibb\.co/album/([^/?&#]+)/?(?:\?([^#]+))?" test = ( - ("https://ibb.co/album/c6p5Yv", { + ("https://ibb.co/album/i5PggF", { "range": "1-80", - "url": "8adaf0f7dfc19ff8bc4712c97f534af8b1e06412", - "keyword": "155b665a53e83d359e914cab7c69d5b829444d64", + "url": "570872b6eb3e11cf10b618922b780fed204c3f09", + "keyword": "0f2fc956728c36540c577578bd168d2459d6ae4b", }), - ("https://ibb.co/album/c6p5Yv?sort=title_asc", { + ("https://ibb.co/album/i5PggF?sort=title_asc", { "range": "1-80", - "url": "d6c45041d5c8323c435b183a976f3fde2af7c547", - "keyword": "30c3262214e2044bbcf6bf2dee8e3ca7ebd62b71", + "url": "e2e387b8fdb3690bd75d804d0af2833112e385cd", + "keyword": "a307fc9d2085bdc0eb7c538c8d866c59198d460c", }), + # deleted + ("https://ibb.co/album/fDArrF", { + "exception": exception.NotFoundError, + }), + # private + ("https://ibb.co/album/hqgWrF", { + "exception": exception.HttpError, + }) ) def __init__(self, match): @@ -182,9 +193,18 @@ class ImgbbUserExtractor(ImgbbExtractor): class ImgbbImageExtractor(ImgbbExtractor): subcategory = "image" pattern = 
r"(?:https?://)?ibb\.co/(?!album/)([^/?&#]+)" - test = ("https://ibb.co/NLZHgqS", { - "url": "fbca86bac09de6fc0304054b2170b423ca1e84fa", - "keyword": "5d70e779bad03b2dc5273b627638045168671157", + test = ("https://ibb.co/fUqh5b", { + "pattern": "https://image.ibb.co/dY5FQb/Arundel-Ireeman-5.jpg", + "content": "c5a0965178a8b357acd8aa39660092918c63795e", + "keyword": { + "id" : "fUqh5b", + "title" : "Arundel Ireeman 5", + "url" : "https://image.ibb.co/dY5FQb/Arundel-Ireeman-5.jpg", + "width" : 960, + "height": 719, + "user" : "folkie", + "extension": "jpg", + }, }) def __init__(self, match): diff --git a/gallery_dl/extractor/imgth.py b/gallery_dl/extractor/imgth.py index a97f2e0..8a6fe1c 100644 --- a/gallery_dl/extractor/imgth.py +++ b/gallery_dl/extractor/imgth.py @@ -44,7 +44,7 @@ class ImgthGalleryExtractor(Extractor): while True: thumbs = text.extract(page, '
    ', '
')[0] for url in text.extract_iter(thumbs, '' not in page: return pnum += 1 diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 8523523..cb36c30 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -10,13 +10,18 @@ from .common import Extractor, Message from .. import text, exception +import itertools import json +BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.com" + + class ImgurExtractor(Extractor): """Base class for imgur extractors""" category = "imgur" root = "https://imgur.com" + api_root = "https://api.imgur.com" def __init__(self, match): Extractor.__init__(self, match) @@ -43,14 +48,40 @@ class ImgurExtractor(Extractor): image["extension"] = image["ext"][1:] return url + def _items_apiv3(self, urlfmt): + album_ex = ImgurAlbumExtractor + image_ex = ImgurImageExtractor + + params = { + "IMGURPLATFORM" : "web", + "album_previews": "0", + "client_id" : "546c25a59c58ad7", + } + headers = { + "Origin" : self.root, + "Referer": self.root + "/", + } + + yield Message.Version, 1 + + for num in itertools.count(0): + url = urlfmt.format(num) + data = self.request(url, params=params, headers=headers).json() + + for item in data["data"]: + item["_extractor"] = album_ex if item["is_album"] else image_ex + yield Message.Queue, item["link"], item + + if len(data["data"]) < 60: + return + class ImgurImageExtractor(ImgurExtractor): """Extractor for individual images on imgur.com""" subcategory = "image" filename_fmt = "{category}_{hash}{title:?_//}.{extension}" archive_fmt = "{hash}" - pattern = (r"(?:https?://)?(?:www\.|[im]\.|)?imgur\.com" - r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?") + pattern = BASE_PATTERN + r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?" test = ( ("https://imgur.com/21yMxCS", { "url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2", @@ -111,8 +142,7 @@ class ImgurAlbumExtractor(ImgurExtractor): directory_fmt = ("{category}", "{album[hash]}{album[title]:? 
- //}") filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}" archive_fmt = "{album[hash]}_{hash}" - pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com" - r"/(?:a|t/unmuted)/(\w{7}|\w{5})") + pattern = BASE_PATTERN + r"/(?:a|t/unmuted)/(\w{7}|\w{5})" test = ( ("https://imgur.com/a/TcBmP", { "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563", @@ -181,8 +211,7 @@ class ImgurAlbumExtractor(ImgurExtractor): class ImgurGalleryExtractor(ImgurExtractor): """Extractor for imgur galleries""" subcategory = "gallery" - pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com" - r"/gallery/(\w{7}|\w{5})") + pattern = BASE_PATTERN + r"/gallery/(\w{7}|\w{5})" test = ( ("https://imgur.com/gallery/zf2fIms", { # non-album gallery (#380) "pattern": "https://imgur.com/zf2fIms", @@ -205,3 +234,39 @@ class ImgurGalleryExtractor(ImgurExtractor): yield Message.Version, 1 yield Message.Queue, url, {"_extractor": extr} + + +class ImgurUserExtractor(ImgurExtractor): + """Extractor for all images posted by a user""" + subcategory = "user" + pattern = BASE_PATTERN + r"/user/([^/?&#]+)(?:/posts|/submitted)?/?$" + test = ( + ("https://imgur.com/user/Miguenzo", { + "range": "1-100", + "count": 100, + "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+", + }), + ("https://imgur.com/user/Miguenzo/posts"), + ("https://imgur.com/user/Miguenzo/submitted"), + ) + + def items(self): + urlfmt = "{}/3/account/{}/submissions/{{}}/newest".format( + self.api_root, self.key) + return self._items_apiv3(urlfmt) + + +class ImgurFavoriteExtractor(ImgurExtractor): + """Extractor for a user's favorites""" + subcategory = "favorite" + pattern = BASE_PATTERN + r"/user/([^/?&#]+)/favorites" + test = ("https://imgur.com/user/Miguenzo/favorites", { + "range": "1-100", + "count": 100, + "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+", + }) + + def items(self): + urlfmt = "{}/3/account/{}/gallery_favorites/{{}}/newest".format( + self.api_root, self.key) + return self._items_apiv3(urlfmt) diff --git 
a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index e5cfe8b..8eee390 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -43,6 +43,10 @@ class InstagramExtractor(Extractor): data["extension"] = None yield Message.Url, \ 'ytdl:{}/p/{}/'.format(self.root, data['shortcode']), data + elif data['typename'] == 'GraphHighlightReel': + url = '{}/stories/highlights/{}/'.format(self.root, data['id']) + data['_extractor'] = InstagramStoriesExtractor + yield Message.Queue, url, data def login(self): if self._check_cookies(self.cookienames): @@ -84,13 +88,24 @@ class InstagramExtractor(Extractor): for key in ("sessionid", "mid", "csrftoken") } - def _extract_shared_data(self, page): - return json.loads(text.extract(page, - 'window._sharedData = ', ';')[0]) + def _request_graphql(self, variables, query_hash, csrf=None): + headers = { + 'X-CSRFToken': csrf, + 'X-IG-App-ID': '936619743392459', + 'X-Requested-With': 'XMLHttpRequest', + } + url = '{}/graphql/query/?query_hash={}&variables={}'.format( + self.root, query_hash, variables, + ) + return self.request(url, headers=headers).json() - def _extract_postpage(self, url): + def _extract_shared_data(self, url): page = self.request(url).text - shared_data = self._extract_shared_data(page) + data = text.extract(page, 'window._sharedData = ', ';')[0] + return json.loads(data) + + def _extract_postpage(self, url): + shared_data = self._extract_shared_data(url) media = shared_data['entry_data']['PostPage'][0]['graphql']['shortcode_media'] common = { @@ -121,7 +136,7 @@ class InstagramExtractor(Extractor): 'sidecar_shortcode': media['shortcode'], } if children['__typename'] == 'GraphVideo': - media_data["_ytdl_index"] = yi + media_data['_ytdl_index'] = yi yi += 1 media_data.update(common) medias.append(media_data) @@ -146,8 +161,7 @@ class InstagramExtractor(Extractor): highlight_id = '"{}"'.format(self.highlight_id) query_hash = '30a89afdd826d78a5376008a7b81c205' else: - 
page = self.request(url).text - shared_data = self._extract_shared_data(page) + shared_data = self._extract_shared_data(url) # If no stories are present the URL redirects to `ProfilePage' if 'StoriesPage' not in shared_data['entry_data']: @@ -164,17 +178,10 @@ class InstagramExtractor(Extractor): '"highlight_reel_ids":[{}],"precomposed_overlay":true,' '"show_story_viewer_list":true,' '"story_viewer_fetch_count":50,"story_viewer_cursor":"",' - '"stories_video_dash_manifest":false}}' + '"stories_video_dash_manifest":false' + '}}' ).format(user_id, highlight_id) - headers = { - "X-Requested-With": "XMLHttpRequest", - } - url = '{}/graphql/query/?query_hash={}&variables={}'.format( - self.root, - query_hash, - variables, - ) - shared_data = self.request(url, headers=headers).json() + shared_data = self._request_graphql(variables, query_hash) # If there are stories present but the user is not authenticated or # does not have permissions no stories are returned. @@ -209,38 +216,38 @@ class InstagramExtractor(Extractor): return medias - def _extract_page(self, url, page_type): - shared_data_fields = { - 'ProfilePage': { - 'page': 'ProfilePage', - 'node': 'user', - 'node_id': 'id', - 'edge_to_medias': 'edge_owner_to_timeline_media', - 'variables_id': 'id', - 'query_hash': 'f2405b236d85e8296cf30347c9f08c2a', - }, - 'ProfileChannelPage': { - 'page': 'ProfilePage', - 'node': 'user', - 'node_id': 'id', - 'edge_to_medias': 'edge_felix_video_timeline', - 'variables_id': 'id', - 'query_hash': 'bc78b344a68ed16dd5d7f264681c4c76', - }, - 'TagPage': { - 'page': 'TagPage', - 'node': 'hashtag', - 'node_id': 'name', - 'edge_to_medias': 'edge_hashtag_to_media', - 'variables_id': 'tag_name', - 'query_hash': 'f12c9ec5e46a3173b2969c712ad84744', - }, - } + def _extract_story_highlights(self, shared_data): + graphql = shared_data['entry_data']['ProfilePage'][0]['graphql'] + variables = ( + '{{' + '"user_id":"{}","include_chaining":true,' + 
'"include_reel":true,"include_suggested_users":false,' + '"include_logged_out_extras":false,' + '"include_highlight_reels":true' + '}}' + ).format(graphql['user']['id']) + + data = self._request_graphql( + variables, + 'aec5501414615eca36a9acf075655b1e', + shared_data['config']['csrf_token'], + ) - page = self.request(url).text - shared_data = self._extract_shared_data(page) - psdf = shared_data_fields[page_type] - csrf = shared_data["config"]["csrf_token"] + highlights = [] + for edge in data['data']['user']['edge_highlight_reels']['edges']: + story = edge['node'] + highlights.append({ + 'id' : story['id'], + 'title' : story['title'], + 'owner_id': story['owner']['id'], + 'username': story['owner']['username'], + 'typename': story['__typename'], + }) + + return highlights + + def _extract_page(self, shared_data, psdf): + csrf = shared_data['config']['csrf_token'] while True: # Deal with different structure of pages: the first page @@ -270,29 +277,9 @@ class InstagramExtractor(Extractor): variables_id, end_cursor, ) - headers = { - "X-Requested-With": "XMLHttpRequest", - "X-CSRFToken": csrf, - "X-IG-App-ID": "936619743392459", - } - url = '{}/graphql/query/?query_hash={}&variables={}'.format( - self.root, - psdf['query_hash'], - variables, + shared_data = self._request_graphql( + variables, psdf['query_hash'], csrf, ) - shared_data = self.request(url, headers=headers).json() - - def _extract_profilepage(self, url): - yield from self._extract_page(url, 'ProfilePage') - - def _extract_profilechannelpage(self, url): - yield from self._extract_page(url, 'ProfileChannelPage') - - def _extract_tagpage(self, url): - yield from self._extract_page(url, 'TagPage') - - def _extract_storiespage(self, url): - yield from self._extract_stories(url) class InstagramImageExtractor(InstagramExtractor): @@ -382,16 +369,43 @@ class InstagramImageExtractor(InstagramExtractor): return self._extract_postpage(url) +class InstagramStoriesExtractor(InstagramExtractor): + """Extractor for 
StoriesPage""" + subcategory = "stories" + pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" + r"/stories/([^/?&#]+)(?:/(\d+))?") + test = ( + ("https://www.instagram.com/stories/instagram/"), + ("https://www.instagram.com/stories/highlights/18042509488170095/"), + ) + + def __init__(self, match): + InstagramExtractor.__init__(self, match) + self.username, self.highlight_id = match.groups() + + def instagrams(self): + url = '{}/stories/{}/'.format(self.root, self.username) + return self._extract_stories(url) + + class InstagramUserExtractor(InstagramExtractor): """Extractor for ProfilePage""" subcategory = "user" pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)" r"([^/?&#]+)/?$") - test = ("https://www.instagram.com/instagram/", { - "range": "1-16", - "count": ">= 16", - }) + test = ( + ("https://www.instagram.com/instagram/", { + "range": "1-16", + "count": ">= 16", + }), + ("https://www.instagram.com/instagram/", { + "options": (("highlights", True),), + "pattern": InstagramStoriesExtractor.pattern, + "range": "1-2", + "count": 2, + }), + ) def __init__(self, match): InstagramExtractor.__init__(self, match) @@ -399,7 +413,19 @@ class InstagramUserExtractor(InstagramExtractor): def instagrams(self): url = '{}/{}/'.format(self.root, self.username) - return self._extract_profilepage(url) + shared_data = self._extract_shared_data(url) + + if self.config('highlights'): + yield from self._extract_story_highlights(shared_data) + + yield from self._extract_page(shared_data, { + 'page': 'ProfilePage', + 'node': 'user', + 'node_id': 'id', + 'variables_id': 'id', + 'edge_to_medias': 'edge_owner_to_timeline_media', + 'query_hash': 'f2405b236d85e8296cf30347c9f08c2a', + }) class InstagramChannelExtractor(InstagramExtractor): @@ -419,7 +445,16 @@ class InstagramChannelExtractor(InstagramExtractor): def instagrams(self): url = '{}/{}/channel/'.format(self.root, self.username) - return 
self._extract_profilechannelpage(url) + shared_data = self._extract_shared_data(url) + + return self._extract_page(shared_data, { + 'page': 'ProfilePage', + 'node': 'user', + 'node_id': 'id', + 'variables_id': 'id', + 'edge_to_medias': 'edge_felix_video_timeline', + 'query_hash': 'bc78b344a68ed16dd5d7f264681c4c76', + }) class InstagramTagExtractor(InstagramExtractor): @@ -442,23 +477,13 @@ class InstagramTagExtractor(InstagramExtractor): def instagrams(self): url = '{}/explore/tags/{}/'.format(self.root, self.tag) - return self._extract_tagpage(url) - - -class InstagramStoriesExtractor(InstagramExtractor): - """Extractor for StoriesPage""" - subcategory = "stories" - pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" - r"/stories/([^/?&#]+)(?:/(\d+))?") - test = ( - ("https://www.instagram.com/stories/instagram/"), - ("https://www.instagram.com/stories/highlights/18042509488170095/"), - ) - - def __init__(self, match): - InstagramExtractor.__init__(self, match) - self.username, self.highlight_id = match.groups() - - def instagrams(self): - url = '{}/stories/{}/'.format(self.root, self.username) - return self._extract_storiespage(url) + shared_data = self._extract_shared_data(url) + + return self._extract_page(shared_data, { + 'page': 'TagPage', + 'node': 'hashtag', + 'node_id': 'name', + 'variables_id': 'tag_name', + 'edge_to_medias': 'edge_hashtag_to_media', + 'query_hash': 'f12c9ec5e46a3173b2969c712ad84744', + }) diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index a73eb86..965daa0 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -74,7 +74,7 @@ class LusciousAlbumExtractor(LusciousBase, GalleryExtractor): }), ("https://members.luscious.net/albums/login-required_323871/", { "options": (("username", None),), - "exception": exception.AuthorizationError, + "exception": exception.HttpError, }), ("https://www.luscious.net/albums/okinami_277031/"), 
("https://members.luscious.net/albums/okinami_277031/"), @@ -88,14 +88,14 @@ class LusciousAlbumExtractor(LusciousBase, GalleryExtractor): GalleryExtractor.__init__(self, match, url) def metadata(self, page): - pos = page.find("

404 Not Found

") - if pos >= 0: + title, pos = text.extract(page, '"og:title" content="', '"') + + if title is None: msg = text.extract(page, '
', '
', pos)[0] - if msg and "content is not available" in msg: - raise exception.AuthorizationError() + if msg: + raise exception.AuthorizationError(msg) raise exception.NotFoundError("album") - title, pos = text.extract(page, '"og:title" content="', '"') info , pos = text.extract(page, '