author     Unit 193 <unit193@ubuntu.com>  2020-03-28 23:01:51 -0400
committer  Unit 193 <unit193@ubuntu.com>  2020-03-28 23:01:51 -0400
commit     e4887ae6b00c50fbbde531cc274c77b076bd821d (patch)
tree       051849d0ce8ed35aa229ba828a2dfe1faf10c5c0
parent     e8cc000750de972384f2f34d02d42222b4018ae9 (diff)
download   gallery-dl-e4887ae6b00c50fbbde531cc274c77b076bd821d.tar.bz2
           gallery-dl-e4887ae6b00c50fbbde531cc274c77b076bd821d.tar.xz
           gallery-dl-e4887ae6b00c50fbbde531cc274c77b076bd821d.tar.zst

New upstream version 1.13.3 (tag: upstream/1.13.3)
-rw-r--r--  PKG-INFO                                8
-rw-r--r--  README.rst                              6
-rw-r--r--  data/man/gallery-dl.1                   2
-rw-r--r--  data/man/gallery-dl.conf.5              2
-rw-r--r--  gallery_dl.egg-info/PKG-INFO            8
-rw-r--r--  gallery_dl/__init__.py                 12
-rw-r--r--  gallery_dl/extractor/35photo.py        54
-rw-r--r--  gallery_dl/extractor/danbooru.py       17
-rw-r--r--  gallery_dl/extractor/deviantart.py     12
-rw-r--r--  gallery_dl/extractor/e621.py          111
-rw-r--r--  gallery_dl/extractor/instagram.py      56
-rw-r--r--  gallery_dl/extractor/mangadex.py       14
-rw-r--r--  gallery_dl/extractor/mangapark.py      57
-rw-r--r--  gallery_dl/extractor/newgrounds.py      2
-rw-r--r--  gallery_dl/extractor/nozomi.py        101
-rw-r--r--  gallery_dl/extractor/piczel.py         16
-rw-r--r--  gallery_dl/extractor/sexcom.py         20
-rw-r--r--  gallery_dl/extractor/simplyhentai.py    1
-rw-r--r--  gallery_dl/extractor/twitter.py         2
-rw-r--r--  gallery_dl/output.py                   13
-rw-r--r--  gallery_dl/util.py                      6
-rw-r--r--  gallery_dl/version.py                   2
-rw-r--r--  test/test_results.py                    5
23 files changed, 278 insertions(+), 249 deletions(-)
diff --git a/PKG-INFO b/PKG-INFO
index 84237a6..1cec073 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.13.2
+Version: 1.13.3
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.2/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.2/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -247,7 +247,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.2.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.3.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/README.rst b/README.rst
index 3b5945c..37f07b7 100644
--- a/README.rst
+++ b/README.rst
@@ -83,8 +83,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.2/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.2/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -236,7 +236,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.2.tar.gz
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.3.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 304c345..af9ac7d 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2020-03-14" "1.13.2" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2020-03-28" "1.13.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 4ad93f8..9a374da 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2020-03-14" "1.13.2" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2020-03-28" "1.13.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 92ded16..c9ca17b 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.13.2
+Version: 1.13.3
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.2/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.2/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.3/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -247,7 +247,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.2.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.3.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 6fba5e2..e71a5b0 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -108,7 +108,7 @@ def parse_inputfile(file, log):
def main():
try:
- if sys.stdout.encoding.lower() != "utf-8":
+ if sys.stdout and sys.stdout.encoding.lower() != "utf-8":
output.replace_std_streams()
parser = option.build_parser()
@@ -205,11 +205,13 @@ def main():
if args.inputfile:
try:
if args.inputfile == "-":
- file = sys.stdin
+ if sys.stdin:
+ urls += parse_inputfile(sys.stdin, log)
+ else:
+ log.warning("input file: stdin is not readable")
else:
- file = open(args.inputfile, encoding="utf-8")
- urls += parse_inputfile(file, log)
- file.close()
+ with open(args.inputfile, encoding="utf-8") as file:
+ urls += parse_inputfile(file, log)
except OSError as exc:
log.warning("input file: %s", exc)
diff --git a/gallery_dl/extractor/35photo.py b/gallery_dl/extractor/35photo.py
index d3e9276..e33aa2d 100644
--- a/gallery_dl/extractor/35photo.py
+++ b/gallery_dl/extractor/35photo.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -101,7 +101,7 @@ class _35photoUserExtractor(_35photoExtractor):
"""Extractor for all images of a user on 35photo.pro"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro"
- r"/(?!photo_|genre_|rating/)([^/?&#]+)")
+ r"/(?!photo_|genre_|tags/|rating/)([^/?&#]+)")
test = (
("https://35photo.pro/liya", {
"pattern": r"https://m\d+.35photo.pro/photos_(main|series)/.*.jpg",
@@ -137,25 +137,49 @@ class _35photoUserExtractor(_35photoExtractor):
})
+class _35photoTagExtractor(_35photoExtractor):
+ """Extractor for all photos from a tag listing"""
+ subcategory = "tag"
+ directory_fmt = ("{category}", "Tags", "{search_tag}")
+ archive_fmt = "t{search_tag}_{id}_{num}"
+ pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/tags/([^/?&#]+)"
+ test = ("https://35photo.pro/tags/landscape/", {
+ "range": "1-25",
+ "count": 25,
+ })
+
+ def __init__(self, match):
+ _35photoExtractor.__init__(self, match)
+ self.tag = match.group(1)
+
+ def metadata(self):
+ return {"search_tag": text.unquote(self.tag).lower()}
+
+ def photos(self):
+ num = 1
+
+ while True:
+ url = "{}/tags/{}/list_{}/".format(self.root, self.tag, num)
+ page = self.request(url).text
+ prev = None
+
+ for photo_id in text.extract_iter(page, "35photo.pro/photo_", "/"):
+ if photo_id != prev:
+ prev = photo_id
+ yield photo_id
+
+ if not prev:
+ return
+ num += 1
+
+
class _35photoGenreExtractor(_35photoExtractor):
"""Extractor for images of a specific genre on 35photo.pro"""
subcategory = "genre"
directory_fmt = ("{category}", "Genre", "{genre}")
archive_fmt = "g{genre_id}_{id}_{num}"
pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/genre_(\d+)(/new/)?"
- test = (
- ("https://35photo.pro/genre_109/", {
- "range": "1-30",
- }),
- ("https://35photo.pro/genre_103/", {
- "range": "1-30",
- "count": 30,
- }),
- ("https://35photo.pro/genre_103/new/", {
- "range": "1-30",
- "count": 30,
- }),
- )
+ test = ("https://35photo.pro/genre_109/",)
def __init__(self, match):
_35photoExtractor.__init__(self, match)
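
The new _35photoTagExtractor walks /tags/<tag>/list_<n>/ pages and stops as soon as a page yields no photo ID. A standalone sketch of that loop, using requests in place of the extractor's session and a plain string scan in place of text.extract_iter:

    import requests

    def tag_photo_ids(tag, root="https://35photo.pro"):
        """Yield photo IDs for a tag until an empty listing page is hit."""
        num = 1
        while True:
            url = "{}/tags/{}/list_{}/".format(root, tag, num)
            page = requests.get(url).text
            prev = None
            for chunk in page.split("35photo.pro/photo_")[1:]:
                photo_id = chunk.partition("/")[0]
                if photo_id != prev:  # skip immediate repeats of the same ID
                    prev = photo_id
                    yield photo_id
            if prev is None:          # page contained no IDs -> last page
                return
            num += 1
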
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 3fdeaf9..3a0d0ef 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -27,10 +27,10 @@ class DanbooruExtractor(SharedConfigMixin, Extractor):
filename_fmt = "{category}_{id}_{md5}.{extension}"
page_limit = 1000
page_start = None
- per_page = 100
+ per_page = 200
def __init__(self, match):
- Extractor.__init__(self, match)
+ super().__init__(match)
self.root = "https://{}.donmai.us".format(match.group(1))
self.ugoira = self.config("ugoira", True)
self.params = {}
@@ -83,6 +83,8 @@ class DanbooruExtractor(SharedConfigMixin, Extractor):
while True:
posts = self.request(url, params=params).json()
+ if "posts" in posts:
+ posts = posts["posts"]
yield from posts
if len(posts) < self.per_page:
@@ -114,7 +116,7 @@ class DanbooruTagExtractor(DanbooruExtractor):
)
def __init__(self, match):
- DanbooruExtractor.__init__(self, match)
+ super().__init__(match)
self.params["tags"] = text.unquote(match.group(2).replace("+", " "))
def metadata(self):
@@ -132,7 +134,7 @@ class DanbooruPoolExtractor(DanbooruExtractor):
})
def __init__(self, match):
- DanbooruExtractor.__init__(self, match)
+ super().__init__(match)
self.pool_id = match.group(2)
self.params["tags"] = "pool:" + self.pool_id
@@ -160,12 +162,13 @@ class DanbooruPostExtractor(DanbooruExtractor):
)
def __init__(self, match):
- DanbooruExtractor.__init__(self, match)
+ super().__init__(match)
self.post_id = match.group(2)
def posts(self):
url = "{}/posts/{}.json".format(self.root, self.post_id)
- return (self.request(url).json(),)
+ post = self.request(url).json()
+ return (post["post"] if "post" in post else post,)
class DanbooruPopularExtractor(DanbooruExtractor):
@@ -184,7 +187,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
)
def __init__(self, match):
- DanbooruExtractor.__init__(self, match)
+ super().__init__(match)
self.params.update(text.parse_query(match.group(2)))
def metadata(self):
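
The danbooru changes raise per_page to 200 and tolerate both API response shapes: a bare list of posts and an object wrapping them under a "posts" key. A sketch of that tolerant pagination (requests stands in for the extractor's session; the keyset scheme is the one the removed e621 _pagination below used):

    import requests

    def paginate_posts(tags, root="https://danbooru.donmai.us", per_page=200):
        params = {"tags": tags, "limit": per_page}
        while True:
            posts = requests.get(root + "/posts.json", params=params).json()
            if "posts" in posts:      # newer API: {"posts": [...]}
                posts = posts["posts"]
            yield from posts
            if len(posts) < per_page:
                return
            # keyset pagination: request only IDs below the last one seen
            params["tags"] = "id:<{} {}".format(posts[-1]["id"], tags)
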
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 90b27d1..d6669d1 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1041,10 +1041,18 @@ class DeviantartAPI():
}
response = self.extractor.request(
url, headers=headers, params=params, fatal=None)
- if response.status_code == 404:
+ code = response.status_code
+
+ if code == 404:
raise exception.StopExtraction(
"Your account must use the Eclipse interface.")
- return response.json()
+ elif code == 403 and b"Request blocked." in response.content:
+ raise exception.StopExtraction(
+ "Requests to deviantart.com blocked due to too much traffic.")
+ try:
+ return response.json()
+ except Exception:
+ return {"error": response.text}
def deviation_metadata(self, deviations):
""" Fetch deviation metadata for a set of deviations"""
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
index bc3f67a..5c5c36c 100644
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -8,41 +8,34 @@
"""Extractors for https://e621.net/"""
-from .common import Extractor, Message, SharedConfigMixin
-from .. import text
-import datetime
+from .common import Extractor, Message
+from . import danbooru
import time
BASE_PATTERN = r"(?:https?://)?e(621|926)\.net"
-class E621Extractor(SharedConfigMixin, Extractor):
+class E621Extractor(danbooru.DanbooruExtractor):
"""Base class for e621 extractors"""
- basecategory = "booru"
category = "e621"
filename_fmt = "{category}_{id}_{file[md5]}.{extension}"
page_limit = 750
page_start = None
- per_page = 200
+ per_page = 320
_last_request = 0
def __init__(self, match):
- Extractor.__init__(self, match)
+ super().__init__(match)
self.root = "https://e{}.net".format(match.group(1))
- self.params = {}
-
- username, api_key = self._get_auth_info()
- if username:
- self.log.debug("Using HTTP Basic Auth for user '%s'", username)
- self.session.auth = (username, api_key)
def request(self, url, **kwargs):
diff = time.time() - E621Extractor._last_request
if diff < 1.0:
- self.log.debug("Sleeping for %s seconds", diff)
- time.sleep(diff)
- kwargs["headers"] = {"User-Agent": "gallery-dl/1.13.0 (by mikf)"}
+ delay = 1.0 - diff
+ self.log.debug("Sleeping for %s seconds", delay)
+ time.sleep(delay)
+ kwargs["headers"] = {"User-Agent": "gallery-dl/1.14.0 (by mikf)"}
response = Extractor.request(self, url, **kwargs)
E621Extractor._last_request = time.time()
return response
@@ -63,31 +56,9 @@ class E621Extractor(SharedConfigMixin, Extractor):
yield Message.Directory, post
yield Message.Url, file["url"], post
- def metadata(self):
- return {}
-
- def posts(self):
- return self._pagination(self.root + "/posts.json")
-
- def _pagination(self, url):
- params = self.params.copy()
- params["limit"] = self.per_page
- tags = params.get("tags", "")
-
- while True:
- posts = self.request(url, params=params).json()["posts"]
- yield from posts
-
- if len(posts) < self.per_page:
- return
- params["tags"] = "id:<{} {}".format(posts[-1]["id"], tags)
-
-class E621TagExtractor(E621Extractor):
+class E621TagExtractor(E621Extractor, danbooru.DanbooruTagExtractor):
"""Extractor for e621 posts from tag searches"""
- subcategory = "tag"
- directory_fmt = ("{category}", "{search_tags}")
- archive_fmt = "t_{search_tags}_{id}"
pattern = BASE_PATTERN + r"/posts?(?:\?.*?tags=|/index/\d+/)([^&#]+)"
test = (
("https://e621.net/posts?tags=anry", {
@@ -99,19 +70,9 @@ class E621TagExtractor(E621Extractor):
("https://e621.net/post?tags=anry"),
)
- def __init__(self, match):
- E621Extractor.__init__(self, match)
- self.params["tags"] = text.unquote(match.group(2).replace("+", " "))
-
- def metadata(self):
- return {"search_tags": self.params["tags"]}
-
-class E621PoolExtractor(E621Extractor):
+class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor):
"""Extractor for e621 pools"""
- subcategory = "pool"
- directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name]}")
- archive_fmt = "p_{pool[id]}_{id}"
pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
test = (
("https://e621.net/pools/73", {
@@ -121,23 +82,9 @@ class E621PoolExtractor(E621Extractor):
("https://e621.net/pool/show/73"),
)
- def __init__(self, match):
- E621Extractor.__init__(self, match)
- self.pool_id = match.group(2)
- self.params["tags"] = "pool:" + self.pool_id
-
- def metadata(self):
- url = "{}/pools/{}.json".format(self.root, self.pool_id)
- pool = self.request(url).json()
- pool["name"] = pool["name"].replace("_", " ")
- del pool["post_ids"]
- return {"pool": pool}
-
-class E621PostExtractor(E621Extractor):
+class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
"""Extractor for single e621 posts"""
- subcategory = "post"
- archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
test = (
("https://e621.net/posts/535", {
@@ -147,20 +94,9 @@ class E621PostExtractor(E621Extractor):
("https://e621.net/post/show/535"),
)
- def __init__(self, match):
- E621Extractor.__init__(self, match)
- self.post_id = match.group(2)
-
- def posts(self):
- url = "{}/posts/{}.json".format(self.root, self.post_id)
- return (self.request(url).json()["post"],)
-
-class E621PopularExtractor(E621Extractor):
+class E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor):
"""Extractor for popular images from e621"""
- subcategory = "popular"
- directory_fmt = ("{category}", "popular", "{scale}", "{date}")
- archive_fmt = "P_{scale[0]}_{date}_{id}"
pattern = BASE_PATTERN + r"/explore/posts/popular(?:\?([^#]*))?"
test = (
("https://e621.net/explore/posts/popular"),
@@ -170,24 +106,3 @@ class E621PopularExtractor(E621Extractor):
"count": ">= 70",
})
)
-
- def __init__(self, match):
- E621Extractor.__init__(self, match)
- self.params.update(text.parse_query(match.group(2)))
-
- def metadata(self):
- scale = self.params.get("scale", "day")
- date = self.params.get("date") or datetime.date.today().isoformat()
- date = date[:10]
-
- if scale == "week":
- date = datetime.date.fromisoformat(date)
- date = (date - datetime.timedelta(days=date.weekday())).isoformat()
- elif scale == "month":
- date = date[:-3]
-
- return {"date": date, "scale": scale}
-
- def posts(self):
- url = self.root + "/explore/posts/popular.json"
- return self._pagination(url)
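
Besides folding the e621 extractors into their danbooru counterparts, the hunk above fixes a rate-limiting bug: the old code slept for the time already elapsed instead of the time remaining. A self-contained sketch of the corrected one-request-per-second limiter, shared across instances via a class attribute (the User-Agent value here is purely illustrative):

    import time
    import requests

    class RateLimitedClient:
        _last_request = 0.0           # shared by all instances

        def request(self, url, **kwargs):
            diff = time.time() - RateLimitedClient._last_request
            if diff < 1.0:
                time.sleep(1.0 - diff)   # sleep only the remaining time
            kwargs.setdefault("headers", {})["User-Agent"] = "example/1.0"
            response = requests.get(url, **kwargs)
            RateLimitedClient._last_request = time.time()
            return response
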
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 96afea1..4af12f1 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2019 Leonardo Taccari
+# Copyright 2018-2020 Leonardo Taccari
# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
@@ -12,6 +12,7 @@
from .common import Extractor, Message
from .. import text, exception
from ..cache import cache
+import itertools
import json
@@ -208,8 +209,10 @@ class InstagramExtractor(Extractor):
media_data = {
'owner_id': media['owner']['id'],
'username': media['owner']['username'],
- 'date': text.parse_timestamp(media['taken_at_timestamp']),
- 'expires': text.parse_timestamp(media['expiring_at_timestamp']),
+ 'date' : text.parse_timestamp(
+ media['taken_at_timestamp']),
+ 'expires' : text.parse_timestamp(
+ media['expiring_at_timestamp']),
'media_id': media['id'],
'typename': media['__typename'],
'display_url': media['display_url'],
@@ -268,7 +271,10 @@ class InstagramExtractor(Extractor):
# Deal with different structure of pages: the first page
# has interesting data in `entry_data', next pages in `data'.
if 'entry_data' in shared_data:
- base_shared_data = shared_data['entry_data'][psdf['page']][0]['graphql']
+ entry_data = shared_data['entry_data']
+ if 'HttpErrorPage' in entry_data:
+ return
+ base_shared_data = entry_data[psdf['page']][0]['graphql']
# variables_id is available only in the first page
variables_id = base_shared_data[psdf['node']][psdf['node_id']]
@@ -404,12 +410,38 @@ class InstagramStoriesExtractor(InstagramExtractor):
return self._extract_stories(url)
+class InstagramSavedExtractor(InstagramExtractor):
+ """Extractor for ProfilePage saved media"""
+ subcategory = "saved"
+ pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
+ r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)"
+ r"([^/?&#]+)/saved")
+ test = ("https://www.instagram.com/instagram/saved/",)
+
+ def __init__(self, match):
+ InstagramExtractor.__init__(self, match)
+ self.username = match.group(1)
+
+ def instagrams(self):
+ url = '{}/{}/saved/'.format(self.root, self.username)
+ shared_data = self._extract_shared_data(url)
+
+ return self._extract_page(shared_data, {
+ 'page': 'ProfilePage',
+ 'node': 'user',
+ 'node_id': 'id',
+ 'variables_id': 'id',
+ 'edge_to_medias': 'edge_saved_media',
+ 'query_hash': '8c86fed24fa03a8a2eea2a70a80c7b6b',
+ })
+
+
class InstagramUserExtractor(InstagramExtractor):
"""Extractor for ProfilePage"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)"
- r"([^/?&#]+)/?$")
+ r"([^/?&#]+)/?(?:$|[?#])")
test = (
("https://www.instagram.com/instagram/", {
"range": "1-16",
@@ -421,6 +453,7 @@ class InstagramUserExtractor(InstagramExtractor):
"range": "1-2",
"count": 2,
}),
+ ("https://www.instagram.com/instagram/?hl=en"),
)
def __init__(self, match):
@@ -431,10 +464,7 @@ class InstagramUserExtractor(InstagramExtractor):
url = '{}/{}/'.format(self.root, self.username)
shared_data = self._extract_shared_data(url)
- if self.config('highlights'):
- yield from self._extract_story_highlights(shared_data)
-
- yield from self._extract_page(shared_data, {
+ instagrams = self._extract_page(shared_data, {
'page': 'ProfilePage',
'node': 'user',
'node_id': 'id',
@@ -443,6 +473,14 @@ class InstagramUserExtractor(InstagramExtractor):
'query_hash': 'f2405b236d85e8296cf30347c9f08c2a',
})
+ if self.config('highlights'):
+ instagrams = itertools.chain(
+ self._extract_story_highlights(shared_data),
+ instagrams,
+ )
+
+ return instagrams
+
class InstagramChannelExtractor(InstagramExtractor):
"""Extractor for ProfilePage channel"""
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 9fd9f3f..38c90df 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -50,13 +50,13 @@ class MangadexChapterExtractor(MangadexExtractor):
pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)/chapter/(\d+)"
test = (
("https://mangadex.org/chapter/122094", {
- "keyword": "1c834dca33025f521e1874aee1f71c51e28ebf99",
+ "keyword": "ef1084c2845825979e150512fed8fdc209baf05a",
"content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f",
}),
# oneshot
("https://mangadex.cc/chapter/138086", {
"count": 64,
- "keyword": "178777bd0352fb19eb934cbee5630d16e3fb60ab",
+ "keyword": "f3da80e57b1acfe1bede7d6ebe82a4bae3f9101a",
}),
)
@@ -93,7 +93,7 @@ class MangadexChapterExtractor(MangadexExtractor):
"chapter_minor": sep + minor,
"chapter_id": cdata["id"],
"group": mdata["chapter"][self.chapter_id]["group_name"],
- "date": cdata["timestamp"],
+ "date": text.parse_timestamp(cdata["timestamp"]),
"lang": util.language_to_code(cdata["lang_name"]),
"language": cdata["lang_name"],
}
@@ -115,16 +115,16 @@ class MangadexMangaExtractor(MangadexExtractor):
test = (
("https://mangadex.org/manga/2946/souten-no-koumori", {
"pattern": r"https://mangadex.org/chapter/\d+",
- "keywords": {
+ "keyword": {
"manga": "Souten no Koumori",
"manga_id": 2946,
- "title": "Oneshot",
+ "title": "re:One[Ss]hot",
"volume": 0,
"chapter": 0,
"chapter_minor": "",
"chapter_id": int,
"group": str,
- "date": int,
+ "date": "type:datetime",
"lang": str,
"language": str,
},
@@ -169,7 +169,7 @@ class MangadexMangaExtractor(MangadexExtractor):
"chapter_minor": sep + minor,
"chapter_id": text.parse_int(chid),
"group": text.unescape(info["group_name"]),
- "date": info["timestamp"],
+ "date": text.parse_timestamp(info["timestamp"]),
"lang": lang,
"language": util.code_to_language(lang),
"_extractor": MangadexChapterExtractor,
diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index ee11231..228324f 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -1,16 +1,17 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract manga-chapters and entire manga from https://mangapark.me/"""
+"""Extractors for https://mangapark.net/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text, exception
import json
+import re
class MangaparkBase():
@@ -37,26 +38,35 @@ class MangaparkBase():
elif key == "e":
data["chapter_minor"] = "v" + value
+ @staticmethod
+ def parse_chapter_title(title, data):
+ match = re.search(r"(?i)(?:vol(?:ume)?[ .]*(\d+) )?"
+ r"ch(?:apter)?[ .]*(\d+)(\.\w+)?", title)
+ if match:
+ vol, ch, data["chapter_minor"] = match.groups()
+ data["volume"] = text.parse_int(vol)
+ data["chapter"] = text.parse_int(ch)
+
class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
- """Extractor for manga-chapters from mangapark.me"""
+ """Extractor for manga-chapters from mangapark.net"""
pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
r"/manga/([^?&#]+/i\d+)")
test = (
- ("https://mangapark.me/manga/gosu/i811615/c55/1", {
+ ("https://mangapark.net/manga/gosu/i811615/c55/1", {
"count": 50,
"keyword": "373d678048d29492f9763743ccaa9b6d840f17cf",
}),
- (("https://mangapark.me/manga"
+ (("https://mangapark.net/manga"
"/ad-astra-per-aspera-hata-kenjirou/i662054/c001.2/1"), {
"count": 40,
"keyword": "8e9cce4ed0e25d12a45e02f840d6f32ef838e257",
}),
- ("https://mangapark.me/manga/gekkan-shoujo-nozaki-kun/i655476/c70/1", {
+ ("https://mangapark.net/manga/gekkan-shoujo-nozaki-kun/i655476/c70", {
"count": 15,
"keyword": "19f730617074d65f91c0781f429de324890925bf",
}),
- ("https://mangapark.net/manga/gosu/i811615/c55/1"),
+ ("https://mangapark.me/manga/gosu/i811615/c55/1"),
("https://mangapark.com/manga/gosu/i811615/c55/1"),
)
@@ -78,7 +88,10 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
if not data["path"]:
raise exception.NotFoundError("chapter")
+
self.parse_chapter_path(data["path"], data)
+ if "chapter" not in data:
+ self.parse_chapter_title(data["title"], data)
data["manga"], _, data["type"] = data["manga"].rpartition(" ")
data["manga"] = text.unescape(data["manga"])
@@ -89,8 +102,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
return data
def images(self, page):
- data = json.loads(text.extract(
- page, "var _load_pages =", ";")[0] or "[]")
+ data = json.loads(text.extract(page, "var _load_pages =", ";")[0])
return [
(text.urljoin(self.root, item["u"]), {
"width": text.parse_int(item["w"]),
@@ -101,16 +113,16 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
- """Extractor for manga from mangapark.me"""
+ """Extractor for manga from mangapark.net"""
chapterclass = MangaparkChapterExtractor
pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
r"(/manga/[^/?&#]+)/?$")
test = (
- ("https://mangapark.me/manga/aria", {
- "url": "a58be23ef3874fe9705b0b41dd462b67eaaafd9a",
- "keyword": "b3b5a30aa2a326bc0ca8b74c65b5ecd4bf676ebf",
+ ("https://mangapark.net/manga/aria", {
+ "url": "9b0b31e4992260876f56d7bfc8ff0ae71295c4f4",
+ "keyword": "6e44744a28d01b889b1e8291847abd84b591590d",
}),
- ("https://mangapark.net/manga/aria"),
+ ("https://mangapark.me/manga/aria"),
("https://mangapark.com/manga/aria"),
)
@@ -128,13 +140,22 @@ class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
data["stream"] = text.parse_int(text.extract(stream, '', '"')[0])
for chapter in text.extract_iter(stream, '<li ', '</li>'):
- path , pos = text.extract(chapter, 'href="', '"')
- title, pos = text.extract(chapter, '>: </span>', '<', pos)
- count, pos = text.extract(chapter, ' of ', ' ', pos)
+ path , pos = text.extract(chapter, 'href="', '"')
+ title1, pos = text.extract(chapter, '>', '<', pos)
+ title2, pos = text.extract(chapter, '>: </span>', '<', pos)
+ count , pos = text.extract(chapter, ' of ', ' ', pos)
self.parse_chapter_path(path[8:], data)
- data["title"] = title.strip() if title else ""
+ if "chapter" not in data:
+ self.parse_chapter_title(title1, data)
+
+ if title2:
+ data["title"] = title2.strip()
+ else:
+ data["title"] = title1.partition(":")[2].strip()
+
data["count"] = text.parse_int(count)
results.append((self.root + path, data.copy()))
+ data.pop("chapter", None)
return results
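
The new parse_chapter_title fallback recovers volume and chapter numbers from free-form titles when the URL path lacks them. The regex from the diff, wrapped into a runnable helper:

    import re

    def parse_chapter_title(title):
        match = re.search(r"(?i)(?:vol(?:ume)?[ .]*(\d+) )?"
                          r"ch(?:apter)?[ .]*(\d+)(\.\w+)?", title)
        if not match:
            return None
        vol, ch, minor = match.groups()
        return {
            "volume" : int(vol) if vol else 0,
            "chapter": int(ch),
            "chapter_minor": minor or "",
        }

    print(parse_chapter_title("Vol.2 Ch.15.5: Aftermath"))
    # {'volume': 2, 'chapter': 15, 'chapter_minor': '.5'}
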
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 21afeae..1f10319 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -38,7 +38,7 @@ class NewgroundsExtractor(Extractor):
try:
post = self.extract_post(post_url)
url = post.get("url")
- except OSError:
+ except Exception:
url = None
if url:
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index dfe31e3..a936370 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -16,8 +16,8 @@ class NozomiExtractor(Extractor):
"""Base class for nozomi extractors"""
category = "nozomi"
root = "https://nozomi.la"
- filename_fmt = "{postid}.{extension}"
- archive_fmt = "{postid}"
+ filename_fmt = "{postid} {dataid}.{extension}"
+ archive_fmt = "{dataid}"
def items(self):
yield Message.Version, 1
@@ -37,24 +37,27 @@ class NozomiExtractor(Extractor):
post_id, response.status_code, response.reason)
continue
- image = response.json()
- image["tags"] = self._list(image.get("general"))
- image["artist"] = self._list(image.get("artist"))
- image["copyright"] = self._list(image.get("copyright"))
- image["character"] = self._list(image.get("character"))
- image["is_video"] = bool(image.get("is_video"))
- image["date"] = text.parse_datetime(
- image["date"] + ":00", "%Y-%m-%d %H:%M:%S%z")
- image["url"] = text.urljoin(self.root, image["imageurl"])
- text.nameext_from_url(image["url"], image)
- image.update(data)
+ post = response.json()
+ post["tags"] = self._list(post.get("general"))
+ post["artist"] = self._list(post.get("artist"))
+ post["copyright"] = self._list(post.get("copyright"))
+ post["character"] = self._list(post.get("character"))
+ post["date"] = text.parse_datetime(
+ post["date"] + ":00", "%Y-%m-%d %H:%M:%S%z")
+ post.update(data)
+ images = post["imageurls"]
for key in ("general", "imageurl", "imageurls"):
- if key in image:
- del image[key]
+ if key in post:
+ del post[key]
- yield Message.Directory, image
- yield Message.Url, image["url"], image
+ yield Message.Directory, post
+ for image in images:
+ post["url"] = url = text.urljoin(self.root, image["imageurl"])
+ text.nameext_from_url(url, post)
+ post["is_video"] = bool(image.get("is_video"))
+ post["dataid"] = post["filename"]
+ yield Message.Url, url, post
def metadata(self):
return {}
@@ -64,9 +67,7 @@ class NozomiExtractor(Extractor):
@staticmethod
def _list(src):
- if not src:
- return []
- return [x["tagname_display"] for x in src]
+ return [x["tagname_display"] for x in src] if src else ()
@staticmethod
def _unpack(b):
@@ -78,29 +79,37 @@ class NozomiPostExtractor(NozomiExtractor):
"""Extractor for individual posts on nozomi.la"""
subcategory = "post"
pattern = r"(?:https?://)?nozomi\.la/post/(\d+)"
- test = ("https://nozomi.la/post/3649262.html", {
- "url": "f4522adfc8159355fd0476de28761b5be0f02068",
- "content": "cd20d2c5149871a0b80a1b0ce356526278964999",
- "keyword": {
- "artist" : ["hammer (sunset beach)"],
- "character": ["patchouli knowledge"],
- "copyright": ["touhou"],
- "dataid" : "re:aaa9f7c632cde1e1a5baaff3fb6a6d857ec73df7fdc5cf5a",
- "date" : "dt:2016-07-26 02:32:03",
- "extension": "jpg",
- "favorites": int,
- "filename" : str,
- "height" : 768,
- "is_video" : False,
- "postid" : 3649262,
- "source" : "danbooru",
- "sourceid" : 2434215,
- "tags" : list,
- "type" : "jpg",
- "url" : str,
- "width" : 1024,
- },
- })
+ test = (
+ ("https://nozomi.la/post/3649262.html", {
+ "url": "f4522adfc8159355fd0476de28761b5be0f02068",
+ "content": "cd20d2c5149871a0b80a1b0ce356526278964999",
+ "keyword": {
+ "artist" : ["hammer (sunset beach)"],
+ "character": ["patchouli knowledge"],
+ "copyright": ["touhou"],
+ "dataid" : "re:aaa9f7c632cde1e1a5baaff3fb6a6d857ec73df7fdc5",
+ "date" : "dt:2016-07-26 02:32:03",
+ "extension": "jpg",
+ "favorites": int,
+ "filename" : str,
+ "height" : 768,
+ "is_video" : False,
+ "postid" : 3649262,
+ "source" : "danbooru",
+ "sourceid" : 2434215,
+ "tags" : list,
+ "type" : "jpg",
+ "url" : str,
+ "width" : 1024,
+ },
+ }),
+ # multiple images per post
+ ("https://nozomi.la/post/25588032.html", {
+ "url": "6aa3b7db385abcc9d374bdffd19187bccbf8f228",
+ "keyword": "0aa99cbaaeada2984a1fbf912274409c6ba106d4",
+ "count": 7,
+ }),
+ )
def __init__(self, match):
NozomiExtractor.__init__(self, match)
@@ -118,8 +127,8 @@ class NozomiTagExtractor(NozomiExtractor):
pattern = r"(?:https?://)?nozomi\.la/tag/([^/?&#]+)-\d+\."
test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", {
"pattern": r"^https://i.nozomi.la/\w/\w\w/\w+\.\w+$",
- "count": ">= 75",
- "range": "1-75",
+ "count": ">= 25",
+ "range": "1-25",
})
def __init__(self, match):
@@ -182,4 +191,4 @@ class NozomiSearchExtractor(NozomiExtractor):
else:
result.update(items)
- return result
+ return sorted(result, reverse=True)
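
The nozomi rewrite fans a single post record out into one download per image and keys the archive on the per-file "dataid" instead of the post ID. A sketch of that fan-out, with urllib's urljoin standing in for text.urljoin:

    from urllib.parse import urljoin

    def post_files(post, root="https://nozomi.la"):
        """Yield one (url, metadata) pair per image of a post."""
        images = post.pop("imageurls")
        for num, image in enumerate(images, 1):
            url = urljoin(root, image["imageurl"])
            yield url, dict(post, url=url, num=num,
                            is_video=bool(image.get("is_video")))
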
diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py
index 35f9f91..41b1039 100644
--- a/gallery_dl/extractor/piczel.py
+++ b/gallery_dl/extractor/piczel.py
@@ -10,6 +10,7 @@
from .common import Extractor, Message
from .. import text
+import json
class PiczelExtractor(Extractor):
@@ -49,7 +50,6 @@ class PiczelExtractor(Extractor):
def _pagination(self, url, folder_id=None):
params = {
- "hideNsfw" : "false",
"from_id" : None,
"folder_id": folder_id,
}
@@ -59,7 +59,10 @@ class PiczelExtractor(Extractor):
if not data:
return
params["from_id"] = data[-1]["id"]
- yield from data
+
+ for post in data:
+ if not folder_id or folder_id == post["folder_id"]:
+ yield post
class PiczelUserExtractor(PiczelExtractor):
@@ -97,7 +100,7 @@ class PiczelFolderExtractor(PiczelExtractor):
def posts(self):
url = "{}/api/users/{}/gallery".format(self.root, self.user)
- return self._pagination(url, self.folder_id)
+ return self._pagination(url, int(self.folder_id))
class PiczelImageExtractor(PiczelExtractor):
@@ -134,5 +137,8 @@ class PiczelImageExtractor(PiczelExtractor):
self.image_id = match.group(1)
def posts(self):
- url = "{}/api/gallery/image/{}".format(self.root, self.image_id)
- return (self.request(url).json(),)
+ url = "{}/gallery/image/{}".format(self.root, self.image_id)
+ page = self.request(url).text
+ data = json.loads(text.extract(
+ page, 'window.__PRELOADED_STATE__ =', '</script>')[0])
+ return (data["gallery"]["images"]["byId"][self.image_id],)
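
With the /api/gallery/image endpoint apparently gone, the piczel extractor now reads the JSON blob the site embeds in its HTML. A generic sketch of that technique (the marker string is the one from the diff; the </script> bound assumes the blob is the last thing in its script tag):

    import json
    import requests

    def preloaded_state(url, marker="window.__PRELOADED_STATE__ ="):
        """Extract the JSON state a single-page app embeds in its HTML."""
        page = requests.get(url).text
        start = page.index(marker) + len(marker)
        end = page.index("</script>", start)
        return json.loads(page[start:end].strip())

    # hypothetical usage:
    # state = preloaded_state("https://piczel.tv/gallery/image/123456")
    # post = state["gallery"]["images"]["byId"]["123456"]
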
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index 521b034..b21ad32 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -101,22 +101,22 @@ class SexcomPinExtractor(SexcomExtractor):
pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+)(?!.*#related$)"
test = (
# picture
- ("https://www.sex.com/pin/56714360/", {
- "pattern": "https://cdn.sex.com/images/.+/2018/10/02/20037816.jpg",
- "content": "e579e3283fea812d0545a3f79734b79bc3c51acb",
+ ("https://www.sex.com/pin/21241874-sexy-ecchi-girls-166/", {
+ "pattern": "https://cdn.sex.com/images/.+/2014/08/26/7637609.jpg",
+ "content": "ebe1814dadfebf15d11c6af4f6afb1a50d6c2a1c",
"keyword": {
"comments" : int,
- "date" : "dt:2018-10-02 21:18:17",
+ "date" : "dt:2014-10-19 15:45:44",
"extension": "jpg",
- "filename" : "20037816",
+ "filename" : "7637609",
"likes" : int,
- "pin_id" : 56714360,
+ "pin_id" : 21241874,
"repins" : int,
"tags" : list,
"thumbnail": str,
- "title" : "Pin #56714360",
+ "title" : "Sexy Ecchi Girls 166",
"type" : "picture",
- "uploader" : "alguem",
+ "uploader" : "mangazeta",
"url" : str,
},
}),
@@ -149,8 +149,8 @@ class SexcomRelatedPinExtractor(SexcomPinExtractor):
subcategory = "related-pin"
directory_fmt = ("{category}", "related {original_pin[pin_id]}")
pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+).*#related$"
- test = ("https://www.sex.com/pin/56714360/#related", {
- "count": ">= 22",
+ test = ("https://www.sex.com/pin/21241874/#related", {
+ "count": ">= 20",
})
def metadata(self):
diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py
index 82a61da..abf9995 100644
--- a/gallery_dl/extractor/simplyhentai.py
+++ b/gallery_dl/extractor/simplyhentai.py
@@ -143,6 +143,7 @@ class SimplyhentaiVideoExtractor(Extractor):
"pattern": r"https://www\.googleapis\.com/drive/v3/files"
r"/0B1ecQ8ZVLm3JcHZzQzBnVy1ZUmc\?alt=media&key=[\w-]+",
"keyword": "706790708b14773efc1e075ddd3b738a375348a5",
+ "options": (("verify", False),),
"count": 1,
}),
(("https://videos.simply-hentai.com"
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 2a04463..cbb075c 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -459,7 +459,7 @@ class TwitterBookmarkExtractor(TwitterExtractor):
"Referer": self.root + "/i/bookmarks",
"x-csrf-token": self.session.cookies.get("ct0"),
"x-twitter-active-user": "yes",
- "x-twitter-auth-type": "Auth2Session",
+ "x-twitter-auth-type": "OAuth2Session",
"x-twitter-client-language": "en",
}
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index f084950..9e2f8a6 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -149,12 +149,13 @@ def replace_std_streams(errors="replace"):
"""Replace standard streams and set their error handlers to 'errors'"""
for name in ("stdout", "stdin", "stderr"):
stream = getattr(sys, name)
- setattr(sys, name, stream.__class__(
- stream.buffer,
- errors=errors,
- newline=stream.newlines,
- line_buffering=stream.line_buffering,
- ))
+ if stream:
+ setattr(sys, name, stream.__class__(
+ stream.buffer,
+ errors=errors,
+ newline=stream.newlines,
+ line_buffering=stream.line_buffering,
+ ))
# --------------------------------------------------------------------
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 232047c..47fad9e 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -726,6 +726,7 @@ class PathFormat():
def set_directory(self, kwdict):
"""Build directory path and create it if necessary"""
+ windows = os.name == "nt"
# Build path segments by applying 'kwdict' to directory format strings
segments = []
@@ -733,6 +734,9 @@ class PathFormat():
try:
for formatter in self.directory_formatters:
segment = formatter(kwdict).strip()
+ if windows:
+ # remove trailing dots and spaces (#647)
+ segment = segment.rstrip(". ")
if segment:
append(self.clean_segment(segment))
except Exception as exc:
@@ -747,7 +751,7 @@ class PathFormat():
directory += sep
self.directory = directory
- if os.name == "nt":
+ if windows:
# Enable longer-than-260-character paths on Windows
directory = "\\\\?\\" + os.path.abspath(directory)
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 9171f15..37d133e 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.13.2"
+__version__ = "1.13.3"
diff --git a/test/test_results.py b/test/test_results.py
index 538abfa..b697d15 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -27,11 +27,8 @@ TRAVIS_SKIP = {
# temporary issues, etc.
BROKEN = {
- "35photo",
- "mangapark",
+ "myportfolio",
"photobucket",
- "sexcom",
- "hentaicafe",
"worldthree",
}