aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatarUnit 193 <unit193@ubuntu.com>2020-01-21 01:08:43 -0500
committerLibravatarUnit 193 <unit193@ubuntu.com>2020-01-21 01:08:43 -0500
commit4366125d2580982abb57bc65a26fc1fb8ef2a5df (patch)
tree743a26348e360c8b7f5eb89d4f704b015e902e68
parentbc435e826dbe37969d9cbe280f58810d054932cc (diff)
New upstream version 1.12.3upstream/1.12.3
-rw-r--r--PKG-INFO12
-rw-r--r--README.rst9
-rw-r--r--data/man/gallery-dl.12
-rw-r--r--data/man/gallery-dl.conf.538
-rw-r--r--gallery_dl.egg-info/PKG-INFO12
-rw-r--r--gallery_dl.egg-info/SOURCES.txt2
-rw-r--r--gallery_dl.egg-info/requires.txt4
-rw-r--r--gallery_dl/downloader/common.py7
-rw-r--r--gallery_dl/downloader/http.py10
-rw-r--r--gallery_dl/extractor/__init__.py3
-rw-r--r--gallery_dl/extractor/common.py6
-rw-r--r--gallery_dl/extractor/erolord.py64
-rw-r--r--gallery_dl/extractor/hentaifoundry.py88
-rw-r--r--gallery_dl/extractor/hitomi.py32
-rw-r--r--gallery_dl/extractor/imgur.py2
-rw-r--r--gallery_dl/extractor/issuu.py36
-rw-r--r--gallery_dl/extractor/livedoor.py3
-rw-r--r--gallery_dl/extractor/mangadex.py25
-rw-r--r--gallery_dl/extractor/pinterest.py9
-rw-r--r--gallery_dl/extractor/pixiv.py73
-rw-r--r--gallery_dl/extractor/shopify.py3
-rw-r--r--gallery_dl/extractor/slickpic.py5
-rw-r--r--gallery_dl/extractor/twitter.py34
-rw-r--r--gallery_dl/extractor/xhamster.py2
-rw-r--r--gallery_dl/job.py3
-rw-r--r--gallery_dl/postprocessor/__init__.py3
-rw-r--r--gallery_dl/postprocessor/compare.py62
-rw-r--r--gallery_dl/postprocessor/zip.py21
-rw-r--r--gallery_dl/util.py33
-rw-r--r--gallery_dl/version.py4
-rw-r--r--setup.py4
-rw-r--r--test/test_downloader.py1
-rw-r--r--test/test_results.py13
33 files changed, 378 insertions, 247 deletions
diff --git a/PKG-INFO b/PKG-INFO
index 0e6429e..e86eb0c 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.12.2
+Version: 1.12.3
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -33,7 +33,6 @@ Description: ==========
- FFmpeg_: Pixiv Ugoira to WebM conversion
- youtube-dl_: Video downloads
- - pyOpenSSL_: Access Cloudflare protected sites
Installation
@@ -95,10 +94,10 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.3/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.3/gallery-dl.bin>`__
- These executables include a Python 3.7 interpreter
+ These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -241,7 +240,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.2.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.3.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
@@ -282,5 +281,4 @@ Classifier: Topic :: Internet :: WWW/HTTP
Classifier: Topic :: Multimedia :: Graphics
Classifier: Topic :: Utilities
Requires-Python: >=3.4
-Provides-Extra: cloudflare
Provides-Extra: video
diff --git a/README.rst b/README.rst
index e6846b6..f450c81 100644
--- a/README.rst
+++ b/README.rst
@@ -22,7 +22,6 @@ Optional
- FFmpeg_: Pixiv Ugoira to WebM conversion
- youtube-dl_: Video downloads
-- pyOpenSSL_: Access Cloudflare protected sites
Installation
@@ -84,10 +83,10 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.3/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.3/gallery-dl.bin>`__
-These executables include a Python 3.7 interpreter
+These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -230,7 +229,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.2.tar.gz
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.3.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 7249537..a2cd77d 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2020-01-05" "1.12.2" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2020-01-19" "1.12.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 7e7993a..d7bb941 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2020-01-05" "1.12.2" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2020-01-19" "1.12.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1207,6 +1207,16 @@ Extract tweet text as \f[I]content\f[] metadata.
.IP "Description:" 4
Extract images from retweets.
+.SS extractor.twitter.twitpic
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Extract \f[I]TwitPic <https://twitpic.com/>\f[] embeds.
+
.SS extractor.twitter.videos
.IP "Type:" 6
\f[I]bool\f[] or \f[I]string\f[]
@@ -1594,6 +1604,32 @@ be stored in them.
Files with an extension not listed will be ignored and stored
in their default location.
+.SS compare.action
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"replace"\f[]
+
+.IP "Description:" 4
+The action to take when files do not compare as equal.
+
+
+* \f[I]"replace"\f[]: Replace/Overwrite the old version with the new one
+
+* \f[I]"enumerate"\f[]: Add an enumeration index to the filename of the new
+version like \f[I]skip = "enumerate" <extractor.*.skip_>\f[]
+
+.SS compare.shallow
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Only compare file sizes. Do not read and compare their content.
+
.SS exec.async
.IP "Type:" 6
\f[I]bool\f[]
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 3aa6d61..8f4897f 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.12.2
+Version: 1.12.3
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -33,7 +33,6 @@ Description: ==========
- FFmpeg_: Pixiv Ugoira to WebM conversion
- youtube-dl_: Video downloads
- - pyOpenSSL_: Access Cloudflare protected sites
Installation
@@ -95,10 +94,10 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.2/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.12.3/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.12.3/gallery-dl.bin>`__
- These executables include a Python 3.7 interpreter
+ These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -241,7 +240,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.2.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.12.3.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
@@ -282,5 +281,4 @@ Classifier: Topic :: Internet :: WWW/HTTP
Classifier: Topic :: Multimedia :: Graphics
Classifier: Topic :: Utilities
Requires-Python: >=3.4
-Provides-Extra: cloudflare
Provides-Extra: video
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 513b6c7..bbe9bbe 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -48,7 +48,6 @@ gallery_dl/extractor/deviantart.py
gallery_dl/extractor/directlink.py
gallery_dl/extractor/dynastyscans.py
gallery_dl/extractor/e621.py
-gallery_dl/extractor/erolord.py
gallery_dl/extractor/exhentai.py
gallery_dl/extractor/fallenangels.py
gallery_dl/extractor/flickr.py
@@ -148,6 +147,7 @@ gallery_dl/extractor/yuki.py
gallery_dl/postprocessor/__init__.py
gallery_dl/postprocessor/classify.py
gallery_dl/postprocessor/common.py
+gallery_dl/postprocessor/compare.py
gallery_dl/postprocessor/exec.py
gallery_dl/postprocessor/metadata.py
gallery_dl/postprocessor/mtime.py
diff --git a/gallery_dl.egg-info/requires.txt b/gallery_dl.egg-info/requires.txt
index 821055e..44dd863 100644
--- a/gallery_dl.egg-info/requires.txt
+++ b/gallery_dl.egg-info/requires.txt
@@ -1,8 +1,4 @@
requests>=2.11.0
-[cloudflare]
-pyOpenSSL>=19.0.0
-cryptography>=2.8.0
-
[video]
youtube-dl
diff --git a/gallery_dl/downloader/common.py b/gallery_dl/downloader/common.py
index 596c956..eca1284 100644
--- a/gallery_dl/downloader/common.py
+++ b/gallery_dl/downloader/common.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2019 Mike Fährmann
+# Copyright 2014-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -20,10 +20,13 @@ class DownloaderBase():
def __init__(self, extractor, output):
self.session = extractor.session
self.out = output
- self.log = logging.getLogger("downloader." + self.scheme)
self.part = self.config("part", True)
self.partdir = self.config("part-directory")
+ self.log = logging.getLogger("downloader." + self.scheme)
+ self.log.job = extractor.log.job
+ self.log.extractor = extractor
+
if self.partdir:
self.partdir = util.expand_path(self.partdir)
os.makedirs(self.partdir, exist_ok=True)
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index fab96ba..9cd2aa6 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2019 Mike Fährmann
+# Copyright 2014-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -8,12 +8,11 @@
"""Downloader module for http:// and https:// URLs"""
-import os
import time
import mimetypes
from requests.exceptions import RequestException, ConnectionError, Timeout
from .common import DownloaderBase
-from .. import text
+from .. import text, util
from ssl import SSLError
try:
@@ -57,10 +56,7 @@ class HttpDownloader(DownloaderBase):
finally:
# remove file from incomplete downloads
if self.downloading and not self.part:
- try:
- os.unlink(pathfmt.temppath)
- except (OSError, AttributeError):
- pass
+ util.remove_file(pathfmt.temppath)
def _download_impl(self, url, pathfmt):
response = None
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 9ff3746..66203fe 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -25,7 +25,6 @@ modules = [
"deviantart",
"dynastyscans",
"e621",
- "erolord",
"exhentai",
"fallenangels",
"flickr",
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 380bcc7..55b15d4 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -344,7 +344,11 @@ class GalleryExtractor(Extractor):
         for data[self.enum], (url, imgdata) in images:
             if imgdata:
                 data.update(imgdata)
-            yield Message.Url, url, text.nameext_from_url(url, data)
+                if "extension" not in imgdata:
+                    text.nameext_from_url(url, data)
+            else:
+                text.nameext_from_url(url, data)
+            yield Message.Url, url, data
def login(self):
"""Login and set necessary cookies"""
diff --git a/gallery_dl/extractor/erolord.py b/gallery_dl/extractor/erolord.py
deleted file mode 100644
index 8628039..0000000
--- a/gallery_dl/extractor/erolord.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extract images from http://erolord.com/"""
-
-from .common import GalleryExtractor
-from .. import text, util
-import json
-
-
-class ErolordGalleryExtractor(GalleryExtractor):
- """Extractor for image galleries from erolord.com"""
- category = "erolord"
- root = "http://erolord.com"
- pattern = r"(?:https?://)?(?:www\.)?erolord.com(/doujin/(\d+)/?)"
- test = ("http://erolord.com/doujin/2189055/", {
- "url": "7ce6d10a3934102b95c9718a34ccd3d35f55d85f",
- "keyword": {
- "title" : "Amazon No Hiyaku | Amazon Elixir",
- "gallery_id": 2189055,
- "count" : 16,
- "artist" : ["Morris"],
- "group" : list,
- "parody" : list,
- "characters": list,
- "tags" : list,
- "lang" : "en",
- "language" : "English",
- },
- })
-
- def __init__(self, match):
- GalleryExtractor.__init__(self, match)
- self.gallery_id = match.group(2)
-
- def metadata(self, page):
- extr = text.extract_from(page)
- split = text.split_html
- title, _, language = extr('<h1 class="t64">', '</h1>').rpartition(" ")
- language = language.strip("[]")
-
- return {
- "gallery_id": text.parse_int(self.gallery_id),
- "title" : text.unescape(title),
- # double quotes for anime, circle, tags
- # single quotes for characters, artist
- "parody" : split(extr('class="sp1">Anime:' , "</div>\r")),
- "characters": split(extr("class='sp1'>Characters:", "</div>\r")),
- "artist" : split(extr("class='sp1'>Artist:" , "</div>\r")),
- "group" : split(extr('class="sp1">Circle:' , "</div>\r")),
- "tags" : split(extr('class="sp1">Tags:' , "</div>\r")),
- "lang" : util.language_to_code(language),
- "language" : language,
- }
-
- def images(self, page):
- url = self.root + text.extract(page, 'id="d1"><a href="', '"')[0]
- imgs = text.extract(self.request(url).text, 'var imgs=', ';')[0]
- return [(self.root + path, None) for path in json.loads(imgs)]
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index b6b9876..19f9481 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://www.hentai-foundry.com/"""
+"""Extractors for https://www.hentai-foundry.com/"""
from .common import Extractor, Message
from .. import text, util, exception
@@ -35,9 +35,9 @@ class HentaifoundryExtractor(Extractor):
self.set_filters()
for page_url in util.advance(self.get_image_pages(), self.start_post):
- url, image = self.get_image_metadata(page_url)
+ image = self.get_image_metadata(page_url)
image.update(data)
- yield Message.Url, url, image
+ yield Message.Url, image["src"], image
def skip(self, num):
pages, posts = divmod(num, self.per_page)
@@ -62,28 +62,33 @@ class HentaifoundryExtractor(Extractor):
return
num += 1
- def get_image_metadata(self, page_url):
+ def get_image_metadata(self, path):
"""Collect url and metadata from an image page"""
- page = self.request(text.urljoin(self.root, page_url)).text
- index = page_url.rsplit("/", 2)[1]
- title , pos = text.extract(page, '<title>', '</title>')
- _ , pos = text.extract(page, 'id="picBox"', '', pos)
- width , pos = text.extract(page, 'width="', '"', pos)
- height, pos = text.extract(page, 'height="', '"', pos)
- url , pos = text.extract(page, 'src="', '"', pos)
-
- title, _, artist = title.rpartition(" - ")[0].rpartition(" by ")
-
- data = text.nameext_from_url(url, {
- "title": text.unescape(title),
- "artist": text.unescape(artist),
- "index": text.parse_int(index),
- "width": text.parse_int(width),
- "height": text.parse_int(height),
- })
- if not data["extension"]:
- data["extension"] = "jpg"
- return text.urljoin(self.root, url), data
+ url = text.urljoin(self.root, path)
+ page = self.request(url).text
+ extr = text.extract_from(page, page.index('id="picBox"'))
+
+ data = {
+ "title" : text.unescape(extr('class="imageTitle">', '<')),
+ "artist" : text.unescape(extr('/profile">', '<')),
+ "width" : text.parse_int(extr('width="', '"')),
+ "height" : text.parse_int(extr('height="', '"')),
+ "index" : text.parse_int(path.rsplit("/", 2)[1]),
+ "src" : "https:" + text.unescape(extr('src="', '"')),
+ "description": text.unescape(text.remove_html(extr(
+ '>Description</div>', '</section>')
+ .replace("\r\n", "\n"), "", "")),
+ "ratings" : [text.unescape(r) for r in text.extract_iter(extr(
+ "class='ratings_box'", "</div>"), "title='", "'")],
+ "media" : text.unescape(extr("Media</b></td>\t\t<td>", "<")),
+ "date" : text.parse_datetime(extr("datetime='", "'")),
+ "views" : text.parse_int(extr("Views</b></td>\t\t<td>", "<")),
+ "tags" : text.split_html(extr(
+ "<td><b>Keywords</b></td>", "</tr>"))[::2],
+ "score" : text.parse_int(extr('Score</b></td>\t\t<td>', '<')),
+ }
+
+ return text.nameext_from_url(data["src"], data)
def set_filters(self):
"""Set site-internal filters to show all images"""
@@ -127,7 +132,6 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor):
test = (
("https://www.hentai-foundry.com/pictures/user/Tenpura", {
"url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28",
- "keyword": "63ad576f87f82fa166ca4676761762f7f8496cf5",
}),
("https://www.hentai-foundry.com/pictures/user/Tenpura/page/3"),
("https://www.hentai-foundry.com/user/Tenpura/profile"),
@@ -153,7 +157,6 @@ class HentaifoundryScrapsExtractor(HentaifoundryExtractor):
test = (
("https://www.hentai-foundry.com/pictures/user/Evulchibi/scraps", {
"url": "7cd9c6ec6258c4ab8c44991f7731be82337492a7",
- "keyword": "40b07a9822b6b868fea2fa9b1c0b212ae8735da7",
}),
("https://www.hentai-foundry.com"
"/pictures/user/Evulchibi/scraps/page/3"),
@@ -181,7 +184,6 @@ class HentaifoundryFavoriteExtractor(HentaifoundryExtractor):
test = (
("https://www.hentai-foundry.com/user/Tenpura/faves/pictures", {
"url": "56f9ae2e89fe855e9fe1da9b81e5ec6212b0320b",
- "keyword": "2b9478725e66d46ea043fa87476bbd28546958e7",
}),
("https://www.hentai-foundry.com"
"/user/Tenpura/faves/pictures/page/3"),
@@ -201,7 +203,10 @@ class HentaifoundryRecentExtractor(HentaifoundryExtractor):
archive_fmt = "r_{index}"
pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
r"/pictures/recent/(\d+-\d+-\d+)(?:/page/(\d+))?")
- test = ("http://www.hentai-foundry.com/pictures/recent/2018-09-20",)
+ test = ("http://www.hentai-foundry.com/pictures/recent/2018-09-20", {
+ "pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/]+/\d+/",
+ "range": "20-30",
+ })
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match, "", match.group(2))
@@ -220,7 +225,10 @@ class HentaifoundryPopularExtractor(HentaifoundryExtractor):
archive_fmt = "p_{index}"
pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
r"/pictures/popular(?:/page/(\d+))?")
- test = ("http://www.hentai-foundry.com/pictures/popular",)
+ test = ("http://www.hentai-foundry.com/pictures/popular", {
+ "pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/]+/\d+/",
+ "range": "20-30",
+ })
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match, "", match.group(1))
@@ -236,8 +244,22 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
(("https://www.hentai-foundry.com"
"/pictures/user/Tenpura/407501/shimakaze"), {
"url": "fbf2fd74906738094e2575d2728e8dc3de18a8a3",
- "keyword": "cbb9381e6c2acce58db4adf4efc0ad7d138bddc4",
"content": "91bf01497c39254b6dfb234a18e8f01629c77fd1",
+ "keyword": {
+ "artist" : "Tenpura",
+ "date" : "type:datetime",
+ "description": "Thank you!",
+ "height" : 700,
+ "index" : 407501,
+ "media" : "Other digital art",
+ "ratings": ["Sexual content", "Contains female nudity"],
+ "score" : int,
+ "tags" : ["kancolle", "kantai", "collection", "shimakaze"],
+ "title" : "shimakaze",
+ "user" : "Tenpura",
+ "views" : int,
+ "width" : 495,
+ },
}),
("https://www.hentai-foundry.com/pictures/user/Tenpura/340853/", {
"exception": exception.HttpError,
@@ -253,12 +275,12 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
def items(self):
post_url = "{}/pictures/user/{}/{}/?enterAgree=1".format(
self.root, self.user, self.index)
- url, data = self.get_image_metadata(post_url)
+ data = self.get_image_metadata(post_url)
data["user"] = self.user
yield Message.Version, 1
yield Message.Directory, data
- yield Message.Url, url, data
+ yield Message.Url, data["src"], data
def skip(self, _):
return 0
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index e53b051..d6fdcf2 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -23,7 +23,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
r"/(?:[^/?&#]+-)?(\d+)")
test = (
("https://hitomi.la/galleries/867789.html", {
- "pattern": r"https://aa.hitomi.la/galleries/867789/\d+.jpg",
+ "pattern": r"https://[a-c]a.hitomi.la/images/./../[0-9a-f]+.jpg",
"keyword": "6701f8f588f119ef84cd29bdf99a399417b0a6a2",
"count": 16,
}),
@@ -34,12 +34,12 @@ class HitomiGalleryExtractor(GalleryExtractor):
}),
("https://hitomi.la/galleries/733697.html", {
# Game CG with scenes (#321)
- "url": "c2a84185f467450b8b9b72fbe40c0649029ce007",
+ "url": "21064f9e3c244aca87f1a91967a3fbe79032c4ce",
"count": 210,
}),
("https://hitomi.la/galleries/1045954.html", {
# fallback for galleries only available through /reader/ URLs
- "url": "055c898a36389719799d6bce76889cc4ea4421fc",
+ "url": "0a67f5e6c3c6a384b578e328f4817fa6ccdf856a",
"count": 1413,
}),
("https://hitomi.la/manga/amazon-no-hiyaku-867789.html"),
@@ -96,12 +96,6 @@ class HitomiGalleryExtractor(GalleryExtractor):
return data
def images(self, page):
- # see https://ltn.hitomi.la/common.js
- offset = text.parse_int(self.gallery_id[-1]) % 3
- subdomain = chr(97 + offset) + "a"
- base = "https://{}.hitomi.la/galleries/{}/".format(
- subdomain, self.gallery_id)
-
# set Referer header before image downloads (#239)
self.session.headers["Referer"] = self.gallery_url
@@ -109,10 +103,20 @@ class HitomiGalleryExtractor(GalleryExtractor):
url = "https://ltn.hitomi.la/galleries/{}.js".format(self.gallery_id)
page = self.request(url).text
- return [
- (base + image["name"], None)
- for image in json.loads(page.partition("=")[2])
- ]
+ result = []
+ for image in json.loads(page.partition("=")[2]):
+ ihash = image["hash"]
+ idata = text.nameext_from_url(image["name"])
+
+ # see https://ltn.hitomi.la/common.js
+ offset = int(ihash[-3:-1], 16) % 3
+ url = "https://{}a.hitomi.la/images/{}/{}/{}.{}".format(
+ chr(97 + offset),
+ ihash[-1], ihash[-3:-1], ihash,
+ idata["extension"],
+ )
+ result.append((url, idata))
+ return result
@staticmethod
def _prep(value):
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 6ff6588..5084e80 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -153,7 +153,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
"is_album" : True,
"layout" : "blog",
"link" : "https://imgur.com/a/TcBmP",
- "nsfw" : True,
+ "nsfw" : bool,
"privacy" : "hidden",
"section" : None,
"title" : "138",
diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py
index 12d7487..49d68ef 100644
--- a/gallery_dl/extractor/issuu.py
+++ b/gallery_dl/extractor/issuu.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -32,23 +32,23 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
"count" : 36,
"keyword": {
"document": {
- "access" : "public",
- "contentRating": dict,
- "date" : "type:datetime",
- "description" : "re:Motions, the brand new publication by Is",
- "documentId" : r"re:\d+-d99ec95935f15091b040cb8060f05510",
- "documentName" : "motions-1-2019",
- "downloadState": "NOT_AVAILABLE",
- "id" : r"re:\d+-d99ec95935f15091b040cb8060f05510",
- "isConverting" : False,
- "isQuarantined": False,
- "lang" : "en",
- "language" : "English",
- "pageCount" : 36,
- "publicationId": "d99ec95935f15091b040cb8060f05510",
- "sections" : list,
- "title" : "Motions by Issuu - Issue 1",
- "userName" : "issuu",
+ "access" : "public",
+ "articleStories": list,
+ "contentRating" : dict,
+ "date" : "type:datetime",
+ "description" : "re:Motions, the brand new publication by I",
+ "documentId" : r"re:\d+-d99ec95935f15091b040cb8060f05510",
+ "documentName" : "motions-1-2019",
+ "downloadState" : "NOT_AVAILABLE",
+ "id" : r"re:\d+-d99ec95935f15091b040cb8060f05510",
+ "isConverting" : False,
+ "isQuarantined" : False,
+ "lang" : "en",
+ "language" : "English",
+ "pageCount" : 36,
+ "publicationId" : "d99ec95935f15091b040cb8060f05510",
+ "title" : "Motions by Issuu - Issue 1",
+ "userName" : "issuu",
},
"extension": "jpg",
"filename" : r"re:page_\d+",
diff --git a/gallery_dl/extractor/livedoor.py b/gallery_dl/extractor/livedoor.py
index e47b7db..9d2383f 100644
--- a/gallery_dl/extractor/livedoor.py
+++ b/gallery_dl/extractor/livedoor.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -89,6 +89,7 @@ class LivedoorBlogExtractor(LivedoorExtractor):
("http://blog.livedoor.jp/zatsu_ke/", {
"range": "1-50",
"count": 50,
+ "archive": False,
"pattern": r"https?://livedoor.blogimg.jp/\w+/imgs/\w/\w/\w+\.\w+",
"keyword": {
"post": {
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 558aa9d..9fd9f3f 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2019 Mike Fährmann
+# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract manga-chapters and entire manga from https://mangadex.cc/"""
+"""Extractors for https://mangadex.org/"""
from .common import Extractor, Message
from .. import text, util
@@ -16,7 +16,7 @@ from ..cache import memcache
class MangadexExtractor(Extractor):
"""Base class for mangadex extractors"""
category = "mangadex"
- root = "https://mangadex.cc"
+ root = "https://mangadex.org"
# mangadex-to-iso639-1 codes
iso639_map = {
@@ -39,7 +39,7 @@ class MangadexExtractor(Extractor):
class MangadexChapterExtractor(MangadexExtractor):
- """Extractor for manga-chapters from mangadex.cc"""
+ """Extractor for manga-chapters from mangadex.org"""
subcategory = "chapter"
directory_fmt = (
"{category}", "{manga}",
@@ -47,11 +47,11 @@ class MangadexChapterExtractor(MangadexExtractor):
filename_fmt = (
"{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
archive_fmt = "{chapter_id}_{page}"
- pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc|com)/chapter/(\d+)"
+ pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)/chapter/(\d+)"
test = (
- ("https://mangadex.cc/chapter/122094", {
+ ("https://mangadex.org/chapter/122094", {
"keyword": "1c834dca33025f521e1874aee1f71c51e28ebf99",
- # "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f",
+ "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f",
}),
# oneshot
("https://mangadex.cc/chapter/138086", {
@@ -107,14 +107,14 @@ class MangadexChapterExtractor(MangadexExtractor):
class MangadexMangaExtractor(MangadexExtractor):
- """Extractor for manga from mangadex.cc"""
+ """Extractor for manga from mangadex.org"""
subcategory = "manga"
categorytransfer = True
- pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc|com)"
+ pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)"
r"/(?:title|manga)/(\d+)")
test = (
- ("https://mangadex.cc/manga/2946/souten-no-koumori", {
- "pattern": r"https://mangadex.cc/chapter/\d+",
+ ("https://mangadex.org/manga/2946/souten-no-koumori", {
+ "pattern": r"https://mangadex.org/chapter/\d+",
"keywords": {
"manga": "Souten no Koumori",
"manga_id": 2946,
@@ -129,13 +129,12 @@ class MangadexMangaExtractor(MangadexExtractor):
"language": str,
},
}),
- ("https://mangadex.org/manga/13318/dagashi-kashi/chapters/2/", {
+ ("https://mangadex.cc/manga/13318/dagashi-kashi/chapters/2/", {
"count": ">= 100",
}),
("https://mangadex.org/title/13004/yorumori-no-kuni-no-sora-ni", {
"count": 0,
}),
- ("https://mangadex.org/title/2946/souten-no-koumori"),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index b72a896..24a0a55 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -60,9 +60,8 @@ class PinterestPinExtractor(PinterestExtractor):
test = (
("https://www.pinterest.com/pin/858146903966145189/", {
"url": "afb3c26719e3a530bb0e871c480882a801a4e8a5",
- # image version depends on CDN server used
- # "content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947",
- # "content": "4c435a66f6bb82bb681db2ecc888f76cf6c5f9ca",
+ "content": ("4c435a66f6bb82bb681db2ecc888f76cf6c5f9ca",
+ "d3e24bc9f7af585e8c23b9136956bd45a4d9b947"),
}),
("https://www.pinterest.com/pin/858146903966145188/", {
"exception": exception.NotFoundError,
@@ -171,9 +170,7 @@ class PinterestPinitExtractor(PinterestExtractor):
self.shortened_id)
response = self.request(url, method="HEAD", allow_redirects=False)
location = response.headers.get("Location")
- if not location or location in ("https://api.pinterest.com/None",
- "https://pin.it/None",
- "https://www.pinterest.com"):
+ if not location or not PinterestPinExtractor.pattern.match(location):
raise exception.NotFoundError("pin")
yield Message.Queue, location, {"_extractor": PinterestPinExtractor}
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 36fa0fe..8a10028 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -84,14 +84,20 @@ class PixivExtractor(Extractor):
class PixivUserExtractor(PixivExtractor):
"""Extractor for works of a pixiv-user"""
subcategory = "user"
- pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/"
- r"(?:member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"
+ pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
+ r"(?:en/)?users/(\d+)(?:/(?:artworks|illustrations|manga)"
+ r"(?:/([^/?&#]+))?)?/?(?:$|[?#])"
+ r"|member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"
r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))")
test = (
- ("http://www.pixiv.net/member_illust.php?id=173530", {
+ ("https://www.pixiv.net/en/users/173530/artworks", {
"url": "852c31ad83b6840bacbce824d85f2a997889efb7",
}),
# illusts with specific tag
+ (("https://www.pixiv.net/en/users/173530/artworks"
+ "/%E6%89%8B%E3%81%B6%E3%82%8D"), {
+ "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
+ }),
(("https://www.pixiv.net/member_illust.php?id=173530"
"&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
"url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
@@ -99,6 +105,10 @@ class PixivUserExtractor(PixivExtractor):
("http://www.pixiv.net/member_illust.php?id=173531", {
"exception": exception.NotFoundError,
}),
+ ("https://www.pixiv.net/en/users/173530"),
+ ("https://www.pixiv.net/en/users/173530/manga"),
+ ("https://www.pixiv.net/en/users/173530/illustrations"),
+ ("https://www.pixiv.net/member_illust.php?id=173530"),
("https://www.pixiv.net/u/173530"),
("https://www.pixiv.net/user/173530"),
("https://www.pixiv.net/mypage.php#id=173530"),
@@ -108,14 +118,19 @@ class PixivUserExtractor(PixivExtractor):
def __init__(self, match):
PixivExtractor.__init__(self, match)
- self.user_id = match.group(1) or match.group(3)
- self.query = text.parse_query(match.group(2))
+ u1, t1, u2, t2, u3 = match.groups()
+ if t1:
+ t1 = text.unquote(t1)
+ elif t2:
+ t2 = text.parse_query(t2).get("tag")
+ self.user_id = u1 or u2 or u3
+ self.tag = t1 or t2
def works(self):
works = self.api.user_illusts(self.user_id)
- if "tag" in self.query:
- tag = text.unquote(self.query["tag"]).lower()
+ if self.tag:
+ tag = self.tag.lower()
works = (
work for work in works
if tag in [t["name"].lower() for t in work["tags"]]
@@ -130,7 +145,7 @@ class PixivMeExtractor(PixivExtractor):
pattern = r"(?:https?://)?pixiv\.me/([^/?&#]+)"
test = (
("https://pixiv.me/del_shannon", {
- "url": "0b1a18c3e3553c44ee6e0ccc36a7fd906c498e8f",
+ "url": "29c295ce75150177e6b0a09089a949804c708fbf",
}),
("https://pixiv.me/del_shanno", {
"exception": exception.NotFoundError,
@@ -205,9 +220,13 @@ class PixivFavoriteExtractor(PixivExtractor):
directory_fmt = ("{category}", "bookmarks",
"{user_bookmark[id]} {user_bookmark[account]}")
archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}"
- pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
- r"/bookmark\.php(?:\?([^#]*))?")
+ pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/"
+ r"(?:(?:en/)?users/(\d+)/(bookmarks/artworks|following)"
+ r"|bookmark\.php(?:\?([^#]*))?)")
test = (
+ ("https://www.pixiv.net/en/users/173530/bookmarks/artworks", {
+ "url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
+ }),
("https://www.pixiv.net/bookmark.php?id=173530", {
"url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
}),
@@ -221,6 +240,11 @@ class PixivFavoriteExtractor(PixivExtractor):
"url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
}),
# followed users (#515)
+ ("https://www.pixiv.net/en/users/173530/following", {
+ "pattern": PixivUserExtractor.pattern,
+ "count": ">= 12",
+ }),
+ # followed users (#515)
("https://www.pixiv.net/bookmark.php?id=173530&type=user", {
"pattern": PixivUserExtractor.pattern,
"count": ">= 12",
@@ -231,13 +255,24 @@ class PixivFavoriteExtractor(PixivExtractor):
)
def __init__(self, match):
+ uid, kind, query = match.groups()
+
+ if query:
+ self.query = text.parse_query(query)
+ uid = self.query.get("id", -1)
+ if not uid:
+ self.subcategory = "bookmark"
+ elif self.query.get("type") == "user":
+ self.subcategory = "following"
+ self.items = self._items_following
+ else:
+ self.query = {}
+ if kind == "following":
+ self.subcategory = "following"
+ self.items = self._items_following
+
PixivExtractor.__init__(self, match)
- self.query = text.parse_query(match.group(1))
- if "id" not in self.query:
- self.subcategory = "bookmark"
- elif self.query.get("type") == "user":
- self.subcategory = "following"
- self.items = self._items_following
+ self.user_id = uid
def works(self):
tag = None
@@ -251,8 +286,8 @@ class PixivFavoriteExtractor(PixivExtractor):
return self.api.user_bookmarks_illust(self.user_id, tag, restrict)
def get_metadata(self, user=None):
- if "id" in self.query:
- user = self.api.user_detail(self.query["id"])
+ if self.user_id:
+ user = self.api.user_detail(self.user_id)
else:
self.api.login()
user = self.api.user
@@ -263,7 +298,7 @@ class PixivFavoriteExtractor(PixivExtractor):
def _items_following(self):
yield Message.Version, 1
- for preview in self.api.user_following(self.query["id"]):
+ for preview in self.api.user_following(self.user_id):
user = preview["user"]
user["_extractor"] = PixivUserExtractor
url = "https://www.pixiv.net/member.php?id={}".format(user["id"])
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index b2498a0..28ee46c 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -116,6 +116,7 @@ EXTRACTORS = {
("https://www.fashionnova.com/collections/mini-dresses", {
"range": "1-20",
"count": 20,
+ "archive": False,
}),
("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
("https://www.fashionnova.com/collections/mini-dresses#1"),
diff --git a/gallery_dl/extractor/slickpic.py b/gallery_dl/extractor/slickpic.py
index 127cce8..1063716 100644
--- a/gallery_dl/extractor/slickpic.py
+++ b/gallery_dl/extractor/slickpic.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -41,7 +41,8 @@ class SlickpicAlbumExtractor(SlickpicExtractor):
}),
("https://mattcrandall.slickpic.com/albums/LamborghiniMurcielago/", {
"range": "34",
- "content": "cec6630e659dc72db1ee1a9a6f3b525189261988",
+ "content": ("cec6630e659dc72db1ee1a9a6f3b525189261988",
+ "6f81e1e74c6cd6db36844e7211eef8e7cd30055d"),
}),
)
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 610e0ee..dc558c0 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -30,6 +30,7 @@ class TwitterExtractor(Extractor):
self._user_dict = None
self.logged_in = False
self.retweets = self.config("retweets", True)
+ self.twitpic = self.config("twitpic", False)
self.content = self.config("content", False)
self.videos = self.config("videos", False)
@@ -79,6 +80,26 @@ class TwitterExtractor(Extractor):
urls = [url + size for size in self.sizes]
yield Message.Urllist, urls, data
+ if self.twitpic and "//twitpic.com/" in tweet:
+ urls = [
+ url for url in text.extract_iter(
+ tweet, 'data-expanded-url="', '"')
+ if "//twitpic.com/" in url
+ ]
+
+ if "num" not in data:
+ if urls:
+ yield Message.Directory, data
+ data["num"] = 0
+
+ for data["num"], url in enumerate(urls, data["num"]+1):
+ response = self.request(url, fatal=False)
+ if response.status_code >= 400:
+ continue
+ url = text.extract(
+ response.text, 'name="twitter:image" value="', '"')[0]
+ yield Message.Url, url, text.nameext_from_url(url, data)
+
def metadata(self):
"""Return general metadata"""
return {}
@@ -231,8 +252,11 @@ class TwitterExtractor(Extractor):
data["items_html"], '<div class="tweet ', '\n</li>'):
yield tweet
- if not data["has_more_items"]:
- return
+ if data.get("min_position") is None:
+ if data["has_more_items"] and "min_position" not in data:
+ pass
+ else:
+ return
if "min_position" in data:
position = data["min_position"]
@@ -345,6 +369,12 @@ class TwitterTweetExtractor(TwitterExtractor):
"count": 4,
"keyword": "0c627af2b8cdccc7e0da8fd221155c4a4a3141a8",
}),
+ # TwitPic embeds (#579)
+ ("https://twitter.com/i/web/status/112900228289540096", {
+ "options": (("twitpic", True),),
+ "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg",
+ "count": 3,
+ }),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/xhamster.py b/gallery_dl/extractor/xhamster.py
index e125184..62acb28 100644
--- a/gallery_dl/extractor/xhamster.py
+++ b/gallery_dl/extractor/xhamster.py
@@ -152,7 +152,7 @@ class XhamsterUserExtractor(XhamsterExtractor):
subcategory = "user"
pattern = BASE_PATTERN + r"/users/([^/?&#]+)(?:/photos)?/?(?:$|[?#])"
test = (
- ("https://xhamster.com/users/nickname68/photos", {
+ ("https://xhamster.com/users/goldenpalomino/photos", {
"pattern": XhamsterGalleryExtractor.pattern,
"count": 50,
"range": "1-50",
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 699f057..c717dc2 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -209,6 +209,7 @@ class DownloadJob(Job):
# use fallback URLs if available
for num, url in enumerate(fallback or (), 1):
+ util.remove_file(pathfmt.temppath)
self.log.info("Trying fallback URL #%d", num)
if self.download(url):
break
diff --git a/gallery_dl/postprocessor/__init__.py b/gallery_dl/postprocessor/__init__.py
index e63d442..7a3bf23 100644
--- a/gallery_dl/postprocessor/__init__.py
+++ b/gallery_dl/postprocessor/__init__.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2019 Mike Fährmann
+# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -13,6 +13,7 @@ import logging
modules = [
"classify",
+ "compare",
"exec",
"metadata",
"mtime",
diff --git a/gallery_dl/postprocessor/compare.py b/gallery_dl/postprocessor/compare.py
new file mode 100644
index 0000000..ddbcef0
--- /dev/null
+++ b/gallery_dl/postprocessor/compare.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Compare versions of the same file and replace/enumerate them on mismatch"""
+
+from .common import PostProcessor
+import os
+
+
+class ComparePP(PostProcessor):
+
+ def __init__(self, pathfmt, options):
+ PostProcessor.__init__(self)
+ if options.get("action") == "enumerate":
+ self.run = self._run_enumerate
+ if options.get("shallow"):
+ self.compare = self._compare_size
+
+ def run(self, pathfmt):
+ try:
+ if self.compare(pathfmt.realpath, pathfmt.temppath):
+ pathfmt.delete = True
+ except OSError:
+ pass
+
+ def _run_enumerate(self, pathfmt):
+ num = 1
+ try:
+ while not self.compare(pathfmt.realpath, pathfmt.temppath):
+ pathfmt.prefix = str(num) + "."
+ pathfmt.set_extension(pathfmt.extension, False)
+ num += 1
+ pathfmt.delete = True
+ except OSError:
+ pass
+
+ def compare(self, f1, f2):
+ return self._compare_size(f1, f2) and self._compare_content(f1, f2)
+
+ @staticmethod
+ def _compare_size(f1, f2):
+ return os.stat(f1).st_size == os.stat(f2).st_size
+
+ @staticmethod
+ def _compare_content(f1, f2):
+ size = 16384
+ with open(f1, "rb") as fp1, open(f2, "rb") as fp2:
+ while True:
+ buf1 = fp1.read(size)
+ buf2 = fp2.read(size)
+ if buf1 != buf2:
+ return False
+ if not buf1:
+ return True
+
+
+__postprocessor__ = ComparePP
diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py
index 42f7608..a43c43a 100644
--- a/gallery_dl/postprocessor/zip.py
+++ b/gallery_dl/postprocessor/zip.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2019 Mike Fährmann
+# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,17 +9,17 @@
"""Store files in ZIP archives"""
from .common import PostProcessor
+from .. import util
import zipfile
-import os
class ZipPP(PostProcessor):
COMPRESSION_ALGORITHMS = {
"store": zipfile.ZIP_STORED,
- "zip": zipfile.ZIP_DEFLATED,
+ "zip" : zipfile.ZIP_DEFLATED,
"bzip2": zipfile.ZIP_BZIP2,
- "lzma": zipfile.ZIP_LZMA,
+ "lzma" : zipfile.ZIP_LZMA,
}
def __init__(self, pathfmt, options):
@@ -64,18 +64,11 @@ class ZipPP(PostProcessor):
self.zfile.close()
if self.delete:
- try:
- # remove target directory
- os.rmdir(self.path)
- except OSError:
- pass
+ util.remove_directory(self.path)
if self.zfile and not self.zfile.NameToInfo:
- try:
- # delete empty zip archive
- os.unlink(self.zfile.filename)
- except OSError:
- pass
+ # remove empty zip archive
+ util.remove_file(self.zfile.filename)
__postprocessor__ = ZipPP
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index f426829..13bf80e 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2019 Mike Fährmann
+# Copyright 2017-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -121,6 +121,20 @@ def expand_path(path):
return os.path.expandvars(os.path.expanduser(path))
+def remove_file(path):
+ try:
+ os.unlink(path)
+ except OSError:
+ pass
+
+
+def remove_directory(path):
+ try:
+ os.rmdir(path)
+ except OSError:
+ pass
+
+
def code_to_language(code, default=None):
"""Map an ISO 639-1 language code to its actual name"""
return CODES.get((code or "").lower(), default)
@@ -602,12 +616,15 @@ class PathFormat():
def _enum_file(self):
num = 1
- while True:
- self.prefix = str(num) + "."
- self.set_extension(self.extension, False)
- if not os.path.exists(self.realpath):
- return False
- num += 1
+ try:
+ while True:
+ self.prefix = str(num) + "."
+ self.set_extension(self.extension, False)
+ os.stat(self.realpath) # raises OSError if file doesn't exist
+ num += 1
+ except OSError:
+ pass
+ return False
def set_directory(self, kwdict):
"""Build directory path and create it if necessary"""
@@ -623,7 +640,7 @@ class PathFormat():
except Exception as exc:
raise exception.DirectoryFormatError(exc)
- # Join path segements
+ # Join path segments
sep = os.sep
directory = self.clean_path(self.basedirectory + sep.join(segments))
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 4b83107..36d729e 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -1,9 +1,9 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2019 Mike Fährmann
+# Copyright 2016-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.12.2"
+__version__ = "1.12.3"
diff --git a/setup.py b/setup.py
index f0f0f16..e31a38c 100644
--- a/setup.py
+++ b/setup.py
@@ -61,10 +61,6 @@ setup(
"requests>=2.11.0",
],
extras_require={
- "cloudflare": [
- "pyOpenSSL>=19.0.0",
- "cryptography>=2.8.0",
- ],
"video": [
"youtube-dl",
],
diff --git a/test/test_downloader.py b/test/test_downloader.py
index a7c4ce6..c43b533 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -98,6 +98,7 @@ class TestDownloaderBase(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.extractor = extractor.find("test:")
+ cls.extractor.log.job = None
cls.dir = tempfile.TemporaryDirectory()
cls.fnum = 0
config.set((), "base-directory", cls.dir.name)
diff --git a/test/test_results.py b/test/test_results.py
index 869ff83..e87b4b8 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,12 +21,12 @@ TRAVIS_SKIP = {
"exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie", "bobx",
"archivedmoe", "archiveofsins", "thebarchive", "fireden", "4plebs",
"sankaku", "idolcomplex", "mangahere", "readcomiconline", "mangadex",
- "sankakucomplex", "warosu", "fuskator",
+ "sankakucomplex", "warosu", "fuskator", "patreon",
}
# temporary issues, etc.
BROKEN = {
- "erolord",
+ "imxto",
"mangapark",
"photobucket",
}
@@ -109,7 +109,12 @@ class TestExtractorResults(unittest.TestCase):
self.assertEqual(result["url"], tjob.url_hash.hexdigest())
if "content" in result:
- self.assertEqual(result["content"], tjob.content_hash.hexdigest())
+ expected = result["content"]
+ digest = tjob.content_hash.hexdigest()
+ if isinstance(expected, str):
+ self.assertEqual(digest, expected, "content")
+ else: # assume iterable
+ self.assertIn(digest, expected, "content")
if "keyword" in result:
expected = result["keyword"]