summary refs log tree commit diff stats
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/bash_completion.py 56
-rwxr-xr-x scripts/build_testresult_db.py 56
-rwxr-xr-x scripts/create_test_data.py 69
-rw-r--r-- scripts/hook-gallery_dl.py 9
-rwxr-xr-x scripts/man.py 304
-rwxr-xr-x scripts/pyinstaller.py 18
-rwxr-xr-x scripts/release.sh 167
-rwxr-xr-x scripts/run_tests.sh 24
-rwxr-xr-x scripts/supportedsites.py 264
-rw-r--r-- scripts/util.py 11
10 files changed, 978 insertions, 0 deletions
diff --git a/scripts/bash_completion.py b/scripts/bash_completion.py
new file mode 100755
index 0000000..69e6a79
--- /dev/null
+++ b/scripts/bash_completion.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Generate bash completion script from gallery-dl's argument parser"""
+
+import util
+from gallery_dl import option
+
+
+TEMPLATE = """_gallery_dl()
+{
+ local cur prev
+ COMPREPLY=()
+ cur="${COMP_WORDS[COMP_CWORD]}"
+ prev="${COMP_WORDS[COMP_CWORD-1]}"
+
+ if [[ "${prev}" =~ ^(%(fileopts)s)$ ]]; then
+ COMPREPLY=( $(compgen -f -- "${cur}") )
+ elif [[ "${prev}" =~ ^(%(diropts)s)$ ]]; then
+ COMPREPLY=( $(compgen -d -- "${cur}") )
+ else
+ COMPREPLY=( $(compgen -W "%(opts)s" -- "${cur}") )
+ fi
+}
+
+complete -F _gallery_dl gallery-dl
+"""
+
# Sort the option strings of every parser action into three groups:
# all long options, options completed with directory names and
# options completed with file names (decided by the action's metavar).
long_flags, dir_flags, file_flags = [], [], []

for act in option.build_parser()._actions:
    flags = act.option_strings

    if act.metavar in ("DEST",):
        dir_flags += flags
    elif act.metavar in ("FILE", "CFG"):
        file_flags += flags

    long_flags += [flag for flag in flags if flag.startswith("--")]

# values for the placeholders in TEMPLATE
values = {
    "opts"    : " ".join(long_flags),
    "diropts" : "|".join(dir_flags),
    "fileopts": "|".join(file_flags),
}

PATH = util.path("gallery-dl.bash_completion")
with open(PATH, "w", encoding="utf-8") as file:
    file.write(TEMPLATE % values)
diff --git a/scripts/build_testresult_db.py b/scripts/build_testresult_db.py
new file mode 100755
index 0000000..fda9f64
--- /dev/null
+++ b/scripts/build_testresult_db.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""Collect results of extractor unit tests"""
+
+import sys
+import os.path
+import datetime
+
+import util
+from gallery_dl import extractor, job, config
+from test.test_results import setup_test_config
+
+
# filter test cases
# (only extractors with tests; if category names are given on the
#  command line, only those categories; only tests with a result dict)

tests = [
    (idx, extr, url, result)

    for extr in extractor.extractors()
    if hasattr(extr, "test") and extr.test
    if len(sys.argv) <= 1 or extr.category in sys.argv

    for idx, (url, result) in enumerate(extr._get_tests())
    if result
]


# setup target directory
# (one directory per day: archive/testdb/YYYY-MM-DD)

path = util.path("archive", "testdb", str(datetime.date.today()))
os.makedirs(path, exist_ok=True)


for idx, extr, url, result in tests:

    # filename
    name = "{}-{}-{}.json".format(extr.category, extr.subcategory, idx)
    print(name)

    # config values
    # (start from the default test configuration for every test case)
    setup_test_config()

    if "options" in result:
        for key, value in result["options"]:
            config.set(key.split("."), value)
    if "range" in result:
        config.set(("image-range",), result["range"])
        config.set(("chapter-range",), result["range"])

    # write test data
    # ensure_ascii=False is passed to DataJob, so the output is presumably
    # not limited to ASCII; fix the file encoding instead of relying on
    # the locale's default
    target = os.path.join(path, name)
    try:
        with open(target, "w", encoding="utf-8") as outfile:
            job.DataJob(url, file=outfile, ensure_ascii=False).run()
    except KeyboardInterrupt:
        sys.exit()
diff --git a/scripts/create_test_data.py b/scripts/create_test_data.py
new file mode 100755
index 0000000..14ab0c0
--- /dev/null
+++ b/scripts/create_test_data.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2015-2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Create testdata for extractor tests"""
+
+import argparse
+
+import util # noqa
+from gallery_dl import extractor
+from test.test_results import ResultJob, setup_test_config
+
+
+TESTDATA_FMT = """
+ test = ("{}", {{
+ "url": "{}",
+ "keyword": "{}",
+ "content": "{}",
+ }})
+"""
+
+TESTDATA_EXCEPTION_FMT = """
+ test = ("{}", {{
+ "exception": exception.{},
+ }})
+"""
+
+
def main():
    """Run extractors on URLs and print matching 'test = ...' definitions

    Command-line arguments:
      urls        URLs to process; with --recreate these are extractor
                  category names, whose existing test URLs get processed
      --content   forwarded to ResultJob as 'content'
      --recreate  see 'urls'
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--content", action="store_true")
    parser.add_argument("--recreate", action="store_true")
    parser.add_argument("urls", nargs="*")
    args = parser.parse_args()

    if args.recreate:
        # Go through _get_tests() like the other scripts do: 'test' may
        # hold a single (url, results) pair - the form TESTDATA_FMT
        # produces - and iterating over that directly would yield the
        # members of the pair instead of test cases.
        urls = [
            url
            for extr in extractor.extractors()
            if extr.category in args.urls and getattr(extr, "test", None)
            for url, _ in extr._get_tests()
        ]
    else:
        urls = args.urls

    setup_test_config()

    for url in urls:
        tjob = ResultJob(url, content=args.content)
        try:
            tjob.run()
        except Exception as exc:
            # a failing extractor is recorded as an expected exception
            fmt = TESTDATA_EXCEPTION_FMT
            data = (exc.__class__.__name__,)
        else:
            fmt = TESTDATA_FMT
            data = (tjob.hash_url.hexdigest(),
                    tjob.hash_keyword.hexdigest(),
                    tjob.hash_content.hexdigest())
        print(tjob.extractor.__class__.__name__)
        print(fmt.format(url, *data))


if __name__ == '__main__':
    main()
diff --git a/scripts/hook-gallery_dl.py b/scripts/hook-gallery_dl.py
new file mode 100644
index 0000000..d549019
--- /dev/null
+++ b/scripts/hook-gallery_dl.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+
+from gallery_dl import extractor, downloader, postprocessor
+
# Fully qualified name of every module listed in the 'modules' attribute
# of the extractor, downloader and postprocessor packages.
hiddenimports = [
    "{}.{}".format(pkg.__name__, name)
    for pkg in (extractor, downloader, postprocessor)
    for name in pkg.modules
]
diff --git a/scripts/man.py b/scripts/man.py
new file mode 100755
index 0000000..91608a3
--- /dev/null
+++ b/scripts/man.py
@@ -0,0 +1,304 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Generate man pages"""
+
+import re
+import datetime
+
+import util
+import gallery_dl.option
+import gallery_dl.version
+
+
def build_gallery_dl_1(path=None):
    """Write the gallery-dl(1) man page

    The OPTIONS section is generated from the actions of gallery-dl's
    argument parser; everything else comes from a fixed template.

    path: output file (default: util.path("gallery-dl.1"))
    """

    OPTS_FMT = """.TP\n.B "{}" {}\n{}"""

    TEMPLATE = r"""
.TH "GALLERY-DL" "1" "%(date)s" "%(version)s" "gallery-dl Manual"
.\" disable hyphenation
.nh

.SH NAME
gallery-dl \- download image-galleries and -collections

.SH SYNOPSIS
.B gallery-dl
[OPTION]... URL...

.SH DESCRIPTION
.B gallery-dl
is a command-line program to download image-galleries and -collections
from several image hosting sites. It is a cross-platform tool
with many configuration options and powerful filenaming capabilities.

.SH OPTIONS
%(options)s

.SH EXAMPLES
.TP
gallery-dl \f[I]URL\f[]
Download images from \f[I]URL\f[].
.TP
gallery-dl -g -u <username> -p <password> \f[I]URL\f[]
Print direct URLs from a site that requires authentication.
.TP
gallery-dl --filter 'type == "ugoira"' --range '2-4' \f[I]URL\f[]
Apply filter and range expressions. This will only download
the second, third, and fourth file where its type value is equal to "ugoira".
.TP
gallery-dl r:\f[I]URL\f[]
Scan \f[I]URL\f[] for other URLs and invoke \f[B]gallery-dl\f[] on them.
.TP
gallery-dl oauth:\f[I]SITE\-NAME\f[]
Gain OAuth authentication tokens for
.IR deviantart ,
.IR flickr ,
.IR reddit ,
.IR smugmug ", and"
.IR tumblr .

.SH FILES
.TP
.I /etc/gallery-dl.conf
The system wide configuration file.
.TP
.I ~/.config/gallery-dl/config.json
Per user configuration file.
.TP
.I ~/.gallery-dl.conf
Alternate per user configuration file.

.SH BUGS
https://github.com/mikf/gallery-dl/issues

.SH AUTHORS
Mike Fährmann <mike_faehrmann@web.de>
.br
and https://github.com/mikf/gallery-dl/graphs/contributors

.SH "SEE ALSO"
.BR gallery-dl.conf (5)
"""

    options = []
    for action in gallery_dl.option.build_parser()._actions:
        # argparse leaves 'help' at None when no help text was given
        help_text = action.help or ""
        # help texts starting with "==" are not listed
        # (argparse.SUPPRESS is the string "==SUPPRESS==")
        if help_text.startswith("=="):
            continue
        options.append(OPTS_FMT.format(
            ", ".join(action.option_strings).replace("-", r"\-"),
            r"\f[I]{}\f[]".format(action.metavar) if action.metavar else "",
            help_text,
        ))

    if not path:
        path = util.path("gallery-dl.1")
    with open(path, "w", encoding="utf-8") as file:
        # lstrip() removes the newline right after the opening quotes
        file.write(TEMPLATE.lstrip() % {
            "options": "\n".join(options),
            "version": gallery_dl.version.__version__,
            "date"   : datetime.datetime.now().strftime("%Y-%m-%d"),
        })
+
+
def build_gallery_dl_conf_5(path=None):
    """Write the gallery-dl.conf(5) man page

    The option reference is generated from the sections, options and
    fields returned by parse_docs_configuration(); everything else
    comes from a fixed template.

    path: output file (default: util.path("gallery-dl.conf.5"))
    """

    TEMPLATE = r"""
.TH "GALLERY-DL.CONF" "5" "%(date)s" "%(version)s" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
.ad l

.SH NAME
gallery-dl.conf \- gallery-dl configuration file

.SH DESCRIPTION
gallery-dl will search for configuration files in the following places
every time it is started, unless
.B --ignore-config
is specified:
.PP
.RS 4
.nf
.I /etc/gallery-dl.conf
.I $HOME/.config/gallery-dl/config.json
.I $HOME/.gallery-dl.conf
.fi
.RE
.PP
It is also possible to specify additional configuration files with the
.B -c/--config
command-line option or to add further option values with
.B -o/--option
as <key>=<value> pairs.

Configuration files are JSON-based and therefore don't allow any ordinary
comments, but, since unused keys are simply ignored, it is possible to utilize
those as makeshift comments by setting their values to arbitrary strings.

.SH EXAMPLE
{
.RS 4
"base-directory": "/tmp/",
.br
"extractor": {
.RS 4
"pixiv": {
.RS 4
"directory": ["Pixiv", "Works", "{user[id]}"],
.br
"filename": "{id}{num}.{extension}",
.br
"username": "foo",
.br
"password": "bar"
.RE
},
.br
"flickr": {
.RS 4
"_comment": "OAuth keys for account 'foobar'",
.br
"access-token": "0123456789-0123456789abcdef",
.br
"access-token-secret": "fedcba9876543210"
.RE
}
.RE
},
.br
"downloader": {
.RS 4
"retries": 3,
.br
"timeout": 2.5
.RE
}
.RE
}

%(options)s

.SH BUGS
https://github.com/mikf/gallery-dl/issues

.SH AUTHORS
Mike Fährmann <mike_faehrmann@web.de>
.br
and https://github.com/mikf/gallery-dl/graphs/contributors

.SH "SEE ALSO"
.BR gallery-dl (1)
"""

    sections = parse_docs_configuration()
    content = []

    for sec_name, section in sections.items():
        # one man page section per documentation section
        content.append(".SH " + sec_name.upper())

        for opt_name, option in section.items():
            content.append(".SS " + opt_name)

            for field, text in option.items():
                if field in ("Type", "Default"):
                    # indent by the width of the label ("Type:" + space)
                    content.append('.IP "{}:" {}'.format(field, len(field)+2))
                    content.append(strip_rst(text))
                else:
                    content.append('.IP "{}:" 4'.format(field))
                    # ``literals`` inside examples are not italicized
                    content.append(strip_rst(text, field != "Example"))

    if not path:
        path = util.path("gallery-dl.conf.5")
    with open(path, "w", encoding="utf-8") as file:
        # lstrip() removes the newline right after the opening quotes
        file.write(TEMPLATE.lstrip() % {
            "options": "\n".join(content),
            "version": gallery_dl.version.__version__,
            "date"   : datetime.datetime.now().strftime("%Y-%m-%d"),
        })
+
+
def parse_docs_configuration():
    """Parse docs/configuration.rst into nested dicts

    Returns a dict of the form
        {section title: {option name: {field name: text}}}
    where a section title is the line above a rule of "=" characters,
    an option name is the line above a rule of "-" characters that is
    followed by a table border ("=== ==="), and a field name is the
    first word of a table row starting with a letter.
    """

    doc_path = util.path("docs", "configuration.rst")
    with open(doc_path, encoding="utf-8") as file:
        doc_lines = file.readlines()

    sections = {}
    sec_name = None
    options = None
    opt_name = None
    opt_desc = None
    name = None
    last = last2 = None
    for line in doc_lines:

        # start of new section
        if re.match(r"^=+$", line):
            # a rule without a preceding line names no section
            if last is not None:
                if sec_name and options:
                    sections[sec_name] = options
                sec_name = last.strip()
                options = {}

        elif re.match(r"^=+ =+$", line):
            # start of option table
            if (last is not None and last2 is not None and
                    re.match(r"^-+$", last)):
                opt_name = last2.strip()
                opt_desc = {}
            # end of option table
            elif opt_desc:
                # tables before the first section have nowhere to go
                if options is not None:
                    options[opt_name] = opt_desc
                opt_name = None
                name = None

        # inside option table
        elif opt_name:
            if line[0].isalpha():
                # rows starting with a letter begin a new field
                name, _, line = line.partition(" ")
                opt_desc[name] = ""
            line = line.strip()
            if line.startswith(("* ", "- ")):
                line = "\n" + line
            elif line.startswith("| "):
                line = line[2:] + "\n.br"
            # continuation rows before any field name are dropped
            if name is not None:
                opt_desc[name] += line + "\n"

        # note: inside option tables this stores the modified line
        last2 = last
        last = line

    # the last section is not followed by another section rule
    if sec_name is not None:
        sections[sec_name] = options

    return sections
+
+
def strip_rst(text, extended=True, *, ITALIC=r"\\f[I]\1\\f[]", REGULAR=r"\1"):
    """Replace inline reStructuredText markup in 'text'

    Backslashes are doubled first; the marked-up text is then replaced
    by either the ITALIC or the REGULAR substitution template.
    ``literals`` use ITALIC only if 'extended' is true.
    """
    literal = ITALIC if extended else REGULAR

    # (pattern, replacement) pairs, applied in this order
    rules = (
        (r"``([^`]+)``"             , literal),  # ``foo``
        (r"\|([^|]+)\|_*"           , ITALIC),   # |foo|_
        (r"`([^`]+)`_+"             , ITALIC),   # `foo`_
        (r"`([^`]+)`"               , REGULAR),  # `foo`
        (r"([A-Za-z0-9-]+)_+(?=\s)" , ITALIC),   # foo_
        (r"---+"                    , ""),       # -------
    )

    result = text.replace("\\", "\\\\")
    for pattern, replacement in rules:
        result = re.sub(pattern, replacement, result)
    return result
+
+
+if __name__ == "__main__":
+ build_gallery_dl_1()
+ build_gallery_dl_conf_5()
diff --git a/scripts/pyinstaller.py b/scripts/pyinstaller.py
new file mode 100755
index 0000000..879ae50
--- /dev/null
+++ b/scripts/pyinstaller.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""Build a standalone executable using PyInstaller"""
+
+import PyInstaller.__main__
+import util
+
# the executable is named gallery-dl.exe on Windows, gallery-dl.bin elsewhere
suffix = "exe" if PyInstaller.is_win else "bin"

arguments = [
    "--onefile",
    "--console",
    "--name", "gallery-dl." + suffix,
    # directory of this script, which also contains hook-gallery_dl.py
    "--additional-hooks-dir", util.path("scripts"),
    "--distpath", util.path("dist"),
    "--workpath", util.path("build"),
    "--specpath", util.path("build"),
    util.path("gallery_dl", "__main__.py"),
]

PyInstaller.__main__.run(arguments)
diff --git a/scripts/release.sh b/scripts/release.sh
new file mode 100755
index 0000000..ef444e0
--- /dev/null
+++ b/scripts/release.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+set -e
+
# Show the versions involved and ask for confirmation.
# An empty answer counts as "yes"; anything but y/Y aborts the script.
prompt() {
    echo "root: ${ROOTDIR} old: ${OLDVERSION} - new: ${NEWVERSION}"
    read -n 1 -r -p "Proceed? [Y/n] " P
    echo
    if [[ "$P" == y || "$P" == Y || -z "$P" ]]; then
        return 0
    else
        exit 1
    fi
}

# Remove the build directory of a previous run.
cleanup() {
    cd "${ROOTDIR}"
    echo Removing old build directory

    if [ -d ./build ]; then
        rm -rf ./build
    fi
}

# Write the new version number into version.py and the README.
update() {
    cd "${ROOTDIR}"
    echo "Updating version to ${NEWVERSION}"

    sed -i "s#\"${PYVERSION}\"#\"${NEWVERSION}\"#" "gallery_dl/version.py"
    sed -i "s#v${OLDVERSION}#v${NEWVERSION}#" "${README}"
}

# Bump the last version component, mark it as "-dev" and
# start a new 'Unreleased' section in the changelog.
update-dev() {
    cd "${ROOTDIR}"

    IFS="." read -r MAJOR MINOR BUILD <<< "${NEWVERSION}"
    BUILD=$((BUILD+1))
    # update version to -dev
    sed -i "s#\"${NEWVERSION}\"#\"${MAJOR}.${MINOR}.${BUILD}-dev\"#" "gallery_dl/version.py"
    # add 'unreleased' line to changelog
    sed -i "2i\\\n## Unreleased" "${CHANGELOG}"

    git add "gallery_dl/version.py" "${CHANGELOG}"
}

# Build wheel and source distribution.
build-python() {
    cd "${ROOTDIR}"
    echo Building bdist_wheel and sdist

    python setup.py bdist_wheel sdist
}

# Build the Linux executable via the 'executable' make target.
build-linux() {
    cd "${ROOTDIR}"
    echo Building Linux executable

    make executable
}

# Start the Windows VM, wait until gallery-dl.exe shows up in dist/
# and check that it reports the expected version.
build-windows() {
    cd "${ROOTDIR}/dist"
    echo Building Windows executable

    # remove old executable
    rm -f "gallery-dl.exe"

    # build windows exe in vm
    ln -fs "${ROOTDIR}" /tmp/
    vmstart "Windows 7" &
    disown
    while [ ! -e "gallery-dl.exe" ] ; do
        sleep 5
    done
    sleep 2

    # check exe version
    # (the last character of the output is dropped before comparing;
    #  presumably a carriage return - verify)
    OUTPUT="$(wine gallery-dl.exe --version)"
    if [[ ! "${OUTPUT%?}" == "${NEWVERSION}" ]]; then
        echo "exe version mismatch: ${OUTPUT} != ${NEWVERSION}"
        exit 3
    fi
}

# Create detached GPG signatures for all release files.
sign() {
    cd "${ROOTDIR}/dist"
    echo Signing files

    gpg --detach-sign --armor "gallery_dl-${NEWVERSION}-py3-none-any.whl"
    gpg --detach-sign --armor "gallery_dl-${NEWVERSION}.tar.gz"
    gpg --detach-sign --yes gallery-dl.exe
    gpg --detach-sign --yes gallery-dl.bin
}

# Finalize the changelog entry of the new version.
changelog() {
    cd "${ROOTDIR}"
    echo "Updating ${CHANGELOG}"

    # - replace "#NN" with link to actual issue
    # - insert new version and date
    sed -i \
        -e "s*\([( ]\)#\([0-9]\+\)*\1[#\2](https://github.com/mikf/gallery-dl/issues/\2)*g" \
        -e "s*^## [Uu]nreleased*## ${NEWVERSION} - $(date +%Y-%m-%d)*" \
        "${CHANGELOG}"
}

# Regenerate the list of supported sites and abort (exit code 4)
# if the result differs from the version known to git.
supportedsites() {
    cd "${ROOTDIR}"
    echo "Checking if ${SUPPORTEDSITES} is up to date"

    ./scripts/supportedsites.py
    if ! git diff --quiet "${SUPPORTEDSITES}"; then
        echo "updated ${SUPPORTEDSITES} contains changes"
        exit 4
    fi
}

# Commit, tag (both signed) and push the release.
git-upload() {
    cd "${ROOTDIR}"
    echo Pushing changes to github

    git add "gallery_dl/version.py" "${README}" "${CHANGELOG}"
    git commit -S -m "release version ${NEWVERSION}"
    git tag -s -m "version ${NEWVERSION}" "v${NEWVERSION}"
    git push
    git push origin "v${NEWVERSION}"
}

# Upload all distribution files of the new version to PyPI.
pypi-upload() {
    cd "${ROOTDIR}/dist"
    echo Uploading to PyPI

    # the glob stays outside the quotes so that it still expands
    twine upload "gallery_dl-${NEWVERSION}"*
}


ROOTDIR="$(realpath "$(dirname "$0")/..")/"
README="README.rst"
CHANGELOG="CHANGELOG.md"
SUPPORTEDSITES="./docs/supportedsites.rst"

# query git and Python from inside the repository, so the result
# does not depend on the directory the script was started from
cd "${ROOTDIR}"

LASTTAG="$(git describe --abbrev=0 --tags)"
OLDVERSION="${LASTTAG#v}"
PYVERSION="$(python -c "import gallery_dl as g; print(g.__version__)")"

# the new version is either given as first argument
# or derived from the current version by dropping "-dev"
if [[ "$1" ]]; then
    NEWVERSION="$1"
else
    NEWVERSION="${PYVERSION%-dev}"
fi

# anchored on both ends: the whole string must be a version number
if [[ ! $NEWVERSION =~ ^[0-9]+\.[0-9]+\.[0-9]+(-[a-z]+(\.[0-9]+)?)?$ ]]; then
    echo "invalid version: $NEWVERSION"
    exit 2
fi


prompt
supportedsites
cleanup
update
build-python
build-linux
build-windows
sign
changelog
git-upload
pypi-upload
update-dev
diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh
new file mode 100755
index 0000000..334671e
--- /dev/null
+++ b/scripts/run_tests.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
# absolute path of the directory containing this script
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

TESTS_CORE=(config cookies downloader extractor oauth text util)
TESTS_RESULTS=(results)


# select tests
# (by first argument, else by $GALLERYDL_TESTS, else "core")
case "${1:-${GALLERYDL_TESTS:-core}}" in
    core)    TESTS=( "${TESTS_CORE[@]}"    );;
    results) TESTS=( "${TESTS_RESULTS[@]}" );;
    *)       TESTS=( );;
esac


# transform each array element to test_###.py
# (quoted, so that elements are neither split nor glob-expanded)
TESTS=( "${TESTS[@]/#/test_}" )
TESTS=( "${TESTS[@]/%/.py}" )


# run 'nosetests' with selected tests
# (or all tests if ${TESTS} is empty)
nosetests --verbose -w "${DIR}/../test" "${TESTS[@]}"
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
new file mode 100755
index 0000000..f326617
--- /dev/null
+++ b/scripts/supportedsites.py
@@ -0,0 +1,264 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""Generate a reStructuredText document with all supported sites"""
+
+import sys
+import collections
+
+import util
+from gallery_dl import extractor
+
+
+CATEGORY_MAP = {
+ "2chan" : "Futaba Channel",
+ "35photo" : "35PHOTO",
+ "archivedmoe" : "Archived.Moe",
+ "archiveofsins" : "Archive of Sins",
+ "artstation" : "ArtStation",
+ "b4k" : "arch.b4k.co",
+ "bobx" : "BobX",
+ "deviantart" : "DeviantArt",
+ "dokireader" : "Doki Reader",
+ "dynastyscans" : "Dynasty Reader",
+ "e621" : "e621",
+ "exhentai" : "ExHentai",
+ "fallenangels" : "Fallen Angels Scans",
+ "fashionnova" : "Fashion Nova",
+ "hbrowse" : "HBrowse",
+ "hentai2read" : "Hentai2Read",
+ "hentaicafe" : "Hentai Cafe",
+ "hentaifoundry" : "Hentai Foundry",
+ "hentaifox" : "HentaiFox",
+ "hentaihere" : "HentaiHere",
+ "hitomi" : "Hitomi.la",
+ "idolcomplex" : "Idol Complex",
+ "imagebam" : "ImageBam",
+ "imagefap" : "ImageFap",
+ "imgbox" : "imgbox",
+ "imgth" : "imgth",
+ "imgur" : "imgur",
+ "jaiminisbox" : "Jaimini's Box",
+ "kireicake" : "Kirei Cake",
+ "kissmanga" : "KissManga",
+ "livedoor" : "livedoor Blog",
+ "mangadex" : "MangaDex",
+ "mangafox" : "Manga Fox",
+ "mangahere" : "Manga Here",
+ "mangapark" : "MangaPark",
+ "mangastream" : "Manga Stream",
+ "myportfolio" : "Adobe Portfolio",
+ "nhentai" : "nhentai",
+ "nijie" : "nijie",
+ "nsfwalbum" : "NSFWalbum.com",
+ "nyafuu" : "Nyafuu Archive",
+ "paheal" : "rule #34",
+ "powermanga" : "PowerManga",
+ "readcomiconline": "Read Comic Online",
+ "rbt" : "RebeccaBlackTech",
+ "rule34" : "Rule 34",
+ "sankaku" : "Sankaku Channel",
+ "sankakucomplex" : "Sankaku Complex",
+ "seaotterscans" : "Sea Otter Scans",
+ "seiga" : "Niconico Seiga",
+ "senmanga" : "Sen Manga",
+ "sensescans" : "Sense-Scans",
+ "sexcom" : "Sex.com",
+ "simplyhentai" : "Simply Hentai",
+ "slickpic" : "SlickPic",
+ "slideshare" : "SlideShare",
+ "smugmug" : "SmugMug",
+ "thebarchive" : "The /b/ Archive",
+ "vanillarock" : "もえぴりあ",
+ "wikiart" : "WikiArt.org",
+ "worldthree" : "World Three",
+ "xhamster" : "xHamster",
+ "xvideos" : "XVideos",
+ "yaplog" : "yaplog!",
+ "yuki" : "yuki.la 4chan archive",
+}
+
+SUBCATEGORY_MAP = {
+ "artwork": "Artwork Listings",
+ "artists": "",
+ "doujin" : "Doujin",
+ "gallery": "Galleries",
+ "image" : "individual Images",
+ "issue" : "Comic-Issues",
+ "manga" : "Manga",
+ "me" : "pixiv.me Links",
+ "media" : "Media Timelines",
+ "path" : "Images from Users and Folders",
+ "pinit" : "pin.it Links",
+ "popular": "Popular Images",
+ "recent" : "Recent Images",
+ "search" : "Search Results",
+ "stash" : "Sta.sh",
+ "status" : "Images from Statuses",
+ "tag" : "Tag-Searches",
+ "user" : "Images from Users",
+ "work" : "Individual Images",
+ "related-pin" : "related Pins",
+ "related-board": "",
+}
+
+AUTH_MAP = {
+ "danbooru" : "Optional",
+ "deviantart" : "Optional (OAuth)",
+ "exhentai" : "Optional",
+ "flickr" : "Optional (OAuth)",
+ "idolcomplex": "Optional",
+ "luscious" : "Optional",
+ "mangoxo" : "Optional",
+ "nijie" : "Required",
+ "pixiv" : "Required",
+ "reddit" : "Optional (OAuth)",
+ "sankaku" : "Optional",
+ "seiga" : "Required",
+ "smugmug" : "Optional (OAuth)",
+ "tsumino" : "Optional",
+ "tumblr" : "Optional (OAuth)",
+ "twitter" : "Optional",
+}
+
+IGNORE_LIST = (
+ "directlink",
+ "oauth",
+ "recursive",
+ "test",
+)
+
+
def domain(cls):
    """Return the web-domain related to an extractor class

    The following sources are tried in this order:
      1. the last word of the docstring of the class' module,
         if it starts with "http"
      2. the 'root' attribute of the class
      3. the last word of the class docstring, combined with the scheme
         selected by the 'https' attribute
      4. scheme and host of the URL of the first test
    Returns an empty string if none of them yields a result.
    """
    # modules and classes without docstring have __doc__ set to None
    module = sys.modules.get(cls.__module__)
    words = (getattr(module, "__doc__", None) or "").split()
    if words and words[-1].startswith("http"):
        return words[-1]

    root = getattr(cls, "root", None)
    if root:
        return root + "/"

    if hasattr(cls, "https"):
        words = (cls.__doc__ or "").split()
        if words:
            scheme = "https" if cls.https else "http"
            return "{}://{}/".format(scheme, words[-1])

    test = next(cls._get_tests(), None)
    if test:
        url = test[0]
        # start searching at index 8 to skip the slashes of "https://"
        end = url.find("/", 8)
        # no slash after the host: the whole URL is the domain
        return url[:end+1] if end >= 0 else url + "/"

    return ""
+
+
def category_text(cls):
    """Return a human-readable representation of a category

    Uses the entry in CATEGORY_MAP if there is a non-empty one and
    the capitalized category name otherwise.
    """
    mapped = CATEGORY_MAP.get(cls.category)
    if mapped:
        return mapped
    return cls.category.capitalize()
+
+
def subcategory_text(cls):
    """Return a human-readable representation of a subcategory

    Uses the entry in SUBCATEGORY_MAP (which may be an empty string) if
    there is one and the capitalized subcategory name with a plural "s"
    otherwise.
    """
    name = cls.subcategory
    try:
        return SUBCATEGORY_MAP[name]
    except KeyError:
        pass

    name = name.capitalize()
    if name.endswith("s"):
        return name
    return name + "s"
+
+
def category_key(cls):
    """Generate sorting keys by category

    The key is the lowercase category text; classes from a module whose
    name ends in ".imagehosts" get a "zz" prefix.
    """
    prefix = "zz" if cls.__module__.endswith(".imagehosts") else ""
    return prefix + category_text(cls).lower()
+
+
def subcategory_key(cls):
    """Generate sorting keys by subcategory

    "user" and "issue" share the key "A"; every other subcategory
    is its own key.
    """
    sub = cls.subcategory
    return "A" if sub in ("user", "issue") else sub
+
+
def build_extractor_list():
    """Generate a sorted list of lists of extractor classes

    Each inner list holds the classes of one category, ordered by
    subcategory_key(); the outer list is ordered by the category_key()
    of the first class of each inner list. Classes without category or
    with a category from IGNORE_LIST are left out.
    """
    by_category = collections.defaultdict(list)

    # group extractor classes by category
    for cls in extractor.extractors():
        category = cls.category
        if category and category not in IGNORE_LIST:
            by_category[category].append(cls)

    groups = list(by_category.values())

    # order the classes inside of each group ...
    for group in groups:
        group.sort(key=subcategory_key)

    # ... and the groups themselves
    groups.sort(key=lambda group: category_key(group[0]))
    return groups
+
+
# define table columns
# (title, width, function returning the cell text for the list of
#  extractor classes of one category)
COLUMNS = (
    ("Site", 20,
     lambda x: category_text(x[0])),
    ("URL" , 35,
     lambda x: domain(x[0])),
    # subcategories with an empty text are left out
    ("Capabilities", 50,
     lambda x: ", ".join(filter(None, map(subcategory_text, x)))),
    ("Authentication", 16,
     lambda x: AUTH_MAP.get(x[0].category, "")),
)
+
+
def write_output(fobj, columns, extractors):
    """Write the reStructuredText table of supported sites to 'fobj'

    fobj:       object with a write() method
    columns:    (title, width, function) tuples; 'function' returns the
                cell text for a list of extractor classes
    extractors: list of lists of extractor classes, one list per row

    Cell texts wider than their column are replaced by a substitution
    reference of the form |<category>-<first letter of column title>|,
    which gets defined below the table.
    """
    subs = []

    def pad(output, col, category=None):
        size = col[1]
        output = output if isinstance(output, str) else col[2](output)

        if len(output) > size:
            sub = "|{}-{}|".format(category, col[0][0])
            subs.append((sub, output))
            output = sub

        return output + " " * (size - len(output))

    def row(cells):
        # Only trailing whitespace is removed: the leading blanks of an
        # empty first cell keep the other cells in their columns.
        return " ".join(cells).rstrip() + "\n"

    w = fobj.write

    # caption
    w("Supported Sites\n")
    w("===============\n")

    # table head
    sep = " ".join("=" * c[1] for c in columns) + "\n"
    w(sep)
    w(row(pad(c[0], c) for c in columns))
    w(sep)

    # table body
    for lst in extractors:
        w(row(pad(col[2](lst), col, lst[0].category) for col in columns))

    # table bottom
    w(sep)
    w("\n")

    # substitutions
    for sub, value in subs:
        w(".. {} replace:: {}\n".format(sub, value))
+
+
# name of the output file inside of docs/ (optional first argument)
outfile = sys.argv[1] if len(sys.argv) > 1 else "supportedsites.rst"
# CATEGORY_MAP contains non-ASCII site names, so the encoding is fixed
# instead of left to the locale's default
with open(util.path("docs", outfile), "w", encoding="utf-8") as file:
    write_output(file, COLUMNS, build_extractor_list())
diff --git a/scripts/util.py b/scripts/util.py
new file mode 100644
index 0000000..bfbd6cb
--- /dev/null
+++ b/scripts/util.py
@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import os.path
+
# ROOTDIR is the parent of the directory containing this file
_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
ROOTDIR = os.path.dirname(_SCRIPTS_DIR)

# let 'import' look into ROOTDIR first
sys.path.insert(0, os.path.realpath(ROOTDIR))


def path(*segments, join=os.path.join):
    """Return the result of joining ROOTDIR and all 'segments'"""
    parts = (ROOTDIR,) + segments
    return join(*parts)