10 files changed, 978 insertions, 0 deletions
diff --git a/scripts/bash_completion.py b/scripts/bash_completion.py
new file mode 100755
index 0000000..69e6a79
--- /dev/null
+++ b/scripts/bash_completion.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Generate bash completion script from gallery-dl's argument parser"""
+
+import util
+from gallery_dl import option
+
+
+TEMPLATE = """_gallery_dl()
+{
+    local cur prev
+    COMPREPLY=()
+    cur="${COMP_WORDS[COMP_CWORD]}"
+    prev="${COMP_WORDS[COMP_CWORD-1]}"
+
+    if [[ "${prev}" =~ ^(%(fileopts)s)$ ]]; then
+        COMPREPLY=( $(compgen -f -- "${cur}") )
+    elif [[ "${prev}" =~ ^(%(diropts)s)$ ]]; then
+        COMPREPLY=( $(compgen -d -- "${cur}") )
+    else
+        COMPREPLY=( $(compgen -W "%(opts)s" -- "${cur}") )
+    fi
+}
+
+complete -F _gallery_dl gallery-dl
+"""
+
+actions = option.build_parser()._actions
+
+# long options ("--foo") offered as completion candidates
+opts = [
+    name for action in actions
+    for name in action.option_strings if name.startswith("--")
+]
+# options taking a directory ("DEST") or a file ("FILE", "CFG") argument
+diropts = [
+    name for action in actions if action.metavar in ("DEST",)
+    for name in action.option_strings
+]
+fileopts = [
+    name for action in actions if action.metavar in ("FILE", "CFG")
+    for name in action.option_strings
+]
+
+PATH = util.path("gallery-dl.bash_completion")
+with open(PATH, "w", encoding="utf-8") as file:
+    file.write(TEMPLATE % dict(
+        opts=" ".join(opts), diropts="|".join(diropts),
+        fileopts="|".join(fileopts)))
diff --git a/scripts/build_testresult_db.py b/scripts/build_testresult_db.py
new file mode 100755
index 0000000..fda9f64
--- /dev/null
+++ b/scripts/build_testresult_db.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""Collect results of extractor unit tests"""
+
+import sys
+import os.path
+import datetime
+
+import util
+from gallery_dl import extractor, job, config
+from test.test_results import setup_test_config
+
+
+# filter test cases:
+# keep (index, extractor, URL, results) of every test with non-empty results
+tests = [
+    (idx, extr, url, result)
+
+    for extr in extractor.extractors()
+    if hasattr(extr, "test") and extr.test
+    if len(sys.argv) <= 1 or extr.category in sys.argv
+
+    for idx, (url, result) in enumerate(extr._get_tests())
+    if result
+]
+
+
+# setup target directory
+# (one directory per day: archive/testdb/YYYY-MM-DD below the project root)
+path = util.path("archive", "testdb", str(datetime.date.today()))
+os.makedirs(path, exist_ok=True)
+
+
+for idx, extr, url, result in tests:
+    # every test case is written to its own JSON file
+    # filename
+    name = "{}-{}-{}.json".format(extr.category, extr.subcategory, idx)
+    print(name)
+
+    # config values
+    setup_test_config()
+
+    if "options" in result:
+        for key, value in result["options"]:
+            config.set(key.split("."), value)
+    if "range" in result:
+        config.set(("image-range",), result["range"])
+        config.set(("chapter-range",), result["range"])
+
+    # write test data as UTF-8 (ensure_ascii=False keeps non-ASCII text as-is)
+    try:
+        with open(os.path.join(path, name), "w", encoding="utf-8") as outfile:
+            job.DataJob(url, file=outfile, ensure_ascii=False).run()
+    except KeyboardInterrupt:
+        sys.exit()
diff --git a/scripts/create_test_data.py b/scripts/create_test_data.py
new file mode 100755
index 0000000..14ab0c0
--- /dev/null
+++ b/scripts/create_test_data.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2015-2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Create testdata for extractor tests"""
+
+import argparse
+
+import util  # noqa
+from gallery_dl import extractor
+from test.test_results import ResultJob, setup_test_config
+
+
+TESTDATA_FMT = """
+    test = ("{}", {{
+        "url": "{}",
+        "keyword": "{}",
+        "content": "{}",
+    }})
+"""
+# str.format() templates filled by main(); this second one is for failed jobs
+TESTDATA_EXCEPTION_FMT = """
+    test = ("{}", {{
+        "exception": exception.{},
+    }})
+"""
+
+
+def main():
+    """Print 'test = (...)' snippets for the given URLs or categories"""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--content", action="store_true")
+    parser.add_argument("--recreate", action="store_true")
+    parser.add_argument("urls", nargs="*")
+    args = parser.parse_args()
+
+    urls = args.urls
+    if args.recreate:
+        # arguments are categories here; _get_tests() yields (URL, results)
+        # pairs, while iterating a bare 'test = (URL, {...})' would split it
+        urls = [
+            test[0]
+            for extr in extractor.extractors() if extr.category in args.urls
+            for test in extr._get_tests()
+        ]
+
+    setup_test_config()
+
+    for url in urls:
+        tjob = ResultJob(url, content=args.content)
+        try:
+            tjob.run()
+        except Exception as exc:
+            fmt, data = TESTDATA_EXCEPTION_FMT, (exc.__class__.__name__,)
+        else:
+            fmt = TESTDATA_FMT
+            data = (tjob.hash_url.hexdigest(), tjob.hash_keyword.hexdigest(),
+                    tjob.hash_content.hexdigest())
+        print(tjob.extractor.__class__.__name__)
+        print(fmt.format(url, *data))
+
+
+if __name__ == '__main__':  # only when run as a script, not on import
+    main()
diff --git a/scripts/hook-gallery_dl.py b/scripts/hook-gallery_dl.py
new file mode 100644
index 0000000..d549019
--- /dev/null
+++ b/scripts/hook-gallery_dl.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+
+from gallery_dl import extractor, downloader, postprocessor
+
+hiddenimports = [  # "<package>.<module>" for each entry of 'modules'
+    ".".join((pkg.__name__, name))
+    for pkg in (extractor, downloader, postprocessor)
+    for name in pkg.modules
+]
diff --git a/scripts/man.py b/scripts/man.py
new file mode 100755
index 0000000..91608a3
--- /dev/null
+++ b/scripts/man.py
@@ -0,0 +1,304 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Generate man pages"""
+
+import re
+import datetime
+
+import util
+import gallery_dl.option
+import gallery_dl.version
+
+
+def build_gallery_dl_1(path=None):
+    """Write the gallery-dl(1) man page to 'path' (default: gallery-dl.1)"""
+    OPTS_FMT = """.TP\n.B "{}" {}\n{}"""
+
+    TEMPLATE = r"""
+.TH "GALLERY-DL" "1" "%(date)s" "%(version)s" "gallery-dl Manual"
+.\" disable hyphenation
+.nh
+
+.SH NAME
+gallery-dl \- download image-galleries and -collections
+
+.SH SYNOPSIS
+.B gallery-dl
+[OPTION]... URL...
+
+.SH DESCRIPTION
+.B gallery-dl
+is a command-line program to download image-galleries and -collections
+from several image hosting sites. It is a cross-platform tool
+with many configuration options and powerful filenaming capabilities.
+
+.SH OPTIONS
+%(options)s
+
+.SH EXAMPLES
+.TP
+gallery-dl \f[I]URL\f[]
+Download images from \f[I]URL\f[].
+.TP
+gallery-dl -g -u <username> -p <password> \f[I]URL\f[]
+Print direct URLs from a site that requires authentication.
+.TP
+gallery-dl --filter 'type == "ugoira"' --range '2-4' \f[I]URL\f[]
+Apply filter and range expressions. This will only download
+the second, third, and fourth file where its type value is equal to "ugoira".
+.TP
+gallery-dl r:\f[I]URL\f[]
+Scan \f[I]URL\f[] for other URLs and invoke \f[B]gallery-dl\f[] on them.
+.TP
+gallery-dl oauth:\f[I]SITE\-NAME\f[]
+Gain OAuth authentication tokens for
+.IR deviantart ,
+.IR flickr ,
+.IR reddit ,
+.IR smugmug ", and"
+.IR tumblr .
+
+.SH FILES
+.TP
+.I /etc/gallery-dl.conf
+The system wide configuration file.
+.TP
+.I ~/.config/gallery-dl/config.json
+Per user configuration file.
+.TP
+.I ~/.gallery-dl.conf
+Alternate per user configuration file.
+
+.SH BUGS
+https://github.com/mikf/gallery-dl/issues
+
+.SH AUTHORS
+Mike Fährmann <mike_faehrmann@web.de>
+.br
+and https://github.com/mikf/gallery-dl/graphs/contributors
+
+.SH "SEE ALSO"
+.BR gallery-dl.conf (5)
+"""
+
+    entries = []
+    for action in gallery_dl.option.build_parser()._actions:
+        # a help text starting with "==" (argparse.SUPPRESS) hides the option
+        if not action.help.startswith("=="):
+            names = ", ".join(action.option_strings).replace("-", r"\-")
+            metavar = ""
+            if action.metavar:
+                metavar = r"\f[I]{}\f[]".format(action.metavar)
+            entries.append(OPTS_FMT.format(names, metavar, action.help))
+
+    target = path or util.path("gallery-dl.1")
+    with open(target, "w", encoding="utf-8") as file:
+        values = {
+            "options": "\n".join(entries),
+            "version": gallery_dl.version.__version__,
+            "date"   : datetime.datetime.now().strftime("%Y-%m-%d"),
+        }
+        file.write(TEMPLATE.lstrip() % values)
+
+
+def build_gallery_dl_conf_5(path=None):
+    """Write the gallery-dl.conf(5) man page to 'path'"""
+    TEMPLATE = r"""
+.TH "GALLERY-DL.CONF" "5" "%(date)s" "%(version)s" "gallery-dl Manual"
+.\" disable hyphenation
+.nh
+.\" disable justification (adjust text to left margin only)
+.ad l
+
+.SH NAME
+gallery-dl.conf \- gallery-dl configuration file
+
+.SH DESCRIPTION
+gallery-dl will search for configuration files in the following places
+every time it is started, unless
+.B --ignore-config
+is specified:
+.PP
+.RS 4
+.nf
+.I /etc/gallery-dl.conf
+.I $HOME/.config/gallery-dl/config.json
+.I $HOME/.gallery-dl.conf
+.fi
+.RE
+.PP
+It is also possible to specify additional configuration files with the
+.B -c/--config
+command-line option or to add further option values with
+.B -o/--option
+as <key>=<value> pairs.
+
+Configuration files are JSON-based and therefore don't allow any ordinary
+comments, but, since unused keys are simply ignored, it is possible to utilize
+those as makeshift comments by setting their values to arbitrary strings.
+
+.SH EXAMPLE
+{
+.RS 4
+"base-directory": "/tmp/",
+.br
+"extractor": {
+.RS 4
+"pixiv": {
+.RS 4
+"directory": ["Pixiv", "Works", "{user[id]}"],
+.br
+"filename": "{id}{num}.{extension}",
+.br
+"username": "foo",
+.br
+"password": "bar"
+.RE
+},
+.br
+"flickr": {
+.RS 4
+"_comment": "OAuth keys for account 'foobar'",
+.br
+"access-token": "0123456789-0123456789abcdef",
+.br
+"access-token-secret": "fedcba9876543210"
+.RE
+}
+.RE
+},
+.br
+"downloader": {
+.RS 4
+"retries": 3,
+.br
+"timeout": 2.5
+.RE
+}
+.RE
+}
+
+%(options)s
+
+.SH BUGS
+https://github.com/mikf/gallery-dl/issues
+
+.SH AUTHORS
+Mike Fährmann <mike_faehrmann@web.de>
+.br
+and https://github.com/mikf/gallery-dl/graphs/contributors
+
+.SH "SEE ALSO"
+.BR gallery-dl (1)
+"""
+
+    sections = parse_docs_configuration()
+    content = []
+    # sections become .SH, options .SS, and each option field an .IP paragraph
+    for sec_name, section in sections.items():
+        content.append(".SH " + sec_name.upper())
+
+        for opt_name, option in section.items():
+            content.append(".SS " + opt_name)
+            # "Example" texts are passed with extended=False to strip_rst()
+            for field, text in option.items():
+                if field in ("Type", "Default"):
+                    content.append('.IP "{}:" {}'.format(field, len(field)+2))
+                    content.append(strip_rst(text))
+                else:
+                    content.append('.IP "{}:" 4'.format(field))
+                    content.append(strip_rst(text, field != "Example"))
+
+    if not path:
+        path = util.path("gallery-dl.conf.5")
+    with open(path, "w", encoding="utf-8") as file:
+        file.write(TEMPLATE.lstrip() % {
+            "options": "\n".join(content),
+            "version": gallery_dl.version.__version__,
+            "date"   : datetime.datetime.now().strftime("%Y-%m-%d"),
+        })
+
+
+def parse_docs_configuration():
+    """Parse docs/configuration.rst into {section: {option: {field: text}}}"""
+    doc_path = util.path("docs", "configuration.rst")
+    with open(doc_path, encoding="utf-8") as file:
+        doc_lines = file.readlines()
+    # state of the line-by-line scan ('last'/'last2': the two previous lines)
+    sections = {}
+    sec_name = None
+    options = None
+    opt_name = None
+    opt_desc = None
+    name = None
+    last = last2 = None
+    for line in doc_lines:
+
+        # start of new section
+        if re.match(r"^=+$", line):
+            if sec_name and options:
+                sections[sec_name] = options
+            sec_name = last.strip()
+            options = {}
+
+        elif re.match(r"^=+ =+$", line):
+            # start of option table
+            if re.match(r"^-+$", last):
+                opt_name = last2.strip()
+                opt_desc = {}
+            # end of option table
+            elif opt_desc:
+                options[opt_name] = opt_desc
+                opt_name = None
+                name = None
+
+        # inside option table
+        elif opt_name:
+            if line[0].isalpha():
+                name, _, line = line.partition(" ")
+                opt_desc[name] = ""
+            line = line.strip()
+            if line.startswith(("* ", "- ")):
+                line = "\n" + line
+            elif line.startswith("| "):
+                line = line[2:] + "\n.br"
+            opt_desc[name] += line + "\n"
+        # note: 'line' may have been rewritten above; 'last' keeps that form
+        last2 = last
+        last = line
+    sections[sec_name] = options  # store the section that was still open
+
+    return sections
+
+
+def strip_rst(text, extended=True, *, ITALIC=r"\\f[I]\1\\f[]", REGULAR=r"\1"):
+    """Replace inline reST markup in 'text' with roff font escapes"""
+
+    # escape backslashes by doubling them before any markup is rewritten
+    result = text.replace("\\", "\\\\")
+
+    # (pattern, replacement) pairs; they are applied in exactly this order
+    rules = (
+        (r"``([^`]+)``", ITALIC if extended else REGULAR),  # ``foo``
+        (r"\|([^|]+)\|_*", ITALIC),                         # |foo|_
+        (r"`([^`]+)`_+", ITALIC),                           # `foo`_
+        (r"`([^`]+)`", REGULAR),                            # `foo`
+        (r"([A-Za-z0-9-]+)_+(?=\s)", ITALIC),               # foo_
+        (r"---+", ""),                                      # -------
+    )
+    for pattern, replacement in rules:
+        result = re.sub(pattern, replacement, result)
+
+    return result
+
+
+if __name__ == "__main__":  # generate both man pages at their default paths
+    build_gallery_dl_1()
+    build_gallery_dl_conf_5()
diff --git a/scripts/pyinstaller.py b/scripts/pyinstaller.py
new file mode 100755
index 0000000..879ae50
--- /dev/null
+++ b/scripts/pyinstaller.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""Build a standalone executable using PyInstaller"""
+
+import PyInstaller.__main__
+import util
+
+exe_name = "gallery-dl." + ("exe" if PyInstaller.is_win else "bin")
+arguments = [
+    "--onefile", "--console", "--name", exe_name,
+    "--additional-hooks-dir", util.path("scripts"),
+    "--distpath", util.path("dist"),
+    "--workpath", util.path("build"),
+    "--specpath", util.path("build"),
+    util.path("gallery_dl", "__main__.py"),
+]
+PyInstaller.__main__.run(arguments)
diff --git a/scripts/release.sh b/scripts/release.sh
new file mode 100755
index 0000000..ef444e0
--- /dev/null
+++ b/scripts/release.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+set -e
+
+prompt() {
+    # show the release parameters and ask for confirmation (default: yes)
+    echo "root: ${ROOTDIR} old: ${OLDVERSION} - new: ${NEWVERSION}"
+    read -n 1 -r -p "Proceed? [Y/n] " P
+    echo
+    case "$P" in
+        y|Y|"") return 0 ;;
+        *)      exit 1 ;;
+    esac
+}
+
+cleanup() {
+    cd "${ROOTDIR}"
+    echo Removing old build directory
+    # delete the ./build directory if one exists
+    if [ -d ./build ]; then
+        rm -rf ./build
+    fi
+}
+
+update() {
+    cd "${ROOTDIR}"
+    echo Updating version to ${NEWVERSION}
+    # "<PYVERSION>" -> "<NEWVERSION>" in version.py; v<OLD> -> v<NEW> in README
+    sed -i "s#\"${PYVERSION}\"#\"${NEWVERSION}\"#" "gallery_dl/version.py"
+    sed -i "s#v${OLDVERSION}#v${NEWVERSION}#" "${README}"
+}
+
+update-dev() {
+    cd "${ROOTDIR}"
+    # NOTE(review): assumes plain X.Y.Z; a "-suffix" would end up in BUILD
+    IFS="." read MAJOR MINOR BUILD <<< "${NEWVERSION}"
+    BUILD=$((BUILD+1))
+    # update version to -dev
+    sed -i "s#\"${NEWVERSION}\"#\"${MAJOR}.${MINOR}.${BUILD}-dev\"#" "gallery_dl/version.py"
+    # add 'unreleased' line to changelog
+    sed -i "2i\\\n## Unreleased" "${CHANGELOG}"
+
+    git add "gallery_dl/version.py" "${CHANGELOG}"
+}
+
+build-python() {
+    cd "${ROOTDIR}"
+    echo Building bdist_wheel and sdist
+    # sign() and pypi-upload() later look for the results in ./dist
+    python setup.py bdist_wheel sdist
+}
+
+build-linux() {
+    cd "${ROOTDIR}"
+    echo Building Linux executable
+    # runs the 'executable' make target (the Makefile is not part of this diff)
+    make executable
+}
+
+build-windows() {
+    cd "${ROOTDIR}/dist"
+    echo Building Windows executable
+    # presumably the VM started below writes gallery-dl.exe here - TODO confirm
+    # remove old executable
+    rm -f "gallery-dl.exe"
+    # NOTE(review): the wait loop has no timeout; it polls until the file exists
+    # build windows exe in vm
+    ln -fs "${ROOTDIR}" /tmp/
+    vmstart "Windows 7" &
+    disown
+    while [ ! -e "gallery-dl.exe" ] ; do
+        sleep 5
+    done
+    sleep 2
+
+    # check exe version ("%?" drops the last character of the output first)
+    OUTPUT="$(wine gallery-dl.exe --version)"
+    if [[ ! "${OUTPUT%?}" == "${NEWVERSION}" ]]; then
+        echo "exe version mismatch: ${OUTPUT} != ${NEWVERSION}"
+        exit 3
+    fi
+}
+
+sign() {
+    cd "${ROOTDIR}/dist"
+    echo Signing files
+    # --armor: ASCII signatures for the packages; --yes: no overwrite prompt
+    gpg --detach-sign --armor gallery_dl-${NEWVERSION}-py3-none-any.whl
+    gpg --detach-sign --armor gallery_dl-${NEWVERSION}.tar.gz
+    gpg --detach-sign --yes gallery-dl.exe
+    gpg --detach-sign --yes gallery-dl.bin
+}
+
+changelog() {
+    cd "${ROOTDIR}"
+    echo Updating "${CHANGELOG}"
+    # both expressions edit ${CHANGELOG} in place; '*' is the sed delimiter
+    # - replace "#NN" with link to actual issue
+    # - insert new version and date
+    sed -i \
+        -e "s*\([( ]\)#\([0-9]\+\)*\1[#\2](https://github.com/mikf/gallery-dl/issues/\2)*g" \
+        -e "s*^## [Uu]nreleased*## ${NEWVERSION} - $(date +%Y-%m-%d)*" \
+        "${CHANGELOG}"
+}
+
+supportedsites() {
+    cd "${ROOTDIR}"
+    echo Checking if "${SUPPORTEDSITES}" is up to date
+    # regenerate the file, then abort (exit 4) if git reports a difference
+    ./scripts/supportedsites.py
+    if ! git diff --quiet "${SUPPORTEDSITES}"; then
+        echo "updated ${SUPPORTEDSITES} contains changes"
+        exit 4
+    fi
+}
+
+git-upload() {
+    cd "${ROOTDIR}"
+    echo Pushing changes to github
+    # signed commit (-S) and signed tag (-s); pushes the branch, then the tag
+    git add "gallery_dl/version.py" "${README}" "${CHANGELOG}"
+    git commit -S -m "release version ${NEWVERSION}"
+    git tag -s -m "version ${NEWVERSION}" "v${NEWVERSION}"
+    git push
+    git push origin "v${NEWVERSION}"
+}
+
+pypi-upload() {
+    cd "${ROOTDIR}/dist"
+    echo Uploading to PyPI
+    # the glob matches every file in dist/ named gallery_dl-<NEWVERSION>*
+    twine upload gallery_dl-${NEWVERSION}*
+}
+
+
+ROOTDIR="$(realpath "$(dirname "$0")/..")/"
+README="README.rst"
+CHANGELOG="CHANGELOG.md"
+SUPPORTEDSITES="./docs/supportedsites.rst"
+
+LASTTAG="$(git describe --abbrev=0 --tags)"
+OLDVERSION="${LASTTAG#v}"
+PYVERSION="$(python -c "import gallery_dl as g; print(g.__version__)")"
+# version to release: first argument, else PYVERSION without its "-dev" suffix
+if [[ "$1" ]]; then
+    NEWVERSION="$1"
+else
+    NEWVERSION="${PYVERSION%-dev}"
+fi
+# the whole string must be X.Y.Z, optionally followed by -word or -word.N
+if [[ ! $NEWVERSION =~ ^[0-9]+\.[0-9]+\.[0-9]+(-[a-z]+(\.[0-9]+)?)?$ ]]; then
+    echo "invalid version: $NEWVERSION"
+    exit 2
+fi
+
+# release steps; with 'set -e' the first failing step aborts the script
+prompt
+supportedsites
+cleanup
+update
+build-python
+build-linux
+build-windows
+sign
+changelog
+git-upload
+pypi-upload
+update-dev
diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh
new file mode 100755
index 0000000..334671e
--- /dev/null
+++ b/scripts/run_tests.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+TESTS_CORE=(config cookies downloader extractor oauth text util)
+TESTS_RESULTS=(results)
+
+
+# select tests: first argument, else $GALLERYDL_TESTS, else "core"
+case "${1:-${GALLERYDL_TESTS:-core}}" in
+    core)    TESTS=( "${TESTS_CORE[@]}"    );;
+    results) TESTS=( "${TESTS_RESULTS[@]}" );;
+    *)       TESTS=(                       );;
+esac
+
+
+# transform each array element to test_###.py
+TESTS=( "${TESTS[@]/#/test_}" )
+TESTS=( "${TESTS[@]/%/.py}"   )
+
+
+# run 'nosetests' with selected tests
+# (or all tests if ${TESTS} is empty; a quoted empty array adds no argument)
+nosetests --verbose -w "${DIR}/../test" "${TESTS[@]}"
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
new file mode 100755
index 0000000..f326617
--- /dev/null
+++ b/scripts/supportedsites.py
@@ -0,0 +1,264 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""Generate a reStructuredText document with all supported sites"""
+
+import sys
+import collections
+
+import util
+from gallery_dl import extractor
+
+
+CATEGORY_MAP = {  # category -> display name (read by category_text)
+    "2chan"          : "Futaba Channel",
+    "35photo"        : "35PHOTO",
+    "archivedmoe"    : "Archived.Moe",
+    "archiveofsins"  : "Archive of Sins",
+    "artstation"     : "ArtStation",
+    "b4k"            : "arch.b4k.co",
+    "bobx"           : "BobX",
+    "deviantart"     : "DeviantArt",
+    "dokireader"     : "Doki Reader",
+    "dynastyscans"   : "Dynasty Reader",
+    "e621"           : "e621",
+    "exhentai"       : "ExHentai",
+    "fallenangels"   : "Fallen Angels Scans",
+    "fashionnova"    : "Fashion Nova",
+    "hbrowse"        : "HBrowse",
+    "hentai2read"    : "Hentai2Read",
+    "hentaicafe"     : "Hentai Cafe",
+    "hentaifoundry"  : "Hentai Foundry",
+    "hentaifox"      : "HentaiFox",
+    "hentaihere"     : "HentaiHere",
+    "hitomi"         : "Hitomi.la",
+    "idolcomplex"    : "Idol Complex",
+    "imagebam"       : "ImageBam",
+    "imagefap"       : "ImageFap",
+    "imgbox"         : "imgbox",
+    "imgth"          : "imgth",
+    "imgur"          : "imgur",
+    "jaiminisbox"    : "Jaimini's Box",
+    "kireicake"      : "Kirei Cake",
+    "kissmanga"      : "KissManga",
+    "livedoor"       : "livedoor Blog",
+    "mangadex"       : "MangaDex",
+    "mangafox"       : "Manga Fox",
+    "mangahere"      : "Manga Here",
+    "mangapark"      : "MangaPark",
+    "mangastream"    : "Manga Stream",
+    "myportfolio"    : "Adobe Portfolio",
+    "nhentai"        : "nhentai",
+    "nijie"          : "nijie",
+    "nsfwalbum"      : "NSFWalbum.com",
+    "nyafuu"         : "Nyafuu Archive",
+    "paheal"         : "rule #34",
+    "powermanga"     : "PowerManga",
+    "readcomiconline": "Read Comic Online",
+    "rbt"            : "RebeccaBlackTech",
+    "rule34"         : "Rule 34",
+    "sankaku"        : "Sankaku Channel",
+    "sankakucomplex" : "Sankaku Complex",
+    "seaotterscans"  : "Sea Otter Scans",
+    "seiga"          : "Niconico Seiga",
+    "senmanga"       : "Sen Manga",
+    "sensescans"     : "Sense-Scans",
+    "sexcom"         : "Sex.com",
+    "simplyhentai"   : "Simply Hentai",
+    "slickpic"       : "SlickPic",
+    "slideshare"     : "SlideShare",
+    "smugmug"        : "SmugMug",
+    "thebarchive"    : "The /b/ Archive",
+    "vanillarock"    : "もえぴりあ",
+    "wikiart"        : "WikiArt.org",
+    "worldthree"     : "World Three",
+    "xhamster"       : "xHamster",
+    "xvideos"        : "XVideos",
+    "yaplog"         : "yaplog!",
+    "yuki"           : "yuki.la 4chan archive",
+}
+
+SUBCATEGORY_MAP = {  # subcategory -> capability text ("": not listed)
+    "artwork": "Artwork Listings",
+    "artists": "",
+    "doujin" : "Doujin",
+    "gallery": "Galleries",
+    "image"  : "individual Images",
+    "issue"  : "Comic-Issues",
+    "manga"  : "Manga",
+    "me"     : "pixiv.me Links",
+    "media"  : "Media Timelines",
+    "path"   : "Images from Users and Folders",
+    "pinit"  : "pin.it Links",
+    "popular": "Popular Images",
+    "recent" : "Recent Images",
+    "search" : "Search Results",
+    "stash"  : "Sta.sh",
+    "status" : "Images from Statuses",
+    "tag"    : "Tag-Searches",
+    "user"   : "Images from Users",
+    "work"   : "Individual Images",
+    "related-pin"  : "related Pins",
+    "related-board": "",
+}
+
+AUTH_MAP = {  # category -> text of the "Authentication" table column
+    "danbooru"   : "Optional",
+    "deviantart" : "Optional (OAuth)",
+    "exhentai"   : "Optional",
+    "flickr"     : "Optional (OAuth)",
+    "idolcomplex": "Optional",
+    "luscious"   : "Optional",
+    "mangoxo"    : "Optional",
+    "nijie"      : "Required",
+    "pixiv"      : "Required",
+    "reddit"     : "Optional (OAuth)",
+    "sankaku"    : "Optional",
+    "seiga"      : "Required",
+    "smugmug"    : "Optional (OAuth)",
+    "tsumino"    : "Optional",
+    "tumblr"     : "Optional (OAuth)",
+    "twitter"    : "Optional",
+}
+
+IGNORE_LIST = (  # categories skipped by build_extractor_list
+    "directlink",
+    "oauth",
+    "recursive",
+    "test",
+)
+
+
+def domain(cls):
+    """Return the web-domain related to an extractor class"""
+    # sources, in order: module docstring, 'root', 'https', first test URL
+    words = (sys.modules[cls.__module__].__doc__ or "").split()
+    if words and words[-1].startswith("http"):
+        return words[-1]
+
+    if hasattr(cls, "root") and cls.root:
+        return cls.root + "/"
+
+    if hasattr(cls, "https"):
+        scheme = "https" if cls.https else "http"
+        netloc = cls.__doc__.split()[-1]
+        return "{}://{}/".format(scheme, netloc)
+
+    test = next(cls._get_tests(), None)
+    if test:
+        end = test[0].find("/", 8)  # first "/" after the "scheme://" prefix
+        return test[0][:end+1] if end >= 0 else test[0] + "/"
+    return ""
+
+
+def category_text(cls):
+    """Return the display name of an extractor's category"""
+    name = CATEGORY_MAP.get(cls.category)
+    return name if name else cls.category.capitalize()
+
+
+def subcategory_text(cls):
+    """Return the display text of an extractor's subcategory"""
+    try:
+        return SUBCATEGORY_MAP[cls.subcategory]
+    except KeyError:
+        text = cls.subcategory.capitalize()
+        return text + ("" if text.endswith("s") else "s")
+
+
+def category_key(cls):
+    """Return the sort key for an extractor's category"""
+    # classes from a module named '*.imagehosts' get a "zz" prefix, which
+    # moves them towards the end of the sorted list
+    prefix = "zz" if cls.__module__.endswith(".imagehosts") else ""
+    return prefix + category_text(cls).lower()
+
+
+def subcategory_key(cls):
+    """Return the sort key for an extractor's subcategory"""
+    # "user" and "issue" map to "A", which sorts before any lowercase name
+    sc = cls.subcategory
+    return "A" if sc in ("user", "issue") else sc
+
+
+def build_extractor_list():
+    """Generate a sorted list of lists of extractor classes"""
+    by_category = collections.defaultdict(list)
+    for extr in extractor.extractors():
+        # classes without a category or with an ignored one are left out
+        if extr.category and extr.category not in IGNORE_LIST:
+            by_category[extr.category].append(extr)
+
+    # order the classes inside each category group ...
+    groups = list(by_category.values())
+    for group in groups:
+        group.sort(key=subcategory_key)
+
+    # ... and then the groups themselves, keyed by their first class
+    def first_class_key(group):
+        return category_key(group[0])
+
+    groups.sort(key=first_class_key)
+    return groups
+
+
+# table columns: (header, width, function turning an extractor list into text)
+COLUMNS = (
+    ("Site", 20,
+     lambda x: category_text(x[0])),
+    ("URL" , 35,
+     lambda x: domain(x[0])),
+    ("Capabilities", 50,
+     lambda x: ", ".join(subcategory_text(extr) for extr in x
+                         if subcategory_text(extr))),
+    ("Authentication", 16,
+     lambda x: AUTH_MAP.get(x[0].category, "")),
+)
+
+
+def write_output(fobj, columns, extractors):
+    """Write the "Supported Sites" reST table for 'extractors' to 'fobj'"""
+    write = fobj.write
+    replacements = []
+
+    def fit(text, col, category=None):
+        # non-string input is rendered with the column's function; a cell
+        # wider than its column becomes a "|category-X|" substitution
+        width = col[1]
+        if not isinstance(text, str):
+            text = col[2](text)
+        if len(text) > width:
+            reference = "|{}-{}|".format(category, col[0][0])
+            replacements.append((reference, text))
+            text = reference
+        return text.ljust(width)
+
+    # caption
+    write("Supported Sites\n")
+    write("===============\n")
+
+    # table head
+    separator = " ".join("=" * col[1] for col in columns) + "\n"
+    write(separator)
+    write(" ".join(fit(col[0], col) for col in columns).strip() + "\n")
+    write(separator)
+
+    # table body: one row per group of extractor classes
+    for group in extractors:
+        cells = [fit(col[2](group), col, group[0].category) for col in columns]
+        write(" ".join(cells).strip())
+        write("\n")
+
+    # table bottom
+    write(separator)
+    write("\n")
+
+    # substitutions: definitions for the references created in fit()
+    # (written in the order in which the cells were replaced)
+    for reference, text in replacements:
+        write(".. {} replace:: {}\n".format(reference, text))
+
+
+outfile = sys.argv[1] if len(sys.argv) > 1 else "supportedsites.rst"
+with open(util.path("docs", outfile), "w", encoding="utf-8") as file:
+    write_output(file, COLUMNS, build_extractor_list())  # some non-ASCII names
diff --git a/scripts/util.py b/scripts/util.py
new file mode 100644
index 0000000..bfbd6cb
--- /dev/null
+++ b/scripts/util.py
@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import os.path
+
+ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, os.path.realpath(ROOTDIR))  # ROOTDIR first on sys.path
+
+
+def path(*segments, join=os.path.join):
+    return join(ROOTDIR, *segments)  # 'segments' joined onto ROOTDIR