summaryrefslogtreecommitdiffstats
path: root/scripts/supportedsites.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/supportedsites.py')
-rwxr-xr-xscripts/supportedsites.py264
1 files changed, 264 insertions, 0 deletions
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
new file mode 100755
index 0000000..f326617
--- /dev/null
+++ b/scripts/supportedsites.py
@@ -0,0 +1,264 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""Generate a reStructuredText document with all supported sites"""
+
+import sys
+import collections
+
+import util
+from gallery_dl import extractor
+
+
+CATEGORY_MAP = {
+ "2chan" : "Futaba Channel",
+ "35photo" : "35PHOTO",
+ "archivedmoe" : "Archived.Moe",
+ "archiveofsins" : "Archive of Sins",
+ "artstation" : "ArtStation",
+ "b4k" : "arch.b4k.co",
+ "bobx" : "BobX",
+ "deviantart" : "DeviantArt",
+ "dokireader" : "Doki Reader",
+ "dynastyscans" : "Dynasty Reader",
+ "e621" : "e621",
+ "exhentai" : "ExHentai",
+ "fallenangels" : "Fallen Angels Scans",
+ "fashionnova" : "Fashion Nova",
+ "hbrowse" : "HBrowse",
+ "hentai2read" : "Hentai2Read",
+ "hentaicafe" : "Hentai Cafe",
+ "hentaifoundry" : "Hentai Foundry",
+ "hentaifox" : "HentaiFox",
+ "hentaihere" : "HentaiHere",
+ "hitomi" : "Hitomi.la",
+ "idolcomplex" : "Idol Complex",
+ "imagebam" : "ImageBam",
+ "imagefap" : "ImageFap",
+ "imgbox" : "imgbox",
+ "imgth" : "imgth",
+ "imgur" : "imgur",
+ "jaiminisbox" : "Jaimini's Box",
+ "kireicake" : "Kirei Cake",
+ "kissmanga" : "KissManga",
+ "livedoor" : "livedoor Blog",
+ "mangadex" : "MangaDex",
+ "mangafox" : "Manga Fox",
+ "mangahere" : "Manga Here",
+ "mangapark" : "MangaPark",
+ "mangastream" : "Manga Stream",
+ "myportfolio" : "Adobe Portfolio",
+ "nhentai" : "nhentai",
+ "nijie" : "nijie",
+ "nsfwalbum" : "NSFWalbum.com",
+ "nyafuu" : "Nyafuu Archive",
+ "paheal" : "rule #34",
+ "powermanga" : "PowerManga",
+ "readcomiconline": "Read Comic Online",
+ "rbt" : "RebeccaBlackTech",
+ "rule34" : "Rule 34",
+ "sankaku" : "Sankaku Channel",
+ "sankakucomplex" : "Sankaku Complex",
+ "seaotterscans" : "Sea Otter Scans",
+ "seiga" : "Niconico Seiga",
+ "senmanga" : "Sen Manga",
+ "sensescans" : "Sense-Scans",
+ "sexcom" : "Sex.com",
+ "simplyhentai" : "Simply Hentai",
+ "slickpic" : "SlickPic",
+ "slideshare" : "SlideShare",
+ "smugmug" : "SmugMug",
+ "thebarchive" : "The /b/ Archive",
+ "vanillarock" : "もえぴりあ",
+ "wikiart" : "WikiArt.org",
+ "worldthree" : "World Three",
+ "xhamster" : "xHamster",
+ "xvideos" : "XVideos",
+ "yaplog" : "yaplog!",
+ "yuki" : "yuki.la 4chan archive",
+}
+
+SUBCATEGORY_MAP = {
+ "artwork": "Artwork Listings",
+ "artists": "",
+ "doujin" : "Doujin",
+ "gallery": "Galleries",
+ "image" : "individual Images",
+ "issue" : "Comic-Issues",
+ "manga" : "Manga",
+ "me" : "pixiv.me Links",
+ "media" : "Media Timelines",
+ "path" : "Images from Users and Folders",
+ "pinit" : "pin.it Links",
+ "popular": "Popular Images",
+ "recent" : "Recent Images",
+ "search" : "Search Results",
+ "stash" : "Sta.sh",
+ "status" : "Images from Statuses",
+ "tag" : "Tag-Searches",
+ "user" : "Images from Users",
+ "work" : "Individual Images",
+ "related-pin" : "related Pins",
+ "related-board": "",
+}
+
+AUTH_MAP = {
+ "danbooru" : "Optional",
+ "deviantart" : "Optional (OAuth)",
+ "exhentai" : "Optional",
+ "flickr" : "Optional (OAuth)",
+ "idolcomplex": "Optional",
+ "luscious" : "Optional",
+ "mangoxo" : "Optional",
+ "nijie" : "Required",
+ "pixiv" : "Required",
+ "reddit" : "Optional (OAuth)",
+ "sankaku" : "Optional",
+ "seiga" : "Required",
+ "smugmug" : "Optional (OAuth)",
+ "tsumino" : "Optional",
+ "tumblr" : "Optional (OAuth)",
+ "twitter" : "Optional",
+}
+
+IGNORE_LIST = (
+ "directlink",
+ "oauth",
+ "recursive",
+ "test",
+)
+
+
+def domain(cls):
+ """Return the web-domain related to an extractor class"""
+ url = sys.modules[cls.__module__].__doc__.split()[-1]
+ if url.startswith("http"):
+ return url
+
+ if hasattr(cls, "root") and cls.root:
+ return cls.root + "/"
+
+ if hasattr(cls, "https"):
+ scheme = "https" if cls.https else "http"
+ netloc = cls.__doc__.split()[-1]
+ return "{}://{}/".format(scheme, netloc)
+
+ test = next(cls._get_tests(), None)
+ if test:
+ url = test[0]
+ return url[:url.find("/", 8)+1]
+
+ return ""
+
+
+def category_text(cls):
+ """Return a human-readable representation of a category"""
+ c = cls.category
+ return CATEGORY_MAP.get(c) or c.capitalize()
+
+
+def subcategory_text(cls):
+ """Return a human-readable representation of a subcategory"""
+ sc = cls.subcategory
+ if sc in SUBCATEGORY_MAP:
+ return SUBCATEGORY_MAP[sc]
+ sc = sc.capitalize()
+ return sc if sc.endswith("s") else sc + "s"
+
+
+def category_key(cls):
+ """Generate sorting keys by category"""
+ key = category_text(cls).lower()
+ if cls.__module__.endswith(".imagehosts"):
+ key = "zz" + key
+ return key
+
+
+def subcategory_key(cls):
+ """Generate sorting keys by subcategory"""
+ if cls.subcategory in ("user", "issue"):
+ return "A"
+ return cls.subcategory
+
+
+def build_extractor_list():
+ """Generate a sorted list of lists of extractor classes"""
+ extractors = collections.defaultdict(list)
+
+ # get lists of extractor classes grouped by category
+ for extr in extractor.extractors():
+ if not extr.category or extr.category in IGNORE_LIST:
+ continue
+ extractors[extr.category].append(extr)
+
+ # sort extractor lists with the same category
+ for extrlist in extractors.values():
+ extrlist.sort(key=subcategory_key)
+
+ # sort lists by category
+ return sorted(
+ extractors.values(),
+ key=lambda lst: category_key(lst[0]),
+ )
+
+
+# define table columns
+COLUMNS = (
+ ("Site", 20,
+ lambda x: category_text(x[0])),
+ ("URL" , 35,
+ lambda x: domain(x[0])),
+ ("Capabilities", 50,
+ lambda x: ", ".join(subcategory_text(extr) for extr in x
+ if subcategory_text(extr))),
+ ("Authentication", 16,
+ lambda x: AUTH_MAP.get(x[0].category, "")),
+)
+
+
+def write_output(fobj, columns, extractors):
+
+ def pad(output, col, category=None):
+ size = col[1]
+ output = output if isinstance(output, str) else col[2](output)
+
+ if len(output) > size:
+ sub = "|{}-{}|".format(category, col[0][0])
+ subs.append((sub, output))
+ output = sub
+
+ return output + " " * (size - len(output))
+
+ w = fobj.write
+ subs = []
+
+ # caption
+ w("Supported Sites\n")
+ w("===============\n")
+
+ # table head
+ sep = " ".join("=" * c[1] for c in columns) + "\n"
+ w(sep)
+ w(" ".join(pad(c[0], c) for c in columns).strip() + "\n")
+ w(sep)
+
+ # table body
+ for lst in extractors:
+ w(" ".join(
+ pad(col[2](lst), col, lst[0].category)
+ for col in columns
+ ).strip())
+ w("\n")
+
+ # table bottom
+ w(sep)
+ w("\n")
+
+ # substitutions
+ for sub, value in subs:
+ w(".. {} replace:: {}\n".format(sub, value))
+
+
+outfile = sys.argv[1] if len(sys.argv) > 1 else "supportedsites.rst"
+with open(util.path("docs", outfile), "w") as file:
+ write_output(file, COLUMNS, build_extractor_list())