author     Unit 193 <unit193@unit193.net>  2020-11-13 19:17:03 -0500
committer  Unit 193 <unit193@unit193.net>  2020-11-13 19:17:03 -0500
commit     209a3c800871cd68edd2bc7ae661a24ecd496d2d (patch)
tree       cf81c47ab57540b58292295c7d5641e9d2668291 /gallery_dl/extractor
parent     5dc7d6f5902ddaee5223d041d5c10060f0c72430 (diff)

New upstream version 1.15.3 (tag: upstream/1.15.3)
Diffstat (limited to 'gallery_dl/extractor')
-rw-r--r--  gallery_dl/extractor/2chan.py            8
-rw-r--r--  gallery_dl/extractor/500px.py           12
-rw-r--r--  gallery_dl/extractor/8kun.py            20
-rw-r--r--  gallery_dl/extractor/__init__.py         2
-rw-r--r--  gallery_dl/extractor/common.py           9
-rw-r--r--  gallery_dl/extractor/deviantart.py       3
-rw-r--r--  gallery_dl/extractor/exhentai.py        27
-rw-r--r--  gallery_dl/extractor/gfycat.py           2
-rw-r--r--  gallery_dl/extractor/hentaifoundry.py   34
-rw-r--r--  gallery_dl/extractor/khinsider.py        7
-rw-r--r--  gallery_dl/extractor/mangoxo.py         11
-rw-r--r--  gallery_dl/extractor/paheal.py           8
-rw-r--r--  gallery_dl/extractor/sankakucomplex.py  79
-rw-r--r--  gallery_dl/extractor/twitter.py         98
-rw-r--r--  gallery_dl/extractor/weasyl.py           3
15 files changed, 237 insertions, 86 deletions
diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py
index c34cfec..51e461e 100644
--- a/gallery_dl/extractor/2chan.py
+++ b/gallery_dl/extractor/2chan.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2019 Mike Fährmann
+# Copyright 2017-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,9 +21,9 @@ class _2chanThreadExtractor(Extractor):
archive_fmt = "{board}_{thread}_{tim}"
url_fmt = "https://{server}.2chan.net/{board}/src/{filename}"
pattern = r"(?:https?://)?([^.]+)\.2chan\.net/([^/]+)/res/(\d+)"
- test = ("http://dec.2chan.net/70/res/947.htm", {
- "url": "c5c12b80b290e224b6758507b3bb952044f4595b",
- "keyword": "4bd22e7a9c3636faecd6ea7082509e8655e10dd0",
+ test = ("http://dec.2chan.net/70/res/11048.htm", {
+ "url": "2ecf919139bd5d915930530b3576d67c388a2a49",
+ "keyword": "8def4ec98a89fd4fff8bbcbae603604dcb4a3bb9",
})
def __init__(self, match):
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index 624b14d..df9941a 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -109,8 +109,8 @@ class _500pxUserExtractor(_500pxExtractor):
variables = {"username": self.user, "pageSize": 20}
photos = self._request_graphql(
"OtherPhotosQuery", variables,
- "54524abbdc809f8d4e10d37839e8ab2d"
- "3035413688cad9c7fbece13b66637e9d",
+ "018a5e5117bd72bdf28066aad02c4f2d"
+ "8acdf7f6127215d231da60e24080eb1b",
)["user"]["photos"]
while True:
@@ -122,8 +122,8 @@ class _500pxUserExtractor(_500pxExtractor):
variables["cursor"] = photos["pageInfo"]["endCursor"]
photos = self._request_graphql(
"OtherPhotosPaginationContainerQuery", variables,
- "6d31e01104456ce642a2c6fc2f936812"
- "b0f2a65c442d03e1521d769c20efe507",
+ "b4af70d42c71a5e43f0be36ce60dc81e"
+ "9742ebc117cde197350f2b86b5977d98",
)["userByUsername"]["photos"]
@@ -153,7 +153,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
def metadata(self):
user = self._request_graphql(
"ProfileRendererQuery", {"username": self.user_name},
- "4d02ff5c13927a3ac73b3eef306490508bc765956940c31051468cf30402a503",
+ "5a17a9af1830b58b94a912995b7947b24f27f1301c6ea8ab71a9eb1a6a86585b",
)["profile"]
self.user_id = str(user["legacyId"])
@@ -166,7 +166,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
}
gallery = self._request_graphql(
"GalleriesDetailQueryRendererQuery", variables,
- "fd367cacf9bebcdc0620bd749dbd8fc9b0ccbeb54fc76b8b4b95e66a8c0cba49",
+ "fb8bb66d31b58903e2f01ebe66bbe7937b982753be3211855b7bce4e286c1a49",
)["gallery"]
self._photos = gallery["photos"]
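The replaced hex strings are GraphQL persisted-query identifiers: the server stores the query text and clients reference it by the SHA-256 hash of that text, so every change to the frontend's queries invalidates the old hashes. A hedged sketch of what such a request typically looks like; the endpoint and request envelope below are assumptions for illustration, not taken from this diff:

    import requests

    def request_graphql(opname, variables, query_hash):
        # Apollo-style persisted-query envelope (assumed)
        return requests.post(
            "https://api.500px.com/graphql",      # assumed endpoint
            json={
                "operationName": opname,
                "variables": variables,
                "extensions": {"persistedQuery": {
                    "version": 1, "sha256Hash": query_hash}},
            },
        ).json()["data"]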
diff --git a/gallery_dl/extractor/8kun.py b/gallery_dl/extractor/8kun.py
index 47fe672..e55bb08 100644
--- a/gallery_dl/extractor/8kun.py
+++ b/gallery_dl/extractor/8kun.py
@@ -20,10 +20,17 @@ class _8kunThreadExtractor(Extractor):
filename_fmt = "{time}{num:?-//} {filename}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
pattern = r"(?:https?://)?8kun\.top/([^/]+)/res/(\d+)"
- test = ("https://8kun.top/test/res/65248.html", {
- "pattern": r"https://media\.8kun\.top/file_store/\w{64}\.\w+",
- "count": ">= 8",
- })
+ test = (
+ ("https://8kun.top/test/res/65248.html", {
+ "pattern": r"https://media\.8kun\.top/file_store/\w{64}\.\w+",
+ "count": ">= 8",
+ }),
+ # old-style file URLs (#1101)
+ ("https://8kun.top/d/res/13258.html", {
+ "pattern": r"https://media\.8kun\.top/d/src/\d+(-\d)?\.\w+",
+ "range": "1-20",
+ }),
+ )
def __init__(self, match):
Extractor.__init__(self, match)
@@ -56,7 +63,10 @@ class _8kunThreadExtractor(Extractor):
def _process(post, data):
post.update(data)
post["extension"] = post["ext"][1:]
- url = "https://media.8kun.top/file_store/" + post["tim"] + post["ext"]
+ tim = post["tim"]
+ url = ("https://media.8kun.top/" +
+ ("file_store/" if len(tim) > 16 else post["board"] + "/src/") +
+ tim + post["ext"])
return Message.Url, url, post
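The new URL logic keys off the length of "tim": new-style uploads have 64-character hash filenames served from /file_store/, while old-style posts (#1101) keep short numeric timestamps under the board's own /src/ directory. A standalone sketch of the branch, assuming post dicts shaped like the 8kun JSON API output:

    def build_file_url(post):
        tim = post["tim"]
        if len(tim) > 16:
            path = "file_store/"              # new-style: 64-char hash name
        else:
            path = post["board"] + "/src/"    # old-style: per-board timestamp
        return "https://media.8kun.top/" + path + tim + post["ext"]

    # build_file_url({"tim": "1510144378", "ext": ".png", "board": "d"})
    # -> "https://media.8kun.top/d/src/1510144378.png"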
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index b8e39bc..d0c327a 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -185,6 +185,8 @@ def _list_classes():
module = importlib.import_module("."+module_name, __package__)
yield from add_module(module)
+ globals()["_list_classes"] = lambda : _cache
+
def _get_classes(module):
"""Return a list of all extractor classes in a module"""
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 357deac..5efea4a 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -518,6 +518,15 @@ class SharedConfigMixin():
), key, default,
)
+ def config_accumulate(self, key):
+ values = config.accumulate(self._cfgpath, key)
+
+ conf = config.get(("extractor",), self.basecategory)
+ if conf:
+ values[:0] = config.accumulate((self.subcategory,), key, conf=conf)
+
+ return values
+
def generate_extractors(extractor_data, symtable, classes):
"""Dynamically generate Extractor classes"""
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index e40ec51..456a173 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -165,11 +165,12 @@ class DeviantartExtractor(Extractor):
# filename metadata
alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
+ deviation["index_base36"] = util.bencode(deviation["index"], alphabet)
sub = re.compile(r"\W").sub
deviation["filename"] = "".join((
sub("_", deviation["title"].lower()), "_by_",
sub("_", deviation["author"]["username"].lower()), "-d",
- util.bencode(deviation["index"], alphabet),
+ deviation["index_base36"],
))
@staticmethod
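util.bencode converts the numeric deviation index into the base-36 string DeviantArt uses in its "-d…" filename suffixes; storing it as index_base36 also makes the value available to user-defined filename format strings. A self-contained sketch of that encoding (assumed to match util.bencode for this alphabet):

    ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"

    def base36(num):
        digits = ""
        while num:
            num, rem = divmod(num, 36)
            digits = ALPHABET[rem] + digits
        return digits or "0"

    # base36(1000) -> "rs"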
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 06b5ba2..4ead3fb 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -347,24 +347,33 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
@staticmethod
def _parse_image_info(url):
- parts = url.split("/")[4].split("-")
+ for part in url.split("/")[4:]:
+ try:
+ _, size, width, height, _ = part.split("-")
+ break
+ except ValueError:
+ pass
+ else:
+ size = width = height = 0
+
return {
- "width": text.parse_int(parts[2]),
- "height": text.parse_int(parts[3]),
- "size": text.parse_int(parts[1]),
- "cost": 1,
+ "cost" : 1,
+ "size" : text.parse_int(size),
+ "width" : text.parse_int(width),
+ "height": text.parse_int(height),
}
@staticmethod
def _parse_original_info(info):
parts = info.lstrip().split(" ")
size = text.parse_bytes(parts[3] + parts[4][0])
+
return {
- "width": text.parse_int(parts[0]),
- "height": text.parse_int(parts[2]),
- "size": size,
# 1 initial point + 1 per 0.1 MB
- "cost": 1 + math.ceil(size / 100000)
+ "cost" : 1 + math.ceil(size / 100000),
+ "size" : size,
+ "width" : text.parse_int(parts[0]),
+ "height": text.parse_int(parts[2]),
}
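The rewrite replaces a fixed path index with a scan: every path component is tested against the five-field "<token>-<size>-<width>-<height>-<token>" shape, and Python's for/else runs the else branch only if the loop never hit break, i.e. no component matched. A sketch against a hypothetical URL:

    url = "https://host.example/om/a1b2-123456-1280-1810-jpg/file.jpg"

    for part in url.split("/")[4:]:   # skip scheme, host, first path segment
        try:
            _, size, width, height, _ = part.split("-")
            break                     # found the info token
        except ValueError:
            pass                      # wrong field count, keep scanning
    else:
        size = width = height = 0     # no component matched

    # size="123456", width="1280", height="1810"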
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index 493c1d2..f878dbd 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -55,7 +55,7 @@ class GfycatExtractor(Extractor):
class GfycatUserExtractor(GfycatExtractor):
"""Extractor for gfycat user profiles"""
subcategory = "user"
- directory_fmt = ("{category}", "{userName}")
+ directory_fmt = ("{category}", "{username|userName}")
pattern = r"(?:https?://)?gfycat\.com/@([^/?#]+)"
test = ("https://gfycat.com/@gretta", {
"pattern": r"https://giant\.gfycat\.com/[A-Za-z]+\.mp4",
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index 0be528d..691cefb 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -11,7 +11,7 @@
from .common import Extractor, Message
from .. import text, util
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
+BASE_PATTERN = r"(https?://)?(?:www\.)?hentai-foundry\.com"
class HentaifoundryExtractor(Extractor):
@@ -20,12 +20,14 @@ class HentaifoundryExtractor(Extractor):
directory_fmt = ("{category}", "{user}")
filename_fmt = "{category}_{index}_{title}.{extension}"
archive_fmt = "{index}"
+ cookiedomain = "www.hentai-foundry.com"
root = "https://www.hentai-foundry.com"
per_page = 25
def __init__(self, match):
+ self.root = (match.group(1) or "https://") + "www.hentai-foundry.com"
+ self.user = match.group(2)
Extractor.__init__(self, match)
- self.user = match.group(1)
self.page_url = ""
self.start_post = 0
self.start_page = 1
@@ -75,7 +77,8 @@ class HentaifoundryExtractor(Extractor):
"width" : text.parse_int(extr('width="', '"')),
"height" : text.parse_int(extr('height="', '"')),
"index" : text.parse_int(path.rsplit("/", 2)[1]),
- "src" : "https:" + text.unescape(extr('src="', '"')),
+ "src" : text.urljoin(self.root, text.unescape(extr(
+ 'src="', '"'))),
"description": text.unescape(text.remove_html(extr(
'>Description</div>', '</section>')
.replace("\r\n", "\n"), "", "")),
@@ -121,7 +124,13 @@ class HentaifoundryExtractor(Extractor):
def _init_site_filters(self):
"""Set site-internal filters to show all images"""
url = self.root + "/?enterAgree=1"
- response = self.request(url, method="HEAD")
+ self.request(url, method="HEAD")
+
+ csrf_token = self.session.cookies.get(
+ "YII_CSRF_TOKEN", domain=self.cookiedomain)
+ if not csrf_token:
+ self.log.warning("Unable to update site content filters")
+ return
url = self.root + "/site/filters"
data = {
@@ -148,7 +157,7 @@ class HentaifoundryExtractor(Extractor):
"filter_order" : "date_new",
"filter_type" : "0",
"YII_CSRF_TOKEN" : text.unquote(text.extract(
- response.cookies["YII_CSRF_TOKEN"], "%22", "%22")[0]),
+ csrf_token, "%22", "%22")[0]),
}
self.request(url, method="POST", data=data)
@@ -235,7 +244,7 @@ class HentaifoundryRecentExtractor(HentaifoundryExtractor):
directory_fmt = ("{category}", "Recent Pictures", "{date}")
archive_fmt = "r_{index}"
pattern = BASE_PATTERN + r"/pictures/recent/(\d\d\d\d-\d\d-\d\d)"
- test = ("http://www.hentai-foundry.com/pictures/recent/2018-09-20", {
+ test = ("https://www.hentai-foundry.com/pictures/recent/2018-09-20", {
"pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/?#]+/\d+/",
"range": "20-30",
})
@@ -254,7 +263,7 @@ class HentaifoundryPopularExtractor(HentaifoundryExtractor):
directory_fmt = ("{category}", "Popular Pictures")
archive_fmt = "p_{index}"
pattern = BASE_PATTERN + r"/pictures/popular()"
- test = ("http://www.hentai-foundry.com/pictures/popular", {
+ test = ("https://www.hentai-foundry.com/pictures/popular", {
"pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/?#]+/\d+/",
"range": "20-30",
})
@@ -267,7 +276,7 @@ class HentaifoundryPopularExtractor(HentaifoundryExtractor):
class HentaifoundryImageExtractor(HentaifoundryExtractor):
"""Extractor for a single image from hentaifoundry.com"""
subcategory = "image"
- pattern = (r"(?:https?://)?(?:www\.|pictures\.)?hentai-foundry\.com"
+ pattern = (r"(https?://)?(?:www\.|pictures\.)?hentai-foundry\.com"
r"/(?:pictures/user|[^/?#])/([^/?#]+)/(\d+)")
test = (
(("https://www.hentai-foundry.com"
@@ -290,7 +299,10 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
"width" : 495,
},
}),
- ("https://www.hentai-foundry.com/pictures/user/Tenpura/340853/"),
+ ("http://www.hentai-foundry.com/pictures/user/Tenpura/407501/", {
+ "pattern": "http://pictures.hentai-foundry.com/t/Tenpura/407501/",
+ }),
+ ("https://www.hentai-foundry.com/pictures/user/Tenpura/407501/"),
("https://pictures.hentai-foundry.com"
"/t/Tenpura/407501/Tenpura-407501-shimakaze.png"),
)
@@ -298,7 +310,7 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match)
- self.index = match.group(2)
+ self.index = match.group(3)
def items(self):
post_url = "{}/pictures/user/{}/{}/?enterAgree=1".format(
@@ -359,7 +371,7 @@ class HentaifoundryStoryExtractor(HentaifoundryExtractor):
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match)
- self.index = match.group(2)
+ self.index = match.group(3)
def items(self):
story_url = "{}/stories/user/{}/{}/x?enterAgree=1".format(
diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py
index 6ddf0e8..679b5a0 100644
--- a/gallery_dl/extractor/khinsider.py
+++ b/gallery_dl/extractor/khinsider.py
@@ -23,9 +23,10 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
root = "https://downloads.khinsider.com"
test = (("https://downloads.khinsider.com"
"/game-soundtracks/album/horizon-riders-wii"), {
- "pattern": r"https?://vgmdownloads.com/soundtracks/horizon-riders-wii/"
- r"[^/]+/Horizon%20Riders%20Wii%20-%20Full%20Soundtrack.mp3",
- "keyword": "5b2c35cce638c326cab2a4f7a79f245d008d62ff",
+ "pattern": r"https?://vgm(site|downloads).com"
+ r"/soundtracks/horizon-riders-wii/[^/]+"
+ r"/Horizon%20Riders%20Wii%20-%20Full%20Soundtrack.mp3",
+ "keyword": "12ca70e0709ea15250e577ea388cf2b5b0c65630",
})
def __init__(self, match):
diff --git a/gallery_dl/extractor/mangoxo.py b/gallery_dl/extractor/mangoxo.py
index 5743498..344dd56 100644
--- a/gallery_dl/extractor/mangoxo.py
+++ b/gallery_dl/extractor/mangoxo.py
@@ -86,7 +86,7 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
"album": {
"id": "lzVOv1Q9",
"name": "re:池永康晟 Ikenaga Yasunari 透出古朴",
- "date": "2019.3.22 14:42",
+ "date": "dt:2019-03-22 14:42:00",
"description": str,
},
"num": int,
@@ -113,23 +113,24 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
def metadata(self, page):
"""Return general metadata"""
title, pos = text.extract(page, '<title>', '</title>')
- count, pos = text.extract(page, 'id="pic-count">', '<', pos)
- cover, pos = text.extract(page, ' src="', '"', pos)
+ _ , pos = text.extract(page, 'class="desc"', '', pos)
cid , pos = text.extract(page, '//www.mangoxo.com/channel/', '"', pos)
cname, pos = text.extract(page, '>', '<', pos)
+ count, pos = text.extract(page, 'id="pic-count">', '<', pos)
+ cover, pos = text.extract(page, ' src="', '"', pos)
date , pos = text.extract(page, '</i>', '<', pos)
descr, pos = text.extract(page, '<pre>', '</pre>', pos)
return {
"channel": {
"id": cid,
- "name": text.unescape(cname),
+ "name": text.unescape(cname.strip()),
"cover": cover,
},
"album": {
"id": self.album_id,
"name": text.unescape(title),
- "date": date.strip(),
+ "date": text.parse_datetime(date.strip(), "%Y.%m.%d %H:%M"),
"description": text.unescape(descr),
},
"count": text.parse_int(count),
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 57521d6..e0b0496 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -21,6 +21,9 @@ class PahealExtractor(SharedConfigMixin, Extractor):
root = "https://rule34.paheal.net"
def items(self):
+ self.session.cookies.set(
+ "ui-tnc-agreed", "true", domain="rule34.paheal.net")
+
yield Message.Version, 1
yield Message.Directory, self.get_metadata()
@@ -65,7 +68,7 @@ class PahealTagExtractor(PahealExtractor):
page = self.request(url).text
for post in text.extract_iter(
- page, '<img id="thumb_', '>Image Only<'):
+ page, '<img id="thumb_', 'Only</a>'):
yield self._extract_data(post)
if ">Next<" not in page:
@@ -79,7 +82,8 @@ class PahealTagExtractor(PahealExtractor):
md5 , pos = text.extract(post, '/_thumbs/', '/', pos)
url , pos = text.extract(post, '<a href="', '"', pos)
- tags, dimensions, size, _ = data.split(" // ")
+ tags, data, date = data.split("\n")
+ dimensions, size, ext = data.split(" // ")
width, _, height = dimensions.partition("x")
return {
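The markup change means a thumbnail's metadata now arrives as three newline-separated lines (tags, file info, date) instead of a single " // "-joined string, and the file info gained an extension field. A sketch of the new parse against a hypothetical value; the middle variable is renamed info here for readability, where the diff reuses the name data:

    data = "tag_a tag_b\n800x600 // 123.4KB // jpg\nJanuary 01, 2020"

    tags, info, date = data.split("\n")
    dimensions, size, ext = info.split(" // ")
    width, _, height = dimensions.partition("x")
    # tags="tag_a tag_b", width="800", height="600", size="123.4KB", ext="jpg"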
diff --git a/gallery_dl/extractor/sankakucomplex.py b/gallery_dl/extractor/sankakucomplex.py
index f6ad327..972750c 100644
--- a/gallery_dl/extractor/sankakucomplex.py
+++ b/gallery_dl/extractor/sankakucomplex.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,7 +9,7 @@
"""Extractors for https://www.sankakucomplex.com/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, util
import re
@@ -40,6 +40,21 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
"url": "a1e249173fd6c899a8134fcfbd9c925588a63f7c",
"keyword": "e78fcc23c2711befc0969a45ea5082a29efccf68",
}),
+ # videos (#308)
+ (("https://www.sankakucomplex.com/2019/06/11"
+ "/darling-ol-goddess-shows-off-her-plump-lower-area/"), {
+ "pattern": r"/wp-content/uploads/2019/06/[^/]+\d\.mp4",
+ "range": "26-",
+ "count": 5,
+ }),
+ # youtube embeds (#308)
+ (("https://www.sankakucomplex.com/2015/02/12"
+ "/snow-miku-2015-live-magical-indeed/"), {
+ "options": (("embeds", True),),
+ "pattern": r"https://www.youtube.com/embed/",
+ "range": "2-",
+ "count": 2,
+ }),
)
def items(self):
@@ -53,38 +68,44 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
"date" : text.parse_datetime(
extr('property="article:published_time" content="', '"')),
}
- imgs = self.images(extr)
- data["count"] = len(imgs)
+ content = extr('<div class="entry-content">', '</article>')
data["tags"] = text.split_html(extr('="meta-tags">', '</div>'))[::2]
- yield Message.Version, 1
- yield Message.Directory, data
- for img in imgs:
- img.update(data)
- yield Message.Url, img["url"], img
+ files = self._extract_images(content)
+ if self.config("videos", True):
+ files += self._extract_videos(content)
+ if self.config("embeds", False):
+ files += self._extract_embeds(content)
+ data["count"] = len(files)
- def images(self, extr):
- num = 0
- imgs = []
- urls = set()
- orig = re.compile(r"-\d+x\d+\.")
-
- extr('<div class="entry-content">', '')
- while True:
- url = extr('data-lazy-src="', '"')
- if not url:
- return imgs
- if url in urls:
- continue
+ yield Message.Directory, data
+ for num, url in enumerate(files, 1):
+ file = text.nameext_from_url(url)
if url[0] == "/":
url = text.urljoin(self.root, url)
- url = orig.sub(".", url)
- num += 1
- imgs.append(text.nameext_from_url(url, {
- "url" : url,
- "num" : num,
- }))
- urls.add(url)
+ file["url"] = url
+ file["num"] = num
+ file.update(data)
+ yield Message.Url, url, file
+
+ @staticmethod
+ def _extract_images(content):
+ orig_sub = re.compile(r"-\d+x\d+\.").sub
+ return [
+ orig_sub(".", url) for url in
+ util.unique(text.extract_iter(content, 'data-lazy-src="', '"'))
+ ]
+
+ @staticmethod
+ def _extract_videos(content):
+ return re.findall(r"<source [^>]*src=[\"']([^\"']+)", content)
+
+ @staticmethod
+ def _extract_embeds(content):
+ return [
+ "ytdl:" + url for url in
+ re.findall(r"<iframe [^>]*src=[\"']([^\"']+)", content)
+ ]
class SankakucomplexTagExtractor(SankakucomplexExtractor):
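_extract_images reproduces the old loop in two steps: util.unique drops repeated data-lazy-src URLs while keeping their order, and the regex strips WordPress thumbnail size suffixes so the original upload is fetched. A quick check of that suffix regex:

    import re

    orig_sub = re.compile(r"-\d+x\d+\.").sub
    orig_sub(".", "https://example.org/uploads/photo-800x600.jpg")
    # -> "https://example.org/uploads/photo.jpg"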
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 06973b2..fe0b3c5 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -11,7 +11,7 @@
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
-
+import json
BASE_PATTERN = (
r"(?:https?://)?(?:www\.|mobile\.)?"
@@ -78,8 +78,8 @@ class TwitterExtractor(Extractor):
def _extract_media(self, tweet, files):
for media in tweet["extended_entities"]["media"]:
- width = media["original_info"].get("width", 0),
- height = media["original_info"].get("height", 0),
+ width = media["original_info"].get("width", 0)
+ height = media["original_info"].get("height", 0)
if "video_info" in media:
if self.videos == "ytdl":
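The dropped trailing commas were a real bug: a trailing comma after an assignment's right-hand side builds a 1-tuple, so width and height were tuples instead of ints.

    width = {"width": 640}.get("width", 0),   # -> (640,)  a 1-tuple
    width = {"width": 640}.get("width", 0)    # -> 640     the int itself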
@@ -321,6 +321,35 @@ class TwitterBookmarkExtractor(TwitterExtractor):
return TwitterAPI(self).timeline_bookmark()
+class TwitterListExtractor(TwitterExtractor):
+ """Extractor for Twitter lists"""
+ subcategory = "list"
+ pattern = BASE_PATTERN + r"/i/lists/(\d+)/?$"
+ test = ("https://twitter.com/i/lists/784214683683127296", {
+ "range": "1-40",
+ "count": 40,
+ "archive": False,
+ })
+
+ def tweets(self):
+ return TwitterAPI(self).timeline_list(self.user)
+
+
+class TwitterListMembersExtractor(TwitterExtractor):
+ """Extractor for members of a Twitter list"""
+ subcategory = "list-members"
+ pattern = BASE_PATTERN + r"/i/lists/(\d+)/members"
+ test = ("https://twitter.com/i/lists/784214683683127296/members",)
+
+ def items(self):
+ self.login()
+ for user in TwitterAPI(self).list_members(self.user):
+ user["_extractor"] = TwitterTimelineExtractor
+ url = "{}/intent/user?user_id={}".format(
+ self.root, user["rest_id"])
+ yield Message.Queue, url, user
+
+
class TwitterSearchExtractor(TwitterExtractor):
"""Extractor for all images from a search timeline"""
subcategory = "search"
@@ -399,7 +428,7 @@ class TwitterTweetExtractor(TwitterExtractor):
# Twitter card (#1005)
("https://twitter.com/billboard/status/1306599586602135555", {
"options": (("cards", True),),
- "pattern": r"https://pbs.twimg.com/card_img/1317274761030856707/",
+ "pattern": r"https://pbs.twimg.com/card_img/\d+/",
}),
# original retweets (#1026)
("https://twitter.com/jessica_3978/status/1296304589591810048", {
@@ -511,6 +540,13 @@ class TwitterAPI():
endpoint = "2/timeline/bookmark.json"
return self._pagination(endpoint)
+ def timeline_list(self, list_id):
+ endpoint = "2/timeline/list.json"
+ params = self.params.copy()
+ params["list_id"] = list_id
+ params["ranking_mode"] = "reverse_chronological"
+ return self._pagination(endpoint, params)
+
def search(self, query):
endpoint = "2/search/adaptive.json"
params = self.params.copy()
@@ -522,12 +558,29 @@ class TwitterAPI():
return self._pagination(
endpoint, params, "sq-I-t-", "sq-cursor-bottom")
- def user_by_screen_name(self, screen_name):
- endpoint = "graphql/-xfUfZsnR_zqjFd-IfrN5A/UserByScreenName"
- params = {
- "variables": '{"screen_name":"' + screen_name + '"'
- ',"withHighlightedLabel":true}'
+ def list_members(self, list_id):
+ endpoint = "graphql/M74V2EwlxxVYGB4DbyAphQ/ListMembers"
+ variables = {
+ "listId": list_id,
+ "count" : 20,
+ "withTweetResult": False,
+ "withUserResult" : False,
}
+ return self._pagination_members(endpoint, variables)
+
+ def list_by_rest_id(self, list_id):
+ endpoint = "graphql/LXXTUytSX1QY-2p8Xp9BFA/ListByRestId"
+ params = {"variables": '{"listId":"' + list_id + '"'
+ ',"withUserResult":false}'}
+ try:
+ return self._call(endpoint, params)["data"]["list"]
+ except KeyError:
+ raise exception.NotFoundError("list")
+
+ def user_by_screen_name(self, screen_name):
+ endpoint = "graphql/jMaTS-_Ea8vh9rpKggJbCQ/UserByScreenName"
+ params = {"variables": '{"screen_name":"' + screen_name + '"'
+ ',"withHighlightedLabel":true}'}
try:
return self._call(endpoint, params)["data"]["user"]
except KeyError:
@@ -627,3 +680,30 @@ class TwitterAPI():
if not cursor or not tweet:
return
params["cursor"] = cursor
+
+ def _pagination_members(self, endpoint, variables):
+ while True:
+ cursor = entry = stop = None
+ params = {"variables": json.dumps(variables)}
+ data = self._call(endpoint, params)
+
+ try:
+ instructions = (data["data"]["list"]["members_timeline"]
+ ["timeline"]["instructions"])
+ except KeyError:
+ raise exception.AuthorizationError()
+
+ for instr in instructions:
+ if instr["type"] == "TimelineAddEntries":
+ for entry in instr["entries"]:
+ if entry["entryId"].startswith("user-"):
+ yield entry["content"]["itemContent"]["user"]
+ elif entry["entryId"].startswith("cursor-bottom-"):
+ cursor = entry["content"]["value"]
+ elif instr["type"] == "TimelineTerminateTimeline":
+ if instr["direction"] == "Bottom":
+ stop = True
+
+ if stop or not cursor or not entry:
+ return
+ variables["cursor"] = cursor
diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py
index 6799784..1dd5b09 100644
--- a/gallery_dl/extractor/weasyl.py
+++ b/gallery_dl/extractor/weasyl.py
@@ -226,7 +226,6 @@ class WeasylFavoriteExtractor(WeasylExtractor):
if not owner_login:
owner_login = text.extract(page, '<a href="/~', '"')[0]
- yield Message.Directory, {"owner_login": owner_login}
for submitid in text.extract_iter(page, "/submissions/", "/", pos):
if submitid == lastid:
@@ -234,6 +233,8 @@ class WeasylFavoriteExtractor(WeasylExtractor):
lastid = submitid
submission = self.request_submission(submitid)
if self.populate_submission(submission):
+ submission["user"] = owner_login
+ yield Message.Directory, submission
yield Message.Url, submission["url"], submission
if "&amp;nextid=" not in page: