diff options
| author | 2024-09-07 18:33:25 -0400 | |
|---|---|---|
| committer | 2024-09-07 18:33:25 -0400 | |
| commit | 05335f2b4f60f6948edc96c71a7ef1c3ca71c9b3 (patch) | |
| tree | 2c455afb2e2fcd51788500ce8a3455a1ef659b0e /gallery_dl/extractor/generic.py | |
| parent | c45c7a86c313075d1fbd5803e7efdda680b27cd7 (diff) | |
| parent | 1f3ffe32342852fd9ea9e7704022488f3a1222bd (diff) | |
Update upstream source from tag 'upstream/1.27.4'
Update to upstream version '1.27.4'
with Debian dir 9c7b608ab0b9fa99a0cd692418a8f3965bf3d1c3
Diffstat (limited to 'gallery_dl/extractor/generic.py')
| -rw-r--r-- | gallery_dl/extractor/generic.py | 8 |
1 file changed, 6 insertions, 2 deletions
```diff
diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py
index 16d4340..a6c1d5a 100644
--- a/gallery_dl/extractor/generic.py
+++ b/gallery_dl/extractor/generic.py
@@ -15,7 +15,7 @@ import re
 class GenericExtractor(Extractor):
     """Extractor for images in a generic web page."""
     category = "generic"
-    directory_fmt = ("{category}", "{pageurl}")
+    directory_fmt = ("{category}", "{subcategory}", "{path}")
     archive_fmt = "{imageurl}"
 
     # By default, the generic extractor is disabled
@@ -52,7 +52,10 @@ class GenericExtractor(Extractor):
             self.scheme = match.group('scheme')
         else:
             self.scheme = 'https://'
-            self.url = self.scheme + self.url
+            self.url = text.ensure_http_scheme(self.url, self.scheme)
+
+        self.subcategory = match.group('domain')
+        self.path = match.group('path')
 
         # Used to resolve relative image urls
         self.root = self.scheme + match.group('domain')
@@ -87,6 +90,7 @@ class GenericExtractor(Extractor):
     def metadata(self, page):
         """Extract generic webpage metadata, return them in a dict."""
         data = {}
+        data['path'] = self.path.replace("/", "")
         data['pageurl'] = self.url
         data['title'] = text.extr(page, '<title>', "</title>")
         data['description'] = text.extr(
```
