author     Unit 193 <unit193@unit193.net>  2025-12-20 05:49:04 -0500
committer  Unit 193 <unit193@unit193.net>  2025-12-20 05:49:04 -0500
commit     a24ec1647aeac35a63b744ea856011ad6e06be3b (patch)
tree       ae94416de786aeddd05d99559098f7f16bb103a6
parent     33f8a8a37a9cba738ef25fb99955f0730da9eb48 (diff)
New upstream version 1.31.1 (tag: upstream/1.31.1)
-rw-r--r--  CHANGELOG.md | 60
-rw-r--r--  PKG-INFO | 14
-rw-r--r--  README.rst | 11
-rw-r--r--  data/completion/_gallery-dl | 6
-rw-r--r--  data/completion/gallery-dl | 2
-rw-r--r--  data/completion/gallery-dl.fish | 6
-rw-r--r--  data/man/gallery-dl.1 | 12
-rw-r--r--  data/man/gallery-dl.conf.5 | 642
-rw-r--r--  docs/gallery-dl.conf | 98
-rw-r--r--  gallery_dl.egg-info/PKG-INFO | 14
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt | 19
-rw-r--r--  gallery_dl/__init__.py | 4
-rw-r--r--  gallery_dl/actions.py | 5
-rw-r--r--  gallery_dl/cookies.py | 2
-rw-r--r--  gallery_dl/downloader/__init__.py | 2
-rw-r--r--  gallery_dl/downloader/common.py | 11
-rw-r--r--  gallery_dl/downloader/http.py | 18
-rw-r--r--  gallery_dl/downloader/ytdl.py | 295
-rw-r--r--  gallery_dl/dt.py | 115
-rw-r--r--  gallery_dl/extractor/2ch.py | 18
-rw-r--r--  gallery_dl/extractor/2chan.py | 2
-rw-r--r--  gallery_dl/extractor/2chen.py | 73
-rw-r--r--  gallery_dl/extractor/35photo.py | 2
-rw-r--r--  gallery_dl/extractor/4archive.py | 11
-rw-r--r--  gallery_dl/extractor/4chan.py | 2
-rw-r--r--  gallery_dl/extractor/4chanarchives.py | 2
-rw-r--r--  gallery_dl/extractor/500px.py | 12
-rw-r--r--  gallery_dl/extractor/8chan.py | 13
-rw-r--r--  gallery_dl/extractor/8muses.py | 5
-rw-r--r--  gallery_dl/extractor/__init__.py | 18
-rw-r--r--  gallery_dl/extractor/adultempire.py | 2
-rw-r--r--  gallery_dl/extractor/agnph.py | 10
-rw-r--r--  gallery_dl/extractor/ao3.py | 38
-rw-r--r--  gallery_dl/extractor/arcalive.py | 13
-rw-r--r--  gallery_dl/extractor/arena.py | 89
-rw-r--r--  gallery_dl/extractor/artstation.py | 7
-rw-r--r--  gallery_dl/extractor/aryion.py | 117
-rw-r--r--  gallery_dl/extractor/audiochan.py | 158
-rw-r--r--  gallery_dl/extractor/batoto.py | 17
-rw-r--r--  gallery_dl/extractor/bbc.py | 7
-rw-r--r--  gallery_dl/extractor/behance.py | 4
-rw-r--r--  gallery_dl/extractor/bellazon.py | 47
-rw-r--r--  gallery_dl/extractor/bilibili.py | 2
-rw-r--r--  gallery_dl/extractor/blogger.py | 20
-rw-r--r--  gallery_dl/extractor/bluesky.py | 45
-rw-r--r--  gallery_dl/extractor/booru.py | 2
-rw-r--r--  gallery_dl/extractor/boosty.py | 30
-rw-r--r--  gallery_dl/extractor/booth.py | 5
-rw-r--r--  gallery_dl/extractor/bunkr.py | 37
-rw-r--r--  gallery_dl/extractor/catbox.py | 4
-rw-r--r--  gallery_dl/extractor/cfake.py | 149
-rw-r--r--  gallery_dl/extractor/chevereto.py | 73
-rw-r--r--  gallery_dl/extractor/cien.py | 14
-rw-r--r--  gallery_dl/extractor/civitai.py | 138
-rw-r--r--  gallery_dl/extractor/comedywildlifephoto.py | 51
-rw-r--r--  gallery_dl/extractor/comick.py | 12
-rw-r--r--  gallery_dl/extractor/comicvine.py | 2
-rw-r--r--  gallery_dl/extractor/common.py | 47
-rw-r--r--  gallery_dl/extractor/cyberdrop.py | 20
-rw-r--r--  gallery_dl/extractor/cyberfile.py | 58
-rw-r--r--  gallery_dl/extractor/danbooru.py | 58
-rw-r--r--  gallery_dl/extractor/dankefuerslesen.py | 6
-rw-r--r--  gallery_dl/extractor/desktopography.py | 8
-rw-r--r--  gallery_dl/extractor/deviantart.py | 50
-rw-r--r--  gallery_dl/extractor/directlink.py | 2
-rw-r--r--  gallery_dl/extractor/discord.py | 18
-rw-r--r--  gallery_dl/extractor/dynastyscans.py | 22
-rw-r--r--  gallery_dl/extractor/e621.py | 40
-rw-r--r--  gallery_dl/extractor/eporner.py | 54
-rw-r--r--  gallery_dl/extractor/erome.py | 16
-rw-r--r--  gallery_dl/extractor/everia.py | 20
-rw-r--r--  gallery_dl/extractor/exhentai.py | 22
-rw-r--r--  gallery_dl/extractor/facebook.py | 49
-rw-r--r--  gallery_dl/extractor/fanbox.py | 63
-rw-r--r--  gallery_dl/extractor/fansly.py | 26
-rw-r--r--  gallery_dl/extractor/fantia.py | 4
-rw-r--r--  gallery_dl/extractor/fapachi.py | 2
-rw-r--r--  gallery_dl/extractor/fapello.py | 16
-rw-r--r--  gallery_dl/extractor/fikfap.py | 105
-rw-r--r--  gallery_dl/extractor/fitnakedgirls.py | 208
-rw-r--r--  gallery_dl/extractor/flickr.py | 43
-rw-r--r--  gallery_dl/extractor/foolfuuka.py | 10
-rw-r--r--  gallery_dl/extractor/foolslide.py | 6
-rw-r--r--  gallery_dl/extractor/furaffinity.py | 24
-rw-r--r--  gallery_dl/extractor/furry34.py | 9
-rw-r--r--  gallery_dl/extractor/gelbooru.py | 18
-rw-r--r--  gallery_dl/extractor/gelbooru_v01.py | 9
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py | 14
-rw-r--r--  gallery_dl/extractor/generic.py | 10
-rw-r--r--  gallery_dl/extractor/girlsreleased.py | 10
-rw-r--r--  gallery_dl/extractor/girlswithmuscle.py | 15
-rw-r--r--  gallery_dl/extractor/gofile.py | 14
-rw-r--r--  gallery_dl/extractor/hatenablog.py | 20
-rw-r--r--  gallery_dl/extractor/hentai2read.py | 2
-rw-r--r--  gallery_dl/extractor/hentaicosplays.py | 2
-rw-r--r--  gallery_dl/extractor/hentaifoundry.py | 37
-rw-r--r--  gallery_dl/extractor/hentaihand.py | 3
-rw-r--r--  gallery_dl/extractor/hentaihere.py | 2
-rw-r--r--  gallery_dl/extractor/hiperdex.py | 10
-rw-r--r--  gallery_dl/extractor/hitomi.py | 2
-rw-r--r--  gallery_dl/extractor/hotleak.py | 10
-rw-r--r--  gallery_dl/extractor/idolcomplex.py | 9
-rw-r--r--  gallery_dl/extractor/imagebam.py | 14
-rw-r--r--  gallery_dl/extractor/imagechest.py | 10
-rw-r--r--  gallery_dl/extractor/imagefap.py | 20
-rw-r--r--  gallery_dl/extractor/imagehosts.py | 101
-rw-r--r--  gallery_dl/extractor/imgbb.py | 5
-rw-r--r--  gallery_dl/extractor/imgbox.py | 11
-rw-r--r--  gallery_dl/extractor/imgpile.py | 2
-rw-r--r--  gallery_dl/extractor/imgth.py | 2
-rw-r--r--  gallery_dl/extractor/imgur.py | 29
-rw-r--r--  gallery_dl/extractor/imhentai.py | 9
-rw-r--r--  gallery_dl/extractor/inkbunny.py | 24
-rw-r--r--  gallery_dl/extractor/instagram.py | 88
-rw-r--r--  gallery_dl/extractor/issuu.py | 6
-rw-r--r--  gallery_dl/extractor/itaku.py | 64
-rw-r--r--  gallery_dl/extractor/itchio.py | 2
-rw-r--r--  gallery_dl/extractor/iwara.py | 15
-rw-r--r--  gallery_dl/extractor/jschan.py | 6
-rw-r--r--  gallery_dl/extractor/kabeuchi.py | 5
-rw-r--r--  gallery_dl/extractor/keenspot.py | 2
-rw-r--r--  gallery_dl/extractor/kemono.py | 102
-rw-r--r--  gallery_dl/extractor/khinsider.py | 2
-rw-r--r--  gallery_dl/extractor/komikcast.py | 10
-rw-r--r--  gallery_dl/extractor/koofr.py | 55
-rw-r--r--  gallery_dl/extractor/leakgallery.py | 10
-rw-r--r--  gallery_dl/extractor/lensdump.py | 9
-rw-r--r--  gallery_dl/extractor/lexica.py | 2
-rw-r--r--  gallery_dl/extractor/lightroom.py | 2
-rw-r--r--  gallery_dl/extractor/livedoor.py | 4
-rw-r--r--  gallery_dl/extractor/lofter.py | 4
-rw-r--r--  gallery_dl/extractor/lolisafe.py | 4
-rw-r--r--  gallery_dl/extractor/luscious.py | 6
-rw-r--r--  gallery_dl/extractor/lynxchan.py | 6
-rw-r--r--  gallery_dl/extractor/madokami.py | 5
-rw-r--r--  gallery_dl/extractor/mangadex.py | 22
-rw-r--r--  gallery_dl/extractor/mangafox.py | 8
-rw-r--r--  gallery_dl/extractor/mangahere.py | 4
-rw-r--r--  gallery_dl/extractor/manganelo.py | 16
-rw-r--r--  gallery_dl/extractor/mangapark.py | 12
-rw-r--r--  gallery_dl/extractor/mangaread.py | 4
-rw-r--r--  gallery_dl/extractor/mangataro.py | 6
-rw-r--r--  gallery_dl/extractor/mangoxo.py | 4
-rw-r--r--  gallery_dl/extractor/mastodon.py | 25
-rw-r--r--  gallery_dl/extractor/message.py | 9
-rw-r--r--  gallery_dl/extractor/misskey.py | 80
-rw-r--r--  gallery_dl/extractor/moebooru.py | 23
-rw-r--r--  gallery_dl/extractor/motherless.py | 43
-rw-r--r--  gallery_dl/extractor/myhentaigallery.py | 43
-rw-r--r--  gallery_dl/extractor/myportfolio.py | 2
-rw-r--r--  gallery_dl/extractor/naverblog.py | 11
-rw-r--r--  gallery_dl/extractor/naverchzzk.py | 12
-rw-r--r--  gallery_dl/extractor/naverwebtoon.py | 4
-rw-r--r--  gallery_dl/extractor/nekohouse.py | 12
-rw-r--r--  gallery_dl/extractor/newgrounds.py | 37
-rw-r--r--  gallery_dl/extractor/nijie.py | 31
-rw-r--r--  gallery_dl/extractor/nitter.py | 20
-rw-r--r--  gallery_dl/extractor/noop.py | 6
-rw-r--r--  gallery_dl/extractor/nozomi.py | 9
-rw-r--r--  gallery_dl/extractor/nudostar.py | 6
-rw-r--r--  gallery_dl/extractor/oauth.py | 32
-rw-r--r--  gallery_dl/extractor/okporn.py | 39
-rw-r--r--  gallery_dl/extractor/paheal.py | 7
-rw-r--r--  gallery_dl/extractor/patreon.py | 62
-rw-r--r--  gallery_dl/extractor/pexels.py | 13
-rw-r--r--  gallery_dl/extractor/philomena.py | 9
-rw-r--r--  gallery_dl/extractor/photovogue.py | 7
-rw-r--r--  gallery_dl/extractor/picarto.py | 5
-rw-r--r--  gallery_dl/extractor/picazor.py | 59
-rw-r--r--  gallery_dl/extractor/pictoa.py | 6
-rw-r--r--  gallery_dl/extractor/piczel.py | 13
-rw-r--r--  gallery_dl/extractor/pillowfort.py | 16
-rw-r--r--  gallery_dl/extractor/pinterest.py | 22
-rw-r--r--  gallery_dl/extractor/pixeldrain.py | 24
-rw-r--r--  gallery_dl/extractor/pixiv.py | 81
-rw-r--r--  gallery_dl/extractor/pixnet.py | 12
-rw-r--r--  gallery_dl/extractor/plurk.py | 13
-rw-r--r--  gallery_dl/extractor/poipiku.py | 2
-rw-r--r--  gallery_dl/extractor/poringa.py | 8
-rw-r--r--  gallery_dl/extractor/pornhub.py | 17
-rw-r--r--  gallery_dl/extractor/pornpics.py | 38
-rw-r--r--  gallery_dl/extractor/pornstarstube.py | 43
-rw-r--r--  gallery_dl/extractor/postmill.py | 28
-rw-r--r--  gallery_dl/extractor/rawkuma.py | 63
-rw-r--r--  gallery_dl/extractor/reactor.py | 14
-rw-r--r--  gallery_dl/extractor/readcomiconline.py | 4
-rw-r--r--  gallery_dl/extractor/realbooru.py | 39
-rw-r--r--  gallery_dl/extractor/recursive.py | 4
-rw-r--r--  gallery_dl/extractor/redbust.py | 186
-rw-r--r--  gallery_dl/extractor/reddit.py | 36
-rw-r--r--  gallery_dl/extractor/redgifs.py | 4
-rw-r--r--  gallery_dl/extractor/rule34us.py | 6
-rw-r--r--  gallery_dl/extractor/rule34vault.py | 9
-rw-r--r--  gallery_dl/extractor/rule34xyz.py | 9
-rw-r--r--  gallery_dl/extractor/s3ndpics.py | 8
-rw-r--r--  gallery_dl/extractor/saint.py | 8
-rw-r--r--  gallery_dl/extractor/sankaku.py | 22
-rw-r--r--  gallery_dl/extractor/sankakucomplex.py | 10
-rw-r--r--  gallery_dl/extractor/schalenetwork.py | 25
-rw-r--r--  gallery_dl/extractor/scrolller.py | 8
-rw-r--r--  gallery_dl/extractor/seiga.py | 4
-rw-r--r--  gallery_dl/extractor/sexcom.py | 54
-rw-r--r--  gallery_dl/extractor/shimmie2.py | 18
-rw-r--r--  gallery_dl/extractor/shopify.py | 6
-rw-r--r--  gallery_dl/extractor/simpcity.py | 186
-rw-r--r--  gallery_dl/extractor/simplyhentai.py | 8
-rw-r--r--  gallery_dl/extractor/sizebooru.py | 4
-rw-r--r--  gallery_dl/extractor/skeb.py | 18
-rw-r--r--  gallery_dl/extractor/slickpic.py | 6
-rw-r--r--  gallery_dl/extractor/slideshare.py | 5
-rw-r--r--  gallery_dl/extractor/smugmug.py | 8
-rw-r--r--  gallery_dl/extractor/soundgasm.py | 6
-rw-r--r--  gallery_dl/extractor/speakerdeck.py | 4
-rw-r--r--  gallery_dl/extractor/steamgriddb.py | 12
-rw-r--r--  gallery_dl/extractor/subscribestar.py | 28
-rw-r--r--  gallery_dl/extractor/sxypix.py | 39
-rw-r--r--  gallery_dl/extractor/szurubooru.py | 7
-rw-r--r--  gallery_dl/extractor/tapas.py | 10
-rw-r--r--  gallery_dl/extractor/tcbscans.py | 4
-rw-r--r--  gallery_dl/extractor/telegraph.py | 5
-rw-r--r--  gallery_dl/extractor/tenor.py | 17
-rw-r--r--  gallery_dl/extractor/thehentaiworld.py | 7
-rw-r--r--  gallery_dl/extractor/tiktok.py | 47
-rw-r--r--  gallery_dl/extractor/tmohentai.py | 2
-rw-r--r--  gallery_dl/extractor/toyhouse.py | 6
-rw-r--r--  gallery_dl/extractor/tsumino.py | 2
-rw-r--r--  gallery_dl/extractor/tumblr.py | 37
-rw-r--r--  gallery_dl/extractor/tumblrgallery.py | 6
-rw-r--r--  gallery_dl/extractor/tungsten.py | 4
-rw-r--r--  gallery_dl/extractor/twibooru.py | 13
-rw-r--r--  gallery_dl/extractor/twitter.py | 252
-rw-r--r--  gallery_dl/extractor/unsplash.py | 14
-rw-r--r--  gallery_dl/extractor/uploadir.py | 2
-rw-r--r--  gallery_dl/extractor/urlgalleries.py | 4
-rw-r--r--  gallery_dl/extractor/urlshortener.py | 2
-rw-r--r--  gallery_dl/extractor/vanillarock.py | 6
-rw-r--r--  gallery_dl/extractor/vichan.py | 6
-rw-r--r--  gallery_dl/extractor/vipergirls.py | 10
-rw-r--r--  gallery_dl/extractor/vk.py | 16
-rw-r--r--  gallery_dl/extractor/vsco.py | 24
-rw-r--r--  gallery_dl/extractor/wallhaven.py | 5
-rw-r--r--  gallery_dl/extractor/wallpapercave.py | 6
-rw-r--r--  gallery_dl/extractor/warosu.py | 4
-rw-r--r--  gallery_dl/extractor/weasyl.py | 29
-rw-r--r--  gallery_dl/extractor/webmshare.py | 4
-rw-r--r--  gallery_dl/extractor/webtoons.py | 10
-rw-r--r--  gallery_dl/extractor/weebcentral.py | 8
-rw-r--r--  gallery_dl/extractor/weebdex.py | 132
-rw-r--r--  gallery_dl/extractor/weibo.py | 12
-rw-r--r--  gallery_dl/extractor/wikiart.py | 10
-rw-r--r--  gallery_dl/extractor/wikifeet.py | 4
-rw-r--r--  gallery_dl/extractor/wikimedia.py | 98
-rw-r--r--  gallery_dl/extractor/xasiat.py | 25
-rw-r--r--  gallery_dl/extractor/xenforo.py | 348
-rw-r--r--  gallery_dl/extractor/xfolio.py | 8
-rw-r--r--  gallery_dl/extractor/xhamster.py | 8
-rw-r--r--  gallery_dl/extractor/xvideos.py | 4
-rw-r--r--  gallery_dl/extractor/yiffverse.py | 9
-rw-r--r--  gallery_dl/extractor/ytdl.py | 2
-rw-r--r--  gallery_dl/extractor/zerochan.py | 8
-rw-r--r--  gallery_dl/formatter.py | 86
-rw-r--r--  gallery_dl/job.py | 171
-rw-r--r--  gallery_dl/option.py | 18
-rw-r--r--  gallery_dl/output.py | 56
-rw-r--r--  gallery_dl/path.py | 95
-rw-r--r--  gallery_dl/postprocessor/__init__.py | 2
-rw-r--r--  gallery_dl/postprocessor/exec.py | 9
-rw-r--r--  gallery_dl/postprocessor/metadata.py | 10
-rw-r--r--  gallery_dl/postprocessor/mtime.py | 7
-rw-r--r--  gallery_dl/postprocessor/ugoira.py | 6
-rw-r--r--  gallery_dl/text.py | 65
-rw-r--r--  gallery_dl/update.py | 2
-rw-r--r--  gallery_dl/util.py | 74
-rw-r--r--  gallery_dl/version.py | 2
-rw-r--r--  gallery_dl/ytdl.py | 6
-rw-r--r--  setup.py | 1
-rw-r--r--  test/test_downloader.py | 15
-rw-r--r--  test/test_dt.py | 167
-rw-r--r--  test/test_extractor.py | 86
-rw-r--r--  test/test_formatter.py | 41
-rw-r--r--  test/test_job.py | 3
-rw-r--r--  test/test_path.py | 297
-rw-r--r--  test/test_postprocessor.py | 86
-rw-r--r--  test/test_results.py | 14
-rw-r--r--  test/test_text.py | 69
-rw-r--r--  test/test_util.py | 90
286 files changed, 6087 insertions, 2910 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7b2f503..ed0715f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,38 +1,32 @@
-## 1.30.10 - 2025-10-12
+## 1.31.1 - 2025-12-20
### Extractors
#### Additions
-- [bluesky] add `bookmark` extractor ([#8370](https://github.com/mikf/gallery-dl/issues/8370))
-- [dandadan] add support ([#8381](https://github.com/mikf/gallery-dl/issues/8381))
+- [2chen] implement generic `2chen` board extractors
+ - support `https://schan.help/` ([#8680](https://github.com/mikf/gallery-dl/issues/8680))
+- [aryion] add `watch` extractor ([#8705](https://github.com/mikf/gallery-dl/issues/8705))
+- [comedywildlifephoto] add `gallery` extractor ([#8690](https://github.com/mikf/gallery-dl/issues/8690))
+- [koofr] add `shared` extractor ([#8700](https://github.com/mikf/gallery-dl/issues/8700))
+- [picazor] add `user` extractor ([#7083](https://github.com/mikf/gallery-dl/issues/7083) [#7504](https://github.com/mikf/gallery-dl/issues/7504) [#7795](https://github.com/mikf/gallery-dl/issues/7795) [#8717](https://github.com/mikf/gallery-dl/issues/8717))
+- [weebdex] add support ([#8722](https://github.com/mikf/gallery-dl/issues/8722))
+- [xenforo] support `allthefallen.moe/forum` ([#3249](https://github.com/mikf/gallery-dl/issues/3249) [#8268](https://github.com/mikf/gallery-dl/issues/8268))
#### Fixes
-- [bellazon] fix video URL extraction ([#8392](https://github.com/mikf/gallery-dl/issues/8392))
-- [bluesky] handle exceptions during file extraction
-- [civitai] prevent downloading random posts from deleted users ([#8299](https://github.com/mikf/gallery-dl/issues/8299))
-- [girlsreleased] update API endpoints ([#8360](https://github.com/mikf/gallery-dl/issues/8360))
-- [instagram] restore `video_dash_manifest` downloads ([#8364](https://github.com/mikf/gallery-dl/issues/8364))
-- [kemono] prevent fatal exceptions when retrieving user profile data ([#8382](https://github.com/mikf/gallery-dl/issues/8382))
-- [mangadex] fix `RuntimeError` for titles without a `description` ([#8389](https://github.com/mikf/gallery-dl/issues/8389))
-- [naver-blog] fix video extraction ([#8385](https://github.com/mikf/gallery-dl/issues/8385))
-- [poipiku] fix original file downloads ([#8356](https://github.com/mikf/gallery-dl/issues/8356))
-- [weibo] fix retrieving followers-only content ([#6447](https://github.com/mikf/gallery-dl/issues/6447) [#7939](https://github.com/mikf/gallery-dl/issues/7939) [#8063](https://github.com/mikf/gallery-dl/issues/8063) [#8354](https://github.com/mikf/gallery-dl/issues/8354) [#8357](https://github.com/mikf/gallery-dl/issues/8357))
-- [weibo] use `page` parameter for `feed` results ([#7523](https://github.com/mikf/gallery-dl/issues/7523) [#8128](https://github.com/mikf/gallery-dl/issues/8128) [#8357](https://github.com/mikf/gallery-dl/issues/8357))
-- [wikimedia] fix name & extension of files without an extension ([#8344](https://github.com/mikf/gallery-dl/issues/8344))
-- [wikimedia] ignore missing files ([#8388](https://github.com/mikf/gallery-dl/issues/8388))
+- [aryion:favorite] fix extraction ([#8705](https://github.com/mikf/gallery-dl/issues/8705) [#8723](https://github.com/mikf/gallery-dl/issues/8723) [#8728](https://github.com/mikf/gallery-dl/issues/8728))
+- [aryion] fix `description` metadata
+- [boosty] include `Authorization` header with file downloads ([#8704](https://github.com/mikf/gallery-dl/issues/8704))
+- [fanbox] make `_extract_post()` non-fatal ([#8711](https://github.com/mikf/gallery-dl/issues/8711))
+- [furaffinity] fix `tags` metadata ([#8724](https://github.com/mikf/gallery-dl/issues/8724))
+- [mastodon] fix `AttributeError: 'parse_datetime_iso'` ([#8709](https://github.com/mikf/gallery-dl/issues/8709))
+- [tenor] fix `title` metadata
+- [twitter] fix `avatar` & `background` downloads with `"expand": true` ([#8698](https://github.com/mikf/gallery-dl/issues/8698))
#### Improvements
-- [bellazon] ignore links to other threads ([#8392](https://github.com/mikf/gallery-dl/issues/8392))
-- [common] disable delay for `request_location()`
-- [fansly] update format selection ([#4401](https://github.com/mikf/gallery-dl/issues/4401))
-- [fansly] download user posts from all account walls ([#4401](https://github.com/mikf/gallery-dl/issues/4401))
-- [instagram] support `/share/SHORTCODE` URLs ([#8340](https://github.com/mikf/gallery-dl/issues/8340))
-- [weibo] ignore ongoing live streams ([#8339](https://github.com/mikf/gallery-dl/issues/8339))
-- [zerochan] forward URL parameters to API requests ([#8377](https://github.com/mikf/gallery-dl/issues/8377))
-#### Metadata
-- [instagram] extract `subscription` metadata ([#8349](https://github.com/mikf/gallery-dl/issues/8349))
-- [webtoons] fix `episode` metadata extraction ([#2591](https://github.com/mikf/gallery-dl/issues/2591))
-#### Removals
-- [twitter] remove login support ([#4202](https://github.com/mikf/gallery-dl/issues/4202) [#6029](https://github.com/mikf/gallery-dl/issues/6029) [#6040](https://github.com/mikf/gallery-dl/issues/6040) [#8362](https://github.com/mikf/gallery-dl/issues/8362))
-### Post Processors
-- [exec] support `{_temppath}` replacement fields ([#8329](https://github.com/mikf/gallery-dl/issues/8329))
+- [boosty] warn about expired `auth` cookie tokens ([#8704](https://github.com/mikf/gallery-dl/issues/8704))
+- [misskey] implement `order-posts` option ([#8516](https://github.com/mikf/gallery-dl/issues/8516))
+- [reddit] use `"videos": "dash"` by default ([#8657](https://github.com/mikf/gallery-dl/issues/8657))
+- [pixiv] warn about invalid `PHPSESSID` cookie ([#8689](https://github.com/mikf/gallery-dl/issues/8689))
+### Downloaders
+- [ytdl] fix `UnboundLocalError: 'tries'` ([#8707](https://github.com/mikf/gallery-dl/issues/8707))
+- [ytdl] respect `--no-skip`
### Miscellaneous
-- [formatter] improve error messages ([#8369](https://github.com/mikf/gallery-dl/issues/8369))
-- [path] implement conditional `base-directory`
-- use `utf-8` encoding when opening files in text mode ([#8376](https://github.com/mikf/gallery-dl/issues/8376))
+- [path] implement dynamic length directories ([#1350](https://github.com/mikf/gallery-dl/issues/1350))
+- [formatter] add `I` format specifier - identity
+- [tests] add `path` tests
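Two of the changes above alter option values rather than add features. A minimal config sketch combining Reddit's new `"dash"` video default with Misskey's new `order-posts` option (structure mirrors `docs/gallery-dl.conf` further down; the `"asc"` value is illustrative):

.. code:: json

    {
        "extractor": {
            "reddit" : {"videos": "dash"},
            "misskey": {"order-posts": "asc"}
        }
    }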
diff --git a/PKG-INFO b/PKG-INFO
index 6a8f856..abf3e16 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: gallery_dl
-Version: 1.30.10
+Version: 1.31.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -24,6 +24,7 @@ Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Internet :: WWW/HTTP
@@ -141,9 +142,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.10/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.31.1/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.10/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.31.1/gallery-dl.bin>`__
Nightly Builds
@@ -224,6 +225,13 @@ Pulling image from `GitHub Container Registry <https://github.com/mikf/gallery-d
docker pull ghcr.io/mikf/gallery-dl
docker tag ghcr.io/mikf/gallery-dl gallery-dl
+Pulling *Nightly Build* images built from the latest commit by using the ``dev`` tag:
+
+.. code:: bash
+
+ docker pull mikf123/gallery-dl:dev
+ docker pull ghcr.io/mikf/gallery-dl:dev
+
To run the container you will probably want to attach some directories on the host so that the config file and downloads can persist across runs.
Make sure to either download the example config file reference in the repo and place it in the mounted volume location or touch an empty file there.
diff --git a/README.rst b/README.rst
index d892901..a557bb9 100644
--- a/README.rst
+++ b/README.rst
@@ -79,9 +79,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.10/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.31.1/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.10/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.31.1/gallery-dl.bin>`__
Nightly Builds
@@ -162,6 +162,13 @@ Pulling image from `GitHub Container Registry <https://github.com/mikf/gallery-d
docker pull ghcr.io/mikf/gallery-dl
docker tag ghcr.io/mikf/gallery-dl gallery-dl
+Pulling *Nightly Build* images built from the latest commit by using the ``dev`` tag:
+
+.. code:: bash
+
+ docker pull mikf123/gallery-dl:dev
+ docker pull ghcr.io/mikf/gallery-dl:dev
+
To run the container you will probably want to attach some directories on the host so that the config file and downloads can persist across runs.
Make sure to either download the example config file reference in the repo and place it in the mounted volume location or touch an empty file there.
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 539ec1b..681b429 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -78,9 +78,11 @@ _arguments -s -S \
--filesize-max'[Do not download files larger than SIZE (e.g. 500k or 2.5M)]':'<size>' \
--download-archive'[Record successfully downloaded files in FILE and skip downloading any file already in it]':'<file>':_files \
--range'[Index range(s) specifying which files to download. These can be either a constant value, range, or slice (e.g. '\''5'\'', '\''8-20'\'', or '\''1:24:3'\'')]':'<range>' \
---chapter-range'[Like '\''--range'\'', but applies to manga chapters and other delegated URLs]':'<range>' \
+--post-range'[Like '\''--range'\'', but for posts]':'<range>' \
+--chapter-range'[Like '\''--range'\'', but for child extractors handling manga chapters, external URLs, etc.]':'<range>' \
--filter'[Python expression controlling which files to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by '\''-K'\''. Example: --filter "image_width >= 1000 and rating in ('\''s'\'', '\''q'\'')"]':'<expr>' \
---chapter-filter'[Like '\''--filter'\'', but applies to manga chapters and other delegated URLs]':'<expr>' \
+--post-filter'[Like '\''--filter'\'', but for posts]':'<expr>' \
+--chapter-filter'[Like '\''--filter'\'', but for child extractors handling manga chapters, external URLs, etc.]':'<expr>' \
{-P,--postprocessor}'[Activate the specified post processor]':'<name>' \
--no-postprocessors'[Do not run any post processors]' \
{-O,--postprocessor-option}'[Additional post processor options]':'<key=value>' \
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index ae4cb0f..1f3a33d 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -10,7 +10,7 @@ _gallery_dl()
elif [[ "${prev}" =~ ^()$ ]]; then
COMPREPLY=( $(compgen -d -- "${cur}") )
else
- COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --user-agent --clear-cache --compat --update-check --input-file --input-file-comment --input-file-delete --no-input --quiet --warning --verbose --get-urls --resolve-urls --dump-json --resolve-json --simulate --extractor-info --list-keywords --error-file --print --Print --print-to-file --Print-to-file --list-modules --list-extractors --write-log --write-unsupported --write-pages --print-traffic --no-colors --retries --http-timeout --proxy --source-address --force-ipv4 --force-ipv6 --no-check-certificate --limit-rate --chunk-size --sleep --sleep-request --sleep-429 --sleep-extractor --no-part --no-skip --no-mtime --no-download --option --config --config-yaml --config-toml --config-create --config-status --config-open --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --abort --terminate --filesize-min --filesize-max --download-archive --range --chapter-range --filter --chapter-filter --postprocessor --no-postprocessors --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime --mtime-from-date --rename --rename-to --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --user-agent --clear-cache --compat --update-check --input-file --input-file-comment --input-file-delete --no-input --quiet --warning --verbose --get-urls --resolve-urls --dump-json --resolve-json --simulate --extractor-info --list-keywords --error-file --print --Print --print-to-file --Print-to-file --list-modules --list-extractors --write-log --write-unsupported --write-pages --print-traffic --no-colors --retries --http-timeout --proxy --source-address --force-ipv4 --force-ipv6 --no-check-certificate --limit-rate --chunk-size --sleep --sleep-request --sleep-429 --sleep-extractor --no-part --no-skip --no-mtime --no-download --option --config --config-yaml --config-toml --config-create --config-status --config-open --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --abort --terminate --filesize-min --filesize-max --download-archive --range --post-range --chapter-range --filter --post-filter --chapter-filter --postprocessor --no-postprocessors --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime --mtime-from-date --rename --rename-to --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") )
fi
}
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index 84ff5b5..5a3d8aa 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -73,9 +73,11 @@ complete -c gallery-dl -x -l 'filesize-min' -d 'Do not download files smaller th
complete -c gallery-dl -x -l 'filesize-max' -d 'Do not download files larger than SIZE (e.g. 500k or 2.5M)'
complete -c gallery-dl -r -F -l 'download-archive' -d 'Record successfully downloaded files in FILE and skip downloading any file already in it'
complete -c gallery-dl -x -l 'range' -d 'Index range(s) specifying which files to download. These can be either a constant value, range, or slice (e.g. "5", "8-20", or "1:24:3")'
-complete -c gallery-dl -x -l 'chapter-range' -d 'Like "--range", but applies to manga chapters and other delegated URLs'
+complete -c gallery-dl -x -l 'post-range' -d 'Like "--range", but for posts'
+complete -c gallery-dl -x -l 'chapter-range' -d 'Like "--range", but for child extractors handling manga chapters, external URLs, etc.'
complete -c gallery-dl -x -l 'filter' -d 'Python expression controlling which files to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K". Example: --filter "image_width >= 1000 and rating in ("s", "q")"'
-complete -c gallery-dl -x -l 'chapter-filter' -d 'Like "--filter", but applies to manga chapters and other delegated URLs'
+complete -c gallery-dl -x -l 'post-filter' -d 'Like "--filter", but for posts'
+complete -c gallery-dl -x -l 'chapter-filter' -d 'Like "--filter", but for child extractors handling manga chapters, external URLs, etc.'
complete -c gallery-dl -x -s 'P' -l 'postprocessor' -d 'Activate the specified post processor'
complete -c gallery-dl -l 'no-postprocessors' -d 'Do not run any post processors'
complete -c gallery-dl -x -s 'O' -l 'postprocessor-option' -d 'Additional post processor options'
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 9751705..90a423a 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2025-10-12" "1.30.10" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2025-12-20" "1.31.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -236,14 +236,20 @@ Record successfully downloaded files in FILE and skip downloading any file alrea
.B "\-\-range" \f[I]RANGE\f[]
Index range(s) specifying which files to download. These can be either a constant value, range, or slice (e.g. '5', '8-20', or '1:24:3')
.TP
+.B "\-\-post\-range" \f[I]RANGE\f[]
+Like '--range', but for posts
+.TP
.B "\-\-chapter\-range" \f[I]RANGE\f[]
-Like '--range', but applies to manga chapters and other delegated URLs
+Like '--range', but for child extractors handling manga chapters, external URLs, etc.
.TP
.B "\-\-filter" \f[I]EXPR\f[]
Python expression controlling which files to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by '-K'. Example: --filter "image_width >= 1000 and rating in ('s', 'q')"
.TP
+.B "\-\-post\-filter" \f[I]EXPR\f[]
+Like '--filter', but for posts
+.TP
.B "\-\-chapter\-filter" \f[I]EXPR\f[]
-Like '--filter', but applies to manga chapters and other delegated URLs
+Like '--filter', but for child extractors handling manga chapters, external URLs, etc.
.TP
.B "\-P, \-\-postprocessor" \f[I]NAME\f[]
Activate the specified post processor
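The new `--post-range`/`--post-filter` flags map to the `post-range`/`post-filter` config options documented in the `gallery-dl.conf.5` diff below. A minimal sketch; the range and filter expression are placeholders taken from the man page's own examples:

.. code:: json

    {
        "extractor": {
            "post-range" : "1-20",
            "post-filter": "date >= datetime(2025, 5, 1)"
        }
    }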
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 320963f..7729342 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2025-10-12" "1.30.10" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2025-12-20" "1.31.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -222,11 +222,14 @@ Use an extractor's current target directory as
* \f[I]string\f[]
.IP "Default:" 9
+\f[I]true\f[]
+\f[I][chevereto]\f[] |
+\f[I][imagehost]\f[]
\f[I]false\f[]
+otherwise
.IP "Description:" 4
-If \f[I]true\f[], overwrite any metadata provided by a child extractor
-with its parent's.
+Forward a parent's metadata to its child extractors.
If this is a \f[I]string\f[], add a parent's metadata to its children's
.br
@@ -650,6 +653,8 @@ This is supported for
.br
* \f[I]nijie\f[] (\f[I]R\f[])
.br
+* \f[I]nudostarforum\f[]
+.br
* \f[I]pillowfort\f[]
.br
* \f[I]rule34xyz\f[]
@@ -660,6 +665,8 @@ This is supported for
.br
* \f[I]seiga\f[]
.br
+* \f[I]simpcity\f[]
+.br
* \f[I]subscribestar\f[]
.br
* \f[I]tapas\f[]
@@ -945,8 +952,9 @@ and use the \f[I]User-Agent\f[] header of this installed browser.
\f[I]"firefox"\f[]
\f[I]artstation\f[] \f[I]
\f[I]behance\f[] \f[]
-\f[I]fanbox\f[] |
-\f[I]twitter\f[]
+\f[I]fanbox\f[] \f[I]
+\f[I]twitter\f[] \f[]
+\f[I]vsco\f[]
\f[I]null\f[]
otherwise
@@ -1090,9 +1098,8 @@ to use these browser's default ciphers.
.IP "Default:" 9
\f[I]false\f[]
-\f[I]artstation\f[] \f[I]
-\f[I]behance\f[] \f[]
-\f[I]vsco\f[]
+\f[I]artstation\f[] |
+\f[I]behance\f[]
\f[I]true\f[]
otherwise
@@ -1114,6 +1121,18 @@ and potentially bypass Cloudflare blocks.
Additional name-value pairs to be added to each metadata dictionary.
+.SS extractor.*.keywords-default
+.IP "Type:" 6
+any
+
+.IP "Default:" 9
+\f[I]"None"\f[]
+
+.IP "Description:" 4
+Default value used for missing or undefined keyword names in a
+\f[I]Format String\f[].
+
+
.SS extractor.*.keywords-eval
.IP "Type:" 6
\f[I]bool\f[]
@@ -1122,20 +1141,27 @@ Additional name-value pairs to be added to each metadata dictionary.
\f[I]false\f[]
.IP "Description:" 4
-Evaluate each \f[I]keywords\f[] \f[I]string\f[] value
-as a \f[I]Format String\f[].
+Evaluate each
+\f[I]keywords\f[]
+and
+\f[I]keywords-global\f[]
+\f[I]string\f[] value as a \f[I]Format String\f[].
-.SS extractor.*.keywords-default
+.SS extractor.*.keywords-global
.IP "Type:" 6
-any
+\f[I]object\f[] (name → value)
-.IP "Default:" 9
-\f[I]"None"\f[]
+.IP "Example:" 4
+{"type": "Original", "type_id": 1, "type_category": "meta"}
.IP "Description:" 4
-Default value used for missing or undefined keyword names in a
-\f[I]Format String\f[].
+Global name-value pairs to be added to each metadata dictionary.
+
+.IP "Note:" 4
+Keywords defined here will be overwritten by keywords from
+\f[I]extractor.keywords\f[]
+with the same name.
.SS extractor.*.url-metadata
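A sketch of how the reorganized keyword options above might combine: `keywords-global` entries are added to every metadata dictionary, per-name `keywords` entries override them, and `keywords-default` fills anything still undefined. Values follow the man page's example:

.. code:: json

    {
        "extractor": {
            "keywords-global" : {"type": "Original", "type_id": 1},
            "keywords"        : {"type": "Custom"},
            "keywords-default": "None"
        }
    }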
@@ -1299,17 +1325,22 @@ may pose a security risk.
.IP "Example:" 4
.br
-* "file,skip"
+* "after,skip"
.br
-* ["file", "skip"]
+* ["after", "skip"]
.IP "Description:" 4
\f[I]Event(s)\f[]
for which IDs get written to an
\f[I]archive\f[].
-Available events are:
-\f[I]file\f[], \f[I]skip\f[]
+.IP "Available Events:" 4
+.br
+* \f[I]file\f[]
+.br
+* \f[I]after\f[]
+.br
+* \f[I]skip\f[]
.SS extractor.*.archive-format
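The event list above presumably belongs to the `archive-event` option (its heading lies outside this hunk, so the key name is inferred); a sketch recording IDs both after post-processing and on skip, with a hypothetical archive path:

.. code:: json

    {
        "extractor": {
            "archive"      : "~/gallery-dl/archive.sqlite3",
            "archive-event": ["after", "skip"]
        }
    }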
@@ -1647,13 +1678,22 @@ For example \f[I]5-\f[], \f[I]5:\f[], and \f[I]5::\f[] all mean "Start at file n
The index of the first file is \f[I]1\f[].
+.SS extractor.*.post-range
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Description:" 4
+Like \f[I]image-range\f[],
+but for posts.
+
+
.SS extractor.*.chapter-range
.IP "Type:" 6
\f[I]string\f[]
.IP "Description:" 4
Like \f[I]image-range\f[],
-but applies to delegated URLs like manga chapters, etc.
+but for child extractors handling manga chapters, external URLs, etc.
.SS extractor.*.image-filter
@@ -1677,6 +1717,24 @@ A file only gets downloaded when *all* of the given \f[I]Expressions\f[] evaluat
Available values are the filename-specific ones listed by \f[I]-K\f[] or \f[I]-j\f[].
+.SS extractor.*.post-filter
+.IP "Type:" 6
+.br
+* \f[I]Condition\f[]
+.br
+* \f[I]list\f[] of \f[I]Conditions\f[]
+
+.IP "Example:" 4
+.br
+* "post['id'] > 12345"
+.br
+* ["date >= datetime(2025, 5, 1)", "print(post_id)"]
+
+.IP "Description:" 4
+Like \f[I]image-filter\f[],
+but for posts.
+
+
.SS extractor.*.chapter-filter
.IP "Type:" 6
.br
@@ -1692,7 +1750,7 @@ Available values are the filename-specific ones listed by \f[I]-K\f[] or \f[I]-j
.IP "Description:" 4
Like \f[I]image-filter\f[],
-but applies to delegated URLs like manga chapters, etc.
+but for child extractors handling manga chapters, external URLs, etc.
.SS extractor.*.image-unique
@@ -1724,7 +1782,7 @@ but applies to delegated URLs like manga chapters, etc.
\f[I]string\f[]
.IP "Default:" 9
-\f[I]"%Y-%m-%dT%H:%M:%S"\f[]
+\f[I]null\f[]
.IP "Description:" 4
Format string used to parse \f[I]string\f[] values of
@@ -1732,6 +1790,16 @@ date-min and date-max.
See \f[I]strptime\f[] for a list of formatting directives.
+.IP "Special Values:" 4
+\f[I]null\f[]
+Parse date-min and date-max according to
+.br
+\f[I]ISO 8601\f[]
+See
+.br
+\f[I]datetime.fromisoformat()\f[]
+for details and examples.
+
.IP "Note:" 4
Despite its name, this option does **not** control how
\f[I]{date}\f[] metadata fields are formatted.
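With the new `null` default above, `date-min`/`date-max` strings are parsed as ISO 8601 via `datetime.fromisoformat()`. A sketch with placeholder dates:

.. code:: json

    {
        "extractor": {
            "date-format": null,
            "date-min"   : "2025-01-01",
            "date-max"   : "2025-12-20T00:00:00"
        }
    }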
@@ -2018,6 +2086,25 @@ https://developers.google.com/blogger/docs/3.0/using#APIKey
Download embedded videos hosted on https://www.blogger.com/
+.SS extractor.bluesky.api-server
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"https://bsky.social"\f[] if a
+.br
+\f[I]username\f[]
+is provided
+\f[I]"https://api.bsky.app"\f[] otherwise
+.br
+
+.IP "Description:" 4
+Server address for API requests.
+
+Can be used when self-hosting a
+\f[I]PDS\f[]
+
+
.SS extractor.bluesky.include
.IP "Type:" 6
.br
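A sketch of the new `api-server` option for a self-hosted PDS; the server address and handle are placeholders:

.. code:: json

    {
        "extractor": {
            "bluesky": {
                "api-server": "https://pds.example.org",
                "username"  : "user.example.org"
            }
        }
    }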
@@ -2403,15 +2490,26 @@ as well as the default \f[I]user-images\f[] and \f[I]user-videos\f[]:
.IP "Example:" 4
.br
-* "generation,post,version"
+* "generation,tags,post,version"
.br
* ["version", "generation"]
.IP "Description:" 4
-Extract additional \f[I]generation\f[], \f[I]version\f[], and \f[I]post\f[] metadata.
+Extract additional metadata.
+
+.IP "Supported Values:" 4
+.br
+* \f[I]generation\f[]
+.br
+* \f[I]post\f[]
+.br
+* \f[I]tags\f[]
+.br
+* \f[I]version\f[]
.IP "Note:" 4
-This requires 1 or more additional API requests per image or video.
+This requires 1 additional API request
+for each selected value per image or video.
.SS extractor.civitai.nsfw
@@ -2444,6 +2542,60 @@ while \f[I]3\f[] (\f[I]1|2\f[]) would return only
\f[I]None\f[] and \f[I]Soft\f[] rated images,
+.SS extractor.civitai.period
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"AllTime"\f[]
+
+.IP "Description:" 4
+Sets the \f[I]period\f[] parameter
+when paginating over results.
+
+.IP "Supported Values:" 4
+.br
+* \f[I]"AllTime"\f[]
+.br
+* \f[I]"Year"\f[]
+.br
+* \f[I]"Month"\f[]
+.br
+* \f[I]"Week"\f[]
+.br
+* \f[I]"Day"\f[]
+
+
+.SS extractor.civitai.sort
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"Newest"\f[]
+
+.IP "Description:" 4
+Sets the \f[I]sort\f[] parameter
+when paginating over results.
+
+.IP "Supported Values:" 4
+.br
+* \f[I]"Newest"\f[]
+.br
+* \f[I]"Oldest"\f[]
+.br
+* \f[I]"Most Reactions"\f[]
+.br
+* \f[I]"Most Comments"\f[]
+.br
+* \f[I]"Most Collected"\f[]
+
+.IP "Special Values:" 4
+\f[I]"asc"\f[]
+Ascending order (\f[I]"Oldest"\f[])
+\f[I]"desc"\f[] | \f[I]"reverse"\f[]
+Descending order (\f[I]"Newest"\f[])
+
+
.SS extractor.civitai.quality
.IP "Type:" 6
.br
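A sketch combining the new `period`/`sort` pagination parameters with the extended `metadata` list from the hunk above; values are picked from the documented sets:

.. code:: json

    {
        "extractor": {
            "civitai": {
                "period"  : "Month",
                "sort"    : "Most Reactions",
                "metadata": ["generation", "tags"]
            }
        }
    }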
@@ -2559,6 +2711,17 @@ to be interactively prompted for a password when needed
(see \f[I]getpass()\f[]).
+.SS extractor.cyberfile.recursive
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Recursively download files from subfolders.
+
+
.SS extractor.[Danbooru].external
.IP "Type:" 6
\f[I]bool\f[]
@@ -3483,6 +3646,17 @@ Extract \f[I]plan\f[] and extended \f[I]user\f[] metadata.
\f[I]fanbox.comments\f[]
+.SS extractor.fanbox.creator.offset
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]0\f[]
+
+.IP "Description:" 4
+Custom \f[I]offset\f[] starting value when paginating over posts.
+
+
.SS extractor.fansly.formats
.IP "Type:" 6
\f[I]list\f[] of \f[I]integers\f[]
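A sketch of the new `fanbox.creator.offset` option, e.g. to resume pagination partway through a creator's posts (the offset value is illustrative):

.. code:: json

    {
        "extractor": {
            "fanbox": {
                "creator": {"offset": 120}
            }
        }
    }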
@@ -3799,6 +3973,21 @@ to attempt to fetch the current value used by gofile.
Recursively download files from subfolders.
+.SS extractor.hdoujin.cbz
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download each gallery as a single \f[I].cbz\f[] file.
+
+.IP "Note:" 4
+Requires a
+\f[I]token\f[]
+
+
.SS extractor.hdoujin.crt
.IP "Type:" 6
\f[I]string\f[]
@@ -3878,7 +4067,10 @@ for example \f[I]tags_artist\f[] or \f[I]tags_character\f[].
.IP "Description:" 4
\f[I]Authorization\f[] header value
used for requests to \f[I]https://api.hdoujin.org\f[]
-to access \f[I]favorite\f[] galleries.
+to access \f[I]favorite\f[] galleries
+or download
+\f[I].cbz\f[]
+archives.
.SS extractor.hentaifoundry.descriptions
@@ -4277,6 +4469,25 @@ when processing a user profile.
It is possible to use \f[I]"all"\f[] instead of listing all values separately.
+.SS extractor.itaku.order
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"desc"\f[]
+
+.IP "Description:" 4
+Controls the order in which
+images/posts/users are returned.
+
+\f[I]"asc"\f[] | \f[I]"reverse"\f[]
+Ascending order (oldest first)
+\f[I]"desc"\f[]
+Descending order (newest first)
+any other \f[I]string\f[]
+Custom result order
+
+
.SS extractor.itaku.videos
.IP "Type:" 6
\f[I]bool\f[]
@@ -4605,6 +4816,17 @@ Additional query parameters to send when fetching manga chapters.
and \f[I]/user/follows/manga/feed\f[])
+.SS extractor.mangadex.data-saver
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Enable Data Saver mode and download lower quality versions of chapters.
+
+
.SS extractor.mangadex.lang
.IP "Type:" 6
.br
@@ -4764,6 +4986,17 @@ Also emit metadata for text-only posts without media content.
Your access token, necessary to fetch favorited notes.
+.SS extractor.[misskey].date-min & .date-max
+.IP "Type:" 6
+\f[I]Date\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Retrieve only notes posted after/before this \f[I]Date\f[]
+
+
.SS extractor.[misskey].include
.IP "Type:" 6
.br
@@ -4798,6 +5031,22 @@ when processing a user profile.
It is possible to use \f[I]"all"\f[] instead of listing all values separately.
+.SS extractor.[misskey].order-posts
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"desc"\f[]
+
+.IP "Description:" 4
+Controls the order in which posts are processed.
+
+\f[I]"asc"\f[] | \f[I]"reverse"\f[]
+Ascending order (oldest first)
+\f[I]"desc"\f[]
+Descending order (newest first)
+
+
.SS extractor.[misskey].renotes
.IP "Type:" 6
\f[I]bool\f[]
@@ -4820,6 +5069,17 @@ Fetch media from renoted notes.
Fetch media from replies to other notes.
+.SS extractor.[misskey].text-posts
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Also retrieve text-only notes without media content.
+
+
.SS extractor.[moebooru].pool.metadata
.IP "Type:" 6
\f[I]bool\f[]
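The new `[misskey]` options combined in one sketch: a date window plus text-only notes (the date string is a placeholder, assuming the usual `Date` string form):

.. code:: json

    {
        "extractor": {
            "misskey": {
                "date-min"  : "2025-01-01",
                "date-max"  : null,
                "text-posts": true
            }
        }
    }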
@@ -5153,6 +5413,36 @@ Selects the format of \f[I]images\f[] \f[I]files\f[].
* \f[I]thumbnail_small\f[] (\f[I]"h":100,"w":100\f[])
+.SS extractor.patreon.order-posts
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]collection\f[]
+\f[I]"asc"\f[]
+otherwise
+\f[I]"desc"\f[]
+
+.IP "Example:" 4
+.br
+* "-published_at"
+.br
+* "collection_order"
+
+.IP "Description:" 4
+Controls the order in which
+posts are returned and processed.
+
+\f[I]"asc"\f[]
+Ascending order (oldest first)
+\f[I]"desc"\f[]
+Descending order (newest first)
+\f[I]"reverse"\f[]
+Reverse order
+any other \f[I]string\f[]
+Custom \f[I]sort\f[] order
+
+
.SS extractor.patreon.user.date-max
.IP "Type:" 6
\f[I]Date\f[]
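A sketch of `patreon.order-posts`, using the custom-string form from the examples above alongside the `collection` default noted in the description:

.. code:: json

    {
        "extractor": {
            "patreon": {
                "order-posts": "-published_at",
                "collection" : {"order-posts": "asc"}
            }
        }
    }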
@@ -5681,7 +5971,7 @@ or \f[I]"hq"\f[] if not present.
\f[I]string\f[]
.IP "Default:" 9
-\f[I]"oauth"\f[]
+\f[I]"rest"\f[]
.IP "Description:" 4
Selects which API endpoints to use.
@@ -5859,7 +6149,7 @@ Follow links in the original post's \f[I]selftext\f[].
* \f[I]string\f[]
.IP "Default:" 9
-\f[I]true\f[]
+\f[I]"dash"\f[]
.IP "Description:" 4
Control video download behavior.
@@ -6029,6 +6319,21 @@ Download video embeds from external sites.
Download videos.
+.SS extractor.schalenetwork.cbz
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download each gallery as a single \f[I].cbz\f[] file.
+
+.IP "Note:" 4
+Requires a
+\f[I]token\f[]
+
+
.SS extractor.schalenetwork.crt
.IP "Type:" 6
\f[I]string\f[]
@@ -6115,7 +6420,10 @@ for example \f[I]tags_artist\f[] or \f[I]tags_character\f[].
.IP "Description:" 4
\f[I]Authorization\f[] header value
used for requests to \f[I]https://api.schale.network\f[]
-to access \f[I]favorite\f[] galleries.
+to access \f[I]favorite\f[] galleries
+or download
+\f[I].cbz\f[]
+archives.
.SS extractor.sexcom.gifs
@@ -6129,23 +6437,6 @@ to access \f[I]favorite\f[] galleries.
Download animated images as \f[I].gif\f[] instead of \f[I].webp\f[]
-.SS extractor.simpcity.order-posts
-.IP "Type:" 6
-\f[I]string\f[]
-
-.IP "Default:" 9
-\f[I]"desc"\f[]
-
-.IP "Description:" 4
-Controls the order in which
-posts of a \f[I]thread\f[] are processed.
-
-\f[I]"asc"\f[]
-Ascending order (oldest first)
-\f[I]"desc"\f[] | \f[I]"reverse"\f[]
-Descending order (newest first)
-
-
.SS extractor.sizebooru.metadata
.IP "Type:" 6
\f[I]bool\f[]
@@ -6558,6 +6849,17 @@ Download audio tracks using \f[I]ytdl\f[]
Ignore audio tracks
+.SS extractor.tiktok.covers
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download video covers.
+
+
.SS extractor.tiktok.videos
.IP "Type:" 6
\f[I]bool\f[]
@@ -7091,6 +7393,27 @@ Known available sizes are
* \f[I]360x360\f[]
+.SS extractor.twitter.limit
+.IP "Type:" 6
+.br
+* \f[I]integer\f[]
+.br
+* \f[I]list\f[] of \f[I]integers\f[]
+
+.IP "Default:" 9
+\f[I]50\f[]
+
+.IP "Example:" 4
+[40, 30, 20, 10, 5]
+
+.IP "Description:" 4
+Number of requested results per API query.
+
+When given as a \f[I]list\f[],
+start with the first element as \f[I]count\f[] parameter
+and switch to the next element whenever no results are returned.
+
+
.SS extractor.twitter.logout
.IP "Type:" 6
\f[I]bool\f[]
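A sketch of the new list form of `twitter.limit`, which starts with the first element as the per-request `count` and steps down whenever a query returns no results; shown together with the new `retries-api` option documented further below (values taken from the documented examples and defaults):

.. code:: json

    {
        "extractor": {
            "twitter": {
                "limit"      : [40, 30, 20, 10, 5],
                "retries-api": 9
            }
        }
    }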
@@ -7183,6 +7506,19 @@ It is possible to exclude unwanted Tweets using \f[I]image-filter
<extractor.*.image-filter_>\f[].
+.SS extractor.twitter.retries-api
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]9\f[]
+
+.IP "Description:" 4
+Maximum number of retries
+for API requests when encountering server \f[I]errors\f[],
+or \f[I]-1\f[] for infinite retries.
+
+
.SS extractor.twitter.retweets
.IP "Type:" 6
\f[I]bool\f[]
@@ -7199,14 +7535,26 @@ will be taken from the original Tweets, not the Retweets.
.SS extractor.twitter.search-limit
.IP "Type:" 6
-\f[I]integer\f[]
+.br
+* \f[I]integer\f[]
+.br
+* \f[I]list\f[] of \f[I]integers\f[]
.IP "Default:" 9
\f[I]20\f[]
+.IP "Example:" 4
+[50, 20, 10, 5, 2]
+
.IP "Description:" 4
Number of requested results per search query.
+When given as a \f[I]list\f[],
+start with the first element as \f[I]count\f[] parameter
+and switch to the next element when
+\f[I]search-stop\f[]
+is reached.
+
.SS extractor.twitter.search-pagination
.IP "Type:" 6
@@ -7225,15 +7573,31 @@ Update the \f[I]max_id\f[] search query parameter
to the Tweet ID value of the last retrieved Tweet.
-.SS extractor.twitter.search-stop
+.SS extractor.twitter.search-results
.IP "Type:" 6
-\f[I]integer\f[]
+\f[I]string\f[]
.IP "Default:" 9
+\f[I]"latest"\f[]
+
+.IP "Description:" 4
+Determines the target of search results.
+
+.IP "Supported Values:" 4
.br
-* \f[I]3\f[] if \f[I]search-pagination\f[] is set to \f[I]"cursor"\f[]
+* \f[I]"top"\f[]
.br
-* \f[I]0\f[] otherwise
+* \f[I]"media"\f[]
+.br
+* \f[I]"latest"\f[] | \f[I]"live"\f[]
+
+
+.SS extractor.twitter.search-stop
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]3\f[]
.IP "Description:" 4
Number of empty search result batches
@@ -7298,18 +7662,6 @@ Extract \f[I]TwitPic\f[] embeds.
Ignore previously seen Tweets.
-.SS extractor.twitter.username-alt
-.IP "Type:" 6
-\f[I]string\f[]
-
-.IP "Description:" 4
-Alternate Identifier (username, email, phone number)
-when \f[I]logging in\f[].
-
-When not specified and asked for by Twitter,
-this identifier will need to be entered in an interactive prompt.
-
-
.SS extractor.twitter.users
.IP "Type:" 6
\f[I]string\f[]
@@ -7318,7 +7670,7 @@ this identifier will need to be entered in an interactive prompt.
\f[I]"user"\f[]
.IP "Example:" 4
-"https://twitter.com/search?q=from:{legacy[screen_name]}"
+"https://twitter.com/search?q=from:{core[screen_name]}"
.IP "Description:" 4
Basic format string for user URLs generated from
@@ -7723,6 +8075,18 @@ If this value is \f[I]"original"\f[], metadata for these files
will be taken from the original posts, not the retweeted posts.
+.SS extractor.weibo.text
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Extract full \f[I]text\f[] & \f[I]text_raw\f[] metadata
+for statuses with truncated \f[I]text\f[].
+
+
.SS extractor.weibo.videos
.IP "Type:" 6
\f[I]bool\f[]
@@ -7734,6 +8098,21 @@ will be taken from the original posts, not the retweeted posts.
Download video files.
+.SS extractor.wikimedia.format
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]fandom\f[] | \f[I]wikigg\f[]
+\f[I]"original"\f[]
+otherwise
+\f[I]""\f[]
+
+.IP "Description:" 4
+Sets the format query parameter value
+added to all download URLs.
+
+
.SS extractor.wikimedia.image-revisions
.IP "Type:" 6
\f[I]integer\f[]
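A sketch of `wikimedia.format`: per the `docs/gallery-dl.conf` diff below, `fandom` and `wikigg` default to `"original"` while other wikis default to an empty string:

.. code:: json

    {
        "extractor": {
            "wikimedia": {"format": ""},
            "fandom"   : {"format": "original"}
        }
    }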
@@ -7777,6 +8156,23 @@ The value must be between 10 and 500.
For \f[I]Category:\f[] pages, recursively descent into subcategories.
+.SS extractor.[xenforo].order-posts
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"desc"\f[]
+
+.IP "Description:" 4
+Controls the order in which
+posts of a \f[I]thread\f[] are processed.
+
+\f[I]"asc"\f[]
+Ascending order (oldest first)
+\f[I]"desc"\f[] | \f[I]"reverse"\f[]
+Descending order (newest first)
+
+
.SS extractor.ytdl.cmdline-args
.IP "Type:" 6
.br
@@ -8148,17 +8544,35 @@ into the actual output files.
.SS downloader.*.part-directory
.IP "Type:" 6
-\f[I]Path\f[]
+.br
+* \f[I]Path\f[]
+.br
+* \f[I]object\f[] (\f[I]Condition\f[] → \f[I]Path\f[])
.IP "Default:" 9
\f[I]null\f[]
+.IP "Example:" 4
+.. code:: json
+
+"/tmp/.gdl"
+
+.. code:: json
+
+{
+"size > 100000": "~/.gdl/part",
+"duration" : "/tmp/.gdl/video",
+}
+
+
.IP "Description:" 4
-Alternate location for \f[I].part\f[] files.
+Alternate location(s) for \f[I].part\f[] files.
+
+.IP "Note:" 4
+If this value is \f[I]null\f[] or no \f[I]Conditions\f[] apply,
+\f[I].part\f[] files are stored alongside the actual output files.
-Missing directories will be created as needed.
-If this value is \f[I]null\f[], \f[I].part\f[] files are going to be stored
-alongside the actual output files.
+For a single \f[I]Path\f[], missing directories will be created as needed
.SS downloader.*.progress
@@ -8842,8 +9256,6 @@ File to write logging output to.
.IP "Description:" 4
File to write external URLs unsupported by *gallery-dl* to.
-The default \f[I]Format String\f[] here is \f[I]"{message}"\f[].
-
.SS output.errorfile
.IP "Type:" 6
@@ -8855,8 +9267,6 @@ The default \f[I]Format String\f[] here is \f[I]"{message}"\f[].
.IP "Description:" 4
File to write input URLs which returned an error to.
-The default \f[I]Format String\f[] here is also \f[I]"{message}"\f[].
-
When combined with
\f[I]-I\f[]/\f[I]--input-file-comment\f[] or
\f[I]-x\f[]/\f[I]--input-file-delete\f[],
@@ -9093,6 +9503,18 @@ On POSIX systems, this means enabling the
to have it call \f[I]setsid()\f[].
+.SS exec.verbose
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Include \f[I]command\f[]
+arguments in logging messages.
+
+
.SS hash.chunk-size
.IP "Type:" 6
\f[I]integer\f[]
@@ -9200,9 +9622,14 @@ Write metadata using \f[I]json.dump()\f[]
Write metadata in \f[I]JSON Lines\f[] format
\f[I]"tags"\f[]
Write \f[I]tags\f[] separated by newlines
+\f[I]"print"\f[]
+Write the result of applying
+\f[I]content-format\f[]
+to \f[I]stdout\f[]
\f[I]"custom"\f[]
-Write the result of applying \f[I]metadata.content-format\f[]
-to a file's metadata dictionary
+Write the result of applying
+\f[I]content-format\f[]
+to \f[I]a file\f[]
\f[I]"modify"\f[]
Add or modify metadata entries
\f[I]"delete"\f[]
@@ -9518,7 +9945,7 @@ Only applies to \f[I]"mode": "json"\f[] and \f[I]"jsonl"\f[].
.IP "Type:" 6
\f[I]string\f[]
-.IP "Defsult:" 4
+.IP "Default:" 9
\f[I]"w"\f[]
.IP "Description:" 4
@@ -9535,7 +9962,7 @@ See the \f[I]mode\f[] argument of \f[I]open()\f[] for further details.
.IP "Type:" 6
\f[I]string\f[]
-.IP "Defsult:" 4
+.IP "Default:" 9
\f[I]"utf-8"\f[]
.IP "Description:" 4
@@ -9544,6 +9971,31 @@ Name of the encoding used to encode a file's content.
See the \f[I]encoding\f[] argument of \f[I]open()\f[] for further details.
+.SS metadata.newline
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+The newline sequence used in metadata files.
+
+If \f[I]null\f[], any \f[I]\\n\f[] characters
+written are translated to the system default line separator.
+
+See the \f[I]newline\f[] argument of \f[I]open()\f[] for further details.
+
+.IP "Supported Values:" 4
+\f[I]null\f[]
+Any \f[I]\\n\f[] characters
+written are translated to the system default line separator.
+\f[I]""\f[] \f[I] \f[I]"\\n"\f[]
+Don't replace newline characters.
+\f[I]"\\r"\f[] \f[] \f[I]"\\r\\n"\f[]
+Replace newline characters with the given sequence.
+
+
.SS metadata.private
.IP "Type:" 6
\f[I]bool\f[]
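A sketch using the new `newline` option to force Unix line endings in metadata files regardless of platform, assuming a named post-processor definition under the top-level `postprocessor` object:

.. code:: json

    {
        "postprocessor": {
            "metadata-json": {
                "name"   : "metadata",
                "mode"   : "json",
                "newline": "\n"
            }
        }
    }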
@@ -10710,7 +11162,8 @@ in a JSON file.
"format" : "{asctime} {name}: {message}",
"format-date": "%H:%M:%S",
"path" : "~/log.txt",
-"encoding" : "ascii"
+"encoding" : "ascii",
+"defer" : true
}
.. code:: json
@@ -10744,7 +11197,12 @@ it is also possible to access the current
and keywords objects and their attributes, for example
\f[I]"{extractor.url}"\f[], \f[I]"{path.filename}"\f[], \f[I]"{keywords.title}"\f[]
.br
-* Default: \f[I]"[{name}][{levelname}] {message}"\f[]
+* Default:
+\f[I]"[{name}][{levelname}] {message}"\f[] for
+\f[I]logfile\f[],
+\f[I]"{message}"\f[] for
+\f[I]unsupportedfile\f[] and
+\f[I]errorfile\f[]
.br
* format-date
.br
@@ -10770,17 +11228,35 @@ and keywords objects and their attributes, for example
use \f[I]"w"\f[] to truncate or \f[I]"a"\f[] to append
(see \f[I]open()\f[])
.br
-* Default: \f[I]"w"\f[]
+* Default:
+\f[I]"w"\f[] for
+\f[I]logfile\f[] and
+\f[I]unsupportedfile\f[],
+\f[I]"a"\f[] for
+\f[I]errorfile\f[]
.br
* encoding
.br
* File encoding
.br
* Default: \f[I]"utf-8"\f[]
+.br
+* defer
+.br
+* Defer file opening/creation until writing the first logging message
+.br
+* Default:
+\f[I]false\f[] for
+\f[I]logfile\f[],
+\f[I]true\f[] for
+\f[I]unsupportedfile\f[] and
+\f[I]errorfile\f[]
+
.IP "Note:" 4
-path, mode, and encoding are only applied when configuring
-logging output to a file.
+path, mode, encoding, and defer
+are only applied when configuring logging output to a file.
+(See \f[I]logging.FileHandler\f[])
.SS Postprocessor Configuration
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 5fdca47..25eea53 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -47,13 +47,16 @@
"image-filter" : null,
"image-range" : null,
"image-unique" : false,
+ "post-filter" : null,
+ "post-range" : null,
"chapter-filter": null,
"chapter-range" : null,
"chapter-unique": false,
"keywords" : {},
- "keywords-eval" : false,
"keywords-default" : null,
+ "keywords-eval" : false,
+ "keywords-global" : {},
"parent-directory": false,
"parent-metadata" : false,
@@ -168,11 +171,12 @@
"username": "",
"password": "",
- "include" : ["media"],
- "metadata": false,
- "quoted" : false,
- "reposts" : false,
- "videos" : true,
+ "api-server": null,
+ "include" : ["media"],
+ "metadata" : false,
+ "quoted" : false,
+ "reposts" : false,
+ "videos" : true,
"likes": {
"depth" : 0,
@@ -215,6 +219,8 @@
"include" : ["user-images", "user-videos"],
"metadata": false,
"nsfw" : true,
+ "period" : "AllTime",
+ "sort" : "Newest",
"quality" : "original=true",
"quality-videos": "quality=100"
},
@@ -244,7 +250,8 @@
},
"cyberfile":
{
- "password": ""
+ "password" : "",
+ "recursive": true
},
"dankefuerslesen":
{
@@ -335,7 +342,11 @@
"comments": false,
"embeds" : true,
"fee-max" : null,
- "metadata": false
+ "metadata": false,
+
+ "creator": {
+ "offset": 0
+ }
},
"fansly":
{
@@ -397,6 +408,7 @@
"token": "",
"sleep-request": "0.5-1.5",
+ "cbz" : false,
"format": ["0", "1600", "1280", "980", "780"],
"tags" : false
},
@@ -466,6 +478,7 @@
{
"sleep-request": "0.5-1.5",
"include": "gallery",
+ "order" : "desc",
"videos" : true
},
"iwara":
@@ -520,6 +533,7 @@
"api-server": "https://api.mangadex.org",
"api-parameters": null,
+ "data-saver": false,
"lang": null,
"ratings": ["safe", "suggestive", "erotica", "pornographic"]
},
@@ -576,7 +590,11 @@
"cursor" : true,
"files" : ["images", "image_large", "attachments", "postfile", "content"],
"format-images": "download_url",
+ "order-posts" : "desc",
+ "collection": {
+ "order-posts": "asc"
+ },
"user": {
"date-max" : 0
}
@@ -658,24 +676,25 @@
},
"reddit":
{
+ "cookies" : null,
"client-id" : null,
"user-agent" : null,
"refresh-token": null,
- "api" : "oauth",
+ "api" : "rest",
"comments" : 0,
"morecomments": false,
"embeds" : true,
"date-min" : 0,
"date-max" : 253402210800,
- "date-format" : "%Y-%m-%dT%H:%M:%S",
+ "date-format" : null,
"id-min" : null,
"id-max" : null,
"limit" : null,
"previews" : true,
"recursion" : 0,
"selftext" : null,
- "videos" : true
+ "videos" : "dash"
},
"redgifs":
{
@@ -713,6 +732,7 @@
"token": "",
"sleep-request": "0.5-1.5",
+ "cbz" : false,
"format": ["0", "1600", "1280", "980", "780"],
"tags" : false
},
@@ -726,12 +746,6 @@
{
"gifs": true
},
- "simpcity":
- {
- "cookies": null,
-
- "order-posts": "desc"
- },
"sizebooru":
{
"sleep-request": "0.5-1.5",
@@ -803,6 +817,7 @@
{
"audio" : true,
"videos": true,
+ "covers": false,
"user": {
"avatar": true,
@@ -841,7 +856,6 @@
"twitter":
{
"username" : "",
- "username-alt": "",
"password" : "",
"cookies" : null,
@@ -852,16 +866,19 @@
"cursor" : true,
"expand" : false,
"include" : ["timeline"],
+ "limit" : 50,
"locked" : "abort",
"logout" : true,
"pinned" : false,
"quoted" : false,
"ratelimit" : "wait",
"replies" : true,
+ "retries-api" : 9,
"retweets" : false,
"search-limit": 20,
"search-pagination": "cursor",
- "search-stop" : "auto",
+ "search-results" : "latest",
+ "search-stop" : 3,
"size" : ["orig", "4096x4096", "large", "medium", "small"],
"text-tweets" : false,
"tweet-endpoint": "auto",
@@ -941,6 +958,7 @@
"livephoto": true,
"movies" : false,
"retweets" : false,
+ "text" : false,
"videos" : true
},
"xfolio":
@@ -981,6 +999,11 @@
"videos" : true
},
+ "chevereto":
+ {
+ "parent-metadata": true
+ },
+
"Danbooru":
{
"sleep-request": "0.5-1.5",
@@ -1050,6 +1073,11 @@
"referer": false
},
+ "imagehost":
+ {
+ "parent-metadata": true
+ },
+
"mastodon":
{
"access-token": null,
@@ -1062,9 +1090,13 @@
"misskey":
{
"access-token": null,
+ "date-min" : null,
+ "date-max" : null,
"include" : ["notes"],
+ "order-posts" : "desc",
"renotes" : false,
- "replies" : true
+ "replies" : true,
+ "text-posts" : false
},
"Nijie":
@@ -1122,10 +1154,36 @@
"wikimedia":
{
"sleep-request": "1.0-2.0",
+ "format": "",
"image-revisions": 1,
"limit": 50,
"subcategories": true
},
+ "fandom":
+ {
+ "format": "original"
+ },
+ "wikigg":
+ {
+ "format": "original"
+ },
+
+ "xenforo":
+ {
+ "order-posts": "desc"
+ },
+ "nudostarforum":
+ {
+ "username": "",
+ "password": "",
+ "cookies" : null
+ },
+ "simpcity":
+ {
+ "username": "",
+ "password": "",
+ "cookies" : null
+ },
"booru":
{
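Several extractor defaults change in this file: reddit's 'api' switches from "oauth" to "rest", its 'videos' option from true to "dash", and 'date-format' from a fixed strftime pattern to null, while 'simpcity' is regrouped under the new 'xenforo' base category. Users who want the previous reddit behavior can pin the old values in their own config; a sketch that writes such a file with Python's json module (path and contents are the user's choice, not mandated by the release):

    import json

    conf = {
        "extractor": {
            "reddit": {
                "api"   : "oauth",  # 1.31 default is "rest"
                "videos": True,     # 1.31 default is "dash"
            },
        },
    }

    with open("gallery-dl.conf", "w") as fp:
        json.dump(conf, fp, indent=4)
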
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 6a8f856..abf3e16 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: gallery_dl
-Version: 1.30.10
+Version: 1.31.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -24,6 +24,7 @@ Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Internet :: WWW/HTTP
@@ -141,9 +142,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.10/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.31.1/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.10/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.31.1/gallery-dl.bin>`__
Nightly Builds
@@ -224,6 +225,13 @@ Pulling image from `GitHub Container Registry <https://github.com/mikf/gallery-d
docker pull ghcr.io/mikf/gallery-dl
docker tag ghcr.io/mikf/gallery-dl gallery-dl
+Pulling *Nightly Build* images built from the latest commit by using the ``dev`` tag:
+
+.. code:: bash
+
+ docker pull mikf123/gallery-dl:dev
+ docker pull ghcr.io/mikf/gallery-dl:dev
+
To run the container you will probably want to attach some directories on the host so that the config file and downloads can persist across runs.
Make sure to either download the example config file referenced in the repo and place it in the mounted volume location or touch an empty file there.
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 2cecdad..1694339 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -20,6 +20,7 @@ gallery_dl/archive.py
gallery_dl/cache.py
gallery_dl/config.py
gallery_dl/cookies.py
+gallery_dl/dt.py
gallery_dl/exception.py
gallery_dl/formatter.py
gallery_dl/job.py
@@ -61,8 +62,10 @@ gallery_dl/extractor/agnph.py
gallery_dl/extractor/ao3.py
gallery_dl/extractor/arcalive.py
gallery_dl/extractor/architizer.py
+gallery_dl/extractor/arena.py
gallery_dl/extractor/artstation.py
gallery_dl/extractor/aryion.py
+gallery_dl/extractor/audiochan.py
gallery_dl/extractor/batoto.py
gallery_dl/extractor/bbc.py
gallery_dl/extractor/behance.py
@@ -75,9 +78,11 @@ gallery_dl/extractor/boosty.py
gallery_dl/extractor/booth.py
gallery_dl/extractor/bunkr.py
gallery_dl/extractor/catbox.py
+gallery_dl/extractor/cfake.py
gallery_dl/extractor/chevereto.py
gallery_dl/extractor/cien.py
gallery_dl/extractor/civitai.py
+gallery_dl/extractor/comedywildlifephoto.py
gallery_dl/extractor/comick.py
gallery_dl/extractor/comicvine.py
gallery_dl/extractor/common.py
@@ -92,6 +97,7 @@ gallery_dl/extractor/directlink.py
gallery_dl/extractor/discord.py
gallery_dl/extractor/dynastyscans.py
gallery_dl/extractor/e621.py
+gallery_dl/extractor/eporner.py
gallery_dl/extractor/erome.py
gallery_dl/extractor/everia.py
gallery_dl/extractor/exhentai.py
@@ -101,6 +107,8 @@ gallery_dl/extractor/fansly.py
gallery_dl/extractor/fantia.py
gallery_dl/extractor/fapachi.py
gallery_dl/extractor/fapello.py
+gallery_dl/extractor/fikfap.py
+gallery_dl/extractor/fitnakedgirls.py
gallery_dl/extractor/flickr.py
gallery_dl/extractor/foolfuuka.py
gallery_dl/extractor/foolslide.py
@@ -148,6 +156,7 @@ gallery_dl/extractor/keenspot.py
gallery_dl/extractor/kemono.py
gallery_dl/extractor/khinsider.py
gallery_dl/extractor/komikcast.py
+gallery_dl/extractor/koofr.py
gallery_dl/extractor/leakgallery.py
gallery_dl/extractor/lensdump.py
gallery_dl/extractor/lexica.py
@@ -188,12 +197,14 @@ gallery_dl/extractor/nozomi.py
gallery_dl/extractor/nsfwalbum.py
gallery_dl/extractor/nudostar.py
gallery_dl/extractor/oauth.py
+gallery_dl/extractor/okporn.py
gallery_dl/extractor/paheal.py
gallery_dl/extractor/patreon.py
gallery_dl/extractor/pexels.py
gallery_dl/extractor/philomena.py
gallery_dl/extractor/photovogue.py
gallery_dl/extractor/picarto.py
+gallery_dl/extractor/picazor.py
gallery_dl/extractor/pictoa.py
gallery_dl/extractor/piczel.py
gallery_dl/extractor/pillowfort.py
@@ -206,13 +217,13 @@ gallery_dl/extractor/poipiku.py
gallery_dl/extractor/poringa.py
gallery_dl/extractor/pornhub.py
gallery_dl/extractor/pornpics.py
+gallery_dl/extractor/pornstarstube.py
gallery_dl/extractor/postmill.py
gallery_dl/extractor/rawkuma.py
gallery_dl/extractor/reactor.py
gallery_dl/extractor/readcomiconline.py
gallery_dl/extractor/realbooru.py
gallery_dl/extractor/recursive.py
-gallery_dl/extractor/redbust.py
gallery_dl/extractor/reddit.py
gallery_dl/extractor/redgifs.py
gallery_dl/extractor/rule34us.py
@@ -229,7 +240,6 @@ gallery_dl/extractor/senmanga.py
gallery_dl/extractor/sexcom.py
gallery_dl/extractor/shimmie2.py
gallery_dl/extractor/shopify.py
-gallery_dl/extractor/simpcity.py
gallery_dl/extractor/simplyhentai.py
gallery_dl/extractor/sizebooru.py
gallery_dl/extractor/skeb.py
@@ -240,6 +250,7 @@ gallery_dl/extractor/soundgasm.py
gallery_dl/extractor/speakerdeck.py
gallery_dl/extractor/steamgriddb.py
gallery_dl/extractor/subscribestar.py
+gallery_dl/extractor/sxypix.py
gallery_dl/extractor/szurubooru.py
gallery_dl/extractor/tapas.py
gallery_dl/extractor/tcbscans.py
@@ -271,11 +282,13 @@ gallery_dl/extractor/weasyl.py
gallery_dl/extractor/webmshare.py
gallery_dl/extractor/webtoons.py
gallery_dl/extractor/weebcentral.py
+gallery_dl/extractor/weebdex.py
gallery_dl/extractor/weibo.py
gallery_dl/extractor/wikiart.py
gallery_dl/extractor/wikifeet.py
gallery_dl/extractor/wikimedia.py
gallery_dl/extractor/xasiat.py
+gallery_dl/extractor/xenforo.py
gallery_dl/extractor/xfolio.py
gallery_dl/extractor/xhamster.py
gallery_dl/extractor/xvideos.py
@@ -300,11 +313,13 @@ test/test_cache.py
test/test_config.py
test/test_cookies.py
test/test_downloader.py
+test/test_dt.py
test/test_extractor.py
test/test_formatter.py
test/test_job.py
test/test_oauth.py
test/test_output.py
+test/test_path.py
test/test_postprocessor.py
test/test_results.py
test/test_text.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index fdcb6d0..98f8c12 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -284,14 +284,14 @@ def main():
# unsupported file logging handler
if handler := output.setup_logging_handler(
- "unsupportedfile", fmt="{message}"):
+ "unsupportedfile", fmt="{message}", defer=True):
ulog = job.Job.ulog = logging.getLogger("unsupported")
ulog.addHandler(handler)
ulog.propagate = False
# error file logging handler
if handler := output.setup_logging_handler(
- "errorfile", fmt="{message}", mode="a"):
+ "errorfile", fmt="{message}", mode="a", defer=True):
elog = input_manager.err = logging.getLogger("errorfile")
elog.addHandler(handler)
elog.propagate = False
diff --git a/gallery_dl/actions.py b/gallery_dl/actions.py
index 971c4d9..5d2f645 100644
--- a/gallery_dl/actions.py
+++ b/gallery_dl/actions.py
@@ -148,6 +148,11 @@ class LoggerAdapter():
if cond(msg):
action(args)
+ def traceback(self, exc):
+ if self.logger.isEnabledFor(logging.DEBUG):
+ self.logger._log(
+ logging.DEBUG, "", None, exc_info=exc, extra=self.extra)
+
def _level_to_int(level):
try:
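The new traceback() helper gates exc_info rendering behind an isEnabledFor(DEBUG) check, so full tracebacks are only formatted when debug output is actually enabled. The same pattern in isolation, using plain logging without the adapter's extra-dict handling:

    import logging

    log = logging.getLogger("downloader")

    def traceback(exc):
        # format the traceback only when DEBUG output is enabled
        if log.isEnabledFor(logging.DEBUG):
            log.debug("", exc_info=exc)

    try:
        raise ValueError("example failure")
    except ValueError as exc:
        traceback(exc)  # no-op unless the effective level is DEBUG
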
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index ba719ac..26f8244 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -119,7 +119,7 @@ def load_cookies_webkit(browser_name, profile=None, domain=None):
for page_size in page_sizes:
_webkit_parse_cookies_page(p.read_bytes(page_size), cookies)
_log_info("Extracted %s cookies from %s",
- browser_name.capitalize(), len(cookies))
+ len(cookies), browser_name.capitalize())
return cookies
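The cookies.py change swaps two logging arguments that were passed in the wrong order; the message now reads "Extracted 42 cookies from Firefox" instead of "Extracted Firefox cookies from 42". For illustration:

    import logging

    logging.basicConfig(level=logging.INFO)
    cookies = ["sid", "csrf", "session"]
    # arguments now line up with the "%s ... %s" placeholders
    logging.info("Extracted %s cookies from %s", len(cookies), "Firefox")
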
diff --git a/gallery_dl/downloader/__init__.py b/gallery_dl/downloader/__init__.py
index e1b936e..79dc5cb 100644
--- a/gallery_dl/downloader/__init__.py
+++ b/gallery_dl/downloader/__init__.py
@@ -27,7 +27,7 @@ def find(scheme):
scheme = "http"
if scheme in modules: # prevent unwanted imports
try:
- module = __import__(scheme, globals(), None, (), 1)
+ module = __import__(scheme, globals(), None, None, 1)
except ImportError:
pass
else:
diff --git a/gallery_dl/downloader/common.py b/gallery_dl/downloader/common.py
index 7cd8d10..66996f7 100644
--- a/gallery_dl/downloader/common.py
+++ b/gallery_dl/downloader/common.py
@@ -31,8 +31,15 @@ class DownloaderBase():
self.partdir = self.config("part-directory")
if self.partdir:
- self.partdir = util.expand_path(self.partdir)
- os.makedirs(self.partdir, exist_ok=True)
+ if isinstance(self.partdir, dict):
+ self.partdir = [
+ (util.compile_filter(expr) if expr else util.true,
+ util.expand_path(pdir))
+ for expr, pdir in self.partdir.items()
+ ]
+ else:
+ self.partdir = util.expand_path(self.partdir)
+ os.makedirs(self.partdir, exist_ok=True)
proxies = self.config("proxy", util.SENTINEL)
if proxies is util.SENTINEL:
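With this hunk, 'part-directory' may also be a mapping from filter expressions to directories: every key is compiled via util.compile_filter, and an empty key falls back to util.true, i.e. a catch-all. The hunk only builds the compiled list; presumably the first matching expression selects the directory later on. A rough standalone equivalent under that assumption, treating the keys as Python expressions over a file's metadata:

    rules = {
        "extension == 'mp4'": "/data/parts/video",
        ""                  : "/data/parts",  # empty key: catch-all
    }

    compiled = [
        (compile(expr, "<filter>", "eval") if expr else None, path)
        for expr, path in rules.items()
    ]

    def partdir(kwdict):
        # return the directory of the first matching filter
        for code, path in compiled:
            if code is None or eval(code, {}, kwdict):
                return path

    print(partdir({"extension": "mp4"}))  # /data/parts/video
    print(partdir({"extension": "jpg"}))  # /data/parts
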
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 248bf70..703dcca 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -95,7 +95,7 @@ class HttpDownloader(DownloaderBase):
except Exception as exc:
if self.downloading:
output.stderr_write("\n")
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
raise
finally:
# remove file from incomplete downloads
@@ -230,6 +230,10 @@ class HttpDownloader(DownloaderBase):
# check file size
size = text.parse_int(size, None)
if size is not None:
+ if not size:
+ self.release_conn(response)
+ self.log.warning("Empty file")
+ return False
if self.minsize and size < self.minsize:
self.release_conn(response)
self.log.warning(
@@ -342,9 +346,15 @@ class HttpDownloader(DownloaderBase):
raise
# check file size
- if size and fp.tell() < size:
- msg = f"file size mismatch ({fp.tell()} < {size})"
- output.stderr_write("\n")
+ if size and (fsize := fp.tell()) < size:
+ if (segmented := kwdict.get("_http_segmented")) and \
+ segmented is True or segmented == fsize:
+ tries -= 1
+ msg = "Resuming segmented download"
+ output.stdout_write("\r")
+ else:
+ msg = f"file size mismatch ({fsize} < {size})"
+ output.stderr_write("\n")
continue
break
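Two behaviors change here: a Content-Length of 0 now aborts early with an "Empty file" warning, and an undersized download is resumed without consuming a retry when the extractor flagged it via '_http_segmented' (either True, or the exact byte offset at which a segment boundary is expected). The decision condensed into one predicate, preserving the original operator precedence ('and' binds tighter than 'or'):

    def resume_segmented(kwdict, fsize, size):
        """True if a short read should resume instead of failing"""
        if not (size and fsize < size):
            return False  # nothing is missing
        segmented = kwdict.get("_http_segmented")
        # parsed as: (segmented and segmented is True) or segmented == fsize
        return segmented and segmented is True or segmented == fsize

    print(resume_segmented({"_http_segmented": True}, 1000, 5000))   # True
    print(resume_segmented({"_http_segmented": 65536}, 65536, 90000))  # True
    print(resume_segmented({}, 1000, 5000))                          # False
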
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index a56a6be..e9b3294 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -22,9 +22,9 @@ class YoutubeDLDownloader(DownloaderBase):
DownloaderBase.__init__(self, job)
extractor = job.extractor
- retries = self.config("retries", extractor._retries)
+ self.retries = self.config("retries", extractor._retries)
self.ytdl_opts = {
- "retries": retries+1 if retries >= 0 else float("inf"),
+ "retries": self.retries+1 if self.retries >= 0 else float("inf"),
"socket_timeout": self.config("timeout", extractor._timeout),
"nocheckcertificate": not self.config("verify", extractor._verify),
"proxy": self.proxies.get("http") if self.proxies else None,
@@ -39,17 +39,25 @@ class YoutubeDLDownloader(DownloaderBase):
def download(self, url, pathfmt):
kwdict = pathfmt.kwdict
+ tries = 0
- ytdl_instance = kwdict.pop("_ytdl_instance", None)
- if not ytdl_instance:
+ if ytdl_instance := kwdict.pop("_ytdl_instance", None):
+ # 'ytdl' extractor
+ self._prepare(ytdl_instance)
+ info_dict = kwdict.pop("_ytdl_info_dict")
+ else:
+ # other extractors
ytdl_instance = self.ytdl_instance
if not ytdl_instance:
try:
module = ytdl.import_module(self.config("module"))
except (ImportError, SyntaxError) as exc:
- self.log.error("Cannot import module '%s'",
- getattr(exc, "name", ""))
- self.log.debug("", exc_info=exc)
+ if exc.__context__:
+ self.log.error("Cannot import yt-dlp or youtube-dl")
+ else:
+ self.log.error("Cannot import module '%s'",
+ getattr(exc, "name", ""))
+ self.log.traceback(exc)
self.download = lambda u, p: False
return False
@@ -63,6 +71,8 @@ class YoutubeDLDownloader(DownloaderBase):
module, self, self.ytdl_opts)
if self.outtmpl == "default":
self.outtmpl = module.DEFAULT_OUTTMPL
+ self._prepare(ytdl_instance)
+
if self.forward_cookies:
self.log.debug("Forwarding cookies to %s",
ytdl_instance.__module__)
@@ -70,45 +80,150 @@ class YoutubeDLDownloader(DownloaderBase):
for cookie in self.session.cookies:
set_cookie(cookie)
- if "__gdl_initialize" in ytdl_instance.params:
- del ytdl_instance.params["__gdl_initialize"]
+ url = url[5:]
+ manifest = kwdict.get("_ytdl_manifest")
+ while True:
+ tries += 1
+ self.error = None
+ try:
+ if manifest is None:
+ info_dict = self._extract_url(
+ ytdl_instance, url)
+ else:
+ info_dict = self._extract_manifest(
+ ytdl_instance, url, kwdict)
+ except Exception as exc:
+ self.log.traceback(exc)
+ cls = exc.__class__
+ if cls.__module__ == "builtins":
+ tries = False
+ msg = f"{cls.__name__}: {exc}"
+ else:
+ if self.error is not None:
+ msg = self.error
+ elif not info_dict:
+ msg = "Empty 'info_dict' data"
+ else:
+ break
+
+ if tries:
+ self.log.error("%s (%s/%s)", msg, tries, self.retries+1)
+ else:
+ self.log.error(msg)
+ return False
+ if tries > self.retries:
+ return False
- if self.progress is not None:
- ytdl_instance.add_progress_hook(self._progress_hook)
- if rlf := ytdl_instance.params.pop("__gdl_ratelimit_func", False):
- self.rate_dyn = rlf
+ if extra := kwdict.get("_ytdl_extra"):
+ info_dict.update(extra)
- info_dict = kwdict.pop("_ytdl_info_dict", None)
- if not info_dict:
- url = url[5:]
+ while True:
+ tries += 1
+ self.error = None
try:
- if manifest := kwdict.pop("_ytdl_manifest", None):
- info_dict = self._extract_manifest(
- ytdl_instance, url, manifest,
- kwdict.pop("_ytdl_manifest_data", None),
- kwdict.pop("_ytdl_manifest_headers", None),
- kwdict.pop("_ytdl_manifest_cookies", None))
+ if "entries" in info_dict:
+ success = self._download_playlist(
+ ytdl_instance, pathfmt, info_dict)
else:
- info_dict = self._extract_info(ytdl_instance, url)
+ success = self._download_video(
+ ytdl_instance, pathfmt, info_dict)
except Exception as exc:
- self.log.debug("", exc_info=exc)
- self.log.warning("%s: %s", exc.__class__.__name__, exc)
+ self.log.traceback(exc)
+ cls = exc.__class__
+ if cls.__module__ == "builtins":
+ tries = False
+ msg = f"{cls.__name__}: {exc}"
+ else:
+ if self.error is not None:
+ msg = self.error
+ elif not success:
+ msg = "Error"
+ else:
+ break
- if not info_dict:
+ if tries:
+ self.log.error("%s (%s/%s)", msg, tries, self.retries+1)
+ else:
+ self.log.error(msg)
return False
+ if tries > self.retries:
+ return False
+ return True
+
+ def _extract_url(self, ytdl, url):
+ return ytdl.extract_info(url, download=False)
+
+ def _extract_manifest(self, ytdl, url, kwdict):
+ extr = ytdl.get_info_extractor("Generic")
+ video_id = extr._generic_id(url)
+
+ if cookies := kwdict.get("_ytdl_manifest_cookies"):
+ if isinstance(cookies, dict):
+ cookies = cookies.items()
+ set_cookie = ytdl.cookiejar.set_cookie
+ for name, value in cookies:
+ set_cookie(Cookie(
+ 0, name, value, None, False,
+ "", False, False, "/", False,
+ False, None, False, None, None, {},
+ ))
+
+ type = kwdict["_ytdl_manifest"]
+ data = kwdict.get("_ytdl_manifest_data")
+ headers = kwdict.get("_ytdl_manifest_headers")
+ if type == "hls":
+ if data is None:
+ try:
+ fmts, subs = extr._extract_m3u8_formats_and_subtitles(
+ url, video_id, "mp4", headers=headers)
+ except AttributeError:
+ fmts = extr._extract_m3u8_formats(
+ url, video_id, "mp4", headers=headers)
+ subs = None
+ else:
+ try:
+ fmts, subs = extr._parse_m3u8_formats_and_subtitles(
+ data, url, "mp4", headers=headers)
+ except AttributeError:
+ fmts = extr._parse_m3u8_formats(
+ data, url, "mp4", headers=headers)
+ subs = None
- if "entries" in info_dict:
- index = kwdict.get("_ytdl_index")
- if index is None:
- return self._download_playlist(
- ytdl_instance, pathfmt, info_dict)
+ elif type == "dash":
+ if data is None:
+ try:
+ fmts, subs = extr._extract_mpd_formats_and_subtitles(
+ url, video_id, headers=headers)
+ except AttributeError:
+ fmts = extr._extract_mpd_formats(
+ url, video_id, headers=headers)
+ subs = None
else:
- info_dict = info_dict["entries"][index]
+ if isinstance(data, str):
+ data = ElementTree.fromstring(data)
+ try:
+ fmts, subs = extr._parse_mpd_formats_and_subtitles(
+ data, mpd_id="dash")
+ except AttributeError:
+ fmts = extr._parse_mpd_formats(
+ data, mpd_id="dash")
+ subs = None
- if extra := kwdict.get("_ytdl_extra"):
- info_dict.update(extra)
+ else:
+ raise ValueError(f"Unsupported manifest type '{type}'")
- return self._download_video(ytdl_instance, pathfmt, info_dict)
+ if headers:
+ for fmt in fmts:
+ fmt["http_headers"] = headers
+
+ info_dict = {
+ "extractor": "",
+ "id" : video_id,
+ "title" : video_id,
+ "formats" : fmts,
+ "subtitles": subs,
+ }
+ return ytdl.process_ie_result(info_dict, download=False)
def _download_video(self, ytdl_instance, pathfmt, info_dict):
if "url" in info_dict:
@@ -161,12 +276,7 @@ class YoutubeDLDownloader(DownloaderBase):
path = pathfmt.realpath.replace("%", "%%")
self._set_outtmpl(ytdl_instance, path)
- try:
- ytdl_instance.process_info(info_dict)
- except Exception as exc:
- self.log.debug("", exc_info=exc)
- return False
-
+ ytdl_instance.process_info(info_dict)
pathfmt.temppath = info_dict.get("filepath") or info_dict["_filename"]
return True
@@ -188,78 +298,20 @@ class YoutubeDLDownloader(DownloaderBase):
ytdl_instance.process_info(entry)
status = True
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
self.log.error("%s: %s", exc.__class__.__name__, exc)
return status
- def _extract_info(self, ytdl, url):
- return ytdl.extract_info(url, download=False)
-
- def _extract_manifest(self, ytdl, url, manifest_type, manifest_data=None,
- headers=None, cookies=None):
- extr = ytdl.get_info_extractor("Generic")
- video_id = extr._generic_id(url)
-
- if cookies is not None:
- if isinstance(cookies, dict):
- cookies = cookies.items()
- set_cookie = ytdl.cookiejar.set_cookie
- for name, value in cookies:
- set_cookie(Cookie(
- 0, name, value, None, False,
- "", False, False, "/", False,
- False, None, False, None, None, {},
- ))
+ def _prepare(self, ytdl_instance):
+ if "__gdl_initialize" not in ytdl_instance.params:
+ return
- if manifest_type == "hls":
- if manifest_data is None:
- try:
- fmts, subs = extr._extract_m3u8_formats_and_subtitles(
- url, video_id, "mp4", headers=headers)
- except AttributeError:
- fmts = extr._extract_m3u8_formats(
- url, video_id, "mp4", headers=headers)
- subs = None
- else:
- try:
- fmts, subs = extr._parse_m3u8_formats_and_subtitles(
- url, video_id, "mp4")
- except AttributeError:
- fmts = extr._parse_m3u8_formats(url, video_id, "mp4")
- subs = None
-
- elif manifest_type == "dash":
- if manifest_data is None:
- try:
- fmts, subs = extr._extract_mpd_formats_and_subtitles(
- url, video_id, headers=headers)
- except AttributeError:
- fmts = extr._extract_mpd_formats(
- url, video_id, headers=headers)
- subs = None
- else:
- if isinstance(manifest_data, str):
- manifest_data = ElementTree.fromstring(manifest_data)
- try:
- fmts, subs = extr._parse_mpd_formats_and_subtitles(
- manifest_data, mpd_id="dash")
- except AttributeError:
- fmts = extr._parse_mpd_formats(
- manifest_data, mpd_id="dash")
- subs = None
-
- else:
- self.log.error("Unsupported manifest type '%s'", manifest_type)
- return None
-
- info_dict = {
- "extractor": "",
- "id" : video_id,
- "title" : video_id,
- "formats" : fmts,
- "subtitles": subs,
- }
- return ytdl.process_ie_result(info_dict, download=False)
+ del ytdl_instance.params["__gdl_initialize"]
+ if self.progress is not None:
+ ytdl_instance.add_progress_hook(self._progress_hook)
+ if rlf := ytdl_instance.params.pop("__gdl_ratelimit_func", False):
+ self.rate_dyn = rlf
+ ytdl_instance.params["logger"] = LoggerAdapter(self, ytdl_instance)
def _progress_hook(self, info):
if info["status"] == "downloading" and \
@@ -284,6 +336,31 @@ class YoutubeDLDownloader(DownloaderBase):
ytdl_instance.params["outtmpl"] = {"default": outtmpl}
+class LoggerAdapter():
+ __slots__ = ("obj", "log")
+
+ def __init__(self, obj, ytdl_instance):
+ self.obj = obj
+ self.log = ytdl_instance.params.get("logger")
+
+ def debug(self, msg):
+ if self.log is not None:
+ if msg[0] == "[":
+ msg = msg[msg.find("]")+2:]
+ self.log.debug(msg)
+
+ def warning(self, msg):
+ if self.log is not None:
+ if "WARNING:" in msg:
+ msg = msg[msg.find(" ")+1:]
+ self.log.warning(msg)
+
+ def error(self, msg):
+ if "ERROR:" in msg:
+ msg = msg[msg.find(" ")+1:]
+ self.obj.error = msg
+
+
def compatible_formats(formats):
"""Returns True if 'formats' are compatible for merge"""
video_ext = formats[0].get("ext")
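The module-level LoggerAdapter added above forwards yt-dlp's logger output back into gallery-dl: debug lines lose their "[extractor]" component prefix, warnings lose the "WARNING:" label, and error text is stored on the downloader as self.obj.error so the retry loops can report it as the failure reason. The string handling in isolation:

    def strip_component(msg):
        # "[youtube] dQw4: Downloading webpage" -> "dQw4: Downloading webpage"
        if msg[0] == "[":
            msg = msg[msg.find("]") + 2:]
        return msg

    def strip_label(msg):
        # "ERROR: Unsupported URL" -> "Unsupported URL"
        if "ERROR:" in msg:
            msg = msg[msg.find(" ") + 1:]
        return msg

    print(strip_component("[youtube] dQw4w9WgXcQ: Downloading webpage"))
    print(strip_label("ERROR: Unsupported URL: https://example.org/"))
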
diff --git a/gallery_dl/dt.py b/gallery_dl/dt.py
new file mode 100644
index 0000000..b37ebf3
--- /dev/null
+++ b/gallery_dl/dt.py
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Date/Time utilities"""
+
+import sys
+import time
+from datetime import datetime, date, timedelta, timezone # noqa F401
+
+
+class NullDatetime(datetime):
+
+ def __bool__(self):
+ return False
+
+ def __str__(self):
+ return "[Invalid DateTime]"
+
+ def __format__(self, format_spec):
+ return "[Invalid DateTime]"
+
+
+NONE = NullDatetime(1, 1, 1)
+EPOCH = datetime(1970, 1, 1)
+SECOND = timedelta(0, 1)
+
+
+def normalize(dt):
+ # if (o := dt.utcoffset()) is not None:
+ # return dt.replace(tzinfo=None, microsecond=0) - o
+ if dt.tzinfo is not None:
+ return dt.astimezone(timezone.utc).replace(tzinfo=None, microsecond=0)
+ if dt.microsecond:
+ return dt.replace(microsecond=0)
+ return dt
+
+
+def convert(value):
+ """Convert 'value' to a naive UTC datetime object"""
+ if not value:
+ return NONE
+ if isinstance(value, datetime):
+ return normalize(value)
+ if isinstance(value, str) and (dt := parse_iso(value)) is not NONE:
+ return dt
+ return parse_ts(value)
+
+
+def parse(dt_string, format):
+ """Parse 'dt_string' according to 'format'"""
+ try:
+ return normalize(datetime.strptime(dt_string, format))
+ except Exception:
+ return NONE
+
+
+if sys.hexversion < 0x30c0000:
+ # Python <= 3.11
+ def parse_iso(dt_string):
+ """Parse 'dt_string' as ISO 8601 value"""
+ try:
+ if dt_string[-1] == "Z":
+ # compat for Python < 3.11
+ dt_string = dt_string[:-1]
+ elif dt_string[-5] in "+-":
+ # compat for Python < 3.11
+ dt_string = f"{dt_string[:-2]}:{dt_string[-2:]}"
+ return normalize(datetime.fromisoformat(dt_string))
+ except Exception:
+ return NONE
+
+ from_ts = datetime.utcfromtimestamp
+ now = datetime.utcnow
+
+else:
+ # Python >= 3.12
+ def parse_iso(dt_string):
+ """Parse 'dt_string' as ISO 8601 value"""
+ try:
+ return normalize(datetime.fromisoformat(dt_string))
+ except Exception:
+ return NONE
+
+ def from_ts(ts=None):
+ """Convert Unix timestamp to naive UTC datetime"""
+ Y, m, d, H, M, S, _, _, _ = time.gmtime(ts)
+ return datetime(Y, m, d, H, M, S)
+
+ now = from_ts
+
+
+def parse_ts(ts, default=NONE):
+ """Create a datetime object from a Unix timestamp"""
+ try:
+ return from_ts(int(ts))
+ except Exception:
+ return default
+
+
+def to_ts(dt):
+ """Convert naive UTC datetime to Unix timestamp"""
+ return (dt - EPOCH) / SECOND
+
+
+def to_ts_string(dt):
+ """Convert naive UTC datetime to Unix timestamp string"""
+ try:
+ return str((dt - EPOCH) // SECOND)
+ except Exception:
+ return ""
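The new gallery_dl.dt module normalizes every input to a naive UTC datetime and returns the falsy dt.NONE sentinel instead of raising on bad input, so callers can test parsed dates with a plain truth check. Usage against the API defined above:

    from gallery_dl import dt

    d = dt.parse_iso("2025-12-20T05:49:04+01:00")
    print(d)                   # 2025-12-20 04:49:04 (naive UTC)
    print(dt.to_ts_string(d))  # the Unix timestamp, as a string

    print(dt.convert(1766206144))          # timestamp -> datetime
    print(dt.parse_ts("not a timestamp"))  # [Invalid DateTime]
    print(bool(dt.NONE))                   # False
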
diff --git a/gallery_dl/extractor/2ch.py b/gallery_dl/extractor/2ch.py
index 912a251..1f17c99 100644
--- a/gallery_dl/extractor/2ch.py
+++ b/gallery_dl/extractor/2ch.py
@@ -4,28 +4,28 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://2ch.su/"""
+"""Extractors for https://2ch.org/"""
from .common import Extractor, Message
from .. import text, util
-BASE_PATTERN = r"(?:https?://)?2ch\.(su|life|hk)"
+BASE_PATTERN = r"(?:https?://)?2ch\.(org|su|life|hk)"
class _2chThreadExtractor(Extractor):
"""Extractor for 2ch threads"""
category = "2ch"
subcategory = "thread"
- root = "https://2ch.su"
+ root = "https://2ch.org"
directory_fmt = ("{category}", "{board}", "{thread} {title}")
filename_fmt = "{tim}{filename:? //}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
pattern = rf"{BASE_PATTERN}/([^/?#]+)/res/(\d+)"
- example = "https://2ch.su/a/res/12345.html"
+ example = "https://2ch.org/a/res/12345.html"
def __init__(self, match):
tld = match[1]
- self.root = f"https://2ch.{'su' if tld == 'hk' else tld}"
+ self.root = f"https://2ch.{'org' if tld == 'hk' else tld}"
Extractor.__init__(self, match)
def items(self):
@@ -42,11 +42,11 @@ class _2chThreadExtractor(Extractor):
"title" : text.unescape(title)[:50],
}
- yield Message.Directory, thread
+ yield Message.Directory, "", thread
for post in posts:
if files := post.get("files"):
post["post_name"] = post["name"]
- post["date"] = text.parse_timestamp(post["timestamp"])
+ post["date"] = self.parse_timestamp(post["timestamp"])
del post["files"]
del post["name"]
@@ -65,9 +65,9 @@ class _2chBoardExtractor(Extractor):
"""Extractor for 2ch boards"""
category = "2ch"
subcategory = "board"
- root = "https://2ch.su"
+ root = "https://2ch.org"
pattern = rf"{BASE_PATTERN}/([^/?#]+)/?$"
- example = "https://2ch.su/a/"
+ example = "https://2ch.org/a/"
def __init__(self, match):
tld = match[1]
diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py
index 9927b5a..0e250c9 100644
--- a/gallery_dl/extractor/2chan.py
+++ b/gallery_dl/extractor/2chan.py
@@ -31,7 +31,7 @@ class _2chanThreadExtractor(Extractor):
f"/{self.board}/res/{self.thread}.htm")
page = self.request(url).text
data = self.metadata(page)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for post in self.posts(page):
if "filename" not in post:
continue
diff --git a/gallery_dl/extractor/2chen.py b/gallery_dl/extractor/2chen.py
index ee3510c..4456fd6 100644
--- a/gallery_dl/extractor/2chen.py
+++ b/gallery_dl/extractor/2chen.py
@@ -1,40 +1,55 @@
# -*- coding: utf-8 -*-
+# Copyright 2022-2025 Mike Fährmann
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://sturdychan.help/"""
+"""Extractors for 2chen boards"""
-from .common import Extractor, Message
+from .common import BaseExtractor, Message
from .. import text
-BASE_PATTERN = r"(?:https?://)?(?:sturdychan.help|2chen\.(?:moe|club))"
+class _2chenExtractor(BaseExtractor):
+ basecategory = "2chen"
-class _2chenThreadExtractor(Extractor):
+
+BASE_PATTERN = _2chenExtractor.update({
+ "sturdychan": {
+ "root": "https://sturdychan.help",
+ "pattern": r"(?:sturdychan\.help|2chen\.(?:moe|club))",
+ },
+ "schan": {
+ "root": "https://schan.help/",
+ "pattern": r"schan\.help",
+ },
+})
+
+
+class _2chenThreadExtractor(_2chenExtractor):
"""Extractor for 2chen threads"""
- category = "2chen"
subcategory = "thread"
- root = "https://sturdychan.help"
directory_fmt = ("{category}", "{board}", "{thread} {title}")
filename_fmt = "{time} {filename}.{extension}"
- archive_fmt = "{board}_{thread}_{hash}_{time}"
- pattern = BASE_PATTERN + r"/([^/?#]+)/(\d+)"
+ archive_fmt = "{board}_{thread}_{no}_{time}"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/(\d+)"
example = "https://sturdychan.help/a/12345/"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.board, self.thread = match.groups()
-
def items(self):
- url = f"{self.root}/{self.board}/{self.thread}"
+ board = self.groups[-2]
+ thread = self.kwdict["thread"] = self.groups[-1]
+ url = f"{self.root}/{board}/{thread}"
page = self.request(url, encoding="utf-8", notfound="thread").text
- data = self.metadata(page)
- yield Message.Directory, data
- for post in self.posts(page):
+ self.kwdict["board"], pos = text.extract(
+ page, 'class="board">/', '/<')
+ self.kwdict["title"] = text.unescape(text.extract(
+ page, "<h3>", "</h3>", pos)[0])
+ yield Message.Directory, "", {}
+ for post in self.posts(page):
url = post["url"]
if not url:
continue
@@ -42,20 +57,10 @@ class _2chenThreadExtractor(Extractor):
url = self.root + url
post["url"] = url = url.partition("?")[0]
- post.update(data)
post["time"] = text.parse_int(post["date"].timestamp())
yield Message.Url, url, text.nameext_from_url(
post["filename"], post)
- def metadata(self, page):
- board, pos = text.extract(page, 'class="board">/', '/<')
- title = text.extract(page, "<h3>", "</h3>", pos)[0]
- return {
- "board" : board,
- "thread": self.thread,
- "title" : text.unescape(title),
- }
-
def posts(self, page):
"""Return iterable with relevant posts"""
return map(self.parse, text.extract_iter(
@@ -65,31 +70,25 @@ class _2chenThreadExtractor(Extractor):
extr = text.extract_from(post)
return {
"name" : text.unescape(extr("<span>", "</span>")),
- "date" : text.parse_datetime(
+ "date" : self.parse_datetime(
extr("<time", "<").partition(">")[2],
"%d %b %Y (%a) %H:%M:%S"
),
"no" : extr('href="#p', '"'),
- "url" : extr('</a><a href="', '"'),
"filename": text.unescape(extr('download="', '"')),
+ "url" : text.extr(extr("<figure>", "</"), 'href="', '"'),
"hash" : extr('data-hash="', '"'),
}
-class _2chenBoardExtractor(Extractor):
+class _2chenBoardExtractor(_2chenExtractor):
"""Extractor for 2chen boards"""
- category = "2chen"
subcategory = "board"
- root = "https://sturdychan.help"
- pattern = BASE_PATTERN + r"/([^/?#]+)(?:/catalog|/?$)"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)(?:/catalog|/?$)"
example = "https://sturdychan.help/a/"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.board = match[1]
-
def items(self):
- url = f"{self.root}/{self.board}/catalog"
+ url = f"{self.root}/{self.groups[-1]}/catalog"
page = self.request(url, notfound="board").text
data = {"_extractor": _2chenThreadExtractor}
for thread in text.extract_iter(
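The 2chen rewrite moves both boards onto the BaseExtractor multi-instance mechanism: update() registers each instance's root and URL pattern and returns a combined base pattern, so 'sturdychan' and 'schan' become separate categories served by the same classes. A rough illustration of how such a combined alternation can be built (the real BaseExtractor.update in common.py does additional bookkeeping, e.g. mapping the matched group back to a category):

    import re

    instances = {
        "sturdychan": r"sturdychan\.help|2chen\.(?:moe|club)",
        "schan"     : r"schan\.help",
    }

    # one capture group per instance; the group that matched
    # identifies the category
    BASE_PATTERN = (
        r"(?:https?://)?(?:"
        + "|".join(f"({pattern})" for pattern in instances.values())
        + r")"
    )

    m = re.match(BASE_PATTERN + r"/([^/?#]+)/(\d+)",
                 "https://schan.help/a/12345")
    print(m.groups())  # (None, 'schan.help', 'a', '12345')
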
diff --git a/gallery_dl/extractor/35photo.py b/gallery_dl/extractor/35photo.py
index ec5f0cb..154295e 100644
--- a/gallery_dl/extractor/35photo.py
+++ b/gallery_dl/extractor/35photo.py
@@ -29,7 +29,7 @@ class _35photoExtractor(Extractor):
url = photo["url"]
if first:
first = False
- yield Message.Directory, photo
+ yield Message.Directory, "", photo
yield Message.Url, url, text.nameext_from_url(url, photo)
def metadata(self):
diff --git a/gallery_dl/extractor/4archive.py b/gallery_dl/extractor/4archive.py
index 4c43464..a6dedde 100644
--- a/gallery_dl/extractor/4archive.py
+++ b/gallery_dl/extractor/4archive.py
@@ -7,7 +7,7 @@
"""Extractors for https://4archive.org/"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text, dt
class _4archiveThreadExtractor(Extractor):
@@ -37,8 +37,8 @@ class _4archiveThreadExtractor(Extractor):
for post in posts:
post.update(data)
- post["time"] = int(util.datetime_to_timestamp(post["date"]))
- yield Message.Directory, post
+ post["time"] = int(dt.to_ts(post["date"]))
+ yield Message.Directory, "", post
if "url" in post:
yield Message.Url, post["url"], text.nameext_from_url(
post["filename"], post)
@@ -61,10 +61,9 @@ class _4archiveThreadExtractor(Extractor):
extr = text.extract_from(post)
data = {
"name": extr('class="name">', "</span>"),
- "date": text.parse_datetime(
+ "date": self.parse_datetime_iso(
(extr('class="dateTime">', "<") or
- extr('class="dateTime postNum" >', "<")).strip(),
- "%Y-%m-%d %H:%M:%S"),
+ extr('class="dateTime postNum" >', "<")).strip()),
"no" : text.parse_int(extr(">Post No.", "<")),
}
if 'class="file"' in post:
diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py
index d81f305..ba24899 100644
--- a/gallery_dl/extractor/4chan.py
+++ b/gallery_dl/extractor/4chan.py
@@ -38,7 +38,7 @@ class _4chanThreadExtractor(Extractor):
"title" : text.unescape(title)[:50],
}
- yield Message.Directory, data
+ yield Message.Directory, "", data
for post in posts:
if "filename" in post:
post.update(data)
diff --git a/gallery_dl/extractor/4chanarchives.py b/gallery_dl/extractor/4chanarchives.py
index c187b41..16f4b39 100644
--- a/gallery_dl/extractor/4chanarchives.py
+++ b/gallery_dl/extractor/4chanarchives.py
@@ -40,7 +40,7 @@ class _4chanarchivesThreadExtractor(Extractor):
for post in posts:
post.update(data)
- yield Message.Directory, post
+ yield Message.Directory, "", post
if "url" in post:
yield Message.Url, post["url"], post
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index d1ac503..b74bc90 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -31,7 +31,7 @@ class _500pxExtractor(Extractor):
photo["extension"] = photo["image_format"]
if data:
photo.update(data)
- yield Message.Directory, photo
+ yield Message.Directory, "", photo
yield Message.Url, url, photo
def metadata(self):
@@ -92,7 +92,7 @@ class _500pxExtractor(Extractor):
class _500pxUserExtractor(_500pxExtractor):
"""Extractor for photos from a user's photostream on 500px.com"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/(?!photo/|liked)(?:p/)?([^/?#]+)/?(?:$|[?#])"
+ pattern = rf"{BASE_PATTERN}/(?!photo/|liked)(?:p/)?([^/?#]+)/?(?:$|[?#])"
example = "https://500px.com/USER"
def __init__(self, match):
@@ -121,8 +121,8 @@ class _500pxGalleryExtractor(_500pxExtractor):
"""Extractor for photo galleries on 500px.com"""
subcategory = "gallery"
directory_fmt = ("{category}", "{user[username]}", "{gallery[name]}")
- pattern = (BASE_PATTERN + r"/(?!photo/)(?:p/)?"
- r"([^/?#]+)/galleries/([^/?#]+)")
+ pattern = (rf"{BASE_PATTERN}/(?!photo/)(?:p/)?"
+ rf"([^/?#]+)/galleries/([^/?#]+)")
example = "https://500px.com/USER/galleries/GALLERY"
def __init__(self, match):
@@ -178,7 +178,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
class _500pxFavoriteExtractor(_500pxExtractor):
"""Extractor for favorite 500px photos"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/liked/?$"
+ pattern = rf"{BASE_PATTERN}/liked/?$"
example = "https://500px.com/liked"
def photos(self):
@@ -202,7 +202,7 @@ class _500pxFavoriteExtractor(_500pxExtractor):
class _500pxImageExtractor(_500pxExtractor):
"""Extractor for individual images from 500px.com"""
subcategory = "image"
- pattern = BASE_PATTERN + r"/photo/(\d+)"
+ pattern = rf"{BASE_PATTERN}/photo/(\d+)"
example = "https://500px.com/photo/12345/TITLE"
def __init__(self, match):
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
index 0385067..3230182 100644
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -9,9 +9,8 @@
"""Extractors for https://8chan.moe/"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text, dt
from ..cache import memcache
-from datetime import timedelta
import itertools
BASE_PATTERN = r"(?:https?://)?8chan\.(moe|se|cc)"
@@ -44,7 +43,7 @@ class _8chanExtractor(Extractor):
def cookies_prepare(self):
# fetch captcha cookies
# (necessary to download without getting interrupted)
- now = util.datetime_utcnow()
+ now = dt.now()
url = self.root + "/captcha.js"
params = {"d": now.strftime("%a %b %d %Y %H:%M:%S GMT+0000 (UTC)")}
self.request(url, params=params).content
@@ -57,7 +56,7 @@ class _8chanExtractor(Extractor):
if cookie.domain.endswith(domain):
cookie.expires = None
if cookie.name == "captchaexpiration":
- cookie.value = (now + timedelta(30, 300)).strftime(
+ cookie.value = (now + dt.timedelta(30, 300)).strftime(
"%a, %d %b %Y %H:%M:%S GMT")
return self.cookies
@@ -70,7 +69,7 @@ class _8chanThreadExtractor(_8chanExtractor):
"{threadId} {subject[:50]}")
filename_fmt = "{postId}{num:?-//} {filename[:200]}.{extension}"
archive_fmt = "{boardUri}_{postId}_{num}"
- pattern = BASE_PATTERN + r"/([^/?#]+)/(?:res|last)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/(?:res|last)/(\d+)"
example = "https://8chan.moe/a/res/12345.html"
def items(self):
@@ -92,7 +91,7 @@ class _8chanThreadExtractor(_8chanExtractor):
# download files
posts = thread.pop("posts", ())
- yield Message.Directory, thread
+ yield Message.Directory, "", thread
for post in itertools.chain((thread,), posts):
files = post.pop("files", ())
if not files:
@@ -108,7 +107,7 @@ class _8chanThreadExtractor(_8chanExtractor):
class _8chanBoardExtractor(_8chanExtractor):
"""Extractor for 8chan boards"""
subcategory = "board"
- pattern = BASE_PATTERN + r"/([^/?#]+)/(?:(\d+)\.html)?$"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/(?:(\d+)\.html)?$"
example = "https://8chan.moe/a/"
def items(self):
diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py
index 120cd8a..a8d8b44 100644
--- a/gallery_dl/extractor/8muses.py
+++ b/gallery_dl/extractor/8muses.py
@@ -40,7 +40,7 @@ class _8musesAlbumExtractor(Extractor):
if images := data.get("pictures"):
count = len(images)
album = self._make_album(data["album"])
- yield Message.Directory, {"album": album, "count": count}
+ yield Message.Directory, "", {"album": album, "count": count}
for num, image in enumerate(images, 1):
url = self.root + "/image/fl/" + image["publicUri"]
img = {
@@ -85,8 +85,7 @@ class _8musesAlbumExtractor(Extractor):
"parent" : text.parse_int(album["parentId"]),
"views" : text.parse_int(album["numberViews"]),
"likes" : text.parse_int(album["numberLikes"]),
- "date" : text.parse_datetime(
- album["updatedAt"], "%Y-%m-%dT%H:%M:%S.%fZ"),
+ "date" : self.parse_datetime_iso(album["updatedAt"]),
}
def _unobfuscate(self, data):
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index c7e33c8..64134d0 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -26,8 +26,10 @@ modules = [
"ao3",
"arcalive",
"architizer",
+ "arena",
"artstation",
"aryion",
+ "audiochan",
"batoto",
"bbc",
"behance",
@@ -39,9 +41,11 @@ modules = [
"booth",
"bunkr",
"catbox",
+ "cfake",
"chevereto",
"cien",
"civitai",
+ "comedywildlifephoto",
"comick",
"comicvine",
"cyberdrop",
@@ -54,6 +58,7 @@ modules = [
"discord",
"dynastyscans",
"e621",
+ "eporner",
"erome",
"everia",
"exhentai",
@@ -63,6 +68,8 @@ modules = [
"fantia",
"fapello",
"fapachi",
+ "fikfap",
+ "fitnakedgirls",
"flickr",
"furaffinity",
"furry34",
@@ -106,6 +113,7 @@ modules = [
"kemono",
"khinsider",
"komikcast",
+ "koofr",
"leakgallery",
"lensdump",
"lexica",
@@ -140,12 +148,14 @@ modules = [
"nozomi",
"nsfwalbum",
"nudostar",
+ "okporn",
"paheal",
"patreon",
"pexels",
"philomena",
"photovogue",
"picarto",
+ "picazor",
"pictoa",
"piczel",
"pillowfort",
@@ -158,12 +168,12 @@ modules = [
"poringa",
"pornhub",
"pornpics",
+ "pornstarstube",
"postmill",
"rawkuma",
"reactor",
"readcomiconline",
"realbooru",
- "redbust",
"reddit",
"redgifs",
"rule34us",
@@ -179,7 +189,6 @@ modules = [
"senmanga",
"sexcom",
"shimmie2",
- "simpcity",
"simplyhentai",
"sizebooru",
"skeb",
@@ -190,6 +199,7 @@ modules = [
"speakerdeck",
"steamgriddb",
"subscribestar",
+ "sxypix",
"szurubooru",
"tapas",
"tcbscans",
@@ -221,11 +231,13 @@ modules = [
"webmshare",
"webtoons",
"weebcentral",
+ "weebdex",
"weibo",
"wikiart",
"wikifeet",
"wikimedia",
"xasiat",
+ "xenforo",
"xfolio",
"xhamster",
"xvideos",
@@ -299,7 +311,7 @@ def _list_classes():
def _modules_internal():
globals_ = globals()
for module_name in modules:
- yield __import__(module_name, globals_, None, (), 1)
+ yield __import__(module_name, globals_, None, None, 1)
def _modules_path(path, files):
diff --git a/gallery_dl/extractor/adultempire.py b/gallery_dl/extractor/adultempire.py
index 3249ae6..e9adf97 100644
--- a/gallery_dl/extractor/adultempire.py
+++ b/gallery_dl/extractor/adultempire.py
@@ -33,7 +33,7 @@ class AdultempireGalleryExtractor(GalleryExtractor):
"gallery_id": text.parse_int(self.gallery_id),
"title" : text.unescape(extr('title="', '"')),
"studio" : extr(">studio</small>", "<").strip(),
- "date" : text.parse_datetime(extr(
+ "date" : self.parse_datetime(extr(
">released</small>", "<").strip(), "%m/%d/%Y"),
"actors" : sorted(text.split_html(extr(
'<ul class="item-details item-cast-list ', '</ul>'))[1:]),
diff --git a/gallery_dl/extractor/agnph.py b/gallery_dl/extractor/agnph.py
index 5bb1835..55b17c7 100644
--- a/gallery_dl/extractor/agnph.py
+++ b/gallery_dl/extractor/agnph.py
@@ -9,7 +9,7 @@
"""Extractors for https://agn.ph/"""
from . import booru
-from .. import text, util
+from .. import text
import collections
BASE_PATTERN = r"(?:https?://)?agn\.ph"
@@ -33,7 +33,7 @@ class AgnphExtractor(booru.BooruExtractor):
self.cookies.set("confirmed_age", "true", domain="agn.ph")
def _prepare(self, post):
- post["date"] = text.parse_timestamp(post["created_at"])
+ post["date"] = self.parse_timestamp(post["created_at"])
post["status"] = post["status"].strip()
post["has_children"] = ("true" in post["has_children"])
@@ -70,7 +70,7 @@ class AgnphExtractor(booru.BooruExtractor):
return
tags = collections.defaultdict(list)
- pattern = util.re(r'class="(.)typetag">([^<]+)')
+ pattern = text.re(r'class="(.)typetag">([^<]+)')
for tag_type, tag_name in pattern.findall(tag_container):
tags[tag_type].append(text.unquote(tag_name).replace(" ", "_"))
for key, value in tags.items():
@@ -81,7 +81,7 @@ class AgnphTagExtractor(AgnphExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"/gallery/post/(?:\?([^#]+))?$"
+ pattern = rf"{BASE_PATTERN}/gallery/post/(?:\?([^#]+))?$"
example = "https://agn.ph/gallery/post/?search=TAG"
def __init__(self, match):
@@ -99,7 +99,7 @@ class AgnphTagExtractor(AgnphExtractor):
class AgnphPostExtractor(AgnphExtractor):
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/gallery/post/show/(\d+)"
+ pattern = rf"{BASE_PATTERN}/gallery/post/show/(\d+)"
example = "https://agn.ph/gallery/post/show/12345/"
def posts(self):
diff --git a/gallery_dl/extractor/ao3.py b/gallery_dl/extractor/ao3.py
index 60380c4..716492e 100644
--- a/gallery_dl/extractor/ao3.py
+++ b/gallery_dl/extractor/ao3.py
@@ -118,7 +118,7 @@ class Ao3WorkExtractor(Ao3Extractor):
directory_fmt = ("{category}", "{author}")
filename_fmt = "{id} {title}.{extension}"
archive_fmt = "{id}.{extension}"
- pattern = BASE_PATTERN + r"/works/(\d+)"
+ pattern = rf"{BASE_PATTERN}/works/(\d+)"
example = "https://archiveofourown.org/works/12345"
def _init(self):
@@ -182,11 +182,11 @@ class Ao3WorkExtractor(Ao3Extractor):
extr('<dd class="freeform tags">', "</dd>")),
"lang" : extr('<dd class="language" lang="', '"'),
"series" : extr('<dd class="series">', "</dd>"),
- "date" : text.parse_datetime(
- extr('<dd class="published">', "<"), "%Y-%m-%d"),
- "date_completed": text.parse_datetime(
- extr('>Completed:</dt><dd class="status">', "<"), "%Y-%m-%d"),
- "date_updated" : text.parse_timestamp(
+ "date" : self.parse_datetime_iso(extr(
+ '<dd class="published">', "<")),
+ "date_completed": self.parse_datetime_iso(extr(
+ '>Completed:</dt><dd class="status">', "<")),
+ "date_updated" : self.parse_timestamp(
path.rpartition("updated_at=")[2]),
"words" : text.parse_int(
extr('<dd class="words">', "<").replace(",", "")),
@@ -220,7 +220,7 @@ class Ao3WorkExtractor(Ao3Extractor):
else:
data["series"] = None
- yield Message.Directory, data
+ yield Message.Directory, "", data
for fmt in self.formats:
try:
url = text.urljoin(self.root, fmts[fmt])
@@ -233,28 +233,28 @@ class Ao3WorkExtractor(Ao3Extractor):
class Ao3SeriesExtractor(Ao3Extractor):
"""Extractor for AO3 works of a series"""
subcategory = "series"
- pattern = BASE_PATTERN + r"(/series/(\d+))"
+ pattern = rf"{BASE_PATTERN}(/series/(\d+))"
example = "https://archiveofourown.org/series/12345"
class Ao3TagExtractor(Ao3Extractor):
"""Extractor for AO3 works by tag"""
subcategory = "tag"
- pattern = BASE_PATTERN + r"(/tags/([^/?#]+)/works(?:/?\?.+)?)"
+ pattern = rf"{BASE_PATTERN}(/tags/([^/?#]+)/works(?:/?\?.+)?)"
example = "https://archiveofourown.org/tags/TAG/works"
class Ao3SearchExtractor(Ao3Extractor):
"""Extractor for AO3 search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"(/works/search/?\?.+)"
+ pattern = rf"{BASE_PATTERN}(/works/search/?\?.+)"
example = "https://archiveofourown.org/works/search?work_search[query]=air"
class Ao3UserExtractor(Dispatch, Ao3Extractor):
"""Extractor for an AO3 user profile"""
- pattern = (BASE_PATTERN + r"/users/([^/?#]+(?:/pseuds/[^/?#]+)?)"
- r"(?:/profile)?/?(?:$|\?|#)")
+ pattern = (rf"{BASE_PATTERN}/users/([^/?#]+(?:/pseuds/[^/?#]+)?)"
+ rf"(?:/profile)?/?(?:$|\?|#)")
example = "https://archiveofourown.org/users/USER"
def items(self):
@@ -269,16 +269,16 @@ class Ao3UserExtractor(Dispatch, Ao3Extractor):
class Ao3UserWorksExtractor(Ao3Extractor):
"""Extractor for works of an AO3 user"""
subcategory = "user-works"
- pattern = (BASE_PATTERN + r"(/users/([^/?#]+)/(?:pseuds/([^/?#]+)/)?"
- r"works(?:/?\?.+)?)")
+ pattern = (rf"{BASE_PATTERN}(/users/([^/?#]+)/(?:pseuds/([^/?#]+)/)?"
+ rf"works(?:/?\?.+)?)")
example = "https://archiveofourown.org/users/USER/works"
class Ao3UserSeriesExtractor(Ao3Extractor):
"""Extractor for series of an AO3 user"""
subcategory = "user-series"
- pattern = (BASE_PATTERN + r"(/users/([^/?#]+)/(?:pseuds/([^/?#]+)/)?"
- r"series(?:/?\?.+)?)")
+ pattern = (rf"{BASE_PATTERN}(/users/([^/?#]+)/(?:pseuds/([^/?#]+)/)?"
+ rf"series(?:/?\?.+)?)")
example = "https://archiveofourown.org/users/USER/series"
def items(self):
@@ -297,8 +297,8 @@ class Ao3UserSeriesExtractor(Ao3Extractor):
class Ao3UserBookmarkExtractor(Ao3Extractor):
"""Extractor for bookmarked works of an AO3 user"""
subcategory = "user-bookmark"
- pattern = (BASE_PATTERN + r"(/users/([^/?#]+)/(?:pseuds/([^/?#]+)/)?"
- r"bookmarks(?:/?\?.+)?)")
+ pattern = (rf"{BASE_PATTERN}(/users/([^/?#]+)/(?:pseuds/([^/?#]+)/)?"
+ rf"bookmarks(?:/?\?.+)?)")
example = "https://archiveofourown.org/users/USER/bookmarks"
def items(self):
@@ -308,7 +308,7 @@ class Ao3UserBookmarkExtractor(Ao3Extractor):
class Ao3SubscriptionsExtractor(Ao3Extractor):
"""Extractor for your AO3 account's subscriptions"""
subcategory = "subscriptions"
- pattern = BASE_PATTERN + r"(/users/([^/?#]+)/subscriptions(?:/?\?.+)?)"
+ pattern = rf"{BASE_PATTERN}(/users/([^/?#]+)/subscriptions(?:/?\?.+)?)"
example = "https://archiveofourown.org/users/USER/subscriptions"
def items(self):
diff --git a/gallery_dl/extractor/arcalive.py b/gallery_dl/extractor/arcalive.py
index 1df7e0f..f950d14 100644
--- a/gallery_dl/extractor/arcalive.py
+++ b/gallery_dl/extractor/arcalive.py
@@ -36,7 +36,7 @@ class ArcalivePostExtractor(ArcaliveExtractor):
directory_fmt = ("{category}", "{boardSlug}")
filename_fmt = "{id}_{num}{title:? //[b:230]}.{extension}"
archive_fmt = "{id}_{num}"
- pattern = BASE_PATTERN + r"/b/(?:\w+)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/b/(?:\w+)/(\d+)"
example = "https://arca.live/b/breaking/123456789"
def items(self):
@@ -49,13 +49,12 @@ class ArcalivePostExtractor(ArcaliveExtractor):
files = self._extract_files(post)
post["count"] = len(files)
- post["date"] = text.parse_datetime(
- post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
+ post["date"] = self.parse_datetime_iso(post["createdAt"][:19])
post["post_url"] = post_url = \
f"{self.root}/b/{post['boardSlug']}/{post['id']}"
post["_http_headers"] = {"Referer": post_url + "?p=1"}
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], file in enumerate(files, 1):
post.update(file)
url = file["url"]
@@ -64,7 +63,7 @@ class ArcalivePostExtractor(ArcaliveExtractor):
def _extract_files(self, post):
files = []
- for video, media in util.re(r"<(?:img|vide(o)) ([^>]+)").findall(
+ for video, media in text.re(r"<(?:img|vide(o)) ([^>]+)").findall(
post["content"]):
if not self.emoticons and 'class="arca-emoticon"' in media:
continue
@@ -116,7 +115,7 @@ class ArcalivePostExtractor(ArcaliveExtractor):
class ArcaliveBoardExtractor(ArcaliveExtractor):
"""Extractor for an arca.live board's posts"""
subcategory = "board"
- pattern = BASE_PATTERN + r"/b/([^/?#]+)/?(?:\?([^#]+))?$"
+ pattern = rf"{BASE_PATTERN}/b/([^/?#]+)/?(?:\?([^#]+))?$"
example = "https://arca.live/b/breaking"
def articles(self):
@@ -128,7 +127,7 @@ class ArcaliveBoardExtractor(ArcaliveExtractor):
class ArcaliveUserExtractor(ArcaliveExtractor):
"""Extractor for an arca.live users's posts"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/u/@([^/?#]+)/?(?:\?([^#]+))?$"
+ pattern = rf"{BASE_PATTERN}/u/@([^/?#]+)/?(?:\?([^#]+))?$"
example = "https://arca.live/u/@USER"
def articles(self):
diff --git a/gallery_dl/extractor/arena.py b/gallery_dl/extractor/arena.py
new file mode 100644
index 0000000..ada2fa1
--- /dev/null
+++ b/gallery_dl/extractor/arena.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractor for https://are.na/"""
+
+from .common import GalleryExtractor
+
+
+class ArenaChannelExtractor(GalleryExtractor):
+ """Extractor for are.na channels"""
+ category = "arena"
+ subcategory = "channel"
+ root = "https://are.na"
+ directory_fmt = ("{category}", "{user[full_name]} ({user[id]})",
+ "{channel[title]} ({channel[id]})")
+ filename_fmt = "{num:>03}{block[id]:? //}.{extension}"
+ archive_fmt = "{channel[id]}/{block[id]}"
+ pattern = r"(?:https?://)?(?:www\.)?are\.na/[^/?#]+/([^/?#]+)"
+ example = "https://are.na/evan-collins-1522646491/cassette-futurism"
+
+ def metadata(self, page):
+ channel = self.request_json(
+ f"https://api.are.na/v2/channels/{self.groups[0]}")
+
+ channel["date"] = self.parse_datetime_iso(
+ channel["created_at"])
+ channel["date_updated"] = self.parse_datetime_iso(
+ channel["updated_at"])
+ channel.pop("contents", None)
+
+ return {
+ "count" : channel.get("length"),
+ "user" : channel.pop("user", None),
+ "owner" : channel.pop("owner", None),
+ "channel": channel,
+ }
+
+ def images(self, page):
+ api = f"https://api.are.na/v2/channels/{self.groups[0]}/contents"
+ limit = 100
+ params = {"page": 1, "per": limit}
+
+ while True:
+ data = self.request_json(api, params=params)
+
+ contents = data.get("contents")
+ if not contents:
+ return
+
+ for block in contents:
+ url = None
+
+ # Attachments (e.g., PDFs, files)
+ if attachment := block.get("attachment"):
+ url = attachment.get("url")
+
+ # Images
+ elif image := block.get("image"):
+ # Prefer original image
+ if original := image.get("original"):
+ url = original.get("url")
+ # Fallback to display/large image if present
+ elif display := image.get("display"):
+ url = display.get("url")
+ elif large := image.get("large"):
+ url = large.get("url")
+
+ # Some Links/Channels may not have downloadable media
+ if not url:
+ continue
+
+ block["date"] = self.parse_datetime_iso(
+ block["created_at"])
+ block["date_updated"] = self.parse_datetime_iso(
+ block["updated_at"])
+
+ yield url, {
+ "block" : block,
+ "source": block.pop("source", None),
+ }
+
+ if len(contents) < limit:
+ return
+ params["page"] += 1
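The are.na channel extractor pages through /v2/channels/{slug}/contents 100 blocks at a time and stops on the first short page. The same loop as a standalone sketch using requests directly (endpoint and parameters as above; error handling omitted):

    import requests

    def arena_blocks(slug, per=100):
        url = f"https://api.are.na/v2/channels/{slug}/contents"
        page = 1
        while True:
            data = requests.get(
                url, params={"page": page, "per": per}).json()
            contents = data.get("contents") or ()
            yield from contents
            if len(contents) < per:
                return  # a short page marks the end
            page += 1

    for block in arena_blocks("cassette-futurism"):
        print(block["id"], block.get("title"))
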
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index fdb92c4..f1b55ce 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -47,7 +47,7 @@ class ArtstationExtractor(Extractor):
asset.update(data)
adict = asset["asset"]
asset["num"] = num
- yield Message.Directory, asset
+ yield Message.Directory, "", asset
if adict["has_embedded_player"]:
if url := self._extract_embed(asset):
@@ -126,8 +126,7 @@ class ArtstationExtractor(Extractor):
data["title"] = text.unescape(data["title"])
data["description"] = text.unescape(text.remove_html(
data["description"]))
- data["date"] = text.parse_datetime(
- data["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ data["date"] = self.parse_datetime_iso(data["created_at"])
assets = data["assets"]
del data["assets"]
@@ -334,7 +333,7 @@ class ArtstationChallengeExtractor(ArtstationExtractor):
update_url = f"{self.root}/contests/submission_updates.json"
challenge = self.request_json(challenge_url)
- yield Message.Directory, {"challenge": challenge}
+ yield Message.Directory, "", {"challenge": challenge}
params = {"sorting": self.sorting}
for submission in self._pagination(submission_url, params):
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 38b8ee4..5e5d1f2 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -9,10 +9,9 @@
"""Extractors for https://aryion.com/"""
from .common import Extractor, Message
-from .. import text, util, exception
+from .. import text, util, dt, exception
from ..cache import cache
from email.utils import parsedate_tz
-from datetime import datetime
BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4"
@@ -20,7 +19,7 @@ BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4"
class AryionExtractor(Extractor):
"""Base class for aryion extractors"""
category = "aryion"
- directory_fmt = ("{category}", "{user!l}", "{path:J - }")
+ directory_fmt = ("{category}", "{user!l}", "{path:I}")
filename_fmt = "{id} {title}.{extension}"
archive_fmt = "{id}"
cookies_domain = ".aryion.com"
@@ -64,7 +63,7 @@ class AryionExtractor(Extractor):
if post := self._parse_post(post_id):
if data:
post.update(data)
- yield Message.Directory, post
+ yield Message.Directory, "", post
yield Message.Url, post["url"], post
elif post is False and self.recursive:
base = self.root + "/g4/view/"
@@ -78,20 +77,20 @@ class AryionExtractor(Extractor):
def metadata(self):
"""Return general metadata"""
- def _pagination_params(self, url, params=None, needle=None):
+ def _pagination_params(self, url, params=None, needle=None, quote="'"):
if params is None:
params = {"p": 1}
else:
params["p"] = text.parse_int(params.get("p"), 1)
if needle is None:
- needle = "class='gallery-item' id='"
+ needle = "class='gallery-item' id=" + quote
while True:
page = self.request(url, params=params).text
cnt = 0
- for post_id in text.extract_iter(page, needle, "'"):
+ for post_id in text.extract_iter(page, needle, quote):
cnt += 1
yield post_id
@@ -109,6 +108,42 @@ class AryionExtractor(Extractor):
return
url = self.root + text.rextr(page, "href='", "'", pos)
+ def _pagination_folders(self, url, folder=None, seen=None):
+ if folder is None:
+ self.kwdict["folder"] = ""
+ else:
+ url = f"{url}/{folder}"
+ self.kwdict["folder"] = folder = text.unquote(folder)
+ self.log.debug("Descending into folder '%s'", folder)
+
+ params = {"p": 1}
+ while True:
+ page = self.request(url, params=params).text
+
+ cnt = 0
+ for item in text.extract_iter(
+ page, "<li class='gallery-item", "</li>"):
+ cnt += 1
+ if text.extr(item, 'data-item-type="', '"') == "Folders":
+ folder = text.extr(item, "href='", "'").rpartition("/")[2]
+ if seen is None:
+ seen = set()
+ if folder not in seen:
+ seen.add(folder)
+ if self.recursive:
+ yield from self._pagination_folders(
+ url, folder, seen)
+ else:
+ self.log.debug("Skipping folder '%s'", folder)
+ else:
+ yield text.extr(item, "data-item-id='", "'")
+
+ if cnt < 40 and ">Next &gt;&gt;<" not in page:
+ break
+ params["p"] += 1
+
+ self.kwdict["folder"] = ""
+
def _parse_post(self, post_id):
url = f"{self.root}/g4/data.php?id={post_id}"
with self.request(url, method="HEAD", fatal=False) as response:
@@ -154,9 +189,11 @@ class AryionExtractor(Extractor):
"user" : self.user or artist,
"title" : title,
"artist": artist,
+ "description": text.unescape(extr(
+ 'property="og:description" content="', '"')),
"path" : text.split_html(extr(
"cookiecrumb'>", '</span'))[4:-1:2],
- "date" : datetime(*parsedate_tz(lmod)[:6]),
+ "date" : dt.datetime(*parsedate_tz(lmod)[:6]),
"size" : text.parse_int(clen),
"views" : text.parse_int(extr("Views</b>:", "<").replace(",", "")),
"width" : text.parse_int(extr("Resolution</b>:", "x")),
@@ -164,8 +201,6 @@ class AryionExtractor(Extractor):
"comments" : text.parse_int(extr("Comments</b>:", "<")),
"favorites": text.parse_int(extr("Favorites</b>:", "<")),
"tags" : text.split_html(extr("class='taglist'>", "</span>")),
- "description": text.unescape(text.remove_html(extr(
- "<p>", "</p>"), "", "")),
"filename" : fname,
"extension": ext,
"_http_lastmodified": lmod,
@@ -176,14 +211,11 @@ class AryionGalleryExtractor(AryionExtractor):
"""Extractor for a user's gallery on eka's portal"""
subcategory = "gallery"
categorytransfer = True
- pattern = BASE_PATTERN + r"/(?:gallery/|user/|latest.php\?name=)([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/(?:gallery/|user/|latest.php\?name=)([^/?#]+)"
example = "https://aryion.com/g4/gallery/USER"
- def __init__(self, match):
- AryionExtractor.__init__(self, match)
- self.offset = 0
-
def _init(self):
+ self.offset = 0
self.recursive = self.config("recursive", True)
def skip(self, num):
@@ -204,15 +236,34 @@ class AryionGalleryExtractor(AryionExtractor):
class AryionFavoriteExtractor(AryionExtractor):
"""Extractor for a user's favorites gallery"""
subcategory = "favorite"
- directory_fmt = ("{category}", "{user!l}", "favorites")
+ directory_fmt = ("{category}", "{user!l}", "favorites", "{folder}")
archive_fmt = "f_{user}_{id}"
- categorytransfer = True
- pattern = BASE_PATTERN + r"/favorites/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/favorites/([^/?#]+)(?:/([^?#]+))?"
example = "https://aryion.com/g4/favorites/USER"
+ def _init(self):
+ self.recursive = self.config("recursive", True)
+
def posts(self):
url = f"{self.root}/g4/favorites/{self.user}"
- return self._pagination_params(url, None, "data-item-id='")
+ return self._pagination_folders(url, self.groups[1])
+
+
+class AryionWatchExtractor(AryionExtractor):
+ """Extractor for your watched users and tags"""
+ subcategory = "watch"
+ directory_fmt = ("{category}", "{user!l}",)
+ pattern = rf"{BASE_PATTERN}/messagepage\.php()"
+ example = "https://aryion.com/g4/messagepage.php"
+
+ def posts(self):
+ if not self.cookies_check(self.cookies_names):
+ raise exception.AuthRequired(
+ ("username & password", "authenticated cookies"),
+ "watched Submissions")
+ self.cookies.set("g4p_msgpage_style", "plain", domain="aryion.com")
+ url = self.root + "/g4/messagepage.php"
+ return self._pagination_params(url, None, 'data-item-id="', '"')
class AryionTagExtractor(AryionExtractor):
@@ -220,7 +271,7 @@ class AryionTagExtractor(AryionExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "tags", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"/tags\.php\?([^#]+)"
+ pattern = rf"{BASE_PATTERN}/tags\.php\?([^#]+)"
example = "https://aryion.com/g4/tags.php?tag=TAG"
def _init(self):
@@ -235,10 +286,34 @@ class AryionTagExtractor(AryionExtractor):
return self._pagination_params(url, self.params)
+class AryionSearchExtractor(AryionExtractor):
+ """Extractor for searches on eka's portal"""
+ subcategory = "search"
+ directory_fmt = ("{category}", "searches", "{search[prefix]}"
+ "{search[q]|search[tags]|search[user]}")
+ archive_fmt = ("s_{search[prefix]}"
+ "{search[q]|search[tags]|search[user]}_{id}")
+ pattern = rf"{BASE_PATTERN}/search\.php\?([^#]+)"
+ example = "https://aryion.com/g4/search.php?q=TEXT&tags=TAGS&user=USER"
+
+ def metadata(self):
+ params = text.parse_query(self.user)
+ return {"search": {
+ **params,
+ "prefix": ("" if params.get("q") else
+ "t_" if params.get("tags") else
+ "u_" if params.get("user") else ""),
+ }}
+
+ def posts(self):
+ url = f"{self.root}/g4/search.php?{self.user}"
+ return self._pagination_next(url)
+
+
class AryionPostExtractor(AryionExtractor):
"""Extractor for individual posts on eka's portal"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/view/(\d+)"
+ pattern = rf"{BASE_PATTERN}/view/(\d+)"
example = "https://aryion.com/g4/view/12345"
def posts(self):
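
A minimal sketch of the recursive walk that _pagination_folders performs above, assuming a hypothetical fetch_folder() that returns (post_ids, subfolder_names) for one favorites folder; the shared seen set is what keeps a folder from being descended into twice:

def fetch_folder(folder):
    # stand-in for the per-folder page requests; data is made up
    tree = {
        "": (["101", "102"], ["art", "stories"]),
        "art": (["201"], ["stories"]),  # links back to a sibling folder
        "stories": (["301"], []),
    }
    return tree[folder]

def walk(folder="", seen=None):
    if seen is None:
        seen = set()
    post_ids, subfolders = fetch_folder(folder)
    yield from post_ids
    for sub in subfolders:
        if sub not in seen:  # the 'seen' set prevents revisits
            seen.add(sub)
            yield from walk(sub, seen)

print(list(walk()))  # ['101', '102', '201', '301']
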
diff --git a/gallery_dl/extractor/audiochan.py b/gallery_dl/extractor/audiochan.py
new file mode 100644
index 0000000..b708ce7
--- /dev/null
+++ b/gallery_dl/extractor/audiochan.py
@@ -0,0 +1,158 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://audiochan.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?audiochan\.com"
+
+
+class AudiochanExtractor(Extractor):
+ """Base class for audiochan extractors"""
+ category = "audiochan"
+ root = "https://audiochan.com"
+ root_api = "https://api.audiochan.com"
+ directory_fmt = ("{category}", "{user[display_name]}")
+ filename_fmt = "{title} ({slug}).{extension}"
+ archive_fmt = "{audioFile[id]}"
+
+ def _init(self):
+ self.user = False
+ self.headers_api = {
+ "content-type" : "application/json",
+ "Origin" : self.root,
+ "Sec-Fetch-Dest" : "empty",
+ "Sec-Fetch-Mode" : "cors",
+ "Sec-Fetch-Site" : "same-site",
+ }
+ self.headers_dl = {
+ "Accept": "audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,"
+ "application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5",
+ "Sec-Fetch-Dest" : "audio",
+ "Sec-Fetch-Mode" : "no-cors",
+ "Sec-Fetch-Site" : "same-site",
+ "Accept-Encoding": "identity",
+ }
+
+ def items(self):
+ for post in self.posts():
+ file = post["audioFile"]
+
+ post["_http_headers"] = self.headers_dl
+ post["date"] = self.parse_datetime_iso(file["created_at"])
+ post["date_updated"] = self.parse_datetime_iso(file["updated_at"])
+ post["description"] = self._extract_description(
+ post["description"])
+
+ tags = []
+ for tag in post["tags"]:
+ if "tag" in tag:
+ tag = tag["tag"]
+ tags.append(f"{tag['category']}:{tag['name']}")
+ post["tags"] = tags
+
+ if self.user:
+ post["user"] = post["credits"][0]["user"]
+
+ if not (url := file["url"]):
+ post["_http_segmented"] = 600000
+ url = file["stream_url"]
+
+ yield Message.Directory, "", post
+ text.nameext_from_name(file["filename"], post)
+ yield Message.Url, url, post
+
+ def request_api(self, endpoint, params=None):
+ url = self.root_api + endpoint
+ return self.request_json(url, params=params, headers=self.headers_api)
+
+ def _pagination(self, endpoint, params, key=None):
+ params["page"] = 1
+ params["limit"] = "12"
+
+ while True:
+ data = self.request_api(endpoint, params)
+ if key is not None:
+ data = data[key]
+
+ yield from data["data"]
+
+ if not data["has_more"]:
+ break
+ params["page"] += 1
+
+ def _extract_description(self, description, texts=None):
+ if texts is None:
+ texts = []
+
+ if "text" in description:
+ texts.append(description["text"])
+ elif "content" in description:
+ for desc in description["content"]:
+ self._extract_description(desc, texts)
+
+ return texts
+
+
+class AudiochanAudioExtractor(AudiochanExtractor):
+ subcategory = "audio"
+ pattern = rf"{BASE_PATTERN}/a/([^/?#]+)"
+ example = "https://audiochan.com/a/SLUG"
+
+ def posts(self):
+ self.user = True
+ audio = self.request_api("/audios/slug/" + self.groups[0])
+ return (audio,)
+
+
+class AudiochanUserExtractor(AudiochanExtractor):
+ subcategory = "user"
+ pattern = rf"{BASE_PATTERN}/u/([^/?#]+)"
+ example = "https://audiochan.com/u/USER"
+
+ def posts(self):
+ endpoint = "/users/" + self.groups[0]
+ self.kwdict["user"] = self.request_api(endpoint)["data"]
+
+ params = {
+ "sfw_only": "false",
+ "sort" : "new",
+ }
+ return self._pagination(endpoint + "/audios", params)
+
+
+class AudiochanCollectionExtractor(AudiochanExtractor):
+ subcategory = "collection"
+ pattern = rf"{BASE_PATTERN}/c/([^/?#]+)"
+ example = "https://audiochan.com/c/SLUG"
+
+ def posts(self):
+ slug = self.groups[0]
+ endpoint = "/collections/" + slug
+ self.kwdict["collection"] = col = self.request_api(endpoint)
+ col.pop("audios", None)
+ col.pop("items", None)
+
+ endpoint = f"/collections/slug/{slug}/items"
+ return self._pagination(endpoint, {})
+
+
+class AudiochanSearchExtractor(AudiochanExtractor):
+ subcategory = "search"
+ pattern = rf"{BASE_PATTERN}/search/?\?([^#]+)"
+ example = "https://audiochan.com/search?q=QUERY"
+
+ def posts(self):
+ self.user = True
+ endpoint = "/search"
+ params = text.parse_query(self.groups[0])
+ params["sfw_only"] = "false"
+ self.kwdict["search_tags"] = params.get("q")
+ return self._pagination(endpoint, params, "audios")
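
The _extract_description helper above is a depth-first walk over a rich-text JSON tree, collecting every "text" leaf. A self-contained sketch of the same traversal, run against a made-up payload in the shape the extractor walks:

def extract_texts(node, texts=None):
    if texts is None:
        texts = []
    if "text" in node:
        texts.append(node["text"])
    elif "content" in node:
        for child in node["content"]:
            extract_texts(child, texts)
    return texts

doc = {"content": [
    {"text": "first line"},
    {"content": [{"text": "nested"}, {"text": "lines"}]},
]}
print(extract_texts(doc))  # ['first line', 'nested', 'lines']
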
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index a7d1b78..f8e803b 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -80,7 +80,7 @@ class BatotoBase():
class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
"""Extractor for batoto manga chapters"""
archive_fmt = "{chapter_id}_{page}"
- pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/(?:title/[^/?#]+|chapter)/(\d+)"
example = "https://xbato.org/title/12345-MANGA/54321"
def __init__(self, match):
@@ -104,7 +104,7 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
info = text.remove_html(extr('link-hover">', "</"))
info = text.unescape(info)
- match = util.re(
+ match = text.re(
r"(?i)(?:(?:Volume|S(?:eason)?)\s*(\d+)\s+)?"
r"(?:Chapter|Episode)\s*(\d+)([\w.]*)").match(info)
if match:
@@ -123,7 +123,7 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
"chapter_minor" : minor,
"chapter_string": info,
"chapter_id" : text.parse_int(self.chapter_id),
- "date" : text.parse_timestamp(extr(' time="', '"')[:-3]),
+ "date" : self.parse_timestamp(extr(' time="', '"')[:-3]),
}
def images(self, page):
@@ -139,8 +139,8 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
"""Extractor for batoto manga"""
reverse = False
chapterclass = BatotoChapterExtractor
- pattern = (BASE_PATTERN +
- r"/(?:title/(\d+)[^/?#]*|series/(\d+)(?:/[^/?#]*)?)/?$")
+ pattern = (rf"{BASE_PATTERN}"
+ rf"/(?:title/(\d+)[^/?#]*|series/(\d+)(?:/[^/?#]*)?)/?$")
example = "https://xbato.org/title/12345-MANGA/"
def __init__(self, match):
@@ -167,8 +167,7 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
data["chapter"] = text.parse_int(chapter)
data["chapter_minor"] = sep + minor
- data["date"] = text.parse_datetime(
- extr('time="', '"'), "%Y-%m-%dT%H:%M:%S.%fZ")
+ data["date"] = self.parse_datetime_iso(extr('time="', '"'))
url = f"{self.root}/title/{href}"
results.append((url, data.copy()))
@@ -188,9 +187,9 @@ def _manga_info(self, manga_id, page=None):
"manga" : data["name"][1],
"manga_id" : text.parse_int(manga_id),
"manga_slug" : data["slug"][1],
- "manga_date" : text.parse_timestamp(
+ "manga_date" : self.parse_timestamp(
data["dateCreate"][1] // 1000),
- "manga_date_updated": text.parse_timestamp(
+ "manga_date_updated": self.parse_timestamp(
data["dateUpdate"][1] / 1000),
"author" : json_list(data["authors"]),
"artist" : json_list(data["artists"]),
diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py
index 8efb3db..cb357d1 100644
--- a/gallery_dl/extractor/bbc.py
+++ b/gallery_dl/extractor/bbc.py
@@ -18,11 +18,10 @@ class BbcGalleryExtractor(GalleryExtractor):
"""Extractor for a programme gallery on bbc.co.uk"""
category = "bbc"
root = "https://www.bbc.co.uk"
- directory_fmt = ("{category}", "{path[0]}", "{path[1]}", "{path[2]}",
- "{path[3:]:J - /}")
+ directory_fmt = ("{category}", "{path:I}")
filename_fmt = "{num:>02}.{extension}"
archive_fmt = "{programme}_{num}"
- pattern = BASE_PATTERN + r"[^/?#]+(?!/galleries)(?:/[^/?#]+)?)$"
+ pattern = rf"{BASE_PATTERN}[^/?#]+(?!/galleries)(?:/[^/?#]+)?)$"
example = "https://www.bbc.co.uk/programmes/PATH"
def metadata(self, page):
@@ -72,7 +71,7 @@ class BbcProgrammeExtractor(Extractor):
category = "bbc"
subcategory = "programme"
root = "https://www.bbc.co.uk"
- pattern = BASE_PATTERN + r"[^/?#]+/galleries)(?:/?\?page=(\d+))?"
+ pattern = rf"{BASE_PATTERN}[^/?#]+/galleries)(?:/?\?page=(\d+))?"
example = "https://www.bbc.co.uk/programmes/ID/galleries"
def items(self):
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index 4a7c074..bb0562d 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -67,7 +67,7 @@ class BehanceExtractor(Extractor):
tags = [tag["title"] for tag in tags]
data["tags"] = tags
- data["date"] = text.parse_timestamp(
+ data["date"] = self.parse_timestamp(
data.get("publishedOn") or data.get("conceived_on") or 0)
if creator := data.get("creator"):
@@ -109,7 +109,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
imgs = self.get_images(data)
data["count"] = len(imgs)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], (url, module) in enumerate(imgs, 1):
data["module"] = module
data["extension"] = (module.get("extension") or
diff --git a/gallery_dl/extractor/bellazon.py b/gallery_dl/extractor/bellazon.py
index ce50a91..33f4ad3 100644
--- a/gallery_dl/extractor/bellazon.py
+++ b/gallery_dl/extractor/bellazon.py
@@ -46,8 +46,8 @@ class BellazonExtractor(Extractor):
data = {"post": post}
post["count"] = data["count"] = len(urls)
- yield Message.Directory, data
- data["num"] = 0
+ yield Message.Directory, "", data
+ data["num"] = data["num_internal"] = data["num_external"] = 0
for info, url, url_img in urls:
url = text.unescape(url or url_img)
@@ -59,27 +59,35 @@ class BellazonExtractor(Extractor):
):
continue
data["num"] += 1
+ data["num_internal"] += 1
if not (alt := text.extr(info, ' alt="', '"')) or (
alt.startswith("post-") and "_thumb." in alt):
- name = url
+ dc = text.nameext_from_url(url, data.copy())
else:
- name = text.unescape(alt)
+ dc = data.copy()
+ dc["name"] = name = text.unescape(alt)
+ dc["filename"] = name.partition(".")[0]
- dc = text.nameext_from_url(name, data.copy())
dc["id"] = text.extr(info, 'data-fileid="', '"')
if ext := text.extr(info, 'data-fileext="', '"'):
dc["extension"] = ext
elif "/core/interface/file/attachment.php" in url:
if not dc["id"]:
- dc["id"] = url.rpartition("?id=")[2]
+ dc["id"] = \
+ url.rpartition("?id=")[2].partition("&")[0]
if name := text.extr(info, ">", "<").strip():
- text.nameext_from_url(name, dc)
+ dc["name"] = name = text.unescape(name)
+ text.nameext_from_name(name, dc)
+ else:
+ dc["extension"] = text.ext_from_url(url)
if url[0] == "/":
url = f"https:{url}"
yield Message.Url, url, dc
else:
+ data["num"] += 1
+ data["num_external"] += 1
yield Message.Queue, url, data
def _pagination(self, base, pnum=None):
@@ -106,7 +114,7 @@ class BellazonExtractor(Extractor):
def _pagination_reverse(self, base, pnum=None):
base = f"{self.root}{base}"
- url = f"{base}/page/9999/" # force redirect to highest page number
+ url = f"{base}/page/{'9999' if pnum is None else pnum}/"
with self.request(url) as response:
parts = response.url.rsplit("/", 3)
pnum = text.parse_int(parts[2]) if parts[1] == "page" else 1
@@ -130,7 +138,7 @@ class BellazonExtractor(Extractor):
author = schema["author"]
stats = schema["interactionStatistic"]
url_t = schema["url"]
- url_a = author["url"]
+ url_a = author.get("url") or ""
path = text.split_html(text.extr(
page, '<nav class="ipsBreadcrumb', "</nav>"))[2:-1]
@@ -141,8 +149,8 @@ class BellazonExtractor(Extractor):
"title": schema["headline"],
"views": stats[0]["userInteractionCount"],
"posts": stats[1]["userInteractionCount"],
- "date" : text.parse_datetime(schema["datePublished"]),
- "date_updated": text.parse_datetime(schema["dateModified"]),
+ "date" : self.parse_datetime_iso(schema["datePublished"]),
+ "date_updated": self.parse_datetime_iso(schema["dateModified"]),
"description" : text.unescape(schema["text"]).strip(),
"section" : path[-2],
"author" : author["name"],
@@ -151,8 +159,12 @@ class BellazonExtractor(Extractor):
thread["id"], _, thread["slug"] = \
url_t.rsplit("/", 2)[1].partition("-")
- thread["author_id"], _, thread["author_slug"] = \
- url_a.rsplit("/", 2)[1].partition("-")
+
+ if url_a:
+ thread["author_id"], _, thread["author_slug"] = \
+ url_a.rsplit("/", 2)[1].partition("-")
+ else:
+ thread["author_id"] = thread["author_slug"] = ""
return thread
@@ -162,15 +174,18 @@ class BellazonExtractor(Extractor):
post = {
"id": extr('id="elComment_', '"'),
"author_url": extr(" href='", "'"),
- "date": text.parse_datetime(extr("datetime='", "'")),
+ "date": self.parse_datetime_iso(extr("datetime='", "'")),
"content": extr("<!-- Post content -->", "\n\t\t</div>"),
}
if (pos := post["content"].find(">")) >= 0:
post["content"] = post["content"][pos+1:].strip()
- post["author_id"], _, post["author_slug"] = \
- post["author_url"].rsplit("/", 2)[1].partition("-")
+ if url_a := post["author_url"]:
+ post["author_id"], _, post["author_slug"] = \
+ url_a.rsplit("/", 2)[1].partition("-")
+ else:
+ post["author_id"] = post["author_slug"] = ""
return post
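
Both hunks above guard the same id/slug split against a missing author URL. Given a profile URL of the usual /profile/<id>-<slug>/ shape, the split works like this (the URL is made up):

url_a = "https://www.bellazon.com/main/profile/12345-jane-doe/"
author_id, _, author_slug = url_a.rsplit("/", 2)[1].partition("-")
print(author_id, author_slug)  # 12345 jane-doe

# with the new guard, a post lacking a profile URL degrades cleanly
url_a = ""
if url_a:
    author_id, _, author_slug = url_a.rsplit("/", 2)[1].partition("-")
else:
    author_id = author_slug = ""
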
diff --git a/gallery_dl/extractor/bilibili.py b/gallery_dl/extractor/bilibili.py
index 3f0acff..fe10150 100644
--- a/gallery_dl/extractor/bilibili.py
+++ b/gallery_dl/extractor/bilibili.py
@@ -74,7 +74,7 @@ class BilibiliArticleExtractor(BilibiliExtractor):
pass
article["count"] = len(pics)
- yield Message.Directory, article
+ yield Message.Directory, "", article
for article["num"], pic in enumerate(pics, 1):
url = pic["url"]
article.update(pic)
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index af43446..766272f 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -13,7 +13,7 @@ from .. import text, util
def original(url):
- return (util.re(r"(/|=)(?:[sw]\d+|w\d+-h\d+)(?=/|$)")
+ return (text.re(r"(/|=)(?:[sw]\d+|w\d+-h\d+)(?=/|$)")
.sub(r"\1s0", url)
.replace("http:", "https:", 1))
@@ -32,7 +32,7 @@ class BloggerExtractor(BaseExtractor):
self.videos = self.config("videos", True)
if self.videos:
- self.findall_video = util.re(
+ self.findall_video = text.re(
r"""src=["'](https?://www\.blogger\.com"""
r"""/video\.g\?token=[^"']+)""").findall
@@ -40,10 +40,10 @@ class BloggerExtractor(BaseExtractor):
blog = self.api.blog_by_url("http://" + self.blog)
blog["pages"] = blog["pages"]["totalItems"]
blog["posts"] = blog["posts"]["totalItems"]
- blog["date"] = text.parse_datetime(blog["published"])
+ blog["date"] = self.parse_datetime_iso(blog["published"])
del blog["selfLink"]
- findall_image = util.re(
+ findall_image = text.re(
r'src="(https?://(?:'
r'blogger\.googleusercontent\.com/img|'
r'lh\d+(?:-\w+)?\.googleusercontent\.com|'
@@ -65,14 +65,14 @@ class BloggerExtractor(BaseExtractor):
post["author"] = post["author"]["displayName"]
post["replies"] = post["replies"]["totalItems"]
post["content"] = text.remove_html(content)
- post["date"] = text.parse_datetime(post["published"])
+ post["date"] = self.parse_datetime_iso(post["published"])
del post["selfLink"]
del post["blog"]
data = {"blog": blog, "post": post}
if metadata:
data.update(metadata)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], url in enumerate(files, 1):
data["url"] = url
@@ -117,7 +117,7 @@ BASE_PATTERN = BloggerExtractor.update({
class BloggerPostExtractor(BloggerExtractor):
"""Extractor for a single blog post"""
subcategory = "post"
- pattern = BASE_PATTERN + r"(/\d\d\d\d/\d\d/[^/?#]+\.html)"
+ pattern = rf"{BASE_PATTERN}(/\d\d\d\d/\d\d/[^/?#]+\.html)"
example = "https://BLOG.blogspot.com/1970/01/TITLE.html"
def posts(self, blog):
@@ -127,7 +127,7 @@ class BloggerPostExtractor(BloggerExtractor):
class BloggerBlogExtractor(BloggerExtractor):
"""Extractor for an entire Blogger blog"""
subcategory = "blog"
- pattern = BASE_PATTERN + r"/?$"
+ pattern = rf"{BASE_PATTERN}/?$"
example = "https://BLOG.blogspot.com/"
def posts(self, blog):
@@ -137,7 +137,7 @@ class BloggerBlogExtractor(BloggerExtractor):
class BloggerSearchExtractor(BloggerExtractor):
"""Extractor for Blogger search resuls"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/search/?\?q=([^&#]+)"
+ pattern = rf"{BASE_PATTERN}/search/?\?q=([^&#]+)"
example = "https://BLOG.blogspot.com/search?q=QUERY"
def metadata(self):
@@ -151,7 +151,7 @@ class BloggerSearchExtractor(BloggerExtractor):
class BloggerLabelExtractor(BloggerExtractor):
"""Extractor for Blogger posts by label"""
subcategory = "label"
- pattern = BASE_PATTERN + r"/search/label/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/search/label/([^/?#]+)"
example = "https://BLOG.blogspot.com/search/label/LABEL"
def metadata(self):
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index e8c5707..c981608 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -14,7 +14,7 @@ from ..cache import cache, memcache
BASE_PATTERN = (r"(?:https?://)?"
r"(?:(?:www\.)?(?:c|[fv]x)?bs[ky]y[ex]?\.app|main\.bsky\.dev)")
-USER_PATTERN = BASE_PATTERN + r"/profile/([^/?#]+)"
+USER_PATTERN = rf"{BASE_PATTERN}/profile/([^/?#]+)"
class BlueskyExtractor(Extractor):
@@ -60,7 +60,7 @@ class BlueskyExtractor(Extractor):
self._prepare(post)
files = self._extract_files(post)
- yield Message.Directory, post
+ yield Message.Directory, "", post
if files:
did = post["author"]["did"]
base = (f"{self.api.service_endpoint(did)}/xrpc"
@@ -135,8 +135,7 @@ class BlueskyExtractor(Extractor):
post["instance"] = self.instance
post["post_id"] = self._pid(post)
- post["date"] = text.parse_datetime(
- post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
+ post["date"] = self.parse_datetime_iso(post["createdAt"][:19])
def _extract_files(self, post):
if "embed" not in post:
@@ -217,7 +216,7 @@ class BlueskyExtractor(Extractor):
class BlueskyUserExtractor(Dispatch, BlueskyExtractor):
- pattern = USER_PATTERN + r"$"
+ pattern = rf"{USER_PATTERN}$"
example = "https://bsky.app/profile/HANDLE"
def items(self):
@@ -238,7 +237,7 @@ class BlueskyUserExtractor(Dispatch, BlueskyExtractor):
class BlueskyPostsExtractor(BlueskyExtractor):
subcategory = "posts"
- pattern = USER_PATTERN + r"/posts"
+ pattern = rf"{USER_PATTERN}/posts"
example = "https://bsky.app/profile/HANDLE/posts"
def posts(self):
@@ -248,7 +247,7 @@ class BlueskyPostsExtractor(BlueskyExtractor):
class BlueskyRepliesExtractor(BlueskyExtractor):
subcategory = "replies"
- pattern = USER_PATTERN + r"/replies"
+ pattern = rf"{USER_PATTERN}/replies"
example = "https://bsky.app/profile/HANDLE/replies"
def posts(self):
@@ -258,7 +257,7 @@ class BlueskyRepliesExtractor(BlueskyExtractor):
class BlueskyMediaExtractor(BlueskyExtractor):
subcategory = "media"
- pattern = USER_PATTERN + r"/media"
+ pattern = rf"{USER_PATTERN}/media"
example = "https://bsky.app/profile/HANDLE/media"
def posts(self):
@@ -268,7 +267,7 @@ class BlueskyMediaExtractor(BlueskyExtractor):
class BlueskyVideoExtractor(BlueskyExtractor):
subcategory = "video"
- pattern = USER_PATTERN + r"/video"
+ pattern = rf"{USER_PATTERN}/video"
example = "https://bsky.app/profile/HANDLE/video"
def posts(self):
@@ -278,7 +277,7 @@ class BlueskyVideoExtractor(BlueskyExtractor):
class BlueskyLikesExtractor(BlueskyExtractor):
subcategory = "likes"
- pattern = USER_PATTERN + r"/likes"
+ pattern = rf"{USER_PATTERN}/likes"
example = "https://bsky.app/profile/HANDLE/likes"
def posts(self):
@@ -289,7 +288,7 @@ class BlueskyLikesExtractor(BlueskyExtractor):
class BlueskyFeedExtractor(BlueskyExtractor):
subcategory = "feed"
- pattern = USER_PATTERN + r"/feed/([^/?#]+)"
+ pattern = rf"{USER_PATTERN}/feed/([^/?#]+)"
example = "https://bsky.app/profile/HANDLE/feed/NAME"
def posts(self):
@@ -299,7 +298,7 @@ class BlueskyFeedExtractor(BlueskyExtractor):
class BlueskyListExtractor(BlueskyExtractor):
subcategory = "list"
- pattern = USER_PATTERN + r"/lists/([^/?#]+)"
+ pattern = rf"{USER_PATTERN}/lists/([^/?#]+)"
example = "https://bsky.app/profile/HANDLE/lists/ID"
def posts(self):
@@ -309,7 +308,7 @@ class BlueskyListExtractor(BlueskyExtractor):
class BlueskyFollowingExtractor(BlueskyExtractor):
subcategory = "following"
- pattern = USER_PATTERN + r"/follows"
+ pattern = rf"{USER_PATTERN}/follows"
example = "https://bsky.app/profile/HANDLE/follows"
def items(self):
@@ -321,7 +320,7 @@ class BlueskyFollowingExtractor(BlueskyExtractor):
class BlueskyPostExtractor(BlueskyExtractor):
subcategory = "post"
- pattern = USER_PATTERN + r"/post/([^/?#]+)"
+ pattern = rf"{USER_PATTERN}/post/([^/?#]+)"
example = "https://bsky.app/profile/HANDLE/post/ID"
def posts(self):
@@ -331,19 +330,19 @@ class BlueskyPostExtractor(BlueskyExtractor):
class BlueskyInfoExtractor(BlueskyExtractor):
subcategory = "info"
- pattern = USER_PATTERN + r"/info"
+ pattern = rf"{USER_PATTERN}/info"
example = "https://bsky.app/profile/HANDLE/info"
def items(self):
self._metadata_user = True
self.api._did_from_actor(self.groups[0])
- return iter(((Message.Directory, self._user),))
+ return iter(((Message.Directory, "", self._user),))
class BlueskyAvatarExtractor(BlueskyExtractor):
subcategory = "avatar"
filename_fmt = "avatar_{post_id}.{extension}"
- pattern = USER_PATTERN + r"/avatar"
+ pattern = rf"{USER_PATTERN}/avatar"
example = "https://bsky.app/profile/HANDLE/avatar"
def posts(self):
@@ -353,7 +352,7 @@ class BlueskyAvatarExtractor(BlueskyExtractor):
class BlueskyBackgroundExtractor(BlueskyExtractor):
subcategory = "background"
filename_fmt = "background_{post_id}.{extension}"
- pattern = USER_PATTERN + r"/ba(?:nner|ckground)"
+ pattern = rf"{USER_PATTERN}/ba(?:nner|ckground)"
example = "https://bsky.app/profile/HANDLE/banner"
def posts(self):
@@ -362,7 +361,7 @@ class BlueskyBackgroundExtractor(BlueskyExtractor):
class BlueskySearchExtractor(BlueskyExtractor):
subcategory = "search"
- pattern = BASE_PATTERN + r"/search(?:/|\?q=)(.+)"
+ pattern = rf"{BASE_PATTERN}/search(?:/|\?q=)(.+)"
example = "https://bsky.app/search?q=QUERY"
def posts(self):
@@ -372,7 +371,7 @@ class BlueskySearchExtractor(BlueskyExtractor):
class BlueskyHashtagExtractor(BlueskyExtractor):
subcategory = "hashtag"
- pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)(?:/(top|latest))?"
+ pattern = rf"{BASE_PATTERN}/hashtag/([^/?#]+)(?:/(top|latest))?"
example = "https://bsky.app/hashtag/NAME"
def posts(self):
@@ -382,7 +381,7 @@ class BlueskyHashtagExtractor(BlueskyExtractor):
class BlueskyBookmarkExtractor(BlueskyExtractor):
subcategory = "bookmark"
- pattern = BASE_PATTERN + r"/saved"
+ pattern = rf"{BASE_PATTERN}/saved"
example = "https://bsky.app/saved"
def posts(self):
@@ -401,7 +400,9 @@ class BlueskyAPI():
self.headers = {"Accept": "application/json"}
self.username, self.password = extractor._get_auth_info()
- if self.username:
+ if srv := extractor.config("api-server", False):
+ self.root = srv.rstrip("/")
+ elif self.username:
self.root = "https://bsky.social"
else:
self.root = "https://api.bsky.app"
diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py
index ae455bf..4858a4b 100644
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -57,7 +57,7 @@ class BooruExtractor(BaseExtractor):
post.update(data)
self._prepare(post)
- yield Message.Directory, post
+ yield Message.Directory, "", post
yield Message.Url, url, post
def skip(self, num):
diff --git a/gallery_dl/extractor/boosty.py b/gallery_dl/extractor/boosty.py
index 22f3259..5add768 100644
--- a/gallery_dl/extractor/boosty.py
+++ b/gallery_dl/extractor/boosty.py
@@ -49,6 +49,9 @@ class BoostyExtractor(Extractor):
self.videos = videos
def items(self):
+ headers = self.api.headers.copy()
+ del headers["Accept"]
+
for post in self.posts():
if not post.get("hasAccess"):
self.log.warning("Not allowed to access post %s", post["id"])
@@ -61,9 +64,10 @@ class BoostyExtractor(Extractor):
"post" : post,
"user" : post.pop("user", None),
"count": len(files),
+ "_http_headers": headers,
}
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], file in enumerate(files, 1):
data["file"] = file
url = file["url"]
@@ -78,7 +82,7 @@ class BoostyExtractor(Extractor):
post["links"] = links = []
if "createdAt" in post:
- post["date"] = text.parse_timestamp(post["createdAt"])
+ post["date"] = self.parse_timestamp(post["createdAt"])
for block in post["data"]:
try:
@@ -159,7 +163,7 @@ class BoostyExtractor(Extractor):
class BoostyUserExtractor(BoostyExtractor):
"""Extractor for boosty.to user profiles"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/([^/?#]+)(?:\?([^#]+))?$"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)(?:\?([^#]+))?$"
example = "https://boosty.to/USER"
def posts(self):
@@ -175,7 +179,7 @@ class BoostyMediaExtractor(BoostyExtractor):
subcategory = "media"
directory_fmt = "{category}", "{user[blogUrl]} ({user[id]})", "media"
filename_fmt = "{post[id]}_{num}.{extension}"
- pattern = BASE_PATTERN + r"/([^/?#]+)/media/([^/?#]+)(?:\?([^#]+))?"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/media/([^/?#]+)(?:\?([^#]+))?"
example = "https://boosty.to/USER/media/all"
def posts(self):
@@ -188,7 +192,7 @@ class BoostyMediaExtractor(BoostyExtractor):
class BoostyFeedExtractor(BoostyExtractor):
"""Extractor for your boosty.to subscription feed"""
subcategory = "feed"
- pattern = BASE_PATTERN + r"/(?:\?([^#]+))?(?:$|#)"
+ pattern = rf"{BASE_PATTERN}/(?:\?([^#]+))?(?:$|#)"
example = "https://boosty.to/"
def posts(self):
@@ -199,7 +203,7 @@ class BoostyFeedExtractor(BoostyExtractor):
class BoostyPostExtractor(BoostyExtractor):
"""Extractor for boosty.to posts"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/([^/?#]+)/posts/([0-9a-f-]+)"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/posts/([0-9a-f-]+)"
example = "https://boosty.to/USER/posts/01234567-89ab-cdef-0123-456789abcd"
def posts(self):
@@ -212,7 +216,7 @@ class BoostyPostExtractor(BoostyExtractor):
class BoostyFollowingExtractor(BoostyExtractor):
"""Extractor for your boosty.to subscribed users"""
subcategory = "following"
- pattern = BASE_PATTERN + r"/app/settings/subscriptions"
+ pattern = rf"{BASE_PATTERN}/app/settings/subscriptions"
example = "https://boosty.to/app/settings/subscriptions"
def items(self):
@@ -227,7 +231,7 @@ class BoostyDirectMessagesExtractor(BoostyExtractor):
subcategory = "direct-messages"
directory_fmt = ("{category}", "{user[blogUrl]} ({user[id]})",
"Direct Messages")
- pattern = BASE_PATTERN + r"/app/messages/?\?dialogId=(\d+)"
+ pattern = rf"{BASE_PATTERN}/app/messages/?\?dialogId=(\d+)"
example = "https://boosty.to/app/messages?dialogId=12345"
def items(self):
@@ -260,7 +264,7 @@ class BoostyDirectMessagesExtractor(BoostyExtractor):
"count": len(files),
}
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], file in enumerate(files, 1):
data["file"] = file
url = file["url"]
@@ -280,8 +284,12 @@ class BoostyAPI():
if not access_token:
if auth := self.extractor.cookies.get("auth", domain=".boosty.to"):
- access_token = text.extr(
- text.unquote(auth), '"accessToken":"', '"')
+ auth = text.unquote(auth)
+ access_token = text.extr(auth, '"accessToken":"', '"')
+ if expires := text.extr(auth, '"expiresAt":', ','):
+ import time
+ if text.parse_int(expires) < time.time() * 1000:
+ extractor.log.warning("'auth' cookie tokens expired")
if access_token:
self.headers["Authorization"] = "Bearer " + access_token
diff --git a/gallery_dl/extractor/booth.py b/gallery_dl/extractor/booth.py
index 0fcb1cb..3c000b1 100644
--- a/gallery_dl/extractor/booth.py
+++ b/gallery_dl/extractor/booth.py
@@ -70,8 +70,7 @@ class BoothItemExtractor(BoothExtractor):
url + ".json", headers=headers, interval=False)
item["booth_category"] = item.pop("category", None)
- item["date"] = text.parse_datetime(
- item["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ item["date"] = self.parse_datetime_iso(item["published_at"])
item["tags"] = [t["name"] for t in item["tags"]]
shop = item["shop"]
@@ -84,7 +83,7 @@ class BoothItemExtractor(BoothExtractor):
item["count"] = 0
shop["uuid"] = util.NONE
- yield Message.Directory, item
+ yield Message.Directory, "", item
for num, file in enumerate(files, 1):
url = file["url"]
file["num"] = num
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 14ebc48..ed9cd0f 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -11,6 +11,7 @@
from .common import Extractor
from .lolisafe import LolisafeAlbumExtractor
from .. import text, util, config, exception
+from ..cache import memcache
import random
if config.get(("extractor", "bunkr"), "tlds"):
@@ -63,7 +64,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
root_dl = "https://get.bunkrr.su"
root_api = "https://apidl.bunkr.ru"
archive_fmt = "{album_id}_{id|id_url|slug}"
- pattern = BASE_PATTERN + r"/a/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/a/([^/?#]+)"
example = "https://bunkr.si/a/ID"
def __init__(self, match):
@@ -167,7 +168,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
item, 'name: "', ".")
file["size"] = text.parse_int(text.extr(
item, "size: ", " ,\n"))
- file["date"] = text.parse_datetime(text.extr(
+ file["date"] = self.parse_datetime(text.extr(
item, 'timestamp: "', '"'), "%H:%M:%S %d/%m/%Y")
yield file
@@ -176,6 +177,10 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
except Exception as exc:
self.log.error("%s: %s", exc.__class__.__name__, exc)
self.log.debug("%s", item, exc_info=exc)
+ if isinstance(exc, exception.HttpError) and \
+ exc.status == 400 and \
+ exc.response.url.startswith(self.root_api):
+ raise exception.AbortExtraction("Album deleted")
def _extract_file(self, data_id):
referer = f"{self.root_dl}/file/{data_id}"
@@ -211,7 +216,7 @@ class BunkrMediaExtractor(BunkrAlbumExtractor):
"""Extractor for bunkr.si media links"""
subcategory = "media"
directory_fmt = ("{category}",)
- pattern = BASE_PATTERN + r"(/[fvid]/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/[fvid]/[^/?#]+)"
example = "https://bunkr.si/f/FILENAME"
def fetch_album(self, album_id):
@@ -227,10 +232,26 @@ class BunkrMediaExtractor(BunkrAlbumExtractor):
self.log.error("%s: %s", exc.__class__.__name__, exc)
return (), {}
+ album_id, album_name, album_size = self._album_info(text.extr(
+ page, ' href="../a/', '"'))
return (file,), {
- "album_id" : "",
- "album_name" : "",
- "album_size" : -1,
- "description": "",
- "count" : 1,
+ "album_id" : album_id,
+ "album_name": album_name,
+ "album_size": album_size,
+ "count" : 1,
}
+
+ @memcache(keyarg=1)
+ def _album_info(self, album_id):
+ if album_id:
+ try:
+ page = self.request(f"{self.root}/a/{album_id}").text
+ return (
+ album_id,
+ text.unescape(text.unescape(text.extr(
+ page, 'property="og:title" content="', '"'))),
+ text.extr(page, '<span class="font-semibold">(', ')'),
+ )
+ except Exception:
+ pass
+ return album_id, "", -1
diff --git a/gallery_dl/extractor/catbox.py b/gallery_dl/extractor/catbox.py
index 22f7a97..2066839 100644
--- a/gallery_dl/extractor/catbox.py
+++ b/gallery_dl/extractor/catbox.py
@@ -28,7 +28,7 @@ class CatboxAlbumExtractor(GalleryExtractor):
return {
"album_id" : self.page_url.rpartition("/")[2],
"album_name" : text.unescape(extr("<h1>", "<")),
- "date" : text.parse_datetime(extr(
+ "date" : self.parse_datetime(extr(
"<p>Created ", "<"), "%B %d %Y"),
"description": text.unescape(extr("<p>", "<")),
}
@@ -52,5 +52,5 @@ class CatboxFileExtractor(Extractor):
def items(self):
url = text.ensure_http_scheme(self.url)
file = text.nameext_from_url(url, {"url": url})
- yield Message.Directory, file
+ yield Message.Directory, "", file
yield Message.Url, url, file
diff --git a/gallery_dl/extractor/cfake.py b/gallery_dl/extractor/cfake.py
new file mode 100644
index 0000000..4c37455
--- /dev/null
+++ b/gallery_dl/extractor/cfake.py
@@ -0,0 +1,149 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://cfake.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?cfake\.com"
+
+
+class CfakeExtractor(Extractor):
+ """Base class for cfake extractors"""
+ category = "cfake"
+ root = "https://cfake.com"
+ directory_fmt = ("{category}", "{type}", "{type_name} ({type_id})")
+ filename_fmt = "{category}_{type_name}_{id}.{extension}"
+ archive_fmt = "{id}"
+
+ def items(self):
+ type, type_name, type_id, sub_id, pnum = self.groups
+
+ if type.endswith("ies"):
+ type = type[:-3] + "y"
+
+ kwdict = self.kwdict
+ kwdict["type"] = type
+ kwdict["type_id"] = text.parse_int(type_id)
+ kwdict["type_name"] = text.unquote(type_name).replace("_", " ")
+ kwdict["sub_id"] = text.parse_int(sub_id)
+ kwdict["page"] = pnum = text.parse_int(pnum, 1)
+ yield Message.Directory, "", {}
+
+ base = f"{self.root}/images/{type}/{type_name}/{type_id}"
+ if sub_id:
+ base = f"{base}/{sub_id}"
+
+ while True:
+ url = base if pnum < 2 else f"{base}/p{pnum}"
+ page = self.request(url).text
+
+ # Extract and yield images
+ num = 0
+ for image in self._extract_images(page):
+ num += 1
+ image["num"] = num + (pnum - 1) * 50
+ url = image["url"]
+ yield Message.Url, url, text.nameext_from_url(url, image)
+
+ # Check for next page
+ if not num or not (pnum := self._check_pagination(page)):
+ return
+ kwdict["page"] = pnum
+
+ def _extract_images(self, page):
+ """Extract image URLs and metadata from a gallery page"""
+ for item in text.extract_iter(
+ page, '<a href="javascript:showimage(', '</div></div>'):
+
+ # Extract image path from showimage call
+ # Format: 'big.php?show=2025/filename.jpg&id_picture=...
+ show_param = text.extr(item, "show=", "&")
+ if not show_param:
+ continue
+
+ # Extract metadata
+ picture_id = text.extr(item, "id_picture=", "&")
+ name_param = text.extr(item, "p_name=", "'")
+
+ # Extract date
+ date = text.extr(item, 'id="date_vignette">', '</div>')
+
+ # Extract rating
+ rating_text = text.extr(item, 'class="current-rating"', '</li>')
+ rating = text.extr(rating_text, 'width:', 'px')
+
+ # Convert thumbnail path to full image path
+ # show_param is like "2025/filename.jpg"
+ image_url = f"{self.root}/medias/photos/{show_param}"
+
+ yield {
+ "url": image_url,
+ "id": text.parse_int(picture_id) if picture_id else 0,
+ "name": text.unescape(name_param) if name_param else "",
+ "date": date,
+ "rating": rating,
+ }
+
+ def _check_pagination(self, page):
+ """Check if there are more pages and return next page number"""
+ # Look for current page indicator
+ # Format: id="num_page_current" ><a href=".../ p1">1</a>
+ current_section = text.extr(
+ page, 'id="num_page_current"', '</div>')
+ if not current_section:
+ return None
+
+ # Extract current page number from the link text
+ current_page_str = text.extr(current_section, '">', '</a>')
+ if not current_page_str:
+ return None
+
+ current_page = text.parse_int(current_page_str)
+ if not current_page:
+ return None
+
+ next_page = current_page + 1
+
+ # Check if next page link exists anywhere in the page
+ # Look for href="/images/.../pN" pattern
+ if f'/p{next_page}"' in page or f'/p{next_page} ' in page:
+ return next_page
+
+ return None
+
+
+class CfakeCelebrityExtractor(CfakeExtractor):
+ """Extractor for celebrity image galleries from cfake.com"""
+ subcategory = "celebrity"
+ pattern = (BASE_PATTERN + r"/images/(celebrity)"
+ r"/([^/?#]+)/(\d+)()(?:/p(\d+))?")
+ example = "https://cfake.com/images/celebrity/NAME/123"
+
+
+class CfakeCategoryExtractor(CfakeExtractor):
+ """Extractor for category image galleries from cfake.com"""
+ subcategory = "category"
+ pattern = (BASE_PATTERN + r"/images/(categories)"
+ r"/([^/?#]+)/(\d+)()(?:/p(\d+))?")
+ example = "https://cfake.com/images/categories/NAME/123"
+
+
+class CfakeCreatedExtractor(CfakeExtractor):
+ """Extractor for 'created' image galleries from cfake.com"""
+ subcategory = "created"
+ pattern = (BASE_PATTERN + r"/images/(created)"
+ r"/([^/?#]+)/(\d+)/(\d+)(?:/p(\d+))?")
+ example = "https://cfake.com/images/created/NAME/12345/123"
+
+
+class CfakeCountryExtractor(CfakeExtractor):
+ """Extractor for country image galleries from cfake.com"""
+ subcategory = "country"
+ pattern = (BASE_PATTERN + r"/images/(country)"
+ r"/([^/?#]+)/(\d+)/(\d+)(?:/p(\d+))?")
+ example = "https://cfake.com/images/country/NAME/12345/123"
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index 1552899..9a766d0 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -17,14 +17,17 @@ class CheveretoExtractor(BaseExtractor):
basecategory = "chevereto"
directory_fmt = ("{category}", "{user}", "{album}")
archive_fmt = "{id}"
+ parent = True
def _init(self):
self.path = self.groups[-1]
- def _pagination(self, url):
- while True:
- page = self.request(url).text
+ def _pagination(self, url, callback=None):
+ page = self.request(url).text
+ if callback is not None:
+ callback(page)
+ while True:
for item in text.extract_iter(
page, '<div class="list-item-image ', 'image-container'):
yield text.urljoin(self.root, text.extr(
@@ -35,12 +38,13 @@ class CheveretoExtractor(BaseExtractor):
return
if url[0] == "/":
url = self.root + url
+ page = self.request(url).text
BASE_PATTERN = CheveretoExtractor.update({
"jpgfish": {
- "root": "https://jpg6.su",
- "pattern": r"(?:www\.)?jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)",
+ "root": "https://jpg7.cr",
+ "pattern": r"(?:www\.)?jpe?g\d?\.(?:cr|su|pet|fish(?:ing)?|church)",
},
"imagepond": {
"root": "https://imagepond.net",
@@ -56,8 +60,8 @@ BASE_PATTERN = CheveretoExtractor.update({
class CheveretoImageExtractor(CheveretoExtractor):
"""Extractor for chevereto images"""
subcategory = "image"
- pattern = BASE_PATTERN + r"(/im(?:g|age)/[^/?#]+)"
- example = "https://jpg2.su/img/TITLE.ID"
+ pattern = rf"{BASE_PATTERN}(/im(?:g|age)/[^/?#]+)"
+ example = "https://jpg7.cr/img/TITLE.ID"
def items(self):
url = self.root + self.path
@@ -74,25 +78,27 @@ class CheveretoImageExtractor(CheveretoExtractor):
url, b"seltilovessimpcity@simpcityhatesscrapers",
fromhex=True)
+ album_url, _, album_name = extr("Added to <a", "</a>").rpartition(">")
file = {
"id" : self.path.rpartition("/")[2].rpartition(".")[2],
"url" : url,
- "album": text.remove_html(extr(
- "Added to <a", "</a>").rpartition(">")[2]),
- "date" : text.parse_datetime(extr(
- '<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
+ "album": text.remove_html(album_name),
+ "date" : self.parse_datetime_iso(extr('<span title="', '"')),
"user" : extr('username: "', '"'),
}
+ file["album_slug"], _, file["album_id"] = text.rextr(
+ album_url, "/", '"').rpartition(".")
+
text.nameext_from_url(file["url"], file)
- yield Message.Directory, file
+ yield Message.Directory, "", file
yield Message.Url, file["url"], file
class CheveretoVideoExtractor(CheveretoExtractor):
"""Extractor for chevereto videos"""
subcategory = "video"
- pattern = BASE_PATTERN + r"(/video/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/video/[^/?#]+)"
example = "https://imagepond.net/video/TITLE.ID"
def items(self):
@@ -114,13 +120,17 @@ class CheveretoVideoExtractor(CheveretoExtractor):
'property="video:height" content="', '"')),
"duration" : extr(
'class="far fa-clock"></i>', "—"),
- "album": text.remove_html(extr(
- "Added to <a", "</a>").rpartition(">")[2]),
- "date" : text.parse_datetime(extr(
- '<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
+ "album" : extr(
+ "Added to <a", "</a>"),
+ "date" : self.parse_datetime_iso(extr('<span title="', '"')),
"user" : extr('username: "', '"'),
}
+ album_url, _, album_name = file["album"].rpartition(">")
+ file["album"] = text.remove_html(album_name)
+ file["album_slug"], _, file["album_id"] = text.rextr(
+ album_url, "/", '"').rpartition(".")
+
try:
min, _, sec = file["duration"].partition(":")
file["duration"] = int(min) * 60 + int(sec)
@@ -128,15 +138,15 @@ class CheveretoVideoExtractor(CheveretoExtractor):
pass
text.nameext_from_url(file["url"], file)
- yield Message.Directory, file
+ yield Message.Directory, "", file
yield Message.Url, file["url"], file
class CheveretoAlbumExtractor(CheveretoExtractor):
"""Extractor for chevereto albums"""
subcategory = "album"
- pattern = BASE_PATTERN + r"(/a(?:lbum)?/[^/?#]+(?:/sub)?)"
- example = "https://jpg2.su/album/TITLE.ID"
+ pattern = rf"{BASE_PATTERN}(/a(?:lbum)?/[^/?#]+(?:/sub)?)"
+ example = "https://jpg7.cr/album/TITLE.ID"
def items(self):
url = self.root + self.path
@@ -148,16 +158,31 @@ class CheveretoAlbumExtractor(CheveretoExtractor):
else:
albums = (url,)
+ kwdict = self.kwdict
for album in albums:
- for item_url in self._pagination(album):
+ for kwdict["num"], item_url in enumerate(self._pagination(
+ album, self._extract_metadata_album), 1):
data = data_video if "/video/" in item_url else data_image
yield Message.Queue, item_url, data
+ def _extract_metadata_album(self, page):
+ url, pos = text.extract(
+ page, 'property="og:url" content="', '"')
+ title, pos = text.extract(
+ page, 'property="og:title" content="', '"', pos)
+
+ kwdict = self.kwdict
+ kwdict["album_slug"], _, kwdict["album_id"] = \
+ url[url.rfind("/")+1:].rpartition(".")
+ kwdict["album"] = text.unescape(title)
+ kwdict["count"] = text.parse_int(text.extract(
+ page, 'data-text="image-count">', "<", pos)[0])
+
class CheveretoCategoryExtractor(CheveretoExtractor):
"""Extractor for chevereto galleries"""
subcategory = "category"
- pattern = BASE_PATTERN + r"(/category/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/category/[^/?#]+)"
example = "https://imglike.com/category/TITLE"
def items(self):
@@ -169,8 +194,8 @@ class CheveretoCategoryExtractor(CheveretoExtractor):
class CheveretoUserExtractor(CheveretoExtractor):
"""Extractor for chevereto users"""
subcategory = "user"
- pattern = BASE_PATTERN + r"(/[^/?#]+(?:/albums)?)"
- example = "https://jpg2.su/USER"
+ pattern = rf"{BASE_PATTERN}(/[^/?#]+(?:/albums)?)"
+ example = "https://jpg7.cr/USER"
def items(self):
url = self.root + self.path
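
_pagination now fetches the first page before entering the loop and hands it to an optional callback, so album metadata can be scraped once without an extra request. A sketch of that pattern under stated assumptions, with stand-in fetch/extract helpers and fabricated page data:

def paginate(url, fetch, extract_items, next_url, callback=None):
    page = fetch(url)
    if callback is not None:
        callback(page)  # harvest metadata from page one only
    while True:
        yield from extract_items(page)
        url = next_url(page)
        if not url:
            return
        page = fetch(url)

pages = {"p1": ("a b", "p2"), "p2": ("c", None)}
items = list(paginate(
    "p1",
    fetch=lambda u: pages[u],
    extract_items=lambda p: p[0].split(),
    next_url=lambda p: p[1],
    callback=lambda p: print("meta from", p),
))
print(items)  # ['a', 'b', 'c']
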
diff --git a/gallery_dl/extractor/cien.py b/gallery_dl/extractor/cien.py
index 45e5dab..c68af2e 100644
--- a/gallery_dl/extractor/cien.py
+++ b/gallery_dl/extractor/cien.py
@@ -34,7 +34,7 @@ class CienExtractor(Extractor):
page = self.request(url, params=params).text
for card in text.extract_iter(
- page, ' class="c-cardCase-item', '</div>'):
+ page, ' class="c-cardCase-item', '</figure>'):
article_url = text.extr(card, ' href="', '"')
yield Message.Queue, article_url, data
@@ -48,7 +48,7 @@ class CienArticleExtractor(CienExtractor):
filename_fmt = "{num:>02} {filename}.{extension}"
directory_fmt = ("{category}", "{author[name]}", "{post_id} {name}")
archive_fmt = "{post_id}_{num}"
- pattern = BASE_PATTERN + r"/creator/(\d+)/article/(\d+)"
+ pattern = rf"{BASE_PATTERN}/creator/(\d+)/article/(\d+)"
example = "https://ci-en.net/creator/123/article/12345"
def items(self):
@@ -61,7 +61,7 @@ class CienArticleExtractor(CienExtractor):
post["post_url"] = url
post["post_id"] = text.parse_int(post_id)
post["count"] = len(files)
- post["date"] = text.parse_datetime(post["datePublished"])
+ post["date"] = self.parse_datetime_iso(post["datePublished"])
try:
post["author"]["id"] = text.parse_int(author_id)
@@ -70,7 +70,7 @@ class CienArticleExtractor(CienExtractor):
except Exception:
pass
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], file in enumerate(files, 1):
post.update(file)
if "extension" not in file:
@@ -160,7 +160,7 @@ class CienArticleExtractor(CienExtractor):
class CienCreatorExtractor(CienExtractor):
subcategory = "creator"
- pattern = BASE_PATTERN + r"/creator/(\d+)(?:/article(?:\?([^#]+))?)?/?$"
+ pattern = rf"{BASE_PATTERN}/creator/(\d+)(?:/article(?:\?([^#]+))?)?/?$"
example = "https://ci-en.net/creator/123"
def items(self):
@@ -172,7 +172,7 @@ class CienCreatorExtractor(CienExtractor):
class CienRecentExtractor(CienExtractor):
subcategory = "recent"
- pattern = BASE_PATTERN + r"/mypage/recent(?:\?([^#]+))?"
+ pattern = rf"{BASE_PATTERN}/mypage/recent(?:\?([^#]+))?"
example = "https://ci-en.net/mypage/recent"
def items(self):
@@ -183,7 +183,7 @@ class CienRecentExtractor(CienExtractor):
class CienFollowingExtractor(CienExtractor):
subcategory = "following"
- pattern = BASE_PATTERN + r"/mypage/subscription(/following)?"
+ pattern = rf"{BASE_PATTERN}/mypage/subscription(/following)?"
example = "https://ci-en.net/mypage/subscription"
def items(self):
diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py
index 26ee3fd..742c561 100644
--- a/gallery_dl/extractor/civitai.py
+++ b/gallery_dl/extractor/civitai.py
@@ -15,7 +15,7 @@ import itertools
import time
BASE_PATTERN = r"(?:https?://)?civitai\.com"
-USER_PATTERN = BASE_PATTERN + r"/user/([^/?#]+)"
+USER_PATTERN = rf"{BASE_PATTERN}/user/([^/?#]+)"
class CivitaiExtractor(Extractor):
@@ -61,13 +61,14 @@ class CivitaiExtractor(Extractor):
if isinstance(metadata, str):
metadata = metadata.split(",")
elif not isinstance(metadata, (list, tuple)):
- metadata = ("generation", "version", "post")
+ metadata = {"generation", "version", "post", "tags"}
self._meta_generation = ("generation" in metadata)
self._meta_version = ("version" in metadata)
self._meta_post = ("post" in metadata)
+ self._meta_tags = ("tags" in metadata)
else:
self._meta_generation = self._meta_version = self._meta_post = \
- False
+ self._meta_tags = False
def items(self):
if models := self.models():
@@ -86,8 +87,7 @@ class CivitaiExtractor(Extractor):
images = self.api.images_post(post["id"])
post = self.api.post(post["id"])
- post["date"] = text.parse_datetime(
- post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ post["date"] = self.parse_datetime_iso(post["publishedAt"])
data = {
"post": post,
"user": post.pop("user"),
@@ -96,7 +96,7 @@ class CivitaiExtractor(Extractor):
data["model"], data["version"] = \
self._extract_meta_version(post)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for file in self._image_results(images):
file.update(data)
yield Message.Url, file["url"], file
@@ -111,8 +111,9 @@ class CivitaiExtractor(Extractor):
}
if self._meta_generation:
- data["generation"] = \
- self._extract_meta_generation(file)
+ data["generation"] = self._extract_meta_generation(file)
+ if self._meta_tags:
+ data["tags"] = self._extract_meta_tags(file)
if self._meta_version:
data["model"], data["version"] = \
self._extract_meta_version(file, False)
@@ -122,8 +123,7 @@ class CivitaiExtractor(Extractor):
data["post"] = post = self._extract_meta_post(file)
if post:
post.pop("user", None)
- file["date"] = text.parse_datetime(
- file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ file["date"] = self.parse_datetime_iso(file["createdAt"])
data["url"] = url = self._url(file)
text.nameext_from_url(url, data)
@@ -131,7 +131,7 @@ class CivitaiExtractor(Extractor):
data["extension"] = (
self._video_ext if file.get("type") == "video" else
self._image_ext)
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, url, data
return
@@ -180,10 +180,11 @@ class CivitaiExtractor(Extractor):
if "id" not in file and data["filename"].isdecimal():
file["id"] = text.parse_int(data["filename"])
if "date" not in file:
- file["date"] = text.parse_datetime(
- file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ file["date"] = self.parse_datetime_iso(file["createdAt"])
if self._meta_generation:
file["generation"] = self._extract_meta_generation(file)
+ if self._meta_tags:
+ file["tags"] = self._extract_meta_tags(file)
yield data
def _image_reactions(self):
@@ -211,16 +212,21 @@ class CivitaiExtractor(Extractor):
try:
return self.api.image_generationdata(image["id"])
except Exception as exc:
- return self.log.debug("", exc_info=exc)
+ return self.log.traceback(exc)
def _extract_meta_post(self, image):
try:
post = self.api.post(image["postId"])
- post["date"] = text.parse_datetime(
- post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ post["date"] = self.parse_datetime_iso(post["publishedAt"])
return post
except Exception as exc:
- return self.log.debug("", exc_info=exc)
+ return self.log.traceback(exc)
+
+ def _extract_meta_tags(self, image):
+ try:
+ return self.api.tag_getvotabletags(image["id"])
+ except Exception as exc:
+ return self.log.traceback(exc)
def _extract_meta_version(self, item, is_post=True):
try:
@@ -228,7 +234,7 @@ class CivitaiExtractor(Extractor):
version = self.api.model_version(version_id).copy()
return version.pop("model", None), version
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
return None, None
def _extract_version_id(self, item, is_post=True):
@@ -252,7 +258,7 @@ class CivitaiModelExtractor(CivitaiExtractor):
directory_fmt = ("{category}", "{user[username]}",
"{model[id]}{model[name]:? //}",
"{version[id]}{version[name]:? //}")
- pattern = BASE_PATTERN + r"/models/(\d+)(?:/?\?modelVersionId=(\d+))?"
+ pattern = rf"{BASE_PATTERN}/models/(\d+)(?:/?\?modelVersionId=(\d+))?"
example = "https://civitai.com/models/12345/TITLE"
def items(self):
@@ -278,8 +284,7 @@ class CivitaiModelExtractor(CivitaiExtractor):
versions = (version,)
for version in versions:
- version["date"] = text.parse_datetime(
- version["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ version["date"] = self.parse_datetime_iso(version["createdAt"])
data = {
"model" : model,
@@ -287,7 +292,7 @@ class CivitaiModelExtractor(CivitaiExtractor):
"user" : user,
}
- yield Message.Directory, data
+ yield Message.Directory, "", data
for file in self._extract_files(model, version, user):
file.update(data)
yield Message.Url, file["url"], file
@@ -342,9 +347,9 @@ class CivitaiModelExtractor(CivitaiExtractor):
params = {
"modelVersionId": version["id"],
"prioritizedUserIds": (user["id"],),
- "period": "AllTime",
- "sort": "Most Reactions",
- "limit": 20,
+ "period" : self.api._param_period(),
+ "sort" : self.api._param_sort(),
+ "limit" : 20,
"pending": True,
}
images = self.api.images(params, defaults=False)
@@ -370,7 +375,7 @@ class CivitaiModelExtractor(CivitaiExtractor):
class CivitaiImageExtractor(CivitaiExtractor):
subcategory = "image"
- pattern = BASE_PATTERN + r"/images/(\d+)"
+ pattern = rf"{BASE_PATTERN}/images/(\d+)"
example = "https://civitai.com/images/12345"
def images(self):
@@ -381,7 +386,7 @@ class CivitaiCollectionExtractor(CivitaiExtractor):
subcategory = "collection"
directory_fmt = ("{category}", "{user_collection[username]}",
"collections", "{collection[id]}{collection[name]:? //}")
- pattern = BASE_PATTERN + r"/collections/(\d+)"
+ pattern = rf"{BASE_PATTERN}/collections/(\d+)"
example = "https://civitai.com/collections/12345"
def images(self):
@@ -391,8 +396,8 @@ class CivitaiCollectionExtractor(CivitaiExtractor):
params = {
"collectionId" : cid,
- "period" : "AllTime",
- "sort" : "Newest",
+ "period" : self.api._param_period(),
+ "sort" : self.api._param_sort(),
"browsingLevel" : self.api.nsfw,
"include" : ("cosmetics",),
}
@@ -403,7 +408,7 @@ class CivitaiPostExtractor(CivitaiExtractor):
subcategory = "post"
directory_fmt = ("{category}", "{username|user[username]}", "posts",
"{post[id]}{post[title]:? //}")
- pattern = BASE_PATTERN + r"/posts/(\d+)"
+ pattern = rf"{BASE_PATTERN}/posts/(\d+)"
example = "https://civitai.com/posts/12345"
def posts(self):
@@ -412,7 +417,7 @@ class CivitaiPostExtractor(CivitaiExtractor):
class CivitaiTagExtractor(CivitaiExtractor):
subcategory = "tag"
- pattern = BASE_PATTERN + r"/tag/([^/?&#]+)"
+ pattern = rf"{BASE_PATTERN}/tag/([^/?&#]+)"
example = "https://civitai.com/tag/TAG"
def models(self):
@@ -422,7 +427,7 @@ class CivitaiTagExtractor(CivitaiExtractor):
class CivitaiSearchModelsExtractor(CivitaiExtractor):
subcategory = "search-models"
- pattern = BASE_PATTERN + r"/search/models\?([^#]+)"
+ pattern = rf"{BASE_PATTERN}/search/models\?([^#]+)"
example = "https://civitai.com/search/models?query=QUERY"
def models(self):
@@ -433,7 +438,7 @@ class CivitaiSearchModelsExtractor(CivitaiExtractor):
class CivitaiSearchImagesExtractor(CivitaiExtractor):
subcategory = "search-images"
- pattern = BASE_PATTERN + r"/search/images\?([^#]+)"
+ pattern = rf"{BASE_PATTERN}/search/images\?([^#]+)"
example = "https://civitai.com/search/images?query=QUERY"
def images(self):
@@ -444,7 +449,7 @@ class CivitaiSearchImagesExtractor(CivitaiExtractor):
class CivitaiModelsExtractor(CivitaiExtractor):
subcategory = "models"
- pattern = BASE_PATTERN + r"/models(?:/?\?([^#]+))?(?:$|#)"
+ pattern = rf"{BASE_PATTERN}/models(?:/?\?([^#]+))?(?:$|#)"
example = "https://civitai.com/models"
def models(self):
@@ -454,7 +459,7 @@ class CivitaiModelsExtractor(CivitaiExtractor):
class CivitaiImagesExtractor(CivitaiExtractor):
subcategory = "images"
- pattern = BASE_PATTERN + r"/images(?:/?\?([^#]+))?(?:$|#)"
+ pattern = rf"{BASE_PATTERN}/images(?:/?\?([^#]+))?(?:$|#)"
example = "https://civitai.com/images"
def images(self):
@@ -465,7 +470,7 @@ class CivitaiImagesExtractor(CivitaiExtractor):
class CivitaiVideosExtractor(CivitaiExtractor):
subcategory = "videos"
- pattern = BASE_PATTERN + r"/videos(?:/?\?([^#]+))?(?:$|#)"
+ pattern = rf"{BASE_PATTERN}/videos(?:/?\?([^#]+))?(?:$|#)"
example = "https://civitai.com/videos"
def images(self):
@@ -476,7 +481,7 @@ class CivitaiVideosExtractor(CivitaiExtractor):
class CivitaiPostsExtractor(CivitaiExtractor):
subcategory = "posts"
- pattern = BASE_PATTERN + r"/posts(?:/?\?([^#]+))?(?:$|#)"
+ pattern = rf"{BASE_PATTERN}/posts(?:/?\?([^#]+))?(?:$|#)"
example = "https://civitai.com/posts"
def posts(self):
@@ -485,7 +490,7 @@ class CivitaiPostsExtractor(CivitaiExtractor):
class CivitaiUserExtractor(Dispatch, CivitaiExtractor):
- pattern = USER_PATTERN + r"/?(?:$|\?|#)"
+ pattern = rf"{USER_PATTERN}/?(?:$|\?|#)"
example = "https://civitai.com/user/USER"
def items(self):
@@ -501,7 +506,7 @@ class CivitaiUserExtractor(Dispatch, CivitaiExtractor):
class CivitaiUserModelsExtractor(CivitaiExtractor):
subcategory = "user-models"
- pattern = USER_PATTERN + r"/models/?(?:\?([^#]+))?"
+ pattern = rf"{USER_PATTERN}/models/?(?:\?([^#]+))?"
example = "https://civitai.com/user/USER/models"
def models(self):
@@ -515,7 +520,7 @@ class CivitaiUserPostsExtractor(CivitaiExtractor):
subcategory = "user-posts"
directory_fmt = ("{category}", "{username|user[username]}", "posts",
"{post[id]}{post[title]:? //}")
- pattern = USER_PATTERN + r"/posts/?(?:\?([^#]+))?"
+ pattern = rf"{USER_PATTERN}/posts/?(?:\?([^#]+))?"
example = "https://civitai.com/user/USER/posts"
def posts(self):
@@ -527,7 +532,7 @@ class CivitaiUserPostsExtractor(CivitaiExtractor):
class CivitaiUserImagesExtractor(CivitaiExtractor):
subcategory = "user-images"
- pattern = USER_PATTERN + r"/images/?(?:\?([^#]+))?"
+ pattern = rf"{USER_PATTERN}/images/?(?:\?([^#]+))?"
example = "https://civitai.com/user/USER/images"
def __init__(self, match):
@@ -548,7 +553,7 @@ class CivitaiUserImagesExtractor(CivitaiExtractor):
class CivitaiUserVideosExtractor(CivitaiExtractor):
subcategory = "user-videos"
directory_fmt = ("{category}", "{username|user[username]}", "videos")
- pattern = USER_PATTERN + r"/videos/?(?:\?([^#]+))?"
+ pattern = rf"{USER_PATTERN}/videos/?(?:\?([^#]+))?"
example = "https://civitai.com/user/USER/videos"
def __init__(self, match):
@@ -567,7 +572,7 @@ class CivitaiUserVideosExtractor(CivitaiExtractor):
class CivitaiUserCollectionsExtractor(CivitaiExtractor):
subcategory = "user-collections"
- pattern = USER_PATTERN + r"/collections/?(?:\?([^#]+))?"
+ pattern = rf"{USER_PATTERN}/collections/?(?:\?([^#]+))?"
example = "https://civitai.com/user/USER/collections"
def items(self):
@@ -586,16 +591,15 @@ class CivitaiGeneratedExtractor(CivitaiExtractor):
subcategory = "generated"
filename_fmt = "{filename}.{extension}"
directory_fmt = ("{category}", "generated")
- pattern = f"{BASE_PATTERN}/generate"
+ pattern = rf"{BASE_PATTERN}/generate"
example = "https://civitai.com/generate"
def items(self):
self._require_auth()
for gen in self.api.orchestrator_queryGeneratedImages():
- gen["date"] = text.parse_datetime(
- gen["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
- yield Message.Directory, gen
+ gen["date"] = self.parse_datetime_iso(gen["createdAt"])
+ yield Message.Directory, "", gen
for step in gen.pop("steps", ()):
for image in step.pop("images", ()):
data = {"file": image, **step, **gen}
@@ -719,8 +723,8 @@ class CivitaiTrpcAPI():
if defaults:
params = self._merge_params(params, {
"useIndex" : True,
- "period" : "AllTime",
- "sort" : "Newest",
+ "period" : self._param_period(),
+ "sort" : self._param_sort(),
"withMeta" : False, # Metadata Only
"fromPlatform" : False, # Made On-Site
"browsingLevel": self.nsfw,
@@ -733,8 +737,8 @@ class CivitaiTrpcAPI():
def images_gallery(self, model, version, user):
endpoint = "image.getImagesAsPostsInfinite"
params = {
- "period" : "AllTime",
- "sort" : "Newest",
+ "period" : self._param_period(),
+ "sort" : self._param_sort(),
"modelVersionId": version["id"],
"modelId" : model["id"],
"hidden" : False,
@@ -768,9 +772,9 @@ class CivitaiTrpcAPI():
if defaults:
params = self._merge_params(params, {
- "period" : "AllTime",
+ "period" : self._param_period(),
"periodMode" : "published",
- "sort" : "Newest",
+ "sort" : self._param_sort(),
"pending" : False,
"hidden" : False,
"followed" : False,
@@ -797,9 +801,9 @@ class CivitaiTrpcAPI():
if defaults:
params = self._merge_params(params, {
"browsingLevel": self.nsfw,
- "period" : "AllTime",
+ "period" : self._param_period(),
"periodMode" : "published",
- "sort" : "Newest",
+ "sort" : self._param_sort(),
"followed" : False,
"draftOnly" : False,
"pending" : True,
@@ -821,12 +825,17 @@ class CivitaiTrpcAPI():
if defaults:
params = self._merge_params(params, {
"browsingLevel": self.nsfw,
- "sort" : "Newest",
+ "sort" : self._param_sort(),
})
params = self._type_params(params)
return self._pagination(endpoint, params)
+ def tag_getvotabletags(self, image_id):
+ endpoint = "tag.getVotableTags"
+ params = {"id": int(image_id), "type": "image"}
+ return self._call(endpoint, params)
+
def user(self, username):
endpoint = "user.getCreator"
params = {"username": username}
@@ -835,7 +844,7 @@ class CivitaiTrpcAPI():
def orchestrator_queryGeneratedImages(self):
endpoint = "orchestrator.queryGeneratedImages"
params = {
- "ascending": False,
+            "ascending": self._param_sort() == "Oldest",
"tags" : ("gen",),
"authed" : True,
}
@@ -908,6 +917,21 @@ class CivitaiTrpcAPI():
params[name] = [type(item) for item in value]
return params
+ def _param_period(self):
+ if period := self.extractor.config("period"):
+ return period
+ return "AllTime"
+
+ def _param_sort(self):
+ if sort := self.extractor.config("sort"):
+ s = sort[0].lower()
+ if s in "drn":
+ return "Newest"
+ if s in "ao":
+ return "Oldest"
+ return sort
+ return "Newest"
+
def _bool(value):
return value == "true"
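
The new `_param_period()`/`_param_sort()` helpers let user-supplied `period` and `sort` options override the hard-coded "AllTime"/"Newest" defaults removed in the hunks above. `_param_sort()` dispatches on the first letter of the configured value and passes anything unrecognized through verbatim, so native API values such as "Most Reactions" keep working. A standalone sketch of that mapping (plain function, not the gallery-dl API):

    def param_sort(sort):
        # "date"/"recent"/"newest" (d/r/n) -> "Newest";
        # "ascending"/"oldest"     (a/o)   -> "Oldest";
        # anything else is passed through to the API unchanged
        if sort:
            s = sort[0].lower()
            if s in "drn":
                return "Newest"
            if s in "ao":
                return "Oldest"
            return sort
        return "Newest"

    assert param_sort("date") == "Newest"
    assert param_sort("oldest") == "Oldest"
    assert param_sort("Most Reactions") == "Most Reactions"
    assert param_sort(None) == "Newest"
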
diff --git a/gallery_dl/extractor/comedywildlifephoto.py b/gallery_dl/extractor/comedywildlifephoto.py
new file mode 100644
index 0000000..a1c1ef4
--- /dev/null
+++ b/gallery_dl/extractor/comedywildlifephoto.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.comedywildlifephoto.com/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class ComedywildlifephotoGalleryExtractor(GalleryExtractor):
+ """Extractor for comedywildlifephoto galleries"""
+ category = "comedywildlifephoto"
+ root = "https://www.comedywildlifephoto.com"
+ directory_fmt = ("{category}", "{section}", "{title}")
+ filename_fmt = "{num:>03} {filename}.{extension}"
+ archive_fmt = "{section}/{title}/{num}"
+ pattern = (r"(?:https?://)?(?:www\.)?comedywildlifephoto\.com"
+ r"(/gallery/[^/?#]+/[^/?#]+\.php)")
+ example = "https://www.comedywildlifephoto.com/gallery/SECTION/TITLE.php"
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+
+ return {
+ "section": extr("<h1>", "<").strip(),
+ "title" : extr(">", "<"),
+ "description": text.unescape(extr(
+ 'class="c1 np">', "<div")),
+ }
+
+ def images(self, page):
+ results = []
+
+ for fig in text.extract_iter(page, "<figure", "</figure>"):
+ width, _, height = text.extr(
+ fig, 'data-size="', '"').partition("x")
+ results.append((
+ self.root + text.extr(fig, 'href="', '"'), {
+ "width" : text.parse_int(width),
+ "height" : text.parse_int(height),
+ "caption": text.unescape(text.extr(
+ fig, "<figcaption>", "<")),
+ }
+ ))
+
+ return results
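
The extractor above relies on gallery-dl's lightweight string helpers rather than an HTML parser. A self-contained approximation of the `<figure>` parsing, with a minimal stand-in for `text.extr` (the real helper lives in `gallery_dl.text` and is assumed to behave like this):

    def extr(txt, begin, end, default=""):
        # substring between `begin` and `end`, or `default` if either is missing
        try:
            first = txt.index(begin) + len(begin)
            return txt[first:txt.index(end, first)]
        except ValueError:
            return default

    fig = ('<figure data-size="1000x667"><a href="/images/foo.jpg">'
           '<img src="/thumbs/foo.jpg"></a>'
           '<figcaption>A laughing fox</figcaption></figure>')

    width, _, height = extr(fig, 'data-size="', '"').partition("x")
    print(extr(fig, 'href="', '"'))           # /images/foo.jpg
    print(int(width), int(height))            # 1000 667
    print(extr(fig, "<figcaption>", "<"))     # A laughing fox
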
diff --git a/gallery_dl/extractor/comick.py b/gallery_dl/extractor/comick.py
index c76694c..9816786 100644
--- a/gallery_dl/extractor/comick.py
+++ b/gallery_dl/extractor/comick.py
@@ -27,7 +27,7 @@ class ComickCoversExtractor(ComickBase, GalleryExtractor):
directory_fmt = ("{category}", "{manga}", "Covers")
filename_fmt = "{volume:>02}_{lang}.{extension}"
archive_fmt = "c_{id}"
- pattern = BASE_PATTERN + r"/comic/([\w-]+)/cover"
+ pattern = rf"{BASE_PATTERN}/comic/([\w-]+)/cover"
example = "https://comick.io/comic/MANGA/cover"
def metadata(self, page):
@@ -60,7 +60,7 @@ class ComickCoversExtractor(ComickBase, GalleryExtractor):
class ComickChapterExtractor(ComickBase, ChapterExtractor):
"""Extractor for comick.io manga chapters"""
archive_fmt = "{chapter_hid}_{page}"
- pattern = (BASE_PATTERN + r"/comic/([\w-]+)"
+ pattern = (rf"{BASE_PATTERN}/comic/([\w-]+)"
r"/(\w+(?:-(?:chapter|volume)-[^/?#]+)?)")
example = "https://comick.io/comic/MANGA/ID-chapter-123-en"
@@ -114,10 +114,8 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
"chapter_hid" : ch["hid"],
"chapter_string": chstr,
"group" : ch["group_name"],
- "date" : text.parse_datetime(
- ch["created_at"][:19], "%Y-%m-%dT%H:%M:%S"),
- "date_updated" : text.parse_datetime(
- ch["updated_at"][:19], "%Y-%m-%dT%H:%M:%S"),
+ "date" : self.parse_datetime_iso(ch["created_at"][:19]),
+ "date_updated" : self.parse_datetime_iso(ch["updated_at"][:19]),
"lang" : ch["lang"],
}
@@ -142,7 +140,7 @@ class ComickChapterExtractor(ComickBase, ChapterExtractor):
class ComickMangaExtractor(ComickBase, MangaExtractor):
"""Extractor for comick.io manga"""
- pattern = BASE_PATTERN + r"/comic/([\w-]+)/?(?:\?([^#]+))?"
+ pattern = rf"{BASE_PATTERN}/comic/([\w-]+)/?(?:\?([^#]+))?"
example = "https://comick.io/comic/MANGA"
def items(self):
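
Slicing the raw timestamps to their first 19 characters before parsing normalizes API values that may or may not carry fractional seconds or a UTC offset: "YYYY-MM-DDTHH:MM:SS" is exactly 19 characters and is accepted by `datetime.fromisoformat` as-is. The trade-off is that any offset is dropped, so the result is a naive datetime. For example (hypothetical API value):

    from datetime import datetime

    created_at = "2024-05-01T12:34:56.789000-05:00"   # hypothetical API value
    print(created_at[:19])                            # 2024-05-01T12:34:56
    print(datetime.fromisoformat(created_at[:19]))    # 2024-05-01 12:34:56
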
diff --git a/gallery_dl/extractor/comicvine.py b/gallery_dl/extractor/comicvine.py
index 39397b9..f579ef7 100644
--- a/gallery_dl/extractor/comicvine.py
+++ b/gallery_dl/extractor/comicvine.py
@@ -60,6 +60,6 @@ class ComicvineTagExtractor(BooruExtractor):
_file_url = operator.itemgetter("original")
def _prepare(self, post):
- post["date"] = text.parse_datetime(
+ post["date"] = self.parse_datetime(
post["dateCreated"], "%a, %b %d %Y")
post["tags"] = [tag["name"] for tag in post["tags"] if tag["name"]]
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 34e65c5..13c7bbe 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -19,11 +19,10 @@ import getpass
import logging
import requests
import threading
-from datetime import datetime
from xml.etree import ElementTree
from requests.adapters import HTTPAdapter
from .message import Message
-from .. import config, output, text, util, cache, exception
+from .. import config, output, text, util, dt, cache, exception
urllib3 = requests.packages.urllib3
@@ -32,7 +31,9 @@ class Extractor():
category = ""
subcategory = ""
basecategory = ""
+ basesubcategory = ""
categorytransfer = False
+ parent = False
directory_fmt = ("{category}",)
filename_fmt = "{filename}.{extension}"
archive_fmt = ""
@@ -64,6 +65,10 @@ class Extractor():
else:
self.category = CATEGORY_MAP[self.category]
+ self.parse_datetime = dt.parse
+ self.parse_datetime_iso = dt.parse_iso
+ self.parse_timestamp = dt.parse_ts
+
self._cfgpath = ("extractor", self.category, self.subcategory)
self._parentdir = ""
@@ -89,7 +94,8 @@ class Extractor():
pass
def items(self):
- yield Message.Version, 1
+ return
+ yield
def skip(self, num):
return 0
@@ -313,9 +319,9 @@ class Extractor():
seconds = float(seconds)
until = now + seconds
elif until:
- if isinstance(until, datetime):
+ if isinstance(until, dt.datetime):
# convert to UTC timestamp
- until = util.datetime_to_timestamp(until)
+ until = dt.to_ts(until)
else:
until = float(until)
seconds = until - now
@@ -327,7 +333,7 @@ class Extractor():
return
if reason:
- t = datetime.fromtimestamp(until).time()
+ t = dt.datetime.fromtimestamp(until).time()
isotime = f"{t.hour:02}:{t.minute:02}:{t.second:02}"
self.log.info("Waiting until %s (%s)", isotime, reason)
time.sleep(seconds)
@@ -652,7 +658,7 @@ class Extractor():
self.log.warning(
"cookies: %s/%s expired at %s",
cookie.domain.lstrip("."), cookie.name,
- datetime.fromtimestamp(cookie.expires))
+ dt.datetime.fromtimestamp(cookie.expires))
continue
elif diff <= 86400:
@@ -693,13 +699,16 @@ class Extractor():
def get(key, default):
ts = self.config(key, default)
if isinstance(ts, str):
- try:
- ts = int(datetime.strptime(ts, fmt).timestamp())
- except ValueError as exc:
- self.log.warning("Unable to parse '%s': %s", key, exc)
+ dt_obj = dt.parse_iso(ts) if fmt is None else dt.parse(ts, fmt)
+ if dt_obj is dt.NONE:
+ self.log.warning(
+ "Unable to parse '%s': Invalid %s string '%s'",
+ key, "isoformat" if fmt is None else "date", ts)
ts = default
+ else:
+ ts = int(dt.to_ts(dt_obj))
return ts
- fmt = self.config("date-format", "%Y-%m-%dT%H:%M:%S")
+ fmt = self.config("date-format")
return get("date-min", dmin), get("date-max", dmax)
@classmethod
@@ -793,7 +802,7 @@ class GalleryExtractor(Extractor):
enum = util.enumerate_reversed
images = enum(imgs, 1)
- yield Message.Directory, data
+ yield Message.Directory, "", data
enum_key = self.enum
if assets:
@@ -912,7 +921,7 @@ class Dispatch():
elif isinstance(include, str):
include = include.replace(" ", "").split(",")
- results = [(Message.Version, 1)]
+ results = []
for category in include:
try:
extr, url = extractors[category]
@@ -962,18 +971,16 @@ class BaseExtractor(Extractor):
def __init__(self, match):
if not self.category:
- self.groups = match.groups()
- self.match = match
- self._init_category()
+ self._init_category(match)
Extractor.__init__(self, match)
- def _init_category(self):
- for index, group in enumerate(self.groups):
+ def _init_category(self, match):
+ for index, group in enumerate(match.groups()):
if group is not None:
if index:
self.category, self.root, info = self.instances[index-1]
if not self.root:
- self.root = text.root_from_url(self.match[0])
+ self.root = text.root_from_url(match[0])
self.config_instance = info.get
else:
self.root = group
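
The change running through this whole release is the new `gallery_dl.dt` module replacing scattered `text.parse_datetime`/`text.parse_timestamp` calls and direct `datetime` imports; `Extractor.__init__` now exposes its functions as `self.parse_datetime`, `self.parse_datetime_iso` and `self.parse_timestamp`. Judging only from the call sites in this diff, the helpers behave roughly like the sketch below — an assumption; the actual implementations (including a dedicated `dt.NONE` sentinel) live in the new gallery_dl/dt.py:

    from datetime import datetime, timezone

    NONE = None  # stand-in; gallery_dl.dt uses its own sentinel object

    def parse(s, fmt):
        # strptime wrapper returning a sentinel instead of raising
        try:
            return datetime.strptime(s, fmt)
        except (TypeError, ValueError):
            return NONE

    def parse_iso(s):
        # ISO 8601, tolerating a trailing "Z"
        try:
            return datetime.fromisoformat(s.replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError):
            return NONE

    def parse_ts(ts):
        # Unix timestamp -> aware UTC datetime
        try:
            return datetime.fromtimestamp(int(ts), timezone.utc)
        except (TypeError, ValueError):
            return NONE

    def to_ts(d):
        # datetime -> Unix timestamp
        return d.timestamp()
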
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index b3944f7..93d3953 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -4,27 +4,27 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://cyberdrop.me/"""
+"""Extractors for https://cyberdrop.cr/"""
from . import lolisafe
from .common import Message
from .. import text
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?cyberdrop\.(?:me|to)"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?cyberdrop\.(?:cr|me|to)"
class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
"""Extractor for cyberdrop albums"""
category = "cyberdrop"
- root = "https://cyberdrop.me"
- root_api = "https://api.cyberdrop.me"
- pattern = BASE_PATTERN + r"/a/([^/?#]+)"
- example = "https://cyberdrop.me/a/ID"
+ root = "https://cyberdrop.cr"
+ root_api = "https://api.cyberdrop.cr"
+ pattern = rf"{BASE_PATTERN}/a/([^/?#]+)"
+ example = "https://cyberdrop.cr/a/ID"
def items(self):
files, data = self.fetch_album(self.album_id)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], file in enumerate(files, 1):
file.update(data)
text.nameext_from_url(file["name"], file)
@@ -47,7 +47,7 @@ class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
"album_name" : text.unescape(extr('title="', '"')),
"album_size" : text.parse_bytes(extr(
'<p class="title">', "B")),
- "date" : text.parse_datetime(extr(
+ "date" : self.parse_datetime(extr(
'<p class="title">', '<'), "%d.%m.%Y"),
"description": text.unescape(text.unescape( # double
desc.rpartition(" [R")[0])),
@@ -76,8 +76,8 @@ class CyberdropMediaExtractor(CyberdropAlbumExtractor):
"""Extractor for cyberdrop media links"""
subcategory = "media"
directory_fmt = ("{category}",)
- pattern = BASE_PATTERN + r"/f/([^/?#]+)"
- example = "https://cyberdrop.me/f/ID"
+ pattern = rf"{BASE_PATTERN}/f/([^/?#]+)"
+ example = "https://cyberdrop.cr/f/ID"
def fetch_album(self, album_id):
return self._extract_files((album_id,)), {
diff --git a/gallery_dl/extractor/cyberfile.py b/gallery_dl/extractor/cyberfile.py
index 2ea81d6..e8c0061 100644
--- a/gallery_dl/extractor/cyberfile.py
+++ b/gallery_dl/extractor/cyberfile.py
@@ -56,7 +56,9 @@ class CyberfileFolderExtractor(CyberfileExtractor):
url = f"{self.root}/folder/{folder_hash}"
folder_num = text.extr(self.request(url).text, "ages('folder', '", "'")
- extract_urls = text.re(r'dtfullurl="([^"]+)').findall
+ extract_folders = text.re(r'sharing-url="([^"]+)').findall
+ extract_files = text.re(r'dtfullurl="([^"]+)').findall
+ recursive = self.config("recursive", True)
perpage = 600
data = {
@@ -67,25 +69,63 @@ class CyberfileFolderExtractor(CyberfileExtractor):
"filterOrderBy": "",
}
resp = self.request_api("/account/ajax/load_files", data)
+ html = resp["html"]
folder = {
- "_extractor" : CyberfileFileExtractor,
"folder_hash": folder_hash,
"folder_num" : text.parse_int(folder_num),
"folder" : resp["page_title"],
}
while True:
- urls = extract_urls(resp["html"])
- for url in urls:
- yield Message.Queue, url, folder
-
- if len(urls) < perpage:
+ folders = extract_folders(html)
+ if recursive and folders:
+ folder["_extractor"] = CyberfileFolderExtractor
+ for url in folders:
+ yield Message.Queue, url, folder
+
+ if files := extract_files(html):
+ folder["_extractor"] = CyberfileFileExtractor
+ for url in files:
+ yield Message.Queue, url, folder
+
+ if len(folders) + len(files) < perpage:
return
data["pageStart"] += 1
resp = self.request_api("/account/ajax/load_files", data)
+class CyberfileSharedExtractor(CyberfileExtractor):
+ subcategory = "shared"
+ pattern = rf"{BASE_PATTERN}/shared/([a-zA-Z0-9]+)"
+ example = "https://cyberfile.me/shared/AbCdEfGhIjK"
+
+ def items(self):
+ # get 'filehosting' cookie
+ url = f"{self.root}/shared/{self.groups[0]}"
+ self.request(url, method="HEAD")
+
+ data = {
+ "pageType" : "nonaccountshared",
+ "nodeId" : "",
+ "pageStart": "1",
+ "perPage" : "500",
+ "filterOrderBy": "",
+ }
+ resp = self.request_api("/account/ajax/load_files", data)
+
+ html = resp["html"]
+ pos = html.find("<!-- /.navbar-collapse -->") + 26
+
+ data = {"_extractor": CyberfileFolderExtractor}
+ for url in text.extract_iter(html, 'sharing-url="', '"', pos):
+ yield Message.Queue, url, data
+
+ data = {"_extractor": CyberfileFileExtractor}
+ for url in text.extract_iter(html, 'dtfullurl="', '"', pos):
+ yield Message.Queue, url, data
+
+
class CyberfileFileExtractor(CyberfileExtractor):
subcategory = "file"
directory_fmt = ("{category}", "{uploader}", "{folder}")
@@ -113,7 +153,7 @@ class CyberfileFileExtractor(CyberfileExtractor):
"Filesize:", "</tr>"))[:-1]),
"tags" : text.split_html(extr(
"Keywords:", "</tr>")),
- "date" : text.parse_datetime(text.remove_html(extr(
+ "date" : self.parse_datetime(text.remove_html(extr(
"Uploaded:", "</tr>")), "%d/%m/%Y %H:%M:%S"),
"permissions": text.remove_html(extr(
"Permissions:", "</tr>")).split(" &amp; "),
@@ -121,5 +161,5 @@ class CyberfileFileExtractor(CyberfileExtractor):
file["file_url"] = url = extr("openUrl('", "'")
text.nameext_from_url(file["name"] or url, file)
- yield Message.Directory, file
+ yield Message.Directory, "", file
yield Message.Url, url, file
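
The reworked folder extractor now queues sub-folders (matched via `sharing-url=`) before files (`dtfullurl=`) and recurses into them unless `recursive` is disabled; paging stops as soon as a page comes back short. The termination logic, reduced to its core (illustrative helper names, not gallery-dl API):

    def paginate(load_page, perpage=600):
        # load_page(n) is assumed to return (folder_urls, file_urls) for page n
        pnum = 0
        while True:
            folders, files = load_page(pnum)
            yield from folders
            yield from files
            if len(folders) + len(files) < perpage:
                return  # a short page means there is no next page
            pnum += 1

    pages = [(["folder1"], ["a", "b"]), ([], ["c"])]
    print(list(paginate(lambda n: pages[n], perpage=3)))
    # ['folder1', 'a', 'b', 'c']
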
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 29c7763..5ea33c4 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -9,8 +9,7 @@
"""Extractors for https://danbooru.donmai.us/ and other Danbooru instances"""
from .common import BaseExtractor, Message
-from .. import text, util
-import datetime
+from .. import text, util, dt
class DanbooruExtractor(BaseExtractor):
@@ -64,13 +63,12 @@ class DanbooruExtractor(BaseExtractor):
except KeyError:
if self.external and post["source"]:
post.update(data)
- yield Message.Directory, post
+ yield Message.Directory, "", post
yield Message.Queue, post["source"], post
continue
text.nameext_from_url(url, post)
- post["date"] = text.parse_datetime(
- post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ post["date"] = dt.parse_iso(post["created_at"])
post["tags"] = (
post["tag_string"].split(" ")
@@ -108,7 +106,7 @@ class DanbooruExtractor(BaseExtractor):
url = self.root + url
post.update(data)
- yield Message.Directory, post
+ yield Message.Directory, "", post
yield Message.Url, url, post
def items_artists(self):
@@ -253,7 +251,7 @@ class DanbooruTagExtractor(DanbooruExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"/posts\?(?:[^&#]*&)*tags=([^&#]*)"
+ pattern = rf"{BASE_PATTERN}/posts\?(?:[^&#]*&)*tags=([^&#]*)"
example = "https://danbooru.donmai.us/posts?tags=TAG"
def metadata(self):
@@ -281,7 +279,7 @@ class DanbooruTagExtractor(DanbooruExtractor):
class DanbooruRandomExtractor(DanbooruTagExtractor):
"""Extractor for a random danbooru post"""
subcategory = "random"
- pattern = BASE_PATTERN + r"/posts/random(?:\?(?:[^&#]*&)*tags=([^&#]*))?"
+ pattern = rf"{BASE_PATTERN}/posts/random(?:\?(?:[^&#]*&)*tags=([^&#]*))?"
example = "https://danbooru.donmai.us/posts/random?tags=TAG"
def metadata(self):
@@ -301,7 +299,7 @@ class DanbooruPoolExtractor(DanbooruExtractor):
directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name]}")
filename_fmt = "{num:>04}_{id}_{filename}.{extension}"
archive_fmt = "p_{pool[id]}_{id}"
- pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/pool(?:s|/show)/(\d+)"
example = "https://danbooru.donmai.us/pools/12345"
def metadata(self):
@@ -319,7 +317,7 @@ class DanbooruFavgroupExtractor(DanbooruExtractor):
"{favgroup[id]} {favgroup[name]}")
filename_fmt = "{num:>04}_{id}_{filename}.{extension}"
archive_fmt = "fg_{favgroup[id]}_{id}"
- pattern = BASE_PATTERN + r"/favorite_group(?:s|/show)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/favorite_group(?:s|/show)/(\d+)"
example = "https://danbooru.donmai.us/favorite_groups/12345"
def metadata(self):
@@ -334,7 +332,7 @@ class DanbooruPostExtractor(DanbooruExtractor):
"""Extractor for single danbooru posts"""
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/post(?:s|/show)/(\d+)"
example = "https://danbooru.donmai.us/posts/12345"
def posts(self):
@@ -346,22 +344,48 @@ class DanbooruPostExtractor(DanbooruExtractor):
return (post,)
+class DanbooruMediaassetExtractor(DanbooruExtractor):
+ """Extractor for a danbooru media asset"""
+ subcategory = "media-asset"
+ filename_fmt = "{category}_ma{id}_{filename}.{extension}"
+ archive_fmt = "m{id}"
+ pattern = rf"{BASE_PATTERN}/media_assets/(\d+)"
+ example = "https://danbooru.donmai.us/media_assets/12345"
+
+ def posts(self):
+ url = f"{self.root}/media_assets/{self.groups[-1]}.json"
+ asset = self.request_json(url)
+
+ asset["file_url"] = asset["variants"][-1]["url"]
+ asset["tag_string"] = \
+ asset["tag_string_artist"] = \
+ asset["tag_string_character"] = \
+ asset["tag_string_copyright"] = \
+ asset["tag_string_general"] = \
+ asset["tag_string_meta"] = ""
+
+ if self.includes:
+ params = {"only": self.includes}
+ asset.update(self.request_json(url, params=params))
+ return (asset,)
+
+
class DanbooruPopularExtractor(DanbooruExtractor):
"""Extractor for popular images from danbooru"""
subcategory = "popular"
directory_fmt = ("{category}", "popular", "{scale}", "{date}")
archive_fmt = "P_{scale[0]}_{date}_{id}"
- pattern = BASE_PATTERN + r"/(?:explore/posts/)?popular(?:\?([^#]*))?"
+ pattern = rf"{BASE_PATTERN}/(?:explore/posts/)?popular(?:\?([^#]*))?"
example = "https://danbooru.donmai.us/explore/posts/popular"
def metadata(self):
self.params = params = text.parse_query(self.groups[-1])
scale = params.get("scale", "day")
- date = params.get("date") or datetime.date.today().isoformat()
+ date = params.get("date") or dt.date.today().isoformat()
if scale == "week":
- date = datetime.date.fromisoformat(date)
- date = (date - datetime.timedelta(days=date.weekday())).isoformat()
+ date = dt.date.fromisoformat(date)
+ date = (date - dt.timedelta(days=date.weekday())).isoformat()
elif scale == "month":
date = date[:-3]
@@ -374,7 +398,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
class DanbooruArtistExtractor(DanbooruExtractor):
"""Extractor for danbooru artists"""
subcategory = "artist"
- pattern = BASE_PATTERN + r"/artists/(\d+)"
+ pattern = rf"{BASE_PATTERN}/artists/(\d+)"
example = "https://danbooru.donmai.us/artists/12345"
items = DanbooruExtractor.items_artists
@@ -387,7 +411,7 @@ class DanbooruArtistExtractor(DanbooruExtractor):
class DanbooruArtistSearchExtractor(DanbooruExtractor):
"""Extractor for danbooru artist searches"""
subcategory = "artist-search"
- pattern = BASE_PATTERN + r"/artists/?\?([^#]+)"
+ pattern = rf"{BASE_PATTERN}/artists/?\?([^#]+)"
example = "https://danbooru.donmai.us/artists?QUERY"
items = DanbooruExtractor.items_artists
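
The new media-asset extractor fetches `/media_assets/<id>.json` and takes the last entry of `variants` as the file URL, blanking the `tag_string_*` fields because a bare asset carries no tags. The assumed response shape (illustrative values, not verbatim API output):

    asset = {
        "id": 12345,
        "variants": [
            {"type": "180x180",  "url": "https://example.org/preview.jpg"},
            {"type": "720x720",  "url": "https://example.org/sample.jpg"},
            {"type": "original", "url": "https://example.org/original.png"},
        ],
    }
    file_url = asset["variants"][-1]["url"]  # highest-resolution variant
    print(file_url)                          # https://example.org/original.png
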
diff --git a/gallery_dl/extractor/dankefuerslesen.py b/gallery_dl/extractor/dankefuerslesen.py
index 1c4b7d8..ed7e40b 100644
--- a/gallery_dl/extractor/dankefuerslesen.py
+++ b/gallery_dl/extractor/dankefuerslesen.py
@@ -28,7 +28,7 @@ class DankefuerslesenBase():
class DankefuerslesenChapterExtractor(DankefuerslesenBase, ChapterExtractor):
"""Extractor for Danke fürs Lesen manga chapters"""
- pattern = BASE_PATTERN + r"/read/manga/([\w-]+)/([\w-]+)"
+ pattern = rf"{BASE_PATTERN}/read/manga/([\w-]+)/([\w-]+)"
example = "https://danke.moe/read/manga/TITLE/123/1/"
def _init(self):
@@ -68,7 +68,7 @@ class DankefuerslesenChapterExtractor(DankefuerslesenBase, ChapterExtractor):
"chapter_minor": minor,
"group" : manga["groups"][group_id].split(" & "),
"group_id" : text.parse_int(group_id),
- "date" : text.parse_timestamp(data["release_date"][group_id]),
+ "date" : self.parse_timestamp(data["release_date"][group_id]),
"lang" : util.NONE,
"language" : util.NONE,
}
@@ -95,7 +95,7 @@ class DankefuerslesenMangaExtractor(DankefuerslesenBase, MangaExtractor):
"""Extractor for Danke fürs Lesen manga"""
chapterclass = DankefuerslesenChapterExtractor
reverse = False
- pattern = BASE_PATTERN + r"/read/manga/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/read/manga/([^/?#]+)"
example = "https://danke.moe/read/manga/TITLE/"
def chapters(self, page):
diff --git a/gallery_dl/extractor/desktopography.py b/gallery_dl/extractor/desktopography.py
index 364d88f..be25053 100644
--- a/gallery_dl/extractor/desktopography.py
+++ b/gallery_dl/extractor/desktopography.py
@@ -22,7 +22,7 @@ class DesktopographyExtractor(Extractor):
class DesktopographySiteExtractor(DesktopographyExtractor):
"""Extractor for all desktopography exhibitions """
subcategory = "site"
- pattern = BASE_PATTERN + r"/$"
+ pattern = rf"{BASE_PATTERN}/$"
example = "https://desktopography.net/"
def items(self):
@@ -41,7 +41,7 @@ class DesktopographySiteExtractor(DesktopographyExtractor):
class DesktopographyExhibitionExtractor(DesktopographyExtractor):
"""Extractor for a yearly desktopography exhibition"""
subcategory = "exhibition"
- pattern = BASE_PATTERN + r"/exhibition-([^/?#]+)/"
+ pattern = rf"{BASE_PATTERN}/exhibition-([^/?#]+)/"
example = "https://desktopography.net/exhibition-2020/"
def __init__(self, match):
@@ -70,7 +70,7 @@ class DesktopographyExhibitionExtractor(DesktopographyExtractor):
class DesktopographyEntryExtractor(DesktopographyExtractor):
"""Extractor for all resolutions of a desktopography wallpaper"""
subcategory = "entry"
- pattern = BASE_PATTERN + r"/portfolios/([\w-]+)"
+ pattern = rf"{BASE_PATTERN}/portfolios/([\w-]+)"
example = "https://desktopography.net/portfolios/NAME/"
def __init__(self, match):
@@ -82,7 +82,7 @@ class DesktopographyEntryExtractor(DesktopographyExtractor):
page = self.request(url).text
entry_data = {"entry": self.entry}
- yield Message.Directory, entry_data
+ yield Message.Directory, "", entry_data
for image_data in text.extract_iter(
page,
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 39690da..5bd43d4 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -9,7 +9,7 @@
"""Extractors for https://www.deviantart.com/"""
from .common import Extractor, Message, Dispatch
-from .. import text, util, exception
+from .. import text, util, dt, exception
from ..cache import cache, memcache
import collections
import mimetypes
@@ -64,13 +64,13 @@ class DeviantartExtractor(Extractor):
if self.quality:
if self.quality == "png":
self.quality = "-fullview.png?"
- self.quality_sub = util.re(r"-fullview\.[a-z0-9]+\?").sub
+ self.quality_sub = text.re(r"-fullview\.[a-z0-9]+\?").sub
else:
self.quality = f",q_{self.quality}"
- self.quality_sub = util.re(r",q_\d+").sub
+ self.quality_sub = text.re(r",q_\d+").sub
if self.intermediary:
- self.intermediary_subn = util.re(r"(/f/[^/]+/[^/]+)/v\d+/.*").subn
+ self.intermediary_subn = text.re(r"(/f/[^/]+/[^/]+)/v\d+/.*").subn
if isinstance(self.original, str) and \
self.original.lower().startswith("image"):
@@ -154,7 +154,7 @@ class DeviantartExtractor(Extractor):
deviation.update(data)
self.prepare(deviation)
- yield Message.Directory, deviation
+ yield Message.Directory, "", deviation
if "content" in deviation:
content = self._extract_content(deviation)
@@ -259,7 +259,7 @@ class DeviantartExtractor(Extractor):
deviation["published_time"] = text.parse_int(
deviation["published_time"])
- deviation["date"] = text.parse_timestamp(
+ deviation["date"] = self.parse_timestamp(
deviation["published_time"])
if self.comments:
@@ -269,7 +269,7 @@ class DeviantartExtractor(Extractor):
)
# filename metadata
- sub = util.re(r"\W").sub
+ sub = text.re(r"\W").sub
deviation["filename"] = "".join((
sub("_", deviation["title"].lower()), "_by_",
sub("_", deviation["author"]["username"].lower()), "-d",
@@ -404,7 +404,7 @@ class DeviantartExtractor(Extractor):
try:
return self._tiptap_to_html(markup)
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
self.log.error("%s: '%s: %s'", deviation["index"],
exc.__class__.__name__, exc)
@@ -675,7 +675,7 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
def _find_folder(self, folders, name, uuid):
if uuid.isdecimal():
- match = util.re(
+ match = text.re(
"(?i)" + name.replace("-", "[^a-z0-9]+") + "$").match
for folder in folders:
if match(folder["name"]):
@@ -864,7 +864,7 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
class DeviantartUserExtractor(Dispatch, DeviantartExtractor):
"""Extractor for an artist's user profile"""
- pattern = BASE_PATTERN + r"/?$"
+ pattern = rf"{BASE_PATTERN}/?$"
example = "https://www.deviantart.com/USER"
def items(self):
@@ -887,8 +887,8 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
"""Extractor for all deviations from an artist's gallery"""
subcategory = "gallery"
archive_fmt = "g_{_username}_{index}.{extension}"
- pattern = (BASE_PATTERN + r"/gallery"
- r"(?:/all|/recommended-for-you|/?\?catpath=)?/?$")
+ pattern = (rf"{BASE_PATTERN}/gallery"
+ r"(?:/all|/recommended-for-you)?/?(\?(?!q=).*)?$")
example = "https://www.deviantart.com/USER/gallery/"
def deviations(self):
@@ -902,7 +902,7 @@ class DeviantartAvatarExtractor(DeviantartExtractor):
"""Extractor for an artist's avatar"""
subcategory = "avatar"
archive_fmt = "a_{_username}_{index}"
- pattern = BASE_PATTERN + r"/avatar"
+ pattern = rf"{BASE_PATTERN}/avatar"
example = "https://www.deviantart.com/USER/avatar/"
def deviations(self):
@@ -956,7 +956,7 @@ class DeviantartBackgroundExtractor(DeviantartExtractor):
"""Extractor for an artist's banner"""
subcategory = "background"
archive_fmt = "b_{index}"
- pattern = BASE_PATTERN + r"/ba(?:nner|ckground)"
+ pattern = rf"{BASE_PATTERN}/ba(?:nner|ckground)"
example = "https://www.deviantart.com/USER/banner/"
def deviations(self):
@@ -972,7 +972,7 @@ class DeviantartFolderExtractor(DeviantartExtractor):
subcategory = "folder"
directory_fmt = ("{category}", "{username}", "{folder[title]}")
archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/gallery/([^/?#]+)/([^/?#]+)"
example = "https://www.deviantart.com/USER/gallery/12345/TITLE"
def __init__(self, match):
@@ -1088,7 +1088,7 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
subcategory = "favorite"
directory_fmt = ("{category}", "{username}", "Favourites")
archive_fmt = "f_{_username}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/favourites(?:/all|/?\?catpath=)?/?$"
+ pattern = rf"{BASE_PATTERN}/favourites(?:/all|/?\?catpath=)?/?$"
example = "https://www.deviantart.com/USER/favourites/"
def deviations(self):
@@ -1105,7 +1105,7 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
directory_fmt = ("{category}", "{username}", "Favourites",
"{collection[title]}")
archive_fmt = "C_{collection[uuid]}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/favourites/([^/?#]+)/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/favourites/([^/?#]+)/([^/?#]+)"
example = "https://www.deviantart.com/USER/favourites/12345/TITLE"
def __init__(self, match):
@@ -1136,7 +1136,7 @@ class DeviantartJournalExtractor(DeviantartExtractor):
subcategory = "journal"
directory_fmt = ("{category}", "{username}", "Journal")
archive_fmt = "j_{_username}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$"
+ pattern = rf"{BASE_PATTERN}/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$"
example = "https://www.deviantart.com/USER/posts/journals/"
def deviations(self):
@@ -1149,7 +1149,7 @@ class DeviantartStatusExtractor(DeviantartExtractor):
directory_fmt = ("{category}", "{username}", "Status")
filename_fmt = "{category}_{index}_{title}_{date}.{extension}"
archive_fmt = "S_{_username}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/posts/statuses"
+ pattern = rf"{BASE_PATTERN}/posts/statuses"
example = "https://www.deviantart.com/USER/posts/statuses/"
def deviations(self):
@@ -1187,8 +1187,8 @@ class DeviantartStatusExtractor(DeviantartExtractor):
deviation["username"] = deviation["author"]["username"]
deviation["_username"] = deviation["username"].lower()
- deviation["date"] = dt = text.parse_datetime(deviation["ts"])
- deviation["published_time"] = int(util.datetime_to_timestamp(dt))
+ deviation["date"] = d = self.parse_datetime_iso(deviation["ts"])
+ deviation["published_time"] = int(dt.to_ts(d))
deviation["da_category"] = "Status"
deviation["category_path"] = "status"
@@ -1253,7 +1253,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
"""Extractor for single deviations"""
subcategory = "deviation"
archive_fmt = "g_{_username}_{index}.{extension}"
- pattern = (BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)"
+ pattern = (rf"{BASE_PATTERN}/(art|journal)/(?:[^/?#]+-)?(\d+)"
r"|(?:https?://)?(?:www\.)?(?:fx)?deviantart\.com/"
r"(?:view/|deviation/|view(?:-full)?\.php/*\?(?:[^#]+&)?id=)"
r"(\d+)" # bare deviation ID without slug
@@ -1315,7 +1315,7 @@ class DeviantartScrapsExtractor(DeviantartExtractor):
subcategory = "scraps"
directory_fmt = ("{category}", "{username}", "Scraps")
archive_fmt = "s_{_username}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
+ pattern = rf"{BASE_PATTERN}/gallery/(?:\?catpath=)?scraps\b"
example = "https://www.deviantart.com/USER/gallery/scraps"
def deviations(self):
@@ -1382,7 +1382,7 @@ class DeviantartGallerySearchExtractor(DeviantartExtractor):
"""Extractor for deviantart gallery searches"""
subcategory = "gallery-search"
archive_fmt = "g_{_username}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/gallery/?\?(q=[^#]+)"
+ pattern = rf"{BASE_PATTERN}/gallery/?\?(q=[^#]+)"
example = "https://www.deviantart.com/USER/gallery?q=QUERY"
def __init__(self, match):
@@ -1412,7 +1412,7 @@ class DeviantartGallerySearchExtractor(DeviantartExtractor):
class DeviantartFollowingExtractor(DeviantartExtractor):
"""Extractor for user's watched users"""
subcategory = "following"
- pattern = BASE_PATTERN + "/(?:about#)?watching"
+ pattern = rf"{BASE_PATTERN}/(?:about#)?watching"
example = "https://www.deviantart.com/USER/about#watching"
def items(self):
diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py
index 85358ba..bbc1ef0 100644
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@@ -40,5 +40,5 @@ class DirectlinkExtractor(Extractor):
data["_http_headers"] = {
"Referer": self.url.encode("latin-1", "ignore")}
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, self.url, data
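
directlink.py is the smallest instance of a change applied across this release: `Message.Directory` items grow from 2-tuples to 3-tuples, with an empty string in the new middle slot at every call site shown here (the semantics of a non-empty value are defined outside this excerpt). Consumers that unpack these messages must account for the extra field; schematically:

    class Message:
        Directory = 1
        Url = 2

    def items():
        data = {"title": "example"}
        yield Message.Directory, "", data                     # new 3-tuple form
        yield Message.Url, "https://example.org/a.jpg", data

    for msg in items():
        if msg[0] == Message.Directory:
            _, subdir, data = msg   # middle slot is "" at the call sites above
            print(repr(subdir), data)
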
diff --git a/gallery_dl/extractor/discord.py b/gallery_dl/extractor/discord.py
index 216e486..0e7f309 100644
--- a/gallery_dl/extractor/discord.py
+++ b/gallery_dl/extractor/discord.py
@@ -19,7 +19,7 @@ class DiscordExtractor(Extractor):
root = "https://discord.com"
directory_fmt = ("{category}", "{server_id}_{server}",
"{channel_id}_{channel}")
- filename_fmt = "{message_id}_{num:>02}_{filename}.{extension}"
+ filename_fmt = "{message_id}_{num:>02}_{filename[:220]}.{extension}"
archive_fmt = "{message_id}_{num}"
server_metadata = {}
@@ -72,9 +72,7 @@ class DiscordExtractor(Extractor):
"author_files": [],
"message": self.extract_message_text(message),
"message_id": message["id"],
- "date": text.parse_datetime(
- message["timestamp"], "%Y-%m-%dT%H:%M:%S.%f%z"
- ),
+ "date": self.parse_datetime_iso(message["timestamp"]),
"files": []
})
@@ -122,7 +120,7 @@ class DiscordExtractor(Extractor):
text.nameext_from_url(file["url"], file)
file["num"] = num
- yield Message.Directory, message_metadata
+ yield Message.Directory, "", message_metadata
for file in message_metadata["files"]:
message_metadata_file = message_metadata.copy()
@@ -240,7 +238,7 @@ class DiscordExtractor(Extractor):
class DiscordChannelExtractor(DiscordExtractor):
subcategory = "channel"
- pattern = BASE_PATTERN + r"/channels/(\d+)/(?:\d+/threads/)?(\d+)/?$"
+ pattern = rf"{BASE_PATTERN}/channels/(\d+)/(?:\d+/threads/)?(\d+)/?$"
example = "https://discord.com/channels/1234567890/9876543210"
def items(self):
@@ -253,7 +251,7 @@ class DiscordChannelExtractor(DiscordExtractor):
class DiscordMessageExtractor(DiscordExtractor):
subcategory = "message"
- pattern = BASE_PATTERN + r"/channels/(\d+)/(\d+)/(\d+)/?$"
+ pattern = rf"{BASE_PATTERN}/channels/(\d+)/(\d+)/(\d+)/?$"
example = "https://discord.com/channels/1234567890/9876543210/2468013579"
def items(self):
@@ -270,7 +268,7 @@ class DiscordMessageExtractor(DiscordExtractor):
class DiscordServerExtractor(DiscordExtractor):
subcategory = "server"
- pattern = BASE_PATTERN + r"/channels/(\d+)/?$"
+ pattern = rf"{BASE_PATTERN}/channels/(\d+)/?$"
example = "https://discord.com/channels/1234567890"
def items(self):
@@ -288,7 +286,7 @@ class DiscordDirectMessagesExtractor(DiscordExtractor):
subcategory = "direct-messages"
directory_fmt = ("{category}", "Direct Messages",
"{channel_id}_{recipients:J,}")
- pattern = BASE_PATTERN + r"/channels/@me/(\d+)/?$"
+ pattern = rf"{BASE_PATTERN}/channels/@me/(\d+)/?$"
example = "https://discord.com/channels/@me/1234567890"
def items(self):
@@ -299,7 +297,7 @@ class DiscordDirectMessageExtractor(DiscordExtractor):
subcategory = "direct-message"
directory_fmt = ("{category}", "Direct Messages",
"{channel_id}_{recipients:J,}")
- pattern = BASE_PATTERN + r"/channels/@me/(\d+)/(\d+)/?$"
+ pattern = rf"{BASE_PATTERN}/channels/@me/(\d+)/(\d+)/?$"
example = "https://discord.com/channels/@me/1234567890/9876543210"
def items(self):
diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py
index 3e0424d..36423db 100644
--- a/gallery_dl/extractor/dynastyscans.py
+++ b/gallery_dl/extractor/dynastyscans.py
@@ -41,12 +41,12 @@ class DynastyscansBase():
class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
"""Extractor for manga-chapters from dynasty-scans.com"""
- pattern = BASE_PATTERN + r"(/chapters/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/chapters/[^/?#]+)"
example = "https://dynasty-scans.com/chapters/NAME"
def metadata(self, page):
extr = text.extract_from(page)
- match = util.re(
+ match = text.re(
r"(?:<a[^>]*>)?([^<]+)(?:</a>)?" # manga name
r"(?: ch(\d+)([^:<]*))?" # chapter info
r"(?:: (.+))?" # title
@@ -62,7 +62,7 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
"author" : text.remove_html(author),
"group" : (text.remove_html(group) or
text.extr(group, ' alt="', '"')),
- "date" : text.parse_datetime(extr(
+ "date" : self.parse_datetime(extr(
'"icon-calendar"></i> ', '<'), "%b %d, %Y"),
"tags" : text.split_html(extr(
"class='tags'>", "<div id='chapter-actions'")),
@@ -81,7 +81,7 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
class DynastyscansMangaExtractor(DynastyscansBase, MangaExtractor):
chapterclass = DynastyscansChapterExtractor
reverse = False
- pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/series/[^/?#]+)"
example = "https://dynasty-scans.com/series/NAME"
def chapters(self, page):
@@ -97,7 +97,7 @@ class DynastyscansSearchExtractor(DynastyscansBase, Extractor):
directory_fmt = ("{category}", "Images")
filename_fmt = "{image_id}.{extension}"
archive_fmt = "i_{image_id}"
- pattern = BASE_PATTERN + r"/images/?(?:\?([^#]+))?$"
+ pattern = rf"{BASE_PATTERN}/images/?(?:\?([^#]+))?$"
example = "https://dynasty-scans.com/images?QUERY"
def __init__(self, match):
@@ -105,7 +105,7 @@ class DynastyscansSearchExtractor(DynastyscansBase, Extractor):
self.query = match[1] or ""
def items(self):
- yield Message.Directory, {}
+ yield Message.Directory, "", {}
for image_id in self.images():
image = self._parse_image_page(image_id)
url = image["url"]
@@ -126,7 +126,7 @@ class DynastyscansSearchExtractor(DynastyscansBase, Extractor):
class DynastyscansImageExtractor(DynastyscansSearchExtractor):
"""Extractor for individual images on dynasty-scans.com"""
subcategory = "image"
- pattern = BASE_PATTERN + r"/images/(\d+)"
+ pattern = rf"{BASE_PATTERN}/images/(\d+)"
example = "https://dynasty-scans.com/images/12345"
def images(self):
@@ -136,7 +136,7 @@ class DynastyscansImageExtractor(DynastyscansSearchExtractor):
class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
"""Extractor for dynasty-scans anthologies"""
subcategory = "anthology"
- pattern = BASE_PATTERN + r"/anthologies/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/anthologies/([^/?#]+)"
example = "https://dynasty-scans.com/anthologies/TITLE"
def items(self):
@@ -166,8 +166,6 @@ class DynastyscansAnthologyExtractor(DynastyscansSearchExtractor):
data["scanlator"] = content[1].text[11:]
data["tags"] = content[2].text[6:].lower().split(", ")
data["title"] = element[5].text
- data["date"] = text.parse_datetime(
- element[1].text, "%Y-%m-%dT%H:%M:%S%z")
- data["date_updated"] = text.parse_datetime(
- element[2].text, "%Y-%m-%dT%H:%M:%S%z")
+ data["date"] = self.parse_datetime_iso(element[1].text)
+ data["date_updated"] = self.parse_datetime_iso(element[2].text)
yield Message.Queue, element[4].text, data
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
index 71c3b30..cc6708d 100644
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -51,13 +51,18 @@ class E621Extractor(danbooru.DanbooruExtractor):
post["filename"] = file["md5"]
post["extension"] = file["ext"]
- post["date"] = text.parse_datetime(
- post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ post["date"] = self.parse_datetime_iso(post["created_at"])
post.update(data)
- yield Message.Directory, post
+ yield Message.Directory, "", post
yield Message.Url, file["url"], post
+ def items_artists(self):
+ for artist in self.artists():
+ artist["_extractor"] = E621TagExtractor
+ url = f"{self.root}/posts?tags={text.quote(artist['name'])}"
+ yield Message.Queue, url, artist
+
def _get_notes(self, id):
return self.request_json(
f"{self.root}/notes.json?search[post_id]={id}")
@@ -89,13 +94,13 @@ BASE_PATTERN = E621Extractor.update({
class E621TagExtractor(E621Extractor, danbooru.DanbooruTagExtractor):
"""Extractor for e621 posts from tag searches"""
- pattern = BASE_PATTERN + r"/posts?(?:\?[^#]*?tags=|/index/\d+/)([^&#]*)"
+ pattern = rf"{BASE_PATTERN}/posts?(?:\?[^#]*?tags=|/index/\d+/)([^&#]*)"
example = "https://e621.net/posts?tags=TAG"
class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor):
"""Extractor for e621 pools"""
- pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/pool(?:s|/show)/(\d+)"
example = "https://e621.net/pools/12345"
def posts(self):
@@ -120,7 +125,7 @@ class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor):
class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
"""Extractor for single e621 posts"""
- pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/post(?:s|/show)/(\d+)"
example = "https://e621.net/posts/12345"
def posts(self):
@@ -130,19 +135,38 @@ class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
class E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor):
"""Extractor for popular images from e621"""
- pattern = BASE_PATTERN + r"/explore/posts/popular(?:\?([^#]*))?"
+ pattern = rf"{BASE_PATTERN}/explore/posts/popular(?:\?([^#]*))?"
example = "https://e621.net/explore/posts/popular"
def posts(self):
return self._pagination("/popular.json", self.params)
+class E621ArtistExtractor(E621Extractor, danbooru.DanbooruArtistExtractor):
+ """Extractor for e621 artists"""
+ subcategory = "artist"
+ pattern = rf"{BASE_PATTERN}/artists/(\d+)"
+ example = "https://e621.net/artists/12345"
+
+ items = E621Extractor.items_artists
+
+
+class E621ArtistSearchExtractor(E621Extractor,
+ danbooru.DanbooruArtistSearchExtractor):
+ """Extractor for e621 artist searches"""
+ subcategory = "artist-search"
+ pattern = rf"{BASE_PATTERN}/artists/?\?([^#]+)"
+ example = "https://e621.net/artists?QUERY"
+
+ items = E621Extractor.items_artists
+
+
class E621FavoriteExtractor(E621Extractor):
"""Extractor for e621 favorites"""
subcategory = "favorite"
directory_fmt = ("{category}", "Favorites", "{user_id}")
archive_fmt = "f_{user_id}_{id}"
- pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
+ pattern = rf"{BASE_PATTERN}/favorites(?:\?([^#]*))?"
example = "https://e621.net/favorites"
def metadata(self):
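
The new e621 artist extractors reuse `items_artists`, which downloads nothing itself and instead queues one tag-search URL per artist record. Building that URL amounts to percent-encoding the artist name; `text.quote` is assumed here to behave like `urllib.parse.quote`:

    from urllib.parse import quote

    root = "https://e621.net"
    artist = {"name": "artist name (alias)"}  # hypothetical artist record
    url = f"{root}/posts?tags={quote(artist['name'])}"
    print(url)  # https://e621.net/posts?tags=artist%20name%20%28alias%29
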
diff --git a/gallery_dl/extractor/eporner.py b/gallery_dl/extractor/eporner.py
new file mode 100644
index 0000000..307f14b
--- /dev/null
+++ b/gallery_dl/extractor/eporner.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.eporner.com/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class EpornerGalleryExtractor(GalleryExtractor):
+ """Extractor for image galleries from eporner.com"""
+ category = "eporner"
+ root = "https://eporner.com"
+ pattern = (r"(?:https?://)?(?:www\.)?eporner\.com"
+ r"/gallery/(\w+)(?:/([\w-]+))?")
+ example = "https://www.eporner.com/gallery/GID/SLUG/"
+
+    def __init__(self, match):
+        # the slug group is optional; avoid a literal "None" in the URL
+        url = f"{self.root}/gallery/{match[1]}/"
+        if match[2]:
+            url += f"{match[2]}/"
+        GalleryExtractor.__init__(self, match, url)
+
+ def metadata(self, page):
+ title = text.extr(page, "<title>", " - EPORNER</title>")
+ if title.endswith(" Photo Gallery"):
+ title = title[:-14]
+
+ return {
+ "gallery_id": self.groups[0],
+ "title" : text.unescape(title),
+ "slug" : text.extr(
+ page, "/gallery/", '/"').rpartition("/")[2],
+ "description": text.unescape(text.extr(
+ page, 'name="description" content="', '"')),
+ "tags": text.extr(
+ page, 'EP.ads.keywords = "', '"').split(","),
+ }
+
+ def images(self, page):
+ album = text.extr(
+ page, 'class="photosgrid gallerygrid"', "id='gallerySlideBox'")
+
+ results = []
+ for url in text.extract_iter(album, ' src="', '"'):
+ url, _, ext = url.rpartition(".")
+ # Preview images have a resolution suffix.
+ # E.g. "11208293-image-3_296x1000.jpg".
+ # The same name, but without the suffix, leads to the full image.
+ url = url[:url.rfind("_")]
+ name = url[url.rfind("/")+1:]
+ results.append((f"{url}.{ext}", {"id": name[:name.find("-")]}))
+ return results
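
The suffix-stripping in `images()` turns a preview URL into the full-resolution one. Worked through on a hypothetical URL (the filename layout follows the comment in the code; a name without an "_" suffix would be truncated incorrectly, which the extractor assumes cannot occur):

    url = "https://static.eporner.com/photos/11208293-image-3_296x1000.jpg"
    base, _, ext = url.rpartition(".")  # split off the extension
    base = base[:base.rfind("_")]       # drop the "_296x1000" preview suffix
    name = base[base.rfind("/") + 1:]   # "11208293-image-3"
    print(f"{base}.{ext}")  # https://static.eporner.com/photos/11208293-image-3.jpg
    print(name[:name.find("-")])        # image id: "11208293"
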
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index 68cfdbc..2c9ab47 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -64,7 +64,7 @@ class EromeExtractor(Extractor):
class EromeAlbumExtractor(EromeExtractor):
"""Extractor for albums on erome.com"""
subcategory = "album"
- pattern = BASE_PATTERN + r"/a/(\w+)"
+ pattern = rf"{BASE_PATTERN}/a/(\w+)"
example = "https://www.erome.com/a/ID"
def items(self):
@@ -74,8 +74,12 @@ class EromeAlbumExtractor(EromeExtractor):
try:
page = self.request(url).text
except exception.HttpError as exc:
+ if exc.status == 410:
+ msg = text.extr(exc.response.text, "<h1>", "<")
+ else:
+ msg = "Unable to fetch album page"
raise exception.AbortExtraction(
- f"{album_id}: Unable to fetch album page ({exc})")
+ f"{album_id}: {msg} ({exc})")
title, pos = text.extract(
page, 'property="og:title" content="', '"')
@@ -96,7 +100,7 @@ class EromeAlbumExtractor(EromeExtractor):
if not date:
ts = text.extr(group, '?v=', '"')
if len(ts) > 1:
- date = text.parse_timestamp(ts)
+ date = self.parse_timestamp(ts)
data = {
"album_id": album_id,
@@ -110,14 +114,14 @@ class EromeAlbumExtractor(EromeExtractor):
"_http_headers": {"Referer": url},
}
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], url in enumerate(urls, 1):
yield Message.Url, url, text.nameext_from_url(url, data)
class EromeUserExtractor(EromeExtractor):
subcategory = "user"
- pattern = BASE_PATTERN + r"/(?!a/|search\?)([^/?#]+)(?:/?\?([^#]+))?"
+ pattern = rf"{BASE_PATTERN}/(?!a/|search\?)([^/?#]+)(?:/?\?([^#]+))?"
example = "https://www.erome.com/USER"
def albums(self):
@@ -133,7 +137,7 @@ class EromeUserExtractor(EromeExtractor):
class EromeSearchExtractor(EromeExtractor):
subcategory = "search"
- pattern = BASE_PATTERN + r"/search/?\?(q=[^#]+)"
+ pattern = rf"{BASE_PATTERN}/search/?\?(q=[^#]+)"
example = "https://www.erome.com/search?q=QUERY"
def albums(self):
diff --git a/gallery_dl/extractor/everia.py b/gallery_dl/extractor/everia.py
index 91672bb..ce29800 100644
--- a/gallery_dl/extractor/everia.py
+++ b/gallery_dl/extractor/everia.py
@@ -7,7 +7,7 @@
"""Extractors for https://everia.club"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text
BASE_PATTERN = r"(?:https?://)?everia\.club"
@@ -25,7 +25,7 @@ class EveriaExtractor(Extractor):
return self._pagination(self.groups[0])
def _pagination(self, path, params=None, pnum=1):
- find_posts = util.re(r'thumbnail">\s*<a href="([^"]+)').findall
+ find_posts = text.re(r'thumbnail">\s*<a href="([^"]+)').findall
while True:
if pnum == 1:
@@ -45,14 +45,14 @@ class EveriaPostExtractor(EveriaExtractor):
subcategory = "post"
directory_fmt = ("{category}", "{title}")
archive_fmt = "{post_url}_{num}"
- pattern = BASE_PATTERN + r"(/\d{4}/\d{2}/\d{2}/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/\d{{4}}/\d{{2}}/\d{{2}}/[^/?#]+)"
example = "https://everia.club/0000/00/00/TITLE"
def items(self):
url = self.root + self.groups[0] + "/"
page = self.request(url).text
content = text.extr(page, 'itemprop="text">', "<h3")
- urls = util.re(r'img.*?lazy-src="([^"]+)').findall(content)
+ urls = text.re(r'img.*?lazy-src="([^"]+)').findall(content)
data = {
"title": text.unescape(
@@ -64,7 +64,7 @@ class EveriaPostExtractor(EveriaExtractor):
"count": len(urls),
}
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], url in enumerate(urls, 1):
url = text.unquote(url)
yield Message.Url, url, text.nameext_from_url(url, data)
@@ -72,26 +72,26 @@ class EveriaPostExtractor(EveriaExtractor):
class EveriaTagExtractor(EveriaExtractor):
subcategory = "tag"
- pattern = BASE_PATTERN + r"(/tag/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/tag/[^/?#]+)"
example = "https://everia.club/tag/TAG"
class EveriaCategoryExtractor(EveriaExtractor):
subcategory = "category"
- pattern = BASE_PATTERN + r"(/category/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/category/[^/?#]+)"
example = "https://everia.club/category/CATEGORY"
class EveriaDateExtractor(EveriaExtractor):
subcategory = "date"
- pattern = (BASE_PATTERN +
- r"(/\d{4}(?:/\d{2})?(?:/\d{2})?)(?:/page/\d+)?/?$")
+ pattern = (rf"{BASE_PATTERN}"
+ rf"(/\d{{4}}(?:/\d{{2}})?(?:/\d{{2}})?)(?:/page/\d+)?/?$")
example = "https://everia.club/0000/00/00"
class EveriaSearchExtractor(EveriaExtractor):
subcategory = "search"
- pattern = BASE_PATTERN + r"/(?:page/\d+/)?\?s=([^&#]+)"
+ pattern = rf"{BASE_PATTERN}/(?:page/\d+/)?\?s=([^&#]+)"
example = "https://everia.club/?s=SEARCH"
def posts(self):
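
everia.py shows the one subtlety of the release-wide move from `BASE_PATTERN + r"..."` concatenation to rf-strings: regex quantifier braces must be doubled inside an f-string so they survive interpolation as literal braces. A quick check:

    import re

    BASE_PATTERN = r"(?:https?://)?everia\.club"
    pattern = rf"{BASE_PATTERN}(/\d{{4}}/\d{{2}}/\d{{2}}/[^/?#]+)"
    print(pattern)
    # (?:https?://)?everia\.club(/\d{4}/\d{2}/\d{2}/[^/?#]+)
    assert re.match(pattern, "https://everia.club/2024/01/31/some-title")
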
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index f147959..9dab923 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -115,9 +115,9 @@ class ExhentaiExtractor(Extractor):
class ExhentaiGalleryExtractor(ExhentaiExtractor):
"""Extractor for image galleries from exhentai.org"""
subcategory = "gallery"
- pattern = (BASE_PATTERN +
- r"(?:/g/(\d+)/([\da-f]{10})"
- r"|/s/([\da-f]{10})/(\d+)-(\d+))")
+ pattern = (rf"{BASE_PATTERN}/(?:"
+ rf"g/(\d+)/([\da-f]{{10}})|"
+ rf"s/([\da-f]{{10}})/(\d+)-(\d+))")
example = "https://e-hentai.org/g/12345/67890abcde/"
def __init__(self, match):
@@ -193,7 +193,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self.data = data = self.get_metadata(gpage)
self.count = text.parse_int(data["filecount"])
- yield Message.Directory, data
+ yield Message.Directory, "", data
images = itertools.chain(
(self.image_from_page(ipage),), self.images_from_api())
@@ -216,7 +216,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
def _items_hitomi(self):
if self.config("metadata", False):
data = self.metadata_from_api()
- data["date"] = text.parse_timestamp(data["posted"])
+ data["date"] = self.parse_timestamp(data["posted"])
else:
data = {}
@@ -226,14 +226,14 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
yield Message.Queue, url, data
def _items_metadata(self):
- yield Message.Directory, self.metadata_from_api()
+ yield Message.Directory, "", self.metadata_from_api()
def get_metadata(self, page):
"""Extract gallery metadata"""
data = self.metadata_from_page(page)
if self.config("metadata", False):
data.update(self.metadata_from_api())
- data["date"] = text.parse_timestamp(data["posted"])
+ data["date"] = self.parse_timestamp(data["posted"])
if self.config("tags", False):
tags = collections.defaultdict(list)
for tag in data["tags"]:
@@ -258,8 +258,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"_" : extr('<div id="gdc"><div class="cs ct', '"'),
"eh_category" : extr('>', '<'),
"uploader" : extr('<div id="gdn">', '</div>'),
- "date" : text.parse_datetime(extr(
- '>Posted:</td><td class="gdt2">', '</td>'), "%Y-%m-%d %H:%M"),
+ "date" : self.parse_datetime_iso(extr(
+ '>Posted:</td><td class="gdt2">', '</td>')),
"parent" : extr(
'>Parent:</td><td class="gdt2"><a href="', '"'),
"expunged" : "Yes" != extr(
@@ -563,7 +563,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
class ExhentaiSearchExtractor(ExhentaiExtractor):
"""Extractor for exhentai search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/(?:\?([^#]*)|tag/([^/?#]+))"
+ pattern = rf"{BASE_PATTERN}/(?:\?([^#]*)|tag/([^/?#]+))"
example = "https://e-hentai.org/?f_search=QUERY"
def __init__(self, match):
@@ -620,7 +620,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
"""Extractor for favorited exhentai galleries"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/favorites\.php(?:\?([^#]*)())?"
+ pattern = rf"{BASE_PATTERN}/favorites\.php(?:\?([^#]*)())?"
example = "https://e-hentai.org/favorites.php"
def _init(self):
diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py
index 6061737..5d56a5f 100644
--- a/gallery_dl/extractor/facebook.py
+++ b/gallery_dl/extractor/facebook.py
@@ -11,9 +11,9 @@ from .. import text, util, exception
from ..cache import memcache
BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?facebook\.com"
-USER_PATTERN = (BASE_PATTERN +
- r"/(?!media/|photo/|photo.php|watch/)"
- r"(?:profile\.php\?id=|people/[^/?#]+/)?([^/?&#]+)")
+USER_PATTERN = (rf"{BASE_PATTERN}/"
+                rf"(?!media/|photo/|photo\.php|watch/|permalink\.php)"
+ rf"(?:profile\.php\?id=|people/[^/?#]+/)?([^/?&#]+)")
class FacebookExtractor(Extractor):
@@ -108,7 +108,7 @@ class FacebookExtractor(Extractor):
'"message":{"delight_ranges"',
'"},"message_preferred_body"'
).rsplit('],"text":"', 1)[-1]),
- "date": text.parse_timestamp(
+ "date": self.parse_timestamp(
text.extr(photo_page, '\\"publish_time\\":', ',') or
text.extr(photo_page, '"created_time":', ',')
),
@@ -172,7 +172,7 @@ class FacebookExtractor(Extractor):
"user_id": text.extr(
video_page, '"owner":{"__typename":"User","id":"', '"'
),
- "date": text.parse_timestamp(text.extr(
+ "date": self.parse_timestamp(text.extr(
video_page, '\\"publish_time\\":', ','
)),
"type": "video"
@@ -292,7 +292,7 @@ class FacebookExtractor(Extractor):
else:
retries = 0
photo.update(set_data)
- yield Message.Directory, photo
+ yield Message.Directory, "", photo
yield Message.Url, photo["url"], photo
if not photo["next_photo_id"]:
@@ -389,9 +389,9 @@ class FacebookExtractor(Extractor):
class FacebookPhotoExtractor(FacebookExtractor):
"""Base class for Facebook Photo extractors"""
subcategory = "photo"
- pattern = (BASE_PATTERN +
- r"/(?:[^/?#]+/photos/[^/?#]+/|photo(?:.php)?/?\?"
- r"(?:[^&#]+&)*fbid=)([^/?&#]+)[^/?#]*(?<!&setextract)$")
+ pattern = (rf"{BASE_PATTERN}/"
+ rf"(?:[^/?#]+/photos/[^/?#]+/|photo(?:.php)?/?\?"
+ rf"(?:[^&#]+&)*fbid=)([^/?&#]+)[^/?#]*(?<!&setextract)$")
example = "https://www.facebook.com/photo/?fbid=PHOTO_ID"
def items(self):
@@ -408,7 +408,7 @@ class FacebookPhotoExtractor(FacebookExtractor):
directory = self.parse_set_page(set_page)
- yield Message.Directory, directory
+ yield Message.Directory, "", directory
yield Message.Url, photo["url"], photo
if self.author_followups:
@@ -427,12 +427,11 @@ class FacebookSetExtractor(FacebookExtractor):
"""Base class for Facebook Set extractors"""
subcategory = "set"
pattern = (
- BASE_PATTERN +
- r"/(?:(?:media/set|photo)/?\?(?:[^&#]+&)*set=([^&#]+)"
- r"[^/?#]*(?<!&setextract)$"
- r"|([^/?#]+/posts/[^/?#]+)"
- r"|photo/\?(?:[^&#]+&)*fbid=([^/?&#]+)&set=([^/?&#]+)&setextract)"
- )
+ rf"{BASE_PATTERN}/"
+ rf"(?:(?:media/set|photo)/?\?(?:[^&#]+&)*set=([^&#]+)"
+ rf"[^/?#]*(?<!&setextract)$"
+ rf"|([^/?#]+/posts/[^/?#]+)"
+ rf"|photo/\?(?:[^&#]+&)*fbid=([^/?&#]+)&set=([^/?&#]+)&setextract)")
example = "https://www.facebook.com/media/set/?set=SET_ID"
def items(self):
@@ -455,7 +454,7 @@ class FacebookVideoExtractor(FacebookExtractor):
"""Base class for Facebook Video extractors"""
subcategory = "video"
directory_fmt = ("{category}", "{username}", "{subcategory}")
- pattern = BASE_PATTERN + r"/(?:[^/?#]+/videos/|watch/?\?v=)([^/?&#]+)"
+ pattern = rf"{BASE_PATTERN}/(?:[^/?#]+/videos/|watch/?\?v=)([^/?&#]+)"
example = "https://www.facebook.com/watch/?v=VIDEO_ID"
def items(self):
@@ -468,7 +467,7 @@ class FacebookVideoExtractor(FacebookExtractor):
if "url" not in video:
return
- yield Message.Directory, video
+ yield Message.Directory, "", video
if self.videos == "ytdl":
yield Message.Url, "ytdl:" + video_url, video
@@ -482,18 +481,18 @@ class FacebookInfoExtractor(FacebookExtractor):
"""Extractor for Facebook Profile data"""
subcategory = "info"
directory_fmt = ("{category}", "{username}")
- pattern = USER_PATTERN + r"/info"
+ pattern = rf"{USER_PATTERN}/info"
example = "https://www.facebook.com/USERNAME/info"
def items(self):
user = self._extract_profile(self.groups[0])
- return iter(((Message.Directory, user),))
+ return iter(((Message.Directory, "", user),))
class FacebookAlbumsExtractor(FacebookExtractor):
"""Extractor for Facebook Profile albums"""
subcategory = "albums"
- pattern = USER_PATTERN + r"/photos_albums(?:/([^/?#]+))?"
+ pattern = rf"{USER_PATTERN}/photos_albums(?:/([^/?#]+))?"
example = "https://www.facebook.com/USERNAME/photos_albums"
def items(self):
@@ -526,7 +525,7 @@ class FacebookAlbumsExtractor(FacebookExtractor):
class FacebookPhotosExtractor(FacebookExtractor):
"""Extractor for Facebook Profile Photos"""
subcategory = "photos"
- pattern = USER_PATTERN + r"/photos(?:_by)?"
+ pattern = rf"{USER_PATTERN}/photos(?:_by)?"
example = "https://www.facebook.com/USERNAME/photos"
def items(self):
@@ -543,7 +542,7 @@ class FacebookPhotosExtractor(FacebookExtractor):
class FacebookAvatarExtractor(FacebookExtractor):
"""Extractor for Facebook Profile Avatars"""
subcategory = "avatar"
- pattern = USER_PATTERN + r"/avatar"
+ pattern = rf"{USER_PATTERN}/avatar"
example = "https://www.facebook.com/USERNAME/avatar"
def items(self):
@@ -559,13 +558,13 @@ class FacebookAvatarExtractor(FacebookExtractor):
set_page = self.request(set_url).text
directory = self.parse_set_page(set_page)
- yield Message.Directory, directory
+ yield Message.Directory, "", directory
yield Message.Url, avatar["url"], avatar
class FacebookUserExtractor(Dispatch, FacebookExtractor):
"""Extractor for Facebook Profiles"""
- pattern = USER_PATTERN + r"/?(?:$|\?|#)"
+ pattern = rf"{USER_PATTERN}/?(?:$|\?|#)"
example = "https://www.facebook.com/USERNAME"
def items(self):
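Nearly every hunk in this release changes "yield Message.Directory, data" to "yield Message.Directory, '', data", giving directory messages the same three-element (type, value, kwdict) shape as URL messages. A minimal consumer sketch under that assumption; the diff itself only ever passes "" as the middle element, so anything beyond that is not shown here:

    from gallery_dl.extractor.message import Message

    def handle(msg):
        # Directory messages are now 3-tuples like Url messages:
        # (Message.Directory, "", kwdict); every extractor in this
        # diff passes "" as the middle element.
        if msg[0] == Message.Directory:
            _, base, kwdict = msg
            return ("directory", kwdict)
        if msg[0] == Message.Url:
            _, url, kwdict = msg
            return ("url", url)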
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index 70b06e7..036b388 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -66,18 +66,17 @@ class FanboxExtractor(Extractor):
if fee_max is not None and fee_max < item["feeRequired"]:
self.log.warning("Skipping post %s (feeRequired of %s > %s)",
item["id"], item["feeRequired"], fee_max)
- continue
-
- try:
- url = "https://api.fanbox.cc/post.info?postId=" + item["id"]
- body = self.request_json(url, headers=self.headers)["body"]
- content_body, post = self._extract_post(body)
- except Exception as exc:
- self.log.warning("Skipping post %s (%s: %s)",
- item["id"], exc.__class__.__name__, exc)
- continue
-
- yield Message.Directory, post
+ else:
+ try:
+ url = ("https://api.fanbox.cc/post.info?postId=" +
+ item["id"])
+ item = self.request_json(url, headers=self.headers)["body"]
+                except Exception as exc:
+                    self.log.warning(
+                        "Unable to fetch full post %s, using feed data "
+                        "(%s: %s)", item["id"], exc.__class__.__name__, exc)
+
+ content_body, post = self._extract_post(item)
+ yield Message.Directory, "", post
yield from self._get_urls_from_post(content_body, post)
def posts(self):
@@ -128,15 +127,19 @@ class FanboxExtractor(Extractor):
if file.get("extension", "").lower() in exts
]
- post["date"] = text.parse_datetime(post["publishedDatetime"])
+ try:
+ post["date"] = self.parse_datetime_iso(post["publishedDatetime"])
+ except Exception:
+ post["date"] = None
post["text"] = content_body.get("text") if content_body else None
post["isCoverImage"] = False
- if self._meta_user:
- post["user"] = self._get_user_data(post["creatorId"])
- if self._meta_plan:
+ cid = post.get("creatorId")
+ if self._meta_user and cid is not None:
+ post["user"] = self._get_user_data(cid)
+ if self._meta_plan and cid is not None:
plans = self._get_plan_data(post["creatorId"])
- fee = post["feeRequired"]
+ fee = post.get("feeRequired") or 0
try:
post["plan"] = plans[fee]
except KeyError:
@@ -147,7 +150,7 @@ class FanboxExtractor(Extractor):
plan["fee"] = fee
post["plan"] = plans[fee] = plan
if self._meta_comments:
- if post["commentCount"]:
+ if post.get("commentCount"):
post["comments"] = list(self._get_comment_data(post["id"]))
else:
post["commentd"] = ()
@@ -216,7 +219,7 @@ class FanboxExtractor(Extractor):
def _get_urls_from_post(self, content_body, post):
num = 0
if cover_image := post.get("coverImageUrl"):
- cover_image = util.re("/c/[0-9a-z_]+").sub("", cover_image)
+ cover_image = text.re("/c/[0-9a-z_]+").sub("", cover_image)
final_post = post.copy()
final_post["isCoverImage"] = True
final_post["fileUrl"] = cover_image
@@ -352,7 +355,7 @@ class FanboxExtractor(Extractor):
class FanboxCreatorExtractor(FanboxExtractor):
"""Extractor for a Fanbox creator's works"""
subcategory = "creator"
- pattern = USER_PATTERN + r"(?:/posts)?/?$"
+ pattern = rf"{USER_PATTERN}(?:/posts)?/?$"
example = "https://USER.fanbox.cc/"
def posts(self):
@@ -362,15 +365,26 @@ class FanboxCreatorExtractor(FanboxExtractor):
def _pagination_creator(self, url):
urls = self.request_json(url, headers=self.headers)["body"]
+ if offset := self.config("offset"):
+ quotient, remainder = divmod(offset, 10)
+ if quotient:
+ urls = urls[quotient:]
+ else:
+ remainder = None
+
for url in urls:
url = text.ensure_http_scheme(url)
- yield from self.request_json(url, headers=self.headers)["body"]
+ posts = self.request_json(url, headers=self.headers)["body"]
+ if remainder:
+ posts = posts[remainder:]
+ remainder = None
+ yield from posts
class FanboxPostExtractor(FanboxExtractor):
"""Extractor for media from a single Fanbox post"""
subcategory = "post"
- pattern = USER_PATTERN + r"/posts/(\d+)"
+ pattern = rf"{USER_PATTERN}/posts/(\d+)"
example = "https://USER.fanbox.cc/posts/12345"
def posts(self):
@@ -380,7 +394,7 @@ class FanboxPostExtractor(FanboxExtractor):
class FanboxHomeExtractor(FanboxExtractor):
"""Extractor for your Fanbox home feed"""
subcategory = "home"
- pattern = BASE_PATTERN + r"/?$"
+ pattern = rf"{BASE_PATTERN}/?$"
example = "https://fanbox.cc/"
def posts(self):
@@ -391,7 +405,7 @@ class FanboxHomeExtractor(FanboxExtractor):
class FanboxSupportingExtractor(FanboxExtractor):
"""Extractor for your supported Fanbox users feed"""
subcategory = "supporting"
- pattern = BASE_PATTERN + r"/home/supporting"
+ pattern = rf"{BASE_PATTERN}/home/supporting"
example = "https://fanbox.cc/home/supporting"
def posts(self):
@@ -403,6 +417,7 @@ class FanboxRedirectExtractor(Extractor):
"""Extractor for pixiv redirects to fanbox.cc"""
category = "fanbox"
subcategory = "redirect"
+ cookies_domain = None
pattern = r"(?:https?://)?(?:www\.)?pixiv\.net/fanbox/creator/(\d+)"
example = "https://www.pixiv.net/fanbox/creator/12345"
diff --git a/gallery_dl/extractor/fansly.py b/gallery_dl/extractor/fansly.py
index 7138599..ba60b15 100644
--- a/gallery_dl/extractor/fansly.py
+++ b/gallery_dl/extractor/fansly.py
@@ -35,9 +35,9 @@ class FanslyExtractor(Extractor):
for post in self.posts():
files = self._extract_files(post)
post["count"] = len(files)
- post["date"] = text.parse_timestamp(post["createdAt"])
+ post["date"] = self.parse_timestamp(post["createdAt"])
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], file in enumerate(files, 1):
post.update(file)
url = file["url"]
@@ -61,7 +61,8 @@ class FanslyExtractor(Extractor):
yield from self.posts_wall(account, wall)
def _extract_files(self, post):
- files = []
+ if "attachments" not in post:
+ return ()
if "_extra" in post:
extra = post.pop("_extra", ())
@@ -75,11 +76,12 @@ class FanslyExtractor(Extractor):
if mid in media
)
+ files = []
for attachment in post.pop("attachments"):
try:
self._extract_attachment(files, post, attachment)
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
self.log.error(
"%s/%s, Failed to extract media (%s: %s)",
post["id"], attachment.get("id"),
@@ -117,8 +119,8 @@ class FanslyExtractor(Extractor):
file = {
**variant,
"format": variant["type"],
- "date": text.parse_timestamp(media["createdAt"]),
- "date_updated": text.parse_timestamp(media["updatedAt"]),
+ "date": self.parse_timestamp(media["createdAt"]),
+ "date_updated": self.parse_timestamp(media["updatedAt"]),
}
if "metadata" in location:
@@ -331,12 +333,20 @@ class FanslyAPI():
posts = response["posts"]
for post in posts:
- post["account"] = accounts[post.pop("accountId")]
+ try:
+ post["account"] = accounts[post.pop("accountId")]
+ except KeyError:
+ pass
extra = None
attachments = []
for attachment in post["attachments"]:
- cid = attachment["contentId"]
+ try:
+ cid = attachment["contentId"]
+ except KeyError:
+ attachments.append(attachment)
+ continue
+
if cid in media:
attachments.append(media[cid])
elif cid in bundles:
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index e32a86b..d13ec13 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -48,7 +48,7 @@ class FantiaExtractor(Extractor):
for content in contents:
files = self._process_content(post, content)
- yield Message.Directory, post
+ yield Message.Directory, "", post
if content["visible_status"] != "visible":
self.log.warning(
@@ -101,7 +101,7 @@ class FantiaExtractor(Extractor):
"comment": resp["comment"],
"rating": resp["rating"],
"posted_at": resp["posted_at"],
- "date": text.parse_datetime(
+ "date": self.parse_datetime(
resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"),
"fanclub_id": resp["fanclub"]["id"],
"fanclub_user_id": resp["fanclub"]["user"]["id"],
diff --git a/gallery_dl/extractor/fapachi.py b/gallery_dl/extractor/fapachi.py
index 7ff71b0..a18ce31 100644
--- a/gallery_dl/extractor/fapachi.py
+++ b/gallery_dl/extractor/fapachi.py
@@ -34,7 +34,7 @@ class FapachiPostExtractor(Extractor):
page = self.request(f"{self.root}/{self.user}/media/{self.id}").text
url = self.root + text.extract(
page, 'data-src="', '"', page.index('class="media-img'))[0]
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, url, text.nameext_from_url(url, data)
diff --git a/gallery_dl/extractor/fapello.py b/gallery_dl/extractor/fapello.py
index b961cbe..afef942 100644
--- a/gallery_dl/extractor/fapello.py
+++ b/gallery_dl/extractor/fapello.py
@@ -20,7 +20,7 @@ class FapelloPostExtractor(Extractor):
directory_fmt = ("{category}", "{model}")
filename_fmt = "{model}_{id}.{extension}"
archive_fmt = "{type}_{model}_{id}"
- pattern = BASE_PATTERN + r"/(?!search/|popular_videos/)([^/?#]+)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/(?!search/|popular_videos/)([^/?#]+)/(\d+)"
example = "https://fapello.com/MODEL/12345/"
def __init__(self, match):
@@ -44,7 +44,7 @@ class FapelloPostExtractor(Extractor):
}
url = text.extr(page, 'src="', '"').replace(
".md", "").replace(".th", "")
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, url, text.nameext_from_url(url, data)
@@ -52,9 +52,9 @@ class FapelloModelExtractor(Extractor):
"""Extractor for all posts from a fapello model"""
category = "fapello"
subcategory = "model"
- pattern = (BASE_PATTERN + r"/(?!top-(?:likes|followers)|popular_videos"
- r"|videos|trending|search/?$)"
- r"([^/?#]+)/?$")
+ pattern = (rf"{BASE_PATTERN}/(?!top-(?:likes|followers)|popular_videos"
+ rf"|videos|trending|search/?$)"
+ rf"([^/?#]+)/?$")
example = "https://fapello.com/model/"
def __init__(self, match):
@@ -85,9 +85,9 @@ class FapelloPathExtractor(Extractor):
"""Extractor for models and posts from fapello.com paths"""
category = "fapello"
subcategory = "path"
- pattern = (BASE_PATTERN +
- r"/(?!search/?$)(top-(?:likes|followers)|videos|trending"
- r"|popular_videos/[^/?#]+)/?$")
+ pattern = (rf"{BASE_PATTERN}/(?!search/?$)"
+ rf"(top-(?:likes|followers)|videos|trending"
+ rf"|popular_videos/[^/?#]+)/?$")
example = "https://fapello.com/trending/"
def __init__(self, match):
diff --git a/gallery_dl/extractor/fikfap.py b/gallery_dl/extractor/fikfap.py
new file mode 100644
index 0000000..75071c5
--- /dev/null
+++ b/gallery_dl/extractor/fikfap.py
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://fikfap.com/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?fikfap\.com"
+
+
+class FikfapExtractor(Extractor):
+ """Base class for fikfap extractors"""
+ category = "fikfap"
+ root = "https://fikfap.com"
+ root_api = "https://api.fikfap.com"
+ directory_fmt = ("{category}", "{author[username]}")
+ filename_fmt = "{postId} {label[:240]}.{extension}"
+ archive_fmt = "{postId}"
+
+ def items(self):
+ headers = {
+ "Referer" : self.root + "/",
+ "Origin" : self.root,
+ "Sec-Fetch-Dest": "empty",
+ "Sec-Fetch-Mode": "cors",
+ "Sec-Fetch-Site": "cross-site",
+ }
+
+ for post in self.posts():
+ if url := post.get("videoFileOriginalUrl"):
+ post["extension"] = text.ext_from_url(url)
+ elif url := post.get("videoStreamUrl"):
+ url = "ytdl:" + url
+ post["extension"] = "mp4"
+ post["_ytdl_manifest"] = "hls"
+ post["_ytdl_manifest_headers"] = headers
+ else:
+ self.log.warning("%s: No video available", post["postId"])
+ continue
+
+ post["date"] = self.parse_datetime_iso(post["createdAt"])
+ post["date_updated"] = self.parse_datetime_iso(post["updatedAt"])
+ post["tags"] = [t["label"] for t in post["hashtags"]]
+ post["filename"] = post["label"]
+
+ yield Message.Directory, "", post
+ yield Message.Url, url, post
+
+ def request_api(self, url, params):
+ return self.request_json(url, params=params, headers={
+ "Referer" : self.root + "/",
+ "Authorization-Anonymous": "2527cc30-c3c5-41be-b8bb-104b6ea7a206",
+ "IsLoggedIn" : "false",
+ "IsPWA" : "false",
+ "Origin" : self.root,
+ "Sec-Fetch-Dest": "empty",
+ "Sec-Fetch-Mode": "cors",
+ "Sec-Fetch-Site": "same-site",
+ })
+
+
+class FikfapPostExtractor(FikfapExtractor):
+ subcategory = "post"
+ pattern = rf"{BASE_PATTERN}/user/(\w+)/post/(\d+)"
+ example = "https://fikfap.com/user/USER/post/12345"
+
+ def posts(self):
+ user, pid = self.groups
+
+ url = f"{self.root_api}/profile/username/{user}/posts"
+ params = {"amount" : "1", "startId": pid}
+ posts = self.request_api(url, params)
+
+ pid = int(pid)
+ for post in posts:
+ if post["postId"] == pid:
+ return (post,)
+ raise exception.NotFoundError("post")
+
+
+class FikfapUserExtractor(FikfapExtractor):
+ subcategory = "user"
+ pattern = rf"{BASE_PATTERN}/user/(\w+)"
+ example = "https://fikfap.com/user/USER"
+
+ def posts(self):
+ user = self.groups[0]
+
+ url = f"{self.root_api}/profile/username/{user}/posts"
+ params = {"amount": "21"}
+
+ while True:
+ data = self.request_api(url, params)
+
+ yield from data
+
+ if len(data) < 21:
+ return
+ params["afterId"] = data[-1]["postId"]
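The new user extractor pages through the (as far as this diff shows, undocumented) API with a cursor: fetch 21 posts, then pass the last postId back as afterId. The same loop stripped to its essentials; the anonymous-auth headers sent by request_api() above are omitted here and would be needed against the real endpoint:

    import requests

    def fikfap_posts(user):
        # Cursor pagination: 21 posts per request, resume after the last ID
        url = f"https://api.fikfap.com/profile/username/{user}/posts"
        params = {"amount": "21"}
        with requests.Session() as session:
            while True:
                data = session.get(url, params=params).json()
                yield from data
                if len(data) < 21:
                    return
                params["afterId"] = data[-1]["postId"]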
diff --git a/gallery_dl/extractor/fitnakedgirls.py b/gallery_dl/extractor/fitnakedgirls.py
new file mode 100644
index 0000000..d252ec4
--- /dev/null
+++ b/gallery_dl/extractor/fitnakedgirls.py
@@ -0,0 +1,208 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://fitnakedgirls.com/"""
+
+from .common import GalleryExtractor, Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?fitnakedgirls\.com"
+
+
+class FitnakedgirlsExtractor(Extractor):
+ """Base class for fitnakedgirls extractors"""
+ category = "fitnakedgirls"
+ root = "https://fitnakedgirls.com"
+
+ def items(self):
+ data = {"_extractor": FitnakedgirlsGalleryExtractor}
+ for url in self.galleries():
+ yield Message.Queue, url, data
+
+ def _pagination(self, base):
+ url = base
+ pnum = 1
+
+ while True:
+ page = self.request(url).text
+
+ for post in text.extract_iter(
+ page, 'class="entry-body', "</a>"):
+ yield text.extr(post, 'href="', '"')
+
+ pnum += 1
+ url = f"{base}page/{pnum}/"
+ if f'href="{url}"' not in page:
+ return
+
+ def _extract_title(self, extr, sep=" - "):
+ title = text.unescape(extr("<title>", "<"))
+ if sep in title:
+ title = title.rpartition(sep)[0]
+ return title.strip()
+
+
+class FitnakedgirlsGalleryExtractor(GalleryExtractor, FitnakedgirlsExtractor):
+ """Extractor for fitnakedgirls galleries"""
+ directory_fmt = ("{category}", "{title}")
+ filename_fmt = "{filename}.{extension}"
+ archive_fmt = "{gallery_id}_{filename}"
+ pattern = rf"{BASE_PATTERN}/photos/gallery/([\w-]+)/?$"
+ example = "https://fitnakedgirls.com/photos/gallery/MODEL-nude/"
+
+ def __init__(self, match):
+ url = f"{self.root}/photos/gallery/{match[1]}/"
+ GalleryExtractor.__init__(self, match, url)
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ title = self._extract_title(extr)
+
+        # Strip common suffix patterns to get a cleaner model name
+ for pattern in (" Nudes", " Nude", " nudes", " nude"):
+ if pattern in title:
+ title = title.partition(pattern)[0]
+ break
+
+ return {
+ "gallery_id" : text.parse_int(extr('data-post-id="', '"')),
+ "gallery_slug": self.groups[0],
+ "model": title,
+ "title": title,
+ "date" : self.parse_datetime_iso(extr(
+ 'article:published_time" content="', '"')),
+ }
+
+ def images(self, page):
+ results = []
+
+ content = text.extr(
+ page, 'itemprop="articleBody"', '<!-- .entry-content -->') or page
+
+ # Extract videos from wp-block-video figures
+ for figure in text.extract_iter(
+ content, '<figure class="wp-block-video">', '</figure>'):
+ if src := text.extr(figure, 'src="', '"'):
+ if "/wp-content/uploads/" in src:
+ results.append((src, None))
+
+ # Extract images from wp-block-image figures (newer template)
+ for figure in text.extract_iter(
+ content, '<figure class="wp-block-image', '</figure>'):
+ if src := text.extr(figure, 'data-src="', '"'):
+ if "/wp-content/uploads/" in src:
+ results.append((src, None))
+
+ # Fallback: Extract images with size-large class (older template)
+ if not results:
+ for img in text.extract_iter(content, "<img ", ">"):
+ if "size-large" in img:
+ if src := text.extr(img, 'data-src="', '"'):
+ if "/wp-content/uploads/" in src:
+ results.append((src, None))
+
+ return results
+
+
+class FitnakedgirlsCategoryExtractor(FitnakedgirlsExtractor):
+ """Extractor for fitnakedgirls category pages"""
+ subcategory = "category"
+ pattern = rf"{BASE_PATTERN}/photos/gallery/category/([\w-]+)"
+ example = "https://fitnakedgirls.com/photos/gallery/category/CATEGORY/"
+
+ def galleries(self):
+ base = f"{self.root}/photos/gallery/category/{self.groups[0]}/"
+ return self._pagination(base)
+
+
+class FitnakedgirlsTagExtractor(FitnakedgirlsExtractor):
+ """Extractor for fitnakedgirls tag pages"""
+ subcategory = "tag"
+ pattern = rf"{BASE_PATTERN}/photos/gallery/tag/([\w-]+)"
+ example = "https://fitnakedgirls.com/photos/gallery/tag/TAG/"
+
+ def galleries(self):
+ base = f"{self.root}/photos/gallery/tag/{self.groups[0]}/"
+ return self._pagination(base)
+
+
+class FitnakedgirlsVideoExtractor(FitnakedgirlsExtractor):
+ """Extractor for fitnakedgirls video posts"""
+ subcategory = "video"
+ directory_fmt = ("{category}", "{title}")
+ filename_fmt = "{filename}.{extension}"
+ archive_fmt = "{video_id}_{filename}"
+ pattern = rf"{BASE_PATTERN}/videos/(\d+)/(\d+)/([\w-]+)"
+ example = "https://fitnakedgirls.com/videos/2025/08/VIDEO-TITLE/"
+
+ def items(self):
+ year, month, slug = self.groups
+ url = f"{self.root}/videos/{year}/{month}/{slug}/"
+ page = self.request(url).text
+
+ extr = text.extract_from(page)
+ data = {
+ "slug" : slug,
+ "title" : self._extract_title(extr, " | "),
+ "video_id": text.parse_int(extr('data-post-id="', '"')),
+ "date" : self.parse_datetime_iso(
+ extr('article:published_time" content="', '"')),
+ }
+
+ yield Message.Directory, "", data
+
+ content = text.extr(
+ page, 'itemprop="articleBody"', '<!-- .entry-content -->') or page
+ for video in text.extract_iter(content, "<video ", "</video>"):
+ if src := text.extr(video, 'src="', '"'):
+ if "/wp-content/uploads/" in src:
+ yield Message.Url, src, text.nameext_from_url(src, data)
+
+
+class FitnakedgirlsBlogExtractor(FitnakedgirlsExtractor):
+ """Extractor for fitnakedgirls blog posts"""
+ subcategory = "blog"
+ directory_fmt = ("{category}", "{title}")
+ filename_fmt = "{filename}.{extension}"
+ archive_fmt = "{post_id}_{filename}"
+ pattern = rf"{BASE_PATTERN}/fitblog/([\w-]+)"
+ example = "https://fitnakedgirls.com/fitblog/MODEL-NAME/"
+
+ def items(self):
+ slug = self.groups[0]
+ url = f"{self.root}/fitblog/{slug}/"
+ page = self.request(url).text
+
+ extr = text.extract_from(page)
+ data = {
+ "slug" : slug,
+ "title" : self._extract_title(extr),
+ "post_id": text.parse_int(extr('data-post-id="', '"')),
+ "date" : self.parse_datetime_iso(
+ extr('article:published_time" content="', '"')),
+ }
+
+ yield Message.Directory, "", data
+
+ # Extract images from wp-block-image figures
+ content = text.extr(
+ page, 'itemprop="articleBody"', '<!-- .entry-content -->') or page
+ for figure in text.extract_iter(
+ content, '<figure class="wp-block-image', '</figure>'):
+            # Try srcset first for the highest resolution
+ if srcset := text.extr(figure, 'srcset="', '"'):
+ # Get the last (largest) image from srcset
+ urls = srcset.split(", ")
+ if urls:
+ src = urls[-1].partition(" ")[0]
+ if "/wp-content/uploads/" in src:
+ yield Message.Url, src, text.nameext_from_url(
+ src, data)
+ continue
+ # Fallback to src
+ if src := text.extr(figure, 'src="', '"'):
+ if "/wp-content/uploads/" in src:
+ yield Message.Url, src, text.nameext_from_url(src, data)
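The blog extractor above takes the last srcset entry as the largest, which holds for WordPress's usual output but is not guaranteed by the srcset format itself. A defensive variant, as a sketch assuming "URL WIDTHw" descriptors, picks by numeric width instead of position:

    def largest_from_srcset(srcset):
        best_url, best_width = None, -1
        for candidate in srcset.split(","):
            url, _, desc = candidate.strip().partition(" ")
            # "800w" -> 800; density descriptors like "2x" count as 0
            width = int(desc[:-1]) if desc.endswith("w") else 0
            if width > best_width:
                best_url, best_width = url, width
        return best_url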
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index 35263a3..1446eb8 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -10,6 +10,7 @@
from .common import Extractor, Message
from .. import text, oauth, util, exception
+from ..cache import memcache
BASE_PATTERN = r"(?:https?://)?(?:www\.|secure\.|m\.)?flickr\.com"
@@ -17,6 +18,7 @@ BASE_PATTERN = r"(?:https?://)?(?:www\.|secure\.|m\.)?flickr\.com"
class FlickrExtractor(Extractor):
"""Base class for flickr extractors"""
category = "flickr"
+ root = "https://www.flickr.com"
filename_fmt = "{category}_{id}.{extension}"
directory_fmt = ("{category}", "{user[username]}")
archive_fmt = "{id}"
@@ -24,11 +26,12 @@ class FlickrExtractor(Extractor):
request_interval_min = 0.5
def _init(self):
- self.api = FlickrAPI(self)
self.user = None
self.item_id = self.groups[0]
def items(self):
+ self.api = FlickrAPI(self)
+
data = self.metadata()
extract = self.api._extract_format
for photo in self.photos():
@@ -38,11 +41,11 @@ class FlickrExtractor(Extractor):
self.log.warning(
"Skipping photo %s (%s: %s)",
photo["id"], exc.__class__.__name__, exc)
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
else:
photo.update(data)
url = self._file_url(photo)
- yield Message.Directory, photo
+ yield Message.Directory, "", photo
yield Message.Url, url, text.nameext_from_url(url, photo)
def metadata(self):
@@ -75,6 +78,8 @@ class FlickrImageExtractor(FlickrExtractor):
example = "https://www.flickr.com/photos/USER/12345"
def items(self):
+ self.api = FlickrAPI(self)
+
item_id, enc_id = self.groups
if enc_id is not None:
alphabet = ("123456789abcdefghijkmnopqrstu"
@@ -98,7 +103,7 @@ class FlickrImageExtractor(FlickrExtractor):
photo["comments"] = text.parse_int(photo["comments"]["_content"])
photo["description"] = photo["description"]["_content"]
photo["tags"] = [t["raw"] for t in photo["tags"]["tag"]]
- photo["date"] = text.parse_timestamp(photo["dateuploaded"])
+ photo["date"] = self.parse_timestamp(photo["dateuploaded"])
photo["views"] = text.parse_int(photo["views"])
photo["id"] = text.parse_int(photo["id"])
@@ -109,7 +114,7 @@ class FlickrImageExtractor(FlickrExtractor):
location[key] = value["_content"]
url = self._file_url(photo)
- yield Message.Directory, photo
+ yield Message.Directory, "", photo
yield Message.Url, url, text.nameext_from_url(url, photo)
@@ -119,7 +124,7 @@ class FlickrAlbumExtractor(FlickrExtractor):
directory_fmt = ("{category}", "{user[username]}",
"Albums", "{album[id]} {album[title]}")
archive_fmt = "a_{album[id]}_{id}"
- pattern = BASE_PATTERN + r"/photos/([^/?#]+)/(?:album|set)s(?:/(\d+))?"
+ pattern = rf"{BASE_PATTERN}/photos/([^/?#]+)/(?:album|set)s(?:/(\d+))?"
example = "https://www.flickr.com/photos/USER/albums/12345"
def items(self):
@@ -129,6 +134,8 @@ class FlickrAlbumExtractor(FlickrExtractor):
return self._album_items()
def _album_items(self):
+ self.api = FlickrAPI(self)
+
data = FlickrExtractor.metadata(self)
data["_extractor"] = FlickrAlbumExtractor
@@ -159,7 +166,7 @@ class FlickrGalleryExtractor(FlickrExtractor):
directory_fmt = ("{category}", "{user[username]}",
"Galleries", "{gallery[gallery_id]} {gallery[title]}")
archive_fmt = "g_{gallery[id]}_{id}"
- pattern = BASE_PATTERN + r"/photos/([^/?#]+)/galleries/(\d+)"
+ pattern = rf"{BASE_PATTERN}/photos/([^/?#]+)/galleries/(\d+)"
example = "https://www.flickr.com/photos/USER/galleries/12345/"
def metadata(self):
@@ -177,7 +184,7 @@ class FlickrGroupExtractor(FlickrExtractor):
subcategory = "group"
directory_fmt = ("{category}", "Groups", "{group[groupname]}")
archive_fmt = "G_{group[nsid]}_{id}"
- pattern = BASE_PATTERN + r"/groups/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/groups/([^/?#]+)"
example = "https://www.flickr.com/groups/NAME/"
def metadata(self):
@@ -192,7 +199,7 @@ class FlickrUserExtractor(FlickrExtractor):
"""Extractor for the photostream of a flickr user"""
subcategory = "user"
archive_fmt = "u_{user[nsid]}_{id}"
- pattern = BASE_PATTERN + r"/photos/([^/?#]+)/?$"
+ pattern = rf"{BASE_PATTERN}/photos/([^/?#]+)/?$"
example = "https://www.flickr.com/photos/USER/"
def photos(self):
@@ -204,7 +211,7 @@ class FlickrFavoriteExtractor(FlickrExtractor):
subcategory = "favorite"
directory_fmt = ("{category}", "{user[username]}", "Favorites")
archive_fmt = "f_{user[nsid]}_{id}"
- pattern = BASE_PATTERN + r"/photos/([^/?#]+)/favorites"
+ pattern = rf"{BASE_PATTERN}/photos/([^/?#]+)/favorites"
example = "https://www.flickr.com/photos/USER/favorites"
def photos(self):
@@ -216,7 +223,7 @@ class FlickrSearchExtractor(FlickrExtractor):
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search[text]}")
archive_fmt = "s_{search}_{id}"
- pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
+ pattern = rf"{BASE_PATTERN}/search/?\?([^#]+)"
example = "https://flickr.com/search/?text=QUERY"
def metadata(self):
@@ -236,8 +243,8 @@ class FlickrAPI(oauth.OAuth1API):
"""
API_URL = "https://api.flickr.com/services/rest/"
- API_KEY = "90c368449018a0cb880ea4889cbb8681"
- API_SECRET = "e4b83e319c11e9e1"
+ # API_KEY = ""
+ API_SECRET = ""
FORMATS = [
("o" , "Original" , None),
("6k", "X-Large 6K" , 6144),
@@ -282,6 +289,14 @@ class FlickrAPI(oauth.OAuth1API):
"10": "Public Domain Mark",
}
+ @property
+ @memcache(maxage=3600)
+ def API_KEY(self):
+ extr = self.extractor
+ extr.log.info("Retrieving public API key")
+ page = extr.request(extr.root + "/prints").text
+ return text.extr(page, '.flickr.api.site_key = "', '"')
+
def __init__(self, extractor):
oauth.OAuth1API.__init__(self, extractor)
@@ -489,7 +504,7 @@ class FlickrAPI(oauth.OAuth1API):
def _extract_format(self, photo):
photo["description"] = photo["description"]["_content"].strip()
photo["views"] = text.parse_int(photo["views"])
- photo["date"] = text.parse_timestamp(photo["dateupload"])
+ photo["date"] = self.extractor.parse_timestamp(photo["dateupload"])
photo["tags"] = photo["tags"].split()
self._extract_metadata(photo)
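The hardcoded Flickr API key is gone: the new API_KEY property scrapes the public site key from the /prints page on first use, and memcache keeps it for an hour. The same idea with stdlib tools only, as a sketch; the page-scraping target is taken from the diff, and functools.cached_property caches for the object's lifetime rather than expiring after maxage:

    import functools
    import re
    import requests

    class FlickrKey:
        @functools.cached_property
        def api_key(self):
            # Scrape the public site key embedded in flickr.com's JS config
            page = requests.get("https://www.flickr.com/prints").text
            match = re.search(r'\.flickr\.api\.site_key = "([^"]+)"', page)
            return match.group(1) if match else None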
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index dc23488..3c69489 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -31,7 +31,7 @@ class FoolfuukaExtractor(BaseExtractor):
self.fixup_redirect = False
def items(self):
- yield Message.Directory, self.metadata()
+ yield Message.Directory, "", self.metadata()
for post in self.posts():
media = post["media"]
if not media:
@@ -147,7 +147,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
subcategory = "thread"
directory_fmt = ("{category}", "{board[shortname]}",
"{thread_num} {title|comment[:50]}")
- pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/thread/(\d+)"
example = "https://archived.moe/a/thread/12345/"
def __init__(self, match):
@@ -174,7 +174,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
class FoolfuukaBoardExtractor(FoolfuukaExtractor):
"""Base extractor for FoolFuuka based boards/archives"""
subcategory = "board"
- pattern = BASE_PATTERN + r"/([^/?#]+)(?:/(?:page/)?(\d*))?$"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)(?:/(?:page/)?(\d*))?$"
example = "https://archived.moe/a/"
def __init__(self, match):
@@ -210,7 +210,7 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
"""Base extractor for search results on FoolFuuka based boards/archives"""
subcategory = "search"
directory_fmt = ("{category}", "search", "{search}")
- pattern = BASE_PATTERN + r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
example = "https://archived.moe/_/search/text/QUERY/"
request_interval = (0.5, 1.5)
@@ -265,7 +265,7 @@ class FoolfuukaGalleryExtractor(FoolfuukaExtractor):
"""Base extractor for FoolFuuka galleries"""
subcategory = "gallery"
directory_fmt = ("{category}", "{board}", "gallery")
- pattern = BASE_PATTERN + r"/([^/?#]+)/gallery(?:/(\d+))?"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/gallery(?:/(\d+))?"
example = "https://archived.moe/a/gallery"
def metadata(self):
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index 7c59f72..d932174 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -47,7 +47,7 @@ class FoolslideChapterExtractor(FoolslideExtractor):
filename_fmt = (
"{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
+ pattern = rf"{BASE_PATTERN}(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
example = "https://read.powermanga.org/read/MANGA/en/0/123/"
def items(self):
@@ -58,7 +58,7 @@ class FoolslideChapterExtractor(FoolslideExtractor):
data["count"] = len(imgs)
data["chapter_id"] = text.parse_int(imgs[0]["chapter_id"])
- yield Message.Directory, data
+ yield Message.Directory, "", data
enum = util.enumerate_reversed if self.config(
"page-reverse") else enumerate
for data["page"], image in enum(imgs, 1):
@@ -91,7 +91,7 @@ class FoolslideMangaExtractor(FoolslideExtractor):
"""Base class for manga extractors for FoOlSlide based sites"""
subcategory = "manga"
categorytransfer = True
- pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/series/[^/?#]+)"
example = "https://read.powermanga.org/series/MANGA/"
def items(self):
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 0d24f83..ad57a6b 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -54,7 +54,7 @@ class FuraffinityExtractor(Extractor):
if post := self._parse_post(post_id):
if metadata:
post.update(metadata)
- yield Message.Directory, post
+ yield Message.Directory, "", post
yield Message.Url, post["url"], post
if self.external:
@@ -95,7 +95,7 @@ class FuraffinityExtractor(Extractor):
if self._new_layout:
data["tags"] = text.split_html(extr(
- 'class="tags-row">', '</section>'))
+ "<h3>Keywords</h3>", "</section>"))
data["scraps"] = (extr(' submissions">', "<") == "Scraps")
data["title"] = text.unescape(extr("<h2><p>", "</p></h2>"))
data["artist_url"] = extr('title="', '"').strip()
@@ -143,7 +143,7 @@ class FuraffinityExtractor(Extractor):
data["folders"] = () # folders not present in old layout
data["user"] = self.user or data["artist_url"]
- data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
+ data["date"] = self.parse_timestamp(data["filename"].partition(".")[0])
data["description"] = self._process_description(data["_description"])
data["thumbnail"] = (f"https://t.furaffinity.net/{post_id}@600-"
f"{path.rsplit('/', 2)[1]}.jpg")
@@ -231,7 +231,7 @@ class FuraffinityExtractor(Extractor):
class FuraffinityGalleryExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's gallery"""
subcategory = "gallery"
- pattern = BASE_PATTERN + r"/gallery/([^/?#]+)(?:$|/(?!folder/))"
+ pattern = rf"{BASE_PATTERN}/gallery/([^/?#]+)(?:$|/(?!folder/))"
example = "https://www.furaffinity.net/gallery/USER/"
def posts(self):
@@ -243,7 +243,7 @@ class FuraffinityFolderExtractor(FuraffinityExtractor):
subcategory = "folder"
directory_fmt = ("{category}", "{user!l}",
"Folders", "{folder_id}{folder_name:? //}")
- pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/folder/(\d+)(?:/([^/?#]+))?"
+ pattern = rf"{BASE_PATTERN}/gallery/([^/?#]+)/folder/(\d+)(?:/([^/?#]+))?"
example = "https://www.furaffinity.net/gallery/USER/folder/12345/FOLDER"
def metadata(self):
@@ -260,7 +260,7 @@ class FuraffinityScrapsExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's scraps"""
subcategory = "scraps"
directory_fmt = ("{category}", "{user!l}", "Scraps")
- pattern = BASE_PATTERN + r"/scraps/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/scraps/([^/?#]+)"
example = "https://www.furaffinity.net/scraps/USER/"
def posts(self):
@@ -271,7 +271,7 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's favorites"""
subcategory = "favorite"
directory_fmt = ("{category}", "{user!l}", "Favorites")
- pattern = BASE_PATTERN + r"/favorites/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/favorites/([^/?#]+)"
example = "https://www.furaffinity.net/favorites/USER/"
def posts(self):
@@ -287,7 +287,7 @@ class FuraffinitySearchExtractor(FuraffinityExtractor):
"""Extractor for furaffinity search results"""
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
- pattern = BASE_PATTERN + r"/search(?:/([^/?#]+))?/?[?&]([^#]+)"
+ pattern = rf"{BASE_PATTERN}/search(?:/([^/?#]+))?/?[?&]([^#]+)"
example = "https://www.furaffinity.net/search/?q=QUERY"
def __init__(self, match):
@@ -306,7 +306,7 @@ class FuraffinitySearchExtractor(FuraffinityExtractor):
class FuraffinityPostExtractor(FuraffinityExtractor):
"""Extractor for individual posts on furaffinity"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/(?:view|full)/(\d+)"
example = "https://www.furaffinity.net/view/12345/"
def posts(self):
@@ -317,7 +317,7 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
class FuraffinityUserExtractor(Dispatch, FuraffinityExtractor):
"""Extractor for furaffinity user profiles"""
- pattern = BASE_PATTERN + r"/user/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/user/([^/?#]+)"
example = "https://www.furaffinity.net/user/USER/"
def items(self):
@@ -333,7 +333,7 @@ class FuraffinityUserExtractor(Dispatch, FuraffinityExtractor):
class FuraffinityFollowingExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's watched users"""
subcategory = "following"
- pattern = BASE_PATTERN + "/watchlist/by/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/watchlist/by/([^/?#]+)"
example = "https://www.furaffinity.net/watchlist/by/USER/"
def items(self):
@@ -355,7 +355,7 @@ class FuraffinityFollowingExtractor(FuraffinityExtractor):
class FuraffinitySubmissionsExtractor(FuraffinityExtractor):
"""Extractor for new furaffinity submissions"""
subcategory = "submissions"
- pattern = BASE_PATTERN + r"(/msg/submissions(?:/[^/?#]+)?)"
+ pattern = rf"{BASE_PATTERN}(/msg/submissions(?:/[^/?#]+)?)"
example = "https://www.furaffinity.net/msg/submissions"
def posts(self):
diff --git a/gallery_dl/extractor/furry34.py b/gallery_dl/extractor/furry34.py
index a93ec75..95b98db 100644
--- a/gallery_dl/extractor/furry34.py
+++ b/gallery_dl/extractor/furry34.py
@@ -55,8 +55,7 @@ class Furry34Extractor(BooruExtractor):
def _prepare(self, post):
post.pop("files", None)
- post["date"] = text.parse_datetime(
- post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ post["date"] = self.parse_datetime_iso(post["created"])
post["filename"], _, post["format"] = post["filename"].rpartition(".")
if "tags" in post:
post["tags"] = [t["value"] for t in post["tags"]]
@@ -98,7 +97,7 @@ class Furry34Extractor(BooruExtractor):
class Furry34PostExtractor(Furry34Extractor):
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/post/(\d+)"
+ pattern = rf"{BASE_PATTERN}/post/(\d+)"
example = "https://furry34.com/post/12345"
def posts(self):
@@ -109,7 +108,7 @@ class Furry34PlaylistExtractor(Furry34Extractor):
subcategory = "playlist"
directory_fmt = ("{category}", "{playlist_id}")
archive_fmt = "p_{playlist_id}_{id}"
- pattern = BASE_PATTERN + r"/playlists/view/(\d+)"
+ pattern = rf"{BASE_PATTERN}/playlists/view/(\d+)"
example = "https://furry34.com/playlists/view/12345"
def metadata(self):
@@ -124,7 +123,7 @@ class Furry34TagExtractor(Furry34Extractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"/(?:([^/?#]+))?(?:/?\?([^#]+))?(?:$|#)"
+ pattern = rf"{BASE_PATTERN}/(?:([^/?#]+))?(?:/?\?([^#]+))?(?:$|#)"
example = "https://furry34.com/TAG"
def _init(self):
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index f32059e..0571fcd 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -148,7 +148,7 @@ class GelbooruBase():
class GelbooruTagExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02TagExtractor):
"""Extractor for images from gelbooru.com based on search-tags"""
- pattern = BASE_PATTERN + r"page=post&s=list&tags=([^&#]*)"
+ pattern = rf"{BASE_PATTERN}page=post&s=list&tags=([^&#]*)"
example = "https://gelbooru.com/index.php?page=post&s=list&tags=TAG"
@@ -156,7 +156,7 @@ class GelbooruPoolExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02PoolExtractor):
"""Extractor for gelbooru pools"""
per_page = 45
- pattern = BASE_PATTERN + r"page=pool&s=show&id=(\d+)"
+ pattern = rf"{BASE_PATTERN}page=pool&s=show&id=(\d+)"
example = "https://gelbooru.com/index.php?page=pool&s=show&id=12345"
skip = GelbooruBase._skip_offset
@@ -187,7 +187,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02FavoriteExtractor):
"""Extractor for gelbooru favorites"""
per_page = 100
- pattern = BASE_PATTERN + r"page=favorites&s=view&id=(\d+)"
+ pattern = rf"{BASE_PATTERN}page=favorites&s=view&id=(\d+)"
example = "https://gelbooru.com/index.php?page=favorites&s=view&id=12345"
skip = GelbooruBase._skip_offset
@@ -246,7 +246,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
for fav in favs:
for post in self._api_request({"id": fav["favorite"]}):
- post["date_favorited"] = text.parse_timestamp(fav["added"])
+ post["date_favorited"] = self.parse_timestamp(fav["added"])
yield post
params["pid"] += 1
@@ -273,7 +273,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
for fav in favs:
for post in self._api_request({"id": fav["favorite"]}):
- post["date_favorited"] = text.parse_timestamp(fav["added"])
+ post["date_favorited"] = self.parse_timestamp(fav["added"])
yield post
params["pid"] -= 1
@@ -284,10 +284,10 @@ class GelbooruFavoriteExtractor(GelbooruBase,
class GelbooruPostExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02PostExtractor):
"""Extractor for single images from gelbooru.com"""
- pattern = (BASE_PATTERN +
- r"(?=(?:[^#]+&)?page=post(?:&|#|$))"
- r"(?=(?:[^#]+&)?s=view(?:&|#|$))"
- r"(?:[^#]+&)?id=(\d+)")
+ pattern = (rf"{BASE_PATTERN}"
+ rf"(?=(?:[^#]+&)?page=post(?:&|#|$))"
+ rf"(?=(?:[^#]+&)?s=view(?:&|#|$))"
+ rf"(?:[^#]+&)?id=(\d+)")
example = "https://gelbooru.com/index.php?page=post&s=view&id=12345"
diff --git a/gallery_dl/extractor/gelbooru_v01.py b/gallery_dl/extractor/gelbooru_v01.py
index 61d0545..7b9c732 100644
--- a/gallery_dl/extractor/gelbooru_v01.py
+++ b/gallery_dl/extractor/gelbooru_v01.py
@@ -35,8 +35,7 @@ class GelbooruV01Extractor(booru.BooruExtractor):
}
post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
- post["date"] = text.parse_datetime(
- post["created_at"], "%Y-%m-%d %H:%M:%S")
+ post["date"] = self.parse_datetime_iso(post["created_at"])
return post
@@ -88,7 +87,7 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
+ pattern = rf"{BASE_PATTERN}/index\.php\?page=post&s=list&tags=([^&#]+)"
example = "https://allgirl.booru.org/index.php?page=post&s=list&tags=TAG"
def metadata(self):
@@ -105,7 +104,7 @@ class GelbooruV01FavoriteExtractor(GelbooruV01Extractor):
directory_fmt = ("{category}", "favorites", "{favorite_id}")
archive_fmt = "f_{favorite_id}_{id}"
per_page = 50
- pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
+ pattern = rf"{BASE_PATTERN}/index\.php\?page=favorites&s=view&id=(\d+)"
example = "https://allgirl.booru.org/index.php?page=favorites&s=view&id=1"
def metadata(self):
@@ -121,7 +120,7 @@ class GelbooruV01FavoriteExtractor(GelbooruV01Extractor):
class GelbooruV01PostExtractor(GelbooruV01Extractor):
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
+ pattern = rf"{BASE_PATTERN}/index\.php\?page=post&s=view&id=(\d+)"
example = "https://allgirl.booru.org/index.php?page=post&s=view&id=12345"
def posts(self):
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 33db4e4..122f5a9 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -96,7 +96,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
params["pid"] = self.page_start * self.per_page
data = {}
- find_ids = util.re(r"\sid=\"p(\d+)").findall
+ find_ids = text.re(r"\sid=\"p(\d+)").findall
while True:
page = self.request(url, params=params).text
@@ -122,7 +122,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
def _prepare(self, post):
post["tags"] = post["tags"].strip()
- post["date"] = text.parse_datetime(
+ post["date"] = self.parse_datetime(
post["created_at"], "%a %b %d %H:%M:%S %z %Y")
def _html(self, post):
@@ -136,7 +136,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
return
tags = collections.defaultdict(list)
- pattern = util.re(r"(?s)tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)")
+ pattern = text.re(r"(?s)tag-type-([^\"' ]+).*?[?;]tags=([^\"'&]+)")
for tag_type, tag_name in pattern.findall(tag_container):
tags[tag_type].append(text.unescape(text.unquote(tag_name)))
for key, value in tags.items():
@@ -190,7 +190,7 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]*)"
+ pattern = rf"{BASE_PATTERN}/index\.php\?page=post&s=list&tags=([^&#]*)"
example = "https://safebooru.org/index.php?page=post&s=list&tags=TAG"
def posts(self):
@@ -206,7 +206,7 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
- pattern = BASE_PATTERN + r"/index\.php\?page=pool&s=show&id=(\d+)"
+ pattern = rf"{BASE_PATTERN}/index\.php\?page=pool&s=show&id=(\d+)"
example = "https://safebooru.org/index.php?page=pool&s=show&id=12345"
def __init__(self, match):
@@ -257,7 +257,7 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
directory_fmt = ("{category}", "favorites", "{favorite_id}")
archive_fmt = "f_{favorite_id}_{id}"
per_page = 50
- pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
+ pattern = rf"{BASE_PATTERN}/index\.php\?page=favorites&s=view&id=(\d+)"
example = "https://safebooru.org/index.php?page=favorites&s=view&id=12345"
def metadata(self):
@@ -275,7 +275,7 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
class GelbooruV02PostExtractor(GelbooruV02Extractor):
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
+ pattern = rf"{BASE_PATTERN}/index\.php\?page=post&s=view&id=(\d+)"
example = "https://safebooru.org/index.php?page=post&s=view&id=12345"
def posts(self):
diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py
index 407e478..99e6ea7 100644
--- a/gallery_dl/extractor/generic.py
+++ b/gallery_dl/extractor/generic.py
@@ -7,7 +7,7 @@
"""Generic information extractor"""
from .common import Extractor, Message
-from .. import config, text, util
+from .. import config, text
import os.path
@@ -75,7 +75,7 @@ class GenericExtractor(Extractor):
pass
images = enumerate(imgs, 1)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], (url, imgdata) in images:
if imgdata:
@@ -171,8 +171,8 @@ class GenericExtractor(Extractor):
r"(?:[^\"'<>\s]*)?" # optional query and fragment
)
- imageurls_src = util.re(imageurl_pattern_src).findall(page)
- imageurls_ext = util.re(imageurl_pattern_ext).findall(page)
+ imageurls_src = text.re(imageurl_pattern_src).findall(page)
+ imageurls_ext = text.re(imageurl_pattern_ext).findall(page)
imageurls = imageurls_src + imageurls_ext
# Resolve relative urls
@@ -181,7 +181,7 @@ class GenericExtractor(Extractor):
# by prepending a suitable base url.
#
# If the page contains a <base> element, use it as base url
- basematch = util.re(
+ basematch = text.re(
r"(?i)(?:<base\s.*?href=[\"']?)(?P<url>[^\"' >]+)").search(page)
if basematch:
self.baseurl = basematch['url'].rstrip('/')
diff --git a/gallery_dl/extractor/girlsreleased.py b/gallery_dl/extractor/girlsreleased.py
index 5e68a63..0fbdeff 100644
--- a/gallery_dl/extractor/girlsreleased.py
+++ b/gallery_dl/extractor/girlsreleased.py
@@ -41,7 +41,7 @@ class GirlsreleasedExtractor(Extractor):
class GirlsreleasedSetExtractor(GirlsreleasedExtractor):
"""Extractor for girlsreleased galleries"""
subcategory = "set"
- pattern = BASE_PATTERN + r"/set/(\d+)"
+ pattern = rf"{BASE_PATTERN}/set/(\d+)"
example = "https://girlsreleased.com/set/12345"
def items(self):
@@ -52,11 +52,11 @@ class GirlsreleasedSetExtractor(GirlsreleasedExtractor):
"id": json["id"],
"site": json["site"],
"model": [model for _, model in json["models"]],
- "date": text.parse_timestamp(json["date"]),
+ "date": self.parse_timestamp(json["date"]),
"count": len(json["images"]),
"url": "https://girlsreleased.com/set/" + json["id"],
}
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], image in enumerate(json["images"], 1):
text.nameext_from_url(image[5], data)
yield Message.Queue, image[3], data
@@ -65,12 +65,12 @@ class GirlsreleasedSetExtractor(GirlsreleasedExtractor):
class GirlsreleasedModelExtractor(GirlsreleasedExtractor):
"""Extractor for girlsreleased models"""
subcategory = _path = "model"
- pattern = BASE_PATTERN + r"/model/(\d+(?:/.+)?)"
+ pattern = rf"{BASE_PATTERN}/model/(\d+(?:/.+)?)"
example = "https://girlsreleased.com/model/12345/MODEL"
class GirlsreleasedSiteExtractor(GirlsreleasedExtractor):
"""Extractor for girlsreleased sites"""
subcategory = _path = "site"
- pattern = BASE_PATTERN + r"/site/([^/?#]+(?:/model/\d+/?.*)?)"
+ pattern = rf"{BASE_PATTERN}/site/([^/?#]+(?:/model/\d+/?.*)?)"
example = "https://girlsreleased.com/site/SITE"
diff --git a/gallery_dl/extractor/girlswithmuscle.py b/gallery_dl/extractor/girlswithmuscle.py
index 51b979f..e61e472 100644
--- a/gallery_dl/extractor/girlswithmuscle.py
+++ b/gallery_dl/extractor/girlswithmuscle.py
@@ -5,7 +5,7 @@
# published by the Free Software Foundation.
from .common import Extractor, Message
-from .. import text, util, exception
+from .. import text, exception
from ..cache import cache
BASE_PATTERN = r"(?:https?://)?(?:www\.)?girlswithmuscle\.com"
@@ -60,7 +60,7 @@ class GirlswithmuscleExtractor(Extractor):
class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
"""Extractor for individual posts on girlswithmuscle.com"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/(\d+)"
+ pattern = rf"{BASE_PATTERN}/(\d+)"
example = "https://www.girlswithmuscle.com/12345/"
def items(self):
@@ -80,7 +80,7 @@ class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
metadata["type"] = "video"
text.nameext_from_url(url, metadata)
- yield Message.Directory, metadata
+ yield Message.Directory, "", metadata
yield Message.Url, url, metadata
def metadata(self, page):
@@ -101,9 +101,8 @@ class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
"model": model,
"model_list": self._parse_model_list(model),
"tags": text.split_html(tags)[1::2],
- "date": text.parse_datetime(
- text.extr(page, 'class="hover-time" title="', '"')[:19],
- "%Y-%m-%d %H:%M:%S"),
+ "date": self.parse_datetime_iso(text.extr(
+ page, 'class="hover-time" title="', '"')[:19]),
"is_favorite": self._parse_is_favorite(page),
"source_filename": source,
"uploader": uploader,
@@ -144,7 +143,7 @@ class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
class GirlswithmuscleSearchExtractor(GirlswithmuscleExtractor):
"""Extractor for search results on girlswithmuscle.com"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/images/(.*)"
+ pattern = rf"{BASE_PATTERN}/images/(.*)"
example = "https://www.girlswithmuscle.com/images/?name=MODEL"
def pages(self):
@@ -156,7 +155,7 @@ class GirlswithmuscleSearchExtractor(GirlswithmuscleExtractor):
raise exception.AuthorizationError(msg)
page = response.text
- match = util.re(r"Page (\d+) of (\d+)").search(page)
+ match = text.re(r"Page (\d+) of (\d+)").search(page)
current, total = match.groups()
current, total = text.parse_int(current), text.parse_int(total)
diff --git a/gallery_dl/extractor/gofile.py b/gallery_dl/extractor/gofile.py
index 0a6c9b9..7c9755a 100644
--- a/gallery_dl/extractor/gofile.py
+++ b/gallery_dl/extractor/gofile.py
@@ -39,7 +39,7 @@ class GofileFolderExtractor(Extractor):
self._get_website_token())
folder = self._get_content(self.content_id, password)
- yield Message.Directory, folder
+ yield Message.Directory, "", folder
try:
contents = folder.pop("children")
@@ -75,14 +75,16 @@ class GofileFolderExtractor(Extractor):
@cache(maxage=86400)
def _get_website_token(self):
self.log.debug("Fetching website token")
- page = self.request(self.root + "/dist/js/global.js").text
+ page = self.request(self.root + "/dist/js/config.js").text
return text.extr(page, '.wt = "', '"')
def _get_content(self, content_id, password=None):
- headers = {"Authorization": "Bearer " + self.api_token}
- params = {"wt": self.website_token}
- if password is not None:
- params["password"] = hashlib.sha256(password.encode()).hexdigest()
+ headers = {
+ "Authorization" : "Bearer " + self.api_token,
+ "X-Website-Token": self.website_token,
+ }
+ params = None if password is None else {"password": hashlib.sha256(
+ password.encode()).hexdigest()}
return self._api_request("contents/" + content_id, params, headers)
def _api_request(self, endpoint, params=None, headers=None, method="GET"):
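The gofile change is twofold: the website token now comes from /dist/js/config.js instead of global.js, and it travels in an X-Website-Token header rather than a "wt" query parameter. The resulting request shape, as a sketch; the api.gofile.io base URL is an assumption, since it does not appear in this hunk:

    import hashlib
    import requests

    def get_content(content_id, api_token, website_token, password=None):
        headers = {
            "Authorization": "Bearer " + api_token,
            "X-Website-Token": website_token,  # was a "wt" query parameter
        }
        params = None
        if password is not None:
            params = {"password":
                      hashlib.sha256(password.encode()).hexdigest()}
        url = "https://api.gofile.io/contents/" + content_id  # assumed base
        return requests.get(url, params=params, headers=headers).json()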
diff --git a/gallery_dl/extractor/hatenablog.py b/gallery_dl/extractor/hatenablog.py
index 8e350d6..7065d7b 100644
--- a/gallery_dl/extractor/hatenablog.py
+++ b/gallery_dl/extractor/hatenablog.py
@@ -7,7 +7,7 @@
"""Extractors for https://hatenablog.com"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text
BASE_PATTERN = (
@@ -30,11 +30,11 @@ class HatenablogExtractor(Extractor):
self.domain = match[1] or match[2]
def _init(self):
- self._find_img = util.re(r'<img +([^>]+)').finditer
+ self._find_img = text.re(r'<img +([^>]+)').finditer
def _handle_article(self, article: str):
extr = text.extract_from(article)
- date = text.parse_datetime(extr('<time datetime="', '"'))
+ date = self.parse_datetime_iso(extr('<time datetime="', '"'))
entry_link = text.unescape(extr('<a href="', '"'))
entry = entry_link.partition("/entry/")[2]
title = text.unescape(extr('>', '<'))
@@ -56,7 +56,7 @@ class HatenablogExtractor(Extractor):
"title": title,
"count": len(images),
}
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], url in enumerate(images, 1):
yield Message.Url, url, text.nameext_from_url(url, data)
@@ -73,7 +73,7 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
def _init(self):
HatenablogExtractor._init(self)
- self._find_pager_url = util.re(
+ self._find_pager_url = text.re(
r' class="pager-next">\s*<a href="([^"]+)').search
def items(self):
@@ -123,7 +123,7 @@ class HatenablogEntriesExtractor(HatenablogExtractor):
class HatenablogEntryExtractor(HatenablogExtractor):
"""Extractor for a single entry URL"""
subcategory = "entry"
- pattern = BASE_PATTERN + r"/entry/([^?#]+)" + QUERY_RE
+ pattern = rf"{BASE_PATTERN}/entry/([^?#]+){QUERY_RE}"
example = "https://BLOG.hatenablog.com/entry/PATH"
def __init__(self, match):
@@ -146,21 +146,21 @@ class HatenablogEntryExtractor(HatenablogExtractor):
class HatenablogHomeExtractor(HatenablogEntriesExtractor):
"""Extractor for a blog's home page"""
subcategory = "home"
- pattern = BASE_PATTERN + r"(/?)" + QUERY_RE
+ pattern = rf"{BASE_PATTERN}(/?){QUERY_RE}"
example = "https://BLOG.hatenablog.com"
class HatenablogArchiveExtractor(HatenablogEntriesExtractor):
"""Extractor for a blog's archive page"""
subcategory = "archive"
- pattern = (BASE_PATTERN + r"(/archive(?:/\d+(?:/\d+(?:/\d+)?)?"
- r"|/category/[^?#]+)?)" + QUERY_RE)
+ pattern = (rf"{BASE_PATTERN}(/archive(?:/\d+(?:/\d+(?:/\d+)?)?"
+ rf"|/category/[^?#]+)?){QUERY_RE}")
example = "https://BLOG.hatenablog.com/archive/2024"
class HatenablogSearchExtractor(HatenablogEntriesExtractor):
"""Extractor for a blog's search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"(/search)" + QUERY_RE
+ pattern = rf"{BASE_PATTERN}(/search){QUERY_RE}"
example = "https://BLOG.hatenablog.com/search?q=QUERY"
allowed_parameters = ("q",)
diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py
index ac4cd02..0e4c040 100644
--- a/gallery_dl/extractor/hentai2read.py
+++ b/gallery_dl/extractor/hentai2read.py
@@ -30,7 +30,7 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
chapter_id, pos = text.extract(page, 'data-cid="', '"', pos)
chapter, sep, minor = self.groups[1].partition(".")
- match = util.re(
+ match = text.re(
r"Reading (.+) \(([^)]+)\) Hentai(?: by (.*))? - "
r"([^:]+): (.+) . Page 1 ").match(title)
if match:
diff --git a/gallery_dl/extractor/hentaicosplays.py b/gallery_dl/extractor/hentaicosplays.py
index 5c2628f..b5f3d0e 100644
--- a/gallery_dl/extractor/hentaicosplays.py
+++ b/gallery_dl/extractor/hentaicosplays.py
@@ -38,7 +38,7 @@ class HentaicosplaysGalleryExtractor(
directory_fmt = ("{site}", "{title}")
filename_fmt = "{filename}.{extension}"
archive_fmt = "{title}_{filename}"
- pattern = BASE_PATTERN + r"/(?:image|story)/([\w-]+)"
+ pattern = rf"{BASE_PATTERN}/(?:image|story)/([\w-]+)"
example = "https://hentai-cosplay-xxx.com/image/TITLE/"
def __init__(self, match):
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index a08f7bb..882183b 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -43,7 +43,7 @@ class HentaifoundryExtractor(Extractor):
for post_url in util.advance(self.posts(), self.start_post):
image = self._parse_post(post_url)
image.update(data)
- yield Message.Directory, image
+ yield Message.Directory, "", image
yield Message.Url, image["src"], image
def skip(self, num):
@@ -86,7 +86,8 @@ class HentaifoundryExtractor(Extractor):
.replace("\r\n", "\n")),
"ratings" : [text.unescape(r) for r in text.extract_iter(extr(
"class='ratings_box'", "</div>"), "title='", "'")],
- "date" : text.parse_datetime(extr("datetime='", "'")),
+ "categories" : self._extract_categories(extr),
+ "date" : self.parse_datetime_iso(extr("datetime='", "'")),
"views" : text.parse_int(extr(">Views</span>", "<")),
"score" : text.parse_int(extr(">Vote Score</span>", "<")),
"media" : text.unescape(extr(">Media</span>", "<").strip()),
@@ -126,7 +127,7 @@ class HentaifoundryExtractor(Extractor):
"title" : text.unescape(extr(
"<div class='titlebar'>", "</a>").rpartition(">")[2]),
"author" : text.unescape(extr('alt="', '"')),
- "date" : text.parse_datetime(extr(
+ "date" : self.parse_datetime(extr(
">Updated<", "</span>").rpartition(">")[2], "%B %d, %Y"),
"status" : extr("class='indent'>", "<"),
}
@@ -141,11 +142,17 @@ class HentaifoundryExtractor(Extractor):
path = extr('class="pdfLink" href="', '"')
data["src"] = self.root + path
data["index"] = text.parse_int(path.rsplit("/", 2)[1])
+ data["categories"] = self._extract_categories(extr)
data["ratings"] = [text.unescape(r) for r in text.extract_iter(extr(
"class='ratings_box'", "</div>"), "title='", "'")]
return text.nameext_from_url(data["src"], data)
+ def _extract_categories(self, extr):
+ return [text.unescape(text.extr(c, ">", "<"))
+ for c in extr('class="categoryBreadcrumbs">', "</span>")
+ .split("&raquo;")]
+
def _request_check(self, url, **kwargs):
self.request = self._request_original
@@ -207,7 +214,7 @@ class HentaifoundryExtractor(Extractor):
class HentaifoundryUserExtractor(Dispatch, HentaifoundryExtractor):
"""Extractor for a hentaifoundry user profile"""
- pattern = BASE_PATTERN + r"/user/([^/?#]+)/profile"
+ pattern = rf"{BASE_PATTERN}/user/([^/?#]+)/profile"
example = "https://www.hentai-foundry.com/user/USER/profile"
def items(self):
@@ -228,7 +235,7 @@ class HentaifoundryUserExtractor(Dispatch, HentaifoundryExtractor):
class HentaifoundryPicturesExtractor(HentaifoundryExtractor):
"""Extractor for all pictures of a hentaifoundry user"""
subcategory = "pictures"
- pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)(?:/page/(\d+))?/?$"
+ pattern = rf"{BASE_PATTERN}/pictures/user/([^/?#]+)(?:/page/(\d+))?/?$"
example = "https://www.hentai-foundry.com/pictures/user/USER"
def __init__(self, match):
@@ -240,7 +247,7 @@ class HentaifoundryScrapsExtractor(HentaifoundryExtractor):
"""Extractor for scraps of a hentaifoundry user"""
subcategory = "scraps"
directory_fmt = ("{category}", "{user}", "Scraps")
- pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)/scraps"
+ pattern = rf"{BASE_PATTERN}/pictures/user/([^/?#]+)/scraps"
example = "https://www.hentai-foundry.com/pictures/user/USER/scraps"
def __init__(self, match):
@@ -253,7 +260,7 @@ class HentaifoundryFavoriteExtractor(HentaifoundryExtractor):
subcategory = "favorite"
directory_fmt = ("{category}", "{user}", "Favorites")
archive_fmt = "f_{user}_{index}"
- pattern = BASE_PATTERN + r"/user/([^/?#]+)/faves/pictures"
+ pattern = rf"{BASE_PATTERN}/user/([^/?#]+)/faves/pictures"
example = "https://www.hentai-foundry.com/user/USER/faves/pictures"
def __init__(self, match):
@@ -266,7 +273,7 @@ class HentaifoundryTagExtractor(HentaifoundryExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{index}"
- pattern = BASE_PATTERN + r"/pictures/tagged/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/pictures/tagged/([^/?#]+)"
example = "https://www.hentai-foundry.com/pictures/tagged/TAG"
def __init__(self, match):
@@ -282,7 +289,7 @@ class HentaifoundryRecentExtractor(HentaifoundryExtractor):
subcategory = "recent"
directory_fmt = ("{category}", "Recent Pictures", "{date}")
archive_fmt = "r_{index}"
- pattern = BASE_PATTERN + r"/pictures/recent/(\d\d\d\d-\d\d-\d\d)"
+ pattern = rf"{BASE_PATTERN}/pictures/recent/(\d\d\d\d-\d\d-\d\d)"
example = "https://www.hentai-foundry.com/pictures/recent/1970-01-01"
def __init__(self, match):
@@ -298,7 +305,7 @@ class HentaifoundryPopularExtractor(HentaifoundryExtractor):
subcategory = "popular"
directory_fmt = ("{category}", "Popular Pictures")
archive_fmt = "p_{index}"
- pattern = BASE_PATTERN + r"/pictures/popular()"
+ pattern = rf"{BASE_PATTERN}/pictures/popular()"
example = "https://www.hentai-foundry.com/pictures/popular"
def __init__(self, match):
@@ -324,7 +331,7 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
f"/{self.index}/?enterAgree=1")
image = self._parse_post(post_url)
image["user"] = self.user
- yield Message.Directory, image
+ yield Message.Directory, "", image
yield Message.Url, image["src"], image
@@ -332,14 +339,14 @@ class HentaifoundryStoriesExtractor(HentaifoundryExtractor):
"""Extractor for stories of a hentaifoundry user"""
subcategory = "stories"
archive_fmt = "s_{index}"
- pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)(?:/page/(\d+))?/?$"
+ pattern = rf"{BASE_PATTERN}/stories/user/([^/?#]+)(?:/page/(\d+))?/?$"
example = "https://www.hentai-foundry.com/stories/user/USER"
def items(self):
self._init_site_filters()
for story_html in util.advance(self.stories(), self.start_post):
story = self._parse_story(story_html)
- yield Message.Directory, story
+ yield Message.Directory, "", story
yield Message.Url, story["src"], story
def stories(self):
@@ -351,7 +358,7 @@ class HentaifoundryStoryExtractor(HentaifoundryExtractor):
"""Extractor for a hentaifoundry story"""
subcategory = "story"
archive_fmt = "s_{index}"
- pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/stories/user/([^/?#]+)/(\d+)"
example = "https://www.hentai-foundry.com/stories/user/USER/12345/TITLE"
skip = Extractor.skip
@@ -364,5 +371,5 @@ class HentaifoundryStoryExtractor(HentaifoundryExtractor):
story_url = (f"{self.root}/stories/user/{self.user}"
f"/{self.index}/x?enterAgree=1")
story = self._parse_story(self.request(story_url).text)
- yield Message.Directory, story
+ yield Message.Directory, "", story
yield Message.Url, story["src"], story
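[note] Every "yield Message.Directory, data" in this file gains an empty-string slot. Presumably directory messages now travel as (type, path, kwdict) triples mirroring Message.Url, so consumers can unpack all messages uniformly. A sketch with stand-in message values, not the actual gallery-dl job code:

class Message:                          # illustrative values only
    Directory = 2
    Url = 3

def consume(messages):
    for msg, arg, kwdict in messages:   # uniform 3-tuple unpacking
        if msg is Message.Directory:
            print("cd", arg or "<formatted from kwdict>")
        elif msg is Message.Url:
            print("download", arg)

consume([(Message.Directory, "", {"user": "USER"}),
         (Message.Url, "https://example.org/1.jpg", {})])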
diff --git a/gallery_dl/extractor/hentaihand.py b/gallery_dl/extractor/hentaihand.py
index f4f9d86..2ca462e 100644
--- a/gallery_dl/extractor/hentaihand.py
+++ b/gallery_dl/extractor/hentaihand.py
@@ -35,8 +35,7 @@ class HentaihandGalleryExtractor(GalleryExtractor):
"language" : info["language"]["name"],
"lang" : util.language_to_code(info["language"]["name"]),
"tags" : [t["slug"] for t in info["tags"]],
- "date" : text.parse_datetime(
- info["uploaded_at"], "%Y-%m-%d"),
+ "date" : self.parse_datetime_iso(info["uploaded_at"]),
}
for key in ("artists", "authors", "groups", "characters",
"relationships", "parodies"):
diff --git a/gallery_dl/extractor/hentaihere.py b/gallery_dl/extractor/hentaihere.py
index b894d77..ef72366 100644
--- a/gallery_dl/extractor/hentaihere.py
+++ b/gallery_dl/extractor/hentaihere.py
@@ -33,7 +33,7 @@ class HentaihereChapterExtractor(HentaihereBase, ChapterExtractor):
title = text.extr(page, "<title>", "</title>")
chapter_id = text.extr(page, 'report/C', '"')
chapter, sep, minor = self.chapter.partition(".")
- match = util.re(
+ match = text.re(
r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by "
r"(.+) at ").match(title)
return {
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index a75eee0..0eaf798 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -9,7 +9,7 @@
"""Extractors for https://hiperdex.com/"""
from .common import ChapterExtractor, MangaExtractor
-from .. import text, util
+from .. import text
from ..cache import memcache
BASE_PATTERN = (r"((?:https?://)?(?:www\.)?"
@@ -67,7 +67,7 @@ class HiperdexBase():
class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
"""Extractor for hiperdex manga chapters"""
- pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+)/([^/?#]+))"
+ pattern = rf"{BASE_PATTERN}(/mangas?/([^/?#]+)/([^/?#]+))"
example = "https://hiperdex.com/manga/MANGA/CHAPTER/"
def __init__(self, match):
@@ -79,7 +79,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
return self.chapter_data(self.chapter)
def images(self, page):
- pattern = util.re(r'id="image-\d+"\s+(?:data-)?src="([^"]+)')
+ pattern = text.re(r'id="image-\d+"\s+(?:data-)?src="([^"]+)')
return [
(url.strip(), None)
for url in pattern.findall(page)
@@ -89,7 +89,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
"""Extractor for hiperdex manga"""
chapterclass = HiperdexChapterExtractor
- pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+))/?$"
+ pattern = rf"{BASE_PATTERN}(/mangas?/([^/?#]+))/?$"
example = "https://hiperdex.com/manga/MANGA/"
def __init__(self, match):
@@ -125,7 +125,7 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
categorytransfer = False
chapterclass = HiperdexMangaExtractor
reverse = False
- pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/(?:[^/?#]+))"
+ pattern = rf"{BASE_PATTERN}(/manga-a(?:rtist|uthor)/(?:[^/?#]+))"
example = "https://hiperdex.com/manga-artist/NAME/"
def __init__(self, match):
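[note] util.re moved to text.re here and throughout this patch. Assuming it is a cached regex factory (the repeated compile-at-call-site usage suggests so), a functionally similar stand-in looks like this; the real implementation may differ:

import re
from functools import lru_cache

@lru_cache(maxsize=None)
def re_compile(pattern):
    """Compile 'pattern' once; later calls reuse the cached object."""
    return re.compile(pattern)

find_src = re_compile(r'id="image-\d+"\s+(?:data-)?src="([^"]+)').findall
assert re_compile(r"\d+") is re_compile(r"\d+")   # cached: identical object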
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 82bed80..b05a9a7 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -84,7 +84,7 @@ class HitomiGalleryExtractor(HitomiExtractor, GalleryExtractor):
"type" : info["type"].capitalize(),
"language" : language,
"lang" : util.language_to_code(language),
- "date" : text.parse_datetime(date, "%Y-%m-%d %H:%M:%S%z"),
+ "date" : self.parse_datetime_iso(date),
"tags" : tags,
"artist" : [o["artist"] for o in iget("artists") or ()],
"group" : [o["group"] for o in iget("groups") or ()],
diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py
index 587d88c..953cf4e 100644
--- a/gallery_dl/extractor/hotleak.py
+++ b/gallery_dl/extractor/hotleak.py
@@ -30,7 +30,7 @@ class HotleakExtractor(Extractor):
.replace("_thumb.", ".")
)
post["_http_expected_status"] = (404,)
- yield Message.Directory, post
+ yield Message.Directory, "", post
yield Message.Url, post["url"], post
def posts(self):
@@ -61,7 +61,7 @@ def decode_video_url(url):
class HotleakPostExtractor(HotleakExtractor):
"""Extractor for individual posts on hotleak"""
subcategory = "post"
- pattern = (BASE_PATTERN + r"/(?!(?:hot|creators|videos|photos)(?:$|/))"
+ pattern = (rf"{BASE_PATTERN}/(?!(?:hot|creators|videos|photos)(?:$|/))"
r"([^/]+)/(photo|video)/(\d+)")
example = "https://hotleak.vip/MODEL/photo/12345"
@@ -96,7 +96,7 @@ class HotleakPostExtractor(HotleakExtractor):
class HotleakCreatorExtractor(HotleakExtractor):
"""Extractor for all posts from a hotleak creator"""
subcategory = "creator"
- pattern = (BASE_PATTERN + r"/(?!(?:hot|creators|videos|photos)(?:$|/))"
+ pattern = (rf"{BASE_PATTERN}/(?!(?:hot|creators|videos|photos)(?:$|/))"
r"([^/?#]+)/?$")
example = "https://hotleak.vip/MODEL"
@@ -150,7 +150,7 @@ class HotleakCreatorExtractor(HotleakExtractor):
class HotleakCategoryExtractor(HotleakExtractor):
"""Extractor for hotleak categories"""
subcategory = "category"
- pattern = BASE_PATTERN + r"/(hot|creators|videos|photos)(?:/?\?([^#]+))?"
+ pattern = rf"{BASE_PATTERN}/(hot|creators|videos|photos)(?:/?\?([^#]+))?"
example = "https://hotleak.vip/photos"
def __init__(self, match):
@@ -172,7 +172,7 @@ class HotleakCategoryExtractor(HotleakExtractor):
class HotleakSearchExtractor(HotleakExtractor):
"""Extractor for hotleak search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/search(?:/?\?([^#]+))"
+ pattern = rf"{BASE_PATTERN}/search(?:/?\?([^#]+))"
example = "https://hotleak.vip/search?search=QUERY"
def __init__(self, match):
diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py
index 26fd595..a8f1298 100644
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -11,7 +11,8 @@
from . import sankaku
BASE_PATTERN = (r"(?:https?://)?(?:www\.)?"
- r"idol(?:\.sankaku)?complex\.com(?:/[a-z]{2})?")
+ r"idol(?:\.sankaku)?complex\.com"
+ r"(?:/[a-z]{2}(?:[-_][A-Z]{2})?)?")
class IdolcomplexBase():
@@ -28,17 +29,17 @@ class IdolcomplexBase():
class IdolcomplexTagExtractor(IdolcomplexBase, sankaku.SankakuTagExtractor):
"""Extractor for idolcomplex tag searches"""
- pattern = BASE_PATTERN + r"(?:/posts)?/?\?([^#]*)"
+ pattern = rf"{BASE_PATTERN}(?:/posts)?/?\?([^#]*)"
example = "https://www.idolcomplex.com/en/posts?tags=TAGS"
class IdolcomplexPoolExtractor(IdolcomplexBase, sankaku.SankakuPoolExtractor):
"""Extractor for idolcomplex pools"""
- pattern = BASE_PATTERN + r"/pools?/(?:show/)?(\w+)"
+ pattern = rf"{BASE_PATTERN}/pools?/(?:show/)?(\w+)"
example = "https://www.idolcomplex.com/en/pools/0123456789abcdef"
class IdolcomplexPostExtractor(IdolcomplexBase, sankaku.SankakuPostExtractor):
"""Extractor for individual idolcomplex posts"""
- pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)"
+ pattern = rf"{BASE_PATTERN}/posts?(?:/show)?/(\w+)"
example = "https://www.idolcomplex.com/en/posts/0123456789abcdef"
diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py
index abba9df..66fbdd6 100644
--- a/gallery_dl/extractor/imagebam.py
+++ b/gallery_dl/extractor/imagebam.py
@@ -9,7 +9,7 @@
"""Extractors for https://www.imagebam.com/"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text
class ImagebamExtractor(Extractor):
@@ -30,12 +30,10 @@ class ImagebamExtractor(Extractor):
url, pos = text.extract(page, '<img src="https://images', '"')
filename = text.unescape(text.extract(page, 'alt="', '"', pos)[0])
- data = {
+ return text.nameext_from_name(filename, {
"url" : "https://images" + url,
"image_key": path.rpartition("/")[2],
- }
- data["filename"], _, data["extension"] = filename.rpartition(".")
- return data
+ })
class ImagebamGalleryExtractor(ImagebamExtractor):
@@ -58,7 +56,7 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
data["count"] = len(images)
data["gallery_key"] = self.path.rpartition("/")[2]
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], path in enumerate(images, 1):
image = self._parse_image_page(path)
image.update(data)
@@ -69,7 +67,7 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
page, 'id="gallery-name">', '<').strip())}
def images(self, page):
- findall = util.re(r'<a href="https://www\.imagebam\.com'
+ findall = text.re(r'<a href="https://www\.imagebam\.com'
r'(/(?:image/|view/M)[a-zA-Z0-9]+)').findall
paths = []
while True:
@@ -96,5 +94,5 @@ class ImagebamImageExtractor(ImagebamExtractor):
path = ("/view/" if path[10] == "M" else "/image/") + path[10:]
image = self._parse_image_page(path)
- yield Message.Directory, image
+ yield Message.Directory, "", image
yield Message.Url, image["url"], image
diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py
index 464e489..074b4ae 100644
--- a/gallery_dl/extractor/imagechest.py
+++ b/gallery_dl/extractor/imagechest.py
@@ -19,7 +19,7 @@ class ImagechestGalleryExtractor(GalleryExtractor):
"""Extractor for image galleries from imgchest.com"""
category = "imagechest"
root = "https://imgchest.com"
- pattern = BASE_PATTERN + r"/p/([A-Za-z0-9]{11})"
+ pattern = rf"{BASE_PATTERN}/p/([A-Za-z0-9]{{11}})"
example = "https://imgchest.com/p/abcdefghijk"
def __init__(self, match):
@@ -53,11 +53,9 @@ class ImagechestGalleryExtractor(GalleryExtractor):
def _metadata_api(self, page):
post = self.api.post(self.gallery_id)
- post["date"] = text.parse_datetime(
- post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ post["date"] = self.parse_datetime_iso(post["created"])
for img in post["images"]:
- img["date"] = text.parse_datetime(
- img["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ img["date"] = self.parse_datetime_iso(img["created"])
post["gallery_id"] = self.gallery_id
post.pop("image_count", None)
@@ -80,7 +78,7 @@ class ImagechestUserExtractor(Extractor):
category = "imagechest"
subcategory = "user"
root = "https://imgchest.com"
- pattern = BASE_PATTERN + r"/u/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/u/([^/?#]+)"
example = "https://imgchest.com/u/USER"
def items(self):
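[note] The recurring change from text.parse_datetime(value, "%Y-%m-%dT%H:%M:%S.%fZ") to self.parse_datetime_iso(value) trades per-site format strings for one ISO-8601 parser. A rough stdlib equivalent of what such a helper has to cover (a sketch, not gallery-dl's implementation):

from datetime import datetime, timezone

def parse_datetime_iso(value):
    """Parse an ISO-8601-ish string into an aware UTC datetime."""
    try:
        dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return None
    if dt.tzinfo is None:                    # date-only or naive input
        return dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)

assert parse_datetime_iso("2025-05-01T10:20:30.000Z").hour == 10
assert parse_datetime_iso("2025-05-01") is not None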
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index 993af7c..f727969 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -39,7 +39,7 @@ class ImagefapExtractor(Extractor):
class ImagefapGalleryExtractor(ImagefapExtractor):
"""Extractor for image galleries from imagefap.com"""
subcategory = "gallery"
- pattern = BASE_PATTERN + r"/(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)"
+ pattern = rf"{BASE_PATTERN}/(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)"
example = "https://www.imagefap.com/gallery/12345"
def __init__(self, match):
@@ -51,7 +51,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
url = f"{self.root}/gallery/{self.gid}"
page = self.request(url).text
data = self.get_job_metadata(page)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for url, image in self.get_images():
data.update(image)
yield Message.Url, url, data
@@ -110,7 +110,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
class ImagefapImageExtractor(ImagefapExtractor):
"""Extractor for single images from imagefap.com"""
subcategory = "image"
- pattern = BASE_PATTERN + r"/photo/(\d+)"
+ pattern = rf"{BASE_PATTERN}/photo/(\d+)"
example = "https://www.imagefap.com/photo/12345"
def __init__(self, match):
@@ -119,7 +119,7 @@ class ImagefapImageExtractor(ImagefapExtractor):
def items(self):
url, data = self.get_image()
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, url, data
def get_image(self):
@@ -148,9 +148,9 @@ class ImagefapImageExtractor(ImagefapExtractor):
class ImagefapFolderExtractor(ImagefapExtractor):
"""Extractor for imagefap user folders"""
subcategory = "folder"
- pattern = (BASE_PATTERN + r"/(?:organizer/|"
- r"(?:usergallery\.php\?user(id)?=([^&#]+)&"
- r"|profile/([^/?#]+)/galleries\?)folderid=)(\d+|-1)")
+ pattern = (rf"{BASE_PATTERN}/(?:organizer/|"
+ rf"(?:usergallery\.php\?user(id)?=([^&#]+)&"
+ rf"|profile/([^/?#]+)/galleries\?)folderid=)(\d+|-1)")
example = "https://www.imagefap.com/organizer/12345"
def __init__(self, match):
@@ -206,9 +206,9 @@ class ImagefapFolderExtractor(ImagefapExtractor):
class ImagefapUserExtractor(ImagefapExtractor):
"""Extractor for an imagefap user profile"""
subcategory = "user"
- pattern = (BASE_PATTERN +
- r"/(?:profile(?:\.php\?user=|/)([^/?#]+)(?:/galleries)?"
- r"|usergallery\.php\?userid=(\d+))(?:$|#)")
+ pattern = (rf"{BASE_PATTERN}/(?:"
+ rf"profile(?:\.php\?user=|/)([^/?#]+)(?:/galleries)?|"
+ rf"usergallery\.php\?userid=(\d+))(?:$|#)")
example = "https://www.imagefap.com/profile/USER"
def __init__(self, match):
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 817d2c4..21e6cf8 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -19,6 +19,7 @@ class ImagehostImageExtractor(Extractor):
basecategory = "imagehost"
subcategory = "image"
archive_fmt = "{token}"
+ parent = True
_https = True
_params = None
_cookies = None
@@ -27,7 +28,10 @@ class ImagehostImageExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.page_url = f"http{'s' if self._https else ''}://{match[1]}"
+ if self.root:
+ self.page_url = f"{self.root}{match[1]}"
+ else:
+ self.page_url = f"http{'s' if self._https else ''}://{match[1]}"
self.token = match[2]
if self._params == "simple":
@@ -53,14 +57,25 @@ class ImagehostImageExtractor(Extractor):
).text
url, filename = self.get_info(page)
- data = text.nameext_from_url(filename, {"token": self.token})
+ if not url:
+ return
+
+ if filename:
+ data = text.nameext_from_name(filename)
+ if not data["extension"]:
+ data["extension"] = text.ext_from_url(url)
+ else:
+ data = text.nameext_from_url(url)
+ data["token"] = self.token
+ data["post_url"] = self.page_url
data.update(self.metadata(page))
+
if self._https and url.startswith("http:"):
url = "https:" + url[5:]
if self._validate is not None:
data["_http_validate"] = self._validate
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, url, data
def get_info(self, page):
@@ -70,6 +85,9 @@ class ImagehostImageExtractor(Extractor):
"""Return additional metadata"""
return ()
+ def not_found(self, resource=None):
+ raise exception.NotFoundError(resource or self.__class__.subcategory)
+
class ImxtoImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imx.to"""
@@ -92,7 +110,7 @@ class ImxtoImageExtractor(ImagehostImageExtractor):
url, pos = text.extract(
page, '<div style="text-align:center;"><a href="', '"')
if not url:
- raise exception.NotFoundError("image")
+ self.not_found()
filename, pos = text.extract(page, ' title="', '"', pos)
if self.url_ext and filename:
filename += splitext(url)[1]
@@ -152,7 +170,7 @@ class AcidimgImageExtractor(ImagehostImageExtractor):
if not url:
url, pos = text.extract(page, '<img class="centred" src="', '"')
if not url:
- raise exception.NotFoundError("image")
+ self.not_found()
filename, pos = text.extract(page, "alt='", "'", pos)
if not filename:
@@ -169,7 +187,11 @@ class ImagevenueImageExtractor(ImagehostImageExtractor):
example = "https://www.imagevenue.com/ME123456789"
def get_info(self, page):
- pos = page.index('class="card-body')
+ try:
+ pos = page.index('class="card-body')
+ except ValueError:
+ self.not_found()
+
url, pos = text.extract(page, '<img src="', '"', pos)
if url.endswith("/loader.svg"):
url, pos = text.extract(page, '<img src="', '"', pos)
@@ -199,6 +221,8 @@ class ImagetwistImageExtractor(ImagehostImageExtractor):
def get_info(self, page):
url , pos = text.extract(page, '<img src="', '"')
+ if url and url.startswith("/imgs/"):
+ self.not_found()
filename, pos = text.extract(page, ' alt="', '"', pos)
return url, filename
@@ -249,7 +273,7 @@ class ImgspiceImageExtractor(ImagehostImageExtractor):
def get_info(self, page):
pos = page.find('id="imgpreview"')
if pos < 0:
- raise exception.NotFoundError("image")
+ self.not_found()
url , pos = text.extract(page, 'src="', '"', pos)
name, pos = text.extract(page, 'alt="', '"', pos)
return url, text.unescape(name)
@@ -258,23 +282,26 @@ class ImgspiceImageExtractor(ImagehostImageExtractor):
class PixhostImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from pixhost.to"""
category = "pixhost"
- pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)"
- r"/show/\d+/(\d+)_[^/?#]+)")
+ root = "https://pixhost.to"
+ pattern = (r"(?:https?://)?(?:www\.)?pixhost\.(?:to|org)"
+ r"(/show/\d+/(\d+)_[^/?#]+)")
example = "https://pixhost.to/show/123/12345_NAME.EXT"
_cookies = {"pixhostads": "1", "pixhosttest": "1"}
def get_info(self, page):
- url , pos = text.extract(page, "class=\"image-img\" src=\"", "\"")
- filename, pos = text.extract(page, "alt=\"", "\"", pos)
- return url, filename
+ self.kwdict["directory"] = self.page_url.rsplit("/")[-2]
+ url , pos = text.extract(page, "class=\"image-img\" src=\"", "\"")
+ name, pos = text.extract(page, "alt=\"", "\"", pos)
+ return url, text.unescape(name) if name else None
class PixhostGalleryExtractor(ImagehostImageExtractor):
"""Extractor for image galleries from pixhost.to"""
category = "pixhost"
subcategory = "gallery"
- pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)"
- r"/gallery/([^/?#]+))")
+ root = "https://pixhost.to"
+ pattern = (r"(?:https?://)?(?:www\.)?pixhost\.(?:to|org)"
+ r"(/gallery/([^/?#]+))")
example = "https://pixhost.to/gallery/ID"
def items(self):
@@ -288,29 +315,39 @@ class PixhostGalleryExtractor(ImagehostImageExtractor):
class PostimgImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from postimages.org"""
category = "postimg"
- pattern = (r"(?:https?://)?((?:www\.)?(?:postim(?:ages|g)|pixxxels)"
- r"\.(?:cc|org)/(?!gallery/)(?:image/)?([^/?#]+)/?)")
- example = "https://postimages.org/ID"
+ root = "https://postimg.cc"
+ pattern = (r"(?:https?://)?(?:www\.)?(?:postim(?:ages|g)|pixxxels)"
+ r"\.(?:cc|org)(/(?!gallery/)(?:image/)?([^/?#]+)/?)")
+ example = "https://postimg.cc/ID"
def get_info(self, page):
pos = page.index(' id="download"')
url , pos = text.rextract(page, ' href="', '"', pos)
- filename, pos = text.extract(page, 'class="imagename">', '<', pos)
- return url, text.unescape(filename)
+ filename, pos = text.extract(page, ' class="my-4">', '<', pos)
+ return url, text.unescape(filename) if filename else None
class PostimgGalleryExtractor(ImagehostImageExtractor):
"""Extractor for images galleries from postimages.org"""
category = "postimg"
subcategory = "gallery"
- pattern = (r"(?:https?://)?((?:www\.)?(?:postim(?:ages|g)|pixxxels)"
- r"\.(?:cc|org)/gallery/([^/?#]+))")
- example = "https://postimages.org/gallery/ID"
+ root = "https://postimg.cc"
+ pattern = (r"(?:https?://)?(?:www\.)?(?:postim(?:ages|g)|pixxxels)"
+ r"\.(?:cc|org)(/gallery/([^/?#]+))")
+ example = "https://postimg.cc/gallery/ID"
def items(self):
page = self.request(self.page_url).text
- data = {"_extractor": PostimgImageExtractor}
- for url in text.extract_iter(page, ' class="thumb"><a href="', '"'):
+ title = text.extr(
+ page, 'property="og:title" content="', ' — Postimages"')
+
+ data = {
+ "_extractor" : PostimgImageExtractor,
+ "gallery_title": text.unescape(title),
+ }
+
+ for token in text.extract_iter(page, 'data-image="', '"'):
+ url = f"{self.root}/{token}"
yield Message.Queue, url, data
@@ -323,7 +360,7 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor):
def get_info(self, page):
url = text.extract(page, 'src="', '"', page.index("<img "))[0]
- return url, url
+ return url, None
class TurboimagehostGalleryExtractor(ImagehostImageExtractor):
@@ -343,7 +380,7 @@ class TurboimagehostGalleryExtractor(ImagehostImageExtractor):
if params["p"] == 1 and \
"Requested gallery don`t exist on our website." in page:
- raise exception.NotFoundError("gallery")
+ self.not_found()
thumb_url = None
for thumb_url in text.extract_iter(page, '"><a href="', '"'):
@@ -362,7 +399,7 @@ class ViprImageExtractor(ImagehostImageExtractor):
def get_info(self, page):
url = text.extr(page, '<img src="', '"')
- return url, url
+ return url, None
class ImgclickImageExtractor(ImagehostImageExtractor):
@@ -439,14 +476,16 @@ class ImgdriveImageExtractor(ImagehostImageExtractor):
class SilverpicImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from silverpic.com"""
category = "silverpic"
- pattern = (r"(?:https?://)?((?:www\.)?silverpic\.com"
- r"/([a-z0-9]{10,})/[\S]+\.html)")
- example = "https://silverpic.com/a1b2c3d4f5g6/NAME.EXT.html"
+ root = "https://silverpic.net"
+ _params = "complex"
+ pattern = (r"(?:https?://)?(?:www\.)?silverpic\.(?:net|com)"
+ r"(/([a-z0-9]{10,})/[\S]+\.html)")
+ example = "https://silverpic.net/a1b2c3d4f5g6/NAME.EXT.html"
def get_info(self, page):
url, pos = text.extract(page, '<img src="/img/', '"')
alt, pos = text.extract(page, 'alt="', '"', pos)
- return f"https://silverpic.com/img/{url}", alt
+ return f"{self.root}/img/{url}", alt
def metadata(self, page):
pos = page.find('<img src="/img/')
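[note] The reworked items() above prefers a page-supplied filename but falls back to the URL for a missing extension, or for everything when the page offers no name at all (the new return-None convention of several get_info() methods). A sketch of that decision, with simplified stand-ins for the text helpers:

import os.path

def nameext_from_name(name, data=None):
    data = {} if data is None else data
    stem, _, ext = name.rpartition(".")
    if stem:
        data["filename"], data["extension"] = stem, ext.lower()
    else:                                   # no dot: whole string is the name
        data["filename"], data["extension"] = name, ""
    return data

def ext_from_url(url):
    return os.path.splitext(url.partition("?")[0])[1][1:].lower()

def build_metadata(url, filename, token):
    if filename:
        data = nameext_from_name(filename)
        if not data["extension"]:           # bare name: extension from URL
            data["extension"] = ext_from_url(url)
    else:                                   # no name at all: use the URL
        data = nameext_from_name(os.path.basename(url.partition("?")[0]))
    data["token"] = token
    return data

assert build_metadata("https://x.example/a1b2.jpg", "photo", "t")["extension"] == "jpg"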
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
index d9a63c7..d957328 100644
--- a/gallery_dl/extractor/imgbb.py
+++ b/gallery_dl/extractor/imgbb.py
@@ -30,7 +30,7 @@ class ImgbbExtractor(Extractor):
for image in self.posts():
url = image["url"]
text.nameext_from_url(url, image)
- yield Message.Directory, image
+ yield Message.Directory, "", image
yield Message.Url, url, image
def login(self):
@@ -159,8 +159,7 @@ class ImgbbImageExtractor(ImgbbExtractor):
"width" : text.parse_int(extr('"og:image:width" content="', '"')),
"height": text.parse_int(extr('"og:image:height" content="', '"')),
"album" : extr("Added to <a", "</a>"),
- "date" : text.parse_datetime(extr(
- '<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
+ "date" : self.parse_datetime_iso(extr('<span title="', '"')),
"user" : util.json_loads(extr(
"CHV.obj.resource=", "};") + "}").get("user"),
}
diff --git a/gallery_dl/extractor/imgbox.py b/gallery_dl/extractor/imgbox.py
index 5def88d..8ac66f4 100644
--- a/gallery_dl/extractor/imgbox.py
+++ b/gallery_dl/extractor/imgbox.py
@@ -9,7 +9,7 @@
"""Extractors for https://imgbox.com/"""
from .common import Extractor, Message, AsynchronousMixin
-from .. import text, util, exception
+from .. import text, exception
class ImgboxExtractor(Extractor):
@@ -19,7 +19,7 @@ class ImgboxExtractor(Extractor):
def items(self):
data = self.get_job_metadata()
- yield Message.Directory, data
+ yield Message.Directory, "", data
for image_key in self.get_image_keys():
imgpage = self.request(self.root + "/" + image_key).text
@@ -69,7 +69,7 @@ class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
page = self.request(self.root + "/g/" + self.gallery_key).text
if "The specified gallery could not be found." in page:
raise exception.NotFoundError("gallery")
- self.image_keys = util.re(
+ self.image_keys = text.re(
r'<a href="/([^"]+)"><img alt="').findall(page)
title = text.extr(page, "<h1>", "</h1>")
@@ -88,7 +88,10 @@ class ImgboxImageExtractor(ImgboxExtractor):
"""Extractor for single images from imgbox.com"""
subcategory = "image"
archive_fmt = "{image_key}"
- pattern = r"(?:https?://)?(?:www\.)?imgbox\.com/([A-Za-z0-9]{8})"
+ pattern = (r"(?:https?://)?(?:"
+ r"(?:www\.|i\.)?imgbox\.com|"
+ r"images\d+\.imgbox\.com/[0-9a-f]{2}/[0-9a-f]{2}"
+ r")/([A-Za-z0-9]{8})")
example = "https://imgbox.com/1234abcd"
def __init__(self, match):
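[note] The extended imgbox pattern also recognizes direct CDN links. All three URL shapes should yield the same 8-character image key:

import re

PATTERN = (r"(?:https?://)?(?:"
           r"(?:www\.|i\.)?imgbox\.com|"
           r"images\d+\.imgbox\.com/[0-9a-f]{2}/[0-9a-f]{2}"
           r")/([A-Za-z0-9]{8})")

for url in ("https://imgbox.com/1234abcd",
            "https://i.imgbox.com/1234abcd",
            "https://images4.imgbox.com/9a/bc/1234abcd_o.jpg"):
    assert re.match(PATTERN, url).group(1) == "1234abcd", url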
diff --git a/gallery_dl/extractor/imgpile.py b/gallery_dl/extractor/imgpile.py
index 9fc3a9c..f634203 100644
--- a/gallery_dl/extractor/imgpile.py
+++ b/gallery_dl/extractor/imgpile.py
@@ -54,7 +54,7 @@ class ImgpilePostExtractor(ImgpileExtractor):
data = {"post": post}
data["count"] = post["count"] = len(files)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], file in enumerate(files, 1):
data.update(file)
url = file["url"]
diff --git a/gallery_dl/extractor/imgth.py b/gallery_dl/extractor/imgth.py
index 7e5e6cf..4a81e53 100644
--- a/gallery_dl/extractor/imgth.py
+++ b/gallery_dl/extractor/imgth.py
@@ -31,7 +31,7 @@ class ImgthGalleryExtractor(GalleryExtractor):
"title": text.unescape(extr("<h1>", "</h1>")),
"count": text.parse_int(extr(
"total of images in this gallery: ", " ")),
- "date" : text.parse_datetime(
+ "date" : self.parse_datetime(
extr("created on ", " by <")
.replace("th, ", " ", 1).replace("nd, ", " ", 1)
.replace("st, ", " ", 1), "%B %d %Y at %H:%M"),
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 1ac76e0..4755388 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -38,7 +38,7 @@ class ImgurExtractor(Extractor):
image["url"] = url = \
f"https://i.imgur.com/{image['id']}.{image['ext']}"
- image["date"] = text.parse_datetime(image["created_at"])
+ image["date"] = self.parse_datetime_iso(image["created_at"])
image["_http_validate"] = self._validate
text.nameext_from_url(url, image)
@@ -67,7 +67,7 @@ class ImgurImageExtractor(ImgurExtractor):
subcategory = "image"
filename_fmt = "{category}_{id}{title:?_//}.{extension}"
archive_fmt = "{id}"
- pattern = (BASE_PATTERN + r"/(?!gallery|search)"
+ pattern = (rf"{BASE_PATTERN}/(?!gallery|search)"
r"(?:r/\w+/)?(?:[^/?#]+-)?(\w{7}|\w{5})[sbtmlh]?")
example = "https://imgur.com/abcdefg"
@@ -83,7 +83,7 @@ class ImgurImageExtractor(ImgurExtractor):
image.update(image["media"][0])
del image["media"]
url = self._prepare(image)
- yield Message.Directory, image
+ yield Message.Directory, "", image
yield Message.Url, url, image
@@ -93,7 +93,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
directory_fmt = ("{category}", "{album[id]}{album[title]:? - //}")
filename_fmt = "{category}_{album[id]}_{num:>03}_{id}.{extension}"
archive_fmt = "{album[id]}_{id}"
- pattern = BASE_PATTERN + r"/a/(?:[^/?#]+-)?(\w{7}|\w{5})"
+ pattern = rf"{BASE_PATTERN}/a/(?:[^/?#]+-)?(\w{{7}}|\w{{5}})"
example = "https://imgur.com/a/abcde"
def items(self):
@@ -106,7 +106,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
del album["media"]
count = len(images)
- album["date"] = text.parse_datetime(album["created_at"])
+ album["date"] = self.parse_datetime_iso(album["created_at"])
try:
del album["ad_url"]
@@ -119,14 +119,15 @@ class ImgurAlbumExtractor(ImgurExtractor):
image["num"] = num
image["count"] = count
image["album"] = album
- yield Message.Directory, image
+ yield Message.Directory, "", image
yield Message.Url, url, image
class ImgurGalleryExtractor(ImgurExtractor):
"""Extractor for imgur galleries"""
subcategory = "gallery"
- pattern = BASE_PATTERN + r"/(?:gallery|t/\w+)/(?:[^/?#]+-)?(\w{7}|\w{5})"
+ pattern = (rf"{BASE_PATTERN}/"
+ rf"(?:gallery|t/\w+)/(?:[^/?#]+-)?(\w{{7}}|\w{{5}})")
example = "https://imgur.com/gallery/abcde"
def items(self):
@@ -142,7 +143,7 @@ class ImgurGalleryExtractor(ImgurExtractor):
class ImgurUserExtractor(ImgurExtractor):
"""Extractor for all images posted by a user"""
subcategory = "user"
- pattern = (BASE_PATTERN + r"/user/(?!me(?:/|$|\?|#))"
+ pattern = (rf"{BASE_PATTERN}/user/(?!me(?:/|$|\?|#))"
r"([^/?#]+)(?:/posts|/submitted)?/?$")
example = "https://imgur.com/user/USER"
@@ -153,7 +154,7 @@ class ImgurUserExtractor(ImgurExtractor):
class ImgurFavoriteExtractor(ImgurExtractor):
"""Extractor for a user's favorites"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites/?$"
+ pattern = rf"{BASE_PATTERN}/user/([^/?#]+)/favorites/?$"
example = "https://imgur.com/user/USER/favorites"
def items(self):
@@ -163,7 +164,7 @@ class ImgurFavoriteExtractor(ImgurExtractor):
class ImgurFavoriteFolderExtractor(ImgurExtractor):
"""Extractor for a user's favorites folder"""
subcategory = "favorite-folder"
- pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites/folder/(\d+)"
+ pattern = rf"{BASE_PATTERN}/user/([^/?#]+)/favorites/folder/(\d+)"
example = "https://imgur.com/user/USER/favorites/folder/12345/TITLE"
def __init__(self, match):
@@ -178,7 +179,7 @@ class ImgurFavoriteFolderExtractor(ImgurExtractor):
class ImgurMeExtractor(ImgurExtractor):
"""Extractor for your personal uploads"""
subcategory = "me"
- pattern = BASE_PATTERN + r"/user/me(?:/posts)?(/hidden)?"
+ pattern = rf"{BASE_PATTERN}/user/me(?:/posts)?(/hidden)?"
example = "https://imgur.com/user/me"
def items(self):
@@ -195,7 +196,7 @@ class ImgurMeExtractor(ImgurExtractor):
class ImgurSubredditExtractor(ImgurExtractor):
"""Extractor for a subreddits's imgur links"""
subcategory = "subreddit"
- pattern = BASE_PATTERN + r"/r/([^/?#]+)/?$"
+ pattern = rf"{BASE_PATTERN}/r/([^/?#]+)/?$"
example = "https://imgur.com/r/SUBREDDIT"
def items(self):
@@ -205,7 +206,7 @@ class ImgurSubredditExtractor(ImgurExtractor):
class ImgurTagExtractor(ImgurExtractor):
"""Extractor for imgur tag searches"""
subcategory = "tag"
- pattern = BASE_PATTERN + r"/t/([^/?#]+)$"
+ pattern = rf"{BASE_PATTERN}/t/([^/?#]+)$"
example = "https://imgur.com/t/TAG"
def items(self):
@@ -215,7 +216,7 @@ class ImgurTagExtractor(ImgurExtractor):
class ImgurSearchExtractor(ImgurExtractor):
"""Extractor for imgur search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/search(?:/[^?#]+)?/?\?q=([^&#]+)"
+ pattern = rf"{BASE_PATTERN}/search(?:/[^?#]+)?/?\?q=([^&#]+)"
example = "https://imgur.com/search?q=UERY"
def items(self):
diff --git a/gallery_dl/extractor/imhentai.py b/gallery_dl/extractor/imhentai.py
index 5ad1c30..d83dcc8 100644
--- a/gallery_dl/extractor/imhentai.py
+++ b/gallery_dl/extractor/imhentai.py
@@ -79,7 +79,7 @@ BASE_PATTERN = ImhentaiExtractor.update({
class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
"""Extractor for imhentai galleries"""
- pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/(?:gallery|view)/(\d+)"
example = "https://imhentai.xxx/gallery/12345/"
def __init__(self, match):
@@ -141,7 +141,7 @@ class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
class ImhentaiTagExtractor(ImhentaiExtractor):
"""Extractor for imhentai tag searches"""
subcategory = "tag"
- pattern = (BASE_PATTERN + r"(/(?:"
+ pattern = (rf"{BASE_PATTERN}(/(?:"
r"artist|category|character|group|language|parody|tag"
r")/([^/?#]+))")
example = "https://imhentai.xxx/tag/TAG/"
@@ -154,9 +154,8 @@ class ImhentaiTagExtractor(ImhentaiExtractor):
class ImhentaiSearchExtractor(ImhentaiExtractor):
"""Extractor for imhentai search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/search(/?\?[^#]+|/[^/?#]+/?)"
+ pattern = rf"{BASE_PATTERN}(/(?:advanced-)?search/?\?[^#]+|/[^/?#]+/?)"
example = "https://imhentai.xxx/search/?key=QUERY"
def items(self):
- url = self.root + "/search" + self.groups[-1]
- return self._pagination(url)
+ return self._pagination(self.root + self.groups[-1])
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index 45ae52e..547d4ee 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -35,8 +35,8 @@ class InkbunnyExtractor(Extractor):
for post in self.posts():
post.update(metadata)
- post["date"] = text.parse_datetime(
- post["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
+ post["date"] = self.parse_datetime_iso(
+ post["create_datetime"][:19])
post["tags"] = [kw["keyword_name"] for kw in post["keywords"]]
post["ratings"] = [r["name"] for r in post["ratings"]]
files = post["files"]
@@ -48,12 +48,12 @@ class InkbunnyExtractor(Extractor):
del post["keywords"]
del post["files"]
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], file in enumerate(files, 1):
post.update(file)
post["deleted"] = (file["deleted"] == "t")
- post["date"] = text.parse_datetime(
- file["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z")
+ post["date"] = self.parse_datetime_iso(
+ file["create_datetime"][:19])
text.nameext_from_url(file["file_name"], post)
url = file["file_url_full"]
@@ -71,7 +71,7 @@ class InkbunnyExtractor(Extractor):
class InkbunnyUserExtractor(InkbunnyExtractor):
"""Extractor for inkbunny user profiles"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/(?!s/)(gallery/|scraps/)?(\w+)(?:$|[/?#])"
+ pattern = rf"{BASE_PATTERN}/(?!s/)(gallery/|scraps/)?(\w+)(?:$|[/?#])"
example = "https://inkbunny.net/USER"
def __init__(self, match):
@@ -101,7 +101,7 @@ class InkbunnyUserExtractor(InkbunnyExtractor):
class InkbunnyPoolExtractor(InkbunnyExtractor):
"""Extractor for inkbunny pools"""
subcategory = "pool"
- pattern = (BASE_PATTERN + r"/(?:"
+ pattern = (rf"{BASE_PATTERN}/(?:"
r"poolview_process\.php\?pool_id=(\d+)|"
r"submissionsviewall\.php"
r"\?((?:[^#]+&)?mode=pool(?:&[^#]+)?))")
@@ -132,7 +132,7 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor):
"""Extractor for inkbunny user favorites"""
subcategory = "favorite"
directory_fmt = ("{category}", "{favs_username!l}", "Favorites")
- pattern = (BASE_PATTERN + r"/(?:"
+ pattern = (rf"{BASE_PATTERN}/(?:"
r"userfavorites_process\.php\?favs_user_id=(\d+)|"
r"submissionsviewall\.php"
r"\?((?:[^#]+&)?mode=userfavs(?:&[^#]+)?))")
@@ -175,7 +175,7 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor):
class InkbunnyUnreadExtractor(InkbunnyExtractor):
"""Extractor for unread inkbunny submissions"""
subcategory = "unread"
- pattern = (BASE_PATTERN + r"/submissionsviewall\.php"
+ pattern = (rf"{BASE_PATTERN}/submissionsviewall\.php"
r"\?((?:[^#]+&)?mode=unreadsubs(?:&[^#]+)?)")
example = ("https://inkbunny.net/submissionsviewall.php"
"?text=&mode=unreadsubs&type=")
@@ -195,7 +195,7 @@ class InkbunnyUnreadExtractor(InkbunnyExtractor):
class InkbunnySearchExtractor(InkbunnyExtractor):
"""Extractor for inkbunny search results"""
subcategory = "search"
- pattern = (BASE_PATTERN + r"/submissionsviewall\.php"
+ pattern = (rf"{BASE_PATTERN}/submissionsviewall\.php"
r"\?((?:[^#]+&)?mode=search(?:&[^#]+)?)")
example = ("https://inkbunny.net/submissionsviewall.php"
"?text=TAG&mode=search&type=")
@@ -229,7 +229,7 @@ class InkbunnySearchExtractor(InkbunnyExtractor):
class InkbunnyFollowingExtractor(InkbunnyExtractor):
"""Extractor for inkbunny user watches"""
subcategory = "following"
- pattern = (BASE_PATTERN + r"/(?:"
+ pattern = (rf"{BASE_PATTERN}/(?:"
r"watchlist_process\.php\?mode=watching&user_id=(\d+)|"
r"usersviewall\.php"
r"\?((?:[^#]+&)?mode=watching(?:&[^#]+)?))")
@@ -268,7 +268,7 @@ class InkbunnyFollowingExtractor(InkbunnyExtractor):
class InkbunnyPostExtractor(InkbunnyExtractor):
"""Extractor for individual Inkbunny posts"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/s/(\d+)"
+ pattern = rf"{BASE_PATTERN}/s/(\d+)"
example = "https://inkbunny.net/s/12345"
def __init__(self, match):
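[note] Inkbunny's create_datetime apparently arrives as "YYYY-MM-DD HH:MM:SS.ffffff+ZZ" (hence the old + "00" suffix to complete the %z offset). Slicing to the first 19 characters keeps only the second-resolution prefix, which an ISO parser accepts as-is; the sample value below is illustrative:

from datetime import datetime

create_datetime = "2025-05-01 12:30:45.123456+00"   # assumed API shape
trimmed = create_datetime[:19]
assert trimmed == "2025-05-01 12:30:45"
assert datetime.fromisoformat(trimmed) == datetime(2025, 5, 1, 12, 30, 45)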
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 71964e9..b89369f 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -16,7 +16,7 @@ import itertools
import binascii
BASE_PATTERN = r"(?:https?://)?(?:www\.)?instagram\.com"
-USER_PATTERN = BASE_PATTERN + r"/(?!(?:p|tv|reel|explore|stories)/)([^/?#]+)"
+USER_PATTERN = rf"{BASE_PATTERN}/(?!(?:p|tv|reel|explore|stories)/)([^/?#]+)"
class InstagramExtractor(Extractor):
@@ -38,7 +38,7 @@ class InstagramExtractor(Extractor):
def _init(self):
self.www_claim = "0"
self.csrf_token = util.generate_token()
- self._find_tags = util.re(r"#\w+").findall
+ self._find_tags = text.re(r"#\w+").findall
self._logged_in = True
self._cursor = None
self._user = None
@@ -62,8 +62,10 @@ class InstagramExtractor(Extractor):
data = self.metadata()
if videos := self.config("videos", True):
- videos_dash = (videos != "merged")
+ self.videos_dash = videos_dash = (videos != "merged")
videos_headers = {"User-Agent": "Mozilla/5.0"}
+ else:
+ self.videos_dash = False
previews = self.config("previews", False)
max_posts = self.config("max-posts")
@@ -86,7 +88,7 @@ class InstagramExtractor(Extractor):
files = post.pop("_files")
post["count"] = len(files)
- yield Message.Directory, post
+ yield Message.Directory, "", post
if "date" in post:
del post["date"]
@@ -173,7 +175,7 @@ class InstagramExtractor(Extractor):
post_url = f"{self.root}/stories/highlights/{reel_id}/"
data = {
"user" : post.get("user"),
- "expires": text.parse_timestamp(expires),
+ "expires": self.parse_timestamp(expires),
"post_id": reel_id,
"post_shortcode": shortcode_from_id(reel_id),
"post_url": post_url,
@@ -224,7 +226,7 @@ class InstagramExtractor(Extractor):
data["owner_id"] = owner["pk"]
data["username"] = owner.get("username")
data["fullname"] = owner.get("full_name")
- data["post_date"] = data["date"] = text.parse_timestamp(
+ data["post_date"] = data["date"] = self.parse_timestamp(
post.get("taken_at") or post.get("created_at") or post.get("seen"))
data["_files"] = files = []
for num, item in enumerate(items, 1):
@@ -236,13 +238,23 @@ class InstagramExtractor(Extractor):
data["post_shortcode"])
continue
+ width_orig = item.get("original_width", 0)
+ height_orig = item.get("original_height", 0)
+
if video_versions := item.get("video_versions"):
video = max(
video_versions,
key=lambda x: (x["width"], x["height"], x["type"]),
)
- manifest = item.get("video_dash_manifest")
+
media = video
+ if (manifest := item.get("video_dash_manifest")) and \
+ self.videos_dash:
+ width = width_orig
+ height = height_orig
+ else:
+ width = video["width"]
+ height = video["height"]
if self._warn_video:
self._warn_video = False
@@ -254,22 +266,21 @@ class InstagramExtractor(Extractor):
else:
video = manifest = None
media = image
+ width = image["width"]
+ height = image["height"]
- if self._warn_image < (
- (image["width"] < item.get("original_width", 0)) +
- (image["height"] < item.get("original_height", 0))):
+ if self._warn_image < ((width < width_orig) +
+ (height < height_orig)):
self.log.warning(
"%s: Available image resolutions lower than the "
"original (%sx%s < %sx%s). "
"Consider refreshing your cookies.",
data["post_shortcode"],
- image["width"], image["height"],
- item.get("original_width", 0),
- item.get("original_height", 0))
+ width, height, width_orig, height_orig)
media = {
"num" : num,
- "date" : text.parse_timestamp(item.get("taken_at") or
+ "date" : self.parse_timestamp(item.get("taken_at") or
media.get("taken_at") or
post.get("taken_at")),
"media_id" : item["pk"],
@@ -277,8 +288,10 @@ class InstagramExtractor(Extractor):
shortcode_from_id(item["pk"])),
"display_url": image["url"],
"video_url" : video["url"] if video else None,
- "width" : media["width"],
- "height" : media["height"],
+ "width" : width,
+ "width_original" : width_orig,
+ "height" : height,
+ "height_original": height_orig,
}
if manifest is not None:
@@ -288,7 +301,9 @@ class InstagramExtractor(Extractor):
if "reshared_story_media_author" in item:
media["author"] = item["reshared_story_media_author"]
if "expiring_at" in item:
- media["expires"] = text.parse_timestamp(post["expiring_at"])
+ media["expires"] = self.parse_timestamp(post["expiring_at"])
+ if "subscription_media_visibility" in item:
+ media["subscription"] = item["subscription_media_visibility"]
self._extract_tagged_users(item, media)
files.append(media)
@@ -331,7 +346,7 @@ class InstagramExtractor(Extractor):
"post_id" : post["id"],
"post_shortcode": post["shortcode"],
"post_url" : f"{self.root}/p/{post['shortcode']}/",
- "post_date" : text.parse_timestamp(post["taken_at_timestamp"]),
+ "post_date" : self.parse_timestamp(post["taken_at_timestamp"]),
"description": text.parse_unicode_escapes("\n".join(
edge["node"]["text"]
for edge in post["edge_media_to_caption"]["edges"]
@@ -490,7 +505,7 @@ class InstagramPostExtractor(InstagramExtractor):
class InstagramUserExtractor(Dispatch, InstagramExtractor):
"""Extractor for an Instagram user profile"""
- pattern = USER_PATTERN + r"/?(?:$|[?#])"
+ pattern = rf"{USER_PATTERN}/?(?:$|[?#])"
example = "https://www.instagram.com/USER/"
def items(self):
@@ -510,7 +525,7 @@ class InstagramUserExtractor(Dispatch, InstagramExtractor):
class InstagramPostsExtractor(InstagramExtractor):
"""Extractor for an Instagram user's posts"""
subcategory = "posts"
- pattern = USER_PATTERN + r"/posts"
+ pattern = rf"{USER_PATTERN}/posts"
example = "https://www.instagram.com/USER/posts/"
def posts(self):
@@ -527,7 +542,7 @@ class InstagramPostsExtractor(InstagramExtractor):
class InstagramReelsExtractor(InstagramExtractor):
"""Extractor for an Instagram user's reels"""
subcategory = "reels"
- pattern = USER_PATTERN + r"/reels"
+ pattern = rf"{USER_PATTERN}/reels"
example = "https://www.instagram.com/USER/reels/"
def posts(self):
@@ -544,7 +559,7 @@ class InstagramReelsExtractor(InstagramExtractor):
class InstagramTaggedExtractor(InstagramExtractor):
"""Extractor for an Instagram user's tagged posts"""
subcategory = "tagged"
- pattern = USER_PATTERN + r"/tagged"
+ pattern = rf"{USER_PATTERN}/tagged"
example = "https://www.instagram.com/USER/tagged/"
def metadata(self):
@@ -570,7 +585,7 @@ class InstagramTaggedExtractor(InstagramExtractor):
class InstagramGuideExtractor(InstagramExtractor):
"""Extractor for an Instagram guide"""
subcategory = "guide"
- pattern = USER_PATTERN + r"/guide/[^/?#]+/(\d+)"
+ pattern = rf"{USER_PATTERN}/guide/[^/?#]+/(\d+)"
example = "https://www.instagram.com/USER/guide/NAME/12345"
def __init__(self, match):
@@ -587,7 +602,7 @@ class InstagramGuideExtractor(InstagramExtractor):
class InstagramSavedExtractor(InstagramExtractor):
"""Extractor for an Instagram user's saved media"""
subcategory = "saved"
- pattern = USER_PATTERN + r"/saved(?:/all-posts)?/?$"
+ pattern = rf"{USER_PATTERN}/saved(?:/all-posts)?/?$"
example = "https://www.instagram.com/USER/saved/"
def posts(self):
@@ -597,7 +612,7 @@ class InstagramSavedExtractor(InstagramExtractor):
class InstagramCollectionExtractor(InstagramExtractor):
"""Extractor for Instagram collection"""
subcategory = "collection"
- pattern = USER_PATTERN + r"/saved/([^/?#]+)/([^/?#]+)"
+ pattern = rf"{USER_PATTERN}/saved/([^/?#]+)/([^/?#]+)"
example = "https://www.instagram.com/USER/saved/COLLECTION/12345"
def __init__(self, match):
@@ -623,7 +638,7 @@ class InstagramStoriesTrayExtractor(InstagramExtractor):
def items(self):
base = f"{self.root}/stories/id:"
for story in self.api.reels_tray():
- story["date"] = text.parse_timestamp(story["latest_reel_media"])
+ story["date"] = self.parse_timestamp(story["latest_reel_media"])
story["_extractor"] = InstagramStoriesExtractor
yield Message.Queue, f"{base}{story['id']}/", story
@@ -681,7 +696,7 @@ class InstagramStoriesExtractor(InstagramExtractor):
class InstagramHighlightsExtractor(InstagramExtractor):
"""Extractor for an Instagram user's story highlights"""
subcategory = "highlights"
- pattern = USER_PATTERN + r"/highlights"
+ pattern = rf"{USER_PATTERN}/highlights"
example = "https://www.instagram.com/USER/highlights/"
def posts(self):
@@ -692,7 +707,7 @@ class InstagramHighlightsExtractor(InstagramExtractor):
class InstagramFollowersExtractor(InstagramExtractor):
"""Extractor for an Instagram user's followers"""
subcategory = "followers"
- pattern = USER_PATTERN + r"/followers"
+ pattern = rf"{USER_PATTERN}/followers"
example = "https://www.instagram.com/USER/followers/"
def items(self):
@@ -706,7 +721,7 @@ class InstagramFollowersExtractor(InstagramExtractor):
class InstagramFollowingExtractor(InstagramExtractor):
"""Extractor for an Instagram user's followed users"""
subcategory = "following"
- pattern = USER_PATTERN + r"/following"
+ pattern = rf"{USER_PATTERN}/following"
example = "https://www.instagram.com/USER/following/"
def items(self):
@@ -721,7 +736,7 @@ class InstagramTagExtractor(InstagramExtractor):
"""Extractor for Instagram tags"""
subcategory = "tag"
directory_fmt = ("{category}", "{subcategory}", "{tag}")
- pattern = BASE_PATTERN + r"/explore/tags/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/explore/tags/([^/?#]+)"
example = "https://www.instagram.com/explore/tags/TAG/"
def metadata(self):
@@ -734,7 +749,7 @@ class InstagramTagExtractor(InstagramExtractor):
class InstagramInfoExtractor(InstagramExtractor):
"""Extractor for an Instagram user's profile data"""
subcategory = "info"
- pattern = USER_PATTERN + r"/info"
+ pattern = rf"{USER_PATTERN}/info"
example = "https://www.instagram.com/USER/info/"
def items(self):
@@ -744,13 +759,13 @@ class InstagramInfoExtractor(InstagramExtractor):
else:
user = self.api.user_by_name(screen_name)
- return iter(((Message.Directory, user),))
+ return iter(((Message.Directory, "", user),))
class InstagramAvatarExtractor(InstagramExtractor):
"""Extractor for an Instagram user's avatar"""
subcategory = "avatar"
- pattern = USER_PATTERN + r"/avatar"
+ pattern = rf"{USER_PATTERN}/avatar"
example = "https://www.instagram.com/USER/avatar/"
def posts(self):
@@ -858,8 +873,11 @@ class InstagramRestAPI():
def user_by_name(self, screen_name):
endpoint = "/v1/users/web_profile_info/"
params = {"username": screen_name}
- return self._call(
- endpoint, params=params, notfound="user")["data"]["user"]
+ try:
+ return self._call(
+ endpoint, params=params, notfound="user")["data"]["user"]
+ except KeyError:
+ raise exception.NotFoundError("user")
@memcache(keyarg=1)
def user_by_id(self, user_id):
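[note] user_by_name() now converts a missing "data"/"user" key in an otherwise successful web_profile_info response into a proper NotFoundError instead of leaking a bare KeyError. The guarded lookup in isolation, with stand-in types and a fake API call:

class NotFoundError(Exception):
    pass

def user_by_name(call, screen_name):
    response = call("/v1/users/web_profile_info/",
                    params={"username": screen_name})
    try:
        return response["data"]["user"]
    except KeyError:
        raise NotFoundError("user")

try:                                   # endpoint answered, but no user data
    user_by_name(lambda endpoint, params: {}, "ghost")
except NotFoundError:
    print("user not found")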
diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py
index 06c5caa..c3fbf8d 100644
--- a/gallery_dl/extractor/issuu.py
+++ b/gallery_dl/extractor/issuu.py
@@ -36,8 +36,8 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
'{"":' + data.replace('\\"', '"')))
doc = data["initialDocumentData"]["document"]
- doc["date"] = text.parse_datetime(
- doc["originalPublishDateInISOString"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ doc["date"] = self.parse_datetime_iso(
+ doc["originalPublishDateInISOString"])
self.count = text.parse_int(doc["pageCount"])
self.base = (f"https://image.isu.pub/{doc['revisionId']}-"
@@ -68,7 +68,7 @@ class IssuuUserExtractor(IssuuBase, Extractor):
data = text.extr(html, '\\"docs\\":', '}]\\n"]')
docs = util.json_loads(data.replace('\\"', '"'))
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
return
for publication in docs:
diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py
index 19ffc50..566ee8b 100644
--- a/gallery_dl/extractor/itaku.py
+++ b/gallery_dl/extractor/itaku.py
@@ -13,7 +13,7 @@ from ..cache import memcache
from .. import text, util
BASE_PATTERN = r"(?:https?://)?itaku\.ee"
-USER_PATTERN = BASE_PATTERN + r"/profile/([^/?#]+)"
+USER_PATTERN = rf"{BASE_PATTERN}/profile/([^/?#]+)"
class ItakuExtractor(Extractor):
@@ -32,8 +32,7 @@ class ItakuExtractor(Extractor):
def items(self):
if images := self.images():
for image in images:
- image["date"] = text.parse_datetime(
- image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ image["date"] = self.parse_datetime_iso(image["date_added"])
for category, tags in image.pop("categorized_tags").items():
image[f"tags_{category.lower()}"] = [
t["name"] for t in tags]
@@ -52,7 +51,7 @@ class ItakuExtractor(Extractor):
else:
url = image["image"]
- yield Message.Directory, image
+ yield Message.Directory, "", image
yield Message.Url, url, text.nameext_from_url(url, image)
return
@@ -60,15 +59,14 @@ class ItakuExtractor(Extractor):
for post in posts:
images = post.pop("gallery_images") or ()
post["count"] = len(images)
- post["date"] = text.parse_datetime(
- post["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ post["date"] = self.parse_datetime_iso(post["date_added"])
post["tags"] = [t["name"] for t in post["tags"]]
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], image in enumerate(images, 1):
post["file"] = image
- image["date"] = text.parse_datetime(
- image["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ image["date"] = self.parse_datetime_iso(
+ image["date_added"])
url = image["image"]
yield Message.Url, url, text.nameext_from_url(url, post)
@@ -88,7 +86,7 @@ class ItakuExtractor(Extractor):
class ItakuGalleryExtractor(ItakuExtractor):
"""Extractor for an itaku user's gallery"""
subcategory = "gallery"
- pattern = USER_PATTERN + r"/gallery(?:/(\d+))?"
+ pattern = rf"{USER_PATTERN}/gallery(?:/(\d+))?"
example = "https://itaku.ee/profile/USER/gallery"
def images(self):
@@ -106,7 +104,7 @@ class ItakuPostsExtractor(ItakuExtractor):
"{id}{title:? //}")
filename_fmt = "{file[id]}{file[title]:? //}.{extension}"
archive_fmt = "{id}_{file[id]}"
- pattern = USER_PATTERN + r"/posts(?:/(\d+))?"
+ pattern = rf"{USER_PATTERN}/posts(?:/(\d+))?"
example = "https://itaku.ee/profile/USER/posts"
def posts(self):
@@ -120,7 +118,7 @@ class ItakuPostsExtractor(ItakuExtractor):
class ItakuStarsExtractor(ItakuExtractor):
"""Extractor for an itaku user's starred images"""
subcategory = "stars"
- pattern = USER_PATTERN + r"/stars(?:/(\d+))?"
+ pattern = rf"{USER_PATTERN}/stars(?:/(\d+))?"
example = "https://itaku.ee/profile/USER/stars"
def images(self):
@@ -134,7 +132,7 @@ class ItakuStarsExtractor(ItakuExtractor):
class ItakuFollowingExtractor(ItakuExtractor):
subcategory = "following"
- pattern = USER_PATTERN + r"/following"
+ pattern = rf"{USER_PATTERN}/following"
example = "https://itaku.ee/profile/USER/following"
def users(self):
@@ -145,7 +143,7 @@ class ItakuFollowingExtractor(ItakuExtractor):
class ItakuFollowersExtractor(ItakuExtractor):
subcategory = "followers"
- pattern = USER_PATTERN + r"/followers"
+ pattern = rf"{USER_PATTERN}/followers"
example = "https://itaku.ee/profile/USER/followers"
def users(self):
@@ -157,7 +155,7 @@ class ItakuFollowersExtractor(ItakuExtractor):
class ItakuBookmarksExtractor(ItakuExtractor):
"""Extractor for an itaku bookmarks folder"""
subcategory = "bookmarks"
- pattern = USER_PATTERN + r"/bookmarks/(image|user)/(\d+)"
+ pattern = rf"{USER_PATTERN}/bookmarks/(image|user)/(\d+)"
example = "https://itaku.ee/profile/USER/bookmarks/image/12345"
def _init(self):
@@ -178,23 +176,23 @@ class ItakuBookmarksExtractor(ItakuExtractor):
class ItakuUserExtractor(Dispatch, ItakuExtractor):
"""Extractor for itaku user profiles"""
- pattern = USER_PATTERN + r"/?(?:$|\?|#)"
+ pattern = rf"{USER_PATTERN}/?(?:$|\?|#)"
example = "https://itaku.ee/profile/USER"
def items(self):
base = f"{self.root}/profile/{self.groups[0]}/"
return self._dispatch_extractors((
- (ItakuGalleryExtractor , base + "gallery"),
- (ItakuPostsExtractor , base + "posts"),
- (ItakuFollowersExtractor, base + "followers"),
- (ItakuFollowingExtractor, base + "following"),
- (ItakuStarsExtractor , base + "stars"),
+ (ItakuGalleryExtractor , f"{base}gallery"),
+ (ItakuPostsExtractor , f"{base}posts"),
+ (ItakuFollowersExtractor, f"{base}followers"),
+ (ItakuFollowingExtractor, f"{base}following"),
+ (ItakuStarsExtractor , f"{base}stars"),
), ("gallery",))
class ItakuImageExtractor(ItakuExtractor):
subcategory = "image"
- pattern = BASE_PATTERN + r"/images/(\d+)"
+ pattern = rf"{BASE_PATTERN}/images/(\d+)"
example = "https://itaku.ee/images/12345"
def images(self):
@@ -207,7 +205,7 @@ class ItakuPostExtractor(ItakuExtractor):
"{id}{title:? //}")
filename_fmt = "{file[id]}{file[title]:? //}.{extension}"
archive_fmt = "{id}_{file[id]}"
- pattern = BASE_PATTERN + r"/posts/(\d+)"
+ pattern = rf"{BASE_PATTERN}/posts/(\d+)"
example = "https://itaku.ee/posts/12345"
def posts(self):
@@ -216,7 +214,7 @@ class ItakuPostExtractor(ItakuExtractor):
class ItakuSearchExtractor(ItakuExtractor):
subcategory = "search"
- pattern = BASE_PATTERN + r"/home/images/?\?([^#]+)"
+ pattern = rf"{BASE_PATTERN}/home/images/?\?([^#]+)"
example = "https://itaku.ee/home/images?tags=SEARCH"
def images(self):
@@ -248,7 +246,7 @@ class ItakuAPI():
def __init__(self, extractor):
self.extractor = extractor
- self.root = extractor.root + "/api"
+ self.root = f"{extractor.root}/api"
self.headers = {
"Accept": "application/json, text/plain, */*",
}
@@ -259,7 +257,7 @@ class ItakuAPI():
"cursor" : None,
"date_range": "",
"maturity_rating": ("SFW", "Questionable", "NSFW"),
- "ordering" : "-date_added",
+ "ordering" : self._order(),
"page" : "1",
"page_size" : "30",
"visibility": ("PUBLIC", "PROFILE_ONLY"),
@@ -273,7 +271,7 @@ class ItakuAPI():
"cursor" : None,
"date_range": "",
"maturity_rating": ("SFW", "Questionable", "NSFW"),
- "ordering" : "-date_added",
+ "ordering" : self._order(),
"page" : "1",
"page_size" : "30",
**params,
@@ -284,7 +282,7 @@ class ItakuAPI():
endpoint = "/user_profiles/"
params = {
"cursor" : None,
- "ordering" : "-date_added",
+ "ordering" : self._order(),
"page" : "1",
"page_size": "50",
"sfw_only" : "false",
@@ -311,7 +309,7 @@ class ItakuAPI():
def _call(self, endpoint, params=None):
if not endpoint.startswith("http"):
- endpoint = self.root + endpoint
+ endpoint = f"{self.root}{endpoint}"
return self.extractor.request_json(
endpoint, params=params, headers=self.headers)
@@ -330,3 +328,11 @@ class ItakuAPI():
return
data = self._call(url_next)
+
+ def _order(self):
+ if order := self.extractor.config("order"):
+ if order in {"a", "asc", "r", "reverse"}:
+ return "date_added"
+ if order not in {"d", "desc"}:
+ return order
+ return "-date_added"
diff --git a/gallery_dl/extractor/itchio.py b/gallery_dl/extractor/itchio.py
index 6312e58..6fefe33 100644
--- a/gallery_dl/extractor/itchio.py
+++ b/gallery_dl/extractor/itchio.py
@@ -57,5 +57,5 @@ class ItchioGameExtractor(Extractor):
game = {"game": game, "user": user, "id": upload_id}
url = info["url"]
- yield Message.Directory, game
+ yield Message.Directory, "", game
yield Message.Url, url, text.nameext_from_url(url, game)
diff --git a/gallery_dl/extractor/iwara.py b/gallery_dl/extractor/iwara.py
index 8af2f42..d9d1cf0 100644
--- a/gallery_dl/extractor/iwara.py
+++ b/gallery_dl/extractor/iwara.py
@@ -47,7 +47,7 @@ class IwaraExtractor(Extractor):
group_info["type"] = "image"
group_info["count"] = len(files)
- yield Message.Directory, group_info
+ yield Message.Directory, "", group_info
for num, file in enumerate(files, 1):
file_info = self.extract_media_info(file, None)
file_id = file_info["file_id"]
@@ -78,7 +78,7 @@ class IwaraExtractor(Extractor):
video["id"], exc.__class__.__name__, exc)
continue
- yield Message.Directory, info
+ yield Message.Directory, "", info
yield Message.Url, f"https:{download_url}", info
def items_user(self, users, key=None):
@@ -122,10 +122,10 @@ class IwaraExtractor(Extractor):
info["file_id"] = file_info.get("id")
info["filename"] = filename
info["extension"] = extension
- info["date"] = text.parse_datetime(
- file_info.get("createdAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
- info["date_updated"] = text.parse_datetime(
- file_info.get("updatedAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
+ info["date"] = self.parse_datetime_iso(
+ file_info.get("createdAt"))
+ info["date_updated"] = self.parse_datetime_iso(
+ file_info.get("updatedAt"))
info["mime"] = file_info.get("mime")
info["size"] = file_info.get("size")
info["width"] = file_info.get("width")
@@ -144,8 +144,7 @@ class IwaraExtractor(Extractor):
"status" : user.get("status"),
"role" : user.get("role"),
"premium": user.get("premium"),
- "date" : text.parse_datetime(
- user.get("createdAt"), "%Y-%m-%dT%H:%M:%S.000Z"),
+ "date" : self.parse_datetime_iso(user.get("createdAt")),
"description": profile.get("body"),
}
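
Throughout this patch, per-extractor `text.parse_datetime(..., "%Y-%m-%dT%H:%M:%S.%fZ")` calls give way to a shared `parse_datetime_iso()` helper (backed by the new `gallery_dl/dt.py` module). Its internals are not shown here, so the following is only a rough standard-library equivalent of what such a helper has to accept:

    from datetime import datetime, timezone

    def parse_datetime_iso(value):
        # Accept ISO 8601 strings with or without fractional seconds,
        # with either a trailing "Z" or an explicit UTC offset.
        if value is None:
            return None
        if value.endswith("Z"):
            value = value[:-1] + "+00:00"
        parsed = datetime.fromisoformat(value)
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    print(parse_datetime_iso("2025-12-20T05:49:04.123456Z"))
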
diff --git a/gallery_dl/extractor/jschan.py b/gallery_dl/extractor/jschan.py
index 5f3e75a..5dacf70 100644
--- a/gallery_dl/extractor/jschan.py
+++ b/gallery_dl/extractor/jschan.py
@@ -30,7 +30,7 @@ class JschanThreadExtractor(JschanExtractor):
"{threadId} {subject|nomarkup[:50]}")
filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
archive_fmt = "{board}_{postId}_{num}"
- pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)\.html"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/thread/(\d+)\.html"
example = "https://94chan.org/a/thread/12345.html"
def items(self):
@@ -39,7 +39,7 @@ class JschanThreadExtractor(JschanExtractor):
thread["threadId"] = thread["postId"]
posts = thread.pop("replies", ())
- yield Message.Directory, thread
+ yield Message.Directory, "", thread
for post in itertools.chain((thread,), posts):
if files := post.pop("files", ()):
thread.update(post)
@@ -56,7 +56,7 @@ class JschanThreadExtractor(JschanExtractor):
class JschanBoardExtractor(JschanExtractor):
"""Extractor for jschan boards"""
subcategory = "board"
- pattern = (BASE_PATTERN + r"/([^/?#]+)"
+ pattern = (rf"{BASE_PATTERN}/([^/?#]+)"
r"(?:/index\.html|/catalog\.html|/\d+\.html|/?$)")
example = "https://94chan.org/a/"
diff --git a/gallery_dl/extractor/kabeuchi.py b/gallery_dl/extractor/kabeuchi.py
index c259c47..88f2e32 100644
--- a/gallery_dl/extractor/kabeuchi.py
+++ b/gallery_dl/extractor/kabeuchi.py
@@ -32,9 +32,8 @@ class KabeuchiUserExtractor(Extractor):
if post.get("is_ad") or not post["image1"]:
continue
- post["date"] = text.parse_datetime(
- post["created_at"], "%Y-%m-%d %H:%M:%S")
- yield Message.Directory, post
+ post["date"] = self.parse_datetime_iso(post["created_at"])
+ yield Message.Directory, "", post
for key in keys:
name = post[key]
diff --git a/gallery_dl/extractor/keenspot.py b/gallery_dl/extractor/keenspot.py
index f55a930..3c1eb24 100644
--- a/gallery_dl/extractor/keenspot.py
+++ b/gallery_dl/extractor/keenspot.py
@@ -34,7 +34,7 @@ class KeenspotComicExtractor(Extractor):
def items(self):
data = {"comic": self.comic}
- yield Message.Directory, data
+ yield Message.Directory, "", data
with self.request(self.root + "/") as response:
if response.history:
diff --git a/gallery_dl/extractor/kemono.py b/gallery_dl/extractor/kemono.py
index b4a8abc..bf35670 100644
--- a/gallery_dl/extractor/kemono.py
+++ b/gallery_dl/extractor/kemono.py
@@ -16,7 +16,7 @@ import json
BASE_PATTERN = (r"(?:https?://)?(?:www\.|beta\.)?"
r"(kemono|coomer)\.(cr|s[tu]|party)")
-USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
+USER_PATTERN = rf"{BASE_PATTERN}/([^/?#]+)/user/([^/?#]+)"
HASH_PATTERN = r"/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})"
@@ -44,7 +44,7 @@ class KemonoExtractor(Extractor):
order = self.config("order-revisions")
self.revisions_reverse = order[0] in ("r", "a") if order else False
- self._find_inline = util.re(
+ self._find_inline = text.re(
r'src="(?:https?://(?:kemono\.cr|coomer\.st))?(/inline/[^"]+'
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
self._json_dumps = json.JSONEncoder(
@@ -52,7 +52,7 @@ class KemonoExtractor(Extractor):
sort_keys=True, separators=(",", ":")).encode
def items(self):
- find_hash = util.re(HASH_PATTERN).match
+ find_hash = text.re(HASH_PATTERN).match
generators = self._build_file_generators(self.config("files"))
announcements = True if self.config("announcements") else None
archives = True if self.config("archives") else False
@@ -145,18 +145,24 @@ class KemonoExtractor(Extractor):
file["hash"] = hash = ""
if url[0] == "/":
- url = self.root + "/data" + url
+ url = f"{self.root}/data{url}"
elif url.startswith(self.root):
- url = self.root + "/data" + url[20:]
+ url = f"{self.root}/data{url[20:]}"
file["url"] = url
- text.nameext_from_url(file.get("name", url), file)
- ext = text.ext_from_url(url)
- if not file["extension"]:
- file["extension"] = ext
- elif ext == "txt" and file["extension"] != "txt":
- file["_http_validate"] = _validate
- elif ext in exts_archive or \
+ if name := file.get("name"):
+ text.nameext_from_name(name, file)
+ ext = text.ext_from_url(url)
+
+ if not file["extension"]:
+ file["extension"] = ext
+ elif ext == "txt" and file["extension"] != "txt":
+ file["_http_validate"] = _validate
+ else:
+ text.nameext_from_url(url, file)
+ ext = file["extension"]
+
+ if ext in exts_archive or \
ext == "bin" and file["extension"] in exts_archive:
file["type"] = "archive"
if archives:
@@ -176,7 +182,7 @@ class KemonoExtractor(Extractor):
files.append(file)
post["count"] = len(files)
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], file in enumerate(files, 1):
if "id" in file:
del file["id"]
@@ -194,13 +200,13 @@ class KemonoExtractor(Extractor):
username = username[0]
self.log.info("Logging in as %s", username)
- url = self.root + "/api/v1/authentication/login"
+ url = f"{self.root}/api/v1/authentication/login"
data = {"username": username, "password": password}
response = self.request(url, method="POST", json=data, fatal=False)
if response.status_code >= 400:
try:
- msg = '"' + response.json()["error"] + '"'
+ msg = f'"{response.json()["error"]}"'
except Exception:
msg = '"Username or password is incorrect"'
raise exception.AuthenticationError(msg)
@@ -238,7 +244,7 @@ class KemonoExtractor(Extractor):
def _parse_datetime(self, date_string):
if len(date_string) > 19:
date_string = date_string[:19]
- return text.parse_datetime(date_string, "%Y-%m-%dT%H:%M:%S")
+ return self.parse_datetime_iso(date_string)
def _revisions(self, posts):
return itertools.chain.from_iterable(
@@ -316,7 +322,7 @@ def _validate(response):
class KemonoUserExtractor(KemonoExtractor):
"""Extractor for all posts from a kemono.cr user listing"""
subcategory = "user"
- pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|\?|#)"
+ pattern = rf"{USER_PATTERN}/?(?:\?([^#]+))?(?:$|\?|#)"
example = "https://kemono.cr/SERVICE/user/12345"
def __init__(self, match):
@@ -339,7 +345,7 @@ class KemonoUserExtractor(KemonoExtractor):
class KemonoPostsExtractor(KemonoExtractor):
"""Extractor for kemono.cr post listings"""
subcategory = "posts"
- pattern = BASE_PATTERN + r"/posts()()(?:/?\?([^#]+))?"
+ pattern = rf"{BASE_PATTERN}/posts()()(?:/?\?([^#]+))?"
example = "https://kemono.cr/posts"
def posts(self):
@@ -351,7 +357,7 @@ class KemonoPostsExtractor(KemonoExtractor):
class KemonoPostExtractor(KemonoExtractor):
"""Extractor for a single kemono.cr post"""
subcategory = "post"
- pattern = USER_PATTERN + r"/post/([^/?#]+)(/revisions?(?:/(\d*))?)?"
+ pattern = rf"{USER_PATTERN}/post/([^/?#]+)(/revisions?(?:/(\d*))?)?"
example = "https://kemono.cr/SERVICE/user/12345/post/12345"
def __init__(self, match):
@@ -384,7 +390,7 @@ class KemonoDiscordExtractor(KemonoExtractor):
"{server_id} {server}", "{channel_id} {channel}")
filename_fmt = "{id}_{num:>02}_{filename}.{extension}"
archive_fmt = "discord_{server_id}_{id}_{num}"
- pattern = BASE_PATTERN + r"/discord/server/(\d+)[/#](?:channel/)?(\d+)"
+ pattern = rf"{BASE_PATTERN}/discord/server/(\d+)[/#](?:channel/)?(\d+)"
example = "https://kemono.cr/discord/server/12345/12345"
def items(self):
@@ -407,10 +413,10 @@ class KemonoDiscordExtractor(KemonoExtractor):
"parent_id" : channel["parent_channel_id"],
}
- find_inline = util.re(
+ find_inline = text.re(
r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
- find_hash = util.re(HASH_PATTERN).match
+ find_hash = text.re(HASH_PATTERN).match
if (order := self.config("order-posts")) and order[0] in ("r", "d"):
posts = self.api.discord_channel(channel_id, channel["post_count"])
@@ -428,13 +434,13 @@ class KemonoDiscordExtractor(KemonoExtractor):
attachment["type"] = "attachment"
files.append(attachment)
for path in find_inline(post["content"] or ""):
- files.append({"path": "https://cdn.discordapp.com" + path,
+ files.append({"path": f"https://cdn.discordapp.com{path}",
"name": path, "type": "inline", "hash": ""})
post.update(data)
post["date"] = self._parse_datetime(post["published"])
post["count"] = len(files)
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], file in enumerate(files, 1):
post["hash"] = file["hash"]
@@ -446,15 +452,15 @@ class KemonoDiscordExtractor(KemonoExtractor):
post["extension"] = text.ext_from_url(url)
if url[0] == "/":
- url = self.root + "/data" + url
+ url = f"{self.root}/data{url}"
elif url.startswith(self.root):
- url = self.root + "/data" + url[20:]
+ url = f"{self.root}/data{url[20:]}"
yield Message.Url, url, post
class KemonoDiscordServerExtractor(KemonoExtractor):
subcategory = "discord-server"
- pattern = BASE_PATTERN + r"/discord/server/(\d+)$"
+ pattern = rf"{BASE_PATTERN}/discord/server/(\d+)$"
example = "https://kemono.cr/discord/server/12345"
def items(self):
@@ -482,7 +488,7 @@ def discord_server_info(extr, server_id):
class KemonoFavoriteExtractor(KemonoExtractor):
"""Extractor for kemono.cr favorites"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/(?:account/)?favorites()()(?:/?\?([^#]+))?"
+ pattern = rf"{BASE_PATTERN}/(?:account/)?favorites()()(?:/?\?([^#]+))?"
example = "https://kemono.cr/account/favorites/artists"
def items(self):
@@ -530,7 +536,7 @@ class KemonoFavoriteExtractor(KemonoExtractor):
class KemonoArtistsExtractor(KemonoExtractor):
"""Extractor for kemono artists"""
subcategory = "artists"
- pattern = BASE_PATTERN + r"/artists(?:\?([^#]+))?"
+ pattern = rf"{BASE_PATTERN}/artists(?:\?([^#]+))?"
example = "https://kemono.cr/artists"
def items(self):
@@ -564,32 +570,32 @@ class KemonoArtistsExtractor(KemonoExtractor):
class KemonoAPI():
- """Interface for the Kemono API v1.1.0
+ """Interface for the Kemono API v1.3.0
https://kemono.cr/documentation/api
"""
def __init__(self, extractor):
self.extractor = extractor
- self.root = extractor.root + "/api/v1"
+ self.root = f"{extractor.root}/api"
self.headers = {"Accept": "text/css"}
def posts(self, offset=0, query=None, tags=None):
- endpoint = "/posts"
+ endpoint = "/v1/posts"
params = {"q": query, "o": offset, "tag": tags}
return self._pagination(endpoint, params, 50, "posts")
def file(self, file_hash):
- endpoint = "/file/" + file_hash
+ endpoint = f"/v1/file/{file_hash}"
return self._call(endpoint)
def creators(self):
- endpoint = "/creators"
+ endpoint = "/v1/creators"
return self._call(endpoint)
def creator_posts(self, service, creator_id,
offset=0, query=None, tags=None):
- endpoint = f"/{service}/user/{creator_id}/posts"
+ endpoint = f"/v1/{service}/user/{creator_id}/posts"
params = {"o": offset, "tag": tags, "q": query}
return self._pagination(endpoint, params, 50)
@@ -601,58 +607,58 @@ class KemonoAPI():
service, creator_id, post["id"])["post"]
def creator_announcements(self, service, creator_id):
- endpoint = f"/{service}/user/{creator_id}/announcements"
+ endpoint = f"/v1/{service}/user/{creator_id}/announcements"
return self._call(endpoint)
def creator_dms(self, service, creator_id):
- endpoint = f"/{service}/user/{creator_id}/dms"
+ endpoint = f"/v1/{service}/user/{creator_id}/dms"
return self._call(endpoint)
def creator_fancards(self, service, creator_id):
- endpoint = f"/{service}/user/{creator_id}/fancards"
+ endpoint = f"/v1/{service}/user/{creator_id}/fancards"
return self._call(endpoint)
def creator_post(self, service, creator_id, post_id):
- endpoint = f"/{service}/user/{creator_id}/post/{post_id}"
+ endpoint = f"/v1/{service}/user/{creator_id}/post/{post_id}"
return self._call(endpoint)
def creator_post_comments(self, service, creator_id, post_id):
- endpoint = f"/{service}/user/{creator_id}/post/{post_id}/comments"
+ endpoint = f"/v1/{service}/user/{creator_id}/post/{post_id}/comments"
return self._call(endpoint, fatal=False)
def creator_post_revisions(self, service, creator_id, post_id):
- endpoint = f"/{service}/user/{creator_id}/post/{post_id}/revisions"
+ endpoint = f"/v1/{service}/user/{creator_id}/post/{post_id}/revisions"
return self._call(endpoint, fatal=False)
def creator_profile(self, service, creator_id):
- endpoint = f"/{service}/user/{creator_id}/profile"
+ endpoint = f"/v1/{service}/user/{creator_id}/profile"
return self._call(endpoint)
def creator_links(self, service, creator_id):
- endpoint = f"/{service}/user/{creator_id}/links"
+ endpoint = f"/v1/{service}/user/{creator_id}/links"
return self._call(endpoint)
def creator_tags(self, service, creator_id):
- endpoint = f"/{service}/user/{creator_id}/tags"
+ endpoint = f"/v1/{service}/user/{creator_id}/tags"
return self._call(endpoint)
def discord_channel(self, channel_id, post_count=None):
- endpoint = f"/discord/channel/{channel_id}"
+ endpoint = f"/v1/discord/channel/{channel_id}"
if post_count is None:
return self._pagination(endpoint, {}, 150)
else:
return self._pagination_reverse(endpoint, {}, 150, post_count)
def discord_channel_lookup(self, server_id):
- endpoint = f"/discord/channel/lookup/{server_id}"
+ endpoint = f"/v1/discord/channel/lookup/{server_id}"
return self._call(endpoint)
def discord_server(self, server_id):
- endpoint = f"/discord/server/{server_id}"
+ endpoint = f"/v1/discord/server/{server_id}"
return self._call(endpoint)
def account_favorites(self, type):
- endpoint = "/account/favorites"
+ endpoint = "/v1/account/favorites"
params = {"type": type}
return self._call(endpoint, params)
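
The KemonoAPI change above moves the version segment out of the client root and into each endpoint: the root becomes a bare `/api`, and every endpoint carries its own `/v1/...` prefix, so differently-versioned endpoints could coexist on one client. A quick sketch of why the resulting URLs are unchanged:

    OLD_ROOT = "https://kemono.cr/api/v1"   # pre-1.3.0 client root
    NEW_ROOT = "https://kemono.cr/api"      # version moved into the endpoint

    def url_old(endpoint):                  # endpoint like "/creators"
        return OLD_ROOT + endpoint

    def url_new(endpoint):                  # endpoint like "/v1/creators"
        return NEW_ROOT + endpoint

    assert url_old("/creators") == url_new("/v1/creators")
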
diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py
index f22d54e..8d1497d 100644
--- a/gallery_dl/extractor/khinsider.py
+++ b/gallery_dl/extractor/khinsider.py
@@ -35,7 +35,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
raise exception.NotFoundError("soundtrack")
data = self.metadata(page)
- yield Message.Directory, data
+ yield Message.Directory, "", data
if self.config("covers", False):
for num, url in enumerate(self._extract_covers(page), 1):
diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py
index 816bc3d..e2f00e1 100644
--- a/gallery_dl/extractor/komikcast.py
+++ b/gallery_dl/extractor/komikcast.py
@@ -9,7 +9,7 @@
"""Extractors for https://komikcast.li/"""
from .common import ChapterExtractor, MangaExtractor
-from .. import text, util
+from .. import text
BASE_PATTERN = (r"(?:https?://)?(?:www\.)?"
r"komikcast\d*\.(?:l(?:i|a|ol)|com|cz|site|mo?e)")
@@ -25,7 +25,7 @@ class KomikcastBase():
if data is None:
data = {}
- pattern = util.re(r"(?:(.*) Chapter )?0*(\d+)([^ ]*)(?: (?:- )?(.+))?")
+ pattern = text.re(r"(?:(.*) Chapter )?0*(\d+)([^ ]*)(?: (?:- )?(.+))?")
match = pattern.match(text.unescape(chapter_string))
manga, chapter, data["chapter_minor"], title = match.groups()
@@ -44,7 +44,7 @@ class KomikcastBase():
class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
"""Extractor for komikcast manga chapters"""
- pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/)"
+ pattern = rf"{BASE_PATTERN}(/chapter/[^/?#]+/)"
example = "https://komikcast.li/chapter/TITLE/"
def metadata(self, page):
@@ -54,7 +54,7 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
def images(self, page):
readerarea = text.extr(
page, '<div class="main-reading-area', '</div')
- pattern = util.re(r"<img[^>]* src=[\"']([^\"']+)")
+ pattern = text.re(r"<img[^>]* src=[\"']([^\"']+)")
return [
(text.unescape(url), None)
for url in pattern.findall(readerarea)
@@ -64,7 +64,7 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
"""Extractor for komikcast manga"""
chapterclass = KomikcastChapterExtractor
- pattern = BASE_PATTERN + r"(/(?:komik/)?[^/?#]+/?)$"
+ pattern = rf"{BASE_PATTERN}(/(?:komik/)?[^/?#]+/?)$"
example = "https://komikcast.li/komik/TITLE"
def chapters(self, page):
diff --git a/gallery_dl/extractor/koofr.py b/gallery_dl/extractor/koofr.py
new file mode 100644
index 0000000..9ebc133
--- /dev/null
+++ b/gallery_dl/extractor/koofr.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://koofr.net/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class KoofrSharedExtractor(Extractor):
+ """Base class for koofr extractors"""
+ category = "koofr"
+ subcategory = "shared"
+ root = "https://app.koofr.net"
+ pattern = (r"(?:https?://)?(?:"
+ r"(?:app\.)?koofr\.(?:net|eu)/links/([\w-]+)|"
+ r"k00\.fr/(\w+))")
+ example = "https://app.koofr.net/links/UUID"
+
+ def items(self):
+ uuid, code = self.groups
+ if code is not None:
+ uuid = self.request_location(
+ "https://k00.fr/" + code, method="GET").rpartition("/")[2]
+
+ url = f"{self.root}/api/v2/public/links/{uuid}"
+ referer = f"{self.root}/links/{uuid}"
+ password = self.config("password")
+ params = {"password": password or ""}
+ headers = {
+ "Referer" : referer,
+ "X-Client" : "newfrontend",
+ "X-Koofr-Version": "2.1",
+ "Sec-Fetch-Dest" : "empty",
+ "Sec-Fetch-Mode" : "cors",
+ "Sec-Fetch-Site" : "same-origin",
+ }
+ data = self.request_json(url, params=params, headers=headers)
+
+ name = data["name"]
+ file = text.nameext_from_name(name, data["file"])
+ file["_http_headers"] = {"Referer": referer}
+
+ root = data.get("publicUrlBase") or self.root
+ url = f"{root}/content/links/{uuid}/files/get/{name}?path=/&force="
+ if password:
+ url = f"{url}&password={password}"
+
+ yield Message.Directory, "", file
+ yield Message.Url, url, file
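
The new koofr extractor resolves `k00.fr` short codes to a link UUID via a redirect, then queries the public-links API and builds a direct download URL. A rough requests-based sketch of that flow, under the same assumptions as the code above (endpoint shape and headers as shown; redirect handling and error paths omitted):

    import requests

    def fetch_koofr_link(uuid, password=""):
        root = "https://app.koofr.net"
        # Same public endpoint the extractor queries; password may be empty.
        resp = requests.get(
            f"{root}/api/v2/public/links/{uuid}",
            params={"password": password},
            headers={"Referer": f"{root}/links/{uuid}",
                     "X-Client": "newfrontend",
                     "X-Koofr-Version": "2.1"})
        resp.raise_for_status()
        data = resp.json()

        base = data.get("publicUrlBase") or root
        name = data["name"]
        url = f"{base}/content/links/{uuid}/files/get/{name}?path=/&force="
        if password:
            url += f"&password={password}"
        return data, url
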
diff --git a/gallery_dl/extractor/leakgallery.py b/gallery_dl/extractor/leakgallery.py
index c609891..2939304 100644
--- a/gallery_dl/extractor/leakgallery.py
+++ b/gallery_dl/extractor/leakgallery.py
@@ -37,7 +37,7 @@ class LeakgalleryExtractor(Extractor):
media["url"] = url = f"https://cdn.leakgallery.com/{path}"
text.nameext_from_url(url, media)
- yield Message.Directory, media
+ yield Message.Directory, "", media
yield Message.Url, url, media
def _pagination(self, type, base, params=None, creator=None, pnum=1):
@@ -81,7 +81,7 @@ class LeakgalleryUserExtractor(LeakgalleryExtractor):
class LeakgalleryTrendingExtractor(LeakgalleryExtractor):
"""Extractor for trending posts on leakgallery.com"""
subcategory = "trending"
- pattern = BASE_PATTERN + r"/trending-medias(?:/([\w-]+))?"
+ pattern = rf"{BASE_PATTERN}/trending-medias(?:/([\w-]+))?"
example = "https://leakgallery.com/trending-medias/Week"
def items(self):
@@ -93,7 +93,7 @@ class LeakgalleryTrendingExtractor(LeakgalleryExtractor):
class LeakgalleryMostlikedExtractor(LeakgalleryExtractor):
"""Extractor for most liked posts on leakgallery.com"""
subcategory = "mostliked"
- pattern = BASE_PATTERN + r"/most-liked"
+ pattern = rf"{BASE_PATTERN}/most-liked"
example = "https://leakgallery.com/most-liked"
def items(self):
@@ -104,7 +104,7 @@ class LeakgalleryMostlikedExtractor(LeakgalleryExtractor):
class LeakgalleryPostExtractor(LeakgalleryExtractor):
"""Extractor for individual posts on leakgallery.com"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/([^/?#]+)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/(\d+)"
example = "https://leakgallery.com/CREATOR/12345"
def items(self):
@@ -134,7 +134,7 @@ class LeakgalleryPostExtractor(LeakgalleryExtractor):
"url": url,
}
text.nameext_from_url(url, data)
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, url, data
except Exception as exc:
self.log.error("Failed to extract post page %s/%s: %s",
diff --git a/gallery_dl/extractor/lensdump.py b/gallery_dl/extractor/lensdump.py
index b0198d5..a7b1318 100644
--- a/gallery_dl/extractor/lensdump.py
+++ b/gallery_dl/extractor/lensdump.py
@@ -31,7 +31,7 @@ class LensdumpBase():
class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor):
subcategory = "album"
- pattern = BASE_PATTERN + r"/a/(\w+)(?:/?\?([^#]+))?"
+ pattern = rf"{BASE_PATTERN}/a/(\w+)(?:/?\?([^#]+))?"
example = "https://lensdump.com/a/ID"
def __init__(self, match):
@@ -76,7 +76,7 @@ class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor):
class LensdumpAlbumsExtractor(LensdumpBase, Extractor):
"""Extractor for album list from lensdump.com"""
subcategory = "albums"
- pattern = BASE_PATTERN + r"/(?![ai]/)([^/?#]+)(?:/?\?([^#]+))?"
+ pattern = rf"{BASE_PATTERN}/(?![ai]/)([^/?#]+)(?:/?\?([^#]+))?"
example = "https://lensdump.com/USER"
def items(self):
@@ -119,10 +119,9 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
'property="image:width" content="', '"')),
"height": text.parse_int(extr(
'property="image:height" content="', '"')),
- "date" : text.parse_datetime(extr(
- '<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
+ "date" : self.parse_datetime_iso(extr('<span title="', '"')),
}
text.nameext_from_url(data["url"], data)
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, data["url"], data
diff --git a/gallery_dl/extractor/lexica.py b/gallery_dl/extractor/lexica.py
index 6e54847..fc44f51 100644
--- a/gallery_dl/extractor/lexica.py
+++ b/gallery_dl/extractor/lexica.py
@@ -36,7 +36,7 @@ class LexicaSearchExtractor(Extractor):
image["filename"] = image["id"]
image["extension"] = "jpg"
image["search_tags"] = tags
- yield Message.Directory, image
+ yield Message.Directory, "", image
yield Message.Url, base + image["id"], image
def posts(self):
diff --git a/gallery_dl/extractor/lightroom.py b/gallery_dl/extractor/lightroom.py
index b557149..27aa15a 100644
--- a/gallery_dl/extractor/lightroom.py
+++ b/gallery_dl/extractor/lightroom.py
@@ -35,7 +35,7 @@ class LightroomGalleryExtractor(Extractor):
images = self.images(album)
for img in images:
url = img["url"]
- yield Message.Directory, img
+ yield Message.Directory, "", img
yield Message.Url, url, text.nameext_from_url(url, img)
def metadata(self, album):
diff --git a/gallery_dl/extractor/livedoor.py b/gallery_dl/extractor/livedoor.py
index ab3be69..706194a 100644
--- a/gallery_dl/extractor/livedoor.py
+++ b/gallery_dl/extractor/livedoor.py
@@ -27,7 +27,7 @@ class LivedoorExtractor(Extractor):
def items(self):
for post in self.posts():
if images := self._images(post):
- yield Message.Directory, {"post": post}
+ yield Message.Directory, "", {"post": post}
for image in images:
yield Message.Url, image["url"], image
@@ -45,7 +45,7 @@ class LivedoorExtractor(Extractor):
"title" : text.unescape(extr('dc:title="', '"')),
"categories" : extr('dc:subject="', '"').partition(",")[::2],
"description": extr('dc:description="', '"'),
- "date" : text.parse_datetime(extr('dc:date="', '"')),
+ "date" : self.parse_datetime_iso(extr('dc:date="', '"')),
"tags" : text.split_html(tags)[1:] if tags else [],
"user" : self.user,
"body" : body,
diff --git a/gallery_dl/extractor/lofter.py b/gallery_dl/extractor/lofter.py
index c20d983..b1f58ac 100644
--- a/gallery_dl/extractor/lofter.py
+++ b/gallery_dl/extractor/lofter.py
@@ -29,7 +29,7 @@ class LofterExtractor(Extractor):
post = post["post"]
post["blog_name"] = post["blogInfo"]["blogName"]
- post["date"] = text.parse_timestamp(post["publishTime"] // 1000)
+ post["date"] = self.parse_timestamp(post["publishTime"] // 1000)
post_type = post["type"]
# Article
@@ -63,7 +63,7 @@ class LofterExtractor(Extractor):
post["id"], post_type)
post["count"] = len(image_urls)
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], url in enumerate(image_urls, 1):
yield Message.Url, url, text.nameext_from_url(url, post)
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index 5233033..d17549d 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -25,7 +25,7 @@ BASE_PATTERN = LolisafeExtractor.update({
class LolisafeAlbumExtractor(LolisafeExtractor):
subcategory = "album"
- pattern = BASE_PATTERN + "/a/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/a/([^/?#]+)"
example = "https://xbunkr.com/a/ID"
def __init__(self, match):
@@ -42,7 +42,7 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
def items(self):
files, data = self.fetch_album(self.album_id)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], file in enumerate(files, 1):
url = file["file"]
file.update(data)
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index 0cbc523..2abd1c8 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -58,7 +58,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
def items(self):
album = self.metadata()
- yield Message.Directory, {"album": album}
+ yield Message.Directory, "", {"album": album}
for num, image in enumerate(self.images(), 1):
image["num"] = num
image["album"] = album
@@ -69,7 +69,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
image["thumbnail"] = ""
image["tags"] = [item["text"] for item in image["tags"]]
- image["date"] = text.parse_timestamp(image["created"])
+ image["date"] = self.parse_timestamp(image["created"])
image["id"] = text.parse_int(image["id"])
url = (image["url_to_original"] or image["url_to_video"]
@@ -188,7 +188,7 @@ fragment AlbumStandard on Album {
album["created_by"] = album["created_by"]["display_name"]
album["id"] = text.parse_int(album["id"])
- album["date"] = text.parse_timestamp(album["created"])
+ album["date"] = self.parse_timestamp(album["created"])
return album
diff --git a/gallery_dl/extractor/lynxchan.py b/gallery_dl/extractor/lynxchan.py
index fde2df5..7cf1282 100644
--- a/gallery_dl/extractor/lynxchan.py
+++ b/gallery_dl/extractor/lynxchan.py
@@ -39,7 +39,7 @@ class LynxchanThreadExtractor(LynxchanExtractor):
"{threadId} {subject|message[:50]}")
filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
archive_fmt = "{boardUri}_{postId}_{num}"
- pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/res/(\d+)"
example = "https://endchan.org/a/res/12345.html"
def items(self):
@@ -48,7 +48,7 @@ class LynxchanThreadExtractor(LynxchanExtractor):
thread["postId"] = thread["threadId"]
posts = thread.pop("posts", ())
- yield Message.Directory, thread
+ yield Message.Directory, "", thread
for post in itertools.chain((thread,), posts):
if files := post.pop("files", ()):
thread.update(post)
@@ -63,7 +63,7 @@ class LynxchanThreadExtractor(LynxchanExtractor):
class LynxchanBoardExtractor(LynxchanExtractor):
"""Extractor for LynxChan boards"""
subcategory = "board"
- pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
example = "https://endchan.org/a/"
def items(self):
diff --git a/gallery_dl/extractor/madokami.py b/gallery_dl/extractor/madokami.py
index 1db5126..e15b90d 100644
--- a/gallery_dl/extractor/madokami.py
+++ b/gallery_dl/extractor/madokami.py
@@ -47,8 +47,7 @@ class MadokamiMangaExtractor(MadokamiExtractor):
"path": text.unescape(extr('href="', '"')),
"chapter_string": text.unescape(extr(">", "<")),
"size": text.parse_bytes(extr("<td>", "</td>")),
- "date": text.parse_datetime(
- extr("<td>", "</td>").strip(), "%Y-%m-%d %H:%M"),
+ "date": self.parse_datetime_iso(extr("<td>", "</td>").strip()),
})
if self.config("chapter-reverse"):
@@ -89,5 +88,5 @@ class MadokamiMangaExtractor(MadokamiExtractor):
url = f"{self.root}{ch['path']}"
text.nameext_from_url(url, ch)
- yield Message.Directory, ch
+ yield Message.Directory, "", ch
yield Message.Url, url, ch
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 16eb650..0a1709d 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -68,7 +68,7 @@ class MangadexExtractor(Extractor):
"chapter" : text.parse_int(chnum),
"chapter_minor": f"{sep}{minor}",
"chapter_id": chapter["id"],
- "date" : text.parse_datetime(cattributes["publishAt"]),
+ "date" : self.parse_datetime_iso(cattributes["publishAt"]),
"group" : [group["attributes"]["name"]
for group in relationships["scanlation_group"]],
"lang" : lang,
@@ -95,7 +95,7 @@ class MangadexCoversExtractor(MangadexExtractor):
name = data["cover"]
text.nameext_from_url(name, data)
data["cover_id"] = data["filename"]
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, f"{base}{name}", data
def _transform_cover(self, cover):
@@ -109,8 +109,8 @@ class MangadexCoversExtractor(MangadexExtractor):
"cover" : cattributes["fileName"],
"lang" : cattributes.get("locale"),
"volume" : text.parse_int(cattributes["volume"]),
- "date" : text.parse_datetime(cattributes["createdAt"]),
- "date_updated": text.parse_datetime(cattributes["updatedAt"]),
+ "date" : self.parse_datetime_iso(cattributes["createdAt"]),
+ "date_updated": self.parse_datetime_iso(cattributes["updatedAt"]),
}
@@ -134,15 +134,21 @@ class MangadexChapterExtractor(MangadexExtractor):
f"available on MangaDex and can instead be read on the "
f"official publisher's website at {data['_external_url']}.")
- yield Message.Directory, data
+ yield Message.Directory, "", data
+
+ if self.config("data-saver", False):
+ path = "data-saver"
+ key = "dataSaver"
+ else:
+ path = key = "data"
server = self.api.athome_server(self.uuid)
chapter = server["chapter"]
- base = f"{server['baseUrl']}/data/{chapter['hash']}/"
+ base = f"{server['baseUrl']}/{path}/{chapter['hash']}/"
enum = util.enumerate_reversed if self.config(
"page-reverse") else enumerate
- for data["page"], page in enum(chapter["data"], 1):
+ for data["page"], page in enum(chapter[key], 1):
text.nameext_from_url(page, data)
yield Message.Url, f"{base}{page}", data
@@ -454,7 +460,7 @@ def _manga_info(self, uuid):
"manga_id": manga["id"],
"manga_titles": [t.popitem()[1]
for t in mattr.get("altTitles") or ()],
- "manga_date" : text.parse_datetime(mattr.get("createdAt")),
+ "manga_date" : self.parse_datetime_iso(mattr.get("createdAt")),
"description" : (mattr["description"].get("en") or
next(iter(mattr["description"].values()), "")),
"demographic": mattr.get("publicationDemographic"),
diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py
index 76f4b7e..8fa645b 100644
--- a/gallery_dl/extractor/mangafox.py
+++ b/gallery_dl/extractor/mangafox.py
@@ -18,8 +18,8 @@ class MangafoxChapterExtractor(ChapterExtractor):
"""Extractor for manga chapters from fanfox.net"""
category = "mangafox"
root = "https://m.fanfox.net"
- pattern = BASE_PATTERN + \
- r"(/manga/[^/?#]+/((?:v([^/?#]+)/)?c(\d+)([^/?#]*)))"
+ pattern = (rf"{BASE_PATTERN}"
+ rf"(/manga/[^/?#]+/((?:v([^/?#]+)/)?c(\d+)([^/?#]*)))")
example = "https://fanfox.net/manga/TITLE/v01/c001/1.html"
def __init__(self, match):
@@ -62,7 +62,7 @@ class MangafoxMangaExtractor(MangaExtractor):
category = "mangafox"
root = "https://m.fanfox.net"
chapterclass = MangafoxChapterExtractor
- pattern = BASE_PATTERN + r"(/manga/[^/?#]+)/?$"
+ pattern = rf"{BASE_PATTERN}(/manga/[^/?#]+)/?$"
example = "https://fanfox.net/manga/TITLE"
def chapters(self, page):
@@ -99,7 +99,7 @@ class MangafoxMangaExtractor(MangaExtractor):
"chapter" : text.parse_int(chapter),
"chapter_minor" : minor or "",
"chapter_string": cstr,
- "date" : text.parse_datetime(
+ "date" : self.parse_datetime(
extr('right">', '</span>'), "%b %d, %Y"),
}
chapter.update(data)
diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py
index 151e809..9b3a3a1 100644
--- a/gallery_dl/extractor/mangahere.py
+++ b/gallery_dl/extractor/mangahere.py
@@ -9,7 +9,7 @@
"""Extractors for https://www.mangahere.cc/"""
from .common import ChapterExtractor, MangaExtractor
-from .. import text, util
+from .. import text
class MangahereBase():
@@ -102,7 +102,7 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor):
info, pos = text.extract(page, 'class="title3">', '<', pos)
date, pos = text.extract(page, 'class="title2">', '<', pos)
- match = util.re(
+ match = text.re(
r"(?:Vol\.0*(\d+) )?Ch\.0*(\d+)(\S*)(?: - (.*))?").match(info)
if match:
volume, chapter, minor, title = match.groups()
diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py
index a6948e3..3ecf934 100644
--- a/gallery_dl/extractor/manganelo.py
+++ b/gallery_dl/extractor/manganelo.py
@@ -39,7 +39,7 @@ BASE_PATTERN = ManganeloExtractor.update({
class ManganeloChapterExtractor(ManganeloExtractor, ChapterExtractor):
"""Extractor for manganelo manga chapters"""
- pattern = BASE_PATTERN + r"(/manga/[^/?#]+/chapter-[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/manga/[^/?#]+/chapter-[^/?#]+)"
example = "https://www.mangakakalot.gg/manga/MANGA_NAME/chapter-123"
def __init__(self, match):
@@ -50,10 +50,10 @@ class ManganeloChapterExtractor(ManganeloExtractor, ChapterExtractor):
extr = text.extract_from(page)
data = {
- "date" : text.parse_datetime(extr(
- '"datePublished": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
- "date_updated": text.parse_datetime(extr(
- '"dateModified": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
+ "date" : self.parse_datetime_iso(extr(
+ '"datePublished": "', '"')[:19]),
+ "date_updated": self.parse_datetime_iso(extr(
+ '"dateModified": "', '"')[:19]),
"manga_id" : text.parse_int(extr("comic_id =", ";")),
"chapter_id" : text.parse_int(extr("chapter_id =", ";")),
"manga" : extr("comic_name =", ";").strip('" '),
@@ -86,7 +86,7 @@ class ManganeloChapterExtractor(ManganeloExtractor, ChapterExtractor):
class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
"""Extractor for manganelo manga"""
chapterclass = ManganeloChapterExtractor
- pattern = BASE_PATTERN + r"(/manga/[^/?#]+)$"
+ pattern = rf"{BASE_PATTERN}(/manga/[^/?#]+)$"
example = "https://www.mangakakalot.gg/manga/MANGA_NAME"
def __init__(self, match):
@@ -99,7 +99,7 @@ class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
manga = text.unescape(extr("<h1>", "<"))
author = text.remove_html(extr("<li>Author(s) :", "</a>"))
status = extr("<li>Status :", "<").strip()
- update = text.parse_datetime(extr(
+ update = self.parse_datetime(extr(
"<li>Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p")
tags = text.split_html(extr(">Genres :", "</li>"))[::2]
@@ -121,7 +121,7 @@ class ManganeloMangaExtractor(ManganeloExtractor, MangaExtractor):
"chapter" : text.parse_int(chapter),
"chapter_minor": (sep and ".") + minor,
"title" : title.partition(": ")[2],
- "date" : text.parse_datetime(date, "%b-%d-%Y %H:%M"),
+ "date" : self.parse_datetime(date, "%b-%d-%Y %H:%M"),
"lang" : "en",
"language": "English",
}))
diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index 19aee33..e2f9166 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -23,7 +23,7 @@ class MangaparkBase():
category = "mangapark"
def _parse_chapter_title(self, title):
- match = util.re(
+ match = text.re(
r"(?i)"
r"(?:vol(?:\.|ume)?\s*(\d+)\s*)?"
r"ch(?:\.|apter)?\s*(\d+)([^\s:]*)"
@@ -70,8 +70,8 @@ class MangaparkBase():
class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
"""Extractor for manga-chapters from mangapark.net"""
- pattern = (BASE_PATTERN +
- r"/(?:title/[^/?#]+/|comic/\d+/[^/?#]+/[^/?#]+-i)(\d+)")
+ pattern = (rf"{BASE_PATTERN}/"
+ rf"(?:title/[^/?#]+/|comic/\d+/[^/?#]+/[^/?#]+-i)(\d+)")
example = "https://mangapark.net/title/MANGA/12345-en-ch.01"
def __init__(self, match):
@@ -101,7 +101,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
"language" : util.code_to_language(lang),
"source" : chapter["srcTitle"],
"source_id" : chapter["sourceId"],
- "date" : text.parse_timestamp(chapter["dateCreate"] // 1000),
+ "date" : self.parse_timestamp(chapter["dateCreate"] // 1000),
}
def images(self, _):
@@ -111,7 +111,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
class MangaparkMangaExtractor(MangaparkBase, Extractor):
"""Extractor for manga from mangapark.net"""
subcategory = "manga"
- pattern = BASE_PATTERN + r"/(?:title|comic)/(\d+)(?:[/-][^/?#]*)?/?$"
+ pattern = rf"{BASE_PATTERN}/(?:title|comic)/(\d+)(?:[/-][^/?#]*)?/?$"
example = "https://mangapark.net/title/12345-MANGA"
def __init__(self, match):
@@ -138,7 +138,7 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
"language" : util.code_to_language(lang),
"source" : chapter["srcTitle"],
"source_id" : chapter["sourceId"],
- "date" : text.parse_timestamp(
+ "date" : self.parse_timestamp(
chapter["dateCreate"] // 1000),
"_extractor": MangaparkChapterExtractor,
}
diff --git a/gallery_dl/extractor/mangaread.py b/gallery_dl/extractor/mangaread.py
index a3bdf39..82fddde 100644
--- a/gallery_dl/extractor/mangaread.py
+++ b/gallery_dl/extractor/mangaread.py
@@ -7,7 +7,7 @@
"""Extractors for https://mangaread.org/"""
from .common import ChapterExtractor, MangaExtractor
-from .. import text, util, exception
+from .. import text, exception
class MangareadBase():
@@ -16,7 +16,7 @@ class MangareadBase():
root = "https://www.mangaread.org"
def parse_chapter_string(self, chapter_string, data):
- match = util.re(
+ match = text.re(
r"(?:(.+)\s*-\s*)?[Cc]hapter\s*(\d+)(\.\d+)?(?:\s*-\s*(.+))?"
).match(text.unescape(chapter_string).strip())
manga, chapter, minor, title = match.groups()
diff --git a/gallery_dl/extractor/mangataro.py b/gallery_dl/extractor/mangataro.py
index f4cc058..029bc2e 100644
--- a/gallery_dl/extractor/mangataro.py
+++ b/gallery_dl/extractor/mangataro.py
@@ -40,10 +40,8 @@ class MangataroChapterExtractor(MangataroBase, ChapterExtractor):
"chapter_minor": str(round(minor, 5))[1:] if minor else "",
"chapter_id" : text.parse_int(chapter_id),
"chapter_url" : comic["url"],
- "date" : text.parse_datetime(
- comic["datePublished"], "%Y-%m-%dT%H:%M:%S%z"),
- "date_updated" : text.parse_datetime(
- comic["dateModified"], "%Y-%m-%dT%H:%M:%S%z"),
+ "date" : self.parse_datetime_iso(comic["datePublished"]),
+ "date_updated" : self.parse_datetime_iso(comic["dateModified"]),
}
def images(self, page):
diff --git a/gallery_dl/extractor/mangoxo.py b/gallery_dl/extractor/mangoxo.py
index beb13ce..60f0de9 100644
--- a/gallery_dl/extractor/mangoxo.py
+++ b/gallery_dl/extractor/mangoxo.py
@@ -91,7 +91,7 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
data = self.metadata(page)
imgs = self.images(url, page)
- yield Message.Directory, data
+ yield Message.Directory, "", data
data["extension"] = None
for data["num"], path in enumerate(imgs, 1):
@@ -119,7 +119,7 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
"album": {
"id": self.album_id,
"name": text.unescape(title),
- "date": text.parse_datetime(date.strip(), "%Y.%m.%d %H:%M"),
+ "date": self.parse_datetime(date.strip(), "%Y.%m.%d %H:%M"),
"description": text.unescape(descr),
},
"count": text.parse_int(count),
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 1bab63a..165f8b8 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -64,10 +64,9 @@ class MastodonExtractor(BaseExtractor):
status["count"] = len(attachments)
status["tags"] = [tag["name"] for tag in status["tags"]]
- status["date"] = text.parse_datetime(
- status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
+ status["date"] = self.parse_datetime_iso(status["created_at"][:19])
- yield Message.Directory, status
+ yield Message.Directory, "", status
for status["num"], media in enumerate(attachments, 1):
status["media"] = media
url = media["url"]
@@ -119,7 +118,7 @@ BASE_PATTERN = MastodonExtractor.update({
class MastodonUserExtractor(MastodonExtractor):
"""Extractor for all images of an account/user"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/(?:@|users/)([^/?#]+)(?:/media)?/?$"
+ pattern = rf"{BASE_PATTERN}/(?:@|users/)([^/?#]+)(?:/media)?/?$"
example = "https://mastodon.social/@USER"
def statuses(self):
@@ -139,7 +138,7 @@ class MastodonUserExtractor(MastodonExtractor):
class MastodonBookmarkExtractor(MastodonExtractor):
"""Extractor for mastodon bookmarks"""
subcategory = "bookmark"
- pattern = BASE_PATTERN + r"/bookmarks"
+ pattern = rf"{BASE_PATTERN}/bookmarks"
example = "https://mastodon.social/bookmarks"
def statuses(self):
@@ -149,7 +148,7 @@ class MastodonBookmarkExtractor(MastodonExtractor):
class MastodonFavoriteExtractor(MastodonExtractor):
"""Extractor for mastodon favorites"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/favourites"
+ pattern = rf"{BASE_PATTERN}/favourites"
example = "https://mastodon.social/favourites"
def statuses(self):
@@ -159,7 +158,7 @@ class MastodonFavoriteExtractor(MastodonExtractor):
class MastodonListExtractor(MastodonExtractor):
"""Extractor for mastodon lists"""
subcategory = "list"
- pattern = BASE_PATTERN + r"/lists/(\w+)"
+ pattern = rf"{BASE_PATTERN}/lists/(\w+)"
example = "https://mastodon.social/lists/12345"
def statuses(self):
@@ -169,7 +168,7 @@ class MastodonListExtractor(MastodonExtractor):
class MastodonHashtagExtractor(MastodonExtractor):
"""Extractor for mastodon hashtags"""
subcategory = "hashtag"
- pattern = BASE_PATTERN + r"/tags/(\w+)"
+ pattern = rf"{BASE_PATTERN}/tags/(\w+)"
example = "https://mastodon.social/tags/NAME"
def statuses(self):
@@ -179,7 +178,7 @@ class MastodonHashtagExtractor(MastodonExtractor):
class MastodonFollowingExtractor(MastodonExtractor):
"""Extractor for followed mastodon users"""
subcategory = "following"
- pattern = BASE_PATTERN + r"/(?:@|users/)([^/?#]+)/following"
+ pattern = rf"{BASE_PATTERN}/(?:@|users/)([^/?#]+)/following"
example = "https://mastodon.social/@USER/following"
def items(self):
@@ -194,7 +193,7 @@ class MastodonFollowingExtractor(MastodonExtractor):
class MastodonStatusExtractor(MastodonExtractor):
"""Extractor for images from a status"""
subcategory = "status"
- pattern = (BASE_PATTERN + r"/(?:@[^/?#]+|(?:users/[^/?#]+/)?"
+ pattern = (rf"{BASE_PATTERN}/(?:@[^/?#]+|(?:users/[^/?#]+/)?"
r"(?:statuses|notice|objects()))/(?!following)([^/?#]+)")
example = "https://mastodon.social/@USER/12345"
@@ -319,10 +318,8 @@ class MastodonAPI():
if code == 404:
raise exception.NotFoundError()
if code == 429:
- self.extractor.wait(until=text.parse_datetime(
- response.headers["x-ratelimit-reset"],
- "%Y-%m-%dT%H:%M:%S.%fZ",
- ))
+ self.extractor.wait(until=self.extractor.parse_datetime_iso(
+ response.headers["x-ratelimit-reset"]))
continue
raise exception.AbortExtraction(response.json().get("error"))
diff --git a/gallery_dl/extractor/message.py b/gallery_dl/extractor/message.py
index d5c2554..6eda213 100644
--- a/gallery_dl/extractor/message.py
+++ b/gallery_dl/extractor/message.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -14,13 +14,14 @@ class Message():
is one of the following identifiers. This message-identifier determines
the type and meaning of the other elements in such a tuple.
- - Message.Version:
+ - Message.Version: # obsolete
- Message protocol version (currently always '1')
- 2nd element specifies the version of all following messages as integer
- Message.Directory:
- Sets the target directory for all following images
- - 2nd element is a dictionary containing general metadata
+ - 2nd element is unused
+ - 3rd element is a dictionary containing general metadata
- Message.Url:
- Image URL and its metadata
@@ -45,7 +46,7 @@ class Message():
- The additional URLs serve as a fallback if the primary one fails
"""
- Version = 1
+ # Version = 1
Directory = 2
Url = 3
# Headers = 4
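
This docstring change is the protocol shift behind every `yield Message.Directory, "", data` edit in this patch: `Message.Version` is retired, and `Message.Directory` grows from a 2-tuple to a 3-tuple whose metadata dict sits at index 2. A minimal consumer sketch (names and the demo messages are illustrative, not part of the patch):

    DIRECTORY, URL = 2, 3      # values of Message.Directory / Message.Url

    def consume(messages):
        metadata = {}
        for msg in messages:
            if msg[0] == DIRECTORY:
                _, _, metadata = msg    # dict moved from index 1 to index 2
            elif msg[0] == URL:
                _, url, kwdict = msg
                print(url, kwdict["extension"])

    consume([
        (DIRECTORY, "", {"category": "itaku"}),
        (URL, "https://example.org/file.jpg", {"extension": "jpg"}),
    ])
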
diff --git a/gallery_dl/extractor/misskey.py b/gallery_dl/extractor/misskey.py
index 42eaeef..ca3ae18 100644
--- a/gallery_dl/extractor/misskey.py
+++ b/gallery_dl/extractor/misskey.py
@@ -7,7 +7,7 @@
"""Extractors for Misskey instances"""
from .common import BaseExtractor, Message, Dispatch
-from .. import text, exception
+from .. import text, dt, exception
from ..cache import memcache
@@ -18,10 +18,6 @@ class MisskeyExtractor(BaseExtractor):
filename_fmt = "{category}_{id}_{file[id]}.{extension}"
archive_fmt = "{id}_{file[id]}"
- def __init__(self, match):
- BaseExtractor.__init__(self, match)
- self.item = self.groups[-1]
-
def _init(self):
self.api = MisskeyAPI(self)
self.instance = self.root.rpartition("://")[2]
@@ -48,13 +44,11 @@ class MisskeyExtractor(BaseExtractor):
note["instance"] = self.instance
note["instance_remote"] = note["user"]["host"]
note["count"] = len(files)
- note["date"] = text.parse_datetime(
- note["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ note["date"] = self.parse_datetime_iso(note["createdAt"])
- yield Message.Directory, note
+ yield Message.Directory, "", note
for note["num"], file in enumerate(files, 1):
- file["date"] = text.parse_datetime(
- file["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ file["date"] = self.parse_datetime_iso(file["createdAt"])
note["file"] = file
url = file["url"]
yield Message.Url, url, text.nameext_from_url(url, note)
@@ -108,11 +102,11 @@ BASE_PATTERN = MisskeyExtractor.update({
class MisskeyUserExtractor(Dispatch, MisskeyExtractor):
"""Extractor for all images of a Misskey user"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/@([^/?#]+)/?$"
+ pattern = rf"{BASE_PATTERN}/@([^/?#]+)/?$"
example = "https://misskey.io/@USER"
def items(self):
- base = f"{self.root}/@{self.item}/"
+ base = f"{self.root}/@{self.groups[-1]}/"
return self._dispatch_extractors((
(MisskeyInfoExtractor , base + "info"),
(MisskeyAvatarExtractor , base + "avatar"),
@@ -124,32 +118,33 @@ class MisskeyUserExtractor(Dispatch, MisskeyExtractor):
class MisskeyNotesExtractor(MisskeyExtractor):
"""Extractor for a Misskey user's notes"""
subcategory = "notes"
- pattern = BASE_PATTERN + r"/@([^/?#]+)/notes"
+ pattern = rf"{BASE_PATTERN}/@([^/?#]+)/notes"
example = "https://misskey.io/@USER/notes"
def notes(self):
- return self.api.users_notes(self.api.user_id_by_username(self.item))
+ return self.api.users_notes(self.api.user_id_by_username(
+ self.groups[-1]))
class MisskeyInfoExtractor(MisskeyExtractor):
"""Extractor for a Misskey user's profile data"""
subcategory = "info"
- pattern = BASE_PATTERN + r"/@([^/?#]+)/info"
+ pattern = rf"{BASE_PATTERN}/@([^/?#]+)/info"
example = "https://misskey.io/@USER/info"
def items(self):
- user = self.api.users_show(self.item)
- return iter(((Message.Directory, user),))
+ user = self.api.users_show(self.groups[-1])
+ return iter(((Message.Directory, "", user),))
class MisskeyAvatarExtractor(MisskeyExtractor):
"""Extractor for a Misskey user's avatar"""
subcategory = "avatar"
- pattern = BASE_PATTERN + r"/@([^/?#]+)/avatar"
+ pattern = rf"{BASE_PATTERN}/@([^/?#]+)/avatar"
example = "https://misskey.io/@USER/avatar"
def notes(self):
- user = self.api.users_show(self.item)
+ user = self.api.users_show(self.groups[-1])
url = user.get("avatarUrl")
return (self._make_note("avatar", user, url),) if url else ()
@@ -157,11 +152,11 @@ class MisskeyAvatarExtractor(MisskeyExtractor):
class MisskeyBackgroundExtractor(MisskeyExtractor):
"""Extractor for a Misskey user's banner image"""
subcategory = "background"
- pattern = BASE_PATTERN + r"/@([^/?#]+)/ba(?:nner|ckground)"
+ pattern = rf"{BASE_PATTERN}/@([^/?#]+)/ba(?:nner|ckground)"
example = "https://misskey.io/@USER/banner"
def notes(self):
- user = self.api.users_show(self.item)
+ user = self.api.users_show(self.groups[-1])
url = user.get("bannerUrl")
return (self._make_note("background", user, url),) if url else ()
@@ -169,11 +164,11 @@ class MisskeyBackgroundExtractor(MisskeyExtractor):
class MisskeyFollowingExtractor(MisskeyExtractor):
"""Extractor for followed Misskey users"""
subcategory = "following"
- pattern = BASE_PATTERN + r"/@([^/?#]+)/following"
+ pattern = rf"{BASE_PATTERN}/@([^/?#]+)/following"
example = "https://misskey.io/@USER/following"
def items(self):
- user_id = self.api.user_id_by_username(self.item)
+ user_id = self.api.user_id_by_username(self.groups[-1])
for user in self.api.users_following(user_id):
user = user["followee"]
url = f"{self.root}/@{user['username']}"
@@ -186,17 +181,17 @@ class MisskeyFollowingExtractor(MisskeyExtractor):
class MisskeyNoteExtractor(MisskeyExtractor):
"""Extractor for images from a Note"""
subcategory = "note"
- pattern = BASE_PATTERN + r"/notes/(\w+)"
+ pattern = rf"{BASE_PATTERN}/notes/(\w+)"
example = "https://misskey.io/notes/98765"
def notes(self):
- return (self.api.notes_show(self.item),)
+ return (self.api.notes_show(self.groups[-1]),)
class MisskeyFavoriteExtractor(MisskeyExtractor):
"""Extractor for favorited notes"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/(?:my|api/i)/favorites"
+ pattern = rf"{BASE_PATTERN}/(?:my|api/i)/favorites"
example = "https://misskey.io/my/favorites"
def notes(self):
@@ -253,12 +248,39 @@ class MisskeyAPI():
return self.extractor.request_json(url, method="POST", json=data)
def _pagination(self, endpoint, data):
+ extr = self.extractor
data["limit"] = 100
- data["withRenotes"] = self.extractor.renotes
+ data["withRenotes"] = extr.renotes
+ data["withFiles"] = False if extr.config("text-posts") else True
+
+ date_min, date_max = extr._get_date_min_max()
+ if (order := extr.config("order-posts")) and \
+ order[0] in ("a", "r"):
+ key = "sinceId"
+ data["sinceDate"] = 1 if date_min is None else date_min * 1000
+ date_stop = None if date_max is None else date_max
+ else:
+ key = "untilId"
+ date_stop = None
+ if date_min is not None:
+ data["sinceDate"] = date_min * 1000
+ if date_max is None:
+ # ensure notes are returned in descending order
+ data["untilDate"] = (int(dt.time.time()) + 1000) * 1000
+ if date_max is not None:
+ data["untilDate"] = date_max * 1000
while True:
notes = self._call(endpoint, data)
if not notes:
return
- yield from notes
- data["untilId"] = notes[-1]["id"]
+ elif date_stop is not None and dt.to_ts(dt.parse_iso(
+ notes[-1]["createdAt"])) > date_stop:
+ for idx, note in enumerate(notes):
+ if dt.to_ts(dt.parse_iso(note["createdAt"])) > date_stop:
+ yield from notes[:idx]
+ return
+ else:
+ yield from notes
+
+ data[key] = notes[-1]["id"]
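
The reworked Misskey `_pagination()` translates a second-resolution date range into the API's millisecond `sinceDate`/`untilDate` parameters and picks the cursor key: `sinceId` when paginating ascending, `untilId` when descending. A simplified sketch of just that parameter setup (inputs match what `_get_date_min_max()` is assumed to return; the client-side `date_stop` cutoff is enforced while iterating, as in the loop above):

    import time

    def date_params(date_min, date_max, ascending):
        # date_min/date_max: UNIX timestamps in seconds, or None
        data = {}
        if ascending:
            key = "sinceId"
            data["sinceDate"] = 1 if date_min is None else date_min * 1000
            date_stop = date_max        # upper bound checked client-side
        else:
            key = "untilId"
            date_stop = None
            if date_min is not None:
                data["sinceDate"] = date_min * 1000
                if date_max is None:
                    # keep the API returning notes newest-first
                    data["untilDate"] = (int(time.time()) + 1000) * 1000
            if date_max is not None:
                data["untilDate"] = date_max * 1000
        return key, data, date_stop
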
diff --git a/gallery_dl/extractor/moebooru.py b/gallery_dl/extractor/moebooru.py
index ba27994..23f8fd9 100644
--- a/gallery_dl/extractor/moebooru.py
+++ b/gallery_dl/extractor/moebooru.py
@@ -9,9 +9,8 @@
"""Extractors for Moebooru based sites"""
from .booru import BooruExtractor
-from .. import text, util
+from .. import text, dt
import collections
-import datetime
class MoebooruExtractor(BooruExtractor):
@@ -21,7 +20,7 @@ class MoebooruExtractor(BooruExtractor):
page_start = 1
def _prepare(self, post):
- post["date"] = text.parse_timestamp(post["created_at"])
+ post["date"] = dt.parse_ts(post["created_at"])
def _html(self, post):
url = f"{self.root}/post/show/{post['id']}"
@@ -33,7 +32,7 @@ class MoebooruExtractor(BooruExtractor):
return
tags = collections.defaultdict(list)
- pattern = util.re(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'+]+)")
+ pattern = text.re(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'+]+)")
for tag_type, tag_name in pattern.findall(tag_container):
tags[tag_type].append(text.unquote(tag_name))
for key, value in tags.items():
@@ -93,7 +92,7 @@ class MoebooruTagExtractor(MoebooruExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"/post\?(?:[^&#]*&)*tags=([^&#]*)"
+ pattern = rf"{BASE_PATTERN}/post\?(?:[^&#]*&)*tags=([^&#]*)"
example = "https://yande.re/post?tags=TAG"
def __init__(self, match):
@@ -112,7 +111,7 @@ class MoebooruPoolExtractor(MoebooruExtractor):
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
- pattern = BASE_PATTERN + r"/pool/show/(\d+)"
+ pattern = rf"{BASE_PATTERN}/pool/show/(\d+)"
example = "https://yande.re/pool/show/12345"
def __init__(self, match):
@@ -136,7 +135,7 @@ class MoebooruPoolExtractor(MoebooruExtractor):
class MoebooruPostExtractor(MoebooruExtractor):
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/post/show/(\d+)"
+ pattern = rf"{BASE_PATTERN}/post/show/(\d+)"
example = "https://yande.re/post/show/12345"
def posts(self):
@@ -148,8 +147,8 @@ class MoebooruPopularExtractor(MoebooruExtractor):
subcategory = "popular"
directory_fmt = ("{category}", "popular", "{scale}", "{date}")
archive_fmt = "P_{scale[0]}_{date}_{id}"
- pattern = BASE_PATTERN + \
- r"/post/popular_(by_(?:day|week|month)|recent)(?:\?([^#]*))?"
+ pattern = (rf"{BASE_PATTERN}"
+ rf"/post/popular_(by_(?:day|week|month)|recent)(?:\?([^#]*))?")
example = "https://yande.re/post/popular_by_month?year=YYYY&month=MM"
def __init__(self, match):
@@ -164,14 +163,14 @@ class MoebooruPopularExtractor(MoebooruExtractor):
date = (f"{params['year']:>04}-{params.get('month', '01'):>02}-"
f"{params.get('day', '01'):>02}")
else:
- date = datetime.date.today().isoformat()
+ date = dt.date.today().isoformat()
scale = self.scale
if scale.startswith("by_"):
scale = scale[3:]
if scale == "week":
- date = datetime.date.fromisoformat(date)
- date = (date - datetime.timedelta(days=date.weekday())).isoformat()
+ date = dt.date.fromisoformat(date)
+ date = (date - dt.timedelta(days=date.weekday())).isoformat()
elif scale == "month":
date = date[:-3]
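For the week scale, the code above snaps the date back to the start of its week: date.weekday() returns 0 for Monday, so subtracting it always lands on a Monday. A quick stdlib check:

import datetime

d = datetime.date(2025, 12, 20)                    # a Saturday
monday = d - datetime.timedelta(days=d.weekday())  # snap to week start
print(monday.isoformat())                          # 2025-12-15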
diff --git a/gallery_dl/extractor/motherless.py b/gallery_dl/extractor/motherless.py
index 48137ce..c20f138 100644
--- a/gallery_dl/extractor/motherless.py
+++ b/gallery_dl/extractor/motherless.py
@@ -9,9 +9,8 @@
"""Extractors for https://motherless.com/"""
from .common import Extractor, Message
-from .. import text, util, exception
+from .. import text, dt, exception
from ..cache import memcache
-from datetime import timedelta
BASE_PATTERN = r"(?:https?://)?motherless\.com"
@@ -42,6 +41,8 @@ class MotherlessExtractor(Extractor):
path, _, media_id = path.rpartition("/")
data = {
"id" : media_id,
+ "title": text.unescape(
+ (t := extr("<title>", "<")) and t[:t.rfind(" | ")]),
"type" : extr("__mediatype = '", "'"),
"group": extr("__group = '", "'"),
"url" : extr("__fileurl = '", "'"),
@@ -50,7 +51,6 @@ class MotherlessExtractor(Extractor):
for tag in text.extract_iter(
extr('class="media-meta-tags">', "</div>"), ">#", "<")
],
- "title": text.unescape(extr("<h1>", "<")),
"views": text.parse_int(extr(
'class="count">', " ").replace(",", "")),
"favorites": text.parse_int(extr(
@@ -115,14 +115,14 @@ class MotherlessExtractor(Extractor):
return data
- def _parse_datetime(self, dt):
- if " ago" not in dt:
- return text.parse_datetime(dt, "%d %b %Y")
+ def _parse_datetime(self, dt_string):
+ if " ago" not in dt_string:
+ return dt.parse(dt_string, "%d %b %Y")
- value = text.parse_int(dt[:-5])
- delta = timedelta(0, value*3600) if dt[-5] == "h" else timedelta(value)
- return (util.datetime_utcnow() - delta).replace(
- hour=0, minute=0, second=0)
+ value = text.parse_int(dt_string[:-5])
+ delta = (dt.timedelta(0, value*3600) if dt_string[-5] == "h" else
+ dt.timedelta(value))
+ return (dt.now() - delta).replace(hour=0, minute=0, second=0)
@memcache(keyarg=2)
def _extract_gallery_title(self, page, gallery_id):
@@ -132,10 +132,9 @@ class MotherlessExtractor(Extractor):
if title:
return text.unescape(title.strip())
- pos = page.find(f' href="/G{gallery_id}"')
- if pos >= 0:
- return text.unescape(text.extract(
- page, ' title="', '"', pos)[0])
+ if f' href="/G{gallery_id}"' in page:
+ return text.unescape(
+ (t := text.extr(page, "<title>", "<")) and t[:t.rfind(" | ")])
return ""
@@ -153,15 +152,15 @@ class MotherlessExtractor(Extractor):
class MotherlessMediaExtractor(MotherlessExtractor):
"""Extractor for a single image/video from motherless.com"""
subcategory = "media"
- pattern = (BASE_PATTERN +
- r"/((?:g/[^/?#]+/|G[IV]?[A-Z0-9]+/)?"
- r"(?!G)[A-Z0-9]+)")
+ pattern = (rf"{BASE_PATTERN}/("
+ rf"(?:g/[^/?#]+/|G[IV]?[A-Z0-9]+/)?"
+ rf"(?!G)[A-Z0-9]+)")
example = "https://motherless.com/ABC123"
def items(self):
file = self._extract_media(self.groups[0])
url = file["url"]
- yield Message.Directory, file
+ yield Message.Directory, "", file
yield Message.Url, url, text.nameext_from_url(url, file)
@@ -171,7 +170,7 @@ class MotherlessGalleryExtractor(MotherlessExtractor):
directory_fmt = ("{category}", "{uploader}",
"{gallery_id} {gallery_title}")
archive_fmt = "{gallery_id}_{id}"
- pattern = BASE_PATTERN + "/G([IVG])?([A-Z0-9]+)/?$"
+ pattern = rf"{BASE_PATTERN}/G([IVG])?([A-Z0-9]+)/?$"
example = "https://motherless.com/GABC123"
def items(self):
@@ -198,7 +197,7 @@ class MotherlessGalleryExtractor(MotherlessExtractor):
file["num"] = num
file["thumbnail"] = thumbnail
url = file["url"]
- yield Message.Directory, file
+ yield Message.Directory, "", file
yield Message.Url, url, text.nameext_from_url(url, file)
@@ -207,7 +206,7 @@ class MotherlessGroupExtractor(MotherlessExtractor):
directory_fmt = ("{category}", "{uploader}",
"{group_id} {group_title}")
archive_fmt = "{group_id}_{id}"
- pattern = BASE_PATTERN + "/g([iv]?)/?([a-z0-9_]+)/?$"
+ pattern = rf"{BASE_PATTERN}/g([iv]?)/?([a-z0-9_]+)/?$"
example = "https://motherless.com/g/abc123"
def items(self):
@@ -236,5 +235,5 @@ class MotherlessGroupExtractor(MotherlessExtractor):
file["uploader"] = uploader
file["group"] = file["group_id"]
url = file["url"]
- yield Message.Directory, file
+ yield Message.Directory, "", file
yield Message.Url, url, text.nameext_from_url(url, file)
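_parse_datetime above handles both absolute dates and relative values like "12h ago" or "3d ago", truncating to midnight of the inferred day. A stdlib-only sketch of the relative branch (illustrative; gallery-dl routes this through its own dt helpers):

import datetime

def parse_relative(s, now=None):
    # "12h ago" / "3d ago" -> midnight of the day the media was posted
    # (also clears microseconds for a clean repr)
    now = now or datetime.datetime.now(datetime.timezone.utc)
    value = int(s[:-5])
    delta = (datetime.timedelta(hours=value) if s[-5] == "h"
             else datetime.timedelta(days=value))
    return (now - delta).replace(hour=0, minute=0, second=0, microsecond=0)

print(parse_relative("12h ago"))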
diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py
index 2a39dc9..936f857 100644
--- a/gallery_dl/extractor/myhentaigallery.py
+++ b/gallery_dl/extractor/myhentaigallery.py
@@ -6,17 +6,21 @@
"""Extractors for https://myhentaigallery.com/"""
-from .common import GalleryExtractor
+from .common import Extractor, GalleryExtractor, Message
from .. import text, exception
+BASE_PATTERN = r"(?:https?://)?myhentaigallery\.com"
-class MyhentaigalleryGalleryExtractor(GalleryExtractor):
- """Extractor for image galleries from myhentaigallery.com"""
+
+class MyhentaigalleryBase():
category = "myhentaigallery"
root = "https://myhentaigallery.com"
+
+
+class MyhentaigalleryGalleryExtractor(MyhentaigalleryBase, GalleryExtractor):
+ """Extractor for image galleries from myhentaigallery.com"""
directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}")
- pattern = (r"(?:https?://)?myhentaigallery\.com"
- r"/g(?:allery/(?:thumbnails|show))?/(\d+)")
+ pattern = rf"{BASE_PATTERN}/g(?:allery/(?:thumbnails|show))?/(\d+)"
example = "https://myhentaigallery.com/g/12345"
def __init__(self, match):
@@ -53,3 +57,32 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor):
"/thumbnail/", "/original/"), None)
for url in text.extract_iter(page, 'class="comic-thumb"', '</div>')
]
+
+
+class MyhentaigalleryTagExtractor(MyhentaigalleryBase, Extractor):
+ """Extractor for myhentaigallery tag searches"""
+ subcategory = "tag"
+ pattern = rf"{BASE_PATTERN}(/g/(artist|category|group|parody)/(\d+).*)"
+ example = "https://myhentaigallery.com/g/category/123"
+
+ def items(self):
+ data = {"_extractor": MyhentaigalleryGalleryExtractor}
+ for url in self.galleries():
+ yield Message.Queue, url, data
+
+ def galleries(self):
+ root = self.root
+ url = root + self.groups[0]
+
+ while True:
+ page = self.request(url).text
+
+ for inner in text.extract_iter(
+ page, '<div class="comic-inner">', "<div"):
+ yield root + text.extr(inner, 'href="', '"')
+
+ try:
+ pos = page.index(">Next<")
+ except ValueError:
+ return
+ url = root + text.rextr(page, 'href="', '"', pos)
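The new tag extractor paginates by locating the ">Next<" anchor and reading the nearest preceding href, which is what the text.rextr call above does. A stdlib-only equivalent of that lookup (names illustrative):

def next_url(page, root="https://example.org"):
    # Find the ">Next<" link, then take the nearest preceding href.
    try:
        pos = page.index(">Next<")
    except ValueError:
        return None                     # last page reached
    href = page.rindex('href="', 0, pos)
    end = page.index('"', href + 6)
    return root + page[href + 6:end]

page = '<a href="/g/category/123?page=2">Next</a>'
print(next_url(page))                   # https://example.org/g/category/123?page=2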
diff --git a/gallery_dl/extractor/myportfolio.py b/gallery_dl/extractor/myportfolio.py
index 0223d0b..3a21122 100644
--- a/gallery_dl/extractor/myportfolio.py
+++ b/gallery_dl/extractor/myportfolio.py
@@ -49,7 +49,7 @@ class MyportfolioGalleryExtractor(Extractor):
data = self.metadata(page)
imgs = self.images(page)
data["count"] = len(imgs)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], url in enumerate(imgs, 1):
yield Message.Url, url, text.nameext_from_url(url, data)
diff --git a/gallery_dl/extractor/naverblog.py b/gallery_dl/extractor/naverblog.py
index b55e001..cc96e09 100644
--- a/gallery_dl/extractor/naverblog.py
+++ b/gallery_dl/extractor/naverblog.py
@@ -9,8 +9,7 @@
"""Extractors for https://blog.naver.com/"""
from .common import GalleryExtractor, Extractor, Message
-from .. import text, util
-import datetime
+from .. import text, util, dt
import time
@@ -67,11 +66,11 @@ class NaverBlogPostExtractor(NaverBlogBase, GalleryExtractor):
return data
- def _parse_datetime(self, date_string):
- if "전" in date_string:
+ def _parse_datetime(self, dt_string):
+ if "전" in dt_string:
ts = time.gmtime()
- return datetime.datetime(ts.tm_year, ts.tm_mon, ts.tm_mday)
- return text.parse_datetime(date_string, "%Y. %m. %d. %H:%M")
+ return dt.datetime(ts.tm_year, ts.tm_mon, ts.tm_mday)
+ return dt.parse(dt_string, "%Y. %m. %d. %H:%M")
def images(self, page):
files = []
diff --git a/gallery_dl/extractor/naverchzzk.py b/gallery_dl/extractor/naverchzzk.py
index de4ee7a..5b56710 100644
--- a/gallery_dl/extractor/naverchzzk.py
+++ b/gallery_dl/extractor/naverchzzk.py
@@ -31,17 +31,17 @@ class NaverChzzkExtractor(Extractor):
data["uid"] = data["objectId"]
data["user"] = comment["user"]
data["count"] = len(files)
- data["date"] = text.parse_datetime(
+ data["date"] = self.parse_datetime(
data["createdDate"], "%Y%m%d%H%M%S")
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], file in enumerate(files, 1):
if extra := file.get("extraJson"):
file.update(util.json_loads(extra))
- file["date"] = text.parse_datetime(
- file["createdDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
- file["date_updated"] = text.parse_datetime(
- file["updatedDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ file["date"] = self.parse_datetime_iso(
+ file["createdDate"])
+ file["date_updated"] = self.parse_datetime_iso(
+ file["updatedDate"])
data["file"] = file
url = file["attachValue"]
yield Message.Url, url, text.nameext_from_url(url, data)
diff --git a/gallery_dl/extractor/naverwebtoon.py b/gallery_dl/extractor/naverwebtoon.py
index 3211941..72089d0 100644
--- a/gallery_dl/extractor/naverwebtoon.py
+++ b/gallery_dl/extractor/naverwebtoon.py
@@ -27,7 +27,7 @@ class NaverWebtoonEpisodeExtractor(NaverWebtoonBase, GalleryExtractor):
directory_fmt = ("{category}", "{comic}")
filename_fmt = "{episode:>03}-{num:>02}.{extension}"
archive_fmt = "{title_id}_{episode}_{num}"
- pattern = BASE_PATTERN + r"/detail(?:\.nhn)?\?([^#]+)"
+ pattern = rf"{BASE_PATTERN}/detail(?:\.nhn)?\?([^#]+)"
example = "https://comic.naver.com/webtoon/detail?titleId=12345&no=1"
def __init__(self, match):
@@ -66,7 +66,7 @@ class NaverWebtoonEpisodeExtractor(NaverWebtoonBase, GalleryExtractor):
class NaverWebtoonComicExtractor(NaverWebtoonBase, Extractor):
subcategory = "comic"
categorytransfer = True
- pattern = BASE_PATTERN + r"/list(?:\.nhn)?\?([^#]+)"
+ pattern = rf"{BASE_PATTERN}/list(?:\.nhn)?\?([^#]+)"
example = "https://comic.naver.com/webtoon/list?titleId=12345"
def __init__(self, match):
diff --git a/gallery_dl/extractor/nekohouse.py b/gallery_dl/extractor/nekohouse.py
index e6b0461..728912b 100644
--- a/gallery_dl/extractor/nekohouse.py
+++ b/gallery_dl/extractor/nekohouse.py
@@ -12,7 +12,7 @@ from .common import Extractor, Message
from .. import text
BASE_PATTERN = r"(?:https?://)?nekohouse\.su"
-USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
+USER_PATTERN = rf"{BASE_PATTERN}/([^/?#]+)/user/([^/?#]+)"
class NekohouseExtractor(Extractor):
@@ -27,7 +27,7 @@ class NekohousePostExtractor(NekohouseExtractor):
"{post_id} {date} {title[b:230]}")
filename_fmt = "{num:>02} {id|filename}.{extension}"
archive_fmt = "{service}_{user_id}_{post_id}_{hash}"
- pattern = USER_PATTERN + r"/post/([^/?#]+)"
+ pattern = rf"{USER_PATTERN}/post/([^/?#]+)"
example = "https://nekohouse.su/SERVICE/user/12345/post/12345"
def items(self):
@@ -42,7 +42,7 @@ class NekohousePostExtractor(NekohouseExtractor):
post["post_id"] = post_id
post["count"] = len(files)
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], file in enumerate(files, 1):
url = file["url"]
text.nameext_from_url(url, file)
@@ -59,8 +59,8 @@ class NekohousePostExtractor(NekohouseExtractor):
'class="scrape__user-name', '</').rpartition(">")[2].strip()),
"title" : text.unescape(extr(
'class="scrape__title', '</').rpartition(">")[2]),
- "date" : text.parse_datetime(extr(
- 'datetime="', '"')[:19], "%Y-%m-%d %H:%M:%S"),
+ "date" : self.parse_datetime_iso(extr(
+ 'datetime="', '"')[:19]),
"content": text.unescape(extr(
'class="scrape__content">', "</div>").strip()),
}
@@ -98,7 +98,7 @@ class NekohousePostExtractor(NekohouseExtractor):
class NekohouseUserExtractor(NekohouseExtractor):
subcategory = "user"
- pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|\?|#)"
+ pattern = rf"{USER_PATTERN}/?(?:\?([^#]+))?(?:$|\?|#)"
example = "https://nekohouse.su/SERVICE/user/12345"
def items(self):
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index ffb4cad..f980f4b 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -9,7 +9,7 @@
"""Extractors for https://www.newgrounds.com/"""
from .common import Extractor, Message, Dispatch
-from .. import text, util, exception
+from .. import text, util, dt, exception
from ..cache import cache
import itertools
@@ -34,7 +34,7 @@ class NewgroundsExtractor(Extractor):
self.user_root = f"https://{self.user}.newgrounds.com"
def _init(self):
- self._extract_comment_urls = util.re(
+ self._extract_comment_urls = text.re(
r'(?:<img |data-smartload-)src="([^"]+)').findall
self.flash = self.config("flash", True)
@@ -58,13 +58,13 @@ class NewgroundsExtractor(Extractor):
post = self.extract_post(post_url)
url = post.get("url")
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
url = None
if url:
if metadata:
post.update(metadata)
- yield Message.Directory, post
+ yield Message.Directory, "", post
post["num"] = 0
yield Message.Url, url, text.nameext_from_url(url, post)
@@ -88,6 +88,7 @@ class NewgroundsExtractor(Extractor):
text.nameext_from_url(url, post)
yield Message.Url, url, post
else:
+ self.status |= 1
self.log.warning(
"Unable to get download URL for '%s'", post_url)
@@ -218,7 +219,7 @@ class NewgroundsExtractor(Extractor):
"description": text.unescape(extr(':description" content="', '"')),
"type" : "art",
"_type" : "i",
- "date" : text.parse_datetime(extr(
+ "date" : dt.parse_iso(extr(
'itemprop="datePublished" content="', '"')),
"rating" : extr('class="rated-', '"'),
"url" : full('src="', '"'),
@@ -268,7 +269,7 @@ class NewgroundsExtractor(Extractor):
"description": text.unescape(extr(':description" content="', '"')),
"type" : "audio",
"_type" : "a",
- "date" : text.parse_datetime(extr(
+ "date" : dt.parse_iso(extr(
'itemprop="datePublished" content="', '"')),
"url" : extr('{"url":"', '"').replace("\\/", "/"),
"index" : text.parse_int(index),
@@ -287,7 +288,7 @@ class NewgroundsExtractor(Extractor):
src = src.replace("\\/", "/")
formats = ()
type = extr(',"description":"', '"')
- date = text.parse_datetime(extr(
+ date = dt.parse_iso(extr(
'itemprop="datePublished" content="', '"'))
if type:
type = type.rpartition(" ")[2].lower()
@@ -302,7 +303,7 @@ class NewgroundsExtractor(Extractor):
sources = self.request_json(url, headers=headers)["sources"]
formats = self._video_formats(sources)
src = next(formats, "")
- date = text.parse_timestamp(src.rpartition("?")[2])
+ date = self.parse_timestamp(src.rpartition("?")[2])
type = "movie"
return {
@@ -321,7 +322,7 @@ class NewgroundsExtractor(Extractor):
def _video_formats(self, sources):
src = sources["360p"][0]["src"]
- sub = util.re(r"\.360p\.\w+").sub
+ sub = text.re(r"\.360p\.\w+").sub
for fmt in self.format:
try:
@@ -411,7 +412,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
class NewgroundsMediaExtractor(NewgroundsExtractor):
"""Extractor for a media file from newgrounds.com"""
subcategory = "media"
- pattern = BASE_PATTERN + r"(/(?:portal/view|audio/listen)/\d+)"
+ pattern = rf"{BASE_PATTERN}(/(?:portal/view|audio/listen)/\d+)"
example = "https://www.newgrounds.com/portal/view/12345"
def __init__(self, match):
@@ -426,34 +427,34 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
class NewgroundsArtExtractor(NewgroundsExtractor):
"""Extractor for all images of a newgrounds user"""
subcategory = _path = "art"
- pattern = USER_PATTERN + r"/art(?:(?:/page/|/?\?page=)(\d+))?/?$"
+ pattern = rf"{USER_PATTERN}/art(?:(?:/page/|/?\?page=)(\d+))?/?$"
example = "https://USER.newgrounds.com/art"
class NewgroundsAudioExtractor(NewgroundsExtractor):
"""Extractor for all audio submissions of a newgrounds user"""
subcategory = _path = "audio"
- pattern = USER_PATTERN + r"/audio(?:(?:/page/|/?\?page=)(\d+))?/?$"
+ pattern = rf"{USER_PATTERN}/audio(?:(?:/page/|/?\?page=)(\d+))?/?$"
example = "https://USER.newgrounds.com/audio"
class NewgroundsMoviesExtractor(NewgroundsExtractor):
"""Extractor for all movies of a newgrounds user"""
subcategory = _path = "movies"
- pattern = USER_PATTERN + r"/movies(?:(?:/page/|/?\?page=)(\d+))?/?$"
+ pattern = rf"{USER_PATTERN}/movies(?:(?:/page/|/?\?page=)(\d+))?/?$"
example = "https://USER.newgrounds.com/movies"
class NewgroundsGamesExtractor(NewgroundsExtractor):
"""Extractor for a newgrounds user's games"""
subcategory = _path = "games"
- pattern = USER_PATTERN + r"/games(?:(?:/page/|/?\?page=)(\d+))?/?$"
+ pattern = rf"{USER_PATTERN}/games(?:(?:/page/|/?\?page=)(\d+))?/?$"
example = "https://USER.newgrounds.com/games"
class NewgroundsUserExtractor(Dispatch, NewgroundsExtractor):
"""Extractor for a newgrounds user profile"""
- pattern = USER_PATTERN + r"/?$"
+ pattern = rf"{USER_PATTERN}/?$"
example = "https://USER.newgrounds.com"
def items(self):
@@ -470,7 +471,7 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor):
"""Extractor for posts favorited by a newgrounds user"""
subcategory = "favorite"
directory_fmt = ("{category}", "{user}", "Favorites")
- pattern = (USER_PATTERN + r"/favorites(?!/following)(?:/(art|audio|movies)"
+ pattern = (rf"{USER_PATTERN}/favorites(?!/following)(?:/(art|audio|movies)"
r"(?:(?:/page/|/?\?page=)(\d+))?)?")
example = "https://USER.newgrounds.com/favorites"
@@ -516,7 +517,7 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor):
class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
"""Extractor for a newgrounds user's favorited users"""
subcategory = "following"
- pattern = (USER_PATTERN + r"/favorites/(following)"
+ pattern = (rf"{USER_PATTERN}/favorites/(following)"
r"(?:(?:/page/|/?\?page=)(\d+))?")
example = "https://USER.newgrounds.com/favorites/following"
@@ -538,7 +539,7 @@ class NewgroundsSearchExtractor(NewgroundsExtractor):
"""Extractor for newgrounds.com search reesults"""
subcategory = "search"
directory_fmt = ("{category}", "search", "{search_tags}")
- pattern = BASE_PATTERN + r"/search/conduct/([^/?#]+)/?\?([^#]+)"
+ pattern = rf"{BASE_PATTERN}/search/conduct/([^/?#]+)/?\?([^#]+)"
example = "https://www.newgrounds.com/search/conduct/art?terms=QUERY"
def __init__(self, match):
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index c6df835..a6b01c2 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -9,7 +9,7 @@
"""Extractors for nijie instances"""
from .common import BaseExtractor, Message, Dispatch, AsynchronousMixin
-from .. import text, exception
+from .. import text, dt, exception
from ..cache import cache
@@ -59,7 +59,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
urls = self._extract_images(image_id, page)
data["count"] = len(urls)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for num, url in enumerate(urls):
image = text.nameext_from_url(url, {
"num": num,
@@ -82,8 +82,9 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
"title" : keywords[0].strip(),
"description": text.unescape(extr(
'"description": "', '"').replace("&amp;", "&")),
- "date" : text.parse_datetime(extr(
- '"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y", 9),
+ "date" : dt.parse(extr(
+ '"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y"
+ ) - dt.timedelta(hours=9),
"artist_id" : text.parse_int(extr('/members.php?id=', '"')),
"artist_name": keywords[1],
"tags" : keywords[2:-1],
@@ -101,9 +102,9 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
"artist_id" : text.parse_int(extr('members.php?id=', '"')),
"artist_name": keywords[1],
"tags" : keywords[2:-1],
- "date" : text.parse_datetime(extr(
- "itemprop='datePublished' content=", "<").rpartition(">")[2],
- "%Y-%m-%d %H:%M:%S", 9),
+ "date" : dt.parse_iso(extr(
+ "itemprop='datePublished' content=", "<").rpartition(">")[2]
+ ) - dt.timedelta(hours=9),
}
def _extract_images(self, image_id, page):
@@ -177,7 +178,7 @@ BASE_PATTERN = NijieExtractor.update({
class NijieUserExtractor(Dispatch, NijieExtractor):
"""Extractor for nijie user profiles"""
- pattern = BASE_PATTERN + r"/members\.php\?id=(\d+)"
+ pattern = rf"{BASE_PATTERN}/members\.php\?id=(\d+)"
example = "https://nijie.info/members.php?id=12345"
def items(self):
@@ -193,7 +194,7 @@ class NijieUserExtractor(Dispatch, NijieExtractor):
class NijieIllustrationExtractor(NijieExtractor):
"""Extractor for all illustrations of a nijie-user"""
subcategory = "illustration"
- pattern = BASE_PATTERN + r"/members_illust\.php\?id=(\d+)"
+ pattern = rf"{BASE_PATTERN}/members_illust\.php\?id=(\d+)"
example = "https://nijie.info/members_illust.php?id=12345"
def image_ids(self):
@@ -203,7 +204,7 @@ class NijieIllustrationExtractor(NijieExtractor):
class NijieDoujinExtractor(NijieExtractor):
"""Extractor for doujin entries of a nijie user"""
subcategory = "doujin"
- pattern = BASE_PATTERN + r"/members_dojin\.php\?id=(\d+)"
+ pattern = rf"{BASE_PATTERN}/members_dojin\.php\?id=(\d+)"
example = "https://nijie.info/members_dojin.php?id=12345"
def image_ids(self):
@@ -215,7 +216,7 @@ class NijieFavoriteExtractor(NijieExtractor):
subcategory = "favorite"
directory_fmt = ("{category}", "bookmarks", "{user_id}")
archive_fmt = "f_{user_id}_{image_id}_{num}"
- pattern = BASE_PATTERN + r"/user_like_illust_view\.php\?id=(\d+)"
+ pattern = rf"{BASE_PATTERN}/user_like_illust_view\.php\?id=(\d+)"
example = "https://nijie.info/user_like_illust_view.php?id=12345"
def image_ids(self):
@@ -233,7 +234,7 @@ class NijieNuitaExtractor(NijieExtractor):
subcategory = "nuita"
directory_fmt = ("{category}", "nuita", "{user_id}")
archive_fmt = "n_{user_id}_{image_id}_{num}"
- pattern = BASE_PATTERN + r"/history_nuita\.php\?id=(\d+)"
+ pattern = rf"{BASE_PATTERN}/history_nuita\.php\?id=(\d+)"
example = "https://nijie.info/history_nuita.php?id=12345"
def image_ids(self):
@@ -252,7 +253,7 @@ class NijieNuitaExtractor(NijieExtractor):
class NijieFeedExtractor(NijieExtractor):
"""Extractor for nijie liked user feed"""
subcategory = "feed"
- pattern = BASE_PATTERN + r"/like_user_view\.php"
+ pattern = rf"{BASE_PATTERN}/like_user_view\.php"
example = "https://nijie.info/like_user_view.php"
def image_ids(self):
@@ -265,7 +266,7 @@ class NijieFeedExtractor(NijieExtractor):
class NijieFollowedExtractor(NijieExtractor):
"""Extractor for followed nijie users"""
subcategory = "followed"
- pattern = BASE_PATTERN + r"/like_my\.php"
+ pattern = rf"{BASE_PATTERN}/like_my\.php"
example = "https://nijie.info/like_my.php"
def items(self):
@@ -291,7 +292,7 @@ class NijieFollowedExtractor(NijieExtractor):
class NijieImageExtractor(NijieExtractor):
"""Extractor for a nijie work/image"""
subcategory = "image"
- pattern = BASE_PATTERN + r"/view(?:_popup)?\.php\?id=(\d+)"
+ pattern = rf"{BASE_PATTERN}/view(?:_popup)?\.php\?id=(\d+)"
example = "https://nijie.info/view.php?id=12345"
def image_ids(self):
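nijie serves naive JST timestamps, so the rewritten code above parses them without timezone info and subtracts nine hours to normalize to UTC (the old text.parse_datetime calls did the same via their trailing 9-hour utcoffset argument). A stdlib sketch:

import datetime

def jst_to_utc(value, fmt="%Y-%m-%d %H:%M:%S"):
    # nijie publishes naive JST timestamps; shift back 9 hours to UTC
    return datetime.datetime.strptime(value, fmt) - datetime.timedelta(hours=9)

print(jst_to_utc("2025-12-20 09:00:00"))   # 2025-12-20 00:00:00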
diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py
index 69d8299..321883c 100644
--- a/gallery_dl/extractor/nitter.py
+++ b/gallery_dl/extractor/nitter.py
@@ -97,7 +97,7 @@ class NitterExtractor(BaseExtractor):
files = ()
tweet["count"] = len(files)
- yield Message.Directory, tweet
+ yield Message.Directory, "", tweet
for tweet["num"], file in enumerate(files, 1):
url = file["url"]
file.update(tweet)
@@ -114,7 +114,7 @@ class NitterExtractor(BaseExtractor):
return {
"author" : author,
"user" : self.user_obj or author,
- "date" : text.parse_datetime(
+ "date" : self.parse_datetime(
extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"),
"tweet_id": link.rpartition("/")[2].partition("#")[0],
"content": extr('class="tweet-content', "</div").partition(">")[2],
@@ -142,7 +142,7 @@ class NitterExtractor(BaseExtractor):
return {
"author" : author,
"user" : self.user_obj or author,
- "date" : text.parse_datetime(
+ "date" : self.parse_datetime(
extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"),
"tweet_id": link.rpartition("/")[2].partition("#")[0],
"content" : extr('class="quote-text', "</div").partition(">")[2],
@@ -173,7 +173,7 @@ class NitterExtractor(BaseExtractor):
"nick" : extr('title="', '"'),
"name" : extr('title="@', '"'),
"description" : extr('<p dir="auto">', '<'),
- "date" : text.parse_datetime(
+ "date" : self.parse_datetime(
extr('class="profile-joindate"><span title="', '"'),
"%I:%M %p - %d %b %Y"),
"statuses_count" : text.parse_int(extr(
@@ -229,12 +229,12 @@ class NitterExtractor(BaseExtractor):
BASE_PATTERN = NitterExtractor.update({
})
-USER_PATTERN = BASE_PATTERN + r"/(i(?:/user/|d:)(\d+)|[^/?#]+)"
+USER_PATTERN = rf"{BASE_PATTERN}/(i(?:/user/|d:)(\d+)|[^/?#]+)"
class NitterTweetsExtractor(NitterExtractor):
subcategory = "tweets"
- pattern = USER_PATTERN + r"(?:/tweets)?(?:$|\?|#)"
+ pattern = rf"{USER_PATTERN}(?:/tweets)?(?:$|\?|#)"
example = "https://nitter.net/USER"
def tweets(self):
@@ -243,7 +243,7 @@ class NitterTweetsExtractor(NitterExtractor):
class NitterRepliesExtractor(NitterExtractor):
subcategory = "replies"
- pattern = USER_PATTERN + r"/with_replies"
+ pattern = rf"{USER_PATTERN}/with_replies"
example = "https://nitter.net/USER/with_replies"
def tweets(self):
@@ -252,7 +252,7 @@ class NitterRepliesExtractor(NitterExtractor):
class NitterMediaExtractor(NitterExtractor):
subcategory = "media"
- pattern = USER_PATTERN + r"/media"
+ pattern = rf"{USER_PATTERN}/media"
example = "https://nitter.net/USER/media"
def tweets(self):
@@ -261,7 +261,7 @@ class NitterMediaExtractor(NitterExtractor):
class NitterSearchExtractor(NitterExtractor):
subcategory = "search"
- pattern = USER_PATTERN + r"/search"
+ pattern = rf"{USER_PATTERN}/search"
example = "https://nitter.net/USER/search"
def tweets(self):
@@ -274,7 +274,7 @@ class NitterTweetExtractor(NitterExtractor):
directory_fmt = ("{category}", "{user[name]}")
filename_fmt = "{tweet_id}_{num}.{extension}"
archive_fmt = "{tweet_id}_{num}"
- pattern = BASE_PATTERN + r"/(i/web|[^/?#]+)/status/(\d+())"
+ pattern = rf"{BASE_PATTERN}/(i/web|[^/?#]+)/status/(\d+())"
example = "https://nitter.net/USER/status/12345"
def tweets(self):
diff --git a/gallery_dl/extractor/noop.py b/gallery_dl/extractor/noop.py
index df2316c..fe88e63 100644
--- a/gallery_dl/extractor/noop.py
+++ b/gallery_dl/extractor/noop.py
@@ -8,7 +8,7 @@
"""noop extractor"""
-from .common import Extractor, Message
+from .common import Extractor
class NoopExtractor(Extractor):
@@ -17,11 +17,9 @@ class NoopExtractor(Extractor):
example = "noop"
def items(self):
- # yield *something* to prevent a 'No results' message
- yield Message.Version, 1
-
# Save cookies manually, since it happens automatically only after
# extended extractor initialization, i.e. Message.Directory, which
# itself might cause some unintended effects.
if self.cookies:
self.cookies_store()
+ return iter(((-1, "", None),))
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index 528aff2..fdd3594 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -9,7 +9,7 @@
"""Extractors for https://nozomi.la/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, dt
def decode_nozomi(n):
@@ -49,10 +49,9 @@ class NozomiExtractor(Extractor):
post["character"] = self._list(post.get("character"))
try:
- post["date"] = text.parse_datetime(
- post["date"] + ":00", "%Y-%m-%d %H:%M:%S%z")
+ post["date"] = dt.parse_iso(post["date"] + ":00")
except Exception:
- post["date"] = None
+ post["date"] = dt.NONE
post.update(data)
@@ -61,7 +60,7 @@ class NozomiExtractor(Extractor):
if key in post:
del post[key]
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], image in enumerate(images, 1):
post["filename"] = post["dataid"] = did = image["dataid"]
post["is_video"] = video = \
diff --git a/gallery_dl/extractor/nudostar.py b/gallery_dl/extractor/nudostar.py
index 467d36a..2eb4340 100644
--- a/gallery_dl/extractor/nudostar.py
+++ b/gallery_dl/extractor/nudostar.py
@@ -21,7 +21,7 @@ class NudostarExtractor(GalleryExtractor):
class NudostarModelExtractor(NudostarExtractor):
"""Extractor for NudoStar models"""
subcategory = "model"
- pattern = BASE_PATTERN + r"(/models/([^/?#]+)/?)$"
+ pattern = rf"{BASE_PATTERN}(/models/([^/?#]+)/?)$"
example = "https://nudostar.tv/models/MODEL/"
def metadata(self, page):
@@ -53,7 +53,7 @@ class NudostarModelExtractor(NudostarExtractor):
class NudostarImageExtractor(NudostarExtractor):
"""Extractor for NudoStar images"""
subcategory = "image"
- pattern = BASE_PATTERN + r"(/models/([^/?#]+)/(\d+)/)"
+ pattern = rf"{BASE_PATTERN}(/models/([^/?#]+)/(\d+)/)"
example = "https://nudostar.tv/models/MODEL/123/"
def items(self):
@@ -67,5 +67,5 @@ class NudostarImageExtractor(NudostarExtractor):
data["num"] = text.parse_int(self.groups[2])
data["url"] = img_url
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, img_url, data
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index ff192c2..a0e3c9f 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -8,16 +8,14 @@
"""Utility classes to setup OAuth and link accounts to gallery-dl"""
-from .common import Extractor, Message
+from .common import Extractor
from .. import text, oauth, util, config, exception
from ..output import stdout_write
from ..cache import cache, memcache
-import urllib.parse
-import binascii
-import hashlib
REDIRECT_URI_LOCALHOST = "http://localhost:6414/"
REDIRECT_URI_HTTPS = "https://mikf.github.io/gallery-dl/oauth-redirect.html"
+NOOP = ((-1, "", None),)
class OAuthBase(Extractor):
@@ -86,7 +84,7 @@ class OAuthBase(Extractor):
def open(self, url, params, recv=None):
"""Open 'url' in browser amd return response parameters"""
- url += "?" + urllib.parse.urlencode(params)
+ url = f"{url}?{text.build_query(params)}"
if browser := self.config("browser", True):
try:
@@ -257,16 +255,18 @@ class OAuthFlickr(OAuthBase):
redirect_uri = REDIRECT_URI_HTTPS
def items(self):
- yield Message.Version, 1
- from . import flickr
+ # from . import flickr
self._oauth1_authorization_flow(
- flickr.FlickrAPI.API_KEY,
- flickr.FlickrAPI.API_SECRET,
+ # flickr.FlickrAPI.API_KEY,
+ # flickr.FlickrAPI.API_SECRET,
+ "",
+ "",
"https://www.flickr.com/services/oauth/request_token",
"https://www.flickr.com/services/oauth/authorize",
"https://www.flickr.com/services/oauth/access_token",
)
+ return iter(NOOP)
class OAuthSmugmug(OAuthBase):
@@ -275,7 +275,6 @@ class OAuthSmugmug(OAuthBase):
example = "oauth:smugmug"
def items(self):
- yield Message.Version, 1
from . import smugmug
self._oauth1_authorization_flow(
@@ -285,6 +284,7 @@ class OAuthSmugmug(OAuthBase):
"https://api.smugmug.com/services/oauth/1.0a/authorize",
"https://api.smugmug.com/services/oauth/1.0a/getAccessToken",
)
+ return iter(NOOP)
class OAuthTumblr(OAuthBase):
@@ -293,7 +293,6 @@ class OAuthTumblr(OAuthBase):
example = "oauth:tumblr"
def items(self):
- yield Message.Version, 1
from . import tumblr
self._oauth1_authorization_flow(
@@ -303,6 +302,7 @@ class OAuthTumblr(OAuthBase):
"https://www.tumblr.com/oauth/authorize",
"https://www.tumblr.com/oauth/access_token",
)
+ return iter(NOOP)
# --------------------------------------------------------------------
@@ -315,7 +315,6 @@ class OAuthDeviantart(OAuthBase):
redirect_uri = REDIRECT_URI_HTTPS
def items(self):
- yield Message.Version, 1
from . import deviantart
self._oauth2_authorization_code_grant(
@@ -328,6 +327,7 @@ class OAuthDeviantart(OAuthBase):
scope="browse user.manage",
cache=deviantart._refresh_token_cache,
)
+ return iter(NOOP)
class OAuthReddit(OAuthBase):
@@ -336,7 +336,6 @@ class OAuthReddit(OAuthBase):
example = "oauth:reddit"
def items(self):
- yield Message.Version, 1
from . import reddit
self.session.headers["User-Agent"] = reddit.RedditAPI.USER_AGENT
@@ -350,6 +349,7 @@ class OAuthReddit(OAuthBase):
scope="read history",
cache=reddit._refresh_token_cache,
)
+ return iter(NOOP)
class OAuthMastodon(OAuthBase):
@@ -362,7 +362,6 @@ class OAuthMastodon(OAuthBase):
self.instance = match[1]
def items(self):
- yield Message.Version, 1
from . import mastodon
for _, root, application in mastodon.MastodonExtractor.instances:
@@ -382,6 +381,7 @@ class OAuthMastodon(OAuthBase):
key="access_token",
cache=mastodon._access_token_cache,
)
+ return iter(NOOP)
@cache(maxage=36500*86400, keyarg=1)
def _register(self, instance):
@@ -416,8 +416,9 @@ class OAuthPixiv(OAuthBase):
example = "oauth:pixiv"
def items(self):
- yield Message.Version, 1
from . import pixiv
+ import binascii
+ import hashlib
code_verifier = util.generate_token(32)
digest = hashlib.sha256(code_verifier.encode()).digest()
@@ -464,6 +465,7 @@ class OAuthPixiv(OAuthBase):
self.log.info("Writing 'refresh-token' to cache")
stdout_write(self._generate_message(("refresh-token",), (token,)))
+ return iter(NOOP)
def _input_code(self):
stdout_write("""\
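The OAuthPixiv hunk above ends after hashing the code verifier; for context, the standard RFC 7636 S256 derivation turns that digest into a base64url challenge without padding. A stdlib-only sketch (not necessarily gallery-dl's exact code, which imports binascii instead):

import base64
import hashlib
import secrets

# PKCE (RFC 7636) S256: challenge = base64url(sha256(verifier)), no padding
code_verifier = secrets.token_urlsafe(32)
digest = hashlib.sha256(code_verifier.encode()).digest()
code_challenge = base64.urlsafe_b64encode(digest).rstrip(b"=").decode()
print(code_challenge)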
diff --git a/gallery_dl/extractor/okporn.py b/gallery_dl/extractor/okporn.py
new file mode 100644
index 0000000..e03f7cb
--- /dev/null
+++ b/gallery_dl/extractor/okporn.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://ok.porn/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class OkpornGalleryExtractor(GalleryExtractor):
+ """Extractor for image galleries from ok.porn"""
+ category = "okporn"
+ root = "https://ok.porn"
+ pattern = r"(?:https?://)?(?:www\.)?ok\.porn/albums/(\d+)"
+ example = "https://ok.porn/albums/12345/"
+
+ def __init__(self, match):
+ url = f"{self.root}/albums/{match[1]}/"
+ GalleryExtractor.__init__(self, match, url)
+
+ def metadata(self, page):
+ return {
+ "gallery_id" : text.parse_int(self.groups[0]),
+ "title" : text.unescape(text.extr(
+ page, "h1 class=title>", "</h1>")),
+ "description": text.unescape(text.extr(
+ page, 'name="description" content="', '"')),
+ "tags": text.extr(
+ page, 'name="keywords" content="', '"').split(", "),
+ }
+
+ def images(self, page):
+ return [
+ (url, None)
+ for url in text.extract_iter(page, 'data-original="', '"')
+ ]
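images() above collects every data-original attribute from the album page via text.extract_iter; a plain-re equivalent for illustration:

import re

def image_urls(page):
    # Same scan as the extractor's data-original extraction, using re
    return re.findall(r'data-original="([^"]+)"', page)

print(image_urls('<img data-original="https://ok.porn/a.jpg">'))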
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 490243a..d56331f 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -31,7 +31,7 @@ class PahealExtractor(Extractor):
post["width"] = text.parse_int(post["width"])
post["height"] = text.parse_int(post["height"])
post.update(data)
- yield Message.Directory, post
+ yield Message.Directory, "", post
yield Message.Url, post["file_url"], post
def get_metadata(self):
@@ -53,8 +53,7 @@ class PahealExtractor(Extractor):
extr("<source src='", "'")),
"uploader": text.unquote(extr(
"class='username' href='/user/", "'")),
- "date" : text.parse_datetime(
- extr("datetime='", "'"), "%Y-%m-%dT%H:%M:%S%z"),
+ "date" : self.parse_datetime_iso(extr("datetime='", "'")),
"source" : text.unescape(text.extr(
extr(">Source Link<", "</td>"), "href='", "'")),
}
@@ -133,7 +132,7 @@ class PahealTagExtractor(PahealExtractor):
"duration" : text.parse_float(duration[:-1]),
"tags" : text.unescape(tags),
"size" : text.parse_bytes(size[:-1]),
- "date" : text.parse_datetime(date, "%B %d, %Y; %H:%M"),
+ "date" : self.parse_datetime(date, "%B %d, %Y; %H:%M"),
"filename" : f"{pid} - {tags}",
"extension": ext,
}
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index cf1a6d6..12dfd48 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -9,7 +9,7 @@
"""Extractors for https://www.patreon.com/"""
from .common import Extractor, Message
-from .. import text, util, exception
+from .. import text, util, dt, exception
from ..cache import memcache
import collections
import itertools
@@ -46,20 +46,21 @@ class PatreonExtractor(Extractor):
for post in self.posts():
- yield Message.Directory, post
+ yield Message.Directory, "", post
if not post.get("current_user_can_view", True):
self.log.warning("Not allowed to view post %s", post["id"])
continue
post["num"] = 0
hashes = set()
- for kind, url, name in itertools.chain.from_iterable(
+ for kind, file, url, name in itertools.chain.from_iterable(
g(post) for g in generators):
fhash = self._filehash(url)
if fhash not in hashes or not fhash:
hashes.add(fhash)
post["hash"] = fhash
post["type"] = kind
+ post["file"] = file
post["num"] += 1
text.nameext_from_url(name, post)
if text.ext_from_url(url) == "m3u8":
@@ -86,7 +87,7 @@ class PatreonExtractor(Extractor):
name = url
else:
name = self._filename(url) or url
- return (("postfile", url, name),)
+ return (("postfile", postfile, url, name),)
return ()
def _images(self, post):
@@ -94,7 +95,7 @@ class PatreonExtractor(Extractor):
for image in images:
if url := self._images_url(image):
name = image.get("file_name") or self._filename(url) or url
- yield "image", url, name
+ yield "image", image, url, name
def _images_url(self, image):
return image.get("download_url")
@@ -109,24 +110,24 @@ class PatreonExtractor(Extractor):
if image := post.get("image"):
if url := image.get("large_url"):
name = image.get("file_name") or self._filename(url) or url
- return (("image_large", url, name),)
+ return (("image_large", image, url, name),)
return ()
def _attachments(self, post):
for attachment in post.get("attachments") or ():
if url := self.request_location(attachment["url"], fatal=False):
- yield "attachment", url, attachment["name"]
+ yield "attachment", attachment, url, attachment["name"]
for attachment in post.get("attachments_media") or ():
if url := attachment.get("download_url"):
- yield "attachment", url, attachment["file_name"]
+ yield "attachment", attachment, url, attachment["file_name"]
def _content(self, post):
if content := post.get("content"):
for img in text.extract_iter(
content, '<img data-media-id="', '>'):
if url := text.extr(img, 'src="', '"'):
- yield "content", url, self._filename(url) or url
+ yield "content", None, url, self._filename(url) or url
def posts(self):
"""Return all relevant post objects"""
@@ -177,8 +178,7 @@ class PatreonExtractor(Extractor):
post, included, "attachments")
attr["attachments_media"] = self._files(
post, included, "attachments_media")
- attr["date"] = text.parse_datetime(
- attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ attr["date"] = self.parse_datetime_iso(attr["published_at"])
try:
attr["campaign"] = (included["campaign"][
@@ -226,8 +226,7 @@ class PatreonExtractor(Extractor):
user = response.json()["data"]
attr = user["attributes"]
attr["id"] = user["id"]
- attr["date"] = text.parse_datetime(
- attr["created"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ attr["date"] = self.parse_datetime_iso(attr["created"])
return attr
def _collection(self, collection_id):
@@ -236,8 +235,7 @@ class PatreonExtractor(Extractor):
coll = data["data"]
attr = coll["attributes"]
attr["id"] = coll["id"]
- attr["date"] = text.parse_datetime(
- attr["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ attr["date"] = self.parse_datetime_iso(attr["created_at"])
return attr
def _filename(self, url):
@@ -256,7 +254,7 @@ class PatreonExtractor(Extractor):
return part
return ""
- def _build_url(self, endpoint, query):
+ def _build_url(self, endpoint, sort, query):
return (
f"https://www.patreon.com/api/{endpoint}"
@@ -291,11 +289,20 @@ class PatreonExtractor(Extractor):
"preview_views,video_duration"
f"&page[cursor]={self._init_cursor()}"
- f"{query}"
+ f"{query}{self._order(sort)}"
"&json-api-version=1.0"
)
+ def _order(self, sort):
+ if order := self.config("order-posts"):
+ if order in {"d", "desc"}:
+ order = "-published_at"
+ elif order in {"a", "asc", "r", "reverse"}:
+ order = "published_at"
+ return f"&sort={order}"
+ return f"&sort={sort}" if sort else ""
+
def _build_file_generators(self, filetypes):
if filetypes is None:
return (self._images, self._image_large,
@@ -358,17 +365,26 @@ class PatreonCollectionExtractor(PatreonExtractor):
campaign_id = text.extr(
collection["thumbnail"]["url"], "/campaign/", "/")
- url = self._build_url("posts", (
+ url = self._build_url("posts", "collection_order", (
# patreon returns '400 Bad Request' without campaign_id filter
f"&filter[campaign_id]={campaign_id}"
"&filter[contains_exclusive_posts]=true"
"&filter[is_draft]=false"
f"&filter[collection_id]={collection_id}"
"&filter[include_drops]=true"
- "&sort=collection_order"
))
return self._pagination(url)
+ def _order(self, sort):
+ if order := self.config("order-posts"):
+ if order in {"a", "asc"}:
+ order = "collection_order"
+ elif order in {"d", "desc", "r", "reverse"}:
+ # "-collection_order" results in a '400 Bad Request' error
+ order = "-published_at"
+ return f"&sort={order}"
+ return f"&sort={sort}" if sort else ""
+
class PatreonCreatorExtractor(PatreonExtractor):
"""Extractor for a creator's works"""
@@ -387,12 +403,11 @@ class PatreonCreatorExtractor(PatreonExtractor):
campaign_id = self._get_campaign_id(creator, params)
self.log.debug("campaign_id: %s", campaign_id)
- url = self._build_url("posts", (
+ url = self._build_url("posts", params.get("sort", "-published_at"), (
f"&filter[campaign_id]={campaign_id}"
"&filter[contains_exclusive_posts]=true"
"&filter[is_draft]=false"
f"{self._get_filters(params)}"
- f"&sort={params.get('sort', '-published_at')}"
))
return self._pagination(url)
@@ -445,11 +460,10 @@ class PatreonUserExtractor(PatreonExtractor):
def posts(self):
if date_max := self._get_date_min_max(None, None)[1]:
- self._cursor = cursor = \
- util.datetime_from_timestamp(date_max).isoformat()
+ self._cursor = cursor = dt.from_ts(date_max).isoformat()
self._init_cursor = lambda: cursor
- url = self._build_url("stream", (
+ url = self._build_url("stream", None, (
"&filter[is_following]=true"
"&json-api-use-default-includes=false"
))
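Both _order variants above translate the order-posts option into the API's sort parameter, falling back to the caller-supplied default, with unrecognized values passed through verbatim. A stand-alone mirror of the base mapping (illustrative only):

def order_param(config_value, default_sort=None):
    # Mirror of PatreonExtractor._order(), stand-alone for illustration
    if config_value:
        if config_value in {"d", "desc"}:
            sort = "-published_at"
        elif config_value in {"a", "asc", "r", "reverse"}:
            sort = "published_at"
        else:
            sort = config_value            # pass through verbatim
        return f"&sort={sort}"
    return f"&sort={default_sort}" if default_sort else ""

print(order_param("asc"))                  # &sort=published_at
print(order_param(None, "-published_at"))  # &sort=-published_at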
diff --git a/gallery_dl/extractor/pexels.py b/gallery_dl/extractor/pexels.py
index f95d409..9e2f40c 100644
--- a/gallery_dl/extractor/pexels.py
+++ b/gallery_dl/extractor/pexels.py
@@ -35,8 +35,7 @@ class PexelsExtractor(Extractor):
post["type"] = attr["type"]
post.update(metadata)
- post["date"] = text.parse_datetime(
- post["created_at"][:-5], "%Y-%m-%dT%H:%M:%S")
+ post["date"] = self.parse_datetime_iso(post["created_at"][:-5])
if "image" in post:
url, _, query = post["image"]["download_link"].partition("?")
@@ -49,7 +48,7 @@ class PexelsExtractor(Extractor):
self.log.warning("%s: Unsupported post type", post.get("id"))
continue
- yield Message.Directory, post
+ yield Message.Directory, "", post
yield Message.Url, url, text.nameext_from_url(name, post)
def posts(self):
@@ -63,7 +62,7 @@ class PexelsCollectionExtractor(PexelsExtractor):
"""Extractor for a pexels.com collection"""
subcategory = "collection"
directory_fmt = ("{category}", "Collections", "{collection}")
- pattern = BASE_PATTERN + r"/collections/((?:[^/?#]*-)?(\w+))"
+ pattern = rf"{BASE_PATTERN}/collections/((?:[^/?#]*-)?(\w+))"
example = "https://www.pexels.com/collections/SLUG-a1b2c3/"
def metadata(self):
@@ -78,7 +77,7 @@ class PexelsSearchExtractor(PexelsExtractor):
"""Extractor for pexels.com search results"""
subcategory = "search"
directory_fmt = ("{category}", "Searches", "{search_tags}")
- pattern = BASE_PATTERN + r"/search/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/search/([^/?#]+)"
example = "https://www.pexels.com/search/QUERY/"
def metadata(self):
@@ -92,7 +91,7 @@ class PexelsUserExtractor(PexelsExtractor):
"""Extractor for pexels.com user galleries"""
subcategory = "user"
directory_fmt = ("{category}", "@{user[slug]}")
- pattern = BASE_PATTERN + r"/(@(?:(?:[^/?#]*-)?(\d+)|[^/?#]+))"
+ pattern = rf"{BASE_PATTERN}/(@(?:(?:[^/?#]*-)?(\d+)|[^/?#]+))"
example = "https://www.pexels.com/@USER-12345/"
def posts(self):
@@ -101,7 +100,7 @@ class PexelsUserExtractor(PexelsExtractor):
class PexelsImageExtractor(PexelsExtractor):
subcategory = "image"
- pattern = BASE_PATTERN + r"/photo/((?:[^/?#]*-)?\d+)"
+ pattern = rf"{BASE_PATTERN}/photo/((?:[^/?#]*-)?\d+)"
example = "https://www.pexels.com/photo/SLUG-12345/"
def posts(self):
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index 8891dc0..3634c66 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -36,8 +36,7 @@ class PhilomenaExtractor(BooruExtractor):
return url
def _prepare(self, post):
- post["date"] = text.parse_datetime(
- post["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
+ post["date"] = self.parse_datetime_iso(post["created_at"][:19])
BASE_PATTERN = PhilomenaExtractor.update({
@@ -62,7 +61,7 @@ BASE_PATTERN = PhilomenaExtractor.update({
class PhilomenaPostExtractor(PhilomenaExtractor):
"""Extractor for single posts on a Philomena booru"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/(?:images/)?(\d+)"
+ pattern = rf"{BASE_PATTERN}/(?:images/)?(\d+)"
example = "https://derpibooru.org/images/12345"
def posts(self):
@@ -73,7 +72,7 @@ class PhilomenaSearchExtractor(PhilomenaExtractor):
"""Extractor for Philomena search results"""
subcategory = "search"
directory_fmt = ("{category}", "{search_tags}")
- pattern = BASE_PATTERN + r"/(?:search/?\?([^#]+)|tags/([^/?#]+))"
+ pattern = rf"{BASE_PATTERN}/(?:search/?\?([^#]+)|tags/([^/?#]+))"
example = "https://derpibooru.org/search?q=QUERY"
def __init__(self, match):
@@ -107,7 +106,7 @@ class PhilomenaGalleryExtractor(PhilomenaExtractor):
subcategory = "gallery"
directory_fmt = ("{category}", "galleries",
"{gallery[id]} {gallery[title]}")
- pattern = BASE_PATTERN + r"/galleries/(\d+)"
+ pattern = rf"{BASE_PATTERN}/galleries/(\d+)"
example = "https://derpibooru.org/galleries/12345"
def metadata(self):
diff --git a/gallery_dl/extractor/photovogue.py b/gallery_dl/extractor/photovogue.py
index e604304..cb16b23 100644
--- a/gallery_dl/extractor/photovogue.py
+++ b/gallery_dl/extractor/photovogue.py
@@ -18,7 +18,7 @@ class PhotovogueUserExtractor(Extractor):
directory_fmt = ("{category}", "{photographer[id]} {photographer[name]}")
filename_fmt = "{id} {title}.{extension}"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/photographers/(\d+)"
+ pattern = rf"{BASE_PATTERN}/photographers/(\d+)"
example = "https://www.vogue.com/photovogue/photographers/12345"
def __init__(self, match):
@@ -29,10 +29,9 @@ class PhotovogueUserExtractor(Extractor):
for photo in self.photos():
url = photo["gallery_image"]
photo["title"] = photo["title"].strip()
- photo["date"] = text.parse_datetime(
- photo["date"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ photo["date"] = self.parse_datetime_iso(photo["date"])
- yield Message.Directory, photo
+ yield Message.Directory, "", photo
yield Message.Url, url, text.nameext_from_url(url, photo)
def photos(self):
diff --git a/gallery_dl/extractor/picarto.py b/gallery_dl/extractor/picarto.py
index 62ac38a..b0fa079 100644
--- a/gallery_dl/extractor/picarto.py
+++ b/gallery_dl/extractor/picarto.py
@@ -29,10 +29,9 @@ class PicartoGalleryExtractor(Extractor):
def items(self):
for post in self.posts():
- post["date"] = text.parse_datetime(
- post["created_at"], "%Y-%m-%d %H:%M:%S")
+ post["date"] = self.parse_datetime_iso(post["created_at"])
variations = post.pop("variations", ())
- yield Message.Directory, post
+ yield Message.Directory, "", post
image = post["default_image"]
if not image:
diff --git a/gallery_dl/extractor/picazor.py b/gallery_dl/extractor/picazor.py
new file mode 100644
index 0000000..df1f436
--- /dev/null
+++ b/gallery_dl/extractor/picazor.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://picazor.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class PicazorUserExtractor(Extractor):
+ """Extractor for picazor users"""
+ category = "picazor"
+ subcategory = "user"
+ root = "https://picazor.com"
+ browser = "firefox"
+ directory_fmt = ("{category}", "{user}")
+ filename_fmt = "{id}_{num:>03}.{extension}"
+ archive_fmt = "{id}_{num}"
+ pattern = r"(?:https?://)?(?:www\.)?picazor\.com/[a-z]{2}/([^/?#]+)"
+ example = "https://picazor.com/en/USERNAME"
+
+ def items(self):
+ user = self.groups[0]
+ first = True
+
+ url = f"{self.root}/api/files/{user}/sfiles"
+ params = {"page": 1}
+ headers = {"Referer": f"{self.root}/en/{user}"}
+
+ while True:
+ data = self.request_json(url, params=params, headers=headers)
+ if not data:
+ break
+
+ for item in data:
+ path = item.get("path")
+ if not path:
+ continue
+
+ if first:
+ first = False
+ self.kwdict["user"] = user
+ self.kwdict["count"] = item.get("order")
+ yield Message.Directory, "", {
+ "subject": item.get("subject"),
+ "user" : user,
+ }
+
+ item.pop("blurDataURL", None)
+ item["num"] = item["order"]
+
+ file_url = self.root + path
+ text.nameext_from_url(file_url, item)
+ yield Message.Url, file_url, item
+
+ params["page"] += 1
diff --git a/gallery_dl/extractor/pictoa.py b/gallery_dl/extractor/pictoa.py
index da252f3..0dfe304 100644
--- a/gallery_dl/extractor/pictoa.py
+++ b/gallery_dl/extractor/pictoa.py
@@ -24,7 +24,7 @@ class PictoaExtractor(Extractor):
class PictoaImageExtractor(PictoaExtractor):
"""Extractor for single images from pictoa.com"""
subcategory = "image"
- pattern = BASE_PATTERN + r"/albums/(?:[\w-]+-)?(\d+)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/albums/(?:[\w-]+-)?(\d+)/(\d+)"
example = "https://www.pictoa.com/albums/NAME-12345/12345.html"
def items(self):
@@ -43,14 +43,14 @@ class PictoaImageExtractor(PictoaExtractor):
}
text.nameext_from_url(image_url, data)
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, image_url, data
class PictoaAlbumExtractor(PictoaExtractor):
"""Extractor for image albums from pictoa.com"""
subcategory = "album"
- pattern = BASE_PATTERN + r"/albums/(?:[\w-]+-)?(\d+).html"
+ pattern = rf"{BASE_PATTERN}/albums/(?:[\w-]+-)?(\d+).html"
example = "https://www.pictoa.com/albums/NAME-12345.html"
def items(self):
diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py
index 968776b..6661e7d 100644
--- a/gallery_dl/extractor/piczel.py
+++ b/gallery_dl/extractor/piczel.py
@@ -26,14 +26,13 @@ class PiczelExtractor(Extractor):
def items(self):
for post in self.posts():
post["tags"] = [t["title"] for t in post["tags"] if t["title"]]
- post["date"] = text.parse_datetime(
- post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ post["date"] = self.parse_datetime_iso(post["created_at"])
if post["multi"]:
images = post["images"]
del post["images"]
post["count"] = len(images)
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], image in enumerate(images):
if "id" in image:
del image["id"]
@@ -43,7 +42,7 @@ class PiczelExtractor(Extractor):
else:
post["count"] = 1
- yield Message.Directory, post
+ yield Message.Directory, "", post
post["num"] = 0
url = post["image"]["url"]
yield Message.Url, url, text.nameext_from_url(url, post)
@@ -67,7 +66,7 @@ class PiczelExtractor(Extractor):
class PiczelUserExtractor(PiczelExtractor):
"""Extractor for all images from a user's gallery"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/?$"
+ pattern = rf"{BASE_PATTERN}/gallery/([^/?#]+)/?$"
example = "https://piczel.tv/gallery/USER"
def posts(self):
@@ -80,7 +79,7 @@ class PiczelFolderExtractor(PiczelExtractor):
subcategory = "folder"
directory_fmt = ("{category}", "{user[username]}", "{folder[name]}")
archive_fmt = "f{folder[id]}_{id}_{num}"
- pattern = BASE_PATTERN + r"/gallery/(?!image/)[^/?#]+/(\d+)"
+ pattern = rf"{BASE_PATTERN}/gallery/(?!image/)[^/?#]+/(\d+)"
example = "https://piczel.tv/gallery/USER/12345"
def posts(self):
@@ -91,7 +90,7 @@ class PiczelFolderExtractor(PiczelExtractor):
class PiczelImageExtractor(PiczelExtractor):
"""Extractor for individual images"""
subcategory = "image"
- pattern = BASE_PATTERN + r"/gallery/image/(\d+)"
+ pattern = rf"{BASE_PATTERN}/gallery/image/(\d+)"
example = "https://piczel.tv/gallery/image/12345"
def posts(self):
diff --git a/gallery_dl/extractor/pillowfort.py b/gallery_dl/extractor/pillowfort.py
index 05bc8e7..0b750fe 100644
--- a/gallery_dl/extractor/pillowfort.py
+++ b/gallery_dl/extractor/pillowfort.py
@@ -10,7 +10,7 @@
from .common import Extractor, Message
from ..cache import cache
-from .. import text, util, exception
+from .. import text, exception
BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social"
@@ -36,7 +36,7 @@ class PillowfortExtractor(Extractor):
external = self.config("external", False)
if inline:
- inline = util.re(r'src="(https://img\d+\.pillowfort\.social'
+ inline = text.re(r'src="(https://img\d+\.pillowfort\.social'
r'/posts/[^"]+)').findall
for post in self.posts():
@@ -48,11 +48,10 @@ class PillowfortExtractor(Extractor):
for url in inline(post["content"]):
files.append({"url": url})
- post["date"] = text.parse_datetime(
- post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ post["date"] = self.parse_datetime_iso(post["created_at"])
post["post_id"] = post.pop("id")
post["count"] = len(files)
- yield Message.Directory, post
+ yield Message.Directory, "", post
post["num"] = 0
for file in files:
@@ -76,8 +75,7 @@ class PillowfortExtractor(Extractor):
if "id" not in file:
post["id"] = post["hash"]
if "created_at" in file:
- post["date"] = text.parse_datetime(
- file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ post["date"] = self.parse_datetime_iso(file["created_at"])
yield msgtype, url, post
@@ -121,7 +119,7 @@ class PillowfortExtractor(Extractor):
class PillowfortPostExtractor(PillowfortExtractor):
"""Extractor for a single pillowfort post"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/posts/(\d+)"
+ pattern = rf"{BASE_PATTERN}/posts/(\d+)"
example = "https://www.pillowfort.social/posts/12345"
def posts(self):
@@ -132,7 +130,7 @@ class PillowfortPostExtractor(PillowfortExtractor):
class PillowfortUserExtractor(PillowfortExtractor):
"""Extractor for all posts of a pillowfort user"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/(?!posts/)([^/?#]+(?:/tagged/[^/?#]+)?)"
+ pattern = rf"{BASE_PATTERN}/(?!posts/)([^/?#]+(?:/tagged/[^/?#]+)?)"
example = "https://www.pillowfort.social/USER"
def posts(self):
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index ff771fb..7aa32ec 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -46,7 +46,7 @@ class PinterestExtractor(Extractor):
try:
files = self._extract_files(pin)
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
self.log.warning(
"%s: Error when extracting download URLs (%s: %s)",
pin.get("id"), exc.__class__.__name__, exc)
@@ -63,7 +63,7 @@ class PinterestExtractor(Extractor):
if value := pin.get(key):
pin[key] = value.strip()
- yield Message.Directory, pin
+ yield Message.Directory, "", pin
for pin["num"], file in enumerate(files, 1):
url = file["url"]
text.nameext_from_url(url, pin)
@@ -207,7 +207,7 @@ class PinterestExtractor(Extractor):
class PinterestUserExtractor(PinterestExtractor):
"""Extractor for a user's boards"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)(?:/_saved)?/?$"
+ pattern = rf"{BASE_PATTERN}/(?!pin/)([^/?#]+)(?:/_saved)?/?$"
example = "https://www.pinterest.com/USER/"
def __init__(self, match):
@@ -225,7 +225,7 @@ class PinterestAllpinsExtractor(PinterestExtractor):
"""Extractor for a user's 'All Pins' feed"""
subcategory = "allpins"
directory_fmt = ("{category}", "{user}")
- pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/pins/?$"
+ pattern = rf"{BASE_PATTERN}/(?!pin/)([^/?#]+)/pins/?$"
example = "https://www.pinterest.com/USER/pins/"
def __init__(self, match):
@@ -243,7 +243,7 @@ class PinterestCreatedExtractor(PinterestExtractor):
"""Extractor for a user's created pins"""
subcategory = "created"
directory_fmt = ("{category}", "{user}")
- pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/_created/?$"
+ pattern = rf"{BASE_PATTERN}/(?!pin/)([^/?#]+)/_created/?$"
example = "https://www.pinterest.com/USER/_created/"
def __init__(self, match):
@@ -263,7 +263,7 @@ class PinterestSectionExtractor(PinterestExtractor):
directory_fmt = ("{category}", "{board[owner][username]}",
"{board[name]}", "{section[title]}")
archive_fmt = "{board[id]}_{id}"
- pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/([^/?#]+)/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/(?!pin/)([^/?#]+)/([^/?#]+)/([^/?#]+)"
example = "https://www.pinterest.com/USER/BOARD/SECTION"
def __init__(self, match):
@@ -291,7 +291,7 @@ class PinterestSearchExtractor(PinterestExtractor):
"""Extractor for Pinterest search results"""
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
- pattern = BASE_PATTERN + r"/search/pins/?\?q=([^&#]+)"
+ pattern = rf"{BASE_PATTERN}/search/pins/?\?q=([^&#]+)"
example = "https://www.pinterest.com/search/pins/?q=QUERY"
def __init__(self, match):
@@ -308,7 +308,7 @@ class PinterestSearchExtractor(PinterestExtractor):
class PinterestPinExtractor(PinterestExtractor):
"""Extractor for images from a single pin from pinterest.com"""
subcategory = "pin"
- pattern = BASE_PATTERN + r"/pin/([^/?#]+)(?!.*#related$)"
+ pattern = rf"{BASE_PATTERN}/pin/([^/?#]+)(?!.*#related$)"
example = "https://www.pinterest.com/pin/12345/"
def __init__(self, match):
@@ -329,7 +329,7 @@ class PinterestBoardExtractor(PinterestExtractor):
subcategory = "board"
directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}")
archive_fmt = "{board[id]}_{id}"
- pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#]+)"
+ pattern = (rf"{BASE_PATTERN}/(?!pin/)([^/?#]+)"
r"/([^/?#]+)/?(?!.*#related$)")
example = "https://www.pinterest.com/USER/BOARD/"
@@ -361,7 +361,7 @@ class PinterestRelatedPinExtractor(PinterestPinExtractor):
"""Extractor for related pins of another pin from pinterest.com"""
subcategory = "related-pin"
directory_fmt = ("{category}", "related {original_pin[id]}")
- pattern = BASE_PATTERN + r"/pin/([^/?#]+).*#related$"
+ pattern = rf"{BASE_PATTERN}/pin/([^/?#]+).*#related$"
example = "https://www.pinterest.com/pin/12345/#related"
def metadata(self):
@@ -376,7 +376,7 @@ class PinterestRelatedBoardExtractor(PinterestBoardExtractor):
subcategory = "related-board"
directory_fmt = ("{category}", "{board[owner][username]}",
"{board[name]}", "related")
- pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/([^/?#]+)/?#related$"
+ pattern = rf"{BASE_PATTERN}/(?!pin/)([^/?#]+)/([^/?#]+)/?#related$"
example = "https://www.pinterest.com/USER/BOARD/#related"
def pins(self):
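All the pattern rewrites in this file are purely syntactic: an rf-string interpolates `BASE_PATTERN` once at class-definition time and yields the exact same regex string as the old concatenation. A quick self-contained check (the `BASE_PATTERN` value below is a stand-in, not pinterest.py's actual definition):

    import re

    BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"  # stand-in

    old = BASE_PATTERN + r"/pin/([^/?#]+)"
    new = rf"{BASE_PATTERN}/pin/([^/?#]+)"
    assert old == new
    assert re.match(new, "https://www.pinterest.com/pin/12345/")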
diff --git a/gallery_dl/extractor/pixeldrain.py b/gallery_dl/extractor/pixeldrain.py
index 73f4b1f..1486976 100644
--- a/gallery_dl/extractor/pixeldrain.py
+++ b/gallery_dl/extractor/pixeldrain.py
@@ -24,16 +24,12 @@ class PixeldrainExtractor(Extractor):
if api_key := self.config("api-key"):
self.session.auth = util.HTTPBasicAuth("", api_key)
- def parse_datetime(self, date_string):
- return text.parse_datetime(
- date_string, "%Y-%m-%dT%H:%M:%S.%fZ")
-
class PixeldrainFileExtractor(PixeldrainExtractor):
"""Extractor for pixeldrain files"""
subcategory = "file"
filename_fmt = "{filename[:230]} ({id}).{extension}"
- pattern = BASE_PATTERN + r"/(?:u|api/file)/(\w+)"
+ pattern = rf"{BASE_PATTERN}/(?:u|api/file)/(\w+)"
example = "https://pixeldrain.com/u/abcdefgh"
def __init__(self, match):
@@ -45,10 +41,10 @@ class PixeldrainFileExtractor(PixeldrainExtractor):
file = self.request_json(url + "/info")
file["url"] = url + "?download"
- file["date"] = self.parse_datetime(file["date_upload"])
+ file["date"] = self.parse_datetime_iso(file["date_upload"])
text.nameext_from_url(file["name"], file)
- yield Message.Directory, file
+ yield Message.Directory, "", file
yield Message.Url, file["url"], file
@@ -58,7 +54,7 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
directory_fmt = ("{category}",
"{album[date]:%Y-%m-%d} {album[title]} ({album[id]})")
filename_fmt = "{num:>03} {filename[:230]} ({id}).{extension}"
- pattern = BASE_PATTERN + r"/(?:l|api/list)/(\w+)(?:#item=(\d+))?"
+ pattern = rf"{BASE_PATTERN}/(?:l|api/list)/(\w+)(?:#item=(\d+))?"
example = "https://pixeldrain.com/l/abcdefgh"
def __init__(self, match):
@@ -72,7 +68,7 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
files = album["files"]
album["count"] = album["file_count"]
- album["date"] = self.parse_datetime(album["date_created"])
+ album["date"] = self.parse_datetime_iso(album["date_created"])
if self.file_index:
idx = text.parse_int(self.file_index)
@@ -86,12 +82,12 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
del album["files"]
del album["file_count"]
- yield Message.Directory, {"album": album}
+ yield Message.Directory, "", {"album": album}
for num, file in enumerate(files, idx+1):
file["album"] = album
file["num"] = num
file["url"] = url = f"{self.root}/api/file/{file['id']}?download"
- file["date"] = self.parse_datetime(file["date_upload"])
+ file["date"] = self.parse_datetime_iso(file["date_upload"])
text.nameext_from_url(file["name"], file)
yield Message.Url, url, file
@@ -101,7 +97,7 @@ class PixeldrainFolderExtractor(PixeldrainExtractor):
subcategory = "folder"
filename_fmt = "{filename[:230]}.{extension}"
archive_fmt = "{path}_{num}"
- pattern = BASE_PATTERN + r"/(?:d|api/filesystem)/([^?]+)"
+ pattern = rf"{BASE_PATTERN}/(?:d|api/filesystem)/([^?]+)"
example = "https://pixeldrain.com/d/abcdefgh"
def metadata(self, data):
@@ -112,7 +108,7 @@ class PixeldrainFolderExtractor(PixeldrainExtractor):
"mime_type" : data["file_type"],
"size" : data["file_size"],
"hash_sha256": data["sha256_sum"],
- "date" : self.parse_datetime(data["created"]),
+ "date" : self.parse_datetime_iso(data["created"]),
}
def items(self):
@@ -135,7 +131,7 @@ class PixeldrainFolderExtractor(PixeldrainExtractor):
folder = self.metadata(path)
folder["id"] = paths[0]["id"]
- yield Message.Directory, folder
+ yield Message.Directory, "", folder
num = 0
for child in children:
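Dropping the per-class `parse_datetime` here works because the hunks above call a shared `parse_datetime_iso` helper on the base extractor instead of repeating the same strptime format in each module. A hypothetical minimal equivalent of such a helper (the real one may differ):

    from datetime import datetime

    def parse_datetime_iso(value):
        # Parse ISO-8601 timestamps such as "2025-12-20T05:49:04.000Z".
        # fromisoformat() only accepts a trailing "Z" on Python 3.11+,
        # hence the replace() for older interpreters.
        try:
            return datetime.fromisoformat(value.replace("Z", "+00:00"))
        except (ValueError, TypeError, AttributeError):
            return None

    print(parse_datetime_iso("2025-12-20T05:49:04.000Z"))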
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 6276a2a..eb1a7f2 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -9,14 +9,13 @@
"""Extractors for https://www.pixiv.net/"""
from .common import Extractor, Message, Dispatch
-from .. import text, util, exception
+from .. import text, util, dt, exception
from ..cache import cache, memcache
-from datetime import datetime, timedelta
import itertools
import hashlib
BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?ph?ixiv\.net"
-USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)"
+USER_PATTERN = rf"{BASE_PATTERN}/(?:en/)?users/(\d+)"
class PixivExtractor(Extractor):
@@ -44,7 +43,7 @@ class PixivExtractor(Extractor):
self.meta_captions = self.config("captions")
if self.sanity_workaround or self.meta_captions:
- self.meta_captions_sub = util.re(
+ self.meta_captions_sub = text.re(
r'<a href="/jump\.php\?([^"]+)').sub
def items(self):
@@ -96,12 +95,12 @@ class PixivExtractor(Extractor):
if transform_tags:
transform_tags(work)
work["num"] = 0
- work["date"] = text.parse_datetime(work["create_date"])
+ work["date"] = dt.parse_iso(work["create_date"])
work["rating"] = ratings.get(work["x_restrict"])
work["suffix"] = ""
work.update(metadata)
- yield Message.Directory, work
+ yield Message.Directory, "", work
for work["num"], file in enumerate(files):
url = file["url"]
work.update(file)
@@ -149,7 +148,7 @@ class PixivExtractor(Extractor):
self._extract_ajax(work, body)
return self._extract_ugoira(work, url)
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
self.log.warning(
"%s: Unable to extract Ugoira URL. Provide "
"logged-in cookies to access it", work["id"])
@@ -238,10 +237,13 @@ class PixivExtractor(Extractor):
return data["body"]
self.log.debug("Server response: %s", util.json_dumps(data))
- return self.log.error(
- "'%s'", data.get("message") or "General Error")
+ if (msg := data.get("message")) == "An unknown error occurred":
+ msg = "Invalid 'PHPSESSID' cookie"
+ else:
+ msg = f"'{msg or 'General Error'}'"
+ self.log.error("%s", msg)
except Exception:
- return None
+ pass
def _extract_ajax(self, work, body):
work["_ajax"] = True
@@ -274,6 +276,9 @@ class PixivExtractor(Extractor):
"profile_image_urls": {},
}
+ if "is_bookmarked" not in work:
+ work["is_bookmarked"] = True if body.get("bookmarkData") else False
+
work["tags"] = tags = []
for tag in body["tags"]["tags"]:
name = tag["tag"]
@@ -350,10 +355,10 @@ class PixivExtractor(Extractor):
if fmt in urls:
yield urls[fmt]
- def _date_from_url(self, url, offset=timedelta(hours=9)):
+ def _date_from_url(self, url, offset=dt.timedelta(hours=9)):
try:
_, _, _, _, _, y, m, d, H, M, S, _ = url.split("/")
- return datetime(
+ return dt.datetime(
int(y), int(m), int(d), int(H), int(M), int(S)) - offset
except Exception:
return None
@@ -388,7 +393,7 @@ class PixivExtractor(Extractor):
class PixivUserExtractor(Dispatch, PixivExtractor):
"""Extractor for a pixiv user profile"""
- pattern = (BASE_PATTERN + r"/(?:"
+ pattern = (rf"{BASE_PATTERN}/(?:"
r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id="
r")(\d+)(?:$|[?#])")
example = "https://www.pixiv.net/en/users/12345"
@@ -411,7 +416,7 @@ class PixivUserExtractor(Dispatch, PixivExtractor):
class PixivArtworksExtractor(PixivExtractor):
"""Extractor for artworks of a pixiv user"""
subcategory = "artworks"
- pattern = (BASE_PATTERN + r"/(?:"
+ pattern = (rf"{BASE_PATTERN}/(?:"
r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
r"(?:/([^/?#]+))?/?(?:$|[?#])"
r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
@@ -450,7 +455,7 @@ class PixivArtworksExtractor(PixivExtractor):
ajax_ids.extend(map(int, body["manga"]))
ajax_ids.sort()
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
self.log.warning("u%s: Failed to collect artwork IDs "
"using AJAX API", self.user_id)
else:
@@ -500,7 +505,7 @@ class PixivAvatarExtractor(PixivExtractor):
subcategory = "avatar"
filename_fmt = "avatar{date:?_//%Y-%m-%d}.{extension}"
archive_fmt = "avatar_{user[id]}_{date}"
- pattern = USER_PATTERN + r"/avatar"
+ pattern = rf"{USER_PATTERN}/avatar"
example = "https://www.pixiv.net/en/users/12345/avatar"
def _init(self):
@@ -518,7 +523,7 @@ class PixivBackgroundExtractor(PixivExtractor):
subcategory = "background"
filename_fmt = "background{date:?_//%Y-%m-%d}.{extension}"
archive_fmt = "background_{user[id]}_{date}"
- pattern = USER_PATTERN + "/background"
+ pattern = rf"{USER_PATTERN}/background"
example = "https://www.pixiv.net/en/users/12345/background"
def _init(self):
@@ -580,7 +585,7 @@ class PixivWorkExtractor(PixivExtractor):
class PixivUnlistedExtractor(PixivExtractor):
"""Extractor for a unlisted pixiv illustrations"""
subcategory = "unlisted"
- pattern = BASE_PATTERN + r"/(?:en/)?artworks/unlisted/(\w+)"
+ pattern = rf"{BASE_PATTERN}/(?:en/)?artworks/unlisted/(\w+)"
example = "https://www.pixiv.net/en/artworks/unlisted/a1b2c3d4e5f6g7h8i9j0"
def _extract_files(self, work):
@@ -599,7 +604,7 @@ class PixivFavoriteExtractor(PixivExtractor):
directory_fmt = ("{category}", "bookmarks",
"{user_bookmark[id]} {user_bookmark[account]}")
archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}"
- pattern = (BASE_PATTERN + r"/(?:(?:en/)?"
+ pattern = (rf"{BASE_PATTERN}/(?:(?:en/)?"
r"users/(\d+)/(bookmarks/artworks|following)(?:/([^/?#]+))?"
r"|bookmark\.php)(?:\?([^#]*))?")
example = "https://www.pixiv.net/en/users/12345/bookmarks/artworks"
@@ -662,7 +667,7 @@ class PixivRankingExtractor(PixivExtractor):
archive_fmt = "r_{ranking[mode]}_{ranking[date]}_{id}{num}.{extension}"
directory_fmt = ("{category}", "rankings",
"{ranking[mode]}", "{ranking[date]}")
- pattern = BASE_PATTERN + r"/ranking\.php(?:\?([^#]*))?"
+ pattern = rf"{BASE_PATTERN}/ranking\.php(?:\?([^#]*))?"
example = "https://www.pixiv.net/ranking.php"
def __init__(self, match):
@@ -712,8 +717,7 @@ class PixivRankingExtractor(PixivExtractor):
self.log.warning("invalid date '%s'", date)
date = None
if not date:
- now = util.datetime_utcnow()
- date = (now - timedelta(days=1)).strftime("%Y-%m-%d")
+ date = (dt.now() - dt.timedelta(days=1)).strftime("%Y-%m-%d")
self.date = date
self.type = type = query.get("content")
@@ -732,7 +736,7 @@ class PixivSearchExtractor(PixivExtractor):
subcategory = "search"
archive_fmt = "s_{search[word]}_{id}{num}.{extension}"
directory_fmt = ("{category}", "search", "{search[word]}")
- pattern = (BASE_PATTERN + r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?"
+ pattern = (rf"{BASE_PATTERN}/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?"
r"|search\.php)(?:\?([^#]+))?")
example = "https://www.pixiv.net/en/tags/TAG"
@@ -798,7 +802,7 @@ class PixivFollowExtractor(PixivExtractor):
subcategory = "follow"
archive_fmt = "F_{user_follow[id]}_{id}{num}.{extension}"
directory_fmt = ("{category}", "following")
- pattern = BASE_PATTERN + r"/bookmark_new_illust\.php"
+ pattern = rf"{BASE_PATTERN}/bookmark_new_illust\.php"
example = "https://www.pixiv.net/bookmark_new_illust.php"
def works(self):
@@ -847,7 +851,7 @@ class PixivSeriesExtractor(PixivExtractor):
directory_fmt = ("{category}", "{user[id]} {user[account]}",
"{series[id]} {series[title]}")
filename_fmt = "{num_series:>03}_{id}_p{num}.{extension}"
- pattern = BASE_PATTERN + r"/user/(\d+)/series/(\d+)"
+ pattern = rf"{BASE_PATTERN}/user/(\d+)/series/(\d+)"
example = "https://www.pixiv.net/user/12345/series/12345"
def __init__(self, match):
@@ -888,11 +892,10 @@ class PixivSketchExtractor(Extractor):
for post in self.posts():
media = post["media"]
post["post_id"] = post["id"]
- post["date"] = text.parse_datetime(
- post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ post["date"] = dt.parse_iso(post["created_at"])
util.delete_items(post, ("id", "media", "_links"))
- yield Message.Directory, post
+ yield Message.Directory, "", post
post["_http_headers"] = headers
for photo in media:
@@ -969,11 +972,11 @@ class PixivNovelExtractor(PixivExtractor):
if transform_tags:
transform_tags(novel)
novel["num"] = 0
- novel["date"] = text.parse_datetime(novel["create_date"])
+ novel["date"] = dt.parse_iso(novel["create_date"])
novel["rating"] = ratings.get(novel["x_restrict"])
novel["suffix"] = ""
- yield Message.Directory, novel
+ yield Message.Directory, "", novel
try:
content = self.api.novel_webview(novel["id"])["text"]
@@ -1039,7 +1042,7 @@ class PixivNovelExtractor(PixivExtractor):
class PixivNovelNovelExtractor(PixivNovelExtractor):
"""Extractor for pixiv novels"""
subcategory = "novel"
- pattern = BASE_PATTERN + r"/n(?:ovel/show\.php\?id=|/)(\d+)"
+ pattern = rf"{BASE_PATTERN}/n(?:ovel/show\.php\?id=|/)(\d+)"
example = "https://www.pixiv.net/novel/show.php?id=12345"
def novels(self):
@@ -1053,7 +1056,7 @@ class PixivNovelNovelExtractor(PixivNovelExtractor):
class PixivNovelUserExtractor(PixivNovelExtractor):
"""Extractor for pixiv users' novels"""
subcategory = "user"
- pattern = USER_PATTERN + r"/novels"
+ pattern = rf"{USER_PATTERN}/novels"
example = "https://www.pixiv.net/en/users/12345/novels"
def novels(self):
@@ -1063,7 +1066,7 @@ class PixivNovelUserExtractor(PixivNovelExtractor):
class PixivNovelSeriesExtractor(PixivNovelExtractor):
"""Extractor for pixiv novel series"""
subcategory = "series"
- pattern = BASE_PATTERN + r"/novel/series/(\d+)"
+ pattern = rf"{BASE_PATTERN}/novel/series/(\d+)"
example = "https://www.pixiv.net/novel/series/12345"
def novels(self):
@@ -1073,7 +1076,7 @@ class PixivNovelSeriesExtractor(PixivNovelExtractor):
class PixivNovelBookmarkExtractor(PixivNovelExtractor):
"""Extractor for bookmarked pixiv novels"""
subcategory = "bookmark"
- pattern = (USER_PATTERN + r"/bookmarks/novels"
+ pattern = (rf"{USER_PATTERN}/bookmarks/novels"
r"(?:/([^/?#]+))?(?:/?\?([^#]+))?")
example = "https://www.pixiv.net/en/users/12345/bookmarks/novels"
@@ -1151,7 +1154,7 @@ class PixivAppAPI():
"get_secure_url": "1",
}
- time = util.datetime_utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00")
+ time = dt.now().strftime("%Y-%m-%dT%H:%M:%S+00:00")
headers = {
"X-Client-Time": time,
"X-Client-Hash": hashlib.md5(
@@ -1326,11 +1329,11 @@ class PixivAppAPI():
sort = params["sort"]
if sort == "date_desc":
date_key = "end_date"
- date_off = timedelta(days=1)
+ date_off = dt.timedelta(days=1)
date_cmp = lambda lhs, rhs: lhs >= rhs # noqa E731
elif sort == "date_asc":
date_key = "start_date"
- date_off = timedelta(days=-1)
+ date_off = dt.timedelta(days=-1)
date_cmp = lambda lhs, rhs: lhs <= rhs # noqa E731
else:
date_key = None
@@ -1357,8 +1360,8 @@ class PixivAppAPI():
if date_key and text.parse_int(params.get("offset")) >= 5000:
date_last = data["illusts"][-1]["create_date"]
- date_val = (text.parse_datetime(
- date_last) + date_off).strftime("%Y-%m-%d")
+ date_val = (dt.parse_iso(date_last) + date_off).strftime(
+ "%Y-%m-%d")
self.log.info("Reached 'offset' >= 5000; "
"Updating '%s' to '%s'", date_key, date_val)
params[date_key] = date_val
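pixiv.py stops importing `datetime`/`timedelta` directly and routes everything through the new `gallery_dl.dt` module seen throughout this hunk (`dt.parse_iso`, `dt.now`, `dt.timedelta`, `dt.datetime`). Reconstructed from those call sites alone, the module plausibly looks like the sketch below; this is an assumption, not dt.py's actual source:

    # Assumed shape of gallery_dl.dt, inferred from its call sites.
    from datetime import datetime, timedelta, timezone  # re-exported names

    def now():
        # naive UTC "now", replacing the old util.datetime_utcnow() calls
        return datetime.now(timezone.utc).replace(tzinfo=None)

    def parse_iso(value):
        try:
            return datetime.fromisoformat(value)
        except (ValueError, TypeError):
            return None

    print((now() - timedelta(days=1)).strftime("%Y-%m-%d"))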
diff --git a/gallery_dl/extractor/pixnet.py b/gallery_dl/extractor/pixnet.py
index 75c06bb..2feab95 100644
--- a/gallery_dl/extractor/pixnet.py
+++ b/gallery_dl/extractor/pixnet.py
@@ -65,7 +65,7 @@ class PixnetImageExtractor(PixnetExtractor):
subcategory = "image"
filename_fmt = "{id}.{extension}"
directory_fmt = ("{category}", "{blog}")
- pattern = BASE_PATTERN + r"/album/photo/(\d+)"
+ pattern = rf"{BASE_PATTERN}/album/photo/(\d+)"
example = "https://USER.pixnet.net/album/photo/12345"
def items(self):
@@ -83,7 +83,7 @@ class PixnetImageExtractor(PixnetExtractor):
data["blog"] = self.blog
data["user"] = data.pop("author_name")
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, data["url"], data
@@ -92,7 +92,7 @@ class PixnetSetExtractor(PixnetExtractor):
subcategory = "set"
directory_fmt = ("{category}", "{blog}",
"{folder_id} {folder_title}", "{set_id} {set_title}")
- pattern = BASE_PATTERN + r"/album/set/(\d+)"
+ pattern = rf"{BASE_PATTERN}/album/set/(\d+)"
example = "https://USER.pixnet.net/album/set/12345"
def items(self):
@@ -100,7 +100,7 @@ class PixnetSetExtractor(PixnetExtractor):
page = self.request(url, encoding="utf-8").text
data = self.metadata(page)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for num, info in enumerate(self._pagination(page), 1):
url, pos = text.extract(info, ' href="', '"')
src, pos = text.extract(info, ' src="', '"', pos)
@@ -137,7 +137,7 @@ class PixnetFolderExtractor(PixnetExtractor):
"""Extractor for all sets in a pixnet folder"""
subcategory = "folder"
url_fmt = "{}/album/folder/{}"
- pattern = BASE_PATTERN + r"/album/folder/(\d+)"
+ pattern = rf"{BASE_PATTERN}/album/folder/(\d+)"
example = "https://USER.pixnet.net/album/folder/12345"
@@ -145,5 +145,5 @@ class PixnetUserExtractor(PixnetExtractor):
"""Extractor for all sets and folders of a pixnet user"""
subcategory = "user"
url_fmt = "{}{}/album/list"
- pattern = BASE_PATTERN + r"()(?:/blog|/album(?:/list)?)?/?(?:$|[?#])"
+ pattern = rf"{BASE_PATTERN}()(?:/blog|/album(?:/list)?)?/?(?:$|[?#])"
example = "https://USER.pixnet.net/"
diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py
index 37b9b10..76ca59f 100644
--- a/gallery_dl/extractor/plurk.py
+++ b/gallery_dl/extractor/plurk.py
@@ -9,8 +9,7 @@
"""Extractors for https://www.plurk.com/"""
from .common import Extractor, Message
-from .. import text, util, exception
-import datetime
+from .. import text, util, dt, exception
class PlurkExtractor(Extractor):
@@ -62,7 +61,7 @@ class PlurkExtractor(Extractor):
if not data:
raise exception.NotFoundError("user")
return util.json_loads(
- util.re(r"new Date\(([^)]+)\)").sub(r"\1", data))
+ text.re(r"new Date\(([^)]+)\)").sub(r"\1", data))
class PlurkTimelineExtractor(PlurkExtractor):
@@ -88,12 +87,10 @@ class PlurkTimelineExtractor(PlurkExtractor):
while plurks:
yield from plurks
- offset = datetime.datetime.strptime(
- plurks[-1]["posted"], "%a, %d %b %Y %H:%M:%S %Z")
+ offset = dt.parse(plurks[-1]["posted"], "%a, %d %b %Y %H:%M:%S %Z")
data["offset"] = offset.strftime("%Y-%m-%dT%H:%M:%S.000Z")
- response = self.request(
- url, method="POST", headers=headers, data=data)
- plurks = response.json()["plurks"]
+ plurks = self.request_json(
+ url, method="POST", headers=headers, data=data)["plurks"]
class PlurkPostExtractor(PlurkExtractor):
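The plurk pagination hunk above folds a request-plus-`.json()` pair into the `request_json` helper also adopted elsewhere in this release. A minimal sketch of that convenience wrapper, assuming a requests-style session on the extractor (illustrative, not common.py's implementation):

    import requests

    class ExtractorSketch:
        session = requests.Session()

        def request(self, url, method="GET", **kwargs):
            return self.session.request(method, url, **kwargs)

        def request_json(self, url, **kwargs):
            # request + JSON decode in one step, as the extractors use it
            return self.request(url, **kwargs).json()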
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index 32ca528..c3aaaba 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -81,7 +81,7 @@ class PoipikuExtractor(Extractor):
"PasswordIcon", ">"):
post["password"] = True
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], url in enumerate(extract_files(
post, thumb, extr), 1):
yield Message.Url, url, text.nameext_from_url(url, post)
diff --git a/gallery_dl/extractor/poringa.py b/gallery_dl/extractor/poringa.py
index da17eae..832bedf 100644
--- a/gallery_dl/extractor/poringa.py
+++ b/gallery_dl/extractor/poringa.py
@@ -68,7 +68,7 @@ class PoringaExtractor(Extractor):
main_post, '<img class="imagen" border="0" src="', '"'))
data["count"] = len(urls)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], url in enumerate(urls, 1):
yield Message.Url, url, text.nameext_from_url(url, data)
@@ -104,7 +104,7 @@ class PoringaExtractor(Extractor):
class PoringaPostExtractor(PoringaExtractor):
"""Extractor for posts on poringa.net"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/posts/imagenes/(\d+)"
+ pattern = rf"{BASE_PATTERN}/posts/imagenes/(\d+)"
example = "http://www.poringa.net/posts/imagenes/12345/TITLE.html"
def posts(self):
@@ -113,7 +113,7 @@ class PoringaPostExtractor(PoringaExtractor):
class PoringaUserExtractor(PoringaExtractor):
subcategory = "user"
- pattern = BASE_PATTERN + r"/(\w+)$"
+ pattern = rf"{BASE_PATTERN}/(\w+)$"
example = "http://www.poringa.net/USER"
def posts(self):
@@ -124,7 +124,7 @@ class PoringaUserExtractor(PoringaExtractor):
class PoringaSearchExtractor(PoringaExtractor):
subcategory = "search"
- pattern = BASE_PATTERN + r"/buscar/\?&?q=([^&#]+)"
+ pattern = rf"{BASE_PATTERN}/buscar/\?&?q=([^&#]+)"
example = "http://www.poringa.net/buscar/?q=QUERY"
def posts(self):
diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py
index 1211397..5ced315 100644
--- a/gallery_dl/extractor/pornhub.py
+++ b/gallery_dl/extractor/pornhub.py
@@ -54,7 +54,7 @@ class PornhubGalleryExtractor(PornhubExtractor):
directory_fmt = ("{category}", "{user}", "{gallery[id]} {gallery[title]}")
filename_fmt = "{num:>03}_{id}.{extension}"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/album/(\d+)"
+ pattern = rf"{BASE_PATTERN}/album/(\d+)"
example = "https://www.pornhub.com/album/12345"
def __init__(self, match):
@@ -64,7 +64,7 @@ class PornhubGalleryExtractor(PornhubExtractor):
def items(self):
data = self.metadata()
- yield Message.Directory, data
+ yield Message.Directory, "", data
for num, img in enumerate(self.images(), 1):
image = {
@@ -134,7 +134,7 @@ class PornhubGifExtractor(PornhubExtractor):
directory_fmt = ("{category}", "{user}", "gifs")
filename_fmt = "{id} {title}.{extension}"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/gif/(\d+)"
+ pattern = rf"{BASE_PATTERN}/gif/(\d+)"
example = "https://www.pornhub.com/gif/12345"
def __init__(self, match):
@@ -150,21 +150,20 @@ class PornhubGifExtractor(PornhubExtractor):
"tags" : extr("data-context-tag='", "'").split(","),
"title": extr('"name": "', '"'),
"url" : extr('"contentUrl": "', '"'),
- "date" : text.parse_datetime(
- extr('"uploadDate": "', '"'), "%Y-%m-%d"),
+ "date" : self.parse_datetime_iso(extr('"uploadDate": "', '"')),
"viewkey" : extr('From this video: '
'<a href="/view_video.php?viewkey=', '"'),
"timestamp": extr('lass="directLink tstamp" rel="nofollow">', '<'),
"user" : text.remove_html(extr("Created by:", "</div>")),
}
- yield Message.Directory, gif
+ yield Message.Directory, "", gif
yield Message.Url, gif["url"], text.nameext_from_url(gif["url"], gif)
class PornhubUserExtractor(Dispatch, PornhubExtractor):
"""Extractor for a pornhub user"""
- pattern = BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)/?$"
+ pattern = rf"{BASE_PATTERN}/((?:users|model|pornstar)/[^/?#]+)/?$"
example = "https://www.pornhub.com/model/USER"
def items(self):
@@ -178,7 +177,7 @@ class PornhubUserExtractor(Dispatch, PornhubExtractor):
class PornhubPhotosExtractor(PornhubExtractor):
"""Extractor for all galleries of a pornhub user"""
subcategory = "photos"
- pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
+ pattern = (rf"{BASE_PATTERN}/((?:users|model|pornstar)/[^/?#]+)"
"/(photos(?:/[^/?#]+)?)")
example = "https://www.pornhub.com/model/USER/photos"
@@ -199,7 +198,7 @@ class PornhubPhotosExtractor(PornhubExtractor):
class PornhubGifsExtractor(PornhubExtractor):
"""Extractor for a pornhub user's gifs"""
subcategory = "gifs"
- pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
+ pattern = (rf"{BASE_PATTERN}/((?:users|model|pornstar)/[^/?#]+)"
"/(gifs(?:/[^/?#]+)?)")
example = "https://www.pornhub.com/model/USER/gifs"
diff --git a/gallery_dl/extractor/pornpics.py b/gallery_dl/extractor/pornpics.py
index 34a0111..9c926e8 100644
--- a/gallery_dl/extractor/pornpics.py
+++ b/gallery_dl/extractor/pornpics.py
@@ -58,7 +58,7 @@ class PornpicsExtractor(Extractor):
class PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor):
"""Extractor for pornpics galleries"""
- pattern = BASE_PATTERN + r"/galleries/((?:[^/?#]+-)?(\d+))"
+ pattern = rf"{BASE_PATTERN}/galleries/((?:[^/?#]+-)?(\d+))"
example = "https://www.pornpics.com/galleries/TITLE-12345/"
def __init__(self, match):
@@ -94,7 +94,7 @@ class PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor):
class PornpicsTagExtractor(PornpicsExtractor):
"""Extractor for galleries from pornpics tag searches"""
subcategory = "tag"
- pattern = BASE_PATTERN + r"/tags/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/tags/([^/?#]+)"
example = "https://www.pornpics.com/tags/TAGS/"
def galleries(self):
@@ -105,7 +105,7 @@ class PornpicsTagExtractor(PornpicsExtractor):
class PornpicsSearchExtractor(PornpicsExtractor):
"""Extractor for galleries from pornpics search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/(?:\?q=|pornstars/|channels/)([^/&#]+)"
+ pattern = rf"{BASE_PATTERN}/(?:\?q=|pornstars/|channels/)([^/&#]+)"
example = "https://www.pornpics.com/?q=QUERY"
def galleries(self):
@@ -116,3 +116,35 @@ class PornpicsSearchExtractor(PornpicsExtractor):
"offset": 0,
}
return self._pagination(url, params)
+
+
+class PornpicsListingExtractor(PornpicsExtractor):
+ """Extractor for galleries from pornpics listing pages
+
+ These pages (popular, recent, etc.) don't support JSON pagination
+ and use single quotes in HTML, unlike category pages.
+ """
+ subcategory = "listing"
+ pattern = (rf"{BASE_PATTERN}"
+ rf"/(popular|recent|rating|likes|views|comments)/?$")
+ example = "https://www.pornpics.com/popular/"
+
+ def galleries(self):
+ url = f"{self.root}/{self.groups[0]}/"
+ page = self.request(url).text
+ return [
+ {"g_url": href}
+ for href in text.extract_iter(
+ page, "class='rel-link' href='", "'")
+ ]
+
+
+class PornpicsCategoryExtractor(PornpicsExtractor):
+ """Extractor for galleries from pornpics categories"""
+ subcategory = "category"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/?$"
+ example = "https://www.pornpics.com/ass/"
+
+ def galleries(self):
+ url = f"{self.root}/{self.groups[0]}/"
+ return self._pagination(url)
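The new listing pattern is a strict subset of the new category pattern, so both match a URL like /popular/; presumably the listing extractor wins by being defined (and therefore tried) first — an assumption about gallery-dl's match order, not something stated in the diff. The overlap itself is easy to verify with stand-in values:

    import re

    BASE_PATTERN = r"(?:https?://)?(?:www\.)?pornpics\.com"  # stand-in
    LISTING = rf"{BASE_PATTERN}/(popular|recent|rating|likes|views|comments)/?$"
    CATEGORY = rf"{BASE_PATTERN}/([^/?#]+)/?$"

    url = "https://www.pornpics.com/popular/"
    assert re.match(LISTING, url) and re.match(CATEGORY, url)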
diff --git a/gallery_dl/extractor/pornstarstube.py b/gallery_dl/extractor/pornstarstube.py
new file mode 100644
index 0000000..82519a0
--- /dev/null
+++ b/gallery_dl/extractor/pornstarstube.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://pornstars.tube/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class PornstarstubeGalleryExtractor(GalleryExtractor):
+ """Extractor for image galleries from pornstars.tube"""
+ category = "pornstarstube"
+ root = "https://pornstars.tube"
+ pattern = (r"(?:https?://)?(?:www\.)?pornstars\.tube"
+ r"/albums/(\d+)(?:/([\w-]+))?")
+ example = "https://pornstars.tube/albums/12345/SLUG/"
+
+ def __init__(self, match):
+ url = f"{self.root}/albums/{match[1]}/{match[2] or 'a'}/"
+ GalleryExtractor.__init__(self, match, url)
+
+ def metadata(self, page):
+ gid, slug = self.groups
+ return {
+ "gallery_id": text.parse_int(gid),
+ "slug" : slug or "",
+ "title" : text.unescape(text.extr(
+ page, "<title>", " - PORNSTARS.TUBE</title>")),
+ "description": text.unescape(text.extr(
+ page, 'name="description" content="', '"')),
+ "tags": text.extr(
+ page, 'name="keywords" content="', '"').split(", "),
+ }
+
+ def images(self, page):
+ album = text.extr(page, 'class="block-album"', "\n</div>")
+ return [
+ (url, None)
+ for url in text.extract_iter(album, ' href="', '"')
+ ]
diff --git a/gallery_dl/extractor/postmill.py b/gallery_dl/extractor/postmill.py
index af971ab..e71246a 100644
--- a/gallery_dl/extractor/postmill.py
+++ b/gallery_dl/extractor/postmill.py
@@ -7,7 +7,7 @@
"""Extractors for Postmill instances"""
from .common import BaseExtractor, Message
-from .. import text, util, exception
+from .. import text, exception
class PostmillExtractor(BaseExtractor):
@@ -20,8 +20,8 @@ class PostmillExtractor(BaseExtractor):
def _init(self):
self.instance = self.root.partition("://")[2]
self.save_link_post_body = self.config("save-link-post-body", False)
- self._search_canonical_url = util.re(r"/f/([\w\d_]+)/(\d+)/").search
- self._search_image_tag = util.re(
+ self._search_canonical_url = text.re(r"/f/([\w\d_]+)/(\d+)/").search
+ self._search_image_tag = text.re(
r'<a href="[^"]+"\n +class="submission__image-link"').search
def items(self):
@@ -31,7 +31,7 @@ class PostmillExtractor(BaseExtractor):
title = text.unescape(extr(
'<meta property="og:title" content="', '">'))
- date = text.parse_datetime(extr(
+ date = self.parse_datetime_iso(extr(
'<meta property="og:article:published_time" content="', '">'))
username = extr(
'<meta property="og:article:author" content="', '">')
@@ -72,7 +72,7 @@ class PostmillExtractor(BaseExtractor):
urls.append((Message.Queue, url))
data["count"] = len(urls)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], (msg, url) in enumerate(urls, 1):
if url.startswith("text:"):
data["filename"], data["extension"] = "", "htm"
@@ -130,14 +130,14 @@ BASE_PATTERN = PostmillExtractor.update({
}
})
QUERY_RE = r"(?:\?([^#]+))?$"
-SORTING_RE = r"(/(?:hot|new|active|top|controversial|most_commented))?" + \
- QUERY_RE
+SORTING_RE = (rf"(/(?:hot|new|active|top|controversial|most_commented))?"
+ rf"{QUERY_RE}")
class PostmillPostExtractor(PostmillExtractor):
"""Extractor for a single submission URL"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/f/(\w+)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/f/(\w+)/(\d+)"
example = "https://raddle.me/f/FORUM/123/TITLE"
def __init__(self, match):
@@ -152,7 +152,7 @@ class PostmillPostExtractor(PostmillExtractor):
class PostmillShortURLExtractor(PostmillExtractor):
"""Extractor for short submission URLs"""
subcategory = "shorturl"
- pattern = BASE_PATTERN + r"(/\d+)$"
+ pattern = rf"{BASE_PATTERN}(/\d+)$"
example = "https://raddle.me/123"
def items(self):
@@ -165,34 +165,34 @@ class PostmillShortURLExtractor(PostmillExtractor):
class PostmillHomeExtractor(PostmillSubmissionsExtractor):
"""Extractor for the home page"""
subcategory = "home"
- pattern = BASE_PATTERN + r"(/(?:featured|subscribed|all)?)" + SORTING_RE
+ pattern = rf"{BASE_PATTERN}(/(?:featured|subscribed|all)?){SORTING_RE}"
example = "https://raddle.me/"
class PostmillForumExtractor(PostmillSubmissionsExtractor):
"""Extractor for submissions on a forum"""
subcategory = "forum"
- pattern = BASE_PATTERN + r"(/f/\w+)" + SORTING_RE
+ pattern = rf"{BASE_PATTERN}(/f/\w+){SORTING_RE}"
example = "https://raddle.me/f/FORUM"
class PostmillUserSubmissionsExtractor(PostmillSubmissionsExtractor):
"""Extractor for submissions made by a user"""
subcategory = "usersubmissions"
- pattern = BASE_PATTERN + r"(/user/\w+/submissions)()" + QUERY_RE
+ pattern = rf"{BASE_PATTERN}(/user/\w+/submissions)(){QUERY_RE}"
example = "https://raddle.me/user/USER/submissions"
class PostmillTagExtractor(PostmillSubmissionsExtractor):
"""Extractor for submissions on a forum with a specific tag"""
subcategory = "tag"
- pattern = BASE_PATTERN + r"(/tag/\w+)" + SORTING_RE
+ pattern = rf"{BASE_PATTERN}(/tag/\w+){SORTING_RE}"
example = "https://raddle.me/tag/TAG"
class PostmillSearchExtractor(PostmillSubmissionsExtractor):
"""Extractor for search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"(/search)()\?(q=[^#]+)$"
+ pattern = rf"{BASE_PATTERN}(/search)()\?(q=[^#]+)$"
example = "https://raddle.me/search?q=QUERY"
whitelisted_parameters = ("q",)
diff --git a/gallery_dl/extractor/rawkuma.py b/gallery_dl/extractor/rawkuma.py
index 242486d..a4a0c9b 100644
--- a/gallery_dl/extractor/rawkuma.py
+++ b/gallery_dl/extractor/rawkuma.py
@@ -7,7 +7,7 @@
"""Extractors for https://rawkuma.net/"""
from .common import MangaExtractor, ChapterExtractor
-from .. import text, util
+from .. import text
BASE_PATTERN = r"(?:https?://)?rawkuma\.(?:net|com)"
@@ -21,43 +21,40 @@ class RawkumaBase():
class RawkumaChapterExtractor(RawkumaBase, ChapterExtractor):
"""Extractor for manga chapters from rawkuma.net"""
archive_fmt = "{chapter_id}_{page}"
- pattern = BASE_PATTERN + r"/([^/?#]+-chapter-\d+(?:-\d+)?)"
- example = "https://rawkuma.net/TITLE-chapter-123/"
+ pattern = rf"{BASE_PATTERN}(/manga/[^/?#]+/chapter-\d+(?:.\d+)?\.(\d+))"
+ example = "https://rawkuma.net/manga/7TITLE/chapter-123.321"
def __init__(self, match):
url = f"{self.root}/{match[1]}/"
ChapterExtractor.__init__(self, match, url)
def metadata(self, page):
- item = util.json_loads(text.extr(page, ',"item":', "}};"))
- title = text.rextr(
- page, '<h1 class="entry-title', "</h1>").partition(" &#8211; ")[2]
- date = text.extr(page, 'datetime="', '"')
- chapter, sep, minor = item["c"].partition(".")
+ manga, _, chapter = text.extr(
+ page, '<title>', "<").rpartition(" Chapter ")
+ chapter, sep, minor = chapter.partition(" &#8211; ")[0].partition(".")
return {
- "manga" : item["s"],
- "manga_id" : text.parse_int(item["mid"]),
+ "manga" : text.unescape(manga),
+ "manga_id" : text.parse_int(text.extr(page, "manga_id=", "&")),
"chapter" : text.parse_int(chapter),
"chapter_minor": sep + minor,
- "chapter_id" : text.parse_int(item["cid"]),
- "title" : text.unescape(title),
- "date" : text.parse_datetime(
- date, "%Y-%m-%dWIB%H:%M:%S%z"),
- "thumbnail" : item.get("t"),
+ "chapter_id" : text.parse_int(self.groups[-1]),
+ # "title" : text.unescape(title),
+ "date" : self.parse_datetime_iso(text.extr(
+ page, 'datetime="', '"')),
"lang" : "ja",
"language" : "Japanese",
}
def images(self, page):
- images = util.json_loads(text.extr(page, '","images":', '}'))
- return [(url, None) for url in images]
+ return [(url, None) for url in text.extract_iter(
+ page, "<img src='", "'")]
class RawkumaMangaExtractor(RawkumaBase, MangaExtractor):
"""Extractor for manga from rawkuma.net"""
chapterclass = RawkumaChapterExtractor
- pattern = BASE_PATTERN + r"/manga/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/manga/([^/?#]+)"
example = "https://rawkuma.net/manga/TITLE/"
def __init__(self, match):
@@ -66,18 +63,36 @@ class RawkumaMangaExtractor(RawkumaBase, MangaExtractor):
def chapters(self, page):
manga = text.unescape(text.extr(page, "<title>", " &#8211; "))
+ manga_id = text.parse_int(text.extr(page, "manga_id=", "&"))
+
+ url = f"{self.root}/wp-admin/admin-ajax.php"
+ params = {
+ "manga_id": manga_id,
+ "page" : "1",
+ "action" : "chapter_list",
+ }
+ headers = {
+ "HX-Request" : "true",
+ "HX-Trigger" : "chapter-list",
+ "HX-Target" : "chapter-list",
+ "HX-Current-URL": self.page_url,
+ "Sec-Fetch-Dest": "empty",
+ "Sec-Fetch-Mode": "cors",
+ "Sec-Fetch-Site": "same-origin",
+ }
+ html = self.request(url, params=params, headers=headers).text
results = []
- for chbox in text.extract_iter(
- page, '<li data-num="', "</a>"):
- info = text.extr(chbox, '', '"')
- chapter, _, title = info.partition(" - ")
+ for url in text.extract_iter(html, '<a href="', '"'):
+ info = url[url.rfind("-")+1:-1]
+ chapter, _, chapter_id = info.rpartition(".")
chapter, sep, minor = chapter.partition(".")
- results.append((text.extr(chbox, 'href="', '"'), {
+ results.append((url, {
"manga" : manga,
+ "manga_id" : manga_id,
"chapter" : text.parse_int(chapter),
"chapter-minor": sep + minor,
- "title" : title,
+ "chapter_id" : text.parse_int(chapter_id),
}))
return results
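The rewritten `chapters()` above now pulls rawkuma's chapter list from WordPress's admin-ajax.php endpoint, mimicking the HX-* headers its htmx front end sends. A stand-alone version of that request for illustration (the function name is hypothetical; manga_id and URLs are placeholders):

    import requests

    def fetch_chapter_list(root, manga_id, page_url):
        params = {"manga_id": manga_id, "page": "1", "action": "chapter_list"}
        headers = {
            "HX-Request"     : "true",
            "HX-Trigger"     : "chapter-list",
            "HX-Target"      : "chapter-list",
            "HX-Current-URL" : page_url,
        }
        # returns the HTML fragment the extractor scans for chapter links
        return requests.get(f"{root}/wp-admin/admin-ajax.php",
                            params=params, headers=headers).text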
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index 483a5ba..8e974d2 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -38,7 +38,7 @@ class ReactorExtractor(BaseExtractor):
def items(self):
data = self.metadata()
- yield Message.Directory, data
+ yield Message.Directory, "", data
for post in self.posts():
for image in self._parse_post(post):
url = image["url"]
@@ -97,7 +97,7 @@ class ReactorExtractor(BaseExtractor):
return
num = 0
- date = text.parse_datetime(data["datePublished"])
+ date = self.parse_datetime_iso(data["datePublished"])
user = data["author"]["name"]
description = text.unescape(data["description"])
title, _, tags = text.unescape(data["headline"]).partition(" / ")
@@ -171,7 +171,7 @@ class ReactorTagExtractor(ReactorExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "{search_tags}_{post_id}_{num}"
- pattern = BASE_PATTERN + r"/tag/([^/?#]+)(?:/[^/?#]+)?"
+ pattern = rf"{BASE_PATTERN}/tag/([^/?#]+)(?:/[^/?#]+)?"
example = "http://reactor.cc/tag/TAG"
def __init__(self, match):
@@ -187,7 +187,7 @@ class ReactorSearchExtractor(ReactorExtractor):
subcategory = "search"
directory_fmt = ("{category}", "search", "{search_tags}")
archive_fmt = "s_{search_tags}_{post_id}_{num}"
- pattern = BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/search(?:/|\?q=)([^/?#]+)"
example = "http://reactor.cc/search?q=QUERY"
def __init__(self, match):
@@ -202,7 +202,7 @@ class ReactorUserExtractor(ReactorExtractor):
"""Extractor for all posts of a user on *reactor.cc sites"""
subcategory = "user"
directory_fmt = ("{category}", "user", "{user}")
- pattern = BASE_PATTERN + r"/user/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/user/([^/?#]+)"
example = "http://reactor.cc/user/USER"
def __init__(self, match):
@@ -216,7 +216,7 @@ class ReactorUserExtractor(ReactorExtractor):
class ReactorPostExtractor(ReactorExtractor):
"""Extractor for single posts on *reactor.cc sites"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/post/(\d+)"
+ pattern = rf"{BASE_PATTERN}/post/(\d+)"
example = "http://reactor.cc/post/12345"
def __init__(self, match):
@@ -228,6 +228,6 @@ class ReactorPostExtractor(ReactorExtractor):
pos = post.find('class="uhead">')
for image in self._parse_post(post[pos:]):
if image["num"] == 1:
- yield Message.Directory, image
+ yield Message.Directory, "", image
url = image["url"]
yield Message.Url, url, text.nameext_from_url(url, image)
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index 24a0171..dccf91d 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -44,7 +44,7 @@ class ReadcomiconlineBase():
class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
"""Extractor for comic-issues from readcomiconline.li"""
subcategory = "issue"
- pattern = BASE_PATTERN + r"(/Comic/[^/?#]+/[^/?#]+\?)([^#]+)"
+ pattern = rf"{BASE_PATTERN}(/Comic/[^/?#]+/[^/?#]+\?)([^#]+)"
example = "https://readcomiconline.li/Comic/TITLE/Issue-123?id=12345"
def _init(self):
@@ -98,7 +98,7 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
"""Extractor for comics from readcomiconline.li"""
chapterclass = ReadcomiconlineIssueExtractor
subcategory = "comic"
- pattern = BASE_PATTERN + r"(/Comic/[^/?#]+/?)$"
+ pattern = rf"{BASE_PATTERN}(/Comic/[^/?#]+/?)$"
example = "https://readcomiconline.li/Comic/TITLE"
def chapters(self, page):
diff --git a/gallery_dl/extractor/realbooru.py b/gallery_dl/extractor/realbooru.py
index cf45578..7f731f8 100644
--- a/gallery_dl/extractor/realbooru.py
+++ b/gallery_dl/extractor/realbooru.py
@@ -28,18 +28,31 @@ class RealbooruExtractor(booru.BooruExtractor):
extr('class="container"', '>')
post = {
- "_html" : page,
"id" : post_id,
"rating" : "e" if rating == "adult" else (rating or "?")[0],
- "tags" : text.unescape(extr(' alt="', '"')),
- "file_url" : extr('src="', '"'),
+ "file_url" : (s := extr('src="', '"')),
+ "_fallback" : (extr('src="', '"'),) if s.endswith(".mp4") else (),
"created_at": extr(">Posted at ", " by "),
"uploader" : extr(">", "<"),
"score" : extr('">', "<"),
+ "tags" : extr('<br />', "</div>"),
"title" : extr('id="title" style="width: 100%;" value="', '"'),
"source" : extr('d="source" style="width: 100%;" value="', '"'),
}
+ tags_container = post["tags"]
+ tags = []
+ tags_categories = collections.defaultdict(list)
+ pattern = text.re(r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)')
+ for tag_type, tag_name in pattern.findall(tags_container):
+ tag = text.unescape(text.unquote(tag_name))
+ tags.append(tag)
+ tags_categories[tag_type].append(tag)
+ for key, value in tags_categories.items():
+ post[f"tags_{key}"] = ", ".join(value)
+ tags.sort()
+
+ post["tags"] = ", ".join(tags)
post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
return post
@@ -48,7 +61,7 @@ class RealbooruExtractor(booru.BooruExtractor):
return num
def _prepare(self, post):
- post["date"] = text.parse_datetime(post["created_at"], "%b, %d %Y")
+ post["date"] = self.parse_datetime(post["created_at"], "%b, %d %Y")
def _pagination(self, params, begin, end):
url = self.root + "/index.php"
@@ -66,23 +79,13 @@ class RealbooruExtractor(booru.BooruExtractor):
return
params["pid"] += self.per_page
- def _tags(self, post, _):
- page = post["_html"]
- tag_container = text.extr(page, 'id="tagLink"', '</div>')
- tags = collections.defaultdict(list)
- pattern = util.re(r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)')
- for tag_type, tag_name in pattern.findall(tag_container):
- tags[tag_type].append(text.unescape(text.unquote(tag_name)))
- for key, value in tags.items():
- post["tags_" + key] = " ".join(value)
-
class RealbooruTagExtractor(RealbooruExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
per_page = 42
- pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]*)"
+ pattern = rf"{BASE_PATTERN}/index\.php\?page=post&s=list&tags=([^&#]*)"
example = "https://realbooru.com/index.php?page=post&s=list&tags=TAG"
def metadata(self):
@@ -102,7 +105,7 @@ class RealbooruFavoriteExtractor(RealbooruExtractor):
directory_fmt = ("{category}", "favorites", "{favorite_id}")
archive_fmt = "f_{favorite_id}_{id}"
per_page = 50
- pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
+ pattern = rf"{BASE_PATTERN}/index\.php\?page=favorites&s=view&id=(\d+)"
example = "https://realbooru.com/index.php?page=favorites&s=view&id=12345"
def metadata(self):
@@ -120,7 +123,7 @@ class RealbooruPoolExtractor(RealbooruExtractor):
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool} {pool_name}")
archive_fmt = "p_{pool}_{id}"
- pattern = BASE_PATTERN + r"/index\.php\?page=pool&s=show&id=(\d+)"
+ pattern = rf"{BASE_PATTERN}/index\.php\?page=pool&s=show&id=(\d+)"
example = "https://realbooru.com/index.php?page=pool&s=show&id=12345"
def metadata(self):
@@ -147,7 +150,7 @@ class RealbooruPoolExtractor(RealbooruExtractor):
class RealbooruPostExtractor(RealbooruExtractor):
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
+ pattern = rf"{BASE_PATTERN}/index\.php\?page=post&s=view&id=(\d+)"
example = "https://realbooru.com/index.php?page=post&s=view&id=12345"
def posts(self):
diff --git a/gallery_dl/extractor/recursive.py b/gallery_dl/extractor/recursive.py
index c553fec..0bf17d3 100644
--- a/gallery_dl/extractor/recursive.py
+++ b/gallery_dl/extractor/recursive.py
@@ -9,7 +9,7 @@
"""Recursive extractor"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text
class RecursiveExtractor(Extractor):
@@ -27,5 +27,5 @@ class RecursiveExtractor(Extractor):
else:
page = self.request(text.ensure_http_scheme(url)).text
- for match in util.re(r"https?://[^\s\"']+").finditer(page):
+ for match in text.re(r"https?://[^\s\"']+").finditer(page):
yield Message.Queue, match[0], {}
diff --git a/gallery_dl/extractor/redbust.py b/gallery_dl/extractor/redbust.py
deleted file mode 100644
index d00ed52..0000000
--- a/gallery_dl/extractor/redbust.py
+++ /dev/null
@@ -1,186 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://redbust.com/"""
-
-from .common import GalleryExtractor, Extractor, Message
-from .. import text
-
-BASE_PATTERN = r"(?:https?://)?redbust\.com"
-
-
-class RedbustExtractor(Extractor):
- """Base class for RedBust extractors"""
- category = "redbust"
- root = "https://redbust.com"
- filename_fmt = "{filename}.{extension}"
-
- def items(self):
- data = {"_extractor": RedbustGalleryExtractor}
- for url in self.galleries():
- yield Message.Queue, url, data
-
- def _pagination(self, path, page=None):
- if page is None:
- url = f"{self.root}{path}/"
- base = url + "page/"
- page = self.request(url).text
- else:
- base = f"{self.root}{path}/page/"
-
- pnum = 1
- while True:
- for post in text.extract_iter(
- page, '<h2 class="post-title">', "rel="):
- yield text.extr(post, 'href="', '"')
-
- pnum += 1
- url = f"{base}{pnum}/"
- if url not in page:
- return
- page = self.request(url).text
-
-
-class RedbustGalleryExtractor(GalleryExtractor, RedbustExtractor):
- """Extractor for RedBust galleries"""
- pattern = BASE_PATTERN + r"/([\w-]+)/?$"
- example = "https://redbust.com/TITLE/"
-
- def items(self):
- url = f"{self.root}/{self.groups[0]}/"
- self.page = page = self.request(url).text
-
- self.gallery_id = gid = text.extr(
- page, "<link rel='shortlink' href='https://redbust.com/?p=", "'")
-
- if gid:
- self.page_url = False
- return GalleryExtractor.items(self)
- else:
- self.subcategory = "category"
- return self._items_category(page)
-
- def _items_category(self, _):
- page = self.page
- data = {"_extractor": RedbustGalleryExtractor}
- base = f"{self.root}/{self.groups[0]}/page/"
- pnum = 1
-
- while True:
- for post in text.extract_iter(
- page, '<h2 class="post-title">', "rel="):
- url = text.extr(post, 'href="', '"')
- yield Message.Queue, url, data
-
- pnum += 1
- url = f"{base}{pnum}/"
- if url not in page:
- return
- page = self.request(url).text
-
- def metadata(self, _):
- extr = text.extract_from(self.page)
-
- return {
- "gallery_id" : self.gallery_id,
- "gallery_slug": self.groups[0],
- "categories" : text.split_html(extr(
- '<li class="category">', "</li>"))[::2],
- "title" : text.unescape(extr('class="post-title">', "<")),
- "date" : text.parse_datetime(
- extr('class="post-byline">', "<").strip(), "%B %d, %Y"),
- "views" : text.parse_int(extr("</b>", "v").replace(",", "")),
- "tags" : text.split_html(extr(
- 'class="post-tags">', "</p"))[1:],
- }
-
- def images(self, _):
- results = []
-
- for img in text.extract_iter(self.page, "'><img ", ">"):
- if src := text.extr(img, 'src="', '"'):
- path, _, end = src.rpartition("-")
- if "x" in end:
- url = f"{path}.{end.rpartition('.')[2]}"
- data = None if src == url else {"_fallback": (src,)}
- else:
- url = src
- data = None
- results.append((url, data))
-
- if not results:
- # fallback for older galleries
- for path in text.extract_iter(
- self.page, '<img src="/wp-content/uploads/', '"'):
- results.append(
- (f"{self.root}/wp-content/uploads/{path}", None))
-
- return results
-
-
-class RedbustTagExtractor(RedbustExtractor):
- """Extractor for RedBust tag searches"""
- subcategory = "tag"
- pattern = BASE_PATTERN + r"/tag/([\w-]+)"
- example = "https://redbust.com/tag/TAG/"
-
- def galleries(self):
- return self._pagination("/tag/" + self.groups[0])
-
-
-class RedbustArchiveExtractor(RedbustExtractor):
- """Extractor for RedBust monthly archive collections"""
- subcategory = "archive"
- pattern = BASE_PATTERN + r"(/\d{4}/\d{2})"
- example = "https://redbust.com/2010/01/"
-
- def galleries(self):
- return self._pagination(self.groups[0])
-
-
-class RedbustImageExtractor(RedbustExtractor):
- """Extractor for RedBust images"""
- subcategory = "image"
- directory_fmt = ("{category}", "{title}")
- pattern = BASE_PATTERN + r"/(?!tag/|\d{4}/)([\w-]+)/([\w-]+)/?$"
- example = "https://redbust.com/TITLE/SLUG/"
-
- def items(self):
- gallery_slug, image_slug = self.groups
- url = f"{self.root}/{gallery_slug}/{image_slug}/"
- page = self.request(url).text
-
- img_url = None
-
- # Look for the largest image in srcset first
- if srcset := text.extr(page, 'srcset="', '"'):
- # Extract the largest image from srcset (typically last one)
- urls = srcset.split(", ")
- img_url = urls[-1].partition(" ")[0] if urls else None
-
- # Fallback to original extraction method
- if not img_url:
- if entry := text.extr(page, "entry-inner ", "alt="):
- img_url = text.extr(entry, "img src=", " ").strip("\"'")
-
- if not img_url:
- return
-
- end = img_url.rpartition("-")[2]
- data = text.nameext_from_url(img_url, {
- "title" : text.unescape(text.extr(
- page, 'title="Return to ', '"')),
- "image_id" : text.extr(
- page, "rel='shortlink' href='https://redbust.com/?p=", "'"),
- "gallery_slug": gallery_slug,
- "image_slug" : image_slug,
- "num" : text.parse_int(end.partition(".")[0]),
- "count" : 1,
- "url" : img_url,
- })
-
- yield Message.Directory, data
- yield Message.Url, img_url, data
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index e20d80e..cc73e47 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -33,11 +33,11 @@ class RedditExtractor(Extractor):
previews = self.config("previews", True)
embeds = self.config("embeds", True)
- if videos := self.config("videos", True):
- if videos == "ytdl":
- self._extract_video = self._extract_video_ytdl
- elif videos == "dash":
+ if videos := self.config("videos", "dash"):
+ if videos == "dash":
self._extract_video = self._extract_video_dash
+ elif videos == "ytdl":
+ self._extract_video = self._extract_video_ytdl
videos = True
selftext = self.config("selftext")
@@ -57,9 +57,9 @@ class RedditExtractor(Extractor):
if submission:
submission["comment"] = None
- submission["date"] = text.parse_timestamp(
+ submission["date"] = self.parse_timestamp(
submission["created_utc"])
- yield Message.Directory, submission
+ yield Message.Directory, "", submission
visited.add(submission["id"])
submission["num"] = 0
@@ -86,7 +86,7 @@ class RedditExtractor(Extractor):
yield Message.Url, url, submission
elif embeds and "media_metadata" in media:
- for embed in self._extract_embed(submission):
+ for embed in self._extract_embed(submission, media):
submission["num"] += 1
text.nameext_from_url(embed, submission)
yield Message.Url, embed, submission
@@ -94,6 +94,8 @@ class RedditExtractor(Extractor):
elif media["is_video"]:
if videos:
text.nameext_from_url(url, submission)
+ if not submission["extension"]:
+ submission["extension"] = "mp4"
url = "ytdl:" + self._extract_video(media)
yield Message.Url, url, submission
@@ -105,14 +107,14 @@ class RedditExtractor(Extractor):
urls.append((url, submission))
elif parentdir:
- yield Message.Directory, comments[0]
+ yield Message.Directory, "", comments[0]
if self.api.comments:
if comments and not submission:
submission = comments[0]
submission.setdefault("num", 0)
if not parentdir:
- yield Message.Directory, submission
+ yield Message.Directory, "", submission
for comment in comments:
media = (embeds and "media_metadata" in comment)
@@ -124,11 +126,11 @@ class RedditExtractor(Extractor):
data = submission.copy()
data["comment"] = comment
- comment["date"] = text.parse_timestamp(
+ comment["date"] = self.parse_timestamp(
comment["created_utc"])
if media:
- for url in self._extract_embed(comment):
+ for url in self._extract_embed(data, comment):
data["num"] += 1
text.nameext_from_url(url, data)
yield Message.Url, url, data
@@ -199,8 +201,8 @@ class RedditExtractor(Extractor):
submission["id"], item["media_id"])
self.log.debug(src)
- def _extract_embed(self, submission):
- meta = submission["media_metadata"]
+ def _extract_embed(self, submission, media):
+ meta = media["media_metadata"]
if not meta:
return
@@ -317,8 +319,8 @@ class RedditSubmissionExtractor(RedditExtractor):
"""Extractor for URLs from a submission on reddit.com"""
subcategory = "submission"
pattern = (r"(?:https?://)?(?:"
- r"(?:\w+\.)?reddit\.com/(?:(?:r|u|user)/[^/?#]+"
- r"/comments|gallery)|redd\.it)/([a-z0-9]+)")
+ r"(?:\w+\.)?reddit\.com/(?:(?:(?:r|u|user)/[^/?#]+/)?"
+ r"comments|gallery)|redd\.it)/([a-z0-9]+)")
example = "https://www.reddit.com/r/SUBREDDIT/comments/id/"
def __init__(self, match):
@@ -352,7 +354,7 @@ class RedditImageExtractor(Extractor):
def items(self):
url = f"https://{self.domain}/{self.path}{self.query}"
data = text.nameext_from_url(url)
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, url, data
@@ -394,7 +396,7 @@ class RedditAPI():
self.morecomments = config("morecomments", False)
self._warn_429 = False
- if config("api") == "rest":
+ if config("api") != "oauth":
self.root = "https://www.reddit.com"
self.headers = None
self.authenticate = util.noop
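Two reddit defaults change in these hunks: `videos` now defaults to the string "dash" rather than True, and the unauthenticated REST endpoint (www.reddit.com) is used unless `api` is explicitly set to "oauth", inverting the old opt-in `== "rest"` check. The net effect with an empty user config (`config` below is an illustrative stand-in for `self.config()`):

    def config(key, default=None):
        return {}.get(key, default)   # empty user config

    videos = config("videos", "dash")      # -> "dash" (DASH extraction)
    use_oauth = config("api") == "oauth"   # -> False (REST endpoint)
    print(videos, use_oauth)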
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 4098c54..164fdf4 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -51,8 +51,8 @@ class RedgifsExtractor(Extractor):
gif.update(metadata)
gif["count"] = cnt
- gif["date"] = text.parse_timestamp(gif.get("createDate"))
- yield Message.Directory, gif
+ gif["date"] = self.parse_timestamp(gif.get("createDate"))
+ yield Message.Directory, "", gif
for num, gif in enumerate(gifs, enum):
gif["_fallback"] = formats = self._formats(gif)
diff --git a/gallery_dl/extractor/rule34us.py b/gallery_dl/extractor/rule34us.py
index a43ea4c..d48539e 100644
--- a/gallery_dl/extractor/rule34us.py
+++ b/gallery_dl/extractor/rule34us.py
@@ -9,7 +9,7 @@
"""Extractors for https://rule34.us/"""
from .booru import BooruExtractor
-from .. import text, util
+from .. import text
import collections
@@ -19,7 +19,7 @@ class Rule34usExtractor(BooruExtractor):
per_page = 42
def _init(self):
- self._find_tags = util.re(
+ self._find_tags = text.re(
r'<li class="([^-"]+)-tag"[^>]*><a href="[^;"]+;q=([^"]+)').findall
def _parse_post(self, post_id):
@@ -57,7 +57,7 @@ class Rule34usTagExtractor(Rule34usExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = r"(?:https?://)?rule34\.us/index\.php\?r=posts/index&q=([^&#]+)"
+ pattern = r"(?:https?://)?rule34\.us/index\.php\?r=posts/index&q=([^&#]*)"
example = "https://rule34.us/index.php?r=posts/index&q=TAG"
def __init__(self, match):
diff --git a/gallery_dl/extractor/rule34vault.py b/gallery_dl/extractor/rule34vault.py
index 14d5aef..9f75f64 100644
--- a/gallery_dl/extractor/rule34vault.py
+++ b/gallery_dl/extractor/rule34vault.py
@@ -36,8 +36,7 @@ class Rule34vaultExtractor(BooruExtractor):
def _prepare(self, post):
post.pop("files", None)
- post["date"] = text.parse_datetime(
- post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ post["date"] = self.parse_datetime_iso(post["created"])
if "tags" in post:
post["tags"] = [t["value"] for t in post["tags"]]
@@ -80,7 +79,7 @@ class Rule34vaultExtractor(BooruExtractor):
class Rule34vaultPostExtractor(Rule34vaultExtractor):
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/post/(\d+)"
+ pattern = rf"{BASE_PATTERN}/post/(\d+)"
example = "https://rule34vault.com/post/12345"
def posts(self):
@@ -91,7 +90,7 @@ class Rule34vaultPlaylistExtractor(Rule34vaultExtractor):
subcategory = "playlist"
directory_fmt = ("{category}", "{playlist_id}")
archive_fmt = "p_{playlist_id}_{id}"
- pattern = BASE_PATTERN + r"/playlists/view/(\d+)"
+ pattern = rf"{BASE_PATTERN}/playlists/view/(\d+)"
example = "https://rule34vault.com/playlists/view/12345"
def metadata(self):
@@ -106,7 +105,7 @@ class Rule34vaultTagExtractor(Rule34vaultExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"/(?!p(?:ost|laylists)/)([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/(?!p(?:ost|laylists)/)([^/?#]+)"
example = "https://rule34vault.com/TAG"
def metadata(self):
diff --git a/gallery_dl/extractor/rule34xyz.py b/gallery_dl/extractor/rule34xyz.py
index 05915ba..ddd656f 100644
--- a/gallery_dl/extractor/rule34xyz.py
+++ b/gallery_dl/extractor/rule34xyz.py
@@ -68,8 +68,7 @@ class Rule34xyzExtractor(BooruExtractor):
def _prepare(self, post):
post.pop("files", None)
- post["date"] = text.parse_datetime(
- post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ post["date"] = self.parse_datetime_iso(post["created"])
post["filename"], _, post["format"] = post["filename"].rpartition(".")
if "tags" in post:
post["tags"] = [t["value"] for t in post["tags"]]
@@ -135,7 +134,7 @@ class Rule34xyzExtractor(BooruExtractor):
class Rule34xyzPostExtractor(Rule34xyzExtractor):
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/post/(\d+)"
+ pattern = rf"{BASE_PATTERN}/post/(\d+)"
example = "https://rule34.xyz/post/12345"
def posts(self):
@@ -146,7 +145,7 @@ class Rule34xyzPlaylistExtractor(Rule34xyzExtractor):
subcategory = "playlist"
directory_fmt = ("{category}", "{playlist_id}")
archive_fmt = "p_{playlist_id}_{id}"
- pattern = BASE_PATTERN + r"/playlists/view/(\d+)"
+ pattern = rf"{BASE_PATTERN}/playlists/view/(\d+)"
example = "https://rule34.xyz/playlists/view/12345"
def metadata(self):
@@ -161,7 +160,7 @@ class Rule34xyzTagExtractor(Rule34xyzExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"/([^/?#]+)$"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)$"
example = "https://rule34.xyz/TAG"
def metadata(self):
diff --git a/gallery_dl/extractor/s3ndpics.py b/gallery_dl/extractor/s3ndpics.py
index 215f160..9201a3f 100644
--- a/gallery_dl/extractor/s3ndpics.py
+++ b/gallery_dl/extractor/s3ndpics.py
@@ -30,15 +30,13 @@ class S3ndpicsExtractor(Extractor):
for post in self.posts():
post["id"] = post.pop("_id", None)
post["user"] = post.pop("userId", None)
- post["date"] = text.parse_datetime(
- post["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
- post["date_updated"] = text.parse_datetime(
- post["updatedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ post["date"] = self.parse_datetime_iso(post["createdAt"])
+ post["date_updated"] = self.parse_datetime_iso(post["updatedAt"])
files = post.pop("files", ())
post["count"] = len(files)
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], file in enumerate(files, 1):
post["type"] = file["type"]
path = file["url"]
diff --git a/gallery_dl/extractor/saint.py b/gallery_dl/extractor/saint.py
index 07d490a..e15c628 100644
--- a/gallery_dl/extractor/saint.py
+++ b/gallery_dl/extractor/saint.py
@@ -18,7 +18,7 @@ class SaintAlbumExtractor(LolisafeAlbumExtractor):
"""Extractor for saint albums"""
category = "saint"
root = "https://saint2.su"
- pattern = BASE_PATTERN + r"/a/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/a/([^/?#]+)"
example = "https://saint2.su/a/ID"
def fetch_album(self, album_id):
@@ -36,7 +36,7 @@ class SaintAlbumExtractor(LolisafeAlbumExtractor):
break
files.append({
"id2" : id2,
- "date" : text.parse_timestamp(extr("", ".")),
+ "date" : self.parse_timestamp(extr("", ".")),
"id" : extr("/embed/", '"'),
"size" : text.parse_int(extr('data="', '"')),
"file" : text.unescape(extr(
@@ -58,7 +58,7 @@ class SaintMediaExtractor(SaintAlbumExtractor):
"""Extractor for saint media links"""
subcategory = "media"
directory_fmt = ("{category}",)
- pattern = BASE_PATTERN + r"(/(embe)?d/([^/?#]+))"
+ pattern = rf"{BASE_PATTERN}(/(embe)?d/([^/?#]+))"
example = "https://saint2.su/embed/ID"
def fetch_album(self, album_id):
@@ -73,7 +73,7 @@ class SaintMediaExtractor(SaintAlbumExtractor):
file = {
"id" : album_id,
"id2" : extr("/thumbs/", "-"),
- "date" : text.parse_timestamp(extr("", ".")),
+ "date" : self.parse_timestamp(extr("", ".")),
"file" : text.unescape(extr('<source src="', '"')),
"id_dl": extr("/d/", "'"),
}
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 5caad4b..690b515 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -16,7 +16,7 @@ import collections
BASE_PATTERN = r"(?:https?://)?" \
r"(?:(?:chan|www|beta|black|white)\.sankakucomplex\.com|sankaku\.app)" \
- r"(?:/[a-z]{2})?"
+ r"(?:/[a-z]{2}(?:[-_][A-Z]{2})?)?"
class SankakuExtractor(BooruExtractor):
@@ -47,7 +47,7 @@ class SankakuExtractor(BooruExtractor):
self.api = SankakuAPI(self)
if self.config("tags") == "extended":
self._tags = self._tags_extended
- self._tags_findall = util.re(
+ self._tags_findall = text.re(
r"tag-type-([^\"' ]+).*?\?tags=([^\"'&]+)").findall
def _file_url(self, post):
@@ -61,13 +61,13 @@ class SankakuExtractor(BooruExtractor):
self.log.warning(
"Login required to download 'contentious_content' posts")
SankakuExtractor._warning = False
- elif url[8] == "v":
- url = "https://s.sankakucomplex.com" + url[url.index("/", 8):]
+ elif url[4] != "s":
+ url = "https" + url[4:]
return url
def _prepare(self, post):
post["created_at"] = post["created_at"]["s"]
- post["date"] = text.parse_timestamp(post["created_at"])
+ post["date"] = self.parse_timestamp(post["created_at"])
post["tags"] = post.pop("tag_names", ())
post["tag_string"] = " ".join(post["tags"])
post["_http_validate"] = self._check_expired
@@ -119,7 +119,7 @@ class SankakuTagExtractor(SankakuExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"(?:/posts)?/?\?([^#]*)"
+ pattern = rf"{BASE_PATTERN}(?:/posts)?/?\?([^#]*)"
example = "https://sankaku.app/?tags=TAG"
def __init__(self, match):
@@ -129,10 +129,10 @@ class SankakuTagExtractor(SankakuExtractor):
if "date:" in self.tags:
# rewrite 'date:' tags (#1790)
- self.tags = util.re(
+ self.tags = text.re(
r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)(?!T)").sub(
r"date:\3-\2-\1T00:00", self.tags)
- self.tags = util.re(
+ self.tags = text.re(
r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)(?!T)").sub(
r"date:\1-\2-\3T00:00", self.tags)
@@ -149,7 +149,7 @@ class SankakuPoolExtractor(SankakuExtractor):
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name_en]}")
archive_fmt = "p_{pool}_{id}"
- pattern = BASE_PATTERN + r"/(?:books|pools?/show)/(\w+)"
+ pattern = rf"{BASE_PATTERN}/(?:books|pools?/show)/(\w+)"
example = "https://sankaku.app/books/12345"
def metadata(self):
@@ -171,7 +171,7 @@ class SankakuPostExtractor(SankakuExtractor):
"""Extractor for single posts from sankaku.app"""
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)"
+ pattern = rf"{BASE_PATTERN}/posts?(?:/show)?/(\w+)"
example = "https://sankaku.app/post/show/12345"
def posts(self):
@@ -181,7 +181,7 @@ class SankakuPostExtractor(SankakuExtractor):
class SankakuBooksExtractor(SankakuExtractor):
"""Extractor for books by tag search on sankaku.app"""
subcategory = "books"
- pattern = BASE_PATTERN + r"/books/?\?([^#]*)"
+ pattern = rf"{BASE_PATTERN}/books/?\?([^#]*)"
example = "https://sankaku.app/books?tags=TAG"
def __init__(self, match):
diff --git a/gallery_dl/extractor/sankakucomplex.py b/gallery_dl/extractor/sankakucomplex.py
index 405e07e..cf5af81 100644
--- a/gallery_dl/extractor/sankakucomplex.py
+++ b/gallery_dl/extractor/sankakucomplex.py
@@ -40,7 +40,7 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
extr('property="og:title" content="', '"')),
"description": text.unescape(
extr('property="og:description" content="', '"')),
- "date" : text.parse_datetime(
+ "date" : self.parse_datetime_iso(
extr('property="article:published_time" content="', '"')),
}
content = extr('<div class="entry-content">', '</article>')
@@ -53,7 +53,7 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
files += self._extract_embeds(content)
data["count"] = len(files)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for num, url in enumerate(files, 1):
file = text.nameext_from_url(url)
if url[0] == "/":
@@ -64,19 +64,19 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
yield Message.Url, url, file
def _extract_images(self, content):
- orig_sub = util.re(r"-\d+x\d+\.").sub
+ orig_sub = text.re(r"-\d+x\d+\.").sub
return [
orig_sub(".", url) for url in
util.unique(text.extract_iter(content, 'data-lazy-src="', '"'))
]
def _extract_videos(self, content):
- return util.re(r"<source [^>]*src=[\"']([^\"']+)").findall(content)
+ return text.re(r"<source [^>]*src=[\"']([^\"']+)").findall(content)
def _extract_embeds(self, content):
return [
"ytdl:" + url for url in
- util.re(r"<iframe [^>]*src=[\"']([^\"']+)").findall(content)
+ text.re(r"<iframe [^>]*src=[\"']([^\"']+)").findall(content)
]
diff --git a/gallery_dl/extractor/schalenetwork.py b/gallery_dl/extractor/schalenetwork.py
index a4ef3b0..bbbb9da 100644
--- a/gallery_dl/extractor/schalenetwork.py
+++ b/gallery_dl/extractor/schalenetwork.py
@@ -126,7 +126,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
data = self.request_json(url, headers=headers)
try:
- data["date"] = text.parse_timestamp(data["created_at"] // 1000)
+ data["date"] = self.parse_timestamp(data["created_at"] // 1000)
data["count"] = len(data["thumbnails"]["entries"])
del data["thumbnails"]
except Exception:
@@ -138,14 +138,13 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
name = tag["name"]
namespace = tag.get("namespace", 0)
tags.append(types[namespace] + ":" + name)
- data["tags"] = tags
-
if self.config("tags", False):
- tags = collections.defaultdict(list)
+ categories = collections.defaultdict(list)
for tag in data["tags"]:
- tags[tag.get("namespace", 0)].append(tag["name"])
- for type, values in tags.items():
+ categories[tag.get("namespace", 0)].append(tag["name"])
+ for type, values in categories.items():
data["tags_" + types[type]] = values
+ data["tags"] = tags
url = f"{self.root_api}/books/detail/{gid}/{gkey}?crt={self._crt()}"
if token := self._token(False):
@@ -169,6 +168,22 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
url = (f"{self.root_api}/books/data/{gid}/{gkey}"
f"/{fmt['id']}/{fmt['key']}/{fmt['w']}?crt={self._crt()}")
headers = self.headers
+
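+        # with 'cbz' enabled, download the gallery as a single archive
+        # file instead of enumerating individual page images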
+ if self.config("cbz", False):
+ headers["Authorization"] = self._token()
+ dl = self.request_json(
+ f"{url}&action=dl", method="POST", headers=headers)
+ # 'crt' parameter here is necessary for 'hdoujin' downloads
+ url = f"{dl['base']}?crt={self._crt()}"
+ info = text.nameext_from_url(url)
+ if "fallback" in dl:
+ info["_fallback"] = (dl["fallback"],)
+ if not info["extension"]:
+ info["extension"] = "cbz"
+ return ((url, info),)
+
data = self.request_json(url, headers=headers)
base = data["base"]
diff --git a/gallery_dl/extractor/scrolller.py b/gallery_dl/extractor/scrolller.py
index ff191db..b853f53 100644
--- a/gallery_dl/extractor/scrolller.py
+++ b/gallery_dl/extractor/scrolller.py
@@ -34,7 +34,7 @@ class ScrolllerExtractor(Extractor):
files = self._extract_files(post)
post["count"] = len(files)
- yield Message.Directory, post
+ yield Message.Directory, "", post
for file in files:
url = file["url"]
post.update(file)
@@ -136,7 +136,7 @@ class ScrolllerExtractor(Extractor):
class ScrolllerSubredditExtractor(ScrolllerExtractor):
"""Extractor for media from a scrolller subreddit"""
subcategory = "subreddit"
- pattern = BASE_PATTERN + r"(/r/[^/?#]+)(?:/?\?([^#]+))?"
+ pattern = rf"{BASE_PATTERN}(/r/[^/?#]+)(?:/?\?([^#]+))?"
example = "https://scrolller.com/r/SUBREDDIT"
def posts(self):
@@ -173,7 +173,7 @@ class ScrolllerSubredditExtractor(ScrolllerExtractor):
class ScrolllerFollowingExtractor(ScrolllerExtractor):
"""Extractor for followed scrolller subreddits"""
subcategory = "following"
- pattern = BASE_PATTERN + r"/following"
+ pattern = rf"{BASE_PATTERN}/following"
example = "https://scrolller.com/following"
def items(self):
@@ -199,7 +199,7 @@ class ScrolllerFollowingExtractor(ScrolllerExtractor):
class ScrolllerPostExtractor(ScrolllerExtractor):
"""Extractor for media from a single scrolller post"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/(?!r/|following$)([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/(?!r/|following$)([^/?#]+)"
example = "https://scrolller.com/TITLE-SLUG-a1b2c3d4f5"
def posts(self):
diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py
index 7319731..705227d 100644
--- a/gallery_dl/extractor/seiga.py
+++ b/gallery_dl/extractor/seiga.py
@@ -31,7 +31,7 @@ class SeigaExtractor(Extractor):
images = iter(self.get_images())
data = next(images)
- yield Message.Directory, data
+ yield Message.Directory, "", data
for image in util.advance(images, self.start_image):
data.update(image)
data["extension"] = None
@@ -213,7 +213,7 @@ class SeigaImageExtractor(SeigaExtractor):
data["description"] = text.remove_html(data["description"])
data["image_id"] = text.parse_int(self.image_id)
- data["date"] = text.parse_datetime(
+ data["date"] = self.parse_datetime(
data["date"] + ":00+0900", "%Y年%m月%d日 %H:%M:%S%z")
return (data, data)
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index 2feb64e..b599f70 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -9,8 +9,7 @@
"""Extractors for https://www.sex.com/"""
from .common import Extractor, Message
-from .. import text
-from datetime import datetime
+from .. import text, dt
BASE_PATTERN = r"(?:https?://)?(?:www\.)?sex\.com(?:/[a-z]{2})?"
@@ -26,7 +25,7 @@ class SexcomExtractor(Extractor):
def items(self):
self.gifs = self.config("gifs", True)
- yield Message.Directory, self.metadata()
+ yield Message.Directory, "", self.metadata()
for pin in map(self._parse_pin, self.pins()):
if not pin:
continue
@@ -34,10 +33,10 @@ class SexcomExtractor(Extractor):
url = pin["url"]
parts = url.rsplit("/", 4)
try:
- pin["date_url"] = dt = datetime(
+ pin["date_url"] = d = dt.datetime(
int(parts[1]), int(parts[2]), int(parts[3]))
if "date" not in pin:
- pin["date"] = dt
+ pin["date"] = d
except Exception:
pass
pin["tags"] = [t[1:] for t in pin["tags"]]
@@ -136,7 +135,7 @@ class SexcomExtractor(Extractor):
text.nameext_from_url(data["url"], data)
data["uploader"] = extr('itemprop="author">', '<')
- data["date"] = text.parse_datetime(extr('datetime="', '"'))
+ data["date"] = dt.parse_iso(extr('datetime="', '"'))
data["tags"] = text.split_html(extr('class="tags"> Tags', '</div>'))
data["comments"] = text.parse_int(extr('Comments (', ')'))
@@ -195,8 +194,8 @@ class SexcomPinExtractor(SexcomExtractor):
"""Extractor for a pinned image or video on www.sex.com"""
subcategory = "pin"
directory_fmt = ("{category}",)
- pattern = (BASE_PATTERN +
- r"(/(?:\w\w/(?:pic|gif|video)s|pin)/\d+/?)(?!.*#related$)")
+ pattern = (rf"{BASE_PATTERN}"
+ rf"(/(?:\w\w/(?:pic|gif|video)s|pin)/\d+/?)(?!.*#related$)")
example = "https://www.sex.com/pin/12345-TITLE/"
def pins(self):
@@ -207,7 +206,7 @@ class SexcomRelatedPinExtractor(SexcomPinExtractor):
"""Extractor for related pins on www.sex.com"""
subcategory = "related-pin"
directory_fmt = ("{category}", "related {original_pin[pin_id]}")
- pattern = BASE_PATTERN + r"(/pin/(\d+)/?).*#related$"
+ pattern = rf"{BASE_PATTERN}(/pin/(\d+)/?).*#related$"
example = "https://www.sex.com/pin/12345#related"
def metadata(self):
@@ -224,7 +223,7 @@ class SexcomPinsExtractor(SexcomExtractor):
"""Extractor for a user's pins on www.sex.com"""
subcategory = "pins"
directory_fmt = ("{category}", "{user}")
- pattern = BASE_PATTERN + r"/user/([^/?#]+)/pins/"
+ pattern = rf"{BASE_PATTERN}/user/([^/?#]+)/pins/"
example = "https://www.sex.com/user/USER/pins/"
def metadata(self):
@@ -239,7 +238,7 @@ class SexcomLikesExtractor(SexcomExtractor):
"""Extractor for a user's liked pins on www.sex.com"""
subcategory = "likes"
directory_fmt = ("{category}", "{user}", "Likes")
- pattern = BASE_PATTERN + r"/user/([^/?#]+)/likes/"
+ pattern = rf"{BASE_PATTERN}/user/([^/?#]+)/likes/"
example = "https://www.sex.com/user/USER/likes/"
def metadata(self):
@@ -254,8 +253,8 @@ class SexcomBoardExtractor(SexcomExtractor):
"""Extractor for pins from a board on www.sex.com"""
subcategory = "board"
directory_fmt = ("{category}", "{user}", "{board}")
- pattern = (BASE_PATTERN + r"/user"
- r"/([^/?#]+)/(?!(?:following|pins|repins|likes)/)([^/?#]+)")
+ pattern = (rf"{BASE_PATTERN}/user"
+ rf"/([^/?#]+)/(?!(?:following|pins|repins|likes)/)([^/?#]+)")
example = "https://www.sex.com/user/USER/BOARD/"
def metadata(self):
@@ -270,14 +269,32 @@
return self._pagination(url)
+class SexcomFeedExtractor(SexcomExtractor):
+ """Extractor for pins from your account's main feed on www.sex.com"""
+ subcategory = "feed"
+ directory_fmt = ("{category}", "feed")
+ pattern = rf"{BASE_PATTERN}/feed"
+ example = "https://www.sex.com/feed/"
+
+ def metadata(self):
+ return {"feed": True}
+
+ def pins(self):
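+        # the personal feed needs the account's 'sess_sex' login cookie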
+ if not self.cookies_check(("sess_sex",)):
+ self.log.warning("no 'sess_sex' cookie set")
+ url = f"{self.root}/feed/"
+ return self._pagination(url)
+
+
class SexcomSearchExtractor(SexcomExtractor):
"""Extractor for search results on www.sex.com"""
subcategory = "search"
directory_fmt = ("{category}", "search", "{search[search]}")
- pattern = (BASE_PATTERN + r"/(?:"
- r"(pic|gif|video)s(?:\?(search=[^#]+)$|/([^/?#]*))"
- r"|search/(pic|gif|video)s"
- r")/?(?:\?([^#]+))?")
+ pattern = (rf"{BASE_PATTERN}/(?:"
+ rf"(pic|gif|video)s(?:\?(search=[^#]+)$|/([^/?#]*))"
+ rf"|search/(pic|gif|video)s"
+ rf")/?(?:\?([^#]+))?")
example = "https://www.sex.com/search/pics?query=QUERY"
def _init(self):
@@ -314,7 +330,7 @@ class SexcomSearchExtractor(SexcomExtractor):
parts = path.rsplit("/", 4)
try:
- pin["date_url"] = pin["date"] = datetime(
+ pin["date_url"] = pin["date"] = dt.datetime(
int(parts[1]), int(parts[2]), int(parts[3]))
except Exception:
pass
@@ -329,7 +345,7 @@ class SexcomSearchExtractor(SexcomExtractor):
path = f"{path[:-4]}gif"
pin["url"] = f"{root}{path}"
- yield Message.Directory, pin
+ yield Message.Directory, "", pin
yield Message.Url, pin["url"], pin
if params["page"] >= data["paging"]["numberOfPages"]:
diff --git a/gallery_dl/extractor/shimmie2.py b/gallery_dl/extractor/shimmie2.py
index 36b083b..5572b4d 100644
--- a/gallery_dl/extractor/shimmie2.py
+++ b/gallery_dl/extractor/shimmie2.py
@@ -25,6 +25,9 @@ class Shimmie2Extractor(BaseExtractor):
if file_url := self.config_instance("file_url"):
self.file_url_fmt = file_url
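+        # some instances (e.g. 'soybooru') wrap attribute values in single quotes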
+ if quote := self.config_instance("quote"):
+ self._quote_type = lambda _: quote
def items(self):
data = self.metadata()
@@ -44,7 +46,7 @@ class Shimmie2Extractor(BaseExtractor):
else:
text.nameext_from_url(url, post)
- yield Message.Directory, post
+ yield Message.Directory, "", post
yield Message.Url, url, post
def metadata(self):
@@ -85,6 +87,11 @@ BASE_PATTERN = Shimmie2Extractor.update({
"root": "https://co.llection.pics",
"pattern": r"co\.llection\.pics",
},
+ "soybooru": {
+ "root": "https://soybooru.com",
+ "pattern": r"soybooru\.com",
+ "quote": "'",
+ },
}) + r"/(?:index\.php\?q=/?)?"
@@ -93,7 +100,7 @@ class Shimmie2TagExtractor(Shimmie2Extractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
file_url_fmt = "{}/_images/{}/{}%20-%20{}.{}"
- pattern = BASE_PATTERN + r"post/list/([^/?#]+)(?:/(\d+))?"
+ pattern = rf"{BASE_PATTERN}post/list/([^/?#]+)(?:/(\d+))?"
example = "https://vidya.pics/post/list/TAG/1"
def metadata(self):
@@ -150,15 +157,14 @@ class Shimmie2TagExtractor(Shimmie2Extractor):
}
pnum += 1
- if not extr(">Next<", ">"):
- if not extr(f"/{pnum}'>{pnum}<", ">"):
- return
+ if not extr(f"/{pnum}{quote}>Next</", ">"):
+ return
class Shimmie2PostExtractor(Shimmie2Extractor):
"""Extractor for single shimmie2 posts"""
subcategory = "post"
- pattern = BASE_PATTERN + r"post/view/(\d+)"
+ pattern = rf"{BASE_PATTERN}post/view/(\d+)"
example = "https://vidya.pics/post/view/12345"
def posts(self):
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index 84c9a84..ad38562 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -20,7 +20,7 @@ class ShopifyExtractor(BaseExtractor):
def items(self):
data = self.metadata()
- yield Message.Directory, data
+ yield Message.Directory, "", data
for product in self.products():
for num, image in enumerate(product.pop("images"), 1):
@@ -90,7 +90,7 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
"""Base class for collection extractors for Shopify based sites"""
subcategory = "collection"
directory_fmt = ("{category}", "{collection[title]}")
- pattern = BASE_PATTERN + r"(/collections/[\w-]+)/?(?:$|[?#])"
+ pattern = rf"{BASE_PATTERN}(/collections/[\w-]+)/?(?:$|[?#])"
example = "https://www.fashionnova.com/collections/TITLE"
def metadata(self):
@@ -113,7 +113,7 @@ class ShopifyProductExtractor(ShopifyExtractor):
"""Base class for product extractors for Shopify based sites"""
subcategory = "product"
directory_fmt = ("{category}", "Products")
- pattern = BASE_PATTERN + r"((?:/collections/[\w-]+)?/products/[\w-]+)"
+ pattern = rf"{BASE_PATTERN}((?:/collections/[\w-]+)?/products/[\w-]+)"
example = "https://www.fashionnova.com/collections/TITLE/products/NAME"
def products(self):
diff --git a/gallery_dl/extractor/simpcity.py b/gallery_dl/extractor/simpcity.py
deleted file mode 100644
index d8227fa..0000000
--- a/gallery_dl/extractor/simpcity.py
+++ /dev/null
@@ -1,186 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2025 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://simpcity.cr/"""
-
-from .common import Extractor, Message
-from .. import text, exception
-
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?simpcity\.(?:cr|su)"
-
-
-class SimpcityExtractor(Extractor):
- """Base class for simpcity extractors"""
- category = "simpcity"
- root = "https://simpcity.cr"
-
- def items(self):
- extract_urls = text.re(
- r'<(?:a [^>]*?href|iframe [^>]*?src)="([^"]+)').findall
-
- for post in self.posts():
- urls = extract_urls(post["content"])
- data = {"post": post}
- post["count"] = data["count"] = len(urls)
- yield Message.Directory, data
- for data["num"], url in enumerate(urls, 1):
- yield Message.Queue, url, data
-
- def request_page(self, url):
- try:
- return self.request(url)
- except exception.HttpError as exc:
- if exc.status == 403 and b">Log in<" in exc.response.content:
- msg = text.extr(exc.response.text, "blockMessage--error", "</")
- raise exception.AuthRequired(
- "'authenticated cookies'", None,
- msg.rpartition(">")[2].strip())
- raise
-
- def _pagination(self, base, pnum=None):
- base = f"{self.root}{base}"
-
- if pnum is None:
- url = f"{base}/"
- pnum = 1
- else:
- url = f"{base}/page-{pnum}"
- pnum = None
-
- while True:
- page = self.request_page(url).text
-
- yield page
-
- if pnum is None or "pageNav-jump--next" not in page:
- return
- pnum += 1
- url = f"{base}/page-{pnum}"
-
- def _pagination_reverse(self, base, pnum=None):
- base = f"{self.root}{base}"
-
- url = f"{base}/page-9999" # force redirect to last page
- with self.request_page(url) as response:
- url = response.url
- if url[-1] == "/":
- pnum = 1
- else:
- pnum = text.parse_int(url[url.rfind("-")+1:], 1)
- page = response.text
-
- while True:
- yield page
-
- pnum -= 1
- if pnum > 1:
- url = f"{base}/page-{pnum}"
- elif pnum == 1:
- url = f"{base}/"
- else:
- return
-
- page = self.request_page(url).text
-
- def _parse_thread(self, page):
- schema = self._extract_jsonld(page)["mainEntity"]
- author = schema["author"]
- stats = schema["interactionStatistic"]
- url_t = schema["url"]
- url_a = author.get("url") or ""
-
- thread = {
- "id" : url_t[url_t.rfind(".")+1:-1],
- "url" : url_t,
- "title": schema["headline"],
- "date" : text.parse_datetime(schema["datePublished"]),
- "views": stats[0]["userInteractionCount"],
- "posts": stats[1]["userInteractionCount"],
- "tags" : (schema["keywords"].split(", ")
- if "keywords" in schema else ()),
- "section" : schema["articleSection"],
- "author" : author.get("name") or "",
- "author_id" : (url_a[url_a.rfind(".")+1:-1] if url_a else
- (author.get("name") or "")[15:]),
- "author_url": url_a,
- }
-
- return thread
-
- def _parse_post(self, html):
- extr = text.extract_from(html)
-
- post = {
- "author": extr('data-author="', '"'),
- "id": extr('data-content="post-', '"'),
- "author_url": extr('itemprop="url" content="', '"'),
- "date": text.parse_datetime(extr('datetime="', '"')),
- "content": extr('<div itemprop="text">',
- '<div class="js-selectToQuote').strip(),
- }
-
- url_a = post["author_url"]
- post["author_id"] = url_a[url_a.rfind(".")+1:-1]
-
- return post
-
-
-class SimpcityPostExtractor(SimpcityExtractor):
- subcategory = "post"
- pattern = rf"{BASE_PATTERN}/(?:threads/[^/?#]+/post-|posts/)(\d+)"
- example = "https://simpcity.cr/threads/TITLE.12345/post-54321"
-
- def posts(self):
- post_id = self.groups[0]
- url = f"{self.root}/posts/{post_id}/"
- page = self.request_page(url).text
-
- pos = page.find(f'data-content="post-{post_id}"')
- if pos < 0:
- raise exception.NotFoundError("post")
- html = text.extract(page, "<article ", "</article>", pos-200)[0]
-
- self.kwdict["thread"] = self._parse_thread(page)
- return (self._parse_post(html),)
-
-
-class SimpcityThreadExtractor(SimpcityExtractor):
- subcategory = "thread"
- pattern = rf"{BASE_PATTERN}(/threads/(?:[^/?#]+\.)?\d+)(?:/page-(\d+))?"
- example = "https://simpcity.cr/threads/TITLE.12345/"
-
- def posts(self):
- if (order := self.config("order-posts")) and \
- order[0] not in ("d", "r"):
- pages = self._pagination(*self.groups)
- reverse = False
- else:
- pages = self._pagination_reverse(*self.groups)
- reverse = True
-
- for page in pages:
- if "thread" not in self.kwdict:
- self.kwdict["thread"] = self._parse_thread(page)
- posts = text.extract_iter(page, "<article ", "</article>")
- if reverse:
- posts = list(posts)
- posts.reverse()
- for html in posts:
- yield self._parse_post(html)
-
-
-class SimpcityForumExtractor(SimpcityExtractor):
- subcategory = "forum"
- pattern = rf"{BASE_PATTERN}(/forums/(?:[^/?#]+\.)?\d+)(?:/page-(\d+))?"
- example = "https://simpcity.cr/forums/TITLE.123/"
-
- def items(self):
- data = {"_extractor": SimpcityThreadExtractor}
- for page in self._pagination(*self.groups):
- for path in text.extract_iter(page, ' uix-href="', '"'):
- yield Message.Queue, f"{self.root}{text.unquote(path)}", data
diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py
index d6541b2..78d3daf 100644
--- a/gallery_dl/extractor/simplyhentai.py
+++ b/gallery_dl/extractor/simplyhentai.py
@@ -48,7 +48,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
"characters": split(extr('box-title">Characters</div>', '</div>')),
"tags" : split(extr('box-title">Tags</div>', '</div>')),
"artist" : split(extr('box-title">Artists</div>', '</div>')),
- "date" : text.parse_datetime(text.remove_html(
+ "date" : self.parse_datetime(text.remove_html(
extr('Uploaded', '</div>')), "%d.%m.%Y"),
}
data["lang"] = util.language_to_code(data["language"])
@@ -106,7 +106,7 @@ class SimplyhentaiImageExtractor(Extractor):
})
data["token"] = data["filename"].rpartition("_")[2]
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, url, data
@@ -152,9 +152,9 @@ class SimplyhentaiVideoExtractor(Extractor):
"episode": text.parse_int(episode),
"tags": text.split_html(tags)[::2],
"type": "video",
- "date": text.parse_datetime(text.remove_html(
+ "date": self.parse_datetime(text.remove_html(
date), "%B %d, %Y %H:%M"),
})
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, video_url, data
diff --git a/gallery_dl/extractor/sizebooru.py b/gallery_dl/extractor/sizebooru.py
index cad4b23..00002b8 100644
--- a/gallery_dl/extractor/sizebooru.py
+++ b/gallery_dl/extractor/sizebooru.py
@@ -45,9 +45,9 @@ class SizebooruExtractor(BooruExtractor):
post.update({
"id" : text.parse_int(post_id),
- "date" : text.parse_datetime(
+ "date" : self.parse_datetime(
extr("<b>Posted Date:</b> ", "<"), "%m/%d/%Y"),
- "date_approved": text.parse_datetime(
+ "date_approved": self.parse_datetime(
extr("<b>Approved Date:</b> ", "<"), "%m/%d/%Y"),
"approver" : text.remove_html(extr("<b>Approved By:</b>", "</")),
"uploader" : text.remove_html(extr("<b>Posted By:</b>", "</")),
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 3c7205a..43e518e 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -10,7 +10,7 @@ from .common import Extractor, Message, Dispatch
from .. import text
BASE_PATTERN = r"(?:https?://)?skeb\.jp"
-USER_PATTERN = BASE_PATTERN + r"/@([^/?#]+)"
+USER_PATTERN = rf"{BASE_PATTERN}/@([^/?#]+)"
class SkebExtractor(Extractor):
@@ -57,7 +57,7 @@ class SkebExtractor(Extractor):
files = self._get_files_from_post(response)
post["count"] = len(files)
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], file in enumerate(files, 1):
post.update(file)
url = file["file_url"]
@@ -194,7 +194,7 @@ class SkebExtractor(Extractor):
class SkebPostExtractor(SkebExtractor):
"""Extractor for a single skeb post"""
subcategory = "post"
- pattern = USER_PATTERN + r"/works/(\d+)"
+ pattern = rf"{USER_PATTERN}/works/(\d+)"
example = "https://skeb.jp/@USER/works/123"
def posts(self):
@@ -204,7 +204,7 @@ class SkebPostExtractor(SkebExtractor):
class SkebWorksExtractor(SkebExtractor):
"""Extractor for a skeb user's works"""
subcategory = "works"
- pattern = USER_PATTERN + r"/works"
+ pattern = rf"{USER_PATTERN}/works"
example = "https://skeb.jp/@USER/works"
def posts(self):
@@ -216,7 +216,7 @@ class SkebWorksExtractor(SkebExtractor):
class SkebSentrequestsExtractor(SkebExtractor):
"""Extractor for a skeb user's sent requests"""
subcategory = "sentrequests"
- pattern = USER_PATTERN + r"/sent[ _-]?requests"
+ pattern = rf"{USER_PATTERN}/sent[ _-]?requests"
example = "https://skeb.jp/@USER/sentrequests"
def posts(self):
@@ -227,7 +227,7 @@ class SkebSentrequestsExtractor(SkebExtractor):
class SkebUserExtractor(Dispatch, SkebExtractor):
"""Extractor for a skeb user profile"""
- pattern = USER_PATTERN + r"/?$"
+ pattern = rf"{USER_PATTERN}/?$"
example = "https://skeb.jp/@USER"
def items(self):
@@ -246,7 +246,7 @@ class SkebUserExtractor(Dispatch, SkebExtractor):
class SkebSearchExtractor(SkebExtractor):
"""Extractor for skeb search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/search\?q=([^&#]+)"
+ pattern = rf"{BASE_PATTERN}/search\?q=([^&#]+)"
example = "https://skeb.jp/search?q=QUERY"
def metadata(self):
@@ -298,7 +298,7 @@ class SkebSearchExtractor(SkebExtractor):
class SkebFollowingExtractor(SkebExtractor):
"""Extractor for all creators followed by a skeb user"""
subcategory = "following"
- pattern = USER_PATTERN + r"/following_creators"
+ pattern = rf"{USER_PATTERN}/following_creators"
example = "https://skeb.jp/@USER/following_creators"
items = SkebExtractor.items_users
@@ -312,7 +312,7 @@ class SkebFollowingExtractor(SkebExtractor):
class SkebFollowingUsersExtractor(SkebExtractor):
"""Extractor for your followed users"""
subcategory = "following-users"
- pattern = BASE_PATTERN + r"/following_users"
+ pattern = rf"{BASE_PATTERN}/following_users"
example = "https://skeb.jp/following_users"
items = SkebExtractor.items_users
diff --git a/gallery_dl/extractor/slickpic.py b/gallery_dl/extractor/slickpic.py
index ee877f2..6f723c8 100644
--- a/gallery_dl/extractor/slickpic.py
+++ b/gallery_dl/extractor/slickpic.py
@@ -32,7 +32,7 @@ class SlickpicAlbumExtractor(SlickpicExtractor):
"{album[id]} {album[title]}")
filename_fmt = "{num:>03}_{id}{title:?_//}.{extension}"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/albums/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/albums/([^/?#]+)"
example = "https://USER.slickpic.com/albums/TITLE/"
def __init__(self, match):
@@ -56,7 +56,7 @@ class SlickpicAlbumExtractor(SlickpicExtractor):
"count": len(imgs),
}
- yield Message.Directory, data
+ yield Message.Directory, "", data
for num, img in enumerate(imgs, 1):
url = img["url_rsz"] + "/o/" + img["fname"]
img = text.nameext_from_url(img["fname"], {
@@ -110,7 +110,7 @@ class SlickpicAlbumExtractor(SlickpicExtractor):
class SlickpicUserExtractor(SlickpicExtractor):
subcategory = "user"
- pattern = BASE_PATTERN + r"(?:/gallery)?/?(?:$|[?#])"
+ pattern = rf"{BASE_PATTERN}(?:/gallery)?/?(?:$|[?#])"
example = "https://USER.slickpic.com/"
def items(self):
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index c0f0e36..1bb70ed 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -10,7 +10,6 @@
"""Extractors for https://www.slideshare.net/"""
from .common import GalleryExtractor
-from .. import text
class SlidesharePresentationExtractor(GalleryExtractor):
@@ -40,8 +39,8 @@ class SlidesharePresentationExtractor(GalleryExtractor):
"description" : slideshow["description"].strip(),
"views" : slideshow["views"],
"likes" : slideshow["likes"],
- "date" : text.parse_datetime(
- slideshow["createdAt"], "%Y-%m-%d %H:%M:%S %Z"),
+ "date" : self.parse_datetime_iso(
+ slideshow["createdAt"][:19]),
}
def images(self, page):
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index e9c89a1..902044c 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -81,7 +81,7 @@ class SmugmugAlbumExtractor(SmugmugExtractor):
del album["Uris"]
data = {"Album": album, "User": user}
- yield Message.Directory, data
+ yield Message.Directory, "", data
for image in self.api.album_images(self.album_id, "ImageSizeDetails"):
url = self._select_format(image)
@@ -93,7 +93,7 @@ class SmugmugImageExtractor(SmugmugExtractor):
"""Extractor for individual smugmug images"""
subcategory = "image"
archive_fmt = "{Image[ImageKey]}"
- pattern = BASE_PATTERN + r"(?:/[^/?#]+)+/i-([^/?#-]+)"
+ pattern = rf"{BASE_PATTERN}(?:/[^/?#]+)+/i-([^/?#-]+)"
example = "https://USER.smugmug.com/PATH/i-ID"
def __init__(self, match):
@@ -107,14 +107,14 @@ class SmugmugImageExtractor(SmugmugExtractor):
data = {"Image": image}
text.nameext_from_url(url, data)
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, url, data
class SmugmugPathExtractor(SmugmugExtractor):
"""Extractor for smugmug albums from URL paths and users"""
subcategory = "path"
- pattern = BASE_PATTERN + r"((?:/[^/?#a-fh-mo-z][^/?#]*)*)/?$"
+ pattern = rf"{BASE_PATTERN}((?:/[^/?#a-fh-mo-z][^/?#]*)*)/?$"
example = "https://USER.smugmug.com/PATH"
def __init__(self, match):
diff --git a/gallery_dl/extractor/soundgasm.py b/gallery_dl/extractor/soundgasm.py
index 79ab74d..a4617dd 100644
--- a/gallery_dl/extractor/soundgasm.py
+++ b/gallery_dl/extractor/soundgasm.py
@@ -26,7 +26,7 @@ class SoundgasmExtractor(Extractor):
def items(self):
for sound in map(self._extract_sound, self.sounds()):
url = sound["url"]
- yield Message.Directory, sound
+ yield Message.Directory, "", sound
yield Message.Url, url, text.nameext_from_url(url, sound)
def _extract_sound(self, url):
@@ -50,7 +50,7 @@ class SoundgasmExtractor(Extractor):
class SoundgasmAudioExtractor(SoundgasmExtractor):
"""Extractor for audio clips from soundgasm.net"""
subcategory = "audio"
- pattern = BASE_PATTERN + r"/([^/?#]+)/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/([^/?#]+)"
example = "https://soundgasm.net/u/USER/TITLE"
def __init__(self, match):
@@ -64,7 +64,7 @@ class SoundgasmAudioExtractor(SoundgasmExtractor):
class SoundgasmUserExtractor(SoundgasmExtractor):
"""Extractor for all sounds from a soundgasm user"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/([^/?#]+)/?$"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/?$"
example = "https://soundgasm.net/u/USER"
def __init__(self, match):
diff --git a/gallery_dl/extractor/speakerdeck.py b/gallery_dl/extractor/speakerdeck.py
index b809b7f..412b3b7 100644
--- a/gallery_dl/extractor/speakerdeck.py
+++ b/gallery_dl/extractor/speakerdeck.py
@@ -9,7 +9,7 @@
"""Extractors for https://speakerdeck.com/"""
from .common import GalleryExtractor
-from .. import text, util
+from .. import text
class SpeakerdeckPresentationExtractor(GalleryExtractor):
@@ -46,7 +46,7 @@ class SpeakerdeckPresentationExtractor(GalleryExtractor):
def images(self, _):
url = f"{self.root}/player/{self.presentation_id}"
page = self.request(url).text
- page = util.re(r"\s+").sub(" ", page)
+ page = text.re(r"\s+").sub(" ", page)
return [
(url, None)
for url in text.extract_iter(page, 'js-sd-slide" data-url="', '"')
diff --git a/gallery_dl/extractor/steamgriddb.py b/gallery_dl/extractor/steamgriddb.py
index e17b9fd..c3af7fd 100644
--- a/gallery_dl/extractor/steamgriddb.py
+++ b/gallery_dl/extractor/steamgriddb.py
@@ -59,7 +59,7 @@ class SteamgriddbExtractor(Extractor):
fake_png = download_fake_png and asset.get("fake_png")
asset["count"] = 2 if fake_png else 1
- yield Message.Directory, asset
+ yield Message.Directory, "", asset
asset["num"] = 1
url = asset["url"]
@@ -157,7 +157,7 @@ class SteamgriddbAssetsExtractor(SteamgriddbExtractor):
class SteamgriddbAssetExtractor(SteamgriddbExtractor):
"""Extractor for a single asset"""
subcategory = "asset"
- pattern = BASE_PATTERN + r"/(grid|hero|logo|icon)/(\d+)"
+ pattern = rf"{BASE_PATTERN}/(grid|hero|logo|icon)/(\d+)"
example = "https://www.steamgriddb.com/grid/1234"
def __init__(self, match):
@@ -177,7 +177,7 @@ class SteamgriddbAssetExtractor(SteamgriddbExtractor):
class SteamgriddbGridsExtractor(SteamgriddbAssetsExtractor):
subcategory = "grids"
asset_type = "grid"
- pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/grids(?:/(\d+))?"
+ pattern = rf"{BASE_PATTERN}/(game|collection)/(\d+)/grids(?:/(\d+))?"
example = "https://www.steamgriddb.com/game/1234/grids"
valid_dimensions = ("460x215", "920x430", "600x900", "342x482", "660x930",
"512x512", "1024x1024")
@@ -189,7 +189,7 @@ class SteamgriddbGridsExtractor(SteamgriddbAssetsExtractor):
class SteamgriddbHeroesExtractor(SteamgriddbAssetsExtractor):
subcategory = "heroes"
asset_type = "hero"
- pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/heroes(?:/(\d+))?"
+ pattern = rf"{BASE_PATTERN}/(game|collection)/(\d+)/heroes(?:/(\d+))?"
example = "https://www.steamgriddb.com/game/1234/heroes"
valid_dimensions = ("1920x620", "3840x1240", "1600x650")
valid_styles = ("alternate", "blurred", "material")
@@ -199,7 +199,7 @@ class SteamgriddbHeroesExtractor(SteamgriddbAssetsExtractor):
class SteamgriddbLogosExtractor(SteamgriddbAssetsExtractor):
subcategory = "logos"
asset_type = "logo"
- pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/logos(?:/(\d+))?"
+ pattern = rf"{BASE_PATTERN}/(game|collection)/(\d+)/logos(?:/(\d+))?"
example = "https://www.steamgriddb.com/game/1234/logos"
valid_dimensions = None
valid_styles = ("official", "white", "black", "custom")
@@ -209,7 +209,7 @@ class SteamgriddbLogosExtractor(SteamgriddbAssetsExtractor):
class SteamgriddbIconsExtractor(SteamgriddbAssetsExtractor):
subcategory = "icons"
asset_type = "icon"
- pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/icons(?:/(\d+))?"
+ pattern = rf"{BASE_PATTERN}/(game|collection)/(\d+)/icons(?:/(\d+))?"
example = "https://www.steamgriddb.com/game/1234/icons"
valid_dimensions = [f"{i}x{i}" for i in (8, 10, 14, 16, 20, 24,
28, 32, 35, 40, 48, 54, 56, 57, 60, 64, 72, 76, 80, 90,
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 989e6cc..280c8d7 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -46,14 +46,20 @@ class SubscribestarExtractor(Extractor):
content, "<body>", "</body>")
data["title"] = text.unescape(text.rextr(content, "<h1>", "</h1>"))
- yield Message.Directory, data
+ yield Message.Directory, "", data
for num, item in enumerate(media, 1):
item.update(data)
item["num"] = num
- text.nameext_from_url(item.get("name") or item["url"], item)
- if item["url"][0] == "/":
- item["url"] = self.root + item["url"]
- yield Message.Url, item["url"], item
+
+ url = item["url"]
+ if name := (item.get("name") or item.get("original_filename")):
+ text.nameext_from_name(name, item)
+ else:
+ text.nameext_from_url(url, item)
+
+ if url[0] == "/":
+ url = f"{self.root}{url}"
+ yield Message.Url, url, item
def posts(self):
"""Yield HTML content of all relevant posts"""
@@ -155,7 +161,7 @@ class SubscribestarExtractor(Extractor):
attachments = text.extr(
html, 'class="uploads-docs"', 'class="post-edit_form"')
if attachments:
- for att in util.re(r'class="doc_preview[" ]').split(
+ for att in text.re(r'class="doc_preview[" ]').split(
attachments)[1:]:
media.append({
"id" : text.parse_int(text.extr(
@@ -169,7 +175,7 @@ class SubscribestarExtractor(Extractor):
audios = text.extr(
html, 'class="uploads-audios"', 'class="post-edit_form"')
if audios:
- for audio in util.re(r'class="audio_preview-data[" ]').split(
+ for audio in text.re(r'class="audio_preview-data[" ]').split(
audios)[1:]:
media.append({
"id" : text.parse_int(text.extr(
@@ -202,9 +208,9 @@ class SubscribestarExtractor(Extractor):
def _parse_datetime(self, dt):
if dt.startswith("Updated on "):
dt = dt[11:]
- date = text.parse_datetime(dt, "%b %d, %Y %I:%M %p")
+ date = self.parse_datetime(dt, "%b %d, %Y %I:%M %p")
if date is dt:
- date = text.parse_datetime(dt, "%B %d, %Y %I:%M %p")
+ date = self.parse_datetime(dt, "%B %d, %Y %I:%M %p")
return date
def _warn_preview(self):
@@ -215,7 +221,7 @@ class SubscribestarExtractor(Extractor):
class SubscribestarUserExtractor(SubscribestarExtractor):
"""Extractor for media from a subscribestar user"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/(?!posts/)([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/(?!posts/)([^/?#]+)"
example = "https://www.subscribestar.com/USER"
def posts(self):
@@ -237,7 +243,7 @@ class SubscribestarUserExtractor(SubscribestarExtractor):
class SubscribestarPostExtractor(SubscribestarExtractor):
"""Extractor for media from a single subscribestar post"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/posts/(\d+)"
+ pattern = rf"{BASE_PATTERN}/posts/(\d+)"
example = "https://www.subscribestar.com/posts/12345"
def posts(self):
diff --git a/gallery_dl/extractor/sxypix.py b/gallery_dl/extractor/sxypix.py
new file mode 100644
index 0000000..c9a1701
--- /dev/null
+++ b/gallery_dl/extractor/sxypix.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://sxypix.com/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class SxypixGalleryExtractor(GalleryExtractor):
+ """Extractor for image galleries from sxypix.com"""
+ category = "sxypix"
+ root = "https://sxypix.com"
+ pattern = r"(?:https?://)?(?:www\.)?sxypix\.com(/w/(\w+))"
+ example = "https://sxypix.com/w/2bbaf1b24a5863d0e73436619bbaa7ee"
+
+ def metadata(self, page):
+ return {
+ "gallery_id": self.groups[1],
+ "title": text.unescape(text.extr(
+ page, '<meta name="keywords" content="', '"')),
+ }
+
+ def images(self, page):
+ data = {
+ "aid" : text.extr(page, "data-aid='", "'"),
+ "ghash": text.extr(page, "data-ghash='", "'"),
+ }
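+        # the full-size image list is returned by a POST request,
+        # keyed by the gallery page's 'aid' and 'ghash' data attributes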
+ gallery = self.request_json(
+ "https://sxypix.com/php/gall.php", method="POST", data=data)
+
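+        # entries carry protocol-relative 'data-src' URLs starting with
+        # '//.'; prepend 'https://x.' to turn them into absolute URLs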
+ base = "https://x."
+ return [
+ (base + text.extr(entry, "data-src='//.", "'"), None)
+ for entry in gallery["r"]
+ ]
diff --git a/gallery_dl/extractor/szurubooru.py b/gallery_dl/extractor/szurubooru.py
index 190ccbf..59477cc 100644
--- a/gallery_dl/extractor/szurubooru.py
+++ b/gallery_dl/extractor/szurubooru.py
@@ -57,8 +57,7 @@ class SzurubooruExtractor(booru.BooruExtractor):
return url
def _prepare(self, post):
- post["date"] = text.parse_datetime(
- post["creationTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ post["date"] = self.parse_datetime_iso(post["creationTime"])
tags = []
tags_categories = collections.defaultdict(list)
@@ -94,7 +93,7 @@ class SzurubooruTagExtractor(SzurubooruExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}_{version}"
- pattern = BASE_PATTERN + r"/posts(?:/query=([^/?#]*))?"
+ pattern = rf"{BASE_PATTERN}/posts(?:/query=([^/?#]*))?"
example = "https://booru.bcbnsfw.space/posts/query=TAG"
def __init__(self, match):
@@ -117,7 +116,7 @@ class SzurubooruTagExtractor(SzurubooruExtractor):
class SzurubooruPostExtractor(SzurubooruExtractor):
subcategory = "post"
archive_fmt = "{id}_{version}"
- pattern = BASE_PATTERN + r"/post/(\d+)"
+ pattern = rf"{BASE_PATTERN}/post/(\d+)"
example = "https://booru.bcbnsfw.space/post/12345"
def posts(self):
diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py
index d823f6a..5f8cb67 100644
--- a/gallery_dl/extractor/tapas.py
+++ b/gallery_dl/extractor/tapas.py
@@ -72,7 +72,7 @@ class TapasExtractor(Extractor):
class TapasEpisodeExtractor(TapasExtractor):
subcategory = "episode"
- pattern = BASE_PATTERN + r"/episode/(\d+)"
+ pattern = rf"{BASE_PATTERN}/episode/(\d+)"
example = "https://tapas.io/episode/12345"
def items(self):
@@ -89,8 +89,8 @@ class TapasEpisodeExtractor(TapasExtractor):
html = data["html"]
episode["series"] = self._extract_series(html)
- episode["date"] = text.parse_datetime(episode["publish_date"])
- yield Message.Directory, episode
+ episode["date"] = self.parse_datetime_iso(episode["publish_date"])
+ yield Message.Directory, "", episode
if episode["book"]:
content = text.extr(
@@ -116,7 +116,7 @@ class TapasEpisodeExtractor(TapasExtractor):
class TapasSeriesExtractor(TapasExtractor):
subcategory = "series"
- pattern = BASE_PATTERN + r"/series/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/series/([^/?#]+)"
example = "https://tapas.io/series/TITLE"
def items(self):
@@ -150,7 +150,7 @@ class TapasSeriesExtractor(TapasExtractor):
class TapasCreatorExtractor(TapasExtractor):
subcategory = "creator"
- pattern = BASE_PATTERN + r"/(?!series|episode)([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/(?!series|episode)([^/?#]+)"
example = "https://tapas.io/CREATOR"
def items(self):
diff --git a/gallery_dl/extractor/tcbscans.py b/gallery_dl/extractor/tcbscans.py
index 6dcb153..e27ef0d 100644
--- a/gallery_dl/extractor/tcbscans.py
+++ b/gallery_dl/extractor/tcbscans.py
@@ -15,7 +15,7 @@ BASE_PATTERN = (r"(?:https?://)?(?:tcb(?:-backup\.bihar-mirchi|scans)"
class TcbscansChapterExtractor(ChapterExtractor):
category = "tcbscans"
- pattern = BASE_PATTERN + r"(/chapters/\d+/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/chapters/\d+/[^/?#]+)"
example = "https://tcbscans.me/chapters/12345/MANGA-chapter-123"
def __init__(self, match):
@@ -44,7 +44,7 @@ class TcbscansChapterExtractor(ChapterExtractor):
class TcbscansMangaExtractor(MangaExtractor):
category = "tcbscans"
chapterclass = TcbscansChapterExtractor
- pattern = BASE_PATTERN + r"(/mangas/\d+/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/mangas/\d+/[^/?#]+)"
example = "https://tcbscans.me/mangas/123/MANGA"
def __init__(self, match):
diff --git a/gallery_dl/extractor/telegraph.py b/gallery_dl/extractor/telegraph.py
index 2713621..ab77b31 100644
--- a/gallery_dl/extractor/telegraph.py
+++ b/gallery_dl/extractor/telegraph.py
@@ -27,9 +27,8 @@ class TelegraphGalleryExtractor(GalleryExtractor):
'property="og:title" content="', '"')),
"description": text.unescape(extr(
'property="og:description" content="', '"')),
- "date": text.parse_datetime(extr(
- 'property="article:published_time" content="', '"'),
- "%Y-%m-%dT%H:%M:%S%z"),
+ "date": self.parse_datetime_iso(extr(
+ 'property="article:published_time" content="', '"')),
"author": text.unescape(extr(
'property="article:author" content="', '"')),
"post_url": text.unescape(extr(
diff --git a/gallery_dl/extractor/tenor.py b/gallery_dl/extractor/tenor.py
index 7e1f802..3e4bab0 100644
--- a/gallery_dl/extractor/tenor.py
+++ b/gallery_dl/extractor/tenor.py
@@ -40,16 +40,17 @@ class TenorExtractor(Extractor):
continue
url = fmt["url"]
+ title = gif.pop("h1_title", "")
+ gif["title"] = title[:-4] if title.endswith(" GIF") else title
+ gif["width"], gif["height"] = fmt.pop("dims") or (0, 0)
+ gif["description"] = gif.pop("content_description", "")
gif["id_format"] = url.rsplit("/", 2)[1]
gif["format"] = fmt["name"]
- gif["width"], gif["height"] = fmt["dims"]
gif["duration"] = fmt["duration"]
gif["size"] = fmt["size"]
- gif["title"] = gif["h1_title"][:-4]
- gif["description"] = gif.pop("content_description", "")
- gif["date"] = text.parse_timestamp(gif["created"])
+ gif["date"] = self.parse_timestamp(gif["created"])
- yield Message.Directory, gif
+ yield Message.Directory, "", gif
yield Message.Url, url, text.nameext_from_url(url, gif)
def _extract_format(self, gif):
@@ -110,7 +111,7 @@ class TenorExtractor(Extractor):
class TenorImageExtractor(TenorExtractor):
subcategory = "image"
- pattern = BASE_PATTERN + r"view/(?:[^/?#]*-)?(\d+)"
+ pattern = rf"{BASE_PATTERN}view/(?:[^/?#]*-)?(\d+)"
example = "https://tenor.com/view/SLUG-1234567890"
def gifs(self):
@@ -124,7 +125,7 @@ class TenorImageExtractor(TenorExtractor):
class TenorSearchExtractor(TenorExtractor):
subcategory = "search"
directory_fmt = ("{category}", "{search_tags}")
- pattern = BASE_PATTERN + r"search/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}search/([^/?#]+)"
example = "https://tenor.com/search/QUERY"
def gifs(self):
@@ -140,7 +141,7 @@ class TenorSearchExtractor(TenorExtractor):
class TenorUserExtractor(TenorExtractor):
subcategory = "user"
directory_fmt = ("{category}", "@{user[username]}")
- pattern = BASE_PATTERN + r"(?:users|official)/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(?:users|official)/([^/?#]+)"
example = "https://tenor.com/users/USER"
def gifs(self):
diff --git a/gallery_dl/extractor/thehentaiworld.py b/gallery_dl/extractor/thehentaiworld.py
index 9a30654..773f300 100644
--- a/gallery_dl/extractor/thehentaiworld.py
+++ b/gallery_dl/extractor/thehentaiworld.py
@@ -36,12 +36,12 @@ class ThehentaiworldExtractor(Extractor):
if "file_urls" in post:
urls = post["file_urls"]
post["count"] = len(urls)
- yield Message.Directory, post
+ yield Message.Directory, "", post
for post["num"], url in enumerate(urls, 1):
text.nameext_from_url(url, post)
yield Message.Url, url, post
else:
- yield Message.Directory, post
+ yield Message.Directory, "", post
url = post["file_url"]
text.nameext_from_url(url, post)
yield Message.Url, url, post
@@ -56,8 +56,7 @@ class ThehentaiworldExtractor(Extractor):
"id" : text.parse_int(extr(" postid-", " ")),
"slug" : extr(" post-", '"'),
"tags" : extr('id="tagsHead">', "</ul>"),
- "date" : text.parse_datetime(extr(
- "<li>Posted: ", "<"), "%Y-%m-%d"),
+ "date" : self.parse_datetime_iso(extr("<li>Posted: ", "<")),
}
if (c := url[27]) == "v":
diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py
index f450806..a4c7171 100644
--- a/gallery_dl/extractor/tiktok.py
+++ b/gallery_dl/extractor/tiktok.py
@@ -25,6 +25,7 @@ class TiktokExtractor(Extractor):
def _init(self):
self.audio = self.config("audio", True)
self.video = self.config("videos", True)
+ self.cover = self.config("covers", False)
def items(self):
for tiktok_url in self.urls():
@@ -43,10 +44,10 @@ class TiktokExtractor(Extractor):
post = video_detail["itemInfo"]["itemStruct"]
post["user"] = (a := post.get("author")) and a["uniqueId"] or ""
- post["date"] = text.parse_timestamp(post["createTime"])
+ post["date"] = self.parse_timestamp(post["createTime"])
original_title = title = post["desc"]
- yield Message.Directory, post
+ yield Message.Directory, "", post
ytdl_media = False
if "imagePost" in post:
@@ -70,12 +71,14 @@ class TiktokExtractor(Extractor):
if self.audio and "music" in post:
if self.audio == "ytdl":
ytdl_media = "audio"
- else:
- url = self._extract_audio(post)
+ elif url := self._extract_audio(post):
yield Message.Url, url, post
- elif self.video and "video" in post:
- ytdl_media = "video"
+ elif "video" in post:
+ if self.video:
+ ytdl_media = "video"
+ if self.cover and (url := self._extract_cover(post, "video")):
+ yield Message.Url, url, post
else:
self.log.info("%s: Skipping post", tiktok_url)
@@ -144,6 +147,32 @@
post["extension"] = "mp3"
return url
+ def _extract_cover(self, post, type):
+ media = post[type]
+
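+        # pick the first available cover image, in order of preference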
+ for cover_id in ("thumbnail", "cover", "originCover", "dynamicCover"):
+ if url := media.get(cover_id):
+ break
+ else:
+ return
+
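+        # derive filename metadata from the URL, but force a 'jpg' extension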
+ text.nameext_from_url(url, post)
+ post.update({
+ "type" : "cover",
+ "extension": "jpg",
+ "image" : url,
+ "title" : post["desc"] or f"TikTok {type} cover #{post['id']}",
+ "duration" : media.get("duration"),
+ "num" : 0,
+ "img_id" : "",
+ "cover_id" : cover_id,
+ "width" : 0,
+ "height" : 0,
+ })
+ return url
+
def _check_status_code(self, detail, url):
status = detail.get("statusCode")
if not status:
@@ -166,7 +193,7 @@ class TiktokExtractor(Extractor):
class TiktokPostExtractor(TiktokExtractor):
"""Extract a single video or photo TikTok link"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/(?:@([\w_.-]*)|share)/(?:phot|vide)o/(\d+)"
+ pattern = rf"{BASE_PATTERN}/(?:@([\w_.-]*)|share)/(?:phot|vide)o/(\d+)"
example = "https://www.tiktok.com/@USER/photo/1234567890"
def urls(self):
@@ -199,7 +226,7 @@ class TiktokVmpostExtractor(TiktokExtractor):
class TiktokUserExtractor(TiktokExtractor):
"""Extract a TikTok user's profile"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/@([\w_.-]+)/?(?:$|\?|#)"
+ pattern = rf"{BASE_PATTERN}/@([\w_.-]+)/?(?:$|\?|#)"
example = "https://www.tiktok.com/@USER"
def _init(self):
@@ -214,7 +241,7 @@ class TiktokUserExtractor(TiktokExtractor):
except (ImportError, SyntaxError) as exc:
self.log.error("Cannot import module '%s'",
getattr(exc, "name", ""))
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
raise exception.ExtractionError("yt-dlp or youtube-dl is required "
"for this feature!")
@@ -254,7 +281,7 @@ class TiktokUserExtractor(TiktokExtractor):
self.log.warning("Unable to extract 'avatar' URL (%s: %s)",
exc.__class__.__name__, exc)
else:
- yield Message.Directory, avatar
+ yield Message.Directory, "", avatar
yield Message.Url, avatar_url, avatar
with ytdl_instance as ydl:
diff --git a/gallery_dl/extractor/tmohentai.py b/gallery_dl/extractor/tmohentai.py
index ef441d3..873cce8 100644
--- a/gallery_dl/extractor/tmohentai.py
+++ b/gallery_dl/extractor/tmohentai.py
@@ -16,7 +16,7 @@ class TmohentaiGalleryExtractor(GalleryExtractor):
category = "tmohentai"
root = "http://tmohentai.com"
directory_fmt = ("{category}", "{title} ({gallery_id})")
- pattern = BASE_PATTERN + r"/(?:contents|reader)/(\w+)"
+ pattern = rf"{BASE_PATTERN}/(?:contents|reader)/(\w+)"
example = "https://tmohentai.com/contents/12345a67b89c0"
def __init__(self, match):
diff --git a/gallery_dl/extractor/toyhouse.py b/gallery_dl/extractor/toyhouse.py
index 7add79a..cc29b11 100644
--- a/gallery_dl/extractor/toyhouse.py
+++ b/gallery_dl/extractor/toyhouse.py
@@ -34,7 +34,7 @@ class ToyhouseExtractor(Extractor):
post.update(metadata)
text.nameext_from_url(post["url"], post)
post["id"], _, post["hash"] = post["filename"].partition("_")
- yield Message.Directory, post
+ yield Message.Directory, "", post
yield Message.Url, post["url"], post
def posts(self):
@@ -51,7 +51,7 @@ class ToyhouseExtractor(Extractor):
extr = text.extract_from(post)
return {
"url": extr(needle, '"'),
- "date": text.parse_datetime(extr(
+ "date": self.parse_datetime(extr(
'</h2>\n <div class="mb-1">', '<'),
"%d %b %Y, %I:%M:%S %p"),
"artists": [
@@ -104,7 +104,7 @@ class ToyhouseExtractor(Extractor):
class ToyhouseArtExtractor(ToyhouseExtractor):
"""Extractor for artworks of a toyhouse user"""
subcategory = "art"
- pattern = BASE_PATTERN + r"/([^/?#]+)/art"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/art"
example = "https://www.toyhou.se/USER/art"
def posts(self):
diff --git a/gallery_dl/extractor/tsumino.py b/gallery_dl/extractor/tsumino.py
index 8732c60..1ccdafb 100644
--- a/gallery_dl/extractor/tsumino.py
+++ b/gallery_dl/extractor/tsumino.py
@@ -65,7 +65,7 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor):
"title_jp" : title_jp,
"thumbnail" : extr('"og:image" content="', '"'),
"uploader" : text.remove_html(extr('id="Uploader">', '</div>')),
- "date" : text.parse_datetime(
+ "date" : self.parse_datetime(
extr('id="Uploaded">', '</div>').strip(), "%Y %B %d"),
"rating" : text.parse_float(extr(
'id="Rating">', '</div>').partition(" ")[0]),
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 92fc831..5bb5a40 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -9,8 +9,7 @@
"""Extractors for https://www.tumblr.com/"""
from .common import Extractor, Message
-from .. import text, util, oauth, exception
-from datetime import datetime, date, timedelta
+from .. import text, util, dt, oauth, exception
BASE_PATTERN = (
@@ -61,16 +60,16 @@ class TumblrExtractor(Extractor):
blog = None
# pre-compile regular expressions
- self._sub_video = util.re(
+ self._sub_video = text.re(
r"https?://((?:vt|vtt|ve)(?:\.media)?\.tumblr\.com"
r"/tumblr_[^_]+)_\d+\.([0-9a-z]+)").sub
if self.inline:
- self._sub_image = util.re(
+ self._sub_image = text.re(
r"https?://(\d+\.media\.tumblr\.com(?:/[0-9a-f]+)?"
r"/tumblr(?:_inline)?_[^_]+)_\d+\.([0-9a-z]+)").sub
- self._subn_orig_image = util.re(r"/s\d+x\d+/").subn
- _findall_image = util.re('<img src="([^"]+)"').findall
- _findall_video = util.re('<source src="([^"]+)"').findall
+ self._subn_orig_image = text.re(r"/s\d+x\d+/").subn
+ _findall_image = text.re('<img src="([^"]+)"').findall
+ _findall_video = text.re('<source src="([^"]+)"').findall
for post in self.posts():
if self.date_min > post["timestamp"]:
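Note on the precompiled patterns above: _subn_orig_image strips the /sNNNxNNN/ size segment from inline image URLs so they resolve to their originals. A minimal stdlib demonstration (the example URL and the "/" replacement string are illustrative; this hunk does not show what the real code substitutes):

    import re

    # Same pattern as _subn_orig_image above; subn() also reports how
    # many size segments were removed.
    subn_orig_image = re.compile(r"/s\d+x\d+/").subn

    url = "https://64.media.tumblr.com/abcdef/s540x810/example.jpg"
    new_url, count = subn_orig_image("/", url)
    # new_url -> "https://64.media.tumblr.com/abcdef/example.jpg"
    # count   -> 1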
@@ -88,7 +87,7 @@ class TumblrExtractor(Extractor):
if self.avatar:
url = self.api.avatar(self.blog)
- yield Message.Directory, {"blog": blog}
+ yield Message.Directory, "", {"blog": blog}
yield self._prepare_avatar(url, post.copy(), blog)
post["blog"] = blog
@@ -100,7 +99,7 @@ class TumblrExtractor(Extractor):
if "trail" in post:
del post["trail"]
- post["date"] = text.parse_timestamp(post["timestamp"])
+ post["date"] = self.parse_timestamp(post["timestamp"])
posts = []
if "photos" in post: # type "photo" or "link"
@@ -161,7 +160,7 @@ class TumblrExtractor(Extractor):
del post["extension"]
post["count"] = len(posts)
- yield Message.Directory, post
+ yield Message.Directory, "", post
for num, (msg, url, post) in enumerate(posts, 1):
post["num"] = num
@@ -271,7 +270,7 @@ class TumblrExtractor(Extractor):
class TumblrUserExtractor(TumblrExtractor):
"""Extractor for a Tumblr user's posts"""
subcategory = "user"
- pattern = BASE_PATTERN + r"(?:/page/\d+|/archive)?/?$"
+ pattern = rf"{BASE_PATTERN}(?:/page/\d+|/archive)?/?$"
example = "https://www.tumblr.com/BLOG"
def posts(self):
@@ -281,7 +280,7 @@ class TumblrUserExtractor(TumblrExtractor):
class TumblrPostExtractor(TumblrExtractor):
"""Extractor for a single Tumblr post"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/(?:post/|image/)?(\d+)"
+ pattern = rf"{BASE_PATTERN}/(?:post/|image/)?(\d+)"
example = "https://www.tumblr.com/BLOG/12345"
def posts(self):
@@ -296,7 +295,7 @@ class TumblrPostExtractor(TumblrExtractor):
class TumblrTagExtractor(TumblrExtractor):
"""Extractor for Tumblr user's posts by tag"""
subcategory = "tag"
- pattern = BASE_PATTERN + r"(?:/archive)?/tagged/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(?:/archive)?/tagged/([^/?#]+)"
example = "https://www.tumblr.com/BLOG/tagged/TAG"
def posts(self):
@@ -308,12 +307,12 @@ class TumblrTagExtractor(TumblrExtractor):
class TumblrDayExtractor(TumblrExtractor):
"""Extractor for Tumblr user's posts by day"""
subcategory = "day"
- pattern = BASE_PATTERN + r"/day/(\d\d\d\d/\d\d/\d\d)"
+ pattern = rf"{BASE_PATTERN}/day/(\d\d\d\d/\d\d/\d\d)"
example = "https://www.tumblr.com/BLOG/day/1970/01/01"
def posts(self):
year, month, day = self.groups[3].split("/")
- ordinal = date(int(year), int(month), int(day)).toordinal()
+ ordinal = dt.date(int(year), int(month), int(day)).toordinal()
# 719163 == date(1970, 1, 1).toordinal()
self.date_min = (ordinal - 719163) * 86400
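As the comment above notes, 719163 is the ordinal of 1970-01-01, so the subtraction yields days since the Unix epoch and the multiplication converts to seconds; the requested day's posts then fall in [date_min, date_min + 86400). A stdlib-only check of the constant and bounds:

    from datetime import date

    # The constant used above: proleptic-Gregorian ordinal of the epoch day.
    assert date(1970, 1, 1).toordinal() == 719163

    def day_bounds(year, month, day):
        """(start, end) Unix timestamps covering one calendar day."""
        start = (date(year, month, day).toordinal() - 719163) * 86400
        return start, start + 86400

    # day_bounds(1970, 1, 2) -> (86400, 172800)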
@@ -326,7 +325,7 @@ class TumblrLikesExtractor(TumblrExtractor):
subcategory = "likes"
directory_fmt = ("{category}", "{blog_name}", "likes")
archive_fmt = "f_{blog[name]}_{id}_{num}"
- pattern = BASE_PATTERN + r"/likes"
+ pattern = rf"{BASE_PATTERN}/likes"
example = "https://www.tumblr.com/BLOG/likes"
def posts(self):
@@ -336,7 +335,7 @@ class TumblrLikesExtractor(TumblrExtractor):
class TumblrFollowingExtractor(TumblrExtractor):
"""Extractor for a Tumblr user's followed blogs"""
subcategory = "following"
- pattern = BASE_PATTERN + r"/following"
+ pattern = rf"{BASE_PATTERN}/following"
example = "https://www.tumblr.com/BLOG/following"
items = TumblrExtractor.items_blogs
@@ -348,7 +347,7 @@ class TumblrFollowingExtractor(TumblrExtractor):
class TumblrFollowersExtractor(TumblrExtractor):
"""Extractor for a Tumblr user's followers"""
subcategory = "followers"
- pattern = BASE_PATTERN + r"/followers"
+ pattern = rf"{BASE_PATTERN}/followers"
example = "https://www.tumblr.com/BLOG/followers"
items = TumblrExtractor.items_blogs
@@ -514,7 +513,7 @@ class TumblrAPI(oauth.OAuth1API):
self.extractor.wait(seconds=reset)
continue
- t = (datetime.now() + timedelta(0, float(reset))).time()
+ t = (dt.now() + dt.timedelta(0, float(reset))).time()
raise exception.AbortExtraction(
f"Aborting - Rate limit will reset at "
f"{t.hour:02}:{t.minute:02}:{t.second:02}")
diff --git a/gallery_dl/extractor/tumblrgallery.py b/gallery_dl/extractor/tumblrgallery.py
index 26868ec..68c9ec7 100644
--- a/gallery_dl/extractor/tumblrgallery.py
+++ b/gallery_dl/extractor/tumblrgallery.py
@@ -36,7 +36,7 @@ class TumblrgalleryExtractor(GalleryExtractor):
class TumblrgalleryTumblrblogExtractor(TumblrgalleryExtractor):
"""Extractor for Tumblrblog on tumblrgallery.xyz"""
subcategory = "tumblrblog"
- pattern = BASE_PATTERN + r"(/tumblrblog/gallery/(\d+)\.html)"
+ pattern = rf"{BASE_PATTERN}(/tumblrblog/gallery/(\d+)\.html)"
example = "https://tumblrgallery.xyz/tumblrblog/gallery/12345.html"
def __init__(self, match):
@@ -68,7 +68,7 @@ class TumblrgalleryTumblrblogExtractor(TumblrgalleryExtractor):
class TumblrgalleryPostExtractor(TumblrgalleryExtractor):
"""Extractor for Posts on tumblrgallery.xyz"""
subcategory = "post"
- pattern = BASE_PATTERN + r"(/post/(\d+)\.html)"
+ pattern = rf"{BASE_PATTERN}(/post/(\d+)\.html)"
example = "https://tumblrgallery.xyz/post/12345.html"
def __init__(self, match):
@@ -93,7 +93,7 @@ class TumblrgallerySearchExtractor(TumblrgalleryExtractor):
subcategory = "search"
filename_fmt = "{category}_{num:>03}_{gallery_id}_{id}_{title}.{extension}"
directory_fmt = ("{category}", "{search_term}")
- pattern = BASE_PATTERN + r"(/s\.php\?q=([^&#]+))"
+ pattern = rf"{BASE_PATTERN}(/s\.php\?q=([^&#]+))"
example = "https://tumblrgallery.xyz/s.php?q=QUERY"
def __init__(self, match):
diff --git a/gallery_dl/extractor/tungsten.py b/gallery_dl/extractor/tungsten.py
index 45836a9..67c0b50 100644
--- a/gallery_dl/extractor/tungsten.py
+++ b/gallery_dl/extractor/tungsten.py
@@ -23,10 +23,10 @@ class TungstenExtractor(Extractor):
def items(self):
for post in self.posts():
url = post["original_url"]
- post["date"] = text.parse_datetime(post["created_at"])
+ post["date"] = self.parse_datetime_iso(post["created_at"])
post["filename"] = url[url.rfind("/")+1:]
post["extension"] = "webp"
- yield Message.Directory, post
+ yield Message.Directory, "", post
yield Message.Url, url, post
def _pagination(self, url, params):
diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py
index 4f9fe84..e21ef2a 100644
--- a/gallery_dl/extractor/twibooru.py
+++ b/gallery_dl/extractor/twibooru.py
@@ -37,8 +37,7 @@ class TwibooruExtractor(BooruExtractor):
return post["view_url"]
def _prepare(self, post):
- post["date"] = text.parse_datetime(
- post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ post["date"] = self.parse_datetime_iso(post["created_at"])
if "name" in post:
name, sep, rest = post["name"].rpartition(".")
@@ -49,7 +48,7 @@ class TwibooruPostExtractor(TwibooruExtractor):
"""Extractor for single twibooru posts"""
subcategory = "post"
request_interval = (0.5, 1.5)
- pattern = BASE_PATTERN + r"/(\d+)"
+ pattern = rf"{BASE_PATTERN}/(\d+)"
example = "https://twibooru.org/12345"
def __init__(self, match):
@@ -64,7 +63,7 @@ class TwibooruSearchExtractor(TwibooruExtractor):
"""Extractor for twibooru search results"""
subcategory = "search"
directory_fmt = ("{category}", "{search_tags}")
- pattern = BASE_PATTERN + r"/(?:search/?\?([^#]+)|tags/([^/?#]+))"
+ pattern = rf"{BASE_PATTERN}/(?:search/?\?([^#]+)|tags/([^/?#]+))"
example = "https://twibooru.org/search?q=TAG"
def __init__(self, match):
@@ -98,7 +97,7 @@ class TwibooruGalleryExtractor(TwibooruExtractor):
subcategory = "gallery"
directory_fmt = ("{category}", "galleries",
"{gallery[id]} {gallery[title]}")
- pattern = BASE_PATTERN + r"/galleries/(\d+)"
+ pattern = rf"{BASE_PATTERN}/galleries/(\d+)"
example = "https://twibooru.org/galleries/12345"
def __init__(self, match):
@@ -146,8 +145,8 @@ class TwibooruAPI():
return response.json()
if response.status_code == 429:
- until = text.parse_datetime(
- response.headers["X-RL-Reset"], "%Y-%m-%d %H:%M:%S %Z")
+ until = self.parse_datetime_iso(
+ response.headers["X-RL-Reset"][:19])
# wait an extra minute, just to be safe
self.extractor.wait(until=until, adjust=60.0)
continue
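Truncating X-RL-Reset to 19 characters keeps just the "YYYY-MM-DD HH:MM:SS" part, which parses as a timezone-less ISO timestamp. A quick stdlib illustration (the header value is invented, and parse_datetime_iso is assumed to accept the same shape):

    from datetime import datetime

    header = "2025-01-31 12:34:56 UTC"   # invented example value
    until = datetime.fromisoformat(header[:19])
    # until -> datetime(2025, 1, 31, 12, 34, 56)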
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index bf125a6..546e8e1 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -76,7 +76,7 @@ class TwitterExtractor(Extractor):
seen_tweets = set() if self.config("unique", True) else None
if self.twitpic:
- self._find_twitpic = util.re(
+ self._find_twitpic = text.re(
r"https?(://twitpic\.com/(?!photos/)\w+)").findall
tweets = self.tweets()
@@ -124,12 +124,11 @@ class TwitterExtractor(Extractor):
tdata = self._transform_tweet(tweet)
tdata.update(metadata)
tdata["count"] = len(files)
- yield Message.Directory, tdata
+ yield Message.Directory, "", tdata
- del tdata["source_id"]
- del tdata["sensitive_flags"]
- if "source_user" in tdata:
- del tdata["source_user"]
+ tdata.pop("source_id", None)
+ tdata.pop("source_user", None)
+ tdata.pop("sensitive_flags", None)
for tdata["num"], file in enumerate(files, 1):
file.update(tdata)
@@ -146,7 +145,7 @@ class TwitterExtractor(Extractor):
self._extract_media(
data, data["extended_entities"]["media"], files)
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
self.log.warning(
"%s: Error while extracting media files (%s: %s)",
data["id_str"], exc.__class__.__name__, exc)
@@ -155,7 +154,7 @@ class TwitterExtractor(Extractor):
try:
self._extract_card(tweet, files)
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
self.log.warning(
"%s: Error while extracting Card files (%s: %s)",
data["id_str"], exc.__class__.__name__, exc)
@@ -164,7 +163,7 @@ class TwitterExtractor(Extractor):
try:
self._extract_twitpic(data, files)
except Exception as exc:
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
self.log.warning(
"%s: Error while extracting TwitPic files (%s: %s)",
data["id_str"], exc.__class__.__name__, exc)
@@ -347,32 +346,36 @@ class TwitterExtractor(Extractor):
files.append({"url": url})
def _transform_tweet(self, tweet):
+ if "legacy" in tweet:
+ legacy = tweet["legacy"]
+ else:
+ legacy = tweet
+ tweet_id = int(legacy["id_str"])
+
if "author" in tweet:
author = tweet["author"]
elif "core" in tweet:
- author = tweet["core"]["user_results"]["result"]
+ try:
+ author = tweet["core"]["user_results"]["result"]
+ except KeyError:
+ self.log.warning("%s: Missing 'author' data", tweet_id)
+ author = util.NONE
else:
author = tweet["user"]
author = self._transform_user(author)
- if "legacy" in tweet:
- legacy = tweet["legacy"]
- else:
- legacy = tweet
- tget = legacy.get
-
- tweet_id = int(legacy["id_str"])
if tweet_id >= 300000000000000:
- date = text.parse_timestamp(
+ date = self.parse_timestamp(
((tweet_id >> 22) + 1288834974657) // 1000)
else:
try:
- date = text.parse_datetime(
+ date = self.parse_datetime(
legacy["created_at"], "%a %b %d %H:%M:%S %z %Y")
except Exception:
date = util.NONE
source = tweet.get("source")
+ tget = legacy.get
tdata = {
"tweet_id" : tweet_id,
"retweet_id" : text.parse_int(
@@ -439,6 +442,8 @@ class TwitterExtractor(Extractor):
txt, _, tco = content.rpartition(" ")
tdata["content"] = txt if tco.startswith("https://t.co/") else content
+ if "pinned" in tweet:
+ tdata["pinned"] = True
if "birdwatch_pivot" in tweet:
try:
tdata["birdwatch"] = \
@@ -455,7 +460,7 @@ class TwitterExtractor(Extractor):
tdata, legacy["extended_entities"]["media"][0])
if tdata["retweet_id"]:
tdata["content"] = f"RT @{author['name']}: {tdata['content']}"
- tdata["date_original"] = text.parse_timestamp(
+ tdata["date_original"] = self.parse_timestamp(
((tdata["retweet_id"] >> 22) + 1288834974657) // 1000)
return tdata
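Both 'date' and 'date_original' above recover a timestamp from the ID itself: Snowflake IDs (in use since late 2010, hence the >= 300000000000000 guard) store milliseconds since the Twitter epoch in their upper bits. A standalone stdlib version of the arithmetic:

    from datetime import datetime, timezone

    TWITTER_EPOCH_MS = 1288834974657  # 2010-11-04 01:42:54.657 UTC

    def snowflake_date(tweet_id):
        # upper bits of a Snowflake ID = milliseconds since the Twitter epoch
        ms = (tweet_id >> 22) + TWITTER_EPOCH_MS
        return datetime.fromtimestamp(ms // 1000, tz=timezone.utc)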
@@ -492,7 +497,7 @@ class TwitterExtractor(Extractor):
"id": text.parse_int(cid),
"name": com.get("name"),
"description": com.get("description"),
- "date": text.parse_timestamp(com.get("created_at", 0) // 1000),
+ "date": self.parse_timestamp(com.get("created_at", 0) // 1000),
"nsfw": com.get("is_nsfw"),
"role": com.get("role"),
"member_count": com.get("member_count"),
@@ -528,13 +533,13 @@ class TwitterExtractor(Extractor):
"id" : text.parse_int(uid),
"name" : core.get("screen_name"),
"nick" : core.get("name"),
- "location" : user["location"]["location"],
- "date" : text.parse_datetime(
+ "location" : user["location"].get("location"),
+ "date" : self.parse_datetime(
core["created_at"], "%a %b %d %H:%M:%S %z %Y"),
"verified" : user["verification"]["verified"],
"protected" : user["privacy"]["protected"],
"profile_banner" : lget("profile_banner_url", ""),
- "profile_image" : user["avatar"]["image_url"].replace(
+ "profile_image" : user["avatar"].get("image_url", "").replace(
"_normal.", "."),
"favourites_count": lget("favourites_count"),
"followers_count" : lget("followers_count"),
@@ -591,9 +596,12 @@ class TwitterExtractor(Extractor):
obj = tweet["legacy"] if "legacy" in tweet else tweet
cid = obj.get("conversation_id_str")
if not cid:
- tid = obj["id_str"]
- self.log.warning(
- "Unable to expand %s (no 'conversation_id')", tid)
+ if cid is False:
+ yield tweet
+ else:
+ tid = obj["id_str"]
+ self.log.warning(
+ "Unable to expand %s (no 'conversation_id')", tid)
continue
if cid in seen:
self.log.debug(
@@ -608,6 +616,7 @@ class TwitterExtractor(Extractor):
def _make_tweet(self, user, url, id_str):
return {
"id_str": id_str,
+ "conversation_id_str": False,
"lang": None,
"user": user,
"source": "><",
@@ -658,8 +667,8 @@ class TwitterExtractor(Extractor):
class TwitterHomeExtractor(TwitterExtractor):
"""Extractor for Twitter home timelines"""
subcategory = "home"
- pattern = (BASE_PATTERN +
- r"/(?:home(?:/fo(?:llowing|r[-_ ]?you()))?|i/timeline)/?$")
+ pattern = (rf"{BASE_PATTERN}/"
+ rf"(?:home(?:/fo(?:llowing|r[-_ ]?you()))?|i/timeline)/?$")
example = "https://x.com/home"
def tweets(self):
@@ -671,7 +680,7 @@ class TwitterHomeExtractor(TwitterExtractor):
class TwitterSearchExtractor(TwitterExtractor):
"""Extractor for Twitter search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)"
+ pattern = rf"{BASE_PATTERN}/search/?\?(?:[^&#]+&)*q=([^&#]+)"
example = "https://x.com/search?q=QUERY"
def metadata(self):
@@ -702,7 +711,7 @@ class TwitterSearchExtractor(TwitterExtractor):
class TwitterHashtagExtractor(TwitterExtractor):
"""Extractor for Twitter hashtags"""
subcategory = "hashtag"
- pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/hashtag/([^/?#]+)"
example = "https://x.com/hashtag/NAME"
def items(self):
@@ -713,7 +722,7 @@ class TwitterHashtagExtractor(TwitterExtractor):
class TwitterUserExtractor(Dispatch, TwitterExtractor):
"""Extractor for a Twitter user"""
- pattern = (BASE_PATTERN + r"/(?:"
+ pattern = (rf"{BASE_PATTERN}/(?:"
r"([^/?#]+)/?(?:$|\?|#)"
r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
example = "https://x.com/USER"
@@ -890,7 +899,7 @@ class TwitterLikesExtractor(TwitterExtractor):
class TwitterBookmarkExtractor(TwitterExtractor):
"""Extractor for bookmarked tweets"""
subcategory = "bookmark"
- pattern = BASE_PATTERN + r"/i/bookmarks()"
+ pattern = rf"{BASE_PATTERN}/i/bookmarks()"
example = "https://x.com/i/bookmarks"
def tweets(self):
@@ -898,7 +907,7 @@ class TwitterBookmarkExtractor(TwitterExtractor):
def _transform_tweet(self, tweet):
tdata = TwitterExtractor._transform_tweet(self, tweet)
- tdata["date_bookmarked"] = text.parse_timestamp(
+ tdata["date_bookmarked"] = self.parse_timestamp(
(int(tweet["sortIndex"] or 0) >> 20) // 1000)
return tdata
@@ -906,7 +915,7 @@ class TwitterBookmarkExtractor(TwitterExtractor):
class TwitterListExtractor(TwitterExtractor):
"""Extractor for Twitter lists"""
subcategory = "list"
- pattern = BASE_PATTERN + r"/i/lists/(\d+)/?$"
+ pattern = rf"{BASE_PATTERN}/i/lists/(\d+)/?$"
example = "https://x.com/i/lists/12345"
def tweets(self):
@@ -916,7 +925,7 @@ class TwitterListExtractor(TwitterExtractor):
class TwitterListMembersExtractor(TwitterExtractor):
"""Extractor for members of a Twitter list"""
subcategory = "list-members"
- pattern = BASE_PATTERN + r"/i/lists/(\d+)/members"
+ pattern = rf"{BASE_PATTERN}/i/lists/(\d+)/members"
example = "https://x.com/i/lists/12345/members"
def items(self):
@@ -952,7 +961,7 @@ class TwitterCommunityExtractor(TwitterExtractor):
directory_fmt = ("{category}", "Communities",
"{community[name]} ({community[id]})")
archive_fmt = "C_{community[id]}_{tweet_id}_{num}"
- pattern = BASE_PATTERN + r"/i/communities/(\d+)"
+ pattern = rf"{BASE_PATTERN}/i/communities/(\d+)"
example = "https://x.com/i/communities/12345"
def tweets(self):
@@ -966,7 +975,7 @@ class TwitterCommunitiesExtractor(TwitterExtractor):
subcategory = "communities"
directory_fmt = TwitterCommunityExtractor.directory_fmt
archive_fmt = TwitterCommunityExtractor.archive_fmt
- pattern = BASE_PATTERN + r"/([^/?#]+)/communities/?$"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/communities/?$"
example = "https://x.com/i/communities"
def tweets(self):
@@ -978,7 +987,7 @@ class TwitterEventExtractor(TwitterExtractor):
subcategory = "event"
directory_fmt = ("{category}", "Events",
"{event[id]} {event[short_title]}")
- pattern = BASE_PATTERN + r"/i/events/(\d+)"
+ pattern = rf"{BASE_PATTERN}/i/events/(\d+)"
example = "https://x.com/i/events/12345"
def metadata(self):
@@ -991,7 +1000,7 @@ class TwitterEventExtractor(TwitterExtractor):
class TwitterTweetExtractor(TwitterExtractor):
"""Extractor for individual tweets"""
subcategory = "tweet"
- pattern = (BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)"
+ pattern = (rf"{BASE_PATTERN}/([^/?#]+|i/web)/status/(\d+)"
r"/?(?:$|\?|#|photo/|video/)")
example = "https://x.com/USER/status/12345"
@@ -1072,7 +1081,7 @@ class TwitterTweetExtractor(TwitterExtractor):
class TwitterQuotesExtractor(TwitterExtractor):
"""Extractor for quotes of a Tweet"""
subcategory = "quotes"
- pattern = BASE_PATTERN + r"/(?:[^/?#]+|i/web)/status/(\d+)/quotes"
+ pattern = rf"{BASE_PATTERN}/(?:[^/?#]+|i/web)/status/(\d+)/quotes"
example = "https://x.com/USER/status/12345/quotes"
def items(self):
@@ -1096,7 +1105,7 @@ class TwitterInfoExtractor(TwitterExtractor):
else:
user = api.user_by_screen_name(screen_name)
- return iter(((Message.Directory, self._transform_user(user)),))
+ return iter(((Message.Directory, "", self._transform_user(user)),))
class TwitterAvatarExtractor(TwitterExtractor):
@@ -1162,7 +1171,7 @@ class TwitterImageExtractor(Extractor):
"_fallback": TwitterExtractor._image_fallback(self, base),
}
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, base + self._size_image, data
@@ -1369,7 +1378,7 @@ class TwitterAPI():
endpoint = "/graphql/E8Wq-_jFSaU7hxVcuOPR9g/UserTweets"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
"withQuickPromoteEligibilityTweetFields": False,
"withVoice": True,
@@ -1384,7 +1393,7 @@ class TwitterAPI():
endpoint = "/graphql/-O3QOHrVn1aOm_cF5wyTCQ/UserTweetsAndReplies"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
"withCommunity": True,
"withVoice": True,
@@ -1399,7 +1408,7 @@ class TwitterAPI():
endpoint = "/graphql/gmHw9geMTncZ7jeLLUUNOw/UserHighlightsTweets"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
"withVoice": True,
}
@@ -1413,7 +1422,7 @@ class TwitterAPI():
endpoint = "/graphql/jCRhbOzdgOHp6u9H4g2tEg/UserMedia"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
"withClientEventToken": False,
"withBirdwatchNotes": False,
@@ -1429,7 +1438,7 @@ class TwitterAPI():
endpoint = "/graphql/TGEKkJG_meudeaFcqaxM-Q/Likes"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
"withClientEventToken": False,
"withBirdwatchNotes": False,
@@ -1444,32 +1453,45 @@ class TwitterAPI():
def user_bookmarks(self):
endpoint = "/graphql/pLtjrO4ubNh996M_Cubwsg/Bookmarks"
variables = {
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
}
return self._pagination_tweets(
endpoint, variables, ("bookmark_timeline_v2", "timeline"),
stop_tweets=128)
- def search_timeline(self, query, product="Latest"):
+ def search_timeline(self, query, product=None):
+ cfg = self.extractor.config
+
+ if product is None:
+ if product := cfg("search-results"):
+ product = {
+ "top" : "Top",
+ "live" : "Latest",
+ "user" : "People",
+ "media": "Media",
+ "list" : "Lists",
+ }.get(product.lower(), product).capitalize()
+ else:
+ product = "Latest"
+
endpoint = "/graphql/4fpceYZ6-YQCx_JSl_Cn_A/SearchTimeline"
variables = {
"rawQuery": query,
- "count": self.extractor.config("search-limit", 20),
+ "count": cfg("search-limit", 20),
"querySource": "typed_query",
"product": product,
"withGrokTranslatedBio": False,
}
- if self.extractor.config("search-pagination") in (
- "max_id", "maxid", "id"):
+ if cfg("search-pagination") in ("max_id", "maxid", "id"):
update_variables = self._update_variables_search
else:
update_variables = None
- stop_tweets = self.extractor.config("search-stop")
+ stop_tweets = cfg("search-stop")
if stop_tweets is None or stop_tweets == "auto":
- stop_tweets = 3 if update_variables is None else 0
+ stop_tweets = 3
return self._pagination_tweets(
endpoint, variables,
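The new 'search-results' option accepts the short aliases above; anything else falls through .get() unchanged, and the trailing .capitalize() normalizes casing either way. The lookup in isolation, with the config access replaced by a plain argument:

    PRODUCTS = {
        "top"  : "Top",
        "live" : "Latest",
        "user" : "People",
        "media": "Media",
        "list" : "Lists",
    }

    def resolve_product(value=None):
        if not value:
            return "Latest"
        return PRODUCTS.get(value.lower(), value).capitalize()

    # resolve_product("media") -> "Media"
    # resolve_product("lists") -> "Lists"   (pass-through, capitalized)
    # resolve_product()        -> "Latest"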
@@ -1494,7 +1516,7 @@ class TwitterAPI():
endpoint = "/graphql/Nyt-88UX4-pPCImZNUl9RQ/CommunityTweetsTimeline"
variables = {
"communityId": community_id,
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"displayLocation": "Community",
"rankingMode": "Recency",
"withCommunity": True,
@@ -1508,7 +1530,7 @@ class TwitterAPI():
endpoint = "/graphql/ZniZ7AAK_VVu1xtSx1V-gQ/CommunityMediaTimeline"
variables = {
"communityId": community_id,
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"withCommunity": True,
}
return self._pagination_tweets(
@@ -1520,7 +1542,7 @@ class TwitterAPI():
endpoint = ("/graphql/p048a9n3hTPppQyK7FQTFw"
"/CommunitiesMainPageTimeline")
variables = {
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"withCommunity": True,
}
return self._pagination_tweets(
@@ -1530,7 +1552,7 @@ class TwitterAPI():
def home_timeline(self):
endpoint = "/graphql/DXmgQYmIft1oLP6vMkJixw/HomeTimeline"
variables = {
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
"latestControlAvailable": True,
"withCommunity": True,
@@ -1541,7 +1563,7 @@ class TwitterAPI():
def home_latest_timeline(self):
endpoint = "/graphql/SFxmNKWfN9ySJcXG_tjX8g/HomeLatestTimeline"
variables = {
- "count": 100,
+ "count": self.extractor.config("limit", 50),
"includePromotedContent": False,
"latestControlAvailable": True,
}
@@ -1568,7 +1590,7 @@ class TwitterAPI():
endpoint = "/graphql/06JtmwM8k_1cthpFZITVVA/ListLatestTweetsTimeline"
variables = {
"listId": list_id,
- "count": 100,
+ "count": self.extractor.config("limit", 50),
}
return self._pagination_tweets(
endpoint, variables, ("list", "tweets_timeline", "timeline"))
@@ -1654,10 +1676,8 @@ class TwitterAPI():
self.extractor._assign_user(user)
return user["rest_id"]
except KeyError:
- if "unavailable_message" in user:
- raise exception.NotFoundError(
- f"{user['unavailable_message'].get('text')} "
- f"({user.get('reason')})", False)
+ if user and user.get("__typename") == "UserUnavailable":
+ raise exception.NotFoundError(user["message"], False)
else:
raise exception.NotFoundError("user")
@@ -1700,7 +1720,7 @@ class TwitterAPI():
self.client_transaction.generate_transaction_id(method, path)
def _call(self, endpoint, params, method="GET", auth=True, root=None):
- url = (root or self.root) + endpoint
+ url = (self.root if root is None else root) + endpoint
while True:
if auth:
@@ -1877,8 +1897,17 @@ class TwitterAPI():
features=None, field_toggles=None):
extr = self.extractor
original_retweets = (extr.retweets == "original")
- pinned_tweet = extr.pinned
+ pinned_tweet = True if extr.pinned else None
stop_tweets_max = stop_tweets
+ api_retries = None
+
+ if isinstance(count := variables.get("count"), list):
+ count = count.copy()
+ count.reverse()
+ self.log.debug("Using 'count: %s'", count[-1])
+ variables["count"] = count.pop()
+ else:
+ count = False
params = {"variables": None}
if cursor := extr._init_cursor():
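When 'count' is configured as a list, the code above reverses a copy so pop() hands out the values in their original order; the pagination loop further down switches to the next value after repeated empty pages. The consumption pattern on its own:

    def count_sequence(count):
        # mirror of the list handling above: reversed copy + pop()
        if isinstance(count, list):
            stack = count.copy()
            stack.reverse()
            while stack:
                yield stack.pop()   # original order: first value first
        else:
            yield count

    # list(count_sequence([100, 50, 20])) -> [100, 50, 20]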
@@ -1892,14 +1921,14 @@ class TwitterAPI():
while True:
params["variables"] = self._json_dumps(variables)
- data = self._call(endpoint, params)["data"]
+ data = self._call(endpoint, params)
try:
if path is None:
- instructions = (data["user"]["result"]["timeline"]
+ instructions = (data["data"]["user"]["result"]["timeline"]
["timeline"]["instructions"])
else:
- instructions = data
+ instructions = data["data"]
for key in path:
instructions = instructions[key]
instructions = instructions["instructions"]
@@ -1916,7 +1945,7 @@ class TwitterAPI():
elif instr_type == "TimelineAddToModule":
entries = instr["moduleItems"]
elif instr_type == "TimelinePinEntry":
- if pinned_tweet:
+ if pinned_tweet is not None:
pinned_tweet = instr["entry"]
elif instr_type == "TimelineReplaceEntry":
entry = instr["entry"]
@@ -1930,6 +1959,26 @@ class TwitterAPI():
except LookupError:
extr.log.debug(data)
+ if errors := data.get("errors"):
+ if api_retries is None:
+ api_tries = 1
+ api_retries = extr.config("retries-api", 9)
+ if api_retries < 0:
+ api_retries = float("inf")
+
+ err = []
+ srv = False
+ for e in errors:
+ err.append(f"- '{e.get('message') or e.get('name')}'")
+ if e.get("source") == "Server":
+ srv = True
+
+ self.log.warning("API errors (%s/%s):\n%s",
+ api_tries, api_retries+1, "\n".join(err))
+ if srv and api_tries <= api_retries:
+ api_tries += 1
+ continue
+
if user := extr._user_obj:
user = user["legacy"]
if user.get("blocked_by"):
@@ -1950,14 +1999,13 @@ class TwitterAPI():
"Unable to retrieve Tweets from this timeline")
tweets = []
- tweet = None
+ tweet = last_tweet = retry = None
+ api_tries = 1
- if pinned_tweet:
- if isinstance(pinned_tweet, dict):
- tweets.append(pinned_tweet)
- elif instructions[-1]["type"] == "TimelinePinEntry":
- tweets.append(instructions[-1]["entry"])
- pinned_tweet = False
+ if pinned_tweet is not None and isinstance(pinned_tweet, dict):
+ pinned_tweet["pinned"] = True
+ tweets.append(pinned_tweet)
+ pinned_tweet = None
for entry in entries:
esw = entry["entryId"].startswith
@@ -1965,6 +2013,7 @@ class TwitterAPI():
if esw("tweet-"):
tweets.append(entry)
elif esw(("profile-grid-",
+ "search-grid-",
"communities-grid-")):
if "content" in entry:
tweets.extend(entry["content"]["items"])
@@ -1988,6 +2037,28 @@ class TwitterAPI():
tweet = True
cursor = cursor.get("value")
+ if pinned_tweet is not None:
+ if extr._user_obj is None:
+ pinned = None
+ elif pinned := extr._user_obj["legacy"].get(
+ "pinned_tweet_ids_str"):
+ pinned = f"-tweet-{pinned[0]}"
+ for idx, entry in enumerate(tweets):
+ if entry["entryId"].endswith(pinned):
+ # mark the entry as pinned by setting 'pinned' to True
+ pinned_tweet = (
+ (entry.get("content") or entry["item"])
+ ["itemContent"]["tweet_results"]["result"])
+ if "tweet" in pinned_tweet:
+ pinned_tweet = pinned_tweet["tweet"]
+ pinned_tweet["pinned"] = True
+ # move to front of 'tweets'
+ del tweets[idx]
+ tweets.insert(0, entry)
+ break
+ del pinned
+ pinned_tweet = None
+
for entry in tweets:
try:
item = ((entry.get("content") or entry["item"])
@@ -2015,6 +2086,16 @@ class TwitterAPI():
(entry.get("entryId") or "").rpartition("-")[2])
continue
+ if retry is None:
+ try:
+ tweet["core"]["user_results"]["result"]
+ retry = False
+ except KeyError:
+ self.log.warning("Received Tweet results without "
+ "'core' data ... Retrying")
+ retry = True
+ break
+
if "retweeted_status_result" in legacy:
try:
retweet = legacy["retweeted_status_result"]["result"]
@@ -2071,18 +2152,25 @@ class TwitterAPI():
tweet.get("rest_id"))
continue
- if tweet:
+ if retry:
+ continue
+ elif tweet:
stop_tweets = stop_tweets_max
last_tweet = tweet
- else:
- if stop_tweets <= 0:
+ elif stop_tweets <= 0:
+ if not count:
return extr._update_cursor(None)
+ self.log.debug("Switching to 'count: %s'", count[-1])
+ variables["count"] = count.pop()
+ continue
+ else:
self.log.debug(
"No Tweet results (%s/%s)",
stop_tweets_max - stop_tweets + 1, stop_tweets_max)
stop_tweets -= 1
if not cursor or cursor == variables.get("cursor"):
+ self.log.debug("No continuation cursor")
return extr._update_cursor(None)
if update_variables is None:
@@ -2169,7 +2257,7 @@ class TwitterAPI():
else:
variables["rawQuery"] = f"{query} {max_id}"
- if prefix := self.extractor._cursor_prefix:
+ if prefix := getattr(self.extractor, "_cursor_prefix", None):
self.extractor._cursor_prefix = \
f"{prefix.partition('_')[0]}_{tweet_id}/"
variables["cursor"] = None
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index cf6631f..b77be95 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -41,11 +41,11 @@ class UnsplashExtractor(Extractor):
if metadata:
photo.update(metadata)
photo["extension"] = "jpg"
- photo["date"] = text.parse_datetime(photo["created_at"])
+ photo["date"] = self.parse_datetime_iso(photo["created_at"])
if "tags" in photo:
photo["tags"] = [t["title"] for t in photo["tags"]]
- yield Message.Directory, photo
+ yield Message.Directory, "", photo
yield Message.Url, url, photo
def metadata(self):
@@ -74,7 +74,7 @@ class UnsplashExtractor(Extractor):
class UnsplashImageExtractor(UnsplashExtractor):
"""Extractor for a single unsplash photo"""
subcategory = "image"
- pattern = BASE_PATTERN + r"/photos/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/photos/([^/?#]+)"
example = "https://unsplash.com/photos/ID"
def photos(self):
@@ -85,7 +85,7 @@ class UnsplashImageExtractor(UnsplashExtractor):
class UnsplashUserExtractor(UnsplashExtractor):
"""Extractor for all photos of an unsplash user"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/@(\w+)/?$"
+ pattern = rf"{BASE_PATTERN}/@(\w+)/?$"
example = "https://unsplash.com/@USER"
def photos(self):
@@ -97,7 +97,7 @@ class UnsplashUserExtractor(UnsplashExtractor):
class UnsplashFavoriteExtractor(UnsplashExtractor):
"""Extractor for all likes of an unsplash user"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/@(\w+)/likes"
+ pattern = rf"{BASE_PATTERN}/@(\w+)/likes"
example = "https://unsplash.com/@USER/likes"
def photos(self):
@@ -109,7 +109,7 @@ class UnsplashFavoriteExtractor(UnsplashExtractor):
class UnsplashCollectionExtractor(UnsplashExtractor):
"""Extractor for an unsplash collection"""
subcategory = "collection"
- pattern = BASE_PATTERN + r"/collections/([^/?#]+)(?:/([^/?#]+))?"
+ pattern = rf"{BASE_PATTERN}/collections/([^/?#]+)(?:/([^/?#]+))?"
example = "https://unsplash.com/collections/12345/TITLE"
def __init__(self, match):
@@ -128,7 +128,7 @@ class UnsplashCollectionExtractor(UnsplashExtractor):
class UnsplashSearchExtractor(UnsplashExtractor):
"""Extractor for unsplash search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^#]+))?"
+ pattern = rf"{BASE_PATTERN}/s/photos/([^/?#]+)(?:\?([^#]+))?"
example = "https://unsplash.com/s/photos/QUERY"
def __init__(self, match):
diff --git a/gallery_dl/extractor/uploadir.py b/gallery_dl/extractor/uploadir.py
index d06c2ad..d80abba 100644
--- a/gallery_dl/extractor/uploadir.py
+++ b/gallery_dl/extractor/uploadir.py
@@ -53,5 +53,5 @@ class UploadirFileExtractor(Extractor):
data = text.nameext_from_url(name)
data["id"] = self.file_id
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, url, data
diff --git a/gallery_dl/extractor/urlgalleries.py b/gallery_dl/extractor/urlgalleries.py
index 4369ac6..0d8b3d3 100644
--- a/gallery_dl/extractor/urlgalleries.py
+++ b/gallery_dl/extractor/urlgalleries.py
@@ -38,7 +38,7 @@ class UrlgalleriesGalleryExtractor(GalleryExtractor):
data["count"] = len(imgs)
root = self.root
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], img in enumerate(imgs, 1):
page = self.request(root + img).text
url = text.extr(page, "window.location.href = '", "'")
@@ -52,7 +52,7 @@ class UrlgalleriesGalleryExtractor(GalleryExtractor):
"blog" : text.unescape(extr(' title="', '"')),
"_rprt": extr(' title="', '"'), # report button
"title": text.unescape(extr(' title="', '"').strip()),
- "date" : text.parse_datetime(
+ "date" : self.parse_datetime(
extr(" images in gallery | ", "<"), "%B %d, %Y"),
}
diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py
index 7a9269d..0056d1b 100644
--- a/gallery_dl/extractor/urlshortener.py
+++ b/gallery_dl/extractor/urlshortener.py
@@ -32,7 +32,7 @@ BASE_PATTERN = UrlshortenerExtractor.update({
class UrlshortenerLinkExtractor(UrlshortenerExtractor):
"""Extractor for general-purpose URL shorteners"""
subcategory = "link"
- pattern = BASE_PATTERN + r"(/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/[^/?#]+)"
example = "https://bit.ly/abcde"
def items(self):
diff --git a/gallery_dl/extractor/vanillarock.py b/gallery_dl/extractor/vanillarock.py
index e0107f3..63fc7fa 100644
--- a/gallery_dl/extractor/vanillarock.py
+++ b/gallery_dl/extractor/vanillarock.py
@@ -47,13 +47,13 @@ class VanillarockPostExtractor(VanillarockExtractor):
"count": len(imgs),
"title": text.unescape(name),
"path" : self.path.strip("/"),
- "date" : text.parse_datetime(extr(
- '<div class="date">', '</div>'), "%Y-%m-%d %H:%M"),
+ "date" : self.parse_datetime_iso(extr(
+ '<div class="date">', '</div>')),
"tags" : text.split_html(extr(
'<div class="cat-tag">', '</div>'))[::2],
}
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], url in enumerate(imgs, 1):
yield Message.Url, url, text.nameext_from_url(url, data)
diff --git a/gallery_dl/extractor/vichan.py b/gallery_dl/extractor/vichan.py
index cbb44ee..86758f3 100644
--- a/gallery_dl/extractor/vichan.py
+++ b/gallery_dl/extractor/vichan.py
@@ -39,7 +39,7 @@ class VichanThreadExtractor(VichanExtractor):
directory_fmt = ("{category}", "{board}", "{thread} {title}")
filename_fmt = "{time}{num:?-//} {filename}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
- pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/res/(\d+)"
example = "https://8kun.top/a/res/12345.html"
def items(self):
@@ -58,7 +58,7 @@ class VichanThreadExtractor(VichanExtractor):
"num" : 0,
}
- yield Message.Directory, data
+ yield Message.Directory, "", data
for post in posts:
if "filename" in post:
yield process(post, data)
@@ -93,7 +93,7 @@ class VichanThreadExtractor(VichanExtractor):
class VichanBoardExtractor(VichanExtractor):
"""Extractor for vichan boards"""
subcategory = "board"
- pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
example = "https://8kun.top/a/"
def items(self):
diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py
index 294fc57..8f6368b 100644
--- a/gallery_dl/extractor/vipergirls.py
+++ b/gallery_dl/extractor/vipergirls.py
@@ -75,7 +75,7 @@ class VipergirlsExtractor(Extractor):
data["count"] = len(images)
del data["imagecount"]
- yield Message.Directory, data
+ yield Message.Directory, "", data
if images:
for data["num"], image in enumerate(images, 1):
yield Message.Queue, image.attrib["main_url"], data
@@ -124,8 +124,8 @@ class VipergirlsExtractor(Extractor):
class VipergirlsThreadExtractor(VipergirlsExtractor):
"""Extractor for vipergirls threads"""
subcategory = "thread"
- pattern = (BASE_PATTERN +
- r"/threads/(\d+)(?:-[^/?#]+)?(/page\d+)?(?:$|#|\?(?!p=))")
+ pattern = (rf"{BASE_PATTERN}"
+ rf"/threads/(\d+)(?:-[^/?#]+)?(/page\d+)?(?:$|#|\?(?!p=))")
example = "https://vipergirls.to/threads/12345-TITLE"
def __init__(self, match):
@@ -140,8 +140,8 @@ class VipergirlsThreadExtractor(VipergirlsExtractor):
class VipergirlsPostExtractor(VipergirlsExtractor):
"""Extractor for vipergirls posts"""
subcategory = "post"
- pattern = (BASE_PATTERN +
- r"/threads/(\d+)(?:-[^/?#]+)?\?p=\d+[^#]*#post(\d+)")
+ pattern = (rf"{BASE_PATTERN}"
+ rf"/threads/(\d+)(?:-[^/?#]+)?\?p=\d+[^#]*#post(\d+)")
example = "https://vipergirls.to/threads/12345-TITLE?p=23456#post23456"
def __init__(self, match):
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 22d4b9a..e7453fc 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -9,7 +9,7 @@
"""Extractors for https://vk.com/"""
from .common import Extractor, Message
-from .. import text, util, exception
+from .. import text, exception
BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
@@ -36,11 +36,11 @@ class VkExtractor(Extractor):
return num
def items(self):
- subn = util.re(r"/imp[fg]/").subn
+ subn = text.re(r"/imp[fg]/").subn
sizes = "wzyxrqpo"
data = self.metadata()
- yield Message.Directory, data
+ yield Message.Directory, "", data
for photo in self.photos():
@@ -72,7 +72,7 @@ class VkExtractor(Extractor):
photo["width"] = photo["height"] = 0
photo["id"] = photo["id"].rpartition("_")[2]
- photo["date"] = text.parse_timestamp(text.extr(
+ photo["date"] = self.parse_timestamp(text.extr(
photo["date"], 'data-date="', '"'))
photo["description"] = text.unescape(text.extr(
photo.get("desc", ""), ">", "<"))
@@ -134,7 +134,7 @@ class VkExtractor(Extractor):
class VkPhotosExtractor(VkExtractor):
"""Extractor for photos from a vk user"""
subcategory = "photos"
- pattern = (BASE_PATTERN + r"/(?:"
+ pattern = (rf"{BASE_PATTERN}/(?:"
r"(?:albums|photos|id)(-?\d+)"
r"|(?!(?:album|tag|wall)-?\d+_?)([^/?#]+))")
example = "https://vk.com/id12345"
@@ -184,7 +184,7 @@ class VkAlbumExtractor(VkExtractor):
"""Extractor for a vk album"""
subcategory = "album"
directory_fmt = ("{category}", "{user[id]}", "{album[id]}")
- pattern = BASE_PATTERN + r"/album(-?\d+)_(\d+)$"
+ pattern = rf"{BASE_PATTERN}/album(-?\d+)_(\d+)$"
example = "https://vk.com/album12345_00"
def photos(self):
@@ -228,7 +228,7 @@ class VkTaggedExtractor(VkExtractor):
"""Extractor for a vk tagged photos"""
subcategory = "tagged"
directory_fmt = ("{category}", "{user[id]}", "tags")
- pattern = BASE_PATTERN + r"/tag(-?\d+)$"
+ pattern = rf"{BASE_PATTERN}/tag(-?\d+)$"
example = "https://vk.com/tag12345"
def __init__(self, match):
@@ -247,7 +247,7 @@ class VkWallPostExtractor(VkExtractor):
subcategory = "wall-post"
directory_fmt = ("{category}", "{user[id]}", "wall")
filename_fmt = "{wall[id]}_{num}.{extension}"
- pattern = BASE_PATTERN + r"/wall(-?\d+)_(\d+)"
+ pattern = rf"{BASE_PATTERN}/wall(-?\d+)_(\d+)"
example = "https://vk.com/wall12345_123"
def photos(self):
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index df09fce..b8da813 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -12,7 +12,7 @@ from .common import Extractor, Message, Dispatch
from .. import text, util
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co"
-USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)"
+USER_PATTERN = rf"{BASE_PATTERN}/([^/?#]+)"
class VscoExtractor(Extractor):
@@ -22,7 +22,7 @@ class VscoExtractor(Extractor):
directory_fmt = ("{category}", "{user}")
filename_fmt = "{id}.{extension}"
archive_fmt = "{id}"
- tls12 = False
+ browser = "firefox"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -30,7 +30,7 @@ class VscoExtractor(Extractor):
def items(self):
videos = self.config("videos", True)
- yield Message.Directory, {"user": self.user}
+ yield Message.Directory, "", {"user": self.user}
for img in self.images():
if not img:
@@ -62,7 +62,7 @@ class VscoExtractor(Extractor):
"grid" : img["grid_name"],
"meta" : img.get("image_meta") or {},
"tags" : [tag["text"] for tag in img.get("tags") or ()],
- "date" : text.parse_timestamp(img["upload_date"] // 1000),
+ "date" : self.parse_timestamp(img["upload_date"] // 1000),
"video" : img["is_video"],
"width" : img["width"],
"height": img["height"],
@@ -133,7 +133,7 @@ class VscoExtractor(Extractor):
class VscoUserExtractor(Dispatch, VscoExtractor):
"""Extractor for a vsco user profile"""
- pattern = USER_PATTERN + r"/?$"
+ pattern = rf"{USER_PATTERN}/?$"
example = "https://vsco.co/USER"
def items(self):
@@ -149,7 +149,7 @@ class VscoUserExtractor(Dispatch, VscoExtractor):
class VscoGalleryExtractor(VscoExtractor):
"""Extractor for a vsco user's gallery"""
subcategory = "gallery"
- pattern = USER_PATTERN + r"/(?:gallery|images)"
+ pattern = rf"{USER_PATTERN}/(?:gallery|images)"
example = "https://vsco.co/USER/gallery"
def images(self):
@@ -173,7 +173,7 @@ class VscoCollectionExtractor(VscoExtractor):
subcategory = "collection"
directory_fmt = ("{category}", "{user}", "collection")
archive_fmt = "c_{user}_{id}"
- pattern = USER_PATTERN + r"/collection"
+ pattern = rf"{USER_PATTERN}/collection"
example = "https://vsco.co/USER/collection/1"
def images(self):
@@ -198,7 +198,7 @@ class VscoSpaceExtractor(VscoExtractor):
subcategory = "space"
directory_fmt = ("{category}", "space", "{user}")
archive_fmt = "s_{user}_{id}"
- pattern = BASE_PATTERN + r"/spaces/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/spaces/([^/?#]+)"
example = "https://vsco.co/spaces/a1b2c3d4e5f"
def images(self):
@@ -245,7 +245,7 @@ class VscoSpaceExtractor(VscoExtractor):
class VscoSpacesExtractor(VscoExtractor):
"""Extractor for a vsco.co user's spaces"""
subcategory = "spaces"
- pattern = USER_PATTERN + r"/spaces"
+ pattern = rf"{USER_PATTERN}/spaces"
example = "https://vsco.co/USER/spaces"
def items(self):
@@ -275,7 +275,7 @@ class VscoSpacesExtractor(VscoExtractor):
class VscoAvatarExtractor(VscoExtractor):
"""Extractor for vsco.co user avatars"""
subcategory = "avatar"
- pattern = USER_PATTERN + r"/avatar"
+ pattern = rf"{USER_PATTERN}/avatar"
example = "https://vsco.co/USER/avatar"
def images(self):
@@ -303,7 +303,7 @@ class VscoAvatarExtractor(VscoExtractor):
class VscoImageExtractor(VscoExtractor):
"""Extractor for individual images on vsco.co"""
subcategory = "image"
- pattern = USER_PATTERN + r"/media/([0-9a-fA-F]+)"
+ pattern = rf"{USER_PATTERN}/media/([0-9a-fA-F]+)"
example = "https://vsco.co/USER/media/0123456789abcdef"
def images(self):
@@ -316,7 +316,7 @@ class VscoImageExtractor(VscoExtractor):
class VscoVideoExtractor(VscoExtractor):
"""Extractor for vsco.co videos links"""
subcategory = "video"
- pattern = USER_PATTERN + r"/video/([^/?#]+)"
+ pattern = rf"{USER_PATTERN}/video/([^/?#]+)"
example = "https://vsco.co/USER/video/012345678-9abc-def0"
def images(self):
diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py
index f0f27e0..9ea3c36 100644
--- a/gallery_dl/extractor/wallhaven.py
+++ b/gallery_dl/extractor/wallhaven.py
@@ -29,7 +29,7 @@ class WallhavenExtractor(Extractor):
self._transform(wp)
wp.update(metadata)
url = wp["url"]
- yield Message.Directory, wp
+ yield Message.Directory, "", wp
yield Message.Url, url, text.nameext_from_url(url, wp)
def wallpapers(self):
@@ -43,8 +43,7 @@ class WallhavenExtractor(Extractor):
wp["url"] = wp.pop("path")
if "tags" in wp:
wp["tags"] = [t["name"] for t in wp["tags"]]
- wp["date"] = text.parse_datetime(
- wp.pop("created_at"), "%Y-%m-%d %H:%M:%S")
+ wp["date"] = self.parse_datetime_iso(wp.pop("created_at"))
wp["width"] = wp.pop("dimension_x")
wp["height"] = wp.pop("dimension_y")
wp["wh_category"] = wp["category"]
diff --git a/gallery_dl/extractor/wallpapercave.py b/gallery_dl/extractor/wallpapercave.py
index 65fca24..1392164 100644
--- a/gallery_dl/extractor/wallpapercave.py
+++ b/gallery_dl/extractor/wallpapercave.py
@@ -27,7 +27,7 @@ class WallpapercaveImageExtractor(Extractor):
path = None
for path in text.extract_iter(page, 'class="download" href="', '"'):
image = text.nameext_from_url(path)
- yield Message.Directory, image
+ yield Message.Directory, "", image
yield Message.Url, self.root + path, image
if path is None:
@@ -38,7 +38,7 @@ class WallpapercaveImageExtractor(Extractor):
pass
else:
image = text.nameext_from_url(path)
- yield Message.Directory, image
+ yield Message.Directory, "", image
yield Message.Url, self.root + path, image
if path is None:
@@ -46,5 +46,5 @@ class WallpapercaveImageExtractor(Extractor):
page, 'class="wallpaper" id="wp', '</picture>'):
if path := text.rextr(wp, ' src="', '"'):
image = text.nameext_from_url(path)
- yield Message.Directory, image
+ yield Message.Directory, "", image
yield Message.Url, self.root + path, image
diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py
index 8ae2a49..b66ba8d 100644
--- a/gallery_dl/extractor/warosu.py
+++ b/gallery_dl/extractor/warosu.py
@@ -37,12 +37,12 @@ class WarosuThreadExtractor(Extractor):
data["title"] = text.unescape(text.remove_html(
posts[0]["com"]))[:50]
- yield Message.Directory, data
+ yield Message.Directory, "", data
for post in posts:
if "image" in post:
for key in ("w", "h", "no", "time", "tim"):
post[key] = text.parse_int(post[key])
- dt = text.parse_timestamp(post["time"])
+ dt = self.parse_timestamp(post["time"])
# avoid zero-padding 'day' with %d
post["now"] = dt.strftime(f"%a, %b {dt.day}, %Y %H:%M:%S")
post.update(data)
diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py
index a69f3a8..e718e51 100644
--- a/gallery_dl/extractor/weasyl.py
+++ b/gallery_dl/extractor/weasyl.py
@@ -24,8 +24,7 @@ class WeasylExtractor(Extractor):
# Some submissions don't have content and can be skipped
if "submission" in data["media"]:
data["url"] = data["media"]["submission"][0]["url"]
- data["date"] = text.parse_datetime(
- data["posted_at"][:19], "%Y-%m-%dT%H:%M:%S")
+ data["date"] = self.parse_datetime_iso(data["posted_at"][:19])
text.nameext_from_url(data["url"], data)
return True
return False
@@ -42,7 +41,7 @@ class WeasylExtractor(Extractor):
f"{self.root}/api/journals/{journalid}/view")
data["extension"] = "html"
data["html"] = "text:" + data["content"]
- data["date"] = text.parse_datetime(data["posted_at"])
+ data["date"] = self.parse_datetime_iso(data["posted_at"])
return data
def submissions(self, owner_login, folderid=None):
@@ -71,7 +70,7 @@ class WeasylExtractor(Extractor):
class WeasylSubmissionExtractor(WeasylExtractor):
subcategory = "submission"
- pattern = BASE_PATTERN + r"(?:~[\w~-]+/submissions|submission|view)/(\d+)"
+ pattern = rf"{BASE_PATTERN}(?:~[\w~-]+/submissions|submission|view)/(\d+)"
example = "https://www.weasyl.com/~USER/submissions/12345/TITLE"
def __init__(self, match):
@@ -81,13 +80,13 @@ class WeasylSubmissionExtractor(WeasylExtractor):
def items(self):
data = self.request_submission(self.submitid)
if self.populate_submission(data):
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, data["url"], data
class WeasylSubmissionsExtractor(WeasylExtractor):
subcategory = "submissions"
- pattern = BASE_PATTERN + r"(?:~|submissions/)([\w~-]+)/?$"
+ pattern = rf"{BASE_PATTERN}(?:~|submissions/)([\w~-]+)/?$"
example = "https://www.weasyl.com/submissions/USER"
def __init__(self, match):
@@ -95,14 +94,14 @@ class WeasylSubmissionsExtractor(WeasylExtractor):
self.owner_login = match[1]
def items(self):
- yield Message.Directory, {"owner_login": self.owner_login}
+ yield Message.Directory, "", {"owner_login": self.owner_login}
yield from self.submissions(self.owner_login)
class WeasylFolderExtractor(WeasylExtractor):
subcategory = "folder"
directory_fmt = ("{category}", "{owner_login}", "{folder_name}")
- pattern = BASE_PATTERN + r"submissions/([\w~-]+)\?folderid=(\d+)"
+ pattern = rf"{BASE_PATTERN}submissions/([\w~-]+)\?folderid=(\d+)"
example = "https://www.weasyl.com/submissions/USER?folderid=12345"
def __init__(self, match):
@@ -114,7 +113,7 @@ class WeasylFolderExtractor(WeasylExtractor):
# Folder names are only on single submission api calls
msg, url, data = next(iter)
details = self.request_submission(data["submitid"])
- yield Message.Directory, details
+ yield Message.Directory, "", details
yield msg, url, data
yield from iter
@@ -123,7 +122,7 @@ class WeasylJournalExtractor(WeasylExtractor):
subcategory = "journal"
filename_fmt = "{journalid} {title}.{extension}"
archive_fmt = "{journalid}"
- pattern = BASE_PATTERN + r"journal/(\d+)"
+ pattern = rf"{BASE_PATTERN}journal/(\d+)"
example = "https://www.weasyl.com/journal/12345"
def __init__(self, match):
@@ -132,7 +131,7 @@ class WeasylJournalExtractor(WeasylExtractor):
def items(self):
data = self.retrieve_journal(self.journalid)
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, data["html"], data
@@ -140,7 +139,7 @@ class WeasylJournalsExtractor(WeasylExtractor):
subcategory = "journals"
filename_fmt = "{journalid} {title}.{extension}"
archive_fmt = "{journalid}"
- pattern = BASE_PATTERN + r"journals/([\w~-]+)"
+ pattern = rf"{BASE_PATTERN}journals/([\w~-]+)"
example = "https://www.weasyl.com/journals/USER"
def __init__(self, match):
@@ -148,7 +147,7 @@ class WeasylJournalsExtractor(WeasylExtractor):
self.owner_login = match[1]
def items(self):
- yield Message.Directory, {"owner_login": self.owner_login}
+ yield Message.Directory, "", {"owner_login": self.owner_login}
url = f"{self.root}/journals/{self.owner_login}"
page = self.request(url).text
@@ -160,7 +159,7 @@ class WeasylJournalsExtractor(WeasylExtractor):
class WeasylFavoriteExtractor(WeasylExtractor):
subcategory = "favorite"
directory_fmt = ("{category}", "{user}", "Favorites")
- pattern = BASE_PATTERN + r"favorites(?:\?userid=(\d+)|/([^/?#]+))"
+ pattern = rf"{BASE_PATTERN}favorites(?:\?userid=(\d+)|/([^/?#]+))"
example = "https://www.weasyl.com/favorites?userid=12345"
def items(self):
@@ -192,7 +191,7 @@ class WeasylFavoriteExtractor(WeasylExtractor):
submission = self.request_submission(submitid)
if self.populate_submission(submission):
submission["user"] = owner_login
- yield Message.Directory, submission
+ yield Message.Directory, "", submission
yield Message.Url, submission["url"], submission
try:
diff --git a/gallery_dl/extractor/webmshare.py b/gallery_dl/extractor/webmshare.py
index cc41b03..2cb41bb 100644
--- a/gallery_dl/extractor/webmshare.py
+++ b/gallery_dl/extractor/webmshare.py
@@ -40,7 +40,7 @@ class WebmshareVideoExtractor(Extractor):
'property="og:video:width" content="', '"')),
"height": text.parse_int(extr(
'property="og:video:height" content="', '"')),
- "date" : text.parse_datetime(extr(
+ "date" : self.parse_datetime(extr(
"<small>Added ", "<"), "%B %d, %Y"),
"views": text.parse_int(extr('glyphicon-eye-open"></span>', '<')),
"id" : self.video_id,
@@ -51,5 +51,5 @@ class WebmshareVideoExtractor(Extractor):
if data["title"] == "webmshare":
data["title"] = ""
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, data["url"], data
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index 79120c1..bed251b 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -48,7 +48,7 @@ class WebtoonsBase():
class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
"""Extractor for an episode on webtoons.com"""
subcategory = "episode"
- pattern = (LANG_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)"
+ pattern = (rf"{LANG_PATTERN}/([^/?#]+)/([^/?#]+)/[^/?#]+)"
r"/viewer\?([^#'\"]+)")
example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer"
"?title_no=123&episode_no=12345")
@@ -131,7 +131,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
def assets(self, page):
if self.config("thumbnails", False):
- active = text.extr(page, 'class="on ', '</a>')
+ active = text.extr(page, 'class="on', '</a>')
url = _url(text.extr(active, 'data-url="', '"'))
return ({"url": url, "type": "thumbnail"},)
@@ -142,7 +142,7 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
categorytransfer = True
filename_fmt = "{type}.{extension}"
archive_fmt = "{title_no}_{type}"
- pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)"
+ pattern = rf"{LANG_PATTERN}/([^/?#]+)/([^/?#]+))/list\?([^#]+)"
example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123"
def items(self):
@@ -160,7 +160,7 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
page = response.text
if self.config("banners") and (asset := self._asset_banner(page)):
- yield Message.Directory, asset
+ yield Message.Directory, "", asset
yield Message.Url, asset["url"], asset
data = {"_extractor": WebtoonsEpisodeExtractor}
@@ -197,7 +197,7 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
"""Extractor for webtoons.com artists"""
subcategory = "artist"
- pattern = BASE_PATTERN + r"/p/community/([^/?#]+)/u/([^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/p/community/([^/?#]+)/u/([^/?#]+)"
example = "https://www.webtoons.com/p/community/LANG/u/ARTIST"
def items(self):
diff --git a/gallery_dl/extractor/weebcentral.py b/gallery_dl/extractor/weebcentral.py
index 03cbf29..31cdaac 100644
--- a/gallery_dl/extractor/weebcentral.py
+++ b/gallery_dl/extractor/weebcentral.py
@@ -44,7 +44,7 @@ class WeebcentralBase():
class WeebcentralChapterExtractor(WeebcentralBase, ChapterExtractor):
"""Extractor for manga chapters from weebcentral.com"""
- pattern = BASE_PATTERN + r"(/chapters/(\w+))"
+ pattern = rf"{BASE_PATTERN}(/chapters/(\w+))"
example = "https://weebcentral.com/chapters/01JHABCDEFGHIJKLMNOPQRSTUV"
def metadata(self, page):
@@ -95,7 +95,7 @@ class WeebcentralChapterExtractor(WeebcentralBase, ChapterExtractor):
class WeebcentralMangaExtractor(WeebcentralBase, MangaExtractor):
"""Extractor for manga from weebcentral.com"""
chapterclass = WeebcentralChapterExtractor
- pattern = BASE_PATTERN + r"/series/(\w+)"
+ pattern = rf"{BASE_PATTERN}/series/(\w+)"
example = "https://weebcentral.com/series/01J7ABCDEFGHIJKLMNOPQRSTUV/TITLE"
def chapters(self, _):
@@ -127,8 +127,8 @@ class WeebcentralMangaExtractor(WeebcentralBase, MangaExtractor):
"chapter" : text.parse_int(chapter),
"chapter_minor": sep + minor,
"chapter_type" : type,
- "date" : text.parse_datetime(
- extr(' datetime="', '"')[:-5], "%Y-%m-%dT%H:%M:%S"),
+ "date" : self.parse_datetime_iso(extr(
+ ' datetime="', '"')[:-5]),
}
chapter.update(data)
results.append((base + chapter_id, chapter))
diff --git a/gallery_dl/extractor/weebdex.py b/gallery_dl/extractor/weebdex.py
new file mode 100644
index 0000000..78fbda1
--- /dev/null
+++ b/gallery_dl/extractor/weebdex.py
@@ -0,0 +1,132 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://weebdex.org/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+from ..cache import memcache
+
+BASE_PATTERN = r"(?:https?://)?weebdex\.org"
+
+
+class WeebdexBase():
+ """Base class for weebdex extractors"""
+ category = "weebdex"
+ root = "https://weebdex.org"
+ root_api = "https://api.weebdex.org"
+ request_interval = 0.2 # 5 requests per second
+
+ def _init(self):
+ self.headers_api = {
+ "Referer": self.root + "/",
+ "Origin" : self.root,
+ "Sec-Fetch-Dest": "empty",
+ "Sec-Fetch-Mode": "cors",
+ "Sec-Fetch-Site": "same-site",
+ }
+
+
+class WeebdexChapterExtractor(WeebdexBase, ChapterExtractor):
+ """Extractor for weebdex manga chapters"""
+ archive_fmt = "{chapter_id}_{version}_{page}"
+ pattern = BASE_PATTERN + r"/chapter/(\w+)"
+ example = "https://weebdex.org/chapter/ID/PAGE"
+
+ def metadata(self, _):
+ cid = self.groups[0]
+ url = f"{self.root_api}/chapter/{cid}"
+ self.data = data = self.request_json(url, headers=self.headers_api)
+
+ rel = data.pop("relationships")
+ chapter, sep, minor = data["chapter"].partition(".")
+
+ return {
+ **_manga_info(self, rel["manga"]["id"]),
+ "title" : data.get("title", ""),
+ "version" : data["version"],
+ "volume" : text.parse_int(data["volume"]),
+ "chapter" : text.parse_int(chapter),
+ "chapter_minor": sep + minor,
+ "chapter_id" : cid,
+ "date" : self.parse_datetime_iso(data["created_at"]),
+ "date_updated" : self.parse_datetime_iso(data["updated_at"]),
+ "lang" : data["language"],
+ "uploader": rel["uploader"]["name"] if "uploader" in rel else "",
+ "group" : [g["name"] for g in rel.get("groups") or ()],
+ }
+
+ def images(self, _):
+ data = self.data
+ base = f"{data['node']}/data/{data['id']}/"
+
+ return [
+ (base + page["name"], {
+ "width" : page["dimensions"][0],
+ "height": page["dimensions"][1],
+ })
+ for page in data["data"]
+ ]
+
+
+class WeebdexMangaExtractor(WeebdexBase, MangaExtractor):
+ """Extractor for weebdex manga"""
+ chapterclass = WeebdexChapterExtractor
+ pattern = BASE_PATTERN + r"/title/(\w+)"
+ example = "https://weebdex.org/title/ID/SLUG"
+
+ def chapters(self, page):
+ mid = self.groups[0]
+ url = f"{self.root_api}/manga/{mid}/chapters"
+ params = {
+ "limit": 100,
+ "order": "asc" if self.config("chapter-reverse") else "desc",
+ }
+
+ base = self.root + "/chapter/"
+ manga = _manga_info(self, mid)
+ results = []
+
+ while True:
+ data = self.request_json(
+ url, params=params, headers=self.headers_api)
+
+ for ch in data["data"]:
+ chapter, sep, minor = ch["chapter"].partition(".")
+ ch["volume"] = text.parse_int(ch["volume"])
+ ch["chapter"] = text.parse_int(chapter)
+ ch["chapter_minor"] = sep + minor
+ ch.update(manga)
+ results.append((base + ch["id"], ch))
+
+ if data["total"] <= data["page"] * params["limit"]:
+ break
+ params["page"] = data["page"] + 1
+
+ return results
+
+
+@memcache(keyarg=1)
+def _manga_info(self, mid):
+ url = f"{self.root_api}/manga/{mid}"
+ manga = self.request_json(url, headers=self.headers_api)
+ rel = manga["relationships"]
+
+ return {
+ "manga" : manga["title"],
+ "manga_id": manga["id"],
+ "manga_date": self.parse_datetime_iso(manga["created_at"]),
+ "year" : manga["year"],
+ "status" : manga["status"],
+ "origin" : manga["language"],
+ "description": manga["description"],
+ "demographic": manga["demographic"],
+ "tags" : [f"{t['group']}:{t['name']}" for t in rel["tags"]],
+ "author" : [a["name"] for a in rel["authors"]],
+ "artist" : [a["name"] for a in rel["artists"]],
+ }
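_manga_info() is a module-level function decorated with @memcache(keyarg=1) on purpose: the cache key is argument 1 (the manga ID rather than the extractor instance), so manga metadata is requested once and reused by every chapter of the same series. A minimal sketch of that keyarg-style memoization, assuming a simplified memcache (the real gallery_dl.cache version has additional machinery):

    import functools

    def memcache(keyarg=0):
        """Memoize a function on a single positional argument."""
        def decorator(func):
            cache = {}
            @functools.wraps(func)
            def wrapper(*args):
                key = args[keyarg]        # e.g. 'mid', not 'self'
                if key not in cache:
                    cache[key] = func(*args)
                return cache[key]
            return wrapper
        return decorator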
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 3c0f077..abec0f7 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -34,6 +34,7 @@ class WeiboExtractor(Extractor):
def _init(self):
self.livephoto = self.config("livephoto", True)
self.retweets = self.config("retweets", False)
+ self.longtext = self.config("text", False)
self.videos = self.config("videos", True)
self.movies = self.config("movies", False)
self.gifs = self.config("gifs", True)
@@ -98,10 +99,14 @@ class WeiboExtractor(Extractor):
files = []
self._extract_status(status, files)
- status["date"] = text.parse_datetime(
+ if self.longtext and status.get("isLongText") and \
+ status["text"].endswith('class="expand">展开</span>'):
+ status = self._status_by_id(status["id"])
+
+ status["date"] = self.parse_datetime(
status["created_at"], "%a %b %d %H:%M:%S %z %Y")
status["count"] = len(files)
- yield Message.Directory, status
+ yield Message.Directory, "", status
num = 0
for file in files:
@@ -190,7 +195,8 @@ class WeiboExtractor(Extractor):
return video
def _status_by_id(self, status_id):
- url = f"{self.root}/ajax/statuses/show?id={status_id}"
+ url = (f"{self.root}/ajax/statuses/show"
+ f"?id={status_id}&isGetLongText=true")
return self.request_json(url)
def _user_id(self):
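The new "text" option (read into self.longtext) makes the extractor re-fetch a status through _status_by_id() with isGetLongText=true whenever the embedded text is truncated, i.e. ends with the 'class="expand">展开</span>' ("expand") marker. Presumably enabled like any other weibo option in gallery-dl.conf:

    {
        "extractor": {
            "weibo": {
                "text": true
            }
        }
    }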
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py
index 830d880..8f3a1c9 100644
--- a/gallery_dl/extractor/wikiart.py
+++ b/gallery_dl/extractor/wikiart.py
@@ -27,7 +27,7 @@ class WikiartExtractor(Extractor):
def items(self):
data = self.metadata()
- yield Message.Directory, data
+ yield Message.Directory, "", data
for painting in self.paintings():
url = painting["image"]
painting.update(data)
@@ -68,7 +68,7 @@ class WikiartArtistExtractor(WikiartExtractor):
"""Extractor for an artist's paintings on wikiart.org"""
subcategory = "artist"
directory_fmt = ("{category}", "{artist[artistName]}")
- pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)/?$"
+ pattern = rf"{BASE_PATTERN}/(?!\w+-by-)([\w-]+)/?$"
example = "https://www.wikiart.org/en/ARTIST"
def __init__(self, match):
@@ -89,7 +89,7 @@ class WikiartArtistExtractor(WikiartExtractor):
class WikiartImageExtractor(WikiartArtistExtractor):
"""Extractor for individual paintings on wikiart.org"""
subcategory = "image"
- pattern = BASE_PATTERN + r"/(?!(?:paintings|artists)-by-)([\w-]+)/([\w-]+)"
+ pattern = rf"{BASE_PATTERN}/(?!(?:paintings|artists)-by-)([\w-]+)/([\w-]+)"
example = "https://www.wikiart.org/en/ARTIST/TITLE"
def __init__(self, match):
@@ -109,7 +109,7 @@ class WikiartArtworksExtractor(WikiartExtractor):
"""Extractor for artwork collections on wikiart.org"""
subcategory = "artworks"
directory_fmt = ("{category}", "Artworks by {group!c}", "{type}")
- pattern = BASE_PATTERN + r"/paintings-by-([\w-]+)/([\w-]+)"
+ pattern = rf"{BASE_PATTERN}/paintings-by-([\w-]+)/([\w-]+)"
example = "https://www.wikiart.org/en/paintings-by-GROUP/TYPE"
def __init__(self, match):
@@ -128,7 +128,7 @@ class WikiartArtworksExtractor(WikiartExtractor):
class WikiartArtistsExtractor(WikiartExtractor):
"""Extractor for artist collections on wikiart.org"""
subcategory = "artists"
- pattern = (BASE_PATTERN + r"/artists-by-([\w-]+)/([\w-]+)")
+ pattern = (rf"{BASE_PATTERN}/artists-by-([\w-]+)/([\w-]+)")
example = "https://www.wikiart.org/en/artists-by-GROUP/TYPE"
def __init__(self, match):
diff --git a/gallery_dl/extractor/wikifeet.py b/gallery_dl/extractor/wikifeet.py
index 31dc9cd..a07fd84 100644
--- a/gallery_dl/extractor/wikifeet.py
+++ b/gallery_dl/extractor/wikifeet.py
@@ -34,8 +34,8 @@ class WikifeetGalleryExtractor(GalleryExtractor):
"celeb" : self.celeb,
"type" : self.type,
"birthplace": text.unescape(extr('"bplace":"', '"')),
- "birthday" : text.parse_datetime(text.unescape(
- extr('"bdate":"', '"'))[:10], "%Y-%m-%d"),
+ "birthday" : self.parse_datetime_iso(text.unescape(extr(
+ '"bdate":"', '"'))[:10]),
"shoesize" : text.unescape(extr('"ssize":', ',')),
"rating" : text.parse_float(extr('"score":', ',')),
"celebrity" : text.unescape(extr('"cname":"', '"')),
diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py
index ba020d5..70e42c6 100644
--- a/gallery_dl/extractor/wikimedia.py
+++ b/gallery_dl/extractor/wikimedia.py
@@ -22,25 +22,32 @@ class WikimediaExtractor(BaseExtractor):
request_interval = (1.0, 2.0)
def __init__(self, match):
- BaseExtractor.__init__(self, match)
+ self._init_category(match)
+ self.format = False
if self.category == "wikimedia":
- self.category = self.root.split(".")[-2]
+ labels = self.root.split(".")
+ self.lang = labels[-3][-2:]
+ self.category = labels[-2]
elif self.category in ("fandom", "wikigg"):
+ self.lang = "en"
+ self.format = "original"
+ self.basesubcategory = self.category
self.category = (
f"{self.category}-"
f"{self.root.partition('.')[0].rpartition('/')[2]}")
-
- self.per_page = self.config("limit", 50)
- self.subcategories = False
+ else:
+ self.lang = ""
if useragent := self.config_instance("useragent"):
self.useragent = useragent
+ BaseExtractor.__init__(self, match)
+
def _init(self):
if api_path := self.config_instance("api-path"):
if api_path[0] == "/":
- self.api_url = self.root + api_path
+ self.api_url = f"{self.root}{api_path}"
else:
self.api_url = api_path
else:
@@ -51,12 +58,15 @@ class WikimediaExtractor(BaseExtractor):
# https://www.mediawiki.org/wiki/API:Revisions
# https://www.mediawiki.org/wiki/API:Imageinfo
self.image_revisions = self.config("image-revisions", 1)
+ self.format = self.config("format", self.format)
+ self.per_page = self.config("limit", 50)
+ self.subcategories = False
@cache(maxage=36500*86400, keyarg=1)
def _search_api_path(self, root):
self.log.debug("Probing possible API endpoints")
for path in ("/api.php", "/w/api.php", "/wiki/api.php"):
- url = root + path
+ url = f"{root}{path}"
response = self.request(url, method="HEAD", fatal=None)
if response.status_code < 400:
return url
@@ -74,12 +84,19 @@ class WikimediaExtractor(BaseExtractor):
m["name"]: m["value"]
for m in image["commonmetadata"] or ()}
- text.nameext_from_url(image["canonicaltitle"].partition(":")[2], image)
- image["date"] = text.parse_datetime(
- image["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
+ text.nameext_from_name(
+ image["canonicaltitle"].partition(":")[2], image)
+ image["date"] = self.parse_datetime_iso(image["timestamp"])
+
+ if self.format:
+ url = image["url"]
+ image["url"] = (f"{url}{'&' if '?' in url else '?'}"
+ f"format={self.format}")
def items(self):
- for info in self._pagination(self.params):
+ params = self.params()
+
+ for info in self._pagination(params):
try:
images = info.pop("imageinfo")
except KeyError:
@@ -88,7 +105,7 @@ class WikimediaExtractor(BaseExtractor):
info["count"] = len(images)
self.prepare_info(info)
- yield Message.Directory, info
+ yield Message.Directory, "", info
num = 0
for image in images:
@@ -105,10 +122,10 @@ class WikimediaExtractor(BaseExtractor):
yield Message.Url, image["url"], image
if self.subcategories:
- base = self.root + "/wiki/"
- self.params["gcmtype"] = "subcat"
- for subcat in self._pagination(self.params):
- url = base + subcat["title"].replace(" ", "_")
+ base = f"{self.root}/wiki/"
+ params["gcmtype"] = "subcat"
+ for subcat in self._pagination(params):
+ url = f"{base}{subcat['title'].replace(' ', '_')}"
subcat["_extractor"] = WikimediaArticleExtractor
yield Message.Queue, url, subcat
@@ -219,7 +236,7 @@ class WikimediaArticleExtractor(WikimediaExtractor):
"""Extractor for wikimedia articles"""
subcategory = "article"
directory_fmt = ("{category}", "{page}")
- pattern = BASE_PATTERN + r"/(?!static/)([^?#]+)"
+ pattern = rf"{BASE_PATTERN}/(?!static/)([^?#]+)"
example = "https://en.wikipedia.org/wiki/TITLE"
def __init__(self, match):
@@ -227,53 +244,54 @@ class WikimediaArticleExtractor(WikimediaExtractor):
path = self.groups[-1]
if path[2] == "/":
- self.root = self.root + "/" + path[:2]
+ self.lang = lang = path[:2]
+ self.root = f"{self.root}/{lang}"
path = path[3:]
if path.startswith("wiki/"):
path = path[5:]
+ self.path = text.unquote(path)
pre, sep, _ = path.partition(":")
- prefix = pre.lower() if sep else None
-
- self.title = path = text.unquote(path)
- if prefix:
+ self.prefix = prefix = pre.lower() if sep else None
+ if prefix is not None:
self.subcategory = prefix
- if prefix == "category":
+ def params(self):
+ if self.prefix == "category":
if self.config("subcategories", True):
self.subcategories = True
- self.params = {
+ return {
"generator": "categorymembers",
- "gcmtitle" : path,
+ "gcmtitle" : self.path,
"gcmtype" : "file",
"gcmlimit" : self.per_page,
}
- elif prefix == "file":
- self.params = {
- "titles" : path,
- }
- else:
- self.params = {
- "generator": "images",
- "gimlimit" : self.per_page,
- "titles" : path,
+
+ if self.prefix == "file":
+ return {
+ "titles": self.path,
}
+ return {
+ "generator": "images",
+ "gimlimit" : self.per_page,
+ "titles" : self.path,
+ }
+
def prepare_info(self, info):
- info["page"] = self.title
+ info["page"] = self.path
+ info["lang"] = self.lang
class WikimediaWikiExtractor(WikimediaExtractor):
"""Extractor for all files on a MediaWiki instance"""
subcategory = "wiki"
- pattern = BASE_PATTERN + r"/?$"
+ pattern = rf"{BASE_PATTERN}/?$"
example = "https://en.wikipedia.org/"
- def __init__(self, match):
- WikimediaExtractor.__init__(self, match)
-
+ def params(self):
# ref: https://www.mediawiki.org/wiki/API:Allpages
- self.params = {
+ return {
"generator" : "allpages",
"gapnamespace": 6, # "File" namespace
"gaplimit" : self.per_page,
diff --git a/gallery_dl/extractor/xasiat.py b/gallery_dl/extractor/xasiat.py
index 6aa3168..d4dbea1 100644
--- a/gallery_dl/extractor/xasiat.py
+++ b/gallery_dl/extractor/xasiat.py
@@ -7,7 +7,7 @@
"""Extractors for https://www.xasiat.com"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text
import time
BASE_PATTERN = r"(?:https?://)?(?:www\.)?xasiat\.com((?:/fr|/ja)?/albums"
@@ -29,7 +29,7 @@ class XasiatExtractor(Extractor):
def _pagination(self, path, pnum=1):
url = f"{self.root}{path}/"
- find_posts = util.re(r'class="item ">\s*<a href="([^"]+)').findall
+ find_posts = text.re(r'class="item ">\s*<a href="([^"]+)').findall
while True:
params = {
@@ -38,7 +38,7 @@ class XasiatExtractor(Extractor):
"block_id": "list_albums_common_albums_list",
"sort_by": "post_date",
"from": pnum,
- "_": int(time.time() * 1000)
+ "_": int(time.time() * 1000),
}
page = self.request(url, params=params).text
@@ -52,7 +52,7 @@ class XasiatExtractor(Extractor):
class XasiatAlbumExtractor(XasiatExtractor):
subcategory = "album"
- pattern = BASE_PATTERN + r"/(\d+)/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/(\d+)/[^/?#]+)"
example = "https://www.xasiat.com/albums/12345/TITLE/"
def items(self):
@@ -66,38 +66,37 @@ class XasiatAlbumExtractor(XasiatExtractor):
images = extr('class="images"', "</div>")
urls = list(text.extract_iter(images, 'href="', '"'))
-
+ categories = text.re(r'categories/[^"]+\">\s*(.+)\s*</a').findall(info)
data = {
"title": text.unescape(title),
- "model": util.re(
+ "model": text.re(
r'top_models1"></i>\s*(.+)\s*</span').findall(info),
- "tags": util.re(
+ "tags": text.re(
r'tags/[^"]+\">\s*(.+)\s*</a').findall(info),
- "album_category": util.re(
- r'categories/[^"]+\">\s*(.+)\s*</a').findall(info)[0],
+ "album_category": categories[0] if categories else "",
"album_url": response.url,
"album_id": text.parse_int(album_id),
"count": len(urls),
}
- yield Message.Directory, data
+ yield Message.Directory, "", data
for data["num"], url in enumerate(urls, 1):
yield Message.Url, url, text.nameext_from_url(url[:-1], data)
class XasiatTagExtractor(XasiatExtractor):
subcategory = "tag"
- pattern = BASE_PATTERN + r"/tags/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/tags/[^/?#]+)"
example = "https://www.xasiat.com/albums/tags/TAG/"
class XasiatCategoryExtractor(XasiatExtractor):
subcategory = "category"
- pattern = BASE_PATTERN + r"/categories/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/categories/[^/?#]+)"
example = "https://www.xasiat.com/albums/categories/CATEGORY/"
class XasiatModelExtractor(XasiatExtractor):
subcategory = "model"
- pattern = BASE_PATTERN + r"/models/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}/models/[^/?#]+)"
example = "https://www.xasiat.com/albums/models/MODEL/"
diff --git a/gallery_dl/extractor/xenforo.py b/gallery_dl/extractor/xenforo.py
new file mode 100644
index 0000000..d8536b0
--- /dev/null
+++ b/gallery_dl/extractor/xenforo.py
@@ -0,0 +1,348 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for XenForo forums"""
+
+from .common import BaseExtractor, Message
+from .. import text, exception
+from ..cache import cache
+
+
+class XenforoExtractor(BaseExtractor):
+ """Base class for xenforo extractors"""
+ basecategory = "xenforo"
+ directory_fmt = ("{category}", "{thread[section]}",
+ "{thread[title]} ({thread[id]})")
+ filename_fmt = "{post[id]}_{num:>02}_{id}_{filename}.{extension}"
+ archive_fmt = "{post[id]}/{type[0]}{id}_{filename}"
+
+ def __init__(self, match):
+ BaseExtractor.__init__(self, match)
+ self.cookies_domain = "." + self.root.split("/")[2]
+ self.cookies_names = self.config_instance("cookies")
+
+ def items(self):
+ self.login()
+
+ extract_urls = text.re(
+ r'(?s)(?:'
+ r'<video (.*?\ssrc="[^"]+".*?)</video>'
+ r'|<a [^>]*?href="[^"]*?'
+ r'(/(?:index\.php\?)?attachments/[^"]+".*?)</a>'
+ r'|<div [^>]*?data-src="[^"]*?'
+ r'(/(?:index\.php\?)attachments/[^"]+".*?)/>'
+ r'|(?:<a [^>]*?href="|<iframe [^>]*?src="|'
+ r'''onclick="loadMedia\(this, ')([^"']+)'''
+ r')'
+ ).findall
+
+ for post in self.posts():
+ urls = extract_urls(post["content"])
+ if post["attachments"]:
+ urls.extend(extract_urls(post["attachments"]))
+
+ data = {"post": post}
+ post["count"] = data["count"] = len(urls)
+ yield Message.Directory, "", data
+
+ id_last = None
+ data["_http_expected_status"] = (403,)
+ data["_http_validate"] = self._validate
+ data["num"] = data["num_internal"] = data["num_external"] = 0
+ for video, inl1, inl2, ext in urls:
+ if ext:
+ data["num"] += 1
+ data["num_external"] += 1
+ data["type"] = "external"
+ if ext[0] == "/":
+ if ext[1] == "/":
+ ext = "https:" + ext
+ else:
+ continue
+ yield Message.Queue, ext, data
+
+ elif video:
+ data["num"] += 1
+ data["num_internal"] += 1
+ data["type"] = "video"
+ url = text.extr(video, 'src="', '"')
+ text.nameext_from_url(url, data)
+ data["id"] = text.parse_int(
+ data["filename"].partition("-")[0])
+ yield Message.Url, url, data
+
+ elif (inline := inl1 or inl2):
+ path = inline[:inline.find('"')]
+ name, _, id = path[path.rfind("/", 0, -1):].strip(
+ "/").rpartition(".")
+ if id == id_last:
+ id_last = None
+ continue
+ else:
+ id_last = id
+ data["id"] = text.parse_int(id)
+ if alt := text.extr(inline, 'alt="', '"'):
+ text.nameext_from_name(alt, data)
+ if not data["extension"]:
+ data["extension"] = name.rpartition("-")[2]
+ else:
+ data["filename"], _, data["extension"] = \
+ name.rpartition("-")
+ data["num"] += 1
+ data["num_internal"] += 1
+ data["type"] = "inline"
+ yield Message.Url, self.root + path, data
+
+ def request_page(self, url):
+ try:
+ return self.request(url)
+ except exception.HttpError as exc:
+ if exc.status == 403 and b">Log in<" in exc.response.content:
+ self._require_auth(exc.response)
+ raise
+
+ def login(self):
+ if self.cookies_check(self.cookies_names):
+ return
+
+ username, password = self._get_auth_info()
+ if username:
+ self.cookies_update(self._login_impl(username, password))
+
+ @cache(maxage=365*86400, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = f"{self.root}/login/login"
+ page = self.request(url).text
+ data = {
+ "_xfToken": text.extr(page, 'name="_xfToken" value="', '"'),
+ "login" : username,
+ "password": password,
+ "remember": "1",
+ "_xfRedirect": "",
+ }
+ response = self.request(url, method="POST", data=data)
+
+ if not response.history:
+ err = self._extract_error(response.text)
+ raise exception.AuthenticationError(f'"{err}"')
+
+ return {
+ cookie.name: cookie.value
+ for cookie in self.cookies
+ if cookie.domain.endswith(self.cookies_domain)
+ }
+
+ def _pagination(self, base, pnum=None):
+ base = f"{self.root}{base}"
+
+ if pnum is None:
+ url = f"{base}/"
+ pnum = 1
+ else:
+ url = f"{base}/page-{pnum}"
+ pnum = None
+
+ while True:
+ page = self.request_page(url).text
+
+ yield page
+
+ if pnum is None or "pageNav-jump--next" not in page:
+ return
+ pnum += 1
+ url = f"{base}/page-{pnum}"
+
+ def _pagination_reverse(self, base, pnum=None):
+ base = f"{self.root}{base}"
+
+ url = f"{base}/page-{'9999' if pnum is None else pnum}"
+ with self.request_page(url) as response:
+ if pnum is None and not response.history:
+ self._require_auth()
+ url = response.url
+ if url[-1] == "/":
+ pnum = 1
+ else:
+ pnum = text.parse_int(url[url.rfind("-")+1:], 1)
+ page = response.text
+
+ while True:
+ yield page
+
+ pnum -= 1
+ if pnum > 1:
+ url = f"{base}/page-{pnum}"
+ elif pnum == 1:
+ url = f"{base}/"
+ else:
+ return
+
+ page = self.request_page(url).text
+
+ def _extract_error(self, html):
+ return text.unescape(text.extr(
+ html, "blockMessage--error", "</").rpartition(">")[2].strip())
+
+ def _parse_thread(self, page):
+ try:
+ data = self._extract_jsonld(page)
+ except ValueError:
+ return {}
+
+ schema = data.get("mainEntity", data)
+ author = schema["author"]
+ stats = schema["interactionStatistic"]
+ url_t = schema.get("url") or schema.get("@id") or ""
+ url_a = author.get("url") or ""
+
+ thread = {
+ "id" : url_t[url_t.rfind(".")+1:-1],
+ "url" : url_t,
+ "title": schema["headline"],
+ "date" : self.parse_datetime_iso(schema["datePublished"]),
+ "tags" : (schema["keywords"].split(", ")
+ if "keywords" in schema else ()),
+ "section" : schema["articleSection"],
+ "author" : author.get("name") or "",
+ "author_id" : (url_a[url_a.rfind(".")+1:-1] if url_a else
+ (author.get("name") or "")[15:]),
+ "author_url": url_a,
+ }
+
+ if isinstance(stats, list):
+ thread["views"] = stats[0]["userInteractionCount"]
+ thread["posts"] = stats[1]["userInteractionCount"]
+ else:
+ thread["views"] = -1
+ thread["posts"] = stats["userInteractionCount"]
+
+ return thread
+
+ def _parse_post(self, html):
+ extr = text.extract_from(html)
+
+ post = {
+ "author": extr('data-author="', '"'),
+ "id": extr('data-content="post-', '"'),
+ "author_url": (extr('itemprop="url" content="', '"') or
+ extr('<a href="', '"')),
+ "date": self.parse_datetime_iso(extr('datetime="', '"')),
+ "content": extr('class="message-body',
+ '<div class="js-selectToQuote'),
+ "attachments": extr('<section class="message-attachments">',
+ '</section>'),
+ }
+
+ url_a = post["author_url"]
+ post["author_id"] = url_a[url_a.rfind(".")+1:-1]
+
+ con = post["content"]
+ if (pos := con.find('<div class="bbWrapper')) >= 0:
+ con = con[pos:]
+ post["content"] = con.strip()
+
+ return post
+
+ def _require_auth(self, response=None):
+ raise exception.AuthRequired(
+ ("username & password", "authenticated cookies"), None,
+ None if response is None else self._extract_error(response.text))
+
+ def _validate(self, response):
+ if response.status_code == 403 and b">Log in<" in response.content:
+ self._require_auth(response)
+ return True
+
+
+BASE_PATTERN = XenforoExtractor.update({
+ "simpcity": {
+ "root": "https://simpcity.cr",
+ "pattern": r"(?:www\.)?simpcity\.(?:cr|su)",
+ "cookies": ("ogaddgmetaprof_user",),
+ },
+ "nudostarforum": {
+ "root": "https://nudostar.com/forum",
+ "pattern": r"(?:www\.)?nudostar\.com/forum",
+ "cookies": ("xf_user",),
+ },
+ "atfforum": {
+ "root": "https://allthefallen.moe/forum",
+ "pattern": r"(?:www\.)?allthefallen\.moe/forum",
+ "cookies": ("xf_user",),
+ },
+})
+
+
+class XenforoPostExtractor(XenforoExtractor):
+ subcategory = "post"
+ pattern = (rf"{BASE_PATTERN}(/(?:index\.php\?)?threads"
+ rf"/[^/?#]+/post-|/posts/)(\d+)")
+ example = "https://simpcity.cr/threads/TITLE.12345/post-54321"
+
+ def posts(self):
+ path = self.groups[-2]
+ post_id = self.groups[-1]
+ url = f"{self.root}{path}{post_id}/"
+ page = self.request_page(url).text
+
+ pos = page.find(f'data-content="post-{post_id}"')
+ if pos < 0:
+ raise exception.NotFoundError("post")
+ html = text.extract(page, "<article ", "<footer", pos-200)[0]
+
+ self.kwdict["thread"] = self._parse_thread(page)
+ return (self._parse_post(html),)
+
+
+class XenforoThreadExtractor(XenforoExtractor):
+ subcategory = "thread"
+ pattern = (rf"{BASE_PATTERN}(/(?:index\.php\?)?threads"
+ rf"/(?:[^/?#]+\.)?\d+)(?:/page-(\d+))?")
+ example = "https://simpcity.cr/threads/TITLE.12345/"
+
+ def posts(self):
+ path = self.groups[-2]
+ pnum = self.groups[-1]
+
+ if (order := self.config("order-posts")) and \
+ order[0] not in ("d", "r"):
+ pages = self._pagination(path, pnum)
+ reverse = False
+ else:
+ pages = self._pagination_reverse(path, pnum)
+ reverse = True
+
+ for page in pages:
+ if "thread" not in self.kwdict:
+ self.kwdict["thread"] = self._parse_thread(page)
+ posts = text.extract_iter(page, "<article ", "<footer")
+ if reverse:
+ posts = list(posts)
+ posts.reverse()
+ for html in posts:
+ yield self._parse_post(html)
+
+
+class XenforoForumExtractor(XenforoExtractor):
+ subcategory = "forum"
+ pattern = (rf"{BASE_PATTERN}(/(?:index\.php\?)?forums"
+ rf"/(?:[^/?#]+\.)?[^/?#]+)(?:/page-(\d+))?")
+ example = "https://simpcity.cr/forums/TITLE.123/"
+
+ def items(self):
+ extract_threads = text.re(
+ r'(/(?:index\.php\?)?threads/[^"]+)"[^>]+data-xf-init=').findall
+
+ data = {"_extractor": XenforoThreadExtractor}
+ path = self.groups[-2]
+ pnum = self.groups[-1]
+ for page in self._pagination(path, pnum):
+ for path in extract_threads(page):
+ yield Message.Queue, f"{self.root}{text.unquote(path)}", data
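Authentication for these boards works either with username/password (the _login_impl() result is cached for a year) or with pre-authenticated cookies whose names are listed per site above. A hedged gallery-dl.conf sketch (cookie values and credentials are placeholders):

    {
        "extractor": {
            "simpcity": {
                "cookies": {
                    "ogaddgmetaprof_user": "VALUE-FROM-BROWSER"
                }
            },
            "nudostarforum": {
                "username": "USER",
                "password": "PASS"
            }
        }
    }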
diff --git a/gallery_dl/extractor/xfolio.py b/gallery_dl/extractor/xfolio.py
index 12f437a..8caff85 100644
--- a/gallery_dl/extractor/xfolio.py
+++ b/gallery_dl/extractor/xfolio.py
@@ -45,7 +45,7 @@ class XfolioExtractor(Extractor):
class XfolioWorkExtractor(XfolioExtractor):
subcategory = "work"
- pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)/works/(\d+)"
+ pattern = rf"{BASE_PATTERN}/portfolio/([^/?#]+)/works/(\d+)"
example = "https://xfolio.jp/portfolio/USER/works/12345"
def items(self):
@@ -57,7 +57,7 @@ class XfolioWorkExtractor(XfolioExtractor):
files = self._extract_files(html, work)
work["count"] = len(files)
- yield Message.Directory, work
+ yield Message.Directory, "", work
for work["num"], file in enumerate(files, 1):
file.update(work)
yield Message.Url, file["url"], file
@@ -107,7 +107,7 @@ class XfolioWorkExtractor(XfolioExtractor):
class XfolioUserExtractor(XfolioExtractor):
subcategory = "user"
- pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)(?:/works)?/?(?:$|\?|#)"
+ pattern = rf"{BASE_PATTERN}/portfolio/([^/?#]+)(?:/works)?/?(?:$|\?|#)"
example = "https://xfolio.jp/portfolio/USER"
def works(self):
@@ -129,7 +129,7 @@ class XfolioUserExtractor(XfolioExtractor):
class XfolioSeriesExtractor(XfolioExtractor):
subcategory = "series"
- pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)/series/(\d+)"
+ pattern = rf"{BASE_PATTERN}/portfolio/([^/?#]+)/series/(\d+)"
example = "https://xfolio.jp/portfolio/USER/series/12345"
def works(self):
diff --git a/gallery_dl/extractor/xhamster.py b/gallery_dl/extractor/xhamster.py
index 6c97175..64113d3 100644
--- a/gallery_dl/extractor/xhamster.py
+++ b/gallery_dl/extractor/xhamster.py
@@ -31,12 +31,12 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
"{gallery[id]} {gallery[title]}")
filename_fmt = "{num:>03}_{id}.{extension}"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"(/photos/gallery/[^/?#]+)"
+ pattern = rf"{BASE_PATTERN}(/photos/gallery/[^/?#]+)"
example = "https://xhamster.com/photos/gallery/12345"
def items(self):
data = self.metadata()
- yield Message.Directory, data
+ yield Message.Directory, "", data
for num, image in enumerate(self.images(), 1):
url = image["imageURL"]
image.update(data)
@@ -67,7 +67,7 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
{
"id" : text.parse_int(gallery["id"]),
"tags" : [t["label"] for t in info["categoriesTags"]],
- "date" : text.parse_timestamp(model["created"]),
+ "date" : self.parse_timestamp(model["created"]),
"views" : text.parse_int(model["views"]),
"likes" : text.parse_int(model["rating"]["likes"]),
"dislikes" : text.parse_int(model["rating"]["dislikes"]),
@@ -102,7 +102,7 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
class XhamsterUserExtractor(XhamsterExtractor):
"""Extractor for all galleries of an xhamster user"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/users/([^/?#]+)(?:/photos)?/?(?:$|[?#])"
+ pattern = rf"{BASE_PATTERN}/users/([^/?#]+)(?:/photos)?/?(?:$|[?#])"
example = "https://xhamster.com/users/USER/photos"
def items(self):
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index 6c016ec..1f33eac 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -28,7 +28,7 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor):
"{gallery[id]} {gallery[title]}")
filename_fmt = "{category}_{gallery[id]}_{num:>03}.{extension}"
archive_fmt = "{gallery[id]}_{num}"
- pattern = BASE_PATTERN + r"/([^/?#]+)/photos/(\d+)"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/photos/(\d+)"
example = "https://www.xvideos.com/profiles/USER/photos/12345"
def __init__(self, match):
@@ -86,7 +86,7 @@ class XvideosUserExtractor(XvideosBase, Extractor):
"""Extractor for user profiles on xvideos.com"""
subcategory = "user"
categorytransfer = True
- pattern = BASE_PATTERN + r"/([^/?#]+)/?(?:#.*)?$"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/?(?:#.*)?$"
example = "https://www.xvideos.com/profiles/USER"
def __init__(self, match):
diff --git a/gallery_dl/extractor/yiffverse.py b/gallery_dl/extractor/yiffverse.py
index 1595b4d..65289e2 100644
--- a/gallery_dl/extractor/yiffverse.py
+++ b/gallery_dl/extractor/yiffverse.py
@@ -55,8 +55,7 @@ class YiffverseExtractor(BooruExtractor):
def _prepare(self, post):
post.pop("files", None)
- post["date"] = text.parse_datetime(
- post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ post["date"] = self.parse_datetime_iso(post["created"])
post["filename"], _, post["format"] = post["filename"].rpartition(".")
if "tags" in post:
post["tags"] = [t["value"] for t in post["tags"]]
@@ -99,7 +98,7 @@ class YiffverseExtractor(BooruExtractor):
class YiffversePostExtractor(YiffverseExtractor):
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/post/(\d+)"
+ pattern = rf"{BASE_PATTERN}/post/(\d+)"
example = "https://yiffverse.com/post/12345"
def posts(self):
@@ -110,7 +109,7 @@ class YiffversePlaylistExtractor(YiffverseExtractor):
subcategory = "playlist"
directory_fmt = ("{category}", "{playlist_id}")
archive_fmt = "p_{playlist_id}_{id}"
- pattern = BASE_PATTERN + r"/playlist/(\d+)"
+ pattern = rf"{BASE_PATTERN}/playlist/(\d+)"
example = "https://yiffverse.com/playlist/12345"
def metadata(self):
@@ -125,7 +124,7 @@ class YiffverseTagExtractor(YiffverseExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"/(?:tag/([^/?#]+))?(?:/?\?([^#]+))?(?:$|#)"
+ pattern = rf"{BASE_PATTERN}/(?:tag/([^/?#]+))?(?:/?\?([^#]+))?(?:$|#)"
example = "https://yiffverse.com/tag/TAG"
def _init(self):
diff --git a/gallery_dl/extractor/ytdl.py b/gallery_dl/extractor/ytdl.py
index eb33b65..ea3b615 100644
--- a/gallery_dl/extractor/ytdl.py
+++ b/gallery_dl/extractor/ytdl.py
@@ -114,7 +114,7 @@ class YoutubeDLExtractor(Extractor):
info_dict.get("webpage_url") or
self.ytdl_url)
- yield Message.Directory, info_dict
+ yield Message.Directory, "", info_dict
yield Message.Url, url, info_dict
def _process_entries(self, ytdl_module, ytdl_instance, entries):
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index 7bff83b..b4bbd5a 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -76,7 +76,7 @@ class ZerochanExtractor(BooruExtractor):
data = {
"id" : text.parse_int(entry_id),
"file_url": jsonld["contentUrl"],
- "date" : text.parse_datetime(jsonld["datePublished"]),
+ "date" : self.parse_datetime_iso(jsonld["datePublished"]),
"width" : text.parse_int(jsonld["width"][:-3]),
"height" : text.parse_int(jsonld["height"][:-3]),
"size" : text.parse_bytes(jsonld["contentSize"][:-1]),
@@ -128,7 +128,7 @@ class ZerochanExtractor(BooruExtractor):
return data
def _parse_json(self, txt):
- txt = util.re(r"[\x00-\x1f\x7f]").sub("", txt)
+ txt = text.re(r"[\x00-\x1f\x7f]").sub("", txt)
main, _, tags = txt.partition('tags": [')
item = {}
@@ -160,7 +160,7 @@ class ZerochanExtractor(BooruExtractor):
class ZerochanTagExtractor(ZerochanExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
- pattern = BASE_PATTERN + r"/(?!\d+$)([^/?#]+)/?(?:\?([^#]+))?"
+ pattern = rf"{BASE_PATTERN}/(?!\d+$)([^/?#]+)/?(?:\?([^#]+))?"
example = "https://www.zerochan.net/TAG"
def __init__(self, match):
@@ -286,7 +286,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
class ZerochanImageExtractor(ZerochanExtractor):
subcategory = "image"
- pattern = BASE_PATTERN + r"/(\d+)"
+ pattern = rf"{BASE_PATTERN}/(\d+)"
example = "https://www.zerochan.net/12345"
def posts(self):
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index 5246f66..0787464 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -13,9 +13,8 @@ import sys
import time
import string
import _string
-import datetime
import operator
-from . import text, util
+from . import text, util, dt
NONE = util.NONE
@@ -68,8 +67,8 @@ class StringFormatter():
- "g": calls text.slugify()
- "j": calls json.dumps
- "t": calls str.strip
- - "T": calls util.datetime_to_timestamp_string()
- - "d": calls text.parse_timestamp
+ - "T": calls dt.to_ts_string()
+ - "d": calls dt.parse_ts()
- "s": calls str()
- "S": calls util.to_string()
- "U": calls urllib.parse.unescape
@@ -331,10 +330,10 @@ def _slice(indices):
)
-def _bytesgetter(slice, encoding=sys.getfilesystemencoding()):
+def _bytesgetter(slice):
def apply_slice_bytes(obj):
- return obj.encode(encoding)[slice].decode(encoding, "ignore")
+ return obj.encode(_ENCODING)[slice].decode(_ENCODING, "ignore")
return apply_slice_bytes
@@ -414,15 +413,27 @@ def _parse_conversion(format_spec, default):
def _parse_maxlen(format_spec, default):
maxlen, replacement, format_spec = format_spec.split(_SEPARATOR, 2)
- maxlen = text.parse_int(maxlen[1:])
fmt = _build_format_func(format_spec, default)
- def mlen(obj):
- obj = fmt(obj)
- return obj if len(obj) <= maxlen else replacement
+ if maxlen[1] == "b":
+ maxlen = text.parse_int(maxlen[2:])
+
+ def mlen(obj):
+ obj = fmt(obj)
+ return obj if len(obj.encode(_ENCODING)) <= maxlen else replacement
+ else:
+ maxlen = text.parse_int(maxlen[1:])
+
+ def mlen(obj):
+ obj = fmt(obj)
+ return obj if len(obj) <= maxlen else replacement
return mlen
+def _parse_identity(format_spec, default):
+ return util.identity
+
+
def _parse_join(format_spec, default):
separator, _, format_spec = format_spec.partition(_SEPARATOR)
join = separator[1:].join
@@ -471,9 +482,9 @@ def _parse_datetime(format_spec, default):
dt_format = dt_format[1:]
fmt = _build_format_func(format_spec, default)
- def dt(obj):
- return fmt(text.parse_datetime(obj, dt_format))
- return dt
+ def dt_parse(obj):
+ return fmt(dt.parse(obj, dt_format))
+ return dt_parse
def _parse_offset(format_spec, default):
@@ -482,15 +493,15 @@ def _parse_offset(format_spec, default):
fmt = _build_format_func(format_spec, default)
if not offset or offset == "local":
- def off(dt):
- local = time.localtime(util.datetime_to_timestamp(dt))
- return fmt(dt + datetime.timedelta(0, local.tm_gmtoff))
+ def off(dt_utc):
+ local = time.localtime(dt.to_ts(dt_utc))
+ return fmt(dt_utc + dt.timedelta(0, local.tm_gmtoff))
else:
hours, _, minutes = offset.partition(":")
offset = 3600 * int(hours)
if minutes:
offset += 60 * (int(minutes) if offset > 0 else -int(minutes))
- offset = datetime.timedelta(0, offset)
+ offset = dt.timedelta(0, offset)
def off(obj):
return fmt(obj + offset)
@@ -502,25 +513,36 @@ def _parse_sort(format_spec, default):
fmt = _build_format_func(format_spec, default)
if "d" in args or "r" in args:
- def sort_desc(obj):
+ def sort(obj):
return fmt(sorted(obj, reverse=True))
- return sort_desc
else:
- def sort_asc(obj):
+ def sort(obj):
return fmt(sorted(obj))
- return sort_asc
+ return sort
def _parse_limit(format_spec, default):
limit, hint, format_spec = format_spec.split(_SEPARATOR, 2)
- limit = int(limit[1:])
- limit_hint = limit - len(hint)
fmt = _build_format_func(format_spec, default)
- def apply_limit(obj):
- if len(obj) > limit:
- obj = obj[:limit_hint] + hint
- return fmt(obj)
+ if limit[1] == "b":
+ hint = hint.encode(_ENCODING)
+ limit = int(limit[2:])
+ limit_hint = limit - len(hint)
+
+ def apply_limit(obj):
+ objb = obj.encode(_ENCODING)
+ if len(objb) > limit:
+ obj = (objb[:limit_hint] + hint).decode(_ENCODING, "ignore")
+ return fmt(obj)
+ else:
+ limit = int(limit[1:])
+ limit_hint = limit - len(hint)
+
+ def apply_limit(obj):
+ if len(obj) > limit:
+ obj = obj[:limit_hint] + hint
+ return fmt(obj)
return apply_limit
@@ -541,6 +563,7 @@ class Literal():
_literal = Literal()
_CACHE = {}
+_ENCODING = sys.getfilesystemencoding()
_SEPARATOR = "/"
_FORMATTERS = {
"E" : ExpressionFormatter,
@@ -557,7 +580,7 @@ _FORMATTERS = {
_GLOBALS = {
"_env": lambda: os.environ,
"_lit": lambda: _literal,
- "_now": datetime.datetime.now,
+ "_now": dt.datetime.now,
"_nul": lambda: util.NONE,
}
_CONVERSIONS = {
@@ -569,9 +592,9 @@ _CONVERSIONS = {
"t": str.strip,
"n": len,
"L": util.code_to_language,
- "T": util.datetime_to_timestamp_string,
- "d": text.parse_timestamp,
- "D": util.to_datetime,
+ "T": dt.to_ts_string,
+ "d": dt.parse_ts,
+ "D": dt.convert,
"U": text.unescape,
"H": lambda s: text.unescape(text.remove_html(s)),
"g": text.slugify,
@@ -590,6 +613,7 @@ _FORMAT_SPECIFIERS = {
"A": _parse_arithmetic,
"C": _parse_conversion,
"D": _parse_datetime,
+ "I": _parse_identity,
"J": _parse_join,
"L": _parse_maxlen,
"M": _parse_map,
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 9369e5d..7a52bd6 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -87,17 +87,22 @@ class Job():
"current_git_head": util.git_head()
}
# user-supplied metadata
- if kwdict := extr.config("keywords"):
- if extr.config("keywords-eval"):
- self.kwdict_eval = []
- for key, value in kwdict.items():
- if isinstance(value, str):
- fmt = formatter.parse(value, None, util.identity)
- self.kwdict_eval.append((key, fmt.format_map))
- else:
- self.kwdict[key] = value
- else:
- self.kwdict.update(kwdict)
+ kwdict = extr.config("keywords")
+ if kwdict_global := extr.config("keywords-global"):
+ kwdict = {**kwdict_global, **kwdict} if kwdict else kwdict_global
+ elif not kwdict:
+ return
+
+ if extr.config("keywords-eval"):
+ self.kwdict_eval = []
+ for key, value in kwdict.items():
+ if isinstance(value, str):
+ fmt = formatter.parse(value, None, util.identity)
+ self.kwdict_eval.append((key, fmt.format_map))
+ else:
+ self.kwdict[key] = value
+ else:
+ self.kwdict.update(kwdict)
def _build_config_path(self, parent):
extr = self.extractor
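"keywords-global" merges beneath "keywords", so per-extractor "keywords" entries win on key collisions ({**kwdict_global, **kwdict}). A config sketch with illustrative values:

    {
        "extractor": {
            "keywords-global": {"source": "gallery-dl"},
            "twitter": {
                "keywords": {"source": "twitter"}
            }
        }
    }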
@@ -130,6 +135,8 @@ class Job():
if extr.basecategory:
if not cfgpath:
cfgpath.append((extr.category, extr.subcategory))
+ if extr.basesubcategory:
+ cfgpath.append((extr.basesubcategory, extr.subcategory))
cfgpath.append((extr.basecategory, extr.subcategory))
return cfgpath
@@ -138,37 +145,35 @@ class Job():
"""Execute or run the job"""
extractor = self.extractor
log = extractor.log
- msg = None
self._init()
# sleep before extractor start
sleep = util.build_duration_func(
extractor.config("sleep-extractor"))
- if sleep:
+ if sleep is not None:
extractor.sleep(sleep(), "extractor")
try:
- for msg in extractor:
- self.dispatch(msg)
+ msg = self.dispatch(extractor)
except exception.StopExtraction as exc:
if exc.depth > 1 and exc.target != extractor.__class__.subcategory:
exc.depth -= 1
raise
pass
except exception.AbortExtraction as exc:
+ log.traceback(exc)
log.error(exc.message)
self.status |= exc.code
except (exception.TerminateExtraction, exception.RestartExtraction):
raise
except exception.GalleryDLException as exc:
log.error("%s: %s", exc.__class__.__name__, exc)
- log.debug("", exc_info=exc)
+ log.traceback(exc)
self.status |= exc.code
except OSError as exc:
- log.debug("", exc_info=exc)
- name = exc.__class__.__name__
- if name == "JSONDecodeError":
+ log.traceback(exc)
+ if (name := exc.__class__.__name__) == "JSONDecodeError":
log.error("Failed to parse JSON data: %s: %s", name, exc)
self.status |= 1
else: # regular OSError
@@ -180,7 +185,7 @@ class Job():
"copy its output and report this issue on "
"https://github.com/mikf/gallery-dl/issues ."),
exc.__class__.__name__, exc)
- log.debug("", exc_info=exc)
+ log.traceback(exc)
self.status |= 1
except BaseException:
self.status |= 1
@@ -196,31 +201,47 @@ class Job():
self.status |= s
return self.status
- def dispatch(self, msg):
+ def dispatch(self, messages):
"""Call the appropriate message handler"""
- if msg[0] == Message.Url:
- _, url, kwdict = msg
- if self.metadata_url:
- kwdict[self.metadata_url] = url
- if self.pred_url(url, kwdict):
- self.update_kwdict(kwdict)
- self.handle_url(url, kwdict)
- if FLAGS.FILE is not None:
- FLAGS.process("FILE")
-
- elif msg[0] == Message.Directory:
- self.update_kwdict(msg[1])
- self.handle_directory(msg[1])
-
- elif msg[0] == Message.Queue:
- _, url, kwdict = msg
- if self.metadata_url:
- kwdict[self.metadata_url] = url
- if self.pred_queue(url, kwdict):
- self.update_kwdict(kwdict)
- self.handle_queue(url, kwdict)
- if FLAGS.CHILD is not None:
- FLAGS.process("CHILD")
+ msg = None
+ process = True
+
+ for msg, url, kwdict in messages:
+
+ if msg == Message.Directory:
+ if self.pred_post(url, kwdict):
+ process = True
+ self.update_kwdict(kwdict)
+ self.handle_directory(kwdict)
+ else:
+ process = None
+ if FLAGS.POST is not None:
+ FLAGS.process("POST")
+
+ elif process is None:
+ continue
+
+ elif msg == Message.Url:
+ if self.metadata_url:
+ kwdict[self.metadata_url] = url
+ if self.pred_url(url, kwdict):
+ self.update_kwdict(kwdict)
+ self.handle_url(url, kwdict)
+ if FLAGS.FILE is not None:
+ FLAGS.process("FILE")
+
+ elif msg == Message.Queue:
+ if process is None:
+ continue
+ if self.metadata_url:
+ kwdict[self.metadata_url] = url
+ if self.pred_queue(url, kwdict):
+ self.update_kwdict(kwdict)
+ self.handle_queue(url, kwdict)
+ if FLAGS.CHILD is not None:
+ FLAGS.process("CHILD")
+
+ return msg
def handle_url(self, url, kwdict):
"""Handle Message.Url"""
@@ -252,15 +273,16 @@ class Job():
def _init(self):
self.extractor.initialize()
self.pred_url = self._prepare_predicates("image", True)
+ self.pred_post = self._prepare_predicates("post", False)
self.pred_queue = self._prepare_predicates("chapter", False)
def _prepare_predicates(self, target, skip=True):
predicates = []
- if self.extractor.config(target + "-unique"):
+ if self.extractor.config(f"{target}-unique"):
predicates.append(util.UniquePredicate())
- if pfilter := self.extractor.config(target + "-filter"):
+ if pfilter := self.extractor.config(f"{target}-filter"):
try:
pred = util.FilterPredicate(pfilter, target)
except (SyntaxError, ValueError, TypeError) as exc:
@@ -268,7 +290,7 @@ class Job():
else:
predicates.append(pred)
- if prange := self.extractor.config(target + "-range"):
+ if prange := self.extractor.config(f"{target}-range"):
try:
pred = util.RangePredicate(prange)
except ValueError as exc:
@@ -288,7 +310,7 @@ class Job():
return self._logger_adapter(logger, self)
def _write_unsupported(self, url):
- if self.ulog:
+ if self.ulog is not None:
self.ulog.info(url)
@@ -321,7 +343,7 @@ class DownloadJob(Job):
for callback in hooks["prepare"]:
callback(pathfmt)
- if archive and archive.check(kwdict):
+ if archive is not None and archive.check(kwdict):
pathfmt.fix_extension()
self.handle_skip()
return
@@ -330,7 +352,7 @@ class DownloadJob(Job):
pathfmt.build_path()
if pathfmt.exists():
- if archive and self._archive_write_skip:
+ if archive is not None and self._archive_write_skip:
archive.add(kwdict)
self.handle_skip()
return
@@ -340,12 +362,12 @@ class DownloadJob(Job):
callback(pathfmt)
if kwdict.pop("_file_recheck", False) and pathfmt.exists():
- if archive and self._archive_write_skip:
+ if archive is not None and self._archive_write_skip:
archive.add(kwdict)
self.handle_skip()
return
- if self.sleep:
+ if self.sleep is not None:
self.extractor.sleep(self.sleep(), "download")
# download from URL
@@ -369,7 +391,7 @@ class DownloadJob(Job):
return
if not pathfmt.temppath:
- if archive and self._archive_write_skip:
+ if archive is not None and self._archive_write_skip:
archive.add(kwdict)
self.handle_skip()
return
@@ -383,15 +405,17 @@ class DownloadJob(Job):
pathfmt.finalize()
self.out.success(pathfmt.path)
self._skipcnt = 0
- if archive and self._archive_write_file:
+ if archive is not None and self._archive_write_file:
archive.add(kwdict)
if "after" in hooks:
for callback in hooks["after"]:
callback(pathfmt)
+ if archive is not None and self._archive_write_after:
+ archive.add(kwdict)
def handle_directory(self, kwdict):
"""Set and create the target directory for downloads"""
- if not self.pathfmt:
+ if self.pathfmt is None:
self.initialize(kwdict)
else:
if "post-after" in self.hooks:
@@ -428,7 +452,8 @@ class DownloadJob(Job):
else:
extr._parentdir = pextr._parentdir
- if pmeta := pextr.config2("parent-metadata", "metadata-parent"):
+ if pmeta := pextr.config2(
+ "parent-metadata", "metadata-parent", pextr.parent):
if isinstance(pmeta, str):
data = self.kwdict.copy()
if kwdict:
@@ -509,7 +534,7 @@ class DownloadJob(Job):
self.out.skip(pathfmt.path)
if self._skipexc:
- if not self._skipftr or self._skipftr(pathfmt.kwdict):
+ if self._skipftr is None or self._skipftr(pathfmt.kwdict):
self._skipcnt += 1
if self._skipcnt >= self._skipmax:
raise self._skipexc
@@ -553,7 +578,7 @@ class DownloadJob(Job):
cfg = extr.config
pathfmt = self.pathfmt = path.PathFormat(extr)
- if kwdict:
+ if kwdict is not None:
pathfmt.set_directory(kwdict)
self.sleep = util.build_duration_func(cfg("sleep"))
@@ -593,11 +618,13 @@ class DownloadJob(Job):
if events is None:
self._archive_write_file = True
self._archive_write_skip = False
+ self._archive_write_after = False
else:
if isinstance(events, str):
events = events.split(",")
self._archive_write_file = ("file" in events)
self._archive_write_skip = ("skip" in events)
+ self._archive_write_after = ("after" in events)
if skip := cfg("skip", True):
self._skipexc = None
@@ -621,7 +648,7 @@ class DownloadJob(Job):
else:
# monkey-patch methods to always return False
pathfmt.exists = lambda x=None: False
- if self.archive:
+ if self.archive is not None:
self.archive.check = pathfmt.exists
if not cfg("postprocess", True):
@@ -681,15 +708,15 @@ class DownloadJob(Job):
pp_dict["__init__"] = None
pp_cls = postprocessor.find(name)
- if not pp_cls:
+ if pp_cls is None:
pp_log.warning("module '%s' not found", name)
continue
try:
pp_obj = pp_cls(self, pp_dict)
except Exception as exc:
+ pp_log.traceback(exc)
pp_log.error("'%s' initialization failed: %s: %s",
name, exc.__class__.__name__, exc)
- pp_log.debug("", exc_info=exc)
else:
pp_list.append(pp_obj)
@@ -706,15 +733,11 @@ class DownloadJob(Job):
condition = util.compile_filter(expr)
for hook, callback in hooks.items():
self.hooks[hook].append(functools.partial(
- self._call_hook, callback, condition))
+ _call_hook_condition, callback, condition))
else:
for hook, callback in hooks.items():
self.hooks[hook].append(callback)
- def _call_hook(self, callback, condition, pathfmt):
- if condition(pathfmt.kwdict):
- callback(pathfmt)
-
def _build_extractor_filter(self):
clist = self.extractor.config("whitelist")
if clist is not None:
@@ -730,20 +753,25 @@ class DownloadJob(Job):
return util.build_extractor_filter(clist, negate, special)
+def _call_hook_condition(callback, condition, pathfmt):
+ if condition(pathfmt.kwdict):
+ callback(pathfmt)
+
+
class SimulationJob(DownloadJob):
"""Simulate the extraction process without downloading anything"""
def handle_url(self, url, kwdict):
ext = kwdict["extension"] or "jpg"
kwdict["extension"] = self.pathfmt.extension_map(ext, ext)
- if self.sleep:
+ if self.sleep is not None:
self.extractor.sleep(self.sleep(), "download")
- if self.archive and self._archive_write_skip:
+ if self.archive is not None and self._archive_write_skip:
self.archive.add(kwdict)
self.out.skip(self.pathfmt.build_filename(kwdict))
def handle_directory(self, kwdict):
- if not self.pathfmt:
+ if self.pathfmt is None:
self.initialize()
@@ -931,13 +959,12 @@ class DataJob(Job):
extractor = self.extractor
sleep = util.build_duration_func(
extractor.config("sleep-extractor"))
- if sleep:
+ if sleep is not None:
extractor.sleep(sleep(), "extractor")
# collect data
try:
- for msg in extractor:
- self.dispatch(msg)
+ self.dispatch(extractor)
except exception.StopExtraction:
pass
except Exception as exc:
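dispatch() now consumes the extractor's message stream directly and returns the type of the last message, and Message.Directory carries the same 3-tuple shape as the other message types (its second element is an empty string for now). That uniform shape is what enables the new post-level predicates: a failing pred_post suppresses the directory and every following Url/Queue message of that post. The message shapes, as inferred from the hunks above:

    # (Message.Directory, "",  kwdict)  # one per post; gated by pred_post
    # (Message.Url,       url, kwdict)  # one per file; gated by pred_url
    # (Message.Queue,     url, kwdict)  # child extractor; gated by pred_queue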
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 05cc9d3..a47d8cd 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -698,10 +698,15 @@ def build_parser():
"(e.g. '5', '8-20', or '1:24:3')"),
)
selection.add_argument(
+ "--post-range",
+ dest="post-range", metavar="RANGE", action=ConfigAction,
+ help=("Like '--range', but for posts"),
+ )
+ selection.add_argument(
"--chapter-range",
dest="chapter-range", metavar="RANGE", action=ConfigAction,
- help=("Like '--range', but applies to manga chapters "
- "and other delegated URLs"),
+ help=("Like '--range', but for child extractors handling "
+ "manga chapters, external URLs, etc."),
)
selection.add_argument(
"--filter",
@@ -713,10 +718,15 @@ def build_parser():
"rating in ('s', 'q')\""),
)
selection.add_argument(
+ "--post-filter",
+ dest="post-filter", metavar="EXPR", action=ConfigAction,
+ help=("Like '--filter', but for posts"),
+ )
+ selection.add_argument(
"--chapter-filter",
dest="chapter-filter", metavar="EXPR", action=ConfigAction,
- help=("Like '--filter', but applies to manga chapters "
- "and other delegated URLs"),
+ help=("Like '--filter', but for child extractors handling "
+ "manga chapters, external URLs, etc."),
)
infojson = {
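Together with the "post-filter"/"post-range" config options these flags gate entire posts (see the pred_post wiring in job.py above), e.g. gallery-dl --post-filter "date >= datetime(2025, 1, 1)" URL. The equivalent config keys, as a sketch:

    {
        "extractor": {
            "post-range": "1-10",
            "post-filter": "date >= datetime(2025, 1, 1)"
        }
    }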
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index 9e0888b..fe7235e 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -89,6 +89,11 @@ class LoggerAdapter():
self.logger = logger
self.extra = job._logger_extra
+ def traceback(self, exc):
+ if self.logger.isEnabledFor(logging.DEBUG):
+ self.logger._log(
+ logging.DEBUG, "", None, exc_info=exc, extra=self.extra)
+
def debug(self, msg, *args, **kwargs):
if self.logger.isEnabledFor(logging.DEBUG):
kwargs["extra"] = self.extra
@@ -171,6 +176,48 @@ class Formatter(logging.Formatter):
return msg
+class FileHandler(logging.StreamHandler):
+ def __init__(self, path, mode, encoding, delay=True):
+ self.path = path
+ self.mode = mode
+ self.errors = None
+ self.encoding = encoding
+
+ if delay:
+ logging.Handler.__init__(self)
+ self.stream = None
+ self.emit = self.emit_delayed
+ else:
+ logging.StreamHandler.__init__(self, self._open())
+
+ def close(self):
+ with self.lock:
+ try:
+ if self.stream:
+ try:
+ self.flush()
+ self.stream.close()
+ finally:
+ self.stream = None
+ finally:
+ logging.StreamHandler.close(self)
+
+ def _open(self):
+ try:
+ return open(self.path, self.mode,
+ encoding=self.encoding, errors=self.errors)
+ except FileNotFoundError:
+ os.makedirs(os.path.dirname(self.path))
+ return open(self.path, self.mode,
+ encoding=self.encoding, errors=self.errors)
+
+ def emit_delayed(self, record):
+ if self.mode != "w" or not self._closed:
+ self.stream = self._open()
+ self.emit = logging.StreamHandler.emit.__get__(self)
+ self.emit(record)
+
+
def initialize_logging(loglevel):
"""Setup basic logging functionality before configfiles have been loaded"""
# convert levelnames to lowercase
@@ -242,7 +289,8 @@ def configure_logging(loglevel):
root.setLevel(minlevel)
-def setup_logging_handler(key, fmt=LOG_FORMAT, lvl=LOG_LEVEL, mode="w"):
+def setup_logging_handler(key, fmt=LOG_FORMAT, lvl=LOG_LEVEL, mode="w",
+ defer=False):
"""Setup a new logging handler"""
opts = config.interpolate(("output",), key)
if not opts:
@@ -253,12 +301,10 @@ def setup_logging_handler(key, fmt=LOG_FORMAT, lvl=LOG_LEVEL, mode="w"):
path = opts.get("path")
mode = opts.get("mode", mode)
encoding = opts.get("encoding", "utf-8")
+ delay = opts.get("defer", defer)
try:
path = util.expand_path(path)
- handler = logging.FileHandler(path, mode, encoding)
- except FileNotFoundError:
- os.makedirs(os.path.dirname(path))
- handler = logging.FileHandler(path, mode, encoding)
+ handler = FileHandler(path, mode, encoding, delay)
except (OSError, ValueError) as exc:
logging.getLogger("gallery-dl").warning(
"%s: %s", key, exc)
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 763fb55..be2dcc9 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -31,6 +31,8 @@ class PathFormat():
if kwdefault is None:
kwdefault = util.NONE
+ self.filename_conditions = self.directory_conditions = None
+
filename_fmt = config("filename")
try:
if filename_fmt is None:
@@ -41,7 +43,6 @@ class PathFormat():
formatter.parse(fmt, kwdefault).format_map)
for expr, fmt in filename_fmt.items() if expr
]
- self.build_filename = self.build_filename_conditional
filename_fmt = filename_fmt.get("", extractor.filename_fmt)
self.filename_formatter = formatter.parse(
@@ -50,7 +51,6 @@ class PathFormat():
raise exception.FilenameFormatError(exc)
directory_fmt = config("directory")
- self.directory_conditions = ()
try:
if directory_fmt is None:
directory_fmt = extractor.directory_fmt
@@ -62,7 +62,6 @@ class PathFormat():
])
for expr, fmts in directory_fmt.items() if expr
]
- self.build_directory = self.build_directory_conditional
directory_fmt = directory_fmt.get("", extractor.directory_fmt)
self.directory_formatters = [
@@ -160,8 +159,12 @@ class PathFormat():
def exists(self):
"""Return True if the file exists on disk"""
- if self.extension and os.path.exists(self.realpath):
- return self.check_file()
+ if self.extension:
+ try:
+ os.lstat(self.realpath) # raises OSError if file doesn't exist
+ return self.check_file()
+ except OSError:
+ pass
return False
def check_file(self):
@@ -174,7 +177,7 @@ class PathFormat():
prefix = format(num) + "."
self.kwdict["extension"] = prefix + self.extension
self.build_path()
- os.stat(self.realpath) # raises OSError if file doesn't exist
+ os.lstat(self.realpath) # raises OSError if file doesn't exist
num += 1
except OSError:
pass
@@ -252,55 +255,47 @@ class PathFormat():
def build_filename(self, kwdict):
"""Apply 'kwdict' to filename format string"""
try:
- return self.clean_path(self.clean_segment(
- self.filename_formatter(kwdict)))
- except Exception as exc:
- raise exception.FilenameFormatError(exc)
-
- def build_filename_conditional(self, kwdict):
- try:
- for condition, fmt in self.filename_conditions:
- if condition(kwdict):
- break
- else:
+ if self.filename_conditions is None:
fmt = self.filename_formatter
+ else:
+ for condition, fmt in self.filename_conditions:
+ if condition(kwdict):
+ break
+ else:
+ fmt = self.filename_formatter
return self.clean_path(self.clean_segment(fmt(kwdict)))
except Exception as exc:
raise exception.FilenameFormatError(exc)
def build_directory(self, kwdict):
"""Apply 'kwdict' to directory format strings"""
- segments = []
- strip = self.strip
-
try:
- for fmt in self.directory_formatters:
- segment = fmt(kwdict).strip()
- if strip and segment not in {".", ".."}:
- # remove trailing dots and spaces (#647)
- segment = segment.rstrip(strip)
- if segment:
- segments.append(self.clean_segment(segment))
- return segments
- except Exception as exc:
- raise exception.DirectoryFormatError(exc)
-
- def build_directory_conditional(self, kwdict):
- segments = []
- strip = self.strip
-
- try:
- for condition, formatters in self.directory_conditions:
- if condition(kwdict):
- break
- else:
+ if self.directory_conditions is None:
formatters = self.directory_formatters
+ else:
+ for condition, formatters in self.directory_conditions:
+ if condition(kwdict):
+ break
+ else:
+ formatters = self.directory_formatters
+
+ segments = []
+ strip = self.strip
for fmt in formatters:
- segment = fmt(kwdict).strip()
- if strip and segment != "..":
- segment = segment.rstrip(strip)
- if segment:
- segments.append(self.clean_segment(segment))
+ segment = fmt(kwdict)
+ if segment.__class__ is str:
+ segment = segment.strip()
+ if strip and segment not in {".", ".."}:
+ segment = segment.rstrip(strip)
+ if segment:
+ segments.append(self.clean_segment(segment))
+ else: # assume list
+ for segment in segment:
+ segment = segment.strip()
+ if strip and segment not in {".", ".."}:
+ segment = segment.rstrip(strip)
+ if segment:
+ segments.append(self.clean_segment(segment))
return segments
except Exception as exc:
raise exception.DirectoryFormatError(exc)
@@ -321,7 +316,15 @@ class PathFormat():
self.kwdict["extension"] = self.prefix + self.extension_map(
"part", "part")
self.build_path()
- if part_directory:
+
+ if part_directory is not None:
+ if isinstance(part_directory, list):
+ for condition, part_directory in part_directory:
+ if condition(self.kwdict):
+ break
+ else:
+ return
+
self.temppath = os.path.join(
part_directory,
os.path.basename(self.temppath),
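"part-directory" may now also be a pre-compiled list of (condition, directory) pairs: the first predicate matching the file's kwdict wins, and if none match the method returns early so the .part file stays beside its final path. The selection logic, restated as a sketch:

    # part_directory: either a single path or [(condition, path), ...]
    for condition, directory in part_directory_pairs:
        if condition(kwdict):   # first matching predicate wins
            break
    else:
        directory = None        # no match: keep the .part file in place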
diff --git a/gallery_dl/postprocessor/__init__.py b/gallery_dl/postprocessor/__init__.py
index 1a4ce56..6da0330 100644
--- a/gallery_dl/postprocessor/__init__.py
+++ b/gallery_dl/postprocessor/__init__.py
@@ -33,7 +33,7 @@ def find(name):
cls = None
if name in modules: # prevent unwanted imports
try:
- module = __import__(name, globals(), None, (), 1)
+ module = __import__(name, globals(), None, None, 1)
except ImportError:
pass
else:
diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py
index 3b0ab22..9e2e4df 100644
--- a/gallery_dl/postprocessor/exec.py
+++ b/gallery_dl/postprocessor/exec.py
@@ -22,6 +22,10 @@ else:
from shlex import quote
+def trim(args):
+ return (args.partition(" ") if isinstance(args, str) else args)[0]
+
+
class ExecPP(PostProcessor):
def __init__(self, job, options):
@@ -35,6 +39,7 @@ class ExecPP(PostProcessor):
if options.get("async", False):
self._exec = self._popen
+ self.verbose = options.get("verbose", True)
self.session = False
self.creationflags = 0
if options.get("session"):
@@ -115,11 +120,11 @@ class ExecPP(PostProcessor):
def _exec(self, args, shell):
if retcode := self._popen(args, shell).wait():
self.log.warning("'%s' returned with non-zero exit status (%d)",
- args, retcode)
+ args if self.verbose else trim(args), retcode)
return retcode
def _popen(self, args, shell):
- self.log.debug("Running '%s'", args)
+ self.log.debug("Running '%s'", args if self.verbose else trim(args))
return util.Popen(
args,
shell=shell,
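The new "verbose" option (default true) decides whether warnings and debug lines show the full command or just its first element (trim() keeps everything before the first space for strings, args[0] for lists), keeping secrets and long argument lists out of logs. A hedged postprocessor config sketch (command shown illustratively):

    {
        "name": "exec",
        "command": ["my-script", "{_path}"],
        "verbose": false
    }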
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 90e6e3d..0017b5b 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -117,9 +117,15 @@ class MetadataPP(PostProcessor):
self.mtime = options.get("mtime")
self.omode = options.get("open", omode)
self.encoding = options.get("encoding", "utf-8")
+ self.newline = options.get("newline")
self.skip = options.get("skip", False)
self.meta_path = options.get("metadata-path")
+ def open(self, path):
+ return open(path, self.omode,
+ encoding=self.encoding,
+ newline=self.newline)
+
def run(self, pathfmt):
archive = self.archive
if archive and archive.check(pathfmt.kwdict):
@@ -138,11 +144,11 @@ class MetadataPP(PostProcessor):
return
try:
- with open(path, self.omode, encoding=self.encoding) as fp:
+ with self.open(path) as fp:
self.write(fp, pathfmt.kwdict)
except FileNotFoundError:
os.makedirs(directory, exist_ok=True)
- with open(path, self.omode, encoding=self.encoding) as fp:
+ with self.open(path) as fp:
self.write(fp, pathfmt.kwdict)
if archive:
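The new "newline" option is forwarded unchanged to open(). With Python's universal-newline handling, newline="\r\n" translates every "\n" written into "\r\n", which is what test_metadata_option_newline later in this patch asserts. A standalone demonstration (the temp path is illustrative):

    import os
    import tempfile

    path = os.path.join(tempfile.mkdtemp(), "data.json")
    with open(path, "w", encoding="utf-8", newline="\r\n") as fp:
        fp.write('{\n  "category": "test"\n}\n')

    with open(path, newline="") as fp:  # newline="" disables translation
        print(repr(fp.read()))
    # '{\r\n  "category": "test"\r\n}\r\n'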
diff --git a/gallery_dl/postprocessor/mtime.py b/gallery_dl/postprocessor/mtime.py
index b1269dd..7d4796e 100644
--- a/gallery_dl/postprocessor/mtime.py
+++ b/gallery_dl/postprocessor/mtime.py
@@ -9,8 +9,7 @@
"""Use metadata as file modification time"""
from .common import PostProcessor
-from .. import text, util, formatter
-from datetime import datetime
+from .. import text, util, dt, formatter
class MtimePP(PostProcessor):
@@ -36,8 +35,8 @@ class MtimePP(PostProcessor):
return
pathfmt.kwdict["_mtime_meta"] = (
- util.datetime_to_timestamp(mtime)
- if isinstance(mtime, datetime) else
+ dt.to_ts(mtime)
+ if isinstance(mtime, dt.datetime) else
text.parse_int(mtime)
)
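dt.to_ts() replaces util.datetime_to_timestamp(), which, per the util.py hunk later in this patch, computed (value - EPOCH) / SECOND for naive UTC datetimes. The equivalent in isolation:

    import datetime

    EPOCH = datetime.datetime(1970, 1, 1)
    SECOND = datetime.timedelta(0, 1)

    def to_ts(value):
        # naive UTC datetime -> Unix timestamp as a float
        return (value - EPOCH) / SECOND

    print(to_ts(datetime.datetime(2010, 1, 1)))  # 1262304000.0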
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index 1a55e22..3813fae 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -151,7 +151,7 @@ class UgoiraPP(PostProcessor):
"%s: Unable to extract frames from %s (%s: %s)",
pathfmt.kwdict.get("id"), pathfmt.filename,
exc.__class__.__name__, exc)
- return self.log.debug("", exc_info=exc)
+ return self.log.traceback(exc)
if self.convert(pathfmt, tempdir):
if self.delete:
@@ -227,12 +227,12 @@ class UgoiraPP(PostProcessor):
output.stderr_write("\n")
self.log.error("Unable to invoke FFmpeg (%s: %s)",
exc.__class__.__name__, exc)
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
pathfmt.realpath = pathfmt.temppath
except Exception as exc:
output.stderr_write("\n")
self.log.error("%s: %s", exc.__class__.__name__, exc)
- self.log.debug("", exc_info=exc)
+ self.log.traceback(exc)
pathfmt.realpath = pathfmt.temppath
else:
if self.mtime:
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index 98bba48..5b074d9 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -8,10 +8,7 @@
"""Collection of functions that work on strings/text"""
-import sys
import html
-import time
-import datetime
import urllib.parse
import re as re_module
@@ -113,9 +110,27 @@ def nameext_from_url(url, data=None):
filename = unquote(filename_from_url(url))
name, _, ext = filename.rpartition(".")
if name and len(ext) <= 16:
- data["filename"], data["extension"] = name, ext.lower()
+ data["filename"] = name
+ data["extension"] = ext.lower()
else:
- data["filename"], data["extension"] = filename, ""
+ data["filename"] = filename
+ data["extension"] = ""
+
+ return data
+
+
+def nameext_from_name(filename, data=None):
+ """Extract the last part of an URL and fill 'data' accordingly"""
+ if data is None:
+ data = {}
+
+ name, _, ext = filename.rpartition(".")
+ if name and len(ext) <= 16:
+ data["filename"] = name
+ data["extension"] = ext.lower()
+ else:
+ data["filename"] = filename
+ data["extension"] = ""
return data
@@ -322,46 +337,6 @@ def build_query(params):
])
-if sys.hexversion < 0x30c0000:
- # Python <= 3.11
- def parse_timestamp(ts, default=None):
- """Create a datetime object from a Unix timestamp"""
- try:
- return datetime.datetime.utcfromtimestamp(int(ts))
- except Exception:
- return default
-else:
- # Python >= 3.12
- def parse_timestamp(ts, default=None):
- """Create a datetime object from a Unix timestamp"""
- try:
- Y, m, d, H, M, S, _, _, _ = time.gmtime(int(ts))
- return datetime.datetime(Y, m, d, H, M, S)
- except Exception:
- return default
-
-
-def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z", utcoffset=0):
- """Create a datetime object by parsing 'date_string'"""
- try:
- d = datetime.datetime.strptime(date_string, format)
- o = d.utcoffset()
- if o is not None:
- # convert to naive UTC
- d = d.replace(tzinfo=None, microsecond=0) - o
- else:
- if d.microsecond:
- d = d.replace(microsecond=0)
- if utcoffset:
- # apply manual UTC offset
- d += datetime.timedelta(0, utcoffset * -3600)
- return d
- except (TypeError, IndexError, KeyError):
- return None
- except (ValueError, OverflowError):
- return date_string
-
-
urljoin = urllib.parse.urljoin
quote = urllib.parse.quote
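nameext_from_name() performs the same name/extension split as nameext_from_url(), but on a bare filename: no unquoting and no query-string stripping. Usage matching the tests added later in this patch (assumes gallery-dl is importable):

    from gallery_dl import text

    print(text.nameext_from_name("filename.ext"))
    # {'filename': 'filename', 'extension': 'ext'}
    print(text.nameext_from_name("foo%202?bar&<>.ext"))
    # {'filename': 'foo%202?bar&<>', 'extension': 'ext'}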
diff --git a/gallery_dl/update.py b/gallery_dl/update.py
index 273ca18..e51a4b3 100644
--- a/gallery_dl/update.py
+++ b/gallery_dl/update.py
@@ -212,5 +212,5 @@ class UpdateExtractor(Extractor):
url = (f"{self.root}/{path_repo}/releases/download"
f"/{data['tag_name']}/{binary_name}")
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, url, data
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 49c1ba8..7d54d4c 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -16,7 +16,6 @@ import random
import getpass
import hashlib
import binascii
-import datetime
import functools
import itertools
import subprocess
@@ -24,7 +23,7 @@ import collections
import urllib.parse
from http.cookiejar import Cookie
from email.utils import mktime_tz, parsedate_tz
-from . import text, version, exception
+from . import text, dt, version, exception
def bencode(num, alphabet="0123456789"):
@@ -228,63 +227,6 @@ def to_string(value):
return str(value)
-def to_datetime(value):
- """Convert 'value' to a datetime object"""
- if not value:
- return EPOCH
-
- if isinstance(value, datetime.datetime):
- return value
-
- if isinstance(value, str):
- try:
- if value[-1] == "Z":
- # compat for Python < 3.11
- value = value[:-1]
- dt = datetime.datetime.fromisoformat(value)
- if dt.tzinfo is None:
- if dt.microsecond:
- dt = dt.replace(microsecond=0)
- else:
- # convert to naive UTC
- dt = dt.astimezone(datetime.timezone.utc).replace(
- microsecond=0, tzinfo=None)
- return dt
- except Exception:
- pass
-
- return text.parse_timestamp(value, EPOCH)
-
-
-def datetime_to_timestamp(dt):
- """Convert naive UTC datetime to Unix timestamp"""
- return (dt - EPOCH) / SECOND
-
-
-def datetime_to_timestamp_string(dt):
- """Convert naive UTC datetime to Unix timestamp string"""
- try:
- return str((dt - EPOCH) // SECOND)
- except Exception:
- return ""
-
-
-if sys.hexversion < 0x30c0000:
- # Python <= 3.11
- datetime_utcfromtimestamp = datetime.datetime.utcfromtimestamp
- datetime_utcnow = datetime.datetime.utcnow
- datetime_from_timestamp = datetime_utcfromtimestamp
-else:
- # Python >= 3.12
- def datetime_from_timestamp(ts=None):
- """Convert Unix timestamp to naive UTC datetime"""
- Y, m, d, H, M, S, _, _, _ = time.gmtime(ts)
- return datetime.datetime(Y, m, d, H, M, S)
-
- datetime_utcfromtimestamp = datetime_from_timestamp
- datetime_utcnow = datetime_from_timestamp
-
-
def json_default(obj):
if isinstance(obj, CustomNone):
return None
@@ -379,7 +321,7 @@ def extract_headers(response):
text.nameext_from_url(name, data)
if hlm := headers.get("last-modified"):
- data["date"] = datetime.datetime(*parsedate_tz(hlm)[:6])
+ data["date"] = dt.datetime(*parsedate_tz(hlm)[:6])
return data
@@ -751,11 +693,11 @@ class Flags():
# 735506 == 739342 - 137 * 28
# v135.0 release of Chrome on 2025-04-01 has ordinal 739342
# 735562 == 739342 - 135 * 28
-# _ord_today = datetime.date.today().toordinal()
+# _ord_today = dt.date.today().toordinal()
# _ff_ver = (_ord_today - 735506) // 28
# _ch_ver = (_ord_today - 735562) // 28
-_ff_ver = (datetime.date.today().toordinal() - 735506) // 28
+_ff_ver = (dt.date.today().toordinal() - 735506) // 28
# _ch_ver = _ff_ver - 2
re = text.re
@@ -763,8 +705,6 @@ re_compile = text.re_compile
NONE = CustomNone()
FLAGS = Flags()
-EPOCH = datetime.datetime(1970, 1, 1)
-SECOND = datetime.timedelta(0, 1)
WINDOWS = (os.name == "nt")
SENTINEL = object()
EXECUTABLE = getattr(sys, "frozen", False)
@@ -786,8 +726,8 @@ GLOBALS = {
"contains" : contains,
"parse_int": text.parse_int,
"urlsplit" : urllib.parse.urlsplit,
- "datetime" : datetime.datetime,
- "timedelta": datetime.timedelta,
+ "datetime" : dt.datetime,
+ "timedelta": dt.timedelta,
"abort" : raises(exception.StopExtraction),
"error" : raises(exception.AbortExtraction),
"terminate": raises(exception.TerminateExtraction),
@@ -1071,6 +1011,8 @@ class RangePredicate():
if isinstance(rangespec, str):
rangespec = rangespec.split(",")
+ elif isinstance(rangespec, int):
+ rangespec = (str(rangespec),)
for group in rangespec:
if not group:
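RangePredicate now also accepts a plain integer rangespec, wrapping it as a one-element tuple of its string form. A usage sketch grounded in the tests added later in this patch; constructing the predicate from a single range string is an assumption taken from the test fixture:

    from gallery_dl import util  # assumes gallery-dl is importable

    pred = util.RangePredicate("1")
    print(pred._parse(2))           # [range(2, 3)]
    print(pred._parse(["2", "3"]))  # [range(2, 3), range(3, 4)]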
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index bc70f74..0dcb01a 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.30.10"
+__version__ = "1.31.1"
__variant__ = None
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index b7ee1ca..a4d8097 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -55,6 +55,8 @@ def construct_YoutubeDL(module, obj, user_opts, system_opts=None):
opts["min_filesize"] = text.parse_bytes(config("filesize-min"), None)
if opts.get("max_filesize") is None:
opts["max_filesize"] = text.parse_bytes(config("filesize-max"), None)
+ if opts.get("overwrites") is None and not config("skip", True):
+ opts["overwrites"] = True
if opts.get("ratelimit") is None:
if rate := config("rate"):
func = util.build_selection_func(rate, 0, text.parse_bytes)
@@ -262,7 +264,7 @@ def parse_command_line(module, argv):
else module.match_filter_func(opts.match_filter))
if cookiesfrombrowser := getattr(opts, "cookiesfrombrowser", None):
- pattern = util.re(r"""(?x)
+ pattern = text.re(r"""(?x)
(?P<name>[^+:]+)
(?:\s*\+\s*(?P<keyring>[^:]+))?
(?:\s*:\s*(?!:)(?P<profile>.+?))?
@@ -528,7 +530,7 @@ def legacy_postprocessors(opts, module, ytdlp, compat_opts):
if len(dur) == 2 and all(t is not None for t in dur):
remove_ranges.append(tuple(dur))
continue
- remove_chapters_patterns.append(util.re(regex))
+ remove_chapters_patterns.append(text.re(regex))
if opts.remove_chapters or sponsorblock_query:
postprocessors.append({
"key": "ModifyChapters",
diff --git a/setup.py b/setup.py
index f289d0f..fd2bff7 100644
--- a/setup.py
+++ b/setup.py
@@ -143,6 +143,7 @@ def build_setuptools():
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
+ "Programming Language :: Python :: 3.14",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
"Topic :: Internet :: WWW/HTTP",
diff --git a/test/test_downloader.py b/test/test_downloader.py
index f6c3dbe..fb442c4 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -298,6 +298,15 @@ class TestHTTPDownloader(TestDownloaderBase):
self.assertTrue(success)
self.assertEqual(pathfmt.temppath, "")
+ def test_http_empty(self):
+ url = f"{self.address}/~NUL"
+ pathfmt = self._prepare_destination(None, extension=None)
+ with self.assertLogs(self.downloader.log, "WARNING") as log_info:
+ success = self.downloader.download(url, pathfmt)
+ self.assertFalse(success)
+ self.assertEqual(log_info.output[0],
+ "WARNING:downloader.http:Empty file")
+
class TestTextDownloader(TestDownloaderBase):
@@ -400,6 +409,7 @@ SAMPLES = {
("blend", b"BLENDER-v303RENDH"),
("obj" , b"# Blender v3.2.0 OBJ File: 'foo.blend'"),
("clip", b"CSFCHUNK\x00\x00\x00\x00"),
+ ("~NUL", b""),
}
@@ -428,8 +438,9 @@ def generate_tests():
return test
for idx, (ext, content) in enumerate(SAMPLES):
- test = generate_test(idx, ext, content)
- setattr(TestHTTPDownloader, test.__name__, test)
+ if ext[0].isalnum():
+ test = generate_test(idx, ext, content)
+ setattr(TestHTTPDownloader, test.__name__, test)
generate_tests()
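The new ("~NUL", b"") sample provides an empty response body, and its leading "~" deliberately fails the str.isalnum() guard, so no generic signature test is generated for it; only the dedicated test_http_empty() exercises the "Empty file" warning:

    print("~NUL"[0].isalnum())  # False -> excluded from generated tests
    print("clip"[0].isalnum())  # True  -> gets a generated signature test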
diff --git a/test/test_dt.py b/test/test_dt.py
new file mode 100644
index 0000000..02e3ac2
--- /dev/null
+++ b/test/test_dt.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+import os
+import sys
+import unittest
+
+import datetime
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import dt # noqa E402
+
+
+class TestDatetime(unittest.TestCase):
+
+ def test_convert(self, f=dt.convert):
+
+ def _assert(value, expected):
+ result = f(value)
+ self.assertIsInstance(result, datetime.datetime)
+ self.assertEqual(result, expected, msg=repr(value))
+
+ d = datetime.datetime(2010, 1, 1)
+ self.assertIs(f(d), d)
+
+ _assert(d , d)
+ _assert(1262304000 , d)
+ _assert(1262304000.0 , d)
+ _assert(1262304000.123, d)
+ _assert("1262304000" , d)
+
+ _assert("2010-01-01" , d)
+ _assert("2010-01-01 00:00:00" , d)
+ _assert("2010-01-01T00:00:00" , d)
+ _assert("2010-01-01T00:00:00.123456" , d)
+ _assert("2009-12-31T19:00:00-05:00" , d)
+ _assert("2009-12-31T19:00:00.123456-05:00", d)
+ _assert("2010-01-01T00:00:00Z" , d)
+ _assert("2010-01-01T00:00:00.123456Z" , d)
+ _assert("2009-12-31T19:00:00-0500" , d)
+ _assert("2009-12-31T19:00:00.123456-0500" , d)
+
+ _assert(0 , dt.NONE)
+ _assert("" , dt.NONE)
+ _assert("foo", dt.NONE)
+ _assert(None , dt.NONE)
+ _assert(() , dt.NONE)
+ _assert([] , dt.NONE)
+ _assert({} , dt.NONE)
+ _assert((1, 2, 3), dt.NONE)
+
+ @unittest.skipIf(sys.hexversion < 0x30b0000,
+ "extended fromisoformat timezones")
+ def test_convert_tz(self, f=dt.convert):
+
+ def _assert(value, expected):
+ result = f(value)
+ self.assertIsInstance(result, datetime.datetime)
+ self.assertEqual(result, expected, msg=repr(value))
+
+ d = datetime.datetime(2010, 1, 1)
+ _assert("2009-12-31T19:00:00-05" , d)
+ _assert("2009-12-31T19:00:00.123456-05" , d)
+
+ def test_to_timestamp(self, f=dt.to_ts):
+ self.assertEqual(f(dt.EPOCH), 0.0)
+ self.assertEqual(f(datetime.datetime(2010, 1, 1)), 1262304000.0)
+ self.assertEqual(f(datetime.datetime(2010, 1, 1, 0, 0, 0, 128000)),
+ 1262304000.128000)
+ with self.assertRaises(TypeError):
+ f(None)
+
+ def test_to_timestamp_string(self, f=dt.to_ts_string):
+ self.assertEqual(f(dt.EPOCH), "0")
+ self.assertEqual(f(datetime.datetime(2010, 1, 1)), "1262304000")
+ self.assertEqual(f(None), "")
+
+ def test_from_timestamp(self, f=dt.from_ts):
+ self.assertEqual(f(0.0), dt.EPOCH)
+ self.assertEqual(f(1262304000.0), datetime.datetime(2010, 1, 1))
+ self.assertEqual(f(1262304000.128000).replace(microsecond=0),
+ datetime.datetime(2010, 1, 1, 0, 0, 0))
+
+ def test_now(self, f=dt.now):
+ self.assertIsInstance(f(), datetime.datetime)
+
+ def test_parse_timestamp(self, f=dt.parse_ts):
+ null = dt.from_ts(0)
+ value = dt.from_ts(1555816235)
+
+ self.assertEqual(f(0) , null)
+ self.assertEqual(f("0") , null)
+ self.assertEqual(f(1555816235) , value)
+ self.assertEqual(f("1555816235"), value)
+
+ for value in ((), [], {}, None, ""):
+ self.assertEqual(f(value), dt.NONE)
+ self.assertEqual(f(value, "foo"), "foo")
+
+ def test_parse(self, f=dt.parse):
+ self.assertEqual(
+ f("1970.01.01", "%Y.%m.%d"),
+ dt.EPOCH,
+ )
+ self.assertEqual(
+ f("May 7, 2019 9:33 am", "%B %d, %Y %I:%M %p"),
+ datetime.datetime(2019, 5, 7, 9, 33, 0),
+ )
+ self.assertEqual(
+ f("2019-05-07T21:25:02.753+0900", "%Y-%m-%dT%H:%M:%S.%f%z"),
+ datetime.datetime(2019, 5, 7, 12, 25, 2),
+ )
+
+ for value in ((), [], {}, None, 1, 2.3):
+ self.assertEqual(f(value, "%Y"), dt.NONE)
+
+ def test_parse_iso(self, f=dt.parse_iso):
+ self.assertEqual(
+ f("1970-01-01T00:00:00+00:00"),
+ dt.from_ts(0),
+ )
+ self.assertEqual(
+ f("2019-05-07T21:25:02+09:00"),
+ datetime.datetime(2019, 5, 7, 12, 25, 2),
+ )
+ self.assertEqual(
+ f("2019-05-07T12:25:02Z"),
+ datetime.datetime(2019, 5, 7, 12, 25, 2),
+ )
+ self.assertEqual(
+ f("2019-05-07 21:25:02"),
+ datetime.datetime(2019, 5, 7, 21, 25, 2),
+ )
+ self.assertEqual(
+ f("1970-01-01"),
+ dt.EPOCH,
+ )
+ self.assertEqual(
+ f("1970.01.01"),
+ dt.NONE,
+ )
+ self.assertEqual(
+ f("1970-01-01T00:00:00+0000"),
+ dt.EPOCH,
+ )
+ self.assertEqual(
+ f("2019-05-07T21:25:02.753+0900"),
+ datetime.datetime(2019, 5, 7, 12, 25, 2),
+ )
+
+ for value in ((), [], {}, None, 1, 2.3):
+ self.assertEqual(f(value), dt.NONE)
+
+ def test_none(self):
+ self.assertFalse(dt.NONE)
+ self.assertIsInstance(dt.NONE, dt.datetime)
+ self.assertEqual(str(dt.NONE), "[Invalid DateTime]")
+
+
+if __name__ == "__main__":
+ unittest.main()
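test_none() above pins down dt.NONE as a falsy datetime instance that renders as "[Invalid DateTime]". The dt.py hunk itself is not shown in this part of the patch, so the following is only a sketch that satisfies those three assertions, not gallery-dl's actual implementation:

    import datetime

    class _InvalidDateTime(datetime.datetime):
        def __bool__(self):
            return False

        def __str__(self):
            return "[Invalid DateTime]"

    NONE = _InvalidDateTime(1, 1, 1)  # placeholder value; falsy by override

    print(bool(NONE))                           # False
    print(isinstance(NONE, datetime.datetime))  # True
    print(NONE)                                 # [Invalid DateTime]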
diff --git a/test/test_extractor.py b/test/test_extractor.py
index a623e1d..c06b890 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -14,10 +14,9 @@ from unittest.mock import patch
import time
import string
-from datetime import datetime, timedelta
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from gallery_dl import extractor, util # noqa E402
+from gallery_dl import extractor, util, dt, config # noqa E402
from gallery_dl.extractor import mastodon # noqa E402
from gallery_dl.extractor.common import Extractor, Message # noqa E402
from gallery_dl.extractor.directlink import DirectlinkExtractor # noqa E402
@@ -40,7 +39,7 @@ class FakeExtractor(Extractor):
pattern = "fake:"
def items(self):
- yield Message.Version, 1
+ yield Message.Noop
yield Message.Url, "text:foobar", {}
@@ -233,8 +232,8 @@ class TestExtractorWait(unittest.TestCase):
def test_wait_until_datetime(self):
extr = extractor.find("generic:https://example.org/")
- until = util.datetime_utcnow() + timedelta(seconds=5)
- until_local = datetime.now() + timedelta(seconds=5)
+ until = dt.now() + dt.timedelta(seconds=5)
+ until_local = dt.datetime.now() + dt.timedelta(seconds=5)
if not until.microsecond:
until = until.replace(microsecond=until_local.microsecond)
@@ -251,8 +250,8 @@ class TestExtractorWait(unittest.TestCase):
self._assert_isotime(calls[0][1][1], until_local)
def _assert_isotime(self, output, until):
- if not isinstance(until, datetime):
- until = datetime.fromtimestamp(until)
+ if not isinstance(until, dt.datetime):
+ until = dt.datetime.fromtimestamp(until)
o = self._isotime_to_seconds(output)
u = self._isotime_to_seconds(until.time().isoformat()[:8])
self.assertLessEqual(o-u, 1.0)
@@ -262,6 +261,79 @@ class TestExtractorWait(unittest.TestCase):
return int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2])
+class TextExtractorCommonDateminmax(unittest.TestCase):
+
+ def setUp(self):
+ config.clear()
+
+ tearDown = setUp
+
+ def test_date_min_max_default(self):
+ extr = extractor.find("generic:https://example.org/")
+
+ dmin, dmax = extr._get_date_min_max()
+ self.assertEqual(dmin, None)
+ self.assertEqual(dmax, None)
+
+ dmin, dmax = extr._get_date_min_max(..., -1)
+ self.assertEqual(dmin, ...)
+ self.assertEqual(dmax, -1)
+
+ def test_date_min_max_timestamp(self):
+ extr = extractor.find("generic:https://example.org/")
+ config.set((), "date-min", 1262304000)
+ config.set((), "date-max", 1262304000.123)
+
+ dmin, dmax = extr._get_date_min_max()
+ self.assertEqual(dmin, 1262304000)
+ self.assertEqual(dmax, 1262304000.123)
+
+ def test_date_min_max_iso(self):
+ extr = extractor.find("generic:https://example.org/")
+ config.set((), "date-min", "2010-01-01")
+ config.set((), "date-max", "2010-01-01T00:01:03")
+
+ dmin, dmax = extr._get_date_min_max()
+ self.assertEqual(dmin, 1262304000)
+ self.assertEqual(dmax, 1262304063)
+
+ def test_date_min_max_iso_invalid(self):
+ extr = extractor.find("generic:https://example.org/")
+ config.set((), "date-min", "2010-01-01")
+ config.set((), "date-max", "2010-01")
+
+ with self.assertLogs() as log_info:
+ dmin, dmax = extr._get_date_min_max()
+ self.assertEqual(dmin, 1262304000)
+ self.assertEqual(dmax, None)
+
+ self.assertEqual(len(log_info.output), 1)
+ self.assertEqual(
+ log_info.output[0],
+ "WARNING:generic:Unable to parse 'date-max': "
+ "Invalid isoformat string '2010-01'")
+
+ def test_date_min_max_fmt(self):
+ extr = extractor.find("generic:https://example.org/")
+ config.set((), "date-format", "%B %d %Y")
+ config.set((), "date-min", "January 01 2010")
+ config.set((), "date-max", "August 18 2022")
+
+ dmin, dmax = extr._get_date_min_max()
+ self.assertEqual(dmin, 1262304000)
+ self.assertEqual(dmax, 1660780800)
+
+ def test_date_min_max_mix(self):
+ extr = extractor.find("generic:https://example.org/")
+ config.set((), "date-format", "%B %d %Y")
+ config.set((), "date-min", "January 01 2010")
+ config.set((), "date-max", 1262304061)
+
+ dmin, dmax = extr._get_date_min_max()
+ self.assertEqual(dmin, 1262304000)
+ self.assertEqual(dmax, 1262304061)
+
+
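_get_date_min_max() now accepts raw Unix timestamps (int or float), ISO strings, and strings parsed via the "date-format" option, normalizing all of them to timestamps. Mirroring test_date_min_max_mix above:

    from gallery_dl import config, extractor  # assumes gallery-dl importable

    config.set((), "date-format", "%B %d %Y")
    config.set((), "date-min", "January 01 2010")
    config.set((), "date-max", 1262304061)

    extr = extractor.find("generic:https://example.org/")
    print(extr._get_date_min_max())  # (1262304000, 1262304061)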
class TextExtractorOAuth(unittest.TestCase):
def test_oauth1(self):
diff --git a/test/test_formatter.py b/test/test_formatter.py
index 01e3a88..67df279 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -15,7 +15,7 @@ import datetime
import tempfile
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from gallery_dl import formatter, text, util, config # noqa E402
+from gallery_dl import formatter, text, dt, util, config # noqa E402
try:
import jinja2
@@ -154,7 +154,7 @@ class TestFormatter(unittest.TestCase):
self._run_test("{t}" , self.kwdict["t"] , None, int)
self._run_test("{t}" , self.kwdict["t"] , None, util.identity)
self._run_test("{dt}", self.kwdict["dt"], None, util.identity)
- self._run_test("{ds}", self.kwdict["dt"], None, text.parse_datetime)
+ self._run_test("{ds}", self.kwdict["dt"], None, dt.parse_iso)
self._run_test("{ds:D%Y-%m-%dT%H:%M:%S%z}", self.kwdict["dt"],
None, util.identity)
@@ -248,6 +248,19 @@ class TestFormatter(unittest.TestCase):
self._run_test("{a:L50/foo/>51}", "foo")
self._run_test("{a:Lab/foo/}", "foo")
+ def test_specifier_maxlen_bytes(self):
+ v = self.kwdict["a"]
+ self._run_test("{a:Lb5/foo/}" , "foo")
+ self._run_test("{a:Lb50/foo/}", v)
+ self._run_test("{a:Lb50/foo/>50}", " " * 39 + v)
+ self._run_test("{a:Lb50/foo/>51}", "foo")
+ self._run_test("{a:Lbab/foo/}", "foo")
+
+ v = self.kwdict["j"]
+ self._run_test("{j:Lb5/foo/}" , "foo")
+ self._run_test("{j:Lb50/foo/}", v)
+ self._run_test("{j:Lbab/foo/}", "foo")
+
def test_specifier_join(self):
self._run_test("{l:J}" , "abc")
self._run_test("{l:J,}" , "a,b,c")
@@ -271,8 +284,8 @@ class TestFormatter(unittest.TestCase):
def test_specifier_datetime(self):
self._run_test("{ds:D%Y-%m-%dT%H:%M:%S%z}", "2010-01-01 00:00:00")
- self._run_test("{ds:D%Y}", "2010-01-01T01:00:00+01:00")
- self._run_test("{l:D%Y}", "None")
+ self._run_test("{ds:D%Y}", "[Invalid DateTime]")
+ self._run_test("{l2:D%Y}", "[Invalid DateTime]")
def test_specifier_offset(self):
self._run_test("{dt:O 01:00}", "2010-01-01 01:00:00")
@@ -332,6 +345,17 @@ class TestFormatter(unittest.TestCase):
with self.assertRaises(ValueError):
self._run_test("{a:Xfoo/ */}", "hello wo *")
+ def test_specifier_limit_bytes(self):
+ self._run_test("{a:Xb20/ */}", "hElLo wOrLd")
+ self._run_test("{a:Xb10/ */}", "hElLo wO *")
+
+ self._run_test("{j:Xb50/〜/}", "げんそうきょう")
+ self._run_test("{j:Xb20/〜/}", "げんそうき〜")
+ self._run_test("{j:Xb20/ */}", "げんそうきょ *")
+
+ with self.assertRaises(ValueError):
+ self._run_test("{a:Xbfoo/ */}", "hello wo *")
+
def test_specifier_map(self):
self._run_test("{L:Mname/}" ,
"['John Doe', 'Jane Smith', 'Max Mustermann']")
@@ -345,6 +369,15 @@ class TestFormatter(unittest.TestCase):
with self.assertRaises(ValueError):
self._run_test("{t:Mname", "")
+ def test_specifier_identity(self):
+ self._run_test("{a:I}", self.kwdict["a"])
+ self._run_test("{i:I}", self.kwdict["i"])
+ self._run_test("{dt:I}", self.kwdict["dt"])
+
+ self._run_test("{t!D:I}", self.kwdict["dt"])
+ self._run_test("{t!D:I/O+01:30}", self.kwdict["dt"])
+ self._run_test("{i:A+1/I}", self.kwdict["i"]+1)
+
def test_chain_special(self):
# multiple replacements
self._run_test("{a:Rh/C/RE/e/RL/l/}", "Cello wOrld")
diff --git a/test/test_job.py b/test/test_job.py
index 0a533ea..ec86c6c 100644
--- a/test/test_job.py
+++ b/test/test_job.py
@@ -214,6 +214,7 @@ Request interval (default):
def test_base_category(self):
extr = TestExtractor.from_url("test:")
extr.basecategory = "test_basecategory"
+ extr.basesubcategory = "test_basesubcategory"
self.assertEqual(self._capture_stdout(extr), """\
Category / Subcategory / Basecategory
@@ -376,7 +377,7 @@ class TestExtractor(Extractor):
root = "https://example.org"
user = self.user
- yield Message.Directory, {
+ yield Message.Directory, "", {
"user": user,
"author": user,
}
diff --git a/test/test_path.py b/test/test_path.py
new file mode 100644
index 0000000..0639339
--- /dev/null
+++ b/test/test_path.py
@@ -0,0 +1,297 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import path, extractor, config # noqa E402
+
+KWDICT = {
+ "category" : "test",
+ "filename" : "file",
+ "extension": "ext",
+ "name" : "test-テスト-'&>-/:~",
+ "ext" : "txt",
+ "foo" : "bar",
+ "id" : 123,
+}
+
+
+class TestPath(unittest.TestCase):
+
+ def _pfmt(self, data={}, kwdict=False, extr=extractor.find("noop")):
+ pathfmt = path.PathFormat(extr)
+
+ if kwdict:
+ pathfmt.set_directory({
+ **(kwdict if isinstance(kwdict, dict) else KWDICT),
+ **data,
+ })
+
+ return pathfmt
+
+ def setUp(self):
+ config.clear()
+ path.WINDOWS = False
+
+
+class TestPathObject(TestPath):
+
+ def test_default(self):
+ pfmt = self._pfmt()
+
+ self.assertEqual(pfmt.kwdict, {})
+ self.assertEqual(pfmt.delete, False)
+ self.assertEqual(pfmt.filename, "")
+ self.assertEqual(pfmt.extension, "")
+ self.assertEqual(pfmt.directory, "")
+ self.assertEqual(pfmt.realdirectory, "")
+ self.assertEqual(pfmt.path, "")
+ self.assertEqual(pfmt.realpath, "")
+ self.assertEqual(pfmt.temppath, "")
+ self.assertEqual(pfmt.basedirectory, "./gallery-dl/")
+ self.assertEqual(pfmt.strip, "")
+
+ self.assertIs(pfmt.filename_conditions, None)
+ self.assertIs(pfmt.directory_conditions, None)
+
+ self.assertTrue(callable(pfmt.extension_map))
+ self.assertTrue(callable(pfmt.clean_segment))
+ self.assertTrue(callable(pfmt.clean_path))
+
+ self.assertTrue(callable(pfmt.filename_formatter))
+ for fmt in pfmt.directory_formatters:
+ self.assertTrue(callable(fmt))
+
+ def test_str(self):
+ pfmt = self._pfmt()
+ self.assertEqual(str(pfmt), pfmt.realdirectory)
+ self.assertEqual(str(pfmt), "")
+
+ pfmt = self._pfmt()
+ pfmt.set_directory(KWDICT)
+ pfmt.set_filename(KWDICT)
+ pfmt.build_path()
+ self.assertEqual(str(pfmt), pfmt.realpath)
+ self.assertEqual(str(pfmt), "./gallery-dl/test/file.ext")
+
+
+class TestPathOptions(TestPath):
+
+ def test_option_filename(self):
+ fname = self._pfmt().build_filename(KWDICT)
+ self.assertEqual(fname , "file.ext")
+
+ config.set((), "filename", "foo.{foo}")
+ fname = self._pfmt().build_filename(KWDICT)
+ self.assertEqual(fname, "foo.bar")
+
+ config.set((), "filename", {
+ "foo == 'baz'": "foo",
+ "id % 2" : "bar",
+ "" : "baz",
+ })
+ fname = self._pfmt().build_filename(KWDICT)
+ self.assertEqual(fname, "bar")
+
+ def test_option_directory(self):
+ pfmt = self._pfmt(kwdict=True)
+ self.assertEqual(pfmt.directory , "./gallery-dl/test/")
+ self.assertEqual(pfmt.realdirectory, "./gallery-dl/test/")
+
+ config.set((), "directory", ["{foo}", "bar"])
+ pfmt = self._pfmt(kwdict=True)
+ self.assertEqual(pfmt.directory , "./gallery-dl/bar/bar/")
+ self.assertEqual(pfmt.realdirectory, "./gallery-dl/bar/bar/")
+
+ config.set((), "directory", {
+ "foo == 'baz'": ["a", "b", "c"],
+ "id % 2" : ["odd", "{id}"],
+ "" : ["{foo}", "bar"],
+ })
+ pfmt = self._pfmt(kwdict=True)
+ self.assertEqual(pfmt.directory , "./gallery-dl/odd/123/")
+ self.assertEqual(pfmt.realdirectory, "./gallery-dl/odd/123/")
+
+ def test_option_basedirectory(self):
+ config.set((), "base-directory", "{foo}")
+ pfmt = self._pfmt(kwdict=True)
+ self.assertEqual(pfmt.realdirectory, "{foo}/test/")
+
+ config.set((), "base-directory", {
+ "foo == 'baz'": "bar",
+ "id % 2" : "./odd",
+ "" : "./default",
+ })
+ pfmt = self._pfmt(kwdict=True)
+ self.assertEqual(pfmt.realdirectory, "./odd/test/")
+
+ def test_option_keywordsdefault(self):
+ config.set((), "directory", ["{baz}"])
+ config.set((), "base-directory", "")
+
+ pfmt = self._pfmt(kwdict=True)
+ self.assertEqual(pfmt.realdirectory, "None/")
+
+ config.set((), "keywords-default", "ND")
+ pfmt = self._pfmt(kwdict=True)
+ self.assertEqual(pfmt.realdirectory, "ND/")
+
+ config.set((), "keywords-default", "")
+ pfmt = self._pfmt(kwdict=True)
+ self.assertEqual(pfmt.realdirectory, "")
+
+ def test_option_extensionmap_default(self):
+ kwdict = KWDICT.copy()
+ pfmt = self._pfmt()
+ pfmt.set_filename(kwdict)
+ self.assertEqual(pfmt.extension, "ext")
+
+ pfmt.set_extension("jpg")
+ self.assertEqual(pfmt.extension, "jpg")
+ self.assertEqual(kwdict["extension"], "jpg")
+
+ pfmt.set_extension("png")
+ self.assertEqual(pfmt.extension, "png")
+ self.assertEqual(kwdict["extension"], "png")
+
+ pfmt.set_extension("jpeg")
+ self.assertEqual(pfmt.extension, "jpg")
+ self.assertEqual(kwdict["extension"], "jpg")
+
+ for ext, repl in path.EXTENSION_MAP.items():
+ pfmt.set_extension(ext)
+ self.assertEqual(pfmt.extension, repl)
+ self.assertEqual(kwdict["extension"], repl)
+
+ def test_option_extensionmap_custom(self):
+ extmap = {
+ "bitmap": "bmp",
+ "ping" : "png",
+ "jiff" : "gif",
+ }
+ config.set((), "extension-map", extmap)
+
+ kwdict = KWDICT.copy()
+ pfmt = self._pfmt()
+ pfmt.set_filename(kwdict)
+
+ pfmt.set_extension("jpg")
+ self.assertEqual(pfmt.extension, "jpg")
+ self.assertEqual(kwdict["extension"], "jpg")
+
+ pfmt.set_extension("ping")
+ self.assertEqual(pfmt.extension, "png")
+ self.assertEqual(kwdict["extension"], "png")
+
+ for ext, repl in extmap.items():
+ pfmt.set_extension(ext)
+ self.assertEqual(pfmt.extension, repl)
+ self.assertEqual(kwdict["extension"], repl)
+
+ for ext, repl in path.EXTENSION_MAP.items():
+ pfmt.set_extension(ext)
+ self.assertNotEqual(pfmt.extension, repl)
+ self.assertNotEqual(kwdict["extension"], repl)
+
+ def test_option_pathrestrict(self):
+ config.set((), "filename", "{name}.{ext}")
+
+ config.set((), "path-restrict", "unix")
+ fname = self._pfmt().build_filename(KWDICT)
+ self.assertEqual(fname, "test-テスト-'&>-_:~.txt", "unix")
+
+ config.set((), "path-restrict", "windows")
+ fname = self._pfmt().build_filename(KWDICT)
+ self.assertEqual(fname, "test-テスト-'&_-__~.txt", "windows")
+
+ config.set((), "path-restrict", "ascii")
+ fname = self._pfmt().build_filename(KWDICT)
+ self.assertEqual(fname, "test____________.txt", "ascii")
+
+ config.set((), "path-restrict", "ascii+")
+ fname = self._pfmt().build_filename(KWDICT)
+ self.assertEqual(fname, "test-___-'&_-__~.txt", "ascii+")
+
+ def test_option_pathrestrict_custom(self):
+ config.set((), "filename", "{name}.{ext}")
+
+ config.set((), "path-restrict", "ts-")
+ fname = self._pfmt().build_filename(KWDICT)
+ self.assertEqual(fname, "_e___テスト_'&>_/:~._x_", "custom str")
+
+ config.set((), "path-restrict", {
+ "t": "A",
+ "s": "B",
+ "-": "###",
+ "/": "|"
+ })
+ fname = self._pfmt().build_filename(KWDICT)
+ self.assertEqual(fname, "AeBA###テスト###'&>###|:~.AxA", "custom dict")
+
+ config.set((), "path-restrict", {
+ "a-z": "x",
+ "テ": "te",
+ "ス": "su",
+ "ト": "to",
+ })
+ fname = self._pfmt().build_filename(KWDICT)
+ self.assertEqual(fname, "xxxx-tesuto-'&>-/:~.xxx", "custom dict range")
+
+ def test_option_pathreplace(self):
+ config.set((), "filename", "{name}.{ext}")
+
+ config.set((), "path-restrict", "unix")
+ config.set((), "path-replace", "&")
+ fname = self._pfmt().build_filename(KWDICT)
+ self.assertEqual(fname, "test-テスト-'&>-&:~.txt", "&")
+
+ config.set((), "path-restrict", "windows")
+ config.set((), "path-replace", "***")
+ fname = self._pfmt().build_filename(KWDICT)
+ self.assertEqual(fname, "test-テスト-'&***-******~.txt", "***")
+
+ def test_option_pathremove(self):
+ config.set((), "filename", "{name}.{ext}")
+
+ config.set((), "path-remove", "-&/")
+ fname = self._pfmt().build_filename(KWDICT)
+ self.assertEqual(fname, "testテスト'>_:~.txt")
+
+ config.set((), "path-remove", "a-z0-9")
+ fname = self._pfmt().build_filename(KWDICT)
+ self.assertEqual(fname, "-テスト-'&>-_:~.")
+
+ def test_option_pathstrip(self):
+ config.set((), "directory", [" . {name}.{ext} . "])
+ config.set((), "base-directory", "")
+ config.set((), "path-restrict", "unix")
+
+ config.set((), "path-strip", "unix")
+ pfmt = self._pfmt(kwdict=True)
+ self.assertEqual(
+ pfmt.realdirectory, ". test-テスト-'&>-_:~.txt ./", "unix")
+
+ config.set((), "path-strip", "windows")
+ pfmt = self._pfmt(kwdict=True)
+ self.assertEqual(
+ pfmt.realdirectory, ". test-テスト-'&>-_:~.txt/", "windows")
+
+ config.set((), "path-strip", "txt")
+ pfmt = self._pfmt(kwdict=True)
+ self.assertEqual(
+ pfmt.realdirectory, ". test-テスト-'&>-_:~.txt ./", "custom")
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 5d52e1d..e4d01c2 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -78,6 +78,7 @@ class BasePostprocessorTest(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.dir = tempfile.TemporaryDirectory()
+ config.clear()
config.set((), "base-directory", cls.dir.name)
cls.job = FakeJob()
@@ -374,6 +375,40 @@ class ExecTest(BasePostprocessorTest):
m_aa.assert_called_once_with(self.pathfmt.kwdict)
m_ac.assert_called_once()
+ def test_verbose_string(self):
+ self._create({
+ "command": "echo foo bar",
+ "verbose": False,
+ })
+
+ with patch("gallery_dl.util.Popen") as p, \
+ self.assertLogs(level=10) as log_info:
+ i = Mock()
+ i.wait.return_value = 123
+ p.return_value = i
+ self._trigger(("after",))
+
+ msg = "DEBUG:postprocessor.exec:Running 'echo'"
+ self.assertEqual(log_info.output[0], msg)
+ self.assertIn("'echo' returned with non-zero ", log_info.output[1])
+
+ def test_verbose_list(self):
+ self._create({
+ "command": ["echo", "foo", "bar"],
+ "verbose": False,
+ })
+
+ with patch("gallery_dl.util.Popen") as p, \
+ self.assertLogs(level=10) as log_info:
+ i = Mock()
+ i.wait.return_value = 123
+ p.return_value = i
+ self._trigger(("after",))
+
+ msg = "DEBUG:postprocessor.exec:Running 'echo'"
+ self.assertEqual(log_info.output[0], msg)
+ self.assertIn("'echo' returned with non-zero ", log_info.output[1])
+
class HashTest(BasePostprocessorTest):
@@ -453,7 +488,7 @@ class MetadataTest(BasePostprocessorTest):
self._trigger()
path = f"{self.pathfmt.realpath}.JSON"
- m.assert_called_once_with(path, "w", encoding="utf-8")
+ m.assert_called_once_with(path, "w", encoding="utf-8", newline=None)
self.assertEqual(self._output(m), """{
"category": "test",
@@ -473,6 +508,7 @@ class MetadataTest(BasePostprocessorTest):
"indent" : None,
"open" : "a",
"encoding" : "UTF-8",
+ "newline" : "\r\n",
"extension" : "JSON",
}, {
"public" : "hello ワールド",
@@ -487,7 +523,9 @@ class MetadataTest(BasePostprocessorTest):
self._trigger()
path = f"{self.pathfmt.realpath}.JSON"
- m.assert_called_once_with(path, "a", encoding="UTF-8")
+ m.assert_called_once_with(path, "a", encoding="UTF-8", newline='\r\n')
+ # Since we mocked the call to open,
+ # we don't actually see the effect of setting newline.
self.assertEqual(self._output(m), """{\
"_private" : "foo \\u30d0\\u30fc",\
"category" : "test",\
@@ -508,7 +546,7 @@ class MetadataTest(BasePostprocessorTest):
self._trigger()
path = f"{self.pathfmt.realpath}.txt"
- m.assert_called_once_with(path, "w", encoding="utf-8")
+ m.assert_called_once_with(path, "w", encoding="utf-8", newline=None)
self.assertEqual(self._output(m), "foo\nbar\nbaz\n")
def test_metadata_tags_split_1(self):
@@ -599,7 +637,7 @@ class MetadataTest(BasePostprocessorTest):
self._trigger()
path = f"{self.pathfmt.realdirectory}file.json"
- m.assert_called_once_with(path, "w", encoding="utf-8")
+ m.assert_called_once_with(path, "w", encoding="utf-8", newline=None)
def test_metadata_extfmt_2(self):
self._create({
@@ -611,7 +649,7 @@ class MetadataTest(BasePostprocessorTest):
self._trigger()
path = f"{self.pathfmt.realdirectory}file.2.EXT-data:tESt"
- m.assert_called_once_with(path, "w", encoding="utf-8")
+ m.assert_called_once_with(path, "w", encoding="utf-8", newline=None)
def test_metadata_directory(self):
self._create({
@@ -622,7 +660,7 @@ class MetadataTest(BasePostprocessorTest):
self._trigger()
path = f"{self.pathfmt.realdirectory}metadata/file.ext.json"
- m.assert_called_once_with(path, "w", encoding="utf-8")
+ m.assert_called_once_with(path, "w", encoding="utf-8", newline=None)
def test_metadata_directory_2(self):
self._create({
@@ -634,7 +672,7 @@ class MetadataTest(BasePostprocessorTest):
self._trigger()
path = f"{self.pathfmt.realdirectory}metadata/file.json"
- m.assert_called_once_with(path, "w", encoding="utf-8")
+ m.assert_called_once_with(path, "w", encoding="utf-8", newline=None)
def test_metadata_directory_format(self):
self._create(
@@ -646,7 +684,7 @@ class MetadataTest(BasePostprocessorTest):
self._trigger()
path = f"{self.pathfmt.realdirectory}../json/12500/file.ext.json"
- m.assert_called_once_with(path, "w", encoding="utf-8")
+ m.assert_called_once_with(path, "w", encoding="utf-8", newline=None)
def test_metadata_directory_empty(self):
self._create(
@@ -657,7 +695,7 @@ class MetadataTest(BasePostprocessorTest):
self._trigger()
path = f"{self.pathfmt.realdirectory}./file.ext.json"
- m.assert_called_once_with(path, "w", encoding="utf-8")
+ m.assert_called_once_with(path, "w", encoding="utf-8", newline=None)
def test_metadata_basedirectory(self):
self._create({"base-directory": True})
@@ -666,7 +704,7 @@ class MetadataTest(BasePostprocessorTest):
self._trigger()
path = f"{self.pathfmt.basedirectory}file.ext.json"
- m.assert_called_once_with(path, "w", encoding="utf-8")
+ m.assert_called_once_with(path, "w", encoding="utf-8", newline=None)
def test_metadata_basedirectory_custom(self):
self._create({
@@ -678,7 +716,7 @@ class MetadataTest(BasePostprocessorTest):
self._trigger()
path = "/home/test/meta/file.ext.json"
- m.assert_called_once_with(path, "w", encoding="utf-8")
+ m.assert_called_once_with(path, "w", encoding="utf-8", newline=None)
def test_metadata_filename(self):
self._create({
@@ -690,7 +728,7 @@ class MetadataTest(BasePostprocessorTest):
self._trigger()
path = f"{self.pathfmt.realdirectory}test_file__meta_.data"
- m.assert_called_once_with(path, "w", encoding="utf-8")
+ m.assert_called_once_with(path, "w", encoding="utf-8", newline=None)
def test_metadata_meta_path(self):
self._create({
@@ -790,7 +828,7 @@ class MetadataTest(BasePostprocessorTest):
self.assertGreater(len(self._output(m)), 0)
path = f"{self.pathfmt.realdirectory}file.ext.json"
- m.assert_called_once_with(path, "w", encoding="utf-8")
+ m.assert_called_once_with(path, "w", encoding="utf-8", newline=None)
def test_metadata_option_skip_false(self):
self._create({"skip": False})
@@ -802,6 +840,28 @@ class MetadataTest(BasePostprocessorTest):
self.assertTrue(not e.called)
self.assertTrue(m.called)
+ def test_metadata_option_newline(self):
+ self._create({
+ "newline": "\r\n",
+ "filename" : "data.json",
+ "directory" : "",
+ "base-directory": self.dir.name,
+ })
+
+ self._trigger()
+
+ path = os.path.join(self.dir.name, "data.json")
+ with open(path, newline="") as fp:
+ content = fp.read()
+
+ self.assertEqual(content, """\
+{\r\n\
+ "category": "test",\r\n\
+ "filename": "file",\r\n\
+ "extension": "ext"\r\n\
+}\r\n\
+""")
+
def test_metadata_option_include(self):
self._create(
{"include": ["_private", "filename", "foo"], "sort": True},
diff --git a/test/test_results.py b/test/test_results.py
index e7fcabf..0865686 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -310,10 +310,13 @@ class TestExtractorResults(unittest.TestCase):
elif isinstance(test, range):
self.assertRange(value, test, msg=path)
elif isinstance(test, set):
- try:
- self.assertIn(value, test, msg=path)
- except AssertionError:
- self.assertIn(type(value), test, msg=path)
+ for item in test:
+ if isinstance(item, type) and isinstance(value, item) or \
+ value == item:
+ break
+ else:
+ v = type(value) if len(str(value)) > 64 else value
+ self.fail(f"{v!r} not in {test}: {path}")
elif isinstance(test, list):
subtest = False
for idx, item in enumerate(test):
@@ -423,8 +426,7 @@ class ResultJob(job.DownloadJob):
def run(self):
self._init()
- for msg in self.extractor:
- self.dispatch(msg)
+ self.dispatch(self.extractor)
def handle_url(self, url, kwdict, fallback=None):
self._update_url(url)
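The rewritten set handling above makes the matching rule explicit: a value passes if it equals any item in the set or is an instance of any type in it, and a failure reports the value itself (or only its type when its repr would exceed 64 characters). The rule in isolation:

    def matches(value, test):
        for item in test:
            if isinstance(item, type) and isinstance(value, item) or \
                    value == item:
                return True
        return False

    print(matches(3, {1, 2, 3}))   # True  (equality)
    print(matches(3, {str, int}))  # True  (type match)
    print(matches("x", {1, 2}))    # False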
diff --git a/test/test_text.py b/test/test_text.py
index 0e187d7..eac7906 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -11,8 +11,6 @@ import os
import sys
import unittest
-import datetime
-
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gallery_dl import text, util # noqa E402
@@ -203,6 +201,10 @@ class TestText(unittest.TestCase):
self.assertEqual(f("http://example.org/v2/filename.ext"), result)
self.assertEqual(
f("http://example.org/v2/filename.ext?param=value#frag"), result)
+ self.assertEqual(
+ f("http://example.org/v2/foo%202?bar&<>.ext?param=value#frag"),
+ {"filename": "foo 2", "extension": ""},
+ )
# long "extension"
fn = "httpswww.example.orgpath-path-path-path-path-path-path-path"
@@ -212,6 +214,24 @@ class TestText(unittest.TestCase):
for value in INVALID:
self.assertEqual(f(value), empty)
+ def test_nameext_from_name(self, f=text.nameext_from_name):
+ self.assertEqual(
+ f(""),
+ {"filename": "", "extension": ""},
+ )
+ self.assertEqual(
+ f("filename.ext"),
+ {"filename": "filename", "extension": "ext"},
+ )
+ self.assertEqual(
+ f("foo%202?bar&<>.ext"),
+ {"filename": "foo%202?bar&<>", "extension": "ext"},
+ )
+
+ # long "extension"
+ fn = "httpswww.example.orgpath-path-path-path-path-path-path-path"
+ self.assertEqual(f(fn), {"filename": fn, "extension": ""})
+
def test_extract(self, f=text.extract):
txt = "<a><b>"
self.assertEqual(f(txt, "<", ">"), ("a" , 3))
@@ -519,51 +539,6 @@ class TestText(unittest.TestCase):
self.assertEqual(f({"ä&": "あと", "#": "?"}),
"%C3%A4%26=%E3%81%82%E3%81%A8&%23=%3F")
- def test_parse_timestamp(self, f=text.parse_timestamp):
- null = util.datetime_utcfromtimestamp(0)
- value = util.datetime_utcfromtimestamp(1555816235)
-
- self.assertEqual(f(0) , null)
- self.assertEqual(f("0") , null)
- self.assertEqual(f(1555816235) , value)
- self.assertEqual(f("1555816235"), value)
-
- for value in INVALID_ALT:
- self.assertEqual(f(value), None)
- self.assertEqual(f(value, "foo"), "foo")
-
- def test_parse_datetime(self, f=text.parse_datetime):
- null = util.datetime_utcfromtimestamp(0)
-
- self.assertEqual(f("1970-01-01T00:00:00+00:00"), null)
- self.assertEqual(f("1970-01-01T00:00:00+0000") , null)
- self.assertEqual(f("1970.01.01", "%Y.%m.%d") , null)
-
- self.assertEqual(
- f("2019-05-07T21:25:02+09:00"),
- datetime.datetime(2019, 5, 7, 12, 25, 2),
- )
- self.assertEqual(
- f("2019-05-07T21:25:02+0900"),
- datetime.datetime(2019, 5, 7, 12, 25, 2),
- )
- self.assertEqual(
- f("2019-05-07T21:25:02.753+0900", "%Y-%m-%dT%H:%M:%S.%f%z"),
- datetime.datetime(2019, 5, 7, 12, 25, 2),
- )
- self.assertEqual(
- f("2019-05-07T21:25:02", "%Y-%m-%dT%H:%M:%S", utcoffset=9),
- datetime.datetime(2019, 5, 7, 12, 25, 2),
- )
- self.assertEqual(
- f("2019-05-07 21:25:02"),
- "2019-05-07 21:25:02",
- )
-
- for value in INVALID:
- self.assertEqual(f(value), None)
- self.assertEqual(f("1970.01.01"), "1970.01.01")
-
if __name__ == "__main__":
unittest.main()
diff --git a/test/test_util.py b/test/test_util.py
index bfaab01..6784874 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -40,6 +40,7 @@ class TestRange(unittest.TestCase):
def test_parse_digit(self):
f = self.predicate._parse
+ self.assertEqual(f(2), [range(2, 3)])
self.assertEqual(f("2"), [range(2, 3)])
self.assertEqual(
@@ -48,6 +49,12 @@ class TestRange(unittest.TestCase):
range(3, 4),
range(4, 5)],
)
+ self.assertEqual(
+ f(["2", "3", "4"]),
+ [range(2, 3),
+ range(3, 4),
+ range(4, 5)],
+ )
def test_parse_range(self):
f = self.predicate._parse
@@ -406,89 +413,6 @@ def hash(value):
self.assertEqual(expr(value), result)
-class TestDatetime(unittest.TestCase):
-
- def test_to_datetime(self, f=util.to_datetime):
-
- def _assert(value, expected):
- result = f(value)
- self.assertIsInstance(result, datetime.datetime)
- self.assertEqual(result, expected, msg=repr(value))
-
- dt = datetime.datetime(2010, 1, 1)
- self.assertIs(f(dt), dt)
-
- _assert(dt , dt)
- _assert(1262304000 , dt)
- _assert(1262304000.0 , dt)
- _assert(1262304000.123, dt)
- _assert("1262304000" , dt)
-
- _assert("2010-01-01" , dt)
- _assert("2010-01-01 00:00:00" , dt)
- _assert("2010-01-01T00:00:00" , dt)
- _assert("2010-01-01T00:00:00.123456" , dt)
- _assert("2009-12-31T19:00:00-05:00" , dt)
- _assert("2009-12-31T19:00:00.123456-05:00", dt)
- _assert("2010-01-01T00:00:00Z" , dt)
- _assert("2010-01-01T00:00:00.123456Z" , dt)
-
- _assert(0 , util.EPOCH)
- _assert("" , util.EPOCH)
- _assert("foo", util.EPOCH)
- _assert(None , util.EPOCH)
- _assert(() , util.EPOCH)
- _assert([] , util.EPOCH)
- _assert({} , util.EPOCH)
- _assert((1, 2, 3), util.EPOCH)
-
- @unittest.skipIf(sys.hexversion < 0x30b0000,
- "extended fromisoformat timezones")
- def test_to_datetime_tz(self, f=util.to_datetime):
-
- def _assert(value, expected):
- result = f(value)
- self.assertIsInstance(result, datetime.datetime)
- self.assertEqual(result, expected, msg=repr(value))
-
- dt = datetime.datetime(2010, 1, 1)
-
- _assert("2009-12-31T19:00:00-05" , dt)
- _assert("2009-12-31T19:00:00-0500" , dt)
- _assert("2009-12-31T19:00:00.123456-05" , dt)
- _assert("2009-12-31T19:00:00.123456-0500" , dt)
-
- def test_datetime_to_timestamp(self, f=util.datetime_to_timestamp):
- self.assertEqual(f(util.EPOCH), 0.0)
- self.assertEqual(f(datetime.datetime(2010, 1, 1)), 1262304000.0)
- self.assertEqual(f(datetime.datetime(2010, 1, 1, 0, 0, 0, 128000)),
- 1262304000.128000)
- with self.assertRaises(TypeError):
- f(None)
-
- def test_datetime_to_timestamp_string(
- self, f=util.datetime_to_timestamp_string):
- self.assertEqual(f(util.EPOCH), "0")
- self.assertEqual(f(datetime.datetime(2010, 1, 1)), "1262304000")
- self.assertEqual(f(None), "")
-
- def test_datetime_from_timestamp(
- self, f=util.datetime_from_timestamp):
- self.assertEqual(f(0.0), util.EPOCH)
- self.assertEqual(f(1262304000.0), datetime.datetime(2010, 1, 1))
- self.assertEqual(f(1262304000.128000).replace(microsecond=0),
- datetime.datetime(2010, 1, 1, 0, 0, 0))
-
- def test_datetime_utcfromtimestamp(
- self, f=util.datetime_utcfromtimestamp):
- self.assertEqual(f(0.0), util.EPOCH)
- self.assertEqual(f(1262304000.0), datetime.datetime(2010, 1, 1))
-
- def test_datetime_utcnow(
- self, f=util.datetime_utcnow):
- self.assertIsInstance(f(), datetime.datetime)
-
-
class TestOther(unittest.TestCase):
def test_bencode(self):