Diffstat (limited to 'gallery_dl')
-rw-r--r--  gallery_dl/aes.py                        | 337
-rw-r--r--  gallery_dl/extractor/500px.py            |   6
-rw-r--r--  gallery_dl/extractor/8muses.py           |  10
-rw-r--r--  gallery_dl/extractor/__init__.py         |   2
-rw-r--r--  gallery_dl/extractor/behance.py          | 281
-rw-r--r--  gallery_dl/extractor/deviantart.py       |  13
-rw-r--r--  gallery_dl/extractor/exhentai.py         |   3
-rw-r--r--  gallery_dl/extractor/fallenangels.py     |   6
-rw-r--r--  gallery_dl/extractor/foolslide.py        |   3
-rw-r--r--  gallery_dl/extractor/furaffinity.py      |   4
-rw-r--r--  gallery_dl/extractor/gfycat.py           |   6
-rw-r--r--  gallery_dl/extractor/hentaicafe.py       |  16
-rw-r--r--  gallery_dl/extractor/hentaifoundry.py    | 129
-rw-r--r--  gallery_dl/extractor/kissmanga.py        | 222
-rw-r--r--  gallery_dl/extractor/mangoxo.py          |   4
-rw-r--r--  gallery_dl/extractor/newgrounds.py       |  87
-rw-r--r--  gallery_dl/extractor/nijie.py            |  23
-rw-r--r--  gallery_dl/extractor/nozomi.py           |   2
-rw-r--r--  gallery_dl/extractor/oauth.py            |  35
-rw-r--r--  gallery_dl/extractor/paheal.py           |   4
-rw-r--r--  gallery_dl/extractor/pixiv.py            |   2
-rw-r--r--  gallery_dl/extractor/pornhub.py          |   8
-rw-r--r--  gallery_dl/extractor/reactor.py          |   2
-rw-r--r--  gallery_dl/extractor/readcomiconline.py  |  30
-rw-r--r--  gallery_dl/extractor/sankaku.py          |   2
-rw-r--r--  gallery_dl/extractor/twitter.py          |  34
-rw-r--r--  gallery_dl/extractor/weasyl.py           | 236
-rw-r--r--  gallery_dl/extractor/weibo.py            |  55
-rw-r--r--  gallery_dl/extractor/xvideos.py          |   2
-rw-r--r--  gallery_dl/job.py                        |  38
-rw-r--r--  gallery_dl/util.py                       |   2
-rw-r--r--  gallery_dl/version.py                    |   2
32 files changed, 874 insertions, 732 deletions
diff --git a/gallery_dl/aes.py b/gallery_dl/aes.py
deleted file mode 100644
index a45f50e..0000000
--- a/gallery_dl/aes.py
+++ /dev/null
@@ -1,337 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# This is a stripped down version of youtube-dl's aes module.
-# All credit for this code goes to the authors of the youtube-dl project.
-# https://ytdl-org.github.io/youtube-dl/
-# https://github.com/ytdl-org/youtube-dl/
-
-import base64
-from math import ceil
-
-BLOCK_SIZE_BYTES = 16
-
-
-def aes_cbc_decrypt(data, key, iv):
- """
- Decrypt with aes in CBC mode
-
- @param {int[]} data cipher
- @param {int[]} key 16/24/32-Byte cipher key
- @param {int[]} iv 16-Byte IV
- @returns {int[]} decrypted data
- """
- expanded_key = key_expansion(key)
- block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
-
- decrypted_data = []
- previous_cipher_block = iv
- for i in range(block_count):
- block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
- block += [0] * (BLOCK_SIZE_BYTES - len(block))
-
- decrypted_block = aes_decrypt(block, expanded_key)
- decrypted_data += xor(decrypted_block, previous_cipher_block)
- previous_cipher_block = block
- decrypted_data = decrypted_data[:len(data)]
-
- return decrypted_data
-
-
-def aes_cbc_decrypt_text(data, key, iv):
- """
- Decrypt with aes in CBC mode
-
- @param {string} data base64 encoded cipher
- @param {int[]} key 16/24/32-Byte cipher key
- @param {int[]} iv 16-Byte IV
- @returns {string} decrypted data as utf8 encoded string
- """
- data = base64.standard_b64decode(bytes(data, "ascii"))
- charcodes = aes_cbc_decrypt(list(data), key, iv)
- last = charcodes[-1]
- if last <= 16:
- charcodes = charcodes[:-last]
- return bytes(charcodes).decode()
-
-
-def key_expansion(data):
- """
- Generate key schedule
-
- @param {int[]} data 16/24/32-Byte cipher key
- @returns {int[]} 176/208/240-Byte expanded key
- """
- data = data[:] # copy
- rcon_iteration = 1
- key_size_bytes = len(data)
- expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
-
- while len(data) < expanded_key_size_bytes:
- temp = data[-4:]
- temp = key_schedule_core(temp, rcon_iteration)
- rcon_iteration += 1
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
-
- for _ in range(3):
- temp = data[-4:]
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
-
- if key_size_bytes == 32:
- temp = data[-4:]
- temp = sub_bytes(temp)
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
-
- if key_size_bytes == 32:
- rounds = 3
- elif key_size_bytes == 24:
- rounds = 2
- else:
- rounds = 0
- for _ in range(rounds):
- temp = data[-4:]
- data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
- data = data[:expanded_key_size_bytes]
-
- return data
-
-
-def aes_decrypt(data, expanded_key):
- """
- Decrypt one block with aes
-
- @param {int[]} data 16-Byte cipher
- @param {int[]} expanded_key 176/208/240-Byte expanded key
- @returns {int[]} 16-Byte state
- """
- rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
-
- for i in range(rounds, 0, -1):
- data = xor(
- data,
- expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
- )
- if i != rounds:
- data = mix_columns_inv(data)
- data = shift_rows_inv(data)
- data = sub_bytes_inv(data)
- data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
-
- return data
-
-
-RCON = (
- 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
-)
-SBOX = (
- 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
- 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
- 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
- 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
- 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC,
- 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
- 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A,
- 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
- 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
- 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
- 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
- 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
- 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85,
- 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
- 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
- 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
- 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17,
- 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
- 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88,
- 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
- 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
- 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
- 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9,
- 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
- 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6,
- 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
- 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
- 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
- 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94,
- 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
- 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68,
- 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16,
-)
-SBOX_INV = (
- 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
- 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
- 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
- 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
- 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
- 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
- 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
- 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
- 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
- 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
- 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
- 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
- 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
- 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
- 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
- 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
- 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
- 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
- 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
- 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
- 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
- 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
- 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
- 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
- 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
- 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
- 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
- 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
- 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
- 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
- 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
- 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
-)
-MIX_COLUMN_MATRIX = (
- (0x2, 0x3, 0x1, 0x1),
- (0x1, 0x2, 0x3, 0x1),
- (0x1, 0x1, 0x2, 0x3),
- (0x3, 0x1, 0x1, 0x2),
-)
-MIX_COLUMN_MATRIX_INV = (
- (0xE, 0xB, 0xD, 0x9),
- (0x9, 0xE, 0xB, 0xD),
- (0xD, 0x9, 0xE, 0xB),
- (0xB, 0xD, 0x9, 0xE),
-)
-RIJNDAEL_EXP_TABLE = (
- 0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF,
- 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
- 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4,
- 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
- 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26,
- 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
- 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC,
- 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
- 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7,
- 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
- 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F,
- 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
- 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0,
- 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
- 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC,
- 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
- 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2,
- 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
- 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0,
- 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
- 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E,
- 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
- 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF,
- 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
- 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09,
- 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
- 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91,
- 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
- 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C,
- 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
- 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD,
- 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01,
-)
-RIJNDAEL_LOG_TABLE = (
- 0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6,
- 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
- 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef,
- 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
- 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a,
- 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
- 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24,
- 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
- 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94,
- 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
- 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62,
- 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
- 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42,
- 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
- 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca,
- 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
- 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74,
- 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
- 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5,
- 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
- 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec,
- 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
- 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86,
- 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
- 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc,
- 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
- 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47,
- 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
- 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89,
- 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
- 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18,
- 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07,
-)
-
-
-def sub_bytes(data):
- return [SBOX[x] for x in data]
-
-
-def sub_bytes_inv(data):
- return [SBOX_INV[x] for x in data]
-
-
-def rotate(data):
- return data[1:] + [data[0]]
-
-
-def key_schedule_core(data, rcon_iteration):
- data = rotate(data)
- data = sub_bytes(data)
- data[0] = data[0] ^ RCON[rcon_iteration]
- return data
-
-
-def xor(data1, data2):
- return [x ^ y for x, y in zip(data1, data2)]
-
-
-def rijndael_mul(a, b):
- if a == 0 or b == 0:
- return 0
- return RIJNDAEL_EXP_TABLE[
- (RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF
- ]
-
-
-def mix_column(data, matrix):
- data_mixed = []
- for row in range(4):
- mixed = 0
- for column in range(4):
- # xor is (+) and (-)
- mixed ^= rijndael_mul(data[column], matrix[row][column])
- data_mixed.append(mixed)
- return data_mixed
-
-
-def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
- data_mixed = []
- for i in range(4):
- column = data[i * 4: (i + 1) * 4]
- data_mixed += mix_column(column, matrix)
- return data_mixed
-
-
-def mix_columns_inv(data):
- return mix_columns(data, MIX_COLUMN_MATRIX_INV)
-
-
-def shift_rows_inv(data):
- data_shifted = []
- for column in range(4):
- for row in range(4):
- data_shifted.append(data[((column - row) & 0b11) * 4 + row])
- return data_shifted
-
-
-__all__ = ['key_expansion', 'aes_cbc_decrypt', 'aes_cbc_decrypt_text']
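
The removed aes.py implements GF(2^8) multiplication through the precomputed
RIJNDAEL_EXP_TABLE/RIJNDAEL_LOG_TABLE pair: rijndael_mul(a, b) is
exp[(log[a] + log[b]) mod 255]. As a standalone sketch (not part of gallery-dl
or youtube-dl), the same product can be computed bitwise with the AES
reduction polynomial 0x11B, which is exactly what those tables tabulate:

    def gf_mul(a, b):
        """Multiply two bytes in GF(2^8) modulo the AES polynomial 0x11B."""
        product = 0
        for _ in range(8):
            if b & 1:
                product ^= a        # "addition" in GF(2^8) is XOR
            carry = a & 0x80
            a = (a << 1) & 0xFF
            if carry:
                a ^= 0x1B           # reduce by x^8 + x^4 + x^3 + x + 1
            b >>= 1
        return product

    # Known test vector from FIPS-197, section 4.2: {57} * {83} = {c1}
    assert gf_mul(0x57, 0x83) == 0xC1
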
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index 4dc4f0d..fd973c3 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -166,7 +166,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
}
gallery = self._request_graphql(
"GalleriesDetailQueryRendererQuery", variables,
- "1afc7dede86ff73456b4defbc5aeb593e330b990943d114cbef7da5be0d7ce2f",
+ "fd367cacf9bebcdc0620bd749dbd8fc9b0ccbeb54fc76b8b4b95e66a8c0cba49",
)["gallery"]
self._photos = gallery["photos"]
@@ -194,8 +194,8 @@ class _500pxGalleryExtractor(_500pxExtractor):
variables["cursor"] = photos["pageInfo"]["endCursor"]
photos = self._request_graphql(
"GalleriesDetailPaginationContainerQuery", variables,
- "3fcbc9ea1589f31c86fc43a0a02c2163"
- "cab070f9d376651f270de9f30f031539",
+ "457c66d976f56863c81795f03e98cb54"
+ "3c7c6cdae7abeab8fe9e8e8a67479fa9",
)["galleryByOwnerIdAndSlugOrToken"]["photos"]
diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py
index fafb785..b248735 100644
--- a/gallery_dl/extractor/8muses.py
+++ b/gallery_dl/extractor/8muses.py
@@ -94,12 +94,12 @@ class _8musesAlbumExtractor(Extractor):
if albums:
for album in albums:
url = self.root + "/comics/album/" + album["permalink"]
- album = {
- "url" : url,
- "name" : album["name"],
- "private": album["isPrivate"],
+ yield Message.Queue, url, {
+ "url" : url,
+ "name" : album["name"],
+ "private" : album["isPrivate"],
+ "_extractor": _8musesAlbumExtractor,
}
- yield Message.Queue, url, album
if data["page"] >= data["pages"]:
return
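
The 8muses hunk above is the first of several in this commit that attach an
"_extractor" key to queued messages (exhentai and mangoxo below get the same
treatment). A minimal model of why the hint helps (the class and URL names are
illustrative, not gallery-dl internals): a dispatcher can instantiate the
hinted class directly instead of re-matching the URL against every registered
pattern.

    import re

    class AlbumExtractor:
        pattern = re.compile(r"https?://example\.org/album/\d+")
        def __init__(self, url):
            self.url = url

    def dispatch(url, data, extractors=(AlbumExtractor,)):
        cls = data.get("_extractor")
        if cls is not None:
            return cls(url)            # hint present: no pattern matching
        for cls in extractors:         # fallback: scan every pattern
            if cls.pattern.match(url):
                return cls(url)
        raise ValueError("no extractor matches " + url)

    extractor = dispatch("https://example.org/album/42",
                         {"_extractor": AlbumExtractor})
    print(type(extractor).__name__)    # AlbumExtractor
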
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 53bc726..b8e39bc 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -59,7 +59,6 @@ modules = [
"kabeuchi",
"keenspot",
"khinsider",
- "kissmanga",
"komikcast",
"konachan",
"lineblog",
@@ -118,6 +117,7 @@ modules = [
"vsco",
"wallhaven",
"warosu",
+ "weasyl",
"webtoons",
"weibo",
"wikiart",
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index 1126615..be498bc 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -31,8 +31,14 @@ class BehanceExtractor(Extractor):
def _update(data):
# compress data to simple lists
if data["fields"] and isinstance(data["fields"][0], dict):
- data["fields"] = [field["name"] for field in data["fields"]]
- data["owners"] = [owner["display_name"] for owner in data["owners"]]
+ data["fields"] = [
+ field.get("name") or field.get("label")
+ for field in data["fields"]
+ ]
+ data["owners"] = [
+ owner.get("display_name") or owner.get("displayName")
+ for owner in data["owners"]
+ ]
tags = data.get("tags") or ()
if tags and isinstance(tags[0], dict):
@@ -101,7 +107,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
cookies = {
"_evidon_consent_cookie":
'{"consent_date":"2019-01-31T09:41:15.132Z"}',
- "bcp": "815b5eee-8bdf-4898-ac79-33c2bcc0ed19",
+ "bcp": "4c34489d-914c-46cd-b44c-dfd0e661136d",
"gk_suid": "66981391",
"gki": '{"feature_project_view":false,'
'"feature_discover_login_prompt":false,'
@@ -184,14 +190,267 @@ class BehanceCollectionExtractor(BehanceExtractor):
self.collection_id = match.group(1)
def galleries(self):
- url = "{}/collection/{}/a".format(self.root, self.collection_id)
- params = {"offset": 0}
- headers = {"X-Requested-With": "XMLHttpRequest"}
+ url = self.root + "/v3/graphql"
+ headers = {
+ "Origin" : self.root,
+ "Referer": self.root + "/collection/" + self.collection_id,
+ "X-BCP" : "4c34489d-914c-46cd-b44c-dfd0e661136d",
+ "X-NewRelic-ID" : "VgUFVldbGwsFU1BRDwUBVw==",
+ "X-Requested-With": "XMLHttpRequest",
+ }
+ cookies = {
+ "bcp" : "4c34489d-914c-46cd-b44c-dfd0e661136d",
+ "gk_suid": "66981391",
+ "ilo0" : "true",
+ }
+
+ query = """
+query GetMoodboardItemsAndRecommendations(
+ $id: Int!
+ $firstItem: Int!
+ $afterItem: String
+ $shouldGetRecommendations: Boolean!
+ $shouldGetItems: Boolean!
+ $shouldGetMoodboardFields: Boolean!
+ ) {
+ viewer @include(if: $shouldGetMoodboardFields) {
+ isOptedOutOfRecommendations
+ }
+ moodboard(id: $id) {
+ ...moodboardFields @include(if: $shouldGetMoodboardFields)
+
+ items(first: $firstItem, after: $afterItem) @include(if: $shouldGetItems)
+ {
+ pageInfo {
+ endCursor
+ hasNextPage
+ }
+ nodes {
+ ...nodesFields
+ }
+ }
+
+ recommendedItems(first: 80) @include(if: $shouldGetRecommendations) {
+ nodes {
+ ...nodesFields
+ fetchSource
+ }
+ }
+ }
+ }
+
+ fragment moodboardFields on Moodboard {
+ id
+ label
+ privacy
+ followerCount
+ isFollowing
+ projectCount
+ url
+ isOwner
+ owners {
+ id
+ displayName
+ url
+ firstName
+ location
+ locationUrl
+ images {
+ size_50 {
+ url
+ }
+ size_100 {
+ url
+ }
+ size_115 {
+ url
+ }
+ size_230 {
+ url
+ }
+ size_138 {
+ url
+ }
+ size_276 {
+ url
+ }
+ }
+ }
+ }
+
+ fragment projectFields on Project {
+ id
+ isOwner
+ publishedOn
+ matureAccess
+ hasMatureContent
+ modifiedOn
+ name
+ url
+ isPrivate
+ slug
+ fields {
+ label
+ }
+ colors {
+ r
+ g
+ b
+ }
+ owners {
+ url
+ displayName
+ id
+ location
+ locationUrl
+ isProfileOwner
+ images {
+ size_50 {
+ url
+ }
+ size_100 {
+ url
+ }
+ size_115 {
+ url
+ }
+ size_230 {
+ url
+ }
+ size_138 {
+ url
+ }
+ size_276 {
+ url
+ }
+ }
+ }
+ covers {
+ size_original {
+ url
+ }
+ size_max_808 {
+ url
+ }
+ size_808 {
+ url
+ }
+ size_404 {
+ url
+ }
+ size_202 {
+ url
+ }
+ size_230 {
+ url
+ }
+ size_115 {
+ url
+ }
+ }
+ stats {
+ views {
+ all
+ }
+ appreciations {
+ all
+ }
+ comments {
+ all
+ }
+ }
+ }
+
+ fragment exifDataValueFields on exifDataValue {
+ id
+ label
+ value
+ searchValue
+ }
+
+ fragment nodesFields on MoodboardItem {
+ id
+ entityType
+ width
+ height
+ flexWidth
+ flexHeight
+ images {
+ size
+ url
+ }
+
+ entity {
+ ... on Project {
+ ...projectFields
+ }
+
+ ... on ImageModule {
+ project {
+ ...projectFields
+ }
+
+ exifData {
+ lens {
+ ...exifDataValueFields
+ }
+ software {
+ ...exifDataValueFields
+ }
+ makeAndModel {
+ ...exifDataValueFields
+ }
+ focalLength {
+ ...exifDataValueFields
+ }
+ iso {
+ ...exifDataValueFields
+ }
+ location {
+ ...exifDataValueFields
+ }
+ flash {
+ ...exifDataValueFields
+ }
+ exposureMode {
+ ...exifDataValueFields
+ }
+ shutterSpeed {
+ ...exifDataValueFields
+ }
+ aperture {
+ ...exifDataValueFields
+ }
+ }
+ }
+
+ ... on MediaCollectionComponent {
+ project {
+ ...projectFields
+ }
+ }
+ }
+ }
+"""
+ variables = {
+ "afterItem": "MAo=",
+ "firstItem": 40,
+ "id" : self.collection_id,
+ "shouldGetItems" : True,
+ "shouldGetMoodboardFields": False,
+ "shouldGetRecommendations": False,
+ }
+ data = {"query": query, "variables": variables}
while True:
- data = self.request(url, params=params, headers=headers).json()
- for item in data["items"]:
- yield item["project"]
- if len(data["items"]) < 40:
+ items = self.request(
+ url, method="POST", headers=headers,
+ cookies=cookies, json=data,
+ ).json()["data"]["moodboard"]["items"]
+
+ for node in items["nodes"]:
+ yield node["entity"]
+
+ if not items["pageInfo"]["hasNextPage"]:
return
- params["offset"] += len(data["items"])
+ variables["afterItem"] = items["pageInfo"]["endCursor"]
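
The rewritten galleries() above swaps offset pagination for a Relay-style
GraphQL cursor loop: POST the query, yield the returned nodes, then feed
pageInfo.endCursor back in as afterItem until hasNextPage goes false. A
self-contained sketch of that loop, using plain requests instead of the
extractor's session (endpoint, query, and field names follow the hunk above):

    import requests

    def moodboard_items(url, query, variables, headers=None, cookies=None):
        """Yield moodboard item nodes, following the GraphQL cursor."""
        while True:
            response = requests.post(
                url, json={"query": query, "variables": variables},
                headers=headers, cookies=cookies)
            items = response.json()["data"]["moodboard"]["items"]
            yield from items["nodes"]

            page_info = items["pageInfo"]
            if not page_info["hasNextPage"]:
                return
            variables["afterItem"] = page_info["endCursor"]
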
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index a0f4d1c..9cceaee 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -262,9 +262,11 @@ class DeviantartExtractor(Extractor):
return folder
raise exception.NotFoundError("folder")
- def _folder_urls(self, folders, category):
- url = "{}/{}/{}/0/".format(self.root, self.user, category)
- return [(url + folder["name"], folder) for folder in folders]
+ def _folder_urls(self, folders, category, extractor):
+ base = "{}/{}/{}/0/".format(self.root, self.user, category)
+ for folder in folders:
+ folder["_extractor"] = extractor
+ yield base + folder["name"], folder
def _update_content_default(self, deviation, content):
public = "premium_folder_data" not in deviation
@@ -450,7 +452,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
if self.flat and not self.group:
return self.api.gallery_all(self.user, self.offset)
folders = self.api.gallery_folders(self.user)
- return self._folder_urls(folders, "gallery")
+ return self._folder_urls(folders, "gallery", DeviantartFolderExtractor)
class DeviantartFolderExtractor(DeviantartExtractor):
@@ -589,7 +591,8 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
self.api.collections(self.user, folder["folderid"])
for folder in folders
)
- return self._folder_urls(folders, "favourites")
+ return self._folder_urls(
+ folders, "favourites", DeviantartCollectionExtractor)
class DeviantartCollectionExtractor(DeviantartExtractor):
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index cb4df11..06b5ba2 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -392,6 +392,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
def items(self):
self.login()
yield Message.Version, 1
+ data = {"_extractor": ExhentaiGalleryExtractor}
while True:
last = None
@@ -402,7 +403,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
if url == last:
continue
last = url
- yield Message.Queue, url, {}
+ yield Message.Queue, url, data
if 'class="ptdd">&gt;<' in page or ">No hits found</p>" in page:
return
diff --git a/gallery_dl/extractor/fallenangels.py b/gallery_dl/extractor/fallenangels.py
index a2d8c04..44863a9 100644
--- a/gallery_dl/extractor/fallenangels.py
+++ b/gallery_dl/extractor/fallenangels.py
@@ -66,9 +66,9 @@ class FallenangelsMangaExtractor(MangaExtractor):
category = "fallenangels"
pattern = r"(?:https?://)?((manga|truyen)\.fascans\.com/manga/[^/]+)/?$"
test = (
- ("http://manga.fascans.com/manga/trinity-seven", {
- "url": "293057f264de6c438b979bd1c3de4719568db452",
- "keyword": "50e0374dba60734230e4284b5ffdadef5104ae62",
+ ("https://manga.fascans.com/manga/chronos-ruler", {
+ "url": "eea07dd50f5bc4903aa09e2cc3e45c7241c9a9c2",
+ "keyword": "c414249525d4c74ad83498b3c59a813557e59d7e",
}),
("https://truyen.fascans.com/manga/rakudai-kishi-no-eiyuutan", {
"url": "51a731a6b82d5eb7a335fbae6b02d06aeb2ab07b",
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index 0ab42db..bf925b6 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -173,8 +173,7 @@ EXTRACTORS = {
),
"test-manga":
("https://sensescans.com/reader/series/yotsubato/", {
- "url": "305e6eb6160e3bb90c3de39ff5fb7c971e052087",
- "keyword": "562fb5a7362a4cb43d59d5c8a6ea8080fc65cf99",
+ "count": ">= 3",
}),
},
"_ckey": "chapterclass",
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 950a174..2a5ef6e 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -236,7 +236,9 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)"
test = (
("https://www.furaffinity.net/view/21835115/", {
- "url": "d80254eb4fba654597b4df8320d55916e11ba375",
+ "pattern": r"https://d\d*\.facdn\.net/(download/)?art/mirlinthloth"
+ r"/music/1488278723/1480267446.mirlinthloth_dj_fennmink"
+ r"_-_bude_s_4_ever\.mp3",
"keyword": {
"artist" : "mirlinthloth",
"artist_url" : "mirlinthloth",
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index ac1bca3..ba2fe5d 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -100,13 +100,13 @@ class GfycatImageExtractor(GfycatExtractor):
"gfyName": "GrayGenerousCowrie",
"gfyNumber": "755075459",
"title": "Bottom's up",
- "userName": "jackson3oh3",
+ "username": "jackson3oh3",
"createDate": 1495884169,
"md5": "a4796e05b0db9ba9ce5140145cd318aa",
"width": 400,
"height": 224,
- "frameRate": 23,
- "numFrames": 158,
+ "frameRate": 23.0,
+ "numFrames": 158.0,
"views": int,
},
}),
diff --git a/gallery_dl/extractor/hentaicafe.py b/gallery_dl/extractor/hentaicafe.py
index 1ab71d6..833135e 100644
--- a/gallery_dl/extractor/hentaicafe.py
+++ b/gallery_dl/extractor/hentaicafe.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2019 Mike Fährmann
+# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -50,17 +50,17 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor):
# single chapter
("https://hentai.cafe/hazuki-yuuto-summer-blues/", {
"url": "f8e24a07d6fbb7c6a6ec5ad8ad8faf2436f8751b",
- "keyword": "eb9f98544098c961bd8cf5dbe69e6da51c4fb2f6",
+ "keyword": "5af1c570bb5f533a32b3375f9cdaa17a0152ba67",
}),
# multi-chapter
("https://hentai.cafe/saitom-saitom-box/", {
"url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076",
- "keyword": "28271062d7b4a2f99a0e1a894f69af8c5581a6bb",
+ "keyword": "3c28517d356cac6acbd9895c9eeefae505304078",
}),
# new-style URL
("https://hentai.cafe/hc.fyi/2782", {
"url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076",
- "keyword": "28271062d7b4a2f99a0e1a894f69af8c5581a6bb",
+ "keyword": "3c28517d356cac6acbd9895c9eeefae505304078",
}),
# foolslide URL
("https://hentai.cafe/manga/series/saitom-box/", {
@@ -80,12 +80,14 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor):
chapters.reverse()
return chapters
- tags , pos = text.extract(page, "<p>Tags: ", "</br>")
+ url , pos = text.extract(page, '<link rel="canonical" href="', '"')
+ tags , pos = text.extract(page, "<p>Tags: ", "</br>", pos)
artist, pos = text.extract(page, "\nArtists: ", "</br>", pos)
manga , pos = text.extract(page, "/manga/read/", "/", pos)
data = {
- "tags" : text.split_html(tags)[::2],
- "artist": text.split_html(artist),
+ "manga_id": text.parse_int(url.rpartition("/")[2]),
+ "tags" : text.split_html(tags)[::2],
+ "artist" : text.split_html(artist),
}
HentaicafeChapterExtractor._data(manga).update(data)
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index 6e82091..5eb46b6 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -34,7 +34,7 @@ class HentaifoundryExtractor(Extractor):
yield Message.Directory, data
self.set_filters()
- for page_url in util.advance(self.get_image_pages(), self.start_post):
+ for page_url in util.advance(self._pagination(), self.start_post):
image = self.get_image_metadata(page_url)
image.update(data)
yield Message.Url, image["src"], image
@@ -50,13 +50,12 @@ class HentaifoundryExtractor(Extractor):
self.request(self.root + "/?enterAgree=1")
return {"user": self.user}
- def get_image_pages(self):
- """Yield urls of all relevant image pages"""
+ def _pagination(self, begin='thumbTitle"><a href="', end='"'):
num = self.start_page
while True:
page = self.request("{}/page/{}".format(self.page_url, num)).text
- yield from text.extract_iter(page, 'thumbTitle"><a href="', '"')
+ yield from text.extract_iter(page, begin, end)
if 'class="pager"' not in page or 'class="last hidden"' in page:
return
@@ -90,6 +89,33 @@ class HentaifoundryExtractor(Extractor):
return text.nameext_from_url(data["src"], data)
+ def get_story_metadata(self, html):
+ """Collect url and metadata for a story"""
+ extr = text.extract_from(html)
+ data = {
+ "user" : self.user,
+ "title" : text.unescape(extr(
+ "<div class='titlebar'>", "</a>").rpartition(">")[2]),
+ "author" : text.unescape(extr('alt="', '"')),
+ "date" : text.parse_datetime(extr(
+ ">Updated<", "</span>").rpartition(">")[2], "%B %d, %Y"),
+ "status" : extr("class='indent'>", "<"),
+ }
+
+ for c in ("Chapters", "Words", "Comments", "Views", "Rating"):
+ data[c.lower()] = text.parse_int(extr(
+ ">" + c + ":</span>", "<").replace(",", ""))
+
+ data["description"] = text.unescape(extr(
+ "class='storyDescript'>", "<div"))
+ path = extr('href="', '"')
+ data["src"] = self.root + path
+ data["index"] = text.parse_int(path.rsplit("/", 2)[1])
+ data["ratings"] = [text.unescape(r) for r in text.extract_iter(extr(
+ "class='ratings_box'", "</div>"), "title='", "'")]
+
+ return text.nameext_from_url(data["src"], data)
+
def set_filters(self):
"""Set site-internal filters to show all images"""
token = text.unquote(text.extract(
@@ -127,19 +153,41 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor):
"""Extractor for all images of a hentai-foundry-user"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
- r"/(?:pictures/user/([^/]+)(?:/page/(\d+))?/?$"
- r"|user/([^/]+)/profile)")
+ r"/user/([^/]+)/profile")
+ test = ("https://www.hentai-foundry.com/user/Tenpura/profile",)
+
+ def __init__(self, match):
+ HentaifoundryExtractor.__init__(self, match, match.group(1))
+
+ def items(self):
+ user = "/user/" + self.user
+ return self._dispatch_extractors((
+ (HentaifoundryPicturesExtractor ,
+ self.root + "/pictures" + user),
+ (HentaifoundryScrapsExtractor,
+ self.root + "/pictures" + user + "/scraps"),
+ (HentaifoundryStoriesExtractor,
+ self.root + "/stories" + user),
+ (HentaifoundryFavoriteExtractor,
+ self.root + user + "/faves/pictures"),
+ ), ("pictures",))
+
+
+class HentaifoundryPicturesExtractor(HentaifoundryExtractor):
+ """Extractor for all pictures of a hentaifoundry user"""
+ subcategory = "pictures"
+ pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
+ r"/pictures/user/([^/]+)(?:/page/(\d+))?/?$")
test = (
("https://www.hentai-foundry.com/pictures/user/Tenpura", {
"url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28",
}),
("https://www.hentai-foundry.com/pictures/user/Tenpura/page/3"),
- ("https://www.hentai-foundry.com/user/Tenpura/profile"),
)
def __init__(self, match):
HentaifoundryExtractor.__init__(
- self, match, match.group(1) or match.group(3), match.group(2))
+ self, match, match.group(1), match.group(2))
self.page_url = "{}/pictures/user/{}".format(self.root, self.user)
def get_job_metadata(self):
@@ -284,3 +332,68 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
def skip(self, _):
return 0
+
+
+class HentaifoundryStoriesExtractor(HentaifoundryExtractor):
+ """Extractor for stories of a hentai-foundry user"""
+ subcategory = "stories"
+ pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
+ r"/stories/user/([^/]+)(?:/page/(\d+))?/?$")
+ test = ("https://www.hentai-foundry.com/stories/user/SnowWolf35", {
+ "count": ">= 35",
+ "keyword": {
+ "author" : "SnowWolf35",
+ "chapters" : int,
+ "comments" : int,
+ "date" : "type:datetime",
+ "description": str,
+ "index" : int,
+ "rating" : int,
+ "ratings" : list,
+ "status" : "re:(Inc|C)omplete",
+ "title" : str,
+ "user" : "SnowWolf35",
+ "views" : int,
+ "words" : int,
+ },
+ })
+
+ def __init__(self, match):
+ HentaifoundryExtractor.__init__(self, match, match.group(1))
+ self.page_url = "{}/stories/user/{}".format(self.root, self.user)
+
+ def items(self):
+ self.get_job_metadata()
+ self.set_filters()
+ stories = self._pagination('<div class="storyRow">', '</tr></table>')
+ for story_html in util.advance(stories, self.start_post):
+ story = self.get_story_metadata(story_html)
+ yield Message.Directory, story
+ yield Message.Url, story["src"], story
+
+
+class HentaifoundryStoryExtractor(HentaifoundryExtractor):
+ """Extractor for a hentaifoundry story"""
+ subcategory = "story"
+ pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
+ r"/stories/user/([^/]+)/(\d+)")
+ test = (("https://www.hentai-foundry.com/stories/user/SnowWolf35"
+ "/26416/Overwatch-High-Chapter-Voting-Location"), {
+ "url": "5a67cfa8c3bf7634c8af8485dd07c1ea74ee0ae8",
+ "keyword": {"title": "Overwatch High Chapter Voting Location"},
+ })
+
+ def __init__(self, match):
+ HentaifoundryExtractor.__init__(self, match, match.group(1))
+ self.index = match.group(2)
+
+ def items(self):
+ story_url = "{}/stories/user/{}/{}/x?enterAgree=1".format(
+ self.root, self.user, self.index)
+ page = self.request(story_url).text
+ story = self.get_story_metadata(page)
+ yield Message.Directory, story
+ yield Message.Url, story["src"], story
+
+ def skip(self, _):
+ return 0
diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py
deleted file mode 100644
index 348453d..0000000
--- a/gallery_dl/extractor/kissmanga.py
+++ /dev/null
@@ -1,222 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2015-2020 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extract manga-chapters and entire manga from https://kissmanga.com/"""
-
-from .common import ChapterExtractor, MangaExtractor, Extractor
-from .. import text, aes, exception
-from ..cache import cache
-import hashlib
-import ast
-import re
-
-
-class RedirectMixin():
- """Detect and handle redirects to CAPTCHA pages"""
-
- def request(self, url, **kwargs):
- while True:
- response = Extractor.request(self, url, **kwargs)
- if not response.history or "/AreYouHuman" not in response.url:
- return response
- if self.config("captcha", "stop") == "wait":
- self.log.warning(
- "Redirect to \n%s\nVisit this URL in your browser, solve "
- "the CAPTCHA, and press ENTER to continue", response.url)
- try:
- input()
- except (EOFError, OSError):
- pass
- else:
- raise exception.StopExtraction(
- "Redirect to \n%s\nVisit this URL in your browser and "
- "solve the CAPTCHA to continue", response.url)
-
-
-class KissmangaBase(RedirectMixin):
- """Base class for kissmanga extractors"""
- category = "kissmanga"
- archive_fmt = "{chapter_id}_{page}"
- root = "https://kissmanga.com"
-
- @staticmethod
- def parse_chapter_string(data):
- """Parse 'chapter_string' value contained in 'data'"""
- data["chapter_string"] = text.unescape(data["chapter_string"])
-
- match = re.match((
- r"(?:[Vv]ol\.0*(\d+) )?"
- r"(?:[Cc]h\.)?0*(\d+)"
- r"(?:[.:]0*(\d+))?"
- r"(?: *[:-]? *(.+))?"
- ), data["chapter_string"])
-
- if not match:
- match = re.match((
- r".+?(?: -)? ()"
- r"0*(\d+)(?:[Vv.]0*(\d+))?"
- r"(?: *[:-]? *(.+))?"
- ), data["chapter_string"])
-
- if match:
- volume, chapter, minor, title = match.groups()
- else:
- volume, chapter, minor, title = 0, 0, "", data["chapter_string"]
-
- data["volume"] = text.parse_int(volume)
- data["chapter"] = text.parse_int(chapter)
- data["chapter_minor"] = "." + minor if minor else ""
- data["title"] = title if title and title != "Read Online" else ""
- return data
-
-
-class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor):
- """Extractor for manga-chapters from kissmanga.com"""
- pattern = (r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com"
- r"(/Manga/[^/?&#]+/[^/?&#]+\?id=(\d+))")
- test = (
- ("https://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", {
- "url": "46e63fd63e9e16f19bc1e6c7a45dc060815642fd",
- "keyword": "1cd0b5214ac7ae4d53e2fd8fec40ceec84cd09bf",
- }),
- ("https://kissmanga.com/Manga/Urban-Tales/a?id=256717", {
- "url": "c26be8bf9c2abacee2076979d021634092cf38f1",
- "keyword": "e1d16780df8e04076ed2b5f0637c5b710ec2f2ea",
- }),
- ("https://kissmanga.com/Manga/Monster/Monster-79?id=7608", {
- "count": 23,
- "keyword": "f433a7a8fae840e17dace316a243fa27faab86de",
- }),
- ("https://kissmanga.com/Manga/Houseki-no-Kuni/Oneshot?id=404189", {
- "count": 49,
- "keyword": "cea131c9fe9c71309b3270cd86718d4d1198c31c",
- }),
- ("https://kissmanga.com/mAnGa/mOnStEr/Monster-79?id=7608"),
- )
-
- def __init__(self, match):
- ChapterExtractor.__init__(self, match)
- self.chapter_id = match.group(2)
- self.session.headers["Referer"] = self.root
-
- def metadata(self, page):
- title = text.extract(page, "<title>", "</title>")[0].strip()
- manga, cinfo = title.split("\n")[1:3]
- data = {
- "manga": manga.strip(),
- "chapter_string": cinfo.strip(),
- "chapter_id": text.parse_int(self.chapter_id),
- "lang": "en",
- "language": "English",
- }
- return self.parse_chapter_string(data)
-
- def images(self, page):
- self.session.headers["Referer"] = None
- try:
- key = self.build_aes_key(page)
- iv = (0xa5, 0xe8, 0xe2, 0xe9, 0xc2, 0x72, 0x1b, 0xe0,
- 0xa8, 0x4a, 0xd6, 0x60, 0xc4, 0x72, 0xc1, 0xf3)
- return [
- (aes.aes_cbc_decrypt_text(
- data, key, iv).partition("&")[0], None)
- for data in text.extract_iter(
- page, 'push(wrapKA("', '"'
- )
- ]
- except UnicodeDecodeError:
- self.log.error("Failed to decrypt image URLs")
- except (ValueError, IndexError):
- self.log.error("Failed to get AES key")
- return []
-
- def build_aes_key(self, page):
- chko = self._chko_from_external_script()
-
- for script in self._scripts(page):
- for stmt in [s.strip() for s in script.split(";")]:
-
- if stmt.startswith("var _"):
- name, _, value = stmt[4:].partition(" = ")
- name += "[0]"
- value = ast.literal_eval(value)[0]
-
- elif stmt.startswith("chko = "):
- stmt = stmt[7:]
- if stmt == name:
- chko = value
- elif stmt == "chko + " + name:
- chko = chko + value
- elif stmt == name + " + chko":
- chko = value + chko
- else:
- self.log.warning("unrecognized expression: '%s'", stmt)
-
- elif stmt.startswith("key = "):
- pass
-
- else:
- self.log.warning("unrecognized statement: '%s'", stmt)
-
- return list(hashlib.sha256(chko.encode("ascii")).digest())
-
- @staticmethod
- def _scripts(page):
- end = 0
- while True:
- pos = page.find("key = ", end)
- if pos == -1:
- return
- beg = page.rindex('<script type="text/javascript">', 0, pos) + 31
- end = page.index('</script>', pos)
- yield page[beg:end]
-
- @cache(maxage=3600)
- def _chko_from_external_script(self):
- script = self.request(self.root + "/Scripts/lo.js").text
-
- pos = script.index("var chko")
- var = text.extract(script, "=", "[", pos)[0].lstrip()
- idx = text.extract(script, "[", "]", pos)[0]
-
- pos = script.index(var)
- lst = text.extract(script, "=", ";", pos)[0]
- return ast.literal_eval(lst.strip())[int(idx)]
-
-
-class KissmangaMangaExtractor(KissmangaBase, MangaExtractor):
- """Extractor for manga from kissmanga.com"""
- chapterclass = KissmangaChapterExtractor
- pattern = (r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com"
- r"(/Manga/[^/?&#]+/?)$")
- test = (
- ("https://kissmanga.com/Manga/Dropout", {
- "url": "9e3a6f715b229aa3fafa42a1d5da5d65614cb532",
- "keyword": "32b09711c28b481845acc32e3bb6054cfc90224d",
- }),
- ("https://kissmanga.com/manga/feng-shen-ji"), # lowercase
- )
-
- def chapters(self, page):
- results = []
- manga, pos = text.extract(page, ' class="barTitle">', '\ninformation')
- page , pos = text.extract(page, ' class="listing">', '</table>', pos)
- manga = manga.strip()
- needle = '" title="Read ' + manga + ' '
- manga = text.unescape(manga)
-
- for item in text.extract_iter(page, '<a href="', ' online">'):
- url, _, chapter = item.partition(needle)
- data = {
- "manga": manga, "chapter_string": chapter,
- "chapter_id": text.parse_int(url.rpartition("=")[2]),
- "lang": "en", "language": "English",
- }
- self.parse_chapter_string(data)
- results.append((self.root + url, data))
- return results
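
kissmanga.py was, as far as this diff shows, the module that pulled in aes.py
(its images() decrypted image URLs with aes_cbc_decrypt_text), so the two
deletions belong together; readcomiconline keeps the CAPTCHA handling by
inlining the former RedirectMixin below. For reference, the removed
build_aes_key() reduced the reconstructed "chko" string to an AES-256 key with
a single SHA-256 digest; a sketch with a placeholder value:

    import hashlib

    chko = "example-chko-string"  # placeholder; the real value came from lo.js
    key = list(hashlib.sha256(chko.encode("ascii")).digest())
    print(len(key))               # 32 bytes -> AES-256 key, as aes.py expected
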
diff --git a/gallery_dl/extractor/mangoxo.py b/gallery_dl/extractor/mangoxo.py
index 0e04f97..5743498 100644
--- a/gallery_dl/extractor/mangoxo.py
+++ b/gallery_dl/extractor/mangoxo.py
@@ -167,6 +167,8 @@ class MangoxoChannelExtractor(MangoxoExtractor):
self.login()
num = total = 1
url = "{}/channel/{}/album/".format(self.root, self.channel_id)
+ data = {"_extractor": MangoxoAlbumExtractor}
+
yield Message.Version, 1
while True:
@@ -174,7 +176,7 @@ class MangoxoChannelExtractor(MangoxoExtractor):
for album in text.extract_iter(
page, '<a class="link black" href="', '"'):
- yield Message.Queue, album, {}
+ yield Message.Queue, album, data
if num == 1:
total = self._total_pages(page)
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 19a2b92..f9dc886 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -19,8 +19,8 @@ class NewgroundsExtractor(Extractor):
"""Base class for newgrounds extractors"""
category = "newgrounds"
directory_fmt = ("{category}", "{artist[:10]:J, }")
- filename_fmt = "{category}_{index}_{title}.{extension}"
- archive_fmt = "{index}"
+ filename_fmt = "{category}_{_index}_{title}.{extension}"
+ archive_fmt = "{_index}"
root = "https://www.newgrounds.com"
cookiedomain = ".newgrounds.com"
cookienames = ("NG_GG_username", "vmk1du5I8m")
@@ -44,6 +44,13 @@ class NewgroundsExtractor(Extractor):
if url:
yield Message.Directory, post
yield Message.Url, url, text.nameext_from_url(url, post)
+
+ for num, url in enumerate(text.extract_iter(
+ post["_comment"], 'data-smartload-src="', '"'), 1):
+ post["num"] = num
+ post["_index"] = "{}_{:>02}".format(post["index"], num)
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
else:
self.log.warning(
"Unable to get download URL for '%s'", post_url)
@@ -97,8 +104,9 @@ class NewgroundsExtractor(Extractor):
else:
data = self._extract_media_data(extr, post_url)
- data["comment"] = text.unescape(text.remove_html(extr(
- 'id="author_comments">', '</div>'), "", ""))
+ data["_comment"] = extr('id="author_comments"', '</div>')
+ data["comment"] = text.unescape(text.remove_html(
+ data["_comment"].partition(">")[2], "", ""))
data["favorites"] = text.parse_int(extr(
'id="faves_load">', '<').replace(",", ""))
data["score"] = text.parse_float(extr('id="score_number">', '<'))
@@ -125,33 +133,54 @@ class NewgroundsExtractor(Extractor):
"width" : text.parse_int(full('width="', '"')),
"height" : text.parse_int(full('height="', '"')),
}
- data["index"] = text.parse_int(
- data["url"].rpartition("/")[2].partition("_")[0])
+ index = data["url"].rpartition("/")[2].partition("_")[0]
+ data["index"] = text.parse_int(index)
+ data["_index"] = index
return data
@staticmethod
def _extract_audio_data(extr, url):
+ index = url.split("/")[5]
return {
"title" : text.unescape(extr('"og:title" content="', '"')),
"description": text.unescape(extr(':description" content="', '"')),
"date" : text.parse_datetime(extr(
'itemprop="datePublished" content="', '"')),
"url" : extr('{"url":"', '"').replace("\\/", "/"),
- "index" : text.parse_int(url.split("/")[5]),
+ "index" : text.parse_int(index),
+ "_index" : index,
"rating" : "",
}
- @staticmethod
- def _extract_media_data(extr, url):
+ def _extract_media_data(self, extr, url):
+ index = url.split("/")[5]
+ title = extr('"og:title" content="', '"')
+ src = extr('{"url":"', '"')
+
+ if src:
+ src = src.replace("\\/", "/")
+ date = text.parse_datetime(extr(
+ 'itemprop="datePublished" content="', '"'))
+ else:
+ url = self.root + "/portal/video/" + index
+ headers = {
+ "Accept": "application/json, text/javascript, */*; q=0.01",
+ "X-Requested-With": "XMLHttpRequest",
+ "Referer": self.root,
+ }
+ data = self.request(url, headers=headers).json()
+ src = data["sources"]["360p"][0]["src"].replace(".360p.", ".")
+ date = text.parse_timestamp(src.rpartition("?")[2])
+
return {
- "title" : text.unescape(extr('"og:title" content="', '"')),
- "url" : extr('{"url":"', '"').replace("\\/", "/"),
- "date" : text.parse_datetime(extr(
- 'itemprop="datePublished" content="', '"')),
+ "title" : text.unescape(title),
+ "url" : src,
+ "date" : date,
"description": text.unescape(extr(
'itemprop="description" content="', '"')),
"rating" : extr('class="rated-', '"'),
- "index" : text.parse_int(url.split("/")[5]),
+ "index" : text.parse_int(index),
+ "_index" : index,
}
def _pagination(self, kind):
@@ -215,6 +244,10 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
("https://art.ngfiles.com/images/0/94_tomfulp_ryu-is-hawt.gif", {
"url": "57f182bcbbf2612690c3a54f16ffa1da5105245e",
}),
+ ("https://www.newgrounds.com/art/view/sailoryon/yon-dream-buster", {
+ "url": "84eec95e663041a80630df72719f231e157e5f5d",
+ "count": 2,
+ })
)
def __init__(self, match):
@@ -236,23 +269,21 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
r"(/(?:portal/view|audio/listen)/\d+)")
test = (
- ("https://www.newgrounds.com/portal/view/589549", {
- "url": "48d916d819c99139e6a3acbbf659a78a867d363e",
- "content": "ceb865426727ec887177d99e0d20bb021e8606ae",
+ ("https://www.newgrounds.com/portal/view/595355", {
+ "pattern": r"https://uploads\.ungrounded\.net/alternate/564000"
+ r"/564957_alternate_31\.mp4\?1359712249",
"keyword": {
- "artist" : ["psychogoldfish", "tomfulp"],
- "comment" : "re:People have been asking me how I like the ",
- "date" : "dt:2012-02-08 21:40:56",
- "description": "re:People have been asking how I like the ",
+ "artist" : ["kickinthehead", "danpaladin", "tomfulp"],
+ "comment" : "re:My fan trailer for Alien Hominid HD!",
+ "date" : "dt:2013-02-01 09:50:49",
"favorites" : int,
- "filename" : "527818_alternate_1896",
- "index" : 589549,
- "rating" : "t",
+ "filename" : "564957_alternate_31",
+ "index" : 595355,
+ "rating" : "e",
"score" : float,
- "tags" : ["newgrounds", "psychogoldfish",
- "rage", "redesign-2012"],
- "title" : "Redesign Rage",
- "user" : "psychogoldfish",
+ "tags" : ["alienhominid", "trailer"],
+ "title" : "Alien Hominid Fan Trailer",
+ "user" : "kickinthehead",
},
}),
("https://www.newgrounds.com/audio/listen/609768", {
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index aae17a3..2394acf 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -127,9 +127,25 @@ class NijieExtractor(AsynchronousMixin, Extractor):
class NijieUserExtractor(NijieExtractor):
- """Extractor for works of a nijie-user"""
+ """Extractor for nijie user profiles"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/members(?:_illust)?\.php\?id=(\d+)"
+ cookiedomain = None
+ pattern = BASE_PATTERN + r"/members\.php\?id=(\d+)"
+ test = ("https://nijie.info/members.php?id=44",)
+
+ def items(self):
+ base = "{}/{{}}.php?id={}".format(self.root, self.user_id)
+ return self._dispatch_extractors((
+ (NijieIllustrationExtractor, base.format("members_illust")),
+ (NijieDoujinExtractor , base.format("members_dojin")),
+ (NijieFavoriteExtractor , base.format("user_like_illust_view")),
+ ), ("illustration", "doujin"))
+
+
+class NijieIllustrationExtractor(NijieExtractor):
+ """Extractor for all illustrations of a nijie-user"""
+ subcategory = "illustration"
+ pattern = BASE_PATTERN + r"/members_illust\.php\?id=(\d+)"
test = (
("https://nijie.info/members_illust.php?id=44", {
"url": "66c4ff94c6e77c0765dd88f2d8c663055fda573e",
@@ -152,7 +168,6 @@ class NijieUserExtractor(NijieExtractor):
("https://nijie.info/members_illust.php?id=43", {
"exception": exception.NotFoundError,
}),
- ("https://nijie.info/members.php?id=44"),
)
def image_ids(self):
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index abf88cd..5e7e387 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -106,7 +106,7 @@ class NozomiPostExtractor(NozomiExtractor):
# multiple images per post
("https://nozomi.la/post/25588032.html", {
"url": "6aa3b7db385abcc9d374bdffd19187bccbf8f228",
- "keyword": "0aa99cbaaeada2984a1fbf912274409c6ba106d4",
+ "keyword": "8c3a2561ccc9ad429be9850d1383a952d0b4a8ab",
"count": 7,
}),
)
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index c07c4b7..6d7b27a 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -180,16 +180,11 @@ class OAuthBase(Extractor):
self.send(msg)
def _generate_message(self, names, values):
- if len(names) == 1:
- _vh = "This value has"
- _is = "is"
- _it = "it"
- _va = "this value"
- else:
- _vh = "These values have"
- _is = "are"
- _it = "them"
- _va = "these values"
+ _vh, _va, _is, _it = (
+ ("This value has", "this value", "is", "it")
+ if len(names) == 1 else
+ ("These values have", "these values", "are", "them")
+ )
msg = "\nYour {} {}\n\n{}\n\n".format(
" and ".join("'" + n + "'" for n in names),
@@ -197,23 +192,21 @@ class OAuthBase(Extractor):
"\n".join(values),
)
- if self.cache:
- opt = self.oauth_config(names[0])
- if opt is None or opt == "cache":
- msg += _vh + " been cached and will automatically be used."
- else:
- msg += (
- "Set 'extractor.{}.{}' to \"cache\" to use {}.".format(
- self.subcategory, names[0], _it,
- )
- )
+ opt = self.oauth_config(names[0])
+ if self.cache and (opt is None or opt == "cache"):
+ msg += _vh + " been cached and will automatically be used."
else:
msg += "Put " + _va + " into your configuration file as \n"
msg += " and\n".join(
"'extractor." + self.subcategory + "." + n + "'"
for n in names
)
- msg += "."
+ if self.cache:
+ msg += (
+ "\nor set\n'extractor.{}.{}' to \"cache\""
+ .format(self.subcategory, names[0])
+ )
+ msg += "\nto use {}.".format(_it)
return msg
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 8f2d633..f08055c 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -95,8 +95,8 @@ class PahealPostExtractor(PahealExtractor):
pattern = (r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net"
r"/post/view/(\d+)")
test = ("https://rule34.paheal.net/post/view/481609", {
- "url": "d3fd0f82762716fe3fb03c9c923e61c13ce22204",
- "keyword": "35748081bfeaab48f909f4b097a4d79b2be12538",
+ "url": "a91d579be030753282f55b8cb4eeaa89c45a9116",
+ "keyword": "44154bdac3d6cf289d0d9739a566acd8b7839e50",
"content": "7b924bcf150b352ac75c9d281d061e174c851a11",
})
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index eaf97fd..ee8f9bb 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -105,7 +105,7 @@ class PixivUserExtractor(PixivExtractor):
# avatar (#595, 623)
("https://www.pixiv.net/en/users/173530", {
"options": (("avatar", True),),
- "content": "22af450d4dbaf4973d370f164f66f48c7382a6de",
+ "content": "4e57544480cc2036ea9608103e8f024fa737fe66",
"range": "1",
}),
# deleted account
diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py
index bbbc709..6b36cdd 100644
--- a/gallery_dl/extractor/pornhub.py
+++ b/gallery_dl/extractor/pornhub.py
@@ -29,9 +29,9 @@ class PornhubGalleryExtractor(PornhubExtractor):
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/album/(\d+)"
test = (
- ("https://www.pornhub.com/album/1708982", {
+ ("https://www.pornhub.com/album/17218841", {
"pattern": r"https://\w+.phncdn.com/pics/albums/\d+/\d+/\d+/\d+/",
- "count": 93,
+ "count": 81,
"keyword": {
"id": int,
"num": int,
@@ -40,11 +40,11 @@ class PornhubGalleryExtractor(PornhubExtractor):
"caption": str,
"user": "Unknown",
"gallery": {
- "id" : 1708982,
+ "id" : 17218841,
"score": int,
"views": int,
"tags" : list,
- "title": "Random Hentai",
+ "title": "Hentai/Ecchi 41",
},
},
}),
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index 8290d2d..e5b4b44 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -16,7 +16,7 @@ import time
import json
-BASE_PATTERN = r"(?:https?://)?([^/.]+\.reactor\.cc)"
+BASE_PATTERN = r"(?:https?://)?((?:[^/.]+\.)?reactor\.cc)"
class ReactorExtractor(SharedConfigMixin, Extractor):
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index dda4809..7030c81 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -1,20 +1,19 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2019 Mike Fährmann
+# Copyright 2016-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract comic-issues and entire comics from https://readcomiconline.to/"""
+"""Extractors for https://readcomiconline.to/"""
-from .common import ChapterExtractor, MangaExtractor
-from .kissmanga import RedirectMixin
-from .. import text
+from .common import Extractor, ChapterExtractor, MangaExtractor
+from .. import text, exception
import re
-class ReadcomiconlineBase(RedirectMixin):
+class ReadcomiconlineBase():
"""Base class for readcomiconline extractors"""
category = "readcomiconline"
directory_fmt = ("{category}", "{comic}", "{issue:>03}")
@@ -22,6 +21,25 @@ class ReadcomiconlineBase(RedirectMixin):
archive_fmt = "{issue_id}_{page}"
root = "https://readcomiconline.to"
+ def request(self, url, **kwargs):
+ """Detect and handle redirects to CAPTCHA pages"""
+ while True:
+ response = Extractor.request(self, url, **kwargs)
+ if not response.history or "/AreYouHuman" not in response.url:
+ return response
+ if self.config("captcha", "stop") == "wait":
+ self.log.warning(
+ "Redirect to \n%s\nVisit this URL in your browser, solve "
+ "the CAPTCHA, and press ENTER to continue", response.url)
+ try:
+ input()
+ except (EOFError, OSError):
+ pass
+ else:
+ raise exception.StopExtraction(
+ "Redirect to \n%s\nVisit this URL in your browser and "
+ "solve the CAPTCHA to continue", response.url)
+
class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
"""Extractor for comic-issues from readcomiconline.to"""
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index b07d024..a9252f5 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -152,7 +152,7 @@ class SankakuTagExtractor(SankakuExtractor):
test = (
("https://chan.sankakucomplex.com/?tags=bonocho", {
"count": 5,
- "pattern": r"https://cs\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
+ "pattern": r"https://c?s\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
}),
# respect 'page' query parameter
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 236a001..c98a300 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -110,16 +110,17 @@ class TwitterExtractor(Extractor):
twitpics = []
for url in tweet["entities"].get("urls", ()):
url = url["expanded_url"]
- if "//twitpic.com/" in url:
+ if "//twitpic.com/" in url and "/photos/" not in url:
response = self.request(url, fatal=False)
if response.status_code >= 400:
continue
url = text.extract(
response.text, 'name="twitter:image" value="', '"')[0]
- twitpics.append({
- "original_info": {},
- "media_url" : url,
- })
+ if url:
+ twitpics.append({
+ "original_info": {},
+ "media_url" : url,
+ })
if twitpics:
if "extended_entities" in tweet:
tweet["extended_entities"]["media"].extend(twitpics)
@@ -312,6 +313,7 @@ class TwitterSearchExtractor(TwitterExtractor):
test = ("https://twitter.com/search?q=nature", {
"range": "1-40",
"count": 40,
+ "archive": False,
})
def metadata(self):
@@ -378,6 +380,15 @@ class TwitterTweetExtractor(TwitterExtractor):
"url": "0f6a841e23948e4320af7ae41125e0c5b3cadc98",
"content": "f29501e44d88437fe460f5c927b7543fda0f6e34",
}),
+ # original retweets (#1026)
+ ("https://twitter.com/jessica_3978/status/1296304589591810048", {
+ "options": (("retweets", "original"),),
+ "count": 2,
+ "keyword": {
+ "tweet_id": 1296296016002547713,
+ "date" : "dt:2020-08-20 04:00:28",
+ },
+ }),
)
def __init__(self, match):
@@ -451,7 +462,8 @@ class TwitterAPI():
endpoint = "2/timeline/conversation/{}.json".format(tweet_id)
tweets = []
for tweet in self._pagination(endpoint):
- if tweet["id_str"] == tweet_id:
+ if tweet["id_str"] == tweet_id or \
+ tweet.get("_retweet_id_str") == tweet_id:
tweets.append(tweet)
if "quoted_status_id_str" in tweet:
tweet_id = tweet["quoted_status_id_str"]
@@ -536,6 +548,7 @@ class TwitterAPI():
entry_tweet="tweet-", entry_cursor="cursor-bottom-"):
if params is None:
params = self.params.copy()
+ original_retweets = (self.extractor.retweets == "original")
while True:
cursor = tweet = None
@@ -558,12 +571,17 @@ class TwitterAPI():
"Skipping %s (deleted)",
entry["entryId"][len(entry_tweet):])
continue
- tweet["user"] = users[tweet["user_id_str"]]
if "retweeted_status_id_str" in tweet:
retweet = tweets.get(tweet["retweeted_status_id_str"])
- if retweet:
+ if original_retweets:
+ if not retweet:
+ continue
+ retweet["_retweet_id_str"] = tweet["id_str"]
+ tweet = retweet
+ elif retweet:
tweet["author"] = users[retweet["user_id_str"]]
+ tweet["user"] = users[tweet["user_id_str"]]
yield tweet
if "quoted_status_id_str" in tweet:
diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py
new file mode 100644
index 0000000..a39fbf1
--- /dev/null
+++ b/gallery_dl/extractor/weasyl.py
@@ -0,0 +1,236 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.weasyl.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https://)?(?:www\.)?weasyl.com/"
+
+
+class WeasylExtractor(Extractor):
+ category = "weasyl"
+ directory_fmt = ("{category}", "{owner_login}")
+ filename_fmt = "{submitid} {title}.{extension}"
+ archive_fmt = "{submitid}"
+ root = "https://www.weasyl.com"
+
+ @staticmethod
+ def populate_submission(data):
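+        # abridged shape of a submission object, inferred from the fields
+        # accessed below and from the extractor tests:
+        #   {"submitid": ..., "posted_at": "2012-04-20T00:38:04Z",
+        #    "media": {"submission": [{"url": "https://cdn.weasyl.com/..."}]}}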
+ # Some submissions don't have content and can be skipped
+ if "submission" in data["media"]:
+ data["url"] = data["media"]["submission"][0]["url"]
+ data["date"] = text.parse_datetime(
+ data["posted_at"][:19], "%Y-%m-%dT%H:%M:%S")
+ text.nameext_from_url(data["url"], data)
+ return True
+ return False
+
+ def request_submission(self, submitid):
+ return self.request(
+ "{}/api/submissions/{}/view".format(self.root, submitid)).json()
+
+ def retrieve_journal(self, journalid):
+ data = self.request(
+ "{}/api/journals/{}/view".format(self.root, journalid)).json()
+ data["extension"] = "html"
+ data["html"] = "text:" + data["content"]
+ data["date"] = text.parse_datetime(data["posted_at"])
+ return data
+
+ def submissions(self, owner_login, folderid=None):
+ url = "{}/api/users/{}/gallery".format(self.root, owner_login)
+ params = {
+ "nextid" : None,
+ "folderid": folderid,
+ }
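+        # paginate via the API's "nextid" cursor; None-valued parameters
+        # are omitted by requests, so the first call fetches page one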
+
+ while True:
+ data = self.request(url, params=params).json()
+ for submission in data["submissions"]:
+ if self.populate_submission(submission):
+ submission["folderid"] = folderid
+                    # It is unclear whether a submission can contain more
+                    # than one URL; if it can, yielding a urllist built
+                    # from all "submission" media entries would be needed.
+ yield Message.Url, submission["url"], submission
+ if not data["nextid"]:
+ return
+ params["nextid"] = data["nextid"]
+
+
+class WeasylSubmissionExtractor(WeasylExtractor):
+ subcategory = "submission"
+ pattern = BASE_PATTERN + r"(?:~[\w-]+/submissions|submission)/(\d+)"
+ test = (
+ ("https://www.weasyl.com/~fiz/submissions/2031/a-wesley", {
+ "pattern": "https://cdn.weasyl.com/~fiz/submissions/2031/41ebc1c29"
+ "40be928532785dfbf35c37622664d2fbb8114c3b063df969562fc5"
+ "1/fiz-a-wesley.png",
+ "keyword": {
+ "comments" : int,
+ "date" : "dt:2012-04-20 00:38:04",
+ "description" : "<p>(flex)</p>",
+ "favorites" : int,
+ "folder_name" : "Wesley Stuff",
+ "folderid" : 2081,
+ "friends_only": False,
+ "owner" : "Fiz",
+ "owner_login" : "fiz",
+ "rating" : "general",
+ "submitid" : 2031,
+ "subtype" : "visual",
+ "tags" : list,
+ "title" : "A Wesley!",
+ "type" : "submission",
+ "views" : int,
+ },
+ }),
+ ("https://www.weasyl.com/submission/2031/a-wesley"),
+ )
+
+ def __init__(self, match):
+ WeasylExtractor.__init__(self, match)
+ self.submitid = match.group(1)
+
+ def items(self):
+ data = self.request_submission(self.submitid)
+ if self.populate_submission(data):
+ yield Message.Directory, data
+ yield Message.Url, data["url"], data
+
+
+class WeasylSubmissionsExtractor(WeasylExtractor):
+ subcategory = "submissions"
+ pattern = BASE_PATTERN + r"(?:~|submissions/)([\w-]+)/?$"
+ test = (
+ ("https://www.weasyl.com/~tanidareal", {
+ "count": ">= 200"
+ }),
+ ("https://www.weasyl.com/submissions/tanidareal"),
+ )
+
+ def __init__(self, match):
+ WeasylExtractor.__init__(self, match)
+ self.owner_login = match.group(1)
+
+ def items(self):
+ yield Message.Version, 1
+ yield Message.Directory, {"owner_login": self.owner_login}
+ yield from self.submissions(self.owner_login)
+
+
+class WeasylFolderExtractor(WeasylExtractor):
+ subcategory = "folder"
+ directory_fmt = ("{category}", "{owner_login}", "{folder_name}")
+ pattern = BASE_PATTERN + r"submissions/([\w-]+)\?folderid=(\d+)"
+ test = ("https://www.weasyl.com/submissions/tanidareal?folderid=7403", {
+ "count": ">= 12"
+ })
+
+ def __init__(self, match):
+ WeasylExtractor.__init__(self, match)
+ self.owner_login, self.folderid = match.groups()
+
+ def items(self):
+ yield Message.Version, 1
+        submissions = self.submissions(self.owner_login, self.folderid)
+        # folder names are only included in single-submission API
+        # responses, so fetch the first submission's details to get
+        # the directory metadata
+        msg, url, data = next(submissions)
+        details = self.request_submission(data["submitid"])
+        yield Message.Directory, details
+        yield msg, url, data
+        yield from submissions
+
+
+class WeasylJournalExtractor(WeasylExtractor):
+ subcategory = "journal"
+ filename_fmt = "{journalid} {title}.{extension}"
+ archive_fmt = "{journalid}"
+ pattern = BASE_PATTERN + r"journal/(\d+)"
+ test = ("https://www.weasyl.com/journal/17647/bbcode", {
+ "keyword": {
+ "title" : "BBCode",
+ "date" : "dt:2013-09-19 23:11:23",
+ "content": "<p><a>javascript:alert(42);</a></p>"
+ "<p>No more of that!</p>",
+ },
+ })
+
+ def __init__(self, match):
+ WeasylExtractor.__init__(self, match)
+ self.journalid = match.group(1)
+
+ def items(self):
+ data = self.retrieve_journal(self.journalid)
+ yield Message.Version, 1
+ yield Message.Directory, data
+ yield Message.Url, data["html"], data
+
+
+class WeasylJournalsExtractor(WeasylExtractor):
+ subcategory = "journals"
+ filename_fmt = "{journalid} {title}.{extension}"
+ archive_fmt = "{journalid}"
+ pattern = BASE_PATTERN + r"journals/([\w-]+)"
+ test = ("https://www.weasyl.com/journals/charmander", {
+ "count": ">= 2",
+ })
+
+ def __init__(self, match):
+ WeasylExtractor.__init__(self, match)
+ self.owner_login = match.group(1)
+
+ def items(self):
+ yield Message.Version, 1
+ yield Message.Directory, {"owner_login": self.owner_login}
+
+ url = "{}/journals/{}".format(self.root, self.owner_login)
+ page = self.request(url).text
+ for journalid in text.extract_iter(page, 'href="/journal/', '/'):
+ data = self.retrieve_journal(journalid)
+ yield Message.Url, data["html"], data
+
+
+class WeasylFavoriteExtractor(WeasylExtractor):
+ subcategory = "favorite"
+ directory_fmt = ("{category}", "{owner_login}", "Favorites")
+ pattern = BASE_PATTERN + r"favorites\?userid=(\d+)&feature=submit"
+ test = ("https://www.weasyl.com/favorites?userid=184616&feature=submit", {
+ "count": ">= 5",
+ })
+
+ def __init__(self, match):
+ WeasylExtractor.__init__(self, match)
+ self.userid = match.group(1)
+
+ def items(self):
+ owner_login = lastid = None
+ url = self.root + "/favorites"
+ params = {
+ "userid" : self.userid,
+ "feature": "submit",
+ }
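+        # favorites are scraped from the HTML listing; each page links to
+        # its successor via a "nextid" query parameter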
+
+ while True:
+ page = self.request(url, params=params).text
+ pos = page.index('id="favorites-content"')
+
+ if not owner_login:
+ owner_login = text.extract(page, '<a href="/~', '"')[0]
+ yield Message.Directory, {"owner_login": owner_login}
+
+ for submitid in text.extract_iter(page, "/submissions/", "/", pos):
+ if submitid == lastid:
+ continue
+ lastid = submitid
+ submission = self.request_submission(submitid)
+ if self.populate_submission(submission):
+ yield Message.Url, submission["url"], submission
+
+ if "&amp;nextid=" not in page:
+ return
+ params["nextid"] = submitid
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 0b1b2d9..a325f87 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -47,21 +47,31 @@ class WeiboExtractor(Extractor):
file["num"] = num
yield Message.Url, file["url"], file
+ def statuses(self):
+ """Returns an iterable containing all relevant 'status' objects"""
+
+ def _status_by_id(self, status_id):
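+        # the status detail page embeds its data as JSON inside a
+        # 'var $render_data = [...]' script block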
+ url = "{}/detail/{}".format(self.root, status_id)
+ page = self.request(url, fatal=False).text
+ data = text.extract(page, "var $render_data = [", "][0] || {};")[0]
+ return json.loads(data)["status"] if data else None
+
def _files_from_status(self, status):
- images = status.pop("pics", ())
page_info = status.pop("page_info", ())
-
- for image in images:
- pid = image["pid"]
- if "large" in image:
- image = image["large"]
- geo = image.get("geo") or {}
- yield text.nameext_from_url(image["url"], {
- "url" : image["url"],
- "pid" : pid,
- "width" : text.parse_int(geo.get("width")),
- "height": text.parse_int(geo.get("height")),
- })
+ if "pics" in status:
+ if len(status["pics"]) < status["pic_num"]:
+ status = self._status_by_id(status["id"]) or status
+ for image in status.pop("pics"):
+ pid = image["pid"]
+ if "large" in image:
+ image = image["large"]
+ geo = image.get("geo") or {}
+ yield text.nameext_from_url(image["url"], {
+ "url" : image["url"],
+ "pid" : pid,
+ "width" : text.parse_int(geo.get("width")),
+ "height": text.parse_int(geo.get("height")),
+ })
if self.videos and "media_info" in page_info:
info = page_info["media_info"]
@@ -79,9 +89,6 @@ class WeiboExtractor(Extractor):
data["_ytdl_extra"] = {"protocol": "m3u8_native"}
yield data
- def statuses(self):
- """Returns an iterable containing all relevant 'status' objects"""
-
class WeiboUserExtractor(WeiboExtractor):
"""Extractor for all images of a user on weibo.cn"""
@@ -107,13 +114,13 @@ class WeiboUserExtractor(WeiboExtractor):
while True:
data = self.request(url, params=params).json()
+ cards = data["data"]["cards"]
- for card in data["data"]["cards"]:
+ if not cards:
+ return
+ for card in cards:
if "mblog" in card:
yield card["mblog"]
-
- if not data["data"]["cards"]:
- return
params["page"] += 1
@@ -145,9 +152,7 @@ class WeiboStatusExtractor(WeiboExtractor):
self.status_id = match.group(1)
def statuses(self):
- url = "{}/detail/{}".format(self.root, self.status_id)
- page = self.request(url, notfound="status").text
- data = text.extract(page, "var $render_data = [", "][0] || {};")[0]
- if not data:
+ status = self._status_by_id(self.status_id)
+ if not status:
raise exception.NotFoundError("status")
- return (json.loads(data)["status"],)
+ return (status,)
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index 2548ead..b7d116a 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -59,13 +59,13 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor):
def metadata(self, page):
extr = text.extract_from(page)
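+        # extract_from() consumes the page sequentially; '"title"'
+        # precedes the user block in the page source, so grab it first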
+ title = extr('"title":"', '"')
user = {
"id" : text.parse_int(extr('"id_user":', ',')),
"display": extr('"display":"', '"'),
"sex" : extr('"sex":"', '"'),
"name" : self.user,
}
- title = extr('"title":"', '"')
user["description"] = extr(
'<small class="mobile-hide">', '</small>').strip()
tags = extr('<em>Tagged:</em>', '<').strip()
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 7d08b86..b62240b 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -228,7 +228,7 @@ class DownloadJob(Job):
for pp in postprocessors:
pp.prepare(pathfmt)
- if archive and kwdict in archive:
+ if archive and archive.check(kwdict):
pathfmt.fix_extension()
self.handle_skip()
return
@@ -385,8 +385,23 @@ class DownloadJob(Job):
self.sleep = config("sleep")
if not config("download", True):
+            # monkey-patch download() to skip the actual download;
+            # fix_extension() adjusts the filename and always returns True
self.download = pathfmt.fix_extension
+ archive = config("archive")
+ if archive:
+ path = util.expand_path(archive)
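+            # archive paths may contain replacement fields,
+            # e.g. "~/archives/{category}.db" (illustrative value)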
+ try:
+ if "{" in path:
+ path = util.Formatter(path).format_map(kwdict)
+ self.archive = util.DownloadArchive(path, self.extractor)
+ except Exception as exc:
+ self.extractor.log.warning(
+ "Failed to open download archive at '%s' ('%s: %s')",
+ path, exc.__class__.__name__, exc)
+ else:
+ self.extractor.log.debug("Using download archive '%s'", path)
+
skip = config("skip", True)
if skip:
self._skipexc = None
@@ -401,21 +416,10 @@ class DownloadJob(Job):
self._skipcnt = 0
self._skipmax = text.parse_int(smax)
else:
+ # monkey-patch methods to always return False
pathfmt.exists = lambda x=None: False
-
- archive = config("archive")
- if archive:
- path = util.expand_path(archive)
- try:
- if "{" in path:
- path = util.Formatter(path).format_map(kwdict)
- self.archive = util.DownloadArchive(path, self.extractor)
- except Exception as exc:
- self.extractor.log.warning(
- "Failed to open download archive at '%s' ('%s: %s')",
- path, exc.__class__.__name__, exc)
- else:
- self.extractor.log.debug("Using download archive '%s'", path)
+ if self.archive:
+ self.archive.check = pathfmt.exists
postprocessors = self.extractor.config_accumulate("postprocessors")
if postprocessors:
@@ -449,7 +453,7 @@ class DownloadJob(Job):
def _build_blacklist(self):
wlist = self.extractor.config("whitelist")
- if wlist:
+ if wlist is not None:
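+            # 'is not None' distinguishes an explicitly empty list from
+            # an unset option, so a configured "whitelist": [] is honored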
if isinstance(wlist, str):
wlist = wlist.split(",")
blist = {e.category for e in extractor._list_classes()}
@@ -457,7 +461,7 @@ class DownloadJob(Job):
return blist
blist = self.extractor.config("blacklist")
- if blist:
+ if blist is not None:
if isinstance(blist, str):
blist = blist.split(",")
blist = set(blist)
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index dbebfce..3e91405 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -941,7 +941,7 @@ class DownloadArchive():
"archive-format", extractor.archive_fmt)
).format_map
- def __contains__(self, kwdict):
+ def check(self, kwdict):
"""Return True if the item described by 'kwdict' exists in archive"""
key = kwdict["_archive_key"] = self.keygen(kwdict)
self.cursor.execute(
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d7e2737..81976c2 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.15.0"
+__version__ = "1.15.1"