blob: 7a9269d0dd5b1d7d31e45a82be95aadb12725ff7 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for general-purpose URL shorteners"""
from .common import BaseExtractor, Message
from .. import exception
class UrlshortenerExtractor(BaseExtractor):
    """Common parent of all extractors handling URL shortener sites"""
    # category name shared by every shortener instance
    basecategory = "urlshortener"
# Register the supported shortener instances; the value returned by
# update() serves as the prefix of the link extractor's URL pattern.
BASE_PATTERN = UrlshortenerExtractor.update({
    "bitly": {"root": "https://bit.ly", "pattern": r"bit\.ly"},
    "tco": {
        # With a browser User-Agent, t.co answers with a 200 response
        # containing 'http-equiv="refresh"', hence the 'None' entry here.
        "headers": {"User-Agent": None},
        "root": "https://t.co",
        "pattern": r"t\.co",
    },
})
class UrlshortenerLinkExtractor(UrlshortenerExtractor):
    """Extractor for individual links of general-purpose URL shorteners"""
    subcategory = "link"
    pattern = BASE_PATTERN + r"(/[^/?#]+)"
    example = "https://bit.ly/abcde"

    def items(self):
        """Yield a single queue message for the resolved short URL

        Raises exception.AbortExtraction when request_location()
        gives back nothing usable.
        """
        # last match group holds the '/<id>' path part of the short URL
        short_url = self.root + self.groups[-1]
        # per-instance headers, e.g. the ones set for 't.co'
        instance_headers = self.config_instance("headers")
        target = self.request_location(
            short_url, headers=instance_headers, notfound="URL")

        if target:
            yield Message.Queue, target, {}
            return
        raise exception.AbortExtraction("Unable to resolve short URL")
|