diff options
Diffstat (limited to 'gallery_dl/extractor/facebook.py')
| -rw-r--r-- | gallery_dl/extractor/facebook.py | 49 |
1 files changed, 24 insertions, 25 deletions
diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py index 6061737..5d56a5f 100644 --- a/gallery_dl/extractor/facebook.py +++ b/gallery_dl/extractor/facebook.py @@ -11,9 +11,9 @@ from .. import text, util, exception from ..cache import memcache BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?facebook\.com" -USER_PATTERN = (BASE_PATTERN + - r"/(?!media/|photo/|photo.php|watch/)" - r"(?:profile\.php\?id=|people/[^/?#]+/)?([^/?&#]+)") +USER_PATTERN = (rf"{BASE_PATTERN}/" + rf"(?!media/|photo/|photo.php|watch/|permalink.php)" + rf"(?:profile\.php\?id=|people/[^/?#]+/)?([^/?&#]+)") class FacebookExtractor(Extractor): @@ -108,7 +108,7 @@ class FacebookExtractor(Extractor): '"message":{"delight_ranges"', '"},"message_preferred_body"' ).rsplit('],"text":"', 1)[-1]), - "date": text.parse_timestamp( + "date": self.parse_timestamp( text.extr(photo_page, '\\"publish_time\\":', ',') or text.extr(photo_page, '"created_time":', ',') ), @@ -172,7 +172,7 @@ class FacebookExtractor(Extractor): "user_id": text.extr( video_page, '"owner":{"__typename":"User","id":"', '"' ), - "date": text.parse_timestamp(text.extr( + "date": self.parse_timestamp(text.extr( video_page, '\\"publish_time\\":', ',' )), "type": "video" @@ -292,7 +292,7 @@ class FacebookExtractor(Extractor): else: retries = 0 photo.update(set_data) - yield Message.Directory, photo + yield Message.Directory, "", photo yield Message.Url, photo["url"], photo if not photo["next_photo_id"]: @@ -389,9 +389,9 @@ class FacebookExtractor(Extractor): class FacebookPhotoExtractor(FacebookExtractor): """Base class for Facebook Photo extractors""" subcategory = "photo" - pattern = (BASE_PATTERN + - r"/(?:[^/?#]+/photos/[^/?#]+/|photo(?:.php)?/?\?" - r"(?:[^&#]+&)*fbid=)([^/?&#]+)[^/?#]*(?<!&setextract)$") + pattern = (rf"{BASE_PATTERN}/" + rf"(?:[^/?#]+/photos/[^/?#]+/|photo(?:.php)?/?\?" + rf"(?:[^&#]+&)*fbid=)([^/?&#]+)[^/?#]*(?<!&setextract)$") example = "https://www.facebook.com/photo/?fbid=PHOTO_ID" def items(self): @@ -408,7 +408,7 @@ class FacebookPhotoExtractor(FacebookExtractor): directory = self.parse_set_page(set_page) - yield Message.Directory, directory + yield Message.Directory, "", directory yield Message.Url, photo["url"], photo if self.author_followups: @@ -427,12 +427,11 @@ class FacebookSetExtractor(FacebookExtractor): """Base class for Facebook Set extractors""" subcategory = "set" pattern = ( - BASE_PATTERN + - r"/(?:(?:media/set|photo)/?\?(?:[^&#]+&)*set=([^&#]+)" - r"[^/?#]*(?<!&setextract)$" - r"|([^/?#]+/posts/[^/?#]+)" - r"|photo/\?(?:[^&#]+&)*fbid=([^/?&#]+)&set=([^/?&#]+)&setextract)" - ) + rf"{BASE_PATTERN}/" + rf"(?:(?:media/set|photo)/?\?(?:[^&#]+&)*set=([^&#]+)" + rf"[^/?#]*(?<!&setextract)$" + rf"|([^/?#]+/posts/[^/?#]+)" + rf"|photo/\?(?:[^&#]+&)*fbid=([^/?&#]+)&set=([^/?&#]+)&setextract)") example = "https://www.facebook.com/media/set/?set=SET_ID" def items(self): @@ -455,7 +454,7 @@ class FacebookVideoExtractor(FacebookExtractor): """Base class for Facebook Video extractors""" subcategory = "video" directory_fmt = ("{category}", "{username}", "{subcategory}") - pattern = BASE_PATTERN + r"/(?:[^/?#]+/videos/|watch/?\?v=)([^/?&#]+)" + pattern = rf"{BASE_PATTERN}/(?:[^/?#]+/videos/|watch/?\?v=)([^/?&#]+)" example = "https://www.facebook.com/watch/?v=VIDEO_ID" def items(self): @@ -468,7 +467,7 @@ class FacebookVideoExtractor(FacebookExtractor): if "url" not in video: return - yield Message.Directory, video + yield Message.Directory, "", video if self.videos == "ytdl": yield Message.Url, "ytdl:" + video_url, video @@ -482,18 +481,18 @@ class FacebookInfoExtractor(FacebookExtractor): """Extractor for Facebook Profile data""" subcategory = "info" directory_fmt = ("{category}", "{username}") - pattern = USER_PATTERN + r"/info" + pattern = rf"{USER_PATTERN}/info" example = "https://www.facebook.com/USERNAME/info" def items(self): user = self._extract_profile(self.groups[0]) - return iter(((Message.Directory, user),)) + return iter(((Message.Directory, "", user),)) class FacebookAlbumsExtractor(FacebookExtractor): """Extractor for Facebook Profile albums""" subcategory = "albums" - pattern = USER_PATTERN + r"/photos_albums(?:/([^/?#]+))?" + pattern = rf"{USER_PATTERN}/photos_albums(?:/([^/?#]+))?" example = "https://www.facebook.com/USERNAME/photos_albums" def items(self): @@ -526,7 +525,7 @@ class FacebookAlbumsExtractor(FacebookExtractor): class FacebookPhotosExtractor(FacebookExtractor): """Extractor for Facebook Profile Photos""" subcategory = "photos" - pattern = USER_PATTERN + r"/photos(?:_by)?" + pattern = rf"{USER_PATTERN}/photos(?:_by)?" example = "https://www.facebook.com/USERNAME/photos" def items(self): @@ -543,7 +542,7 @@ class FacebookPhotosExtractor(FacebookExtractor): class FacebookAvatarExtractor(FacebookExtractor): """Extractor for Facebook Profile Avatars""" subcategory = "avatar" - pattern = USER_PATTERN + r"/avatar" + pattern = rf"{USER_PATTERN}/avatar" example = "https://www.facebook.com/USERNAME/avatar" def items(self): @@ -559,13 +558,13 @@ class FacebookAvatarExtractor(FacebookExtractor): set_page = self.request(set_url).text directory = self.parse_set_page(set_page) - yield Message.Directory, directory + yield Message.Directory, "", directory yield Message.Url, avatar["url"], avatar class FacebookUserExtractor(Dispatch, FacebookExtractor): """Extractor for Facebook Profiles""" - pattern = USER_PATTERN + r"/?(?:$|\?|#)" + pattern = rf"{USER_PATTERN}/?(?:$|\?|#)" example = "https://www.facebook.com/USERNAME" def items(self): |
