diff options
Diffstat (limited to 'gallery_dl/extractor/facebook.py')
| -rw-r--r-- | gallery_dl/extractor/facebook.py | 53 |
1 files changed, 29 insertions, 24 deletions
diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py index 5d56a5f..7f03f84 100644 --- a/gallery_dl/extractor/facebook.py +++ b/gallery_dl/extractor/facebook.py @@ -11,9 +11,9 @@ from .. import text, util, exception from ..cache import memcache BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?facebook\.com" -USER_PATTERN = (rf"{BASE_PATTERN}/" - rf"(?!media/|photo/|photo.php|watch/|permalink.php)" - rf"(?:profile\.php\?id=|people/[^/?#]+/)?([^/?&#]+)") +USER_PATTERN = (BASE_PATTERN + + r"/(?!media/|photo/|photo.php|watch/|permalink.php)" + r"(?:profile\.php\?id=|people/[^/?#]+/)?([^/?&#]+)") class FacebookExtractor(Extractor): @@ -237,16 +237,14 @@ class FacebookExtractor(Extractor): if res.url.startswith(self.root + "/login"): raise exception.AuthRequired( - message=(f"You must be logged in to continue viewing images." - f"{LEFT_OFF_TXT}") - ) + message=("You must be logged in to continue viewing images." + + LEFT_OFF_TXT)) if b'{"__dr":"CometErrorRoot.react"}' in res.content: raise exception.AbortExtraction( - f"You've been temporarily blocked from viewing images.\n" - f"Please try using a different account, " - f"using a VPN or waiting before you retry.{LEFT_OFF_TXT}" - ) + "You've been temporarily blocked from viewing images.\n" + "Please try using a different account, " + "using a VPN or waiting before you retry." + LEFT_OFF_TXT) return res @@ -306,6 +304,12 @@ class FacebookExtractor(Extractor): "Detected a loop in the set, it's likely finished. " "Extraction is over." ) + elif int(photo["next_photo_id"]) > int(photo["id"]) + i*120: + self.log.info( + "Detected jump to the beginning of the set. (%s -> %s)", + photo["id"], photo["next_photo_id"]) + if self.config("loop", False): + all_photo_ids.append(photo["next_photo_id"]) else: all_photo_ids.append(photo["next_photo_id"]) @@ -389,9 +393,9 @@ class FacebookExtractor(Extractor): class FacebookPhotoExtractor(FacebookExtractor): """Base class for Facebook Photo extractors""" subcategory = "photo" - pattern = (rf"{BASE_PATTERN}/" - rf"(?:[^/?#]+/photos/[^/?#]+/|photo(?:.php)?/?\?" - rf"(?:[^&#]+&)*fbid=)([^/?&#]+)[^/?#]*(?<!&setextract)$") + pattern = (BASE_PATTERN + + r"/(?:[^/?#]+/photos/[^/?#]+/|photo(?:.php)?/?\?" + r"(?:[^&#]+&)*fbid=)([^/?&#]+)[^/?#]*(?<!&setextract)$") example = "https://www.facebook.com/photo/?fbid=PHOTO_ID" def items(self): @@ -427,11 +431,12 @@ class FacebookSetExtractor(FacebookExtractor): """Base class for Facebook Set extractors""" subcategory = "set" pattern = ( - rf"{BASE_PATTERN}/" - rf"(?:(?:media/set|photo)/?\?(?:[^&#]+&)*set=([^&#]+)" - rf"[^/?#]*(?<!&setextract)$" - rf"|([^/?#]+/posts/[^/?#]+)" - rf"|photo/\?(?:[^&#]+&)*fbid=([^/?&#]+)&set=([^/?&#]+)&setextract)") + BASE_PATTERN + + r"/(?:(?:media/set|photo)/?\?(?:[^&#]+&)*set=([^&#]+)" + r"[^/?#]*(?<!&setextract)$" + r"|([^/?#]+/posts/[^/?#]+)" + r"|photo/\?(?:[^&#]+&)*fbid=([^/?&#]+)&set=([^/?&#]+)&setextract)" + ) example = "https://www.facebook.com/media/set/?set=SET_ID" def items(self): @@ -454,7 +459,7 @@ class FacebookVideoExtractor(FacebookExtractor): """Base class for Facebook Video extractors""" subcategory = "video" directory_fmt = ("{category}", "{username}", "{subcategory}") - pattern = rf"{BASE_PATTERN}/(?:[^/?#]+/videos/|watch/?\?v=)([^/?&#]+)" + pattern = BASE_PATTERN + r"/(?:[^/?#]+/videos/|watch/?\?v=)([^/?&#]+)" example = "https://www.facebook.com/watch/?v=VIDEO_ID" def items(self): @@ -481,7 +486,7 @@ class FacebookInfoExtractor(FacebookExtractor): """Extractor for Facebook Profile data""" subcategory = "info" directory_fmt = ("{category}", "{username}") - pattern = rf"{USER_PATTERN}/info" + pattern = USER_PATTERN + r"/info" example = "https://www.facebook.com/USERNAME/info" def items(self): @@ -492,7 +497,7 @@ class FacebookInfoExtractor(FacebookExtractor): class FacebookAlbumsExtractor(FacebookExtractor): """Extractor for Facebook Profile albums""" subcategory = "albums" - pattern = rf"{USER_PATTERN}/photos_albums(?:/([^/?#]+))?" + pattern = USER_PATTERN + r"/photos_albums(?:/([^/?#]+))?" example = "https://www.facebook.com/USERNAME/photos_albums" def items(self): @@ -525,7 +530,7 @@ class FacebookAlbumsExtractor(FacebookExtractor): class FacebookPhotosExtractor(FacebookExtractor): """Extractor for Facebook Profile Photos""" subcategory = "photos" - pattern = rf"{USER_PATTERN}/photos(?:_by)?" + pattern = USER_PATTERN + r"/photos(?:_by)?" example = "https://www.facebook.com/USERNAME/photos" def items(self): @@ -542,7 +547,7 @@ class FacebookPhotosExtractor(FacebookExtractor): class FacebookAvatarExtractor(FacebookExtractor): """Extractor for Facebook Profile Avatars""" subcategory = "avatar" - pattern = rf"{USER_PATTERN}/avatar" + pattern = USER_PATTERN + r"/avatar" example = "https://www.facebook.com/USERNAME/avatar" def items(self): @@ -564,7 +569,7 @@ class FacebookAvatarExtractor(FacebookExtractor): class FacebookUserExtractor(Dispatch, FacebookExtractor): """Extractor for Facebook Profiles""" - pattern = rf"{USER_PATTERN}/?(?:$|\?|#)" + pattern = USER_PATTERN + r"/?(?:$|\?|#)" example = "https://www.facebook.com/USERNAME" def items(self): |
