1
0
mirror of https://github.com/yt-dlp/yt-dlp synced 2025-04-04 22:20:16 -05:00

[ie/instagram] Improve error handling (#12410)

Closes #5967, Closes #6294, Closes #7328, Closes #8452
Authored by: bashonly
This commit is contained in:
bashonly 2025-02-23 02:35:22 -06:00 committed by GitHub
parent a59abe0636
commit 480125560a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -2,12 +2,12 @@ import hashlib
import itertools import itertools
import json import json
import re import re
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
bug_reports_message,
decode_base_n, decode_base_n,
encode_base_n, encode_base_n,
filter_dict, filter_dict,
@ -15,12 +15,12 @@ from ..utils import (
format_field, format_field,
get_element_by_attribute, get_element_by_attribute,
int_or_none, int_or_none,
join_nonempty,
lowercase_escape, lowercase_escape,
str_or_none, str_or_none,
str_to_int, str_to_int,
traverse_obj, traverse_obj,
url_or_none, url_or_none,
urlencode_postdata,
) )
_ENCODING_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_' _ENCODING_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
@ -40,9 +40,6 @@ def _id_to_pk(shortcode):
class InstagramBaseIE(InfoExtractor): class InstagramBaseIE(InfoExtractor):
_NETRC_MACHINE = 'instagram'
_IS_LOGGED_IN = False
_API_BASE_URL = 'https://i.instagram.com/api/v1' _API_BASE_URL = 'https://i.instagram.com/api/v1'
_LOGIN_URL = 'https://www.instagram.com/accounts/login' _LOGIN_URL = 'https://www.instagram.com/accounts/login'
@ -56,42 +53,6 @@ class InstagramBaseIE(InfoExtractor):
'Accept': '*/*', 'Accept': '*/*',
} }
def _perform_login(self, username, password):
if self._IS_LOGGED_IN:
return
login_webpage = self._download_webpage(
self._LOGIN_URL, None, note='Downloading login webpage', errnote='Failed to download login webpage')
shared_data = self._parse_json(self._search_regex(
r'window\._sharedData\s*=\s*({.+?});', login_webpage, 'shared data', default='{}'), None)
login = self._download_json(
f'{self._LOGIN_URL}/ajax/', None, note='Logging in', headers={
**self._api_headers,
'X-Requested-With': 'XMLHttpRequest',
'X-CSRFToken': shared_data['config']['csrf_token'],
'X-Instagram-AJAX': shared_data['rollout_hash'],
'Referer': 'https://www.instagram.com/',
}, data=urlencode_postdata({
'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}',
'username': username,
'queryParams': '{}',
'optIntoOneTap': 'false',
'stopDeletionNonce': '',
'trustedDeviceRecords': '{}',
}))
if not login.get('authenticated'):
if login.get('message'):
raise ExtractorError(f'Unable to login: {login["message"]}')
elif login.get('user'):
raise ExtractorError('Unable to login: Sorry, your password was incorrect. Please double-check your password.', expected=True)
elif login.get('user') is False:
raise ExtractorError('Unable to login: The username you entered doesn\'t belong to an account. Please check your username and try again.', expected=True)
raise ExtractorError('Unable to login')
InstagramBaseIE._IS_LOGGED_IN = True
def _get_count(self, media, kind, *keys): def _get_count(self, media, kind, *keys):
return traverse_obj( return traverse_obj(
media, (kind, 'count'), *((f'edge_media_{key}', 'count') for key in keys), media, (kind, 'count'), *((f'edge_media_{key}', 'count') for key in keys),
@ -443,7 +404,6 @@ class InstagramIE(InstagramBaseIE):
'doc_id': '8845758582119845', 'doc_id': '8845758582119845',
'variables': json.dumps(variables, separators=(',', ':')), 'variables': json.dumps(variables, separators=(',', ':')),
}) })
media.update(traverse_obj(general_info, ('data', 'xdt_shortcode_media')) or {})
if not general_info: if not general_info:
self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id) self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
@ -472,6 +432,26 @@ class InstagramIE(InstagramBaseIE):
media.update(traverse_obj( media.update(traverse_obj(
additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {}) additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {})
else:
xdt_shortcode_media = traverse_obj(general_info, ('data', 'xdt_shortcode_media', {dict})) or {}
if not xdt_shortcode_media:
error = join_nonempty('title', 'description', delim=': ', from_dict=api_check)
if 'Restricted Video' in error:
self.raise_login_required(error)
elif error:
raise ExtractorError(error, expected=True)
elif len(video_id) > 28:
# It's a private post (video_id == shortcode + 28 extra characters)
# Only raise after getting empty response; sometimes "long"-shortcode posts are public
self.raise_login_required(
'This content is only available for registered users who follow this account')
raise ExtractorError(
'Instagram sent an empty media response. Check if this post is accessible in your '
f'browser without being logged-in. If it is not, then u{self._login_hint()[1:]}. '
'Otherwise, if the post is accessible in browser without being logged-in'
f'{bug_reports_message(before=",")}', expected=True)
media.update(xdt_shortcode_media)
username = traverse_obj(media, ('owner', 'username')) or self._search_regex( username = traverse_obj(media, ('owner', 'username')) or self._search_regex(
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'username', fatal=False) r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'username', fatal=False)
@ -491,8 +471,7 @@ class InstagramIE(InstagramBaseIE):
return self.playlist_result( return self.playlist_result(
self._extract_nodes(nodes, True), video_id, self._extract_nodes(nodes, True), video_id,
format_field(username, None, 'Post by %s'), description) format_field(username, None, 'Post by %s'), description)
raise ExtractorError('There is no video in this post', expected=True)
video_url = self._og_search_video_url(webpage, secure=False)
formats = [{ formats = [{
'url': video_url, 'url': video_url,