mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-04-05 06:30:17 -05:00
[ie/instagram] Improve error handling (#12410)
Closes #5967, Closes #6294, Closes #7328, Closes #8452. Authored by: bashonly
This commit is contained in:
parent
a59abe0636
commit
480125560a
@ -2,12 +2,12 @@ import hashlib
|
|||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import time
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..networking.exceptions import HTTPError
|
from ..networking.exceptions import HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
bug_reports_message,
|
||||||
decode_base_n,
|
decode_base_n,
|
||||||
encode_base_n,
|
encode_base_n,
|
||||||
filter_dict,
|
filter_dict,
|
||||||
@ -15,12 +15,12 @@ from ..utils import (
|
|||||||
format_field,
|
format_field,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
_ENCODING_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
|
_ENCODING_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
|
||||||
@ -40,9 +40,6 @@ def _id_to_pk(shortcode):
|
|||||||
|
|
||||||
|
|
||||||
class InstagramBaseIE(InfoExtractor):
|
class InstagramBaseIE(InfoExtractor):
|
||||||
_NETRC_MACHINE = 'instagram'
|
|
||||||
_IS_LOGGED_IN = False
|
|
||||||
|
|
||||||
_API_BASE_URL = 'https://i.instagram.com/api/v1'
|
_API_BASE_URL = 'https://i.instagram.com/api/v1'
|
||||||
_LOGIN_URL = 'https://www.instagram.com/accounts/login'
|
_LOGIN_URL = 'https://www.instagram.com/accounts/login'
|
||||||
|
|
||||||
@ -56,42 +53,6 @@ class InstagramBaseIE(InfoExtractor):
|
|||||||
'Accept': '*/*',
|
'Accept': '*/*',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _perform_login(self, username, password):
|
|
||||||
if self._IS_LOGGED_IN:
|
|
||||||
return
|
|
||||||
|
|
||||||
login_webpage = self._download_webpage(
|
|
||||||
self._LOGIN_URL, None, note='Downloading login webpage', errnote='Failed to download login webpage')
|
|
||||||
|
|
||||||
shared_data = self._parse_json(self._search_regex(
|
|
||||||
r'window\._sharedData\s*=\s*({.+?});', login_webpage, 'shared data', default='{}'), None)
|
|
||||||
|
|
||||||
login = self._download_json(
|
|
||||||
f'{self._LOGIN_URL}/ajax/', None, note='Logging in', headers={
|
|
||||||
**self._api_headers,
|
|
||||||
'X-Requested-With': 'XMLHttpRequest',
|
|
||||||
'X-CSRFToken': shared_data['config']['csrf_token'],
|
|
||||||
'X-Instagram-AJAX': shared_data['rollout_hash'],
|
|
||||||
'Referer': 'https://www.instagram.com/',
|
|
||||||
}, data=urlencode_postdata({
|
|
||||||
'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}',
|
|
||||||
'username': username,
|
|
||||||
'queryParams': '{}',
|
|
||||||
'optIntoOneTap': 'false',
|
|
||||||
'stopDeletionNonce': '',
|
|
||||||
'trustedDeviceRecords': '{}',
|
|
||||||
}))
|
|
||||||
|
|
||||||
if not login.get('authenticated'):
|
|
||||||
if login.get('message'):
|
|
||||||
raise ExtractorError(f'Unable to login: {login["message"]}')
|
|
||||||
elif login.get('user'):
|
|
||||||
raise ExtractorError('Unable to login: Sorry, your password was incorrect. Please double-check your password.', expected=True)
|
|
||||||
elif login.get('user') is False:
|
|
||||||
raise ExtractorError('Unable to login: The username you entered doesn\'t belong to an account. Please check your username and try again.', expected=True)
|
|
||||||
raise ExtractorError('Unable to login')
|
|
||||||
InstagramBaseIE._IS_LOGGED_IN = True
|
|
||||||
|
|
||||||
def _get_count(self, media, kind, *keys):
|
def _get_count(self, media, kind, *keys):
|
||||||
return traverse_obj(
|
return traverse_obj(
|
||||||
media, (kind, 'count'), *((f'edge_media_{key}', 'count') for key in keys),
|
media, (kind, 'count'), *((f'edge_media_{key}', 'count') for key in keys),
|
||||||
@ -443,7 +404,6 @@ class InstagramIE(InstagramBaseIE):
|
|||||||
'doc_id': '8845758582119845',
|
'doc_id': '8845758582119845',
|
||||||
'variables': json.dumps(variables, separators=(',', ':')),
|
'variables': json.dumps(variables, separators=(',', ':')),
|
||||||
})
|
})
|
||||||
media.update(traverse_obj(general_info, ('data', 'xdt_shortcode_media')) or {})
|
|
||||||
|
|
||||||
if not general_info:
|
if not general_info:
|
||||||
self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
|
self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
|
||||||
@ -472,6 +432,26 @@ class InstagramIE(InstagramBaseIE):
|
|||||||
media.update(traverse_obj(
|
media.update(traverse_obj(
|
||||||
additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {})
|
additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {})
|
||||||
|
|
||||||
|
else:
|
||||||
|
xdt_shortcode_media = traverse_obj(general_info, ('data', 'xdt_shortcode_media', {dict})) or {}
|
||||||
|
if not xdt_shortcode_media:
|
||||||
|
error = join_nonempty('title', 'description', delim=': ', from_dict=api_check)
|
||||||
|
if 'Restricted Video' in error:
|
||||||
|
self.raise_login_required(error)
|
||||||
|
elif error:
|
||||||
|
raise ExtractorError(error, expected=True)
|
||||||
|
elif len(video_id) > 28:
|
||||||
|
# It's a private post (video_id == shortcode + 28 extra characters)
|
||||||
|
# Only raise after getting empty response; sometimes "long"-shortcode posts are public
|
||||||
|
self.raise_login_required(
|
||||||
|
'This content is only available for registered users who follow this account')
|
||||||
|
raise ExtractorError(
|
||||||
|
'Instagram sent an empty media response. Check if this post is accessible in your '
|
||||||
|
f'browser without being logged-in. If it is not, then u{self._login_hint()[1:]}. '
|
||||||
|
'Otherwise, if the post is accessible in browser without being logged-in'
|
||||||
|
f'{bug_reports_message(before=",")}', expected=True)
|
||||||
|
media.update(xdt_shortcode_media)
|
||||||
|
|
||||||
username = traverse_obj(media, ('owner', 'username')) or self._search_regex(
|
username = traverse_obj(media, ('owner', 'username')) or self._search_regex(
|
||||||
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'username', fatal=False)
|
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'username', fatal=False)
|
||||||
|
|
||||||
@ -491,8 +471,7 @@ class InstagramIE(InstagramBaseIE):
|
|||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._extract_nodes(nodes, True), video_id,
|
self._extract_nodes(nodes, True), video_id,
|
||||||
format_field(username, None, 'Post by %s'), description)
|
format_field(username, None, 'Post by %s'), description)
|
||||||
|
raise ExtractorError('There is no video in this post', expected=True)
|
||||||
video_url = self._og_search_video_url(webpage, secure=False)
|
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user