diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index f3695cc06..b76cfae1d 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -21,6 +21,7 @@ from ..utils import ( str_or_none, strip_or_none, traverse_obj, + truncate_string, try_call, try_get, unified_timestamp, @@ -358,6 +359,7 @@ class TwitterCardIE(InfoExtractor): 'display_id': '560070183650213889', 'uploader_url': 'https://twitter.com/Twitter', }, + 'skip': 'This content is no longer available.', }, { 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768', @@ -365,7 +367,7 @@ class TwitterCardIE(InfoExtractor): 'info_dict': { 'id': '623160978427936768', 'ext': 'mp4', - 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.", + 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASA...", 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA", 'uploader': 'NASA', 'uploader_id': 'NASA', @@ -377,12 +379,14 @@ class TwitterCardIE(InfoExtractor): 'like_count': int, 'repost_count': int, 'tags': ['PlutoFlyby'], + 'channel_id': '11348282', + '_old_archive_ids': ['twitter 623160978427936768'], }, 'params': {'format': '[protocol=https]'}, }, { 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977', - 'md5': 'b6d9683dd3f48e340ded81c0e917ad46', + 'md5': 'fb08fbd69595cbd8818f0b2f2a94474d', 'info_dict': { 'id': 'dq4Oj5quskI', 'ext': 'mp4', @@ -390,12 +394,12 @@ class TwitterCardIE(InfoExtractor): 'description': 'md5:a831e97fa384863d6e26ce48d1c43376', 'upload_date': '20111013', 'uploader': 'OMG! UBUNTU!', - 'uploader_id': 'omgubuntu', + 'uploader_id': '@omgubuntu', 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ', 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ', 'channel_follower_count': int, 'chapters': 'count:8', - 'uploader_url': 'http://www.youtube.com/user/omgubuntu', + 'uploader_url': 'https://www.youtube.com/@omgubuntu', 'duration': 138, 'categories': ['Film & Animation'], 'age_limit': 0, @@ -407,6 +411,9 @@ class TwitterCardIE(InfoExtractor): 'tags': 'count:12', 'channel': 'OMG! UBUNTU!', 'playable_in_embed': True, + 'heatmap': 'count:100', + 'timestamp': 1318500227, + 'live_status': 'not_live', }, 'add_ie': ['Youtube'], }, @@ -548,13 +555,14 @@ class TwitterIE(TwitterBaseIE): 'age_limit': 0, '_old_archive_ids': ['twitter 700207533655363584'], }, + 'skip': 'Tweet has been deleted', }, { 'url': 'https://twitter.com/captainamerica/status/719944021058060289', 'info_dict': { 'id': '717462543795523584', 'display_id': '719944021058060289', 'ext': 'mp4', - 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.', + 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theat...', 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI', 'channel_id': '701615052', 'uploader_id': 'CaptainAmerica', @@ -591,7 +599,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '852077943283097602', 'ext': 'mp4', - 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة', + 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعا...', 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN', 'channel_id': '2526757026', 'uploader': 'عالم الأخبار', @@ -615,7 +623,7 @@ class TwitterIE(TwitterBaseIE): 'id': '910030238373089285', 'display_id': '910031516746514432', 'ext': 'mp4', - 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.', + 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terr...', 'thumbnail': r're:^https?://.*\.jpg', 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo', 'channel_id': '2319432498', @@ -707,7 +715,7 @@ class TwitterIE(TwitterBaseIE): 'id': '1349774757969989634', 'display_id': '1349794411333394432', 'ext': 'mp4', - 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba', + 'title': "Brooklyn Nets - WATCH: Sean Marks' full media session after our acquisition of 8-time...", 'thumbnail': r're:^https?://.*\.jpg', 'description': 'md5:71ead15ec44cee55071547d6447c6a3e', 'channel_id': '18552281', @@ -733,7 +741,7 @@ class TwitterIE(TwitterBaseIE): 'id': '1577855447914409984', 'display_id': '1577855540407197696', 'ext': 'mp4', - 'title': 'md5:466a3a8b049b5f5a13164ce915484b51', + 'title': 'Oshtru - gm ✨️ now I can post image and video. nice update.', 'description': 'md5:b9c3699335447391d11753ab21c70a74', 'upload_date': '20221006', 'channel_id': '143077138', @@ -755,10 +763,10 @@ class TwitterIE(TwitterBaseIE): 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464', 'info_dict': { 'id': '1577719286659006464', - 'title': 'Ultima Reload - Test', + 'title': 'Ultima - Test', 'description': 'Test https://t.co/Y3KEZD7Dad', 'channel_id': '168922496', - 'uploader': 'Ultima Reload', + 'uploader': 'Ultima', 'uploader_id': 'UltimaShadowX', 'uploader_url': 'https://twitter.com/UltimaShadowX', 'upload_date': '20221005', @@ -777,7 +785,7 @@ class TwitterIE(TwitterBaseIE): 'id': '1575559336759263233', 'display_id': '1575560063510810624', 'ext': 'mp4', - 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9', + 'title': 'Max Olson - Absolutely heartbreaking footage captured by our surge probe of catas...', 'thumbnail': r're:^https?://.*\.jpg', 'description': 'md5:95aea692fda36a12081b9629b02daa92', 'channel_id': '1094109584', @@ -901,18 +909,18 @@ class TwitterIE(TwitterBaseIE): 'playlist_mincount': 2, 'info_dict': { 'id': '1600649710662213632', - 'title': 'md5:be05989b0722e114103ed3851a0ffae2', + 'title': "Jocelyn Laidlaw - How Kirstie Alley's tragic death inspired me to share more about my c...", 'timestamp': 1670459604.0, 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c', 'comment_count': int, - 'uploader_id': 'CTVJLaidlaw', + 'uploader_id': 'JocelynVLaidlaw', 'channel_id': '80082014', 'repost_count': int, 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'], 'upload_date': '20221208', 'age_limit': 0, 'uploader': 'Jocelyn Laidlaw', - 'uploader_url': 'https://twitter.com/CTVJLaidlaw', + 'uploader_url': 'https://twitter.com/JocelynVLaidlaw', 'like_count': int, }, }, { @@ -921,17 +929,17 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1600649511827013632', 'ext': 'mp4', - 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1', + 'title': "Jocelyn Laidlaw - How Kirstie Alley's tragic death inspired me to share more about my c... #1", 'thumbnail': r're:^https?://.+\.jpg', 'timestamp': 1670459604.0, 'channel_id': '80082014', - 'uploader_id': 'CTVJLaidlaw', + 'uploader_id': 'JocelynVLaidlaw', 'uploader': 'Jocelyn Laidlaw', 'repost_count': int, 'comment_count': int, 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'], 'duration': 102.226, - 'uploader_url': 'https://twitter.com/CTVJLaidlaw', + 'uploader_url': 'https://twitter.com/JocelynVLaidlaw', 'display_id': '1600649710662213632', 'like_count': int, 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c', @@ -990,6 +998,7 @@ class TwitterIE(TwitterBaseIE): '_old_archive_ids': ['twitter 1599108751385972737'], }, 'params': {'noplaylist': True}, + 'skip': 'Tweet is limited', }, { 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625', 'info_dict': { @@ -1001,10 +1010,10 @@ class TwitterIE(TwitterBaseIE): 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml', 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig', 'age_limit': 0, - 'uploader': 'Mün', + 'uploader': 'Boy Called Mün', 'repost_count': int, 'upload_date': '20221206', - 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525', + 'title': 'Boy Called Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525', 'comment_count': int, 'like_count': int, 'tags': [], @@ -1042,7 +1051,7 @@ class TwitterIE(TwitterBaseIE): 'id': '1694928337846538240', 'ext': 'mp4', 'display_id': '1695424220702888009', - 'title': 'md5:e8daa9527bc2b947121395494f786d9d', + 'title': 'Benny Johnson - Donald Trump driving through the urban, poor neighborhoods of Atlanta...', 'description': 'md5:004f2d37fd58737724ec75bc7e679938', 'channel_id': '15212187', 'uploader': 'Benny Johnson', @@ -1066,7 +1075,7 @@ class TwitterIE(TwitterBaseIE): 'id': '1694928337846538240', 'ext': 'mp4', 'display_id': '1695424220702888009', - 'title': 'md5:e8daa9527bc2b947121395494f786d9d', + 'title': 'Benny Johnson - Donald Trump driving through the urban, poor neighborhoods of Atlanta...', 'description': 'md5:004f2d37fd58737724ec75bc7e679938', 'channel_id': '15212187', 'uploader': 'Benny Johnson', @@ -1101,6 +1110,7 @@ class TwitterIE(TwitterBaseIE): 'view_count': int, }, 'add_ie': ['TwitterBroadcast'], + 'skip': 'Broadcast no longer exists', }, { # Animated gif and quote tweet video 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950', @@ -1129,7 +1139,7 @@ class TwitterIE(TwitterBaseIE): 'info_dict': { 'id': '1724883339285544960', 'ext': 'mp4', - 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c', + 'title': 'Robert F. Kennedy Jr - A beautifully crafted short film by Mikki Willis about my independent...', 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164', 'display_id': '1724884212803834154', 'channel_id': '337808606', @@ -1150,7 +1160,7 @@ class TwitterIE(TwitterBaseIE): }, { # x.com 'url': 'https://x.com/historyinmemes/status/1790637656616943991', - 'md5': 'daca3952ba0defe2cfafb1276d4c1ea5', + 'md5': '4549eda363fecfe37439c455923cba2c', 'info_dict': { 'id': '1790637589910654976', 'ext': 'mp4', @@ -1390,7 +1400,7 @@ class TwitterIE(TwitterBaseIE): title = description = traverse_obj( status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or '' # strip 'https -_t.co_BJYgOjSeGA' junk from filenames - title = re.sub(r'\s+(https?://[^ ]+)', '', title) + title = truncate_string(re.sub(r'\s+(https?://[^ ]+)', '', title), left=72) user = status.get('user') or {} uploader = user.get('name') if uploader: