mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-06-30 11:02:51 -05:00
[utils] Use bytes-like objects in dfxp2srt
This fixes handling of non-UTF8 TTML subtitles. Closes #14191.
This commit is contained in:
@@ -2572,14 +2572,18 @@ def srt_subtitles_timecode(seconds):
|
||||
|
||||
|
||||
def dfxp2srt(dfxp_data):
|
||||
'''
|
||||
@param dfxp_data A bytes-like object containing DFXP data
|
||||
@returns A unicode object containing converted SRT data
|
||||
'''
|
||||
LEGACY_NAMESPACES = (
|
||||
('http://www.w3.org/ns/ttml', [
|
||||
'http://www.w3.org/2004/11/ttaf1',
|
||||
'http://www.w3.org/2006/04/ttaf1',
|
||||
'http://www.w3.org/2006/10/ttaf1',
|
||||
(b'http://www.w3.org/ns/ttml', [
|
||||
b'http://www.w3.org/2004/11/ttaf1',
|
||||
b'http://www.w3.org/2006/04/ttaf1',
|
||||
b'http://www.w3.org/2006/10/ttaf1',
|
||||
]),
|
||||
('http://www.w3.org/ns/ttml#styling', [
|
||||
'http://www.w3.org/ns/ttml#style',
|
||||
(b'http://www.w3.org/ns/ttml#styling', [
|
||||
b'http://www.w3.org/ns/ttml#style',
|
||||
]),
|
||||
)
|
||||
|
||||
@@ -2674,7 +2678,7 @@ def dfxp2srt(dfxp_data):
|
||||
for ns in v:
|
||||
dfxp_data = dfxp_data.replace(ns, k)
|
||||
|
||||
dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
|
||||
dfxp = compat_etree_fromstring(dfxp_data)
|
||||
out = []
|
||||
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
|
||||
|
||||
|
Reference in New Issue
Block a user