mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-08-02 10:30:04 -05:00
Compare commits
540 Commits
2018.06.19
...
2019.03.01
Author | SHA1 | Date | |
---|---|---|---|
![]() |
04c33bdfb3 | ||
![]() |
333f617b12 | ||
![]() |
ff60ec8f02 | ||
![]() |
9d9a8676dc | ||
![]() |
db1c3a9d3f | ||
![]() |
55b8588f0e | ||
![]() |
f0228f56fb | ||
![]() |
8c80603f1a | ||
![]() |
37b239b3b6 | ||
![]() |
caf48f557a | ||
![]() |
77a842c892 | ||
![]() |
c76fc5b22a | ||
![]() |
388cfbd3d8 | ||
![]() |
d93083789b | ||
![]() |
34568dc296 | ||
![]() |
3c9647372e | ||
![]() |
659e93fcf5 | ||
![]() |
c9a0ea6e51 | ||
![]() |
d7d513891b | ||
![]() |
ae65c93a26 | ||
![]() |
ba2e3730d1 | ||
![]() |
2b2da3ba10 | ||
![]() |
794c1b6e02 | ||
![]() |
7bee705d8f | ||
![]() |
6f5c1807f4 | ||
![]() |
985637cbbf | ||
![]() |
7d8b89163c | ||
![]() |
d777f3e81c | ||
![]() |
4c0e0dc9dc | ||
![]() |
f516f44094 | ||
![]() |
e9dee7f1b2 | ||
![]() |
91effe22a0 | ||
![]() |
04eacf5453 | ||
![]() |
f1f5b47255 | ||
![]() |
1211bb6dac | ||
![]() |
4de3cb883c | ||
![]() |
22f5f5c6fc | ||
![]() |
49bd993fd9 | ||
![]() |
f06a1cabe8 | ||
![]() |
241c5d7d38 | ||
![]() |
8fecc7353d | ||
![]() |
5dda1edef9 | ||
![]() |
d2d970d07e | ||
![]() |
48fb963b2f | ||
![]() |
70c3ee1367 | ||
![]() |
07fbfef1c7 | ||
![]() |
eecf788b90 | ||
![]() |
0efcb5a2fe | ||
![]() |
7c5307f4c4 | ||
![]() |
6cc6e0c34d | ||
![]() |
b9bc1cff72 | ||
![]() |
e9fef7ee4e | ||
![]() |
b6423e6ca2 | ||
![]() |
3ef2da2d21 | ||
![]() |
49fe4175ae | ||
![]() |
9613e14a92 | ||
![]() |
15e832ff2a | ||
![]() |
645c4885cf | ||
![]() |
7b0f9df23d | ||
![]() |
c2a0fe2ea7 | ||
![]() |
ce52c7c111 | ||
![]() |
1063b4c707 | ||
![]() |
ca01e5f903 | ||
![]() |
5496754ae4 | ||
![]() |
9868f1ab18 | ||
![]() |
41cff90c41 | ||
![]() |
a2d821d711 | ||
![]() |
6df196f32e | ||
![]() |
41c2c254d3 | ||
![]() |
a81daba231 | ||
![]() |
61ff92e11e | ||
![]() |
1397a790ff | ||
![]() |
7f903dd8bf | ||
![]() |
2b3afe6b0f | ||
![]() |
e71be6ee9f | ||
![]() |
bf8ebc9cfe | ||
![]() |
1fcc91663b | ||
![]() |
30cd1a5f39 | ||
![]() |
458fd30f56 | ||
![]() |
845333acf6 | ||
![]() |
252abb1e8b | ||
![]() |
ae18d58297 | ||
![]() |
1602a240a7 | ||
![]() |
0eba178fce | ||
![]() |
eb35b163ad | ||
![]() |
118afcf52f | ||
![]() |
9713d1d1e0 | ||
![]() |
a1e171233d | ||
![]() |
7d311586ed | ||
![]() |
e118a8794f | ||
![]() |
435e382423 | ||
![]() |
0670bdd8f2 | ||
![]() |
71a1f61700 | ||
![]() |
6510a3aa97 | ||
![]() |
278d061a0c | ||
![]() |
503b604a31 | ||
![]() |
4b85f0f9db | ||
![]() |
19d6991312 | ||
![]() |
07f9febc4b | ||
![]() |
fad4ceb534 | ||
![]() |
6945b9e78f | ||
![]() |
29cfcb43da | ||
![]() |
a1a4607598 | ||
![]() |
73c19aaa9f | ||
![]() |
289ef490f7 | ||
![]() |
6ca3fa898c | ||
![]() |
31fbedc06a | ||
![]() |
15870747f0 | ||
![]() |
fc746c3fdd | ||
![]() |
4e58d9fabb | ||
![]() |
2cc779f497 | ||
![]() |
379306ef55 | ||
![]() |
f28363ad1f | ||
![]() |
2bfc1d9d68 | ||
![]() |
e2dd132f05 | ||
![]() |
79fec976b0 | ||
![]() |
29639b363d | ||
![]() |
f53cecd796 | ||
![]() |
fa4ac365f6 | ||
![]() |
bfc8eeea57 | ||
![]() |
b0d73a7456 | ||
![]() |
4fe54c128a | ||
![]() |
a16c7c033a | ||
![]() |
2f483bc1c3 | ||
![]() |
561b456e2d | ||
![]() |
929ba3997b | ||
![]() |
10026329c2 | ||
![]() |
3b983ee471 | ||
![]() |
f1ab3b7de7 | ||
![]() |
d65f6e734b | ||
![]() |
ed8db0a25c | ||
![]() |
60a899bb7e | ||
![]() |
cbdc688c41 | ||
![]() |
5caa531a1a | ||
![]() |
a64646e417 | ||
![]() |
c469e8808c | ||
![]() |
b64f6e690f | ||
![]() |
a4491dd55c | ||
![]() |
c3e543893b | ||
![]() |
432aba1c5e | ||
![]() |
7c072f00d6 | ||
![]() |
96c186e1fd | ||
![]() |
4ad159c7b0 | ||
![]() |
65615be368 | ||
![]() |
3c1089dba4 | ||
![]() |
6089ff40e7 | ||
![]() |
2543938bbe | ||
![]() |
440863ade1 | ||
![]() |
391256dc0e | ||
![]() |
06b4b90c70 | ||
![]() |
8cb5c2181a | ||
![]() |
0266854f63 | ||
![]() |
bcc334a3c6 | ||
![]() |
e9a50fba86 | ||
![]() |
04fb6928da | ||
![]() |
b7acc83550 | ||
![]() |
de0359c0af | ||
![]() |
c87f65e43d | ||
![]() |
d7c3af7a72 | ||
![]() |
aeb72b3a41 | ||
![]() |
2122d7151d | ||
![]() |
751e051557 | ||
![]() |
d226c560a6 | ||
![]() |
8437f5089f | ||
![]() |
1d803085d7 | ||
![]() |
696f4e4114 | ||
![]() |
0e713dbb11 | ||
![]() |
9b5c8751ee | ||
![]() |
d9f1123c08 | ||
![]() |
3d8eb6beb9 | ||
![]() |
38d15ba7f9 | ||
![]() |
6b688b8942 | ||
![]() |
9d9daed464 | ||
![]() |
32ac3d49ae | ||
![]() |
373941c5f0 | ||
![]() |
4e1ddc8da9 | ||
![]() |
e4d51e751e | ||
![]() |
c2dd2dc086 | ||
![]() |
140a13f5de | ||
![]() |
825cd268a3 | ||
![]() |
63529e935c | ||
![]() |
4273caf5c7 | ||
![]() |
e1a0628797 | ||
![]() |
835e45abab | ||
![]() |
904bb599be | ||
![]() |
65e29cdac3 | ||
![]() |
4ee1845454 | ||
![]() |
cfd13c4c45 | ||
![]() |
386d1fea79 | ||
![]() |
7216e9bff7 | ||
![]() |
4cee62ade0 | ||
![]() |
cbb3e4b14f | ||
![]() |
752582183a | ||
![]() |
1c82122741 | ||
![]() |
50a498a68e | ||
![]() |
252e172dea | ||
![]() |
90046d7761 | ||
![]() |
c8b3751086 | ||
![]() |
21c340b83f | ||
![]() |
c984196cf1 | ||
![]() |
7f41a598b3 | ||
![]() |
8fe104947d | ||
![]() |
0a05cfabb6 | ||
![]() |
13e17cd28e | ||
![]() |
102a4e54c5 | ||
![]() |
6e29458f24 | ||
![]() |
59c3940165 | ||
![]() |
cefe42c412 | ||
![]() |
24cc64254c | ||
![]() |
9e02c2c704 | ||
![]() |
5ee7ae5c75 | ||
![]() |
3ad6dabd33 | ||
![]() |
5f47a60c5d | ||
![]() |
1bab343704 | ||
![]() |
1d88b3e6e6 | ||
![]() |
9235b5091c | ||
![]() |
c3c098dcf2 | ||
![]() |
8c5879715f | ||
![]() |
ebb0449049 | ||
![]() |
dfe0a3a9d2 | ||
![]() |
c976873c5b | ||
![]() |
15699ec8b0 | ||
![]() |
33cc1ea586 | ||
![]() |
ae9d77dab5 | ||
![]() |
8bb0c9cc16 | ||
![]() |
5547014ad9 | ||
![]() |
ab896fa894 | ||
![]() |
1fa59a928e | ||
![]() |
ce18a19be9 | ||
![]() |
1ead840d2c | ||
![]() |
aa374bc78e | ||
![]() |
3430ff9b07 | ||
![]() |
f012823082 | ||
![]() |
16597c2f94 | ||
![]() |
adbbdefc81 | ||
![]() |
053e5b12b2 | ||
![]() |
d9df8f120b | ||
![]() |
ca01d17884 | ||
![]() |
d19600df07 | ||
![]() |
641e86e3cf | ||
![]() |
6864855eb1 | ||
![]() |
d861a9d581 | ||
![]() |
66173211c4 | ||
![]() |
6f2883a2df | ||
![]() |
560020da30 | ||
![]() |
305ce767d5 | ||
![]() |
157eef3e63 | ||
![]() |
bd2d553c7b | ||
![]() |
af60e81e3c | ||
![]() |
a843464a7e | ||
![]() |
6866f24494 | ||
![]() |
4e33e0792a | ||
![]() |
35328915b5 | ||
![]() |
6c882aa899 | ||
![]() |
183417a50f | ||
![]() |
6a6d7f0641 | ||
![]() |
05bd5e9c77 | ||
![]() |
15ed5a2784 | ||
![]() |
2e1280ed43 | ||
![]() |
8578ea4dcb | ||
![]() |
9b27a78a88 | ||
![]() |
964b989dc8 | ||
![]() |
f97c099131 | ||
![]() |
1febf99da1 | ||
![]() |
4167148fa4 | ||
![]() |
5bb0479269 | ||
![]() |
02df855e13 | ||
![]() |
006374e3ae | ||
![]() |
11d19ff503 | ||
![]() |
a640c4d226 | ||
![]() |
d0058c76d5 | ||
![]() |
0919cd4d01 | ||
![]() |
2599956c9f | ||
![]() |
9b9b3501c5 | ||
![]() |
730c0d12a0 | ||
![]() |
f17a24a6df | ||
![]() |
83852e57bf | ||
![]() |
96a91b1551 | ||
![]() |
cab26223bf | ||
![]() |
532782ade1 | ||
![]() |
f81d44aab6 | ||
![]() |
2511eee215 | ||
![]() |
0df514f07e | ||
![]() |
432cd48410 | ||
![]() |
c0345b825f | ||
![]() |
2004e2210b | ||
![]() |
16d896b2a7 | ||
![]() |
22e07ce502 | ||
![]() |
dbdaaa231a | ||
![]() |
38c32dbf19 | ||
![]() |
a085410936 | ||
![]() |
6895ea4d3f | ||
![]() |
faac1c1f70 | ||
![]() |
573531dcfb | ||
![]() |
da56fb631f | ||
![]() |
95e42d7336 | ||
![]() |
cf0db4d997 | ||
![]() |
036f905161 | ||
![]() |
4b6aca17cc | ||
![]() |
c620694c97 | ||
![]() |
061ea3a776 | ||
![]() |
c70ba664f1 | ||
![]() |
f16679e843 | ||
![]() |
b14475724b | ||
![]() |
aa7e974a2a | ||
![]() |
9aac22c195 | ||
![]() |
94db1f7f3b | ||
![]() |
ffa7b2bfee | ||
![]() |
2943397e87 | ||
![]() |
9c4a83a1be | ||
![]() |
9ff558f67f | ||
![]() |
c2fe21efaa | ||
![]() |
476cf548e1 | ||
![]() |
bebef10909 | ||
![]() |
4c237ab787 | ||
![]() |
a1d1c63678 | ||
![]() |
1fafb32984 | ||
![]() |
c901cc38e5 | ||
![]() |
022218f2f0 | ||
![]() |
08c7d3dade | ||
![]() |
5e733b066a | ||
![]() |
7d9e858132 | ||
![]() |
b99b0bcfa0 | ||
![]() |
baeabf7742 | ||
![]() |
582797d780 | ||
![]() |
160c2773f6 | ||
![]() |
ee5fe42e44 | ||
![]() |
f0ee386851 | ||
![]() |
a94e7c195e | ||
![]() |
5d90a8a5f3 | ||
![]() |
19a352854f | ||
![]() |
c9d891f19a | ||
![]() |
d96f976b0c | ||
![]() |
2e7ed29e34 | ||
![]() |
21c1a00dd7 | ||
![]() |
0082f44a08 | ||
![]() |
f60b9803a4 | ||
![]() |
d98cb62e55 | ||
![]() |
05e7c184da | ||
![]() |
66d106f270 | ||
![]() |
3c7da54c92 | ||
![]() |
9795d93316 | ||
![]() |
365343131d | ||
![]() |
85fa80d5f9 | ||
![]() |
245cbb33bc | ||
![]() |
85cd69adcb | ||
![]() |
4c89a675dd | ||
![]() |
3d3499742c | ||
![]() |
c17e100b96 | ||
![]() |
8fd12a0831 | ||
![]() |
60ce0c67fd | ||
![]() |
cd5a74a28e | ||
![]() |
f6d7f7b474 | ||
![]() |
21160a1792 | ||
![]() |
4ac73fc170 | ||
![]() |
28fcb7b061 | ||
![]() |
3a9c928426 | ||
![]() |
d9b1cec171 | ||
![]() |
e504b09070 | ||
![]() |
c8f6ab8c38 | ||
![]() |
e2f61598be | ||
![]() |
c11485162b | ||
![]() |
1084563eaa | ||
![]() |
d9b0d118ad | ||
![]() |
8b40c92724 | ||
![]() |
3661ebf2b6 | ||
![]() |
0e7b8d3eac | ||
![]() |
127103b643 | ||
![]() |
d03beddf0f | ||
![]() |
dd4c449219 | ||
![]() |
6f1f59f39c | ||
![]() |
15bf2ca0da | ||
![]() |
0f2aa0dcaa | ||
![]() |
db348e8849 | ||
![]() |
f5b0175349 | ||
![]() |
79facb2773 | ||
![]() |
96dbf70de6 | ||
![]() |
8476b4fd91 | ||
![]() |
14f577e31c | ||
![]() |
25d110be30 | ||
![]() |
a2637a2dda | ||
![]() |
2e4350eec6 | ||
![]() |
2c9d3b9962 | ||
![]() |
13ef64fd93 | ||
![]() |
6f9f3340bb | ||
![]() |
ae2384ff5f | ||
![]() |
d0de6a287a | ||
![]() |
d0c5fabc12 | ||
![]() |
ad98d2eb74 | ||
![]() |
a41a506077 | ||
![]() |
9a47fa35dd | ||
![]() |
2d4fe594c6 | ||
![]() |
09322cccdb | ||
![]() |
aa1d5eb905 | ||
![]() |
93284ff2ea | ||
![]() |
0a9a8118ce | ||
![]() |
3d08f63dc5 | ||
![]() |
27d8e089a2 | ||
![]() |
7bbc1b189a | ||
![]() |
0b87e88453 | ||
![]() |
4d59db5b90 | ||
![]() |
4627995882 | ||
![]() |
7f2611cb5b | ||
![]() |
54a5be4dba | ||
![]() |
ed6919e737 | ||
![]() |
2b83da2463 | ||
![]() |
c1a37eb24a | ||
![]() |
4991e16c2a | ||
![]() |
14b7a24c19 | ||
![]() |
73f3bdbeb4 | ||
![]() |
9e21e6d96b | ||
![]() |
8959018a5f | ||
![]() |
eebbce5656 | ||
![]() |
56213aff1d | ||
![]() |
409b9324da | ||
![]() |
02df41354c | ||
![]() |
dd88fd65a5 | ||
![]() |
287cf7e443 | ||
![]() |
dac6f7654a | ||
![]() |
e0b6e98871 | ||
![]() |
beff09505c | ||
![]() |
135e6a1c10 | ||
![]() |
c707d2067d | ||
![]() |
4c86163b60 | ||
![]() |
b662273989 | ||
![]() |
df4d817bc3 | ||
![]() |
db192b2932 | ||
![]() |
52007de8ca | ||
![]() |
28f96cf407 | ||
![]() |
eda86b4335 | ||
![]() |
bf1245d236 | ||
![]() |
6f356cbbcf | ||
![]() |
0a74b45191 | ||
![]() |
d6ef8b4dd4 | ||
![]() |
60c0856223 | ||
![]() |
57c68ec4c3 | ||
![]() |
24e0cd709f | ||
![]() |
4779420ce8 | ||
![]() |
de4c41b437 | ||
![]() |
b65e3b0636 | ||
![]() |
d37dc6e1c9 | ||
![]() |
a62460aa21 | ||
![]() |
d588d4a5a6 | ||
![]() |
81cc22bab6 | ||
![]() |
20f96f64bd | ||
![]() |
af322eb830 | ||
![]() |
cb1c3a3c07 | ||
![]() |
48afc6ca3e | ||
![]() |
644921b372 | ||
![]() |
19b9de13c4 | ||
![]() |
6f2d82a5a0 | ||
![]() |
7ff129d3ea | ||
![]() |
9d1b213845 | ||
![]() |
5484828418 | ||
![]() |
4eecef84f3 | ||
![]() |
b2286f8fb2 | ||
![]() |
4938c8d573 | ||
![]() |
1a88fc5a69 | ||
![]() |
38e87f6c2a | ||
![]() |
ec240a4369 | ||
![]() |
cd3a3ff93b | ||
![]() |
9a984265b9 | ||
![]() |
a098c99f0d | ||
![]() |
8e37a7e4cc | ||
![]() |
722f1a0f8f | ||
![]() |
0c7b4f49eb | ||
![]() |
ad1bc71a8a | ||
![]() |
b5dec62ca6 | ||
![]() |
631f93ee2d | ||
![]() |
d4e7065111 | ||
![]() |
234a85858c | ||
![]() |
a789d1cc90 | ||
![]() |
694079dff7 | ||
![]() |
d94fb1225e | ||
![]() |
7930f91494 | ||
![]() |
a702056fbe | ||
![]() |
8fd2a7be37 | ||
![]() |
6de82b4476 | ||
![]() |
8e66ffc3b7 | ||
![]() |
6f27998e75 | ||
![]() |
3052a30d42 | ||
![]() |
4ecf300d13 | ||
![]() |
af03000ad5 | ||
![]() |
b96b4be461 | ||
![]() |
edb0e17188 | ||
![]() |
e9c671d5e8 | ||
![]() |
fd62b36680 | ||
![]() |
25586c601c | ||
![]() |
ecb6b6ae2d | ||
![]() |
c258570edd | ||
![]() |
6fc09f0155 | ||
![]() |
11330f5121 | ||
![]() |
8da17f9680 | ||
![]() |
c63f5fb863 | ||
![]() |
38f1eb0ac3 | ||
![]() |
371dcc1dd4 | ||
![]() |
bd21ead2a2 | ||
![]() |
905eef2b06 | ||
![]() |
79367a9820 | ||
![]() |
40a051fa9f | ||
![]() |
7e8e948cf7 | ||
![]() |
4b3ee09886 | ||
![]() |
79fd7320e2 | ||
![]() |
0685d9727b | ||
![]() |
e06632e3fe | ||
![]() |
69fcdb845b | ||
![]() |
6868d272e5 | ||
![]() |
4742150788 | ||
![]() |
4e71dfd819 | ||
![]() |
1ed0b2f74d | ||
![]() |
e15141adae | ||
![]() |
94fef94d9c | ||
![]() |
9a6628aaf9 | ||
![]() |
689af4960e | ||
![]() |
d5de0f21b9 | ||
![]() |
24d26ab380 | ||
![]() |
836ef4840f | ||
![]() |
5621c3222e | ||
![]() |
db5debf313 | ||
![]() |
8cee692b8b | ||
![]() |
973b6ceebb | ||
![]() |
eca1f0d115 | ||
![]() |
2160768a21 | ||
![]() |
267d81962a | ||
![]() |
9cf648c92b | ||
![]() |
5e8e2fa51f | ||
![]() |
d4a24f4091 | ||
![]() |
acbd0ff5df | ||
![]() |
7b393f9cc5 | ||
![]() |
c3bcd206eb | ||
![]() |
1f6cc5807e | ||
![]() |
c306f076ec | ||
![]() |
a0949fec08 | ||
![]() |
74caf528bc | ||
![]() |
9fb62e35f6 | ||
![]() |
b71cc71910 | ||
![]() |
a4ec45179e | ||
![]() |
30374f4d40 | ||
![]() |
91aa502d91 | ||
![]() |
f51f526b0a |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.06.19*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.06.19**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.01**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2018.06.19
|
||||
[debug] youtube-dl version 2019.03.01
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
3
.gitignore
vendored
3
.gitignore
vendored
@@ -48,3 +48,6 @@ youtube-dl.zsh
|
||||
|
||||
tmp/
|
||||
venv/
|
||||
|
||||
# VS Code related files
|
||||
.vscode
|
||||
|
12
.travis.yml
12
.travis.yml
@@ -15,6 +15,18 @@ env:
|
||||
- YTDL_TEST_SET=download
|
||||
matrix:
|
||||
include:
|
||||
- python: 3.7
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
- python: 3.7
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=download
|
||||
- python: 3.8-dev
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
- python: 3.8-dev
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=download
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
||||
fast_finish: true
|
||||
|
7
AUTHORS
7
AUTHORS
@@ -239,3 +239,10 @@ Martin Weinelt
|
||||
Surya Oktafendri
|
||||
TingPing
|
||||
Alexandre Macabies
|
||||
Bastian de Groot
|
||||
Niklas Haas
|
||||
András Veres-Szentkirályi
|
||||
Enes Solak
|
||||
Nathan Rossi
|
||||
Thomas van der Berg
|
||||
Luca Cherubin
|
||||
|
@@ -152,16 +152,20 @@ After you have ensured this site is distributing its content legally, you can fo
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||
|
||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||
|
||||
9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/extractors.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
||||
@@ -173,7 +177,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
|
||||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
@@ -181,7 +185,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and
|
||||
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
#### Example
|
||||
|
||||
@@ -257,11 +261,33 @@ title = meta.get('title') or self._og_search_title(webpage)
|
||||
|
||||
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
||||
|
||||
### Make regular expressions flexible
|
||||
### Regular expressions
|
||||
|
||||
When using regular expressions try to write them fuzzy and flexible.
|
||||
#### Don't capture groups you don't use
|
||||
|
||||
Capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non capturing.
|
||||
|
||||
##### Example
|
||||
|
||||
Don't capture id attribute name here since you can't use it for anything anyway.
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
r'(?:id|ID)=(?P<id>\d+)'
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
```python
|
||||
r'(id|ID)=(?P<id>\d+)'
|
||||
```
|
||||
|
||||
|
||||
#### Make regular expressions relaxed and flexible
|
||||
|
||||
When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
|
||||
|
||||
#### Example
|
||||
##### Example
|
||||
|
||||
Say you need to extract `title` from the following HTML code:
|
||||
|
||||
@@ -294,7 +320,49 @@ title = self._search_regex(
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
### Use safe conversion functions
|
||||
### Long lines policy
|
||||
|
||||
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
|
||||
|
||||
For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
'https://www.youtube.com/watch?v=FqZTN594JQw&list='
|
||||
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
### Use convenience conversion and parsing functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
||||
Use `url_or_none` for safe URL processing.
|
||||
|
||||
Use `try_get` for safe metadata extraction from parsed JSON.
|
||||
|
||||
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
|
||||
|
||||
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
|
||||
|
||||
#### More examples
|
||||
|
||||
##### Safely extract optional description from parsed JSON
|
||||
```python
|
||||
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
|
||||
```
|
||||
|
||||
##### Safely extract more optional metadata
|
||||
```python
|
||||
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
|
||||
description = video.get('summary')
|
||||
duration = float_or_none(video.get('durationMs'), scale=1000)
|
||||
view_count = int_or_none(video.get('views'))
|
||||
```
|
||||
|
||||
|
616
ChangeLog
616
ChangeLog
@@ -1,3 +1,619 @@
|
||||
version 2019.03.01
|
||||
|
||||
Core
|
||||
+ [downloader/external] Add support for rate limit and retries for wget
|
||||
* [downloader/external] Fix infinite retries for curl (#19303)
|
||||
|
||||
Extractors
|
||||
* [npo] Fix extraction (#20084)
|
||||
* [francetv:site] Extend video id regex (#20029, #20071)
|
||||
+ [periscope] Extract width and height (#20015)
|
||||
* [servus] Fix extraction (#19297)
|
||||
* [bbccouk] Make subtitles non fatal (#19651)
|
||||
* [metacafe] Fix family filter bypass (#19287)
|
||||
|
||||
|
||||
version 2019.02.18
|
||||
|
||||
Extractors
|
||||
* [tvp:website] Fix and improve extraction
|
||||
+ [tvp] Detect unavailable videos
|
||||
* [tvp] Fix description extraction and make thumbnail optional
|
||||
+ [linuxacademy] Add support for linuxacademy.com (#12207)
|
||||
* [bilibili] Update keys (#19233)
|
||||
* [udemy] Extend URL regular expressions (#14330, #15883)
|
||||
* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126)
|
||||
* [noovo] Fix extraction (#19230)
|
||||
* [rai] Relax URL regular expression (#19232)
|
||||
+ [vshare] Pass Referer to download request (#19205, #19221)
|
||||
+ [openload] Add support for oload.live (#19222)
|
||||
* [imgur] Use video id as title fallback (#18590)
|
||||
+ [twitch] Add new source format detection approach (#19193)
|
||||
* [tvplayhome] Fix video id extraction (#19190)
|
||||
* [tvplayhome] Fix episode metadata extraction (#19190)
|
||||
* [rutube:embed] Fix extraction (#19163)
|
||||
+ [rutube:embed] Add support private videos (#19163)
|
||||
+ [soundcloud] Extract more metadata
|
||||
+ [trunews] Add support for trunews.com (#19153)
|
||||
+ [linkedin:learning] Extract chapter_number and chapter_id (#19162)
|
||||
|
||||
|
||||
version 2019.02.08
|
||||
|
||||
Core
|
||||
* [utils] Improve JSON-LD regular expression (#18058)
|
||||
* [YoutubeDL] Fallback to ie_key of matching extractor while making
|
||||
download archive id when no explicit ie_key is provided (#19022)
|
||||
|
||||
Extractors
|
||||
+ [malltv] Add support for mall.tv (#18058, #17856)
|
||||
+ [spankbang:playlist] Add support for playlists (#19145)
|
||||
* [spankbang] Extend URL regular expression
|
||||
* [trutv] Fix extraction (#17336)
|
||||
* [toutv] Fix authentication (#16398, #18700)
|
||||
* [pornhub] Fix tags and categories extraction (#13720, #19135)
|
||||
* [pornhd] Fix formats extraction
|
||||
+ [pornhd] Extract like count (#19123, #19125)
|
||||
* [radiocanada] Switch to the new media requests (#19115)
|
||||
+ [teachable] Add support for courses.workitdaily.com (#18871)
|
||||
- [vporn] Remove extractor (#16276)
|
||||
+ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086)
|
||||
+ [drtuber] Extract duration (#19078)
|
||||
* [soundcloud] Fix paged playlists extraction, add support for albums and update client id
|
||||
* [soundcloud] Update client id
|
||||
* [drtv] Improve preference (#19079)
|
||||
+ [openload] Add support for openload.pw and oload.pw (#18930)
|
||||
+ [openload] Add support for oload.info (#19073)
|
||||
* [crackle] Authorize media detail request (#16931)
|
||||
|
||||
|
||||
version 2019.01.30.1
|
||||
|
||||
Core
|
||||
* [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (#19067)
|
||||
|
||||
|
||||
version 2019.01.30
|
||||
|
||||
Core
|
||||
* [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding
|
||||
subtitles (#19024, #19042)
|
||||
* [postprocessor/ffmpeg] Disable "Last message repeated" messages (#19025)
|
||||
|
||||
Extractors
|
||||
* [yourporn] Fix extraction and extract duration (#18815, #18852, #19061)
|
||||
* [drtv] Improve extraction (#19039)
|
||||
+ Add support for EncryptedUri videos
|
||||
+ Extract more metadata
|
||||
* Fix subtitles extraction
|
||||
+ [fox] Add support for locked videos using cookies (#19060)
|
||||
* [fox] Fix extraction for free videos (#19060)
|
||||
+ [zattoo] Add support for tv.salt.ch (#19059)
|
||||
|
||||
|
||||
version 2019.01.27
|
||||
|
||||
Core
|
||||
+ [extractor/common] Extract season in _json_ld
|
||||
* [postprocessor/ffmpeg] Fallback to ffmpeg/avconv for audio codec detection
|
||||
(#681)
|
||||
|
||||
Extractors
|
||||
* [vice] Fix extraction for locked videos (#16248)
|
||||
+ [wakanim] Detect DRM protected videos
|
||||
+ [wakanim] Add support for wakanim.tv (#14374)
|
||||
* [usatoday] Fix extraction for videos with custom brightcove partner id
|
||||
(#18990)
|
||||
* [drtv] Fix extraction (#18989)
|
||||
* [nhk] Extend URL regular expression (#18968)
|
||||
* [go] Fix Adobe Pass requests for Disney Now (#18901)
|
||||
+ [openload] Add support for oload.club (#18969)
|
||||
|
||||
|
||||
version 2019.01.24
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Fix negation for string operators in format selection (#18961)
|
||||
|
||||
|
||||
version 2019.01.23
|
||||
|
||||
Core
|
||||
* [utils] Fix urljoin for paths with non-http(s) schemes
|
||||
* [extractor/common] Improve jwplayer relative URL handling (#18892)
|
||||
+ [YoutubeDL] Add negation support for string comparisons in format selection
|
||||
expressions (#18600, #18805)
|
||||
* [extractor/common] Improve HLS video-only format detection (#18923)
|
||||
|
||||
Extractors
|
||||
* [crunchyroll] Extend URL regular expression (#18955)
|
||||
* [pornhub] Bypass scrape detection (#4822, #5930, #7074, #10175, #12722,
|
||||
#17197, #18338 #18842, #18899)
|
||||
+ [vrv] Add support for authentication (#14307)
|
||||
* [videomore:season] Fix extraction
|
||||
* [videomore] Improve extraction (#18908)
|
||||
+ [tnaflix] Pass Referer in metadata request (#18925)
|
||||
* [radiocanada] Relax DRM check (#18608, #18609)
|
||||
* [vimeo] Fix video password verification for videos protected by
|
||||
Referer HTTP header
|
||||
+ [hketv] Add support for hkedcity.net (#18696)
|
||||
+ [streamango] Add support for fruithosts.net (#18710)
|
||||
+ [instagram] Add support for tags (#18757)
|
||||
+ [odnoklassniki] Detect paid videos (#18876)
|
||||
* [ted] Correct acodec for HTTP formats (#18923)
|
||||
* [cartoonnetwork] Fix extraction (#15664, #17224)
|
||||
* [vimeo] Fix extraction for password protected player URLs (#18889)
|
||||
|
||||
|
||||
version 2019.01.17
|
||||
|
||||
Extractors
|
||||
* [youtube] Extend JS player signature function name regular expressions
|
||||
(#18890, #18891, #18893)
|
||||
|
||||
|
||||
version 2019.01.16
|
||||
|
||||
Core
|
||||
+ [test/helper] Add support for maxcount and count collection len checkers
|
||||
* [downloader/hls] Fix uplynk ad skipping (#18824)
|
||||
* [postprocessor/ffmpeg] Improve ffmpeg version parsing (#18813)
|
||||
|
||||
Extractors
|
||||
* [youtube] Skip unsupported adaptive stream type (#18804)
|
||||
+ [youtube] Extract DASH formats from player response (#18804)
|
||||
* [funimation] Fix extraction (#14089)
|
||||
* [skylinewebcams] Fix extraction (#18853)
|
||||
+ [curiositystream] Add support for non app URLs
|
||||
+ [bitchute] Check formats (#18833)
|
||||
* [wistia] Extend URL regular expression (#18823)
|
||||
+ [playplustv] Add support for playplus.com (#18789)
|
||||
|
||||
|
||||
version 2019.01.10
|
||||
|
||||
Core
|
||||
* [extractor/common] Use episode name as title in _json_ld
|
||||
+ [extractor/common] Add support for movies in _json_ld
|
||||
* [postprocessor/ffmpeg] Embed subtitles with non-standard language codes
|
||||
(#18765)
|
||||
+ [utils] Add language codes replaced in 1989 revision of ISO 639
|
||||
to ISO639Utils (#18765)
|
||||
|
||||
Extractors
|
||||
* [youtube] Extract live HLS URL from player response (#18799)
|
||||
+ [outsidetv] Add support for outsidetv.com (#18774)
|
||||
* [jwplatform] Use JW Platform Delivery API V2 and add support for more URLs
|
||||
+ [fox] Add support National Geographic (#17985, #15333, #14698)
|
||||
+ [playplustv] Add support for playplus.tv (#18789)
|
||||
* [globo] Set GLBID cookie manually (#17346)
|
||||
+ [gaia] Add support for gaia.com (#14605)
|
||||
* [youporn] Fix title and description extraction (#18748)
|
||||
+ [hungama] Add support for hungama.com (#17402, #18771)
|
||||
* [dtube] Fix extraction (#18741)
|
||||
* [tvnow] Fix and rework extractors and prepare for a switch to the new API
|
||||
(#17245, #18499)
|
||||
* [carambatv:page] Fix extraction (#18739)
|
||||
|
||||
|
||||
version 2019.01.02
|
||||
|
||||
Extractors
|
||||
* [discovery] Use geo verification headers (#17838)
|
||||
+ [packtpub] Add support for subscription.packtpub.com (#18718)
|
||||
* [yourporn] Fix extraction (#18583)
|
||||
+ [acast:channel] Add support for play.acast.com (#18587)
|
||||
+ [extractors] Add missing age limits (#18621)
|
||||
+ [rmcdecouverte] Add support for live stream
|
||||
* [rmcdecouverte] Bypass geo restriction
|
||||
* [rmcdecouverte] Update URL regular expression (#18595, 18697)
|
||||
* [manyvids] Fix extraction (#18604, #18614)
|
||||
* [bitchute] Fix extraction (#18567)
|
||||
|
||||
|
||||
version 2018.12.31
|
||||
|
||||
Extractors
|
||||
+ [bbc] Add support for another embed pattern (#18643)
|
||||
+ [npo:live] Add support for npostart.nl (#18644)
|
||||
* [beeg] Fix extraction (#18610, #18626)
|
||||
* [youtube] Unescape HTML for series (#18641)
|
||||
+ [youtube] Extract more format metadata
|
||||
* [youtube] Detect DRM protected videos (#1774)
|
||||
* [youtube] Relax HTML5 player regular expressions (#18465, #18466)
|
||||
* [youtube] Extend HTML5 player regular expression (#17516)
|
||||
+ [liveleak] Add support for another embed type and restore original
|
||||
format extraction
|
||||
+ [crackle] Extract ISM and HTTP formats
|
||||
+ [twitter] Pass Referer with card request (#18579)
|
||||
* [mediasite] Extend URL regular expression (#18558)
|
||||
+ [lecturio] Add support for lecturio.de (#18562)
|
||||
+ [discovery] Add support for Scripps Networks watch domains (#17947)
|
||||
|
||||
|
||||
version 2018.12.17
|
||||
|
||||
Extractors
|
||||
* [ard:beta] Improve geo restricted videos extraction
|
||||
* [ard:beta] Fix subtitles extraction
|
||||
* [ard:beta] Improve extraction robustness
|
||||
* [ard:beta] Relax URL regular expression (#18441)
|
||||
* [acast] Add support for embed.acast.com and play.acast.com (#18483)
|
||||
* [iprima] Relax URL regular expression (#18515, #18540)
|
||||
* [vrv] Fix initial state extraction (#18553)
|
||||
* [youtube] Fix mark watched (#18546)
|
||||
+ [safari] Add support for learning.oreilly.com (#18510)
|
||||
* [youtube] Fix multifeed extraction (#18531)
|
||||
* [lecturio] Improve subtitles extraction (#18488)
|
||||
* [uol] Fix format URL extraction (#18480)
|
||||
+ [ard:mediathek] Add support for classic.ardmediathek.de (#18473)
|
||||
|
||||
|
||||
version 2018.12.09
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Keep session cookies in cookie file between runs
|
||||
* [YoutubeDL] Recognize session cookies with expired set to 0 (#12929)
|
||||
|
||||
Extractors
|
||||
+ [teachable] Add support for teachable platform sites (#5451, #18150, #18272)
|
||||
+ [aenetworks] Add support for historyvault.com (#18460)
|
||||
* [imgur] Improve gallery and album detection and extraction (#9133, #16577,
|
||||
#17223, #18404)
|
||||
* [iprima] Relax URL regular expression (#18453)
|
||||
* [hotstar] Fix video data extraction (#18386)
|
||||
* [ard:mediathek] Fix title and description extraction (#18349, #18371)
|
||||
* [xvideos] Switch to HTTPS (#18422, #18427)
|
||||
+ [lecturio] Add support for lecturio.com (#18405)
|
||||
+ [nrktv:series] Add support for extra materials
|
||||
* [nrktv:season,series] Fix extraction (#17159, #17258)
|
||||
* [nrktv] Relax URL regular expression (#18304, #18387)
|
||||
* [yourporn] Fix extraction (#18424, #18425)
|
||||
* [tbs] Fix info extraction (#18403)
|
||||
+ [gamespot] Add support for review URLs
|
||||
|
||||
|
||||
version 2018.12.03
|
||||
|
||||
Core
|
||||
* [utils] Fix random_birthday to generate existing dates only (#18284)
|
||||
|
||||
Extractors
|
||||
+ [tiktok] Add support for tiktok.com (#18108, #18135)
|
||||
* [pornhub] Use actual URL host for requests (#18359)
|
||||
* [lynda] Fix authentication (#18158, #18217)
|
||||
* [gfycat] Update API endpoint (#18333, #18343)
|
||||
+ [hotstar] Add support for alternative app state layout (#18320)
|
||||
* [azmedien] Fix extraction (#18334, #18336)
|
||||
+ [vimeo] Add support for VHX (Vimeo OTT) (#14835)
|
||||
* [joj] Fix extraction (#18280, #18281)
|
||||
+ [wistia] Add support for fast.wistia.com (#18287)
|
||||
|
||||
|
||||
version 2018.11.23
|
||||
|
||||
Core
|
||||
+ [setup.py] Add more relevant classifiers
|
||||
|
||||
Extractors
|
||||
* [mixcloud] Fallback to hardcoded decryption key (#18016)
|
||||
* [nbc:news] Fix article extraction (#16194)
|
||||
* [foxsports] Fix extraction (#17543)
|
||||
* [loc] Relax regular expression and improve formats extraction
|
||||
+ [ciscolive] Add support for ciscolive.cisco.com (#17984)
|
||||
* [nzz] Relax kaltura regex (#18228)
|
||||
* [sixplay] Fix formats extraction
|
||||
* [bitchute] Improve title extraction
|
||||
* [kaltura] Limit requested MediaEntry fields
|
||||
+ [americastestkitchen] Add support for zype embeds (#18225)
|
||||
+ [pornhub] Add pornhub.net alias
|
||||
* [nova:embed] Fix extraction (#18222)
|
||||
|
||||
|
||||
version 2018.11.18
|
||||
|
||||
Extractors
|
||||
+ [wwe] Extract subtitles
|
||||
+ [wwe] Add support for playlistst (#14781)
|
||||
+ [wwe] Add support for wwe.com (#14781, #17450)
|
||||
* [vk] Detect geo restriction (#17767)
|
||||
* [openload] Use original host during extraction (#18211)
|
||||
* [atvat] Fix extraction (#18041)
|
||||
+ [rte] Add support for new API endpoint (#18206)
|
||||
* [tnaflixnetwork:embed] Fix extraction (#18205)
|
||||
* [picarto] Use API and add token support (#16518)
|
||||
+ [zype] Add support for player.zype.com (#18143)
|
||||
* [vivo] Fix extraction (#18139)
|
||||
* [ruutu] Update API endpoint (#18138)
|
||||
|
||||
|
||||
version 2018.11.07
|
||||
|
||||
Extractors
|
||||
+ [youtube] Add another JS signature function name regex (#18091, #18093,
|
||||
#18094)
|
||||
* [facebook] Fix tahoe request (#17171)
|
||||
* [cliphunter] Fix extraction (#18083)
|
||||
+ [youtube:playlist] Add support for invidio.us (#18077)
|
||||
* [zattoo] Arrange API hosts for derived extractors (#18035)
|
||||
+ [youtube] Add fallback metadata extraction from videoDetails (#18052)
|
||||
|
||||
|
||||
version 2018.11.03
|
||||
|
||||
Core
|
||||
* [extractor/common] Ensure response handle is not prematurely closed before
|
||||
it can be read if it matches expected_status (#17195, #17846, #17447)
|
||||
|
||||
Extractors
|
||||
* [laola1tv:embed] Set correct stream access URL scheme (#16341)
|
||||
+ [ehftv] Add support for ehftv.com (#15408)
|
||||
* [azmedien] Adopt to major site redesign (#17745, #17746)
|
||||
+ [twitcasting] Add support for twitcasting.tv (#17981)
|
||||
* [orf:tvthek] Fix extraction (#17737, #17956, #18024)
|
||||
+ [openload] Add support for oload.fun (#18045)
|
||||
* [njpwworld] Fix authentication (#17427)
|
||||
+ [linkedin:learning] Add support for linkedin.com/learning (#13545)
|
||||
* [theplatform] Improve error detection (#13222)
|
||||
* [cnbc] Simplify extraction (#14280, #17110)
|
||||
+ [cbnc] Add support for new URL schema (#14193)
|
||||
* [aparat] Improve extraction and extract more metadata (#17445, #18008)
|
||||
* [aparat] Fix extraction
|
||||
|
||||
|
||||
version 2018.10.29
|
||||
|
||||
Core
|
||||
+ [extractor/common] Add validation for JSON-LD URLs
|
||||
|
||||
Extractors
|
||||
+ [sportbox] Add support for matchtv.ru
|
||||
* [sportbox] Fix extraction (#17978)
|
||||
* [screencast] Fix extraction (#14590, #14617, #17990)
|
||||
+ [openload] Add support for oload.icu
|
||||
+ [ivi] Add support for ivi.tv
|
||||
* [crunchyroll] Improve extraction failsafeness (#17991)
|
||||
* [dailymail] Fix formats extraction (#17976)
|
||||
* [viewster] Reduce format requests
|
||||
* [cwtv] Handle API errors (#17905)
|
||||
+ [rutube] Use geo verification headers (#17897)
|
||||
+ [brightcove:legacy] Add fallbacks to brightcove:new (#13912)
|
||||
- [tv3] Remove extractor (#10461, #15339)
|
||||
* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894)
|
||||
+ [openload] Add support for oload.cc (#17823)
|
||||
+ [patreon] Extract post_file URL (#17792)
|
||||
* [patreon] Fix extraction (#14502, #10471)
|
||||
|
||||
|
||||
version 2018.10.05
|
||||
|
||||
Extractors
|
||||
* [pluralsight] Improve authentication (#17762)
|
||||
* [dailymotion] Fix extraction (#17699)
|
||||
* [crunchyroll] Switch to HTTPS for RpcApi (#17749)
|
||||
+ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705)
|
||||
* [philharmoniedeparis] Fix extraction (#17705)
|
||||
+ [jamendo] Add support for licensing.jamendo.com (#17724)
|
||||
+ [openload] Add support for oload.cloud (#17710)
|
||||
* [pluralsight] Fix subtitles extraction (#17726, #17728)
|
||||
+ [vimeo] Add another config regular expression (#17690)
|
||||
* [spike] Fix Paramount Network extraction (#17677)
|
||||
* [hotstar] Fix extraction (#14694, #14931, #17637)
|
||||
|
||||
|
||||
version 2018.09.26
|
||||
|
||||
Extractors
|
||||
* [pluralsight] Fix subtitles extraction (#17671)
|
||||
* [mediaset] Improve embed support (#17668)
|
||||
+ [youtube] Add support for invidio.us (#17613)
|
||||
+ [zattoo] Add support for more zattoo platform sites
|
||||
* [zattoo] Fix extraction (#17175, #17542)
|
||||
|
||||
|
||||
version 2018.09.18
|
||||
|
||||
Core
|
||||
+ [extractor/common] Introduce channel meta fields
|
||||
|
||||
Extractors
|
||||
* [adobepass] Don't pollute default headers dict
|
||||
* [udemy] Don't pollute default headers dict
|
||||
* [twitch] Don't pollute default headers dict
|
||||
* [youtube] Don't pollute default query dict (#17593)
|
||||
* [crunchyroll] Prefer hardsubless formats and formats in locale language
|
||||
* [vrv] Make format ids deterministic
|
||||
* [vimeo] Fix ondemand playlist extraction (#14591)
|
||||
+ [pornhub] Extract upload date (#17574)
|
||||
+ [porntube] Extract channel meta fields
|
||||
+ [vimeo] Extract channel meta fields
|
||||
+ [youtube] Extract channel meta fields (#9676, #12939)
|
||||
* [porntube] Fix extraction (#17541)
|
||||
* [asiancrush] Fix extraction (#15630)
|
||||
+ [twitch:clips] Extend URL regular expression (closes #17559)
|
||||
+ [vzaar] Add support for HLS
|
||||
* [tube8] Fix metadata extraction (#17520)
|
||||
* [eporner] Extract JSON-LD (#17519)
|
||||
|
||||
|
||||
version 2018.09.10
|
||||
|
||||
Core
|
||||
+ [utils] Properly recognize AV1 codec (#17506)
|
||||
|
||||
Extractors
|
||||
+ [iprima] Add support for prima.iprima.cz (#17514)
|
||||
+ [tele5] Add support for tele5.de (#7805, #7922, #17331, #17414)
|
||||
* [nbc] Fix extraction of percent encoded URLs (#17374)
|
||||
|
||||
|
||||
version 2018.09.08
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix extraction (#17457, #17464)
|
||||
+ [pornhub:uservideos] Add support for new URLs (#17388)
|
||||
* [iprima] Confirm adult check (#17437)
|
||||
* [slideslive] Make check for video service name case-insensitive (#17429)
|
||||
* [radiojavan] Fix extraction (#17151)
|
||||
* [generic] Skip unsuccessful jwplayer extraction (#16735)
|
||||
|
||||
|
||||
version 2018.09.01
|
||||
|
||||
Core
|
||||
* [utils] Skip remote IP addresses non matching to source address' IP version
|
||||
when creating a connection (#13422, #17362)
|
||||
|
||||
Extractors
|
||||
+ [ard] Add support for one.ard.de (#17397)
|
||||
* [niconico] Fix extraction on python3 (#17393, #17407)
|
||||
* [ard] Extract f4m formats
|
||||
* [crunchyroll] Parse vilos media data (#17343)
|
||||
+ [ard] Add support for Beta ARD Mediathek
|
||||
+ [bandcamp] Extract more metadata (#13197)
|
||||
* [internazionale] Fix extraction of non-available-abroad videos (#17386)
|
||||
|
||||
|
||||
version 2018.08.28
|
||||
|
||||
Extractors
|
||||
+ [youtube:playlist] Add support for music album playlists (OLAK5uy_ prefix)
|
||||
(#17361)
|
||||
* [bitchute] Fix extraction by pass custom User-Agent (#17360)
|
||||
* [webofstories:playlist] Fix extraction (#16914)
|
||||
+ [tvplayhome] Add support for new tvplay URLs (#17344)
|
||||
+ [generic] Allow relative src for videojs embeds (#17324)
|
||||
+ [xfileshare] Add support for vidto.se (#17317)
|
||||
+ [vidzi] Add support for vidzi.nu (#17316)
|
||||
+ [nova:embed] Add support for media.cms.nova.cz (#17282)
|
||||
|
||||
|
||||
version 2018.08.22
|
||||
|
||||
Core
|
||||
* [utils] Use pure browser header for User-Agent (#17236)
|
||||
|
||||
Extractors
|
||||
+ [kinopoisk] Add support for kinopoisk.ru (#17283)
|
||||
+ [yourporn] Add support for yourporn.sexy (#17298)
|
||||
+ [go] Add support for disneynow.go.com (#16299, #17264)
|
||||
+ [6play] Add support for play.rtl.hr (#17249)
|
||||
* [anvato] Fallback to generic API key for access-key-to-API-key lookup
|
||||
(#16788, #17254)
|
||||
* [lci] Fix extraction (#17274)
|
||||
* [bbccouk] Extend id URL regular expression (#17270)
|
||||
* [cwtv] Fix extraction (#17256)
|
||||
* [nova] Fix extraction (#17241)
|
||||
+ [generic] Add support for expressen embeds
|
||||
* [raywenderlich] Adapt to site redesign (#17225)
|
||||
+ [redbulltv] Add support redbull.com tv URLs (#17218)
|
||||
+ [bitchute] Add support for bitchute.com (#14052)
|
||||
+ [clyp] Add support for token protected media (#17184)
|
||||
* [imdb] Fix extension extraction (#17167)
|
||||
|
||||
|
||||
version 2018.08.04
|
||||
|
||||
Extractors
|
||||
* [funk:channel] Improve byChannelAlias extraction (#17142)
|
||||
* [twitch] Fix authentication (#17024, #17126)
|
||||
* [twitch:vod] Improve URL regular expression (#17135)
|
||||
* [watchbox] Fix extraction (#17107)
|
||||
* [pbs] Fix extraction (#17109)
|
||||
* [theplatform] Relax URL regular expression (#16181, #17097)
|
||||
+ [viqeo] Add support for viqeo.tv (#17066)
|
||||
|
||||
|
||||
version 2018.07.29
|
||||
|
||||
Extractors
|
||||
* [crunchyroll:playlist] Restrict URL regular expression (#17069, #17076)
|
||||
+ [pornhub] Add support for subtitles (#16924, #17088)
|
||||
* [ceskatelevize] Use https for API call (#16997, #16999)
|
||||
* [dailymotion:playlist] Fix extraction (#16894)
|
||||
* [ted] Improve extraction
|
||||
* [ted] Fix extraction for videos without nativeDownloads (#16756, #17085)
|
||||
* [telecinco] Fix extraction (#17080)
|
||||
* [mitele] Reduce number of requests
|
||||
* [rai] Return non HTTP relinker URL intact (#17055)
|
||||
* [vk] Fix extraction for inline only videos (#16923)
|
||||
* [streamcloud] Fix extraction (#17054)
|
||||
* [facebook] Fix tahoe player extraction with authentication (#16655)
|
||||
+ [puhutv] Add support for puhutv.com (#12712, #16010, #16269)
|
||||
|
||||
|
||||
version 2018.07.21
|
||||
|
||||
Core
|
||||
+ [utils] Introduce url_or_none
|
||||
* [utils] Allow JSONP without function name (#17028)
|
||||
+ [extractor/common] Extract DASH and MSS formats from SMIL manifests
|
||||
|
||||
Extractors
|
||||
+ [bbc] Add support for BBC Radio Play pages (#17022)
|
||||
* [iwara] Fix download URLs (#17026)
|
||||
* [vrtnu] Relax title extraction and extract JSON-LD (#17018)
|
||||
+ [viu] Pass Referer and Origin headers and area id (#16992)
|
||||
+ [vimeo] Add another config regular expression (#17013)
|
||||
+ [facebook] Extract view count (#16942)
|
||||
* [dailymotion] Improve description extraction (#16984)
|
||||
* [slutload] Fix and improve extraction (#17001)
|
||||
* [mediaset] Fix extraction (#16977)
|
||||
+ [theplatform] Add support for theplatform TLD customization (#16977)
|
||||
* [imgur] Relax URL regular expression (#16987)
|
||||
* [pornhub] Improve extraction and extract all formats (#12166, #15891, #16262,
|
||||
#16959)
|
||||
|
||||
|
||||
version 2018.07.10
|
||||
|
||||
Core
|
||||
* [utils] Share JSON-LD regular expression
|
||||
* [downloader/dash] Improve error handling (#16927)
|
||||
|
||||
Extractors
|
||||
+ [nrktv] Add support for new season and serie URL schema
|
||||
+ [nrktv] Add support for new episode URL schema (#16909)
|
||||
+ [frontendmasters] Add support for frontendmasters.com (#3661, #16328)
|
||||
* [funk] Fix extraction (#16918)
|
||||
* [watchbox] Fix extraction (#16904)
|
||||
* [dplayit] Sort formats
|
||||
* [dplayit] Fix extraction (#16901)
|
||||
* [youtube] Improve login error handling (#13822)
|
||||
|
||||
|
||||
version 2018.07.04
|
||||
|
||||
Core
|
||||
* [extractor/common] Properly escape % in MPD templates (#16867)
|
||||
* [extractor/common] Use source URL as Referer for HTML5 entries (16849)
|
||||
* Prefer ffmpeg over avconv by default (#8622)
|
||||
|
||||
Extractors
|
||||
* [pluralsight] Switch to graphql (#16889, #16895, #16896, #16899)
|
||||
* [lynda] Simplify login and improve error capturing (#16891)
|
||||
+ [go90] Add support for embed URLs (#16873)
|
||||
* [go90] Detect geo restriction error and pass geo verification headers
|
||||
(#16874)
|
||||
* [vlive] Fix live streams extraction (#16871)
|
||||
* [npo] Fix typo (#16872)
|
||||
+ [mediaset] Add support for new videos and extract all formats (#16568)
|
||||
* [dctptv] Restore extraction based on REST API (#16850)
|
||||
* [svt] Improve extraction and add support for pages (#16802)
|
||||
* [porncom] Fix extraction (#16808)
|
||||
|
||||
|
||||
version 2018.06.25
|
||||
|
||||
Extractors
|
||||
* [joj] Relax URL regular expression (#16771)
|
||||
* [brightcove] Workaround sonyliv DRM protected videos (#16807)
|
||||
* [motherless] Fix extraction (#16786)
|
||||
* [itv] Make SOAP request non fatal and extract metadata from webpage (#16780)
|
||||
- [foxnews:insider] Remove extractor (#15810)
|
||||
+ [foxnews] Add support for iframe embeds (#15810, #16711)
|
||||
|
||||
|
||||
version 2018.06.19
|
||||
|
||||
Core
|
||||
|
110
README.md
110
README.md
@@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
# INSTALLATION
|
||||
|
||||
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
||||
To install it right away for all UNIX users (Linux, macOS, etc.), type:
|
||||
|
||||
sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
@@ -35,7 +35,7 @@ You can also use pip:
|
||||
|
||||
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
|
||||
|
||||
OS X users can install youtube-dl with [Homebrew](https://brew.sh/):
|
||||
macOS users can install youtube-dl with [Homebrew](https://brew.sh/):
|
||||
|
||||
brew install youtube-dl
|
||||
|
||||
@@ -427,9 +427,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
default; fix file if we can, warn
|
||||
otherwise)
|
||||
--prefer-avconv Prefer avconv over ffmpeg for running the
|
||||
postprocessors (default)
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||
postprocessors
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||
postprocessors (default)
|
||||
--ffmpeg-location PATH Location of the ffmpeg/avconv binary;
|
||||
either the path to the binary or its
|
||||
containing directory.
|
||||
@@ -442,7 +442,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and macOS, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
|
||||
|
||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
||||
```
|
||||
@@ -496,7 +496,7 @@ The `-o` option allows users to indicate a template for the output file names.
|
||||
|
||||
**tl;dr:** [navigate me to examples](#output-template-examples).
|
||||
|
||||
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
|
||||
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are:
|
||||
|
||||
- `id` (string): Video identifier
|
||||
- `title` (string): Video title
|
||||
@@ -511,6 +511,8 @@ The basic usage is not to set any template arguments when downloading a single f
|
||||
- `timestamp` (numeric): UNIX timestamp of the moment the video became available
|
||||
- `upload_date` (string): Video upload date (YYYYMMDD)
|
||||
- `uploader_id` (string): Nickname or id of the video uploader
|
||||
- `channel` (string): Full name of the channel the video is uploaded on
|
||||
- `channel_id` (string): Id of the channel
|
||||
- `location` (string): Physical location where the video was filmed
|
||||
- `duration` (numeric): Length of the video in seconds
|
||||
- `view_count` (numeric): How many users have watched the video on the platform
|
||||
@@ -665,7 +667,7 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
|
||||
- `asr`: Audio sampling rate in Hertz
|
||||
- `fps`: Frame rate
|
||||
|
||||
Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begins with), `$=` (ends with), `*=` (contains) and following string meta fields:
|
||||
Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields:
|
||||
- `ext`: File extension
|
||||
- `acodec`: Name of the audio codec in use
|
||||
- `vcodec`: Name of the video codec in use
|
||||
@@ -673,6 +675,8 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
|
||||
- `format_id`: A short description of the format
|
||||
|
||||
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
|
||||
|
||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
|
||||
|
||||
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
|
||||
@@ -870,7 +874,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
|
||||
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
|
||||
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
|
||||
@@ -1022,16 +1026,20 @@ After you have ensured this site is distributing its content legally, you can fo
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||
|
||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||
|
||||
9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/extractors.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
||||
@@ -1043,7 +1051,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
|
||||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
@@ -1051,7 +1059,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and
|
||||
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
#### Example
|
||||
|
||||
@@ -1127,11 +1135,33 @@ title = meta.get('title') or self._og_search_title(webpage)
|
||||
|
||||
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
||||
|
||||
### Make regular expressions flexible
|
||||
### Regular expressions
|
||||
|
||||
When using regular expressions try to write them fuzzy and flexible.
|
||||
#### Don't capture groups you don't use
|
||||
|
||||
Capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non capturing.
|
||||
|
||||
##### Example
|
||||
|
||||
Don't capture id attribute name here since you can't use it for anything anyway.
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
r'(?:id|ID)=(?P<id>\d+)'
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
```python
|
||||
r'(id|ID)=(?P<id>\d+)'
|
||||
```
|
||||
|
||||
|
||||
#### Make regular expressions relaxed and flexible
|
||||
|
||||
When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
|
||||
|
||||
#### Example
|
||||
##### Example
|
||||
|
||||
Say you need to extract `title` from the following HTML code:
|
||||
|
||||
@@ -1164,9 +1194,51 @@ title = self._search_regex(
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
### Use safe conversion functions
|
||||
### Long lines policy
|
||||
|
||||
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
|
||||
|
||||
For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
'https://www.youtube.com/watch?v=FqZTN594JQw&list='
|
||||
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
### Use convenience conversion and parsing functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
||||
Use `url_or_none` for safe URL processing.
|
||||
|
||||
Use `try_get` for safe metadata extraction from parsed JSON.
|
||||
|
||||
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
|
||||
|
||||
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
|
||||
|
||||
#### More examples
|
||||
|
||||
##### Safely extract optional description from parsed JSON
|
||||
```python
|
||||
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
|
||||
```
|
||||
|
||||
##### Safely extract more optional metadata
|
||||
```python
|
||||
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
|
||||
description = video.get('summary')
|
||||
duration = float_or_none(video.get('durationMs'), scale=1000)
|
||||
view_count = int_or_none(video.get('views'))
|
||||
```
|
||||
|
||||
# EMBEDDING YOUTUBE-DL
|
||||
|
||||
|
@@ -33,7 +33,7 @@
|
||||
- **AdobeTVShow**
|
||||
- **AdobeTVVideo**
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
|
||||
- **afreecatv**: afreecatv.com
|
||||
- **AirMozilla**
|
||||
- **AliExpressLive**
|
||||
@@ -56,6 +56,7 @@
|
||||
- **archive.org**: archive.org videos
|
||||
- **ARD**
|
||||
- **ARD:mediathek**
|
||||
- **ARDBetaMediathek**
|
||||
- **Arkena**
|
||||
- **arte.tv**
|
||||
- **arte.tv:+7**
|
||||
@@ -83,8 +84,6 @@
|
||||
- **awaan:season**
|
||||
- **awaan:video**
|
||||
- **AZMedien**: AZ Medien videos
|
||||
- **AZMedienPlaylist**: AZ Medien playlists
|
||||
- **AZMedienShowPlaylist**: AZ Medien show playlists
|
||||
- **BaiduVideo**: 百度视频
|
||||
- **bambuser**
|
||||
- **bambuser:channel**
|
||||
@@ -97,6 +96,7 @@
|
||||
- **bbc.co.uk:article**: BBC articles
|
||||
- **bbc.co.uk:iplayer:playlist**
|
||||
- **bbc.co.uk:playlist**
|
||||
- **BBVTV**
|
||||
- **Beatport**
|
||||
- **Beeg**
|
||||
- **BehindKink**
|
||||
@@ -108,6 +108,8 @@
|
||||
- **BiliBili**
|
||||
- **BioBioChileTV**
|
||||
- **BIQLE**
|
||||
- **BitChute**
|
||||
- **BitChuteChannel**
|
||||
- **BleacherReport**
|
||||
- **BleacherReportCMS**
|
||||
- **blinkx**
|
||||
@@ -161,6 +163,8 @@
|
||||
- **chirbit**
|
||||
- **chirbit:profile**
|
||||
- **Cinchcast**
|
||||
- **CiscoLiveSearch**
|
||||
- **CiscoLiveSession**
|
||||
- **CJSW**
|
||||
- **cliphunter**
|
||||
- **Clippit**
|
||||
@@ -174,6 +178,7 @@
|
||||
- **Clyp**
|
||||
- **cmt.com**
|
||||
- **CNBC**
|
||||
- **CNBCVideo**
|
||||
- **CNN**
|
||||
- **CNNArticle**
|
||||
- **CNNBlogs**
|
||||
@@ -189,7 +194,7 @@
|
||||
- **Crackle**
|
||||
- **Criterion**
|
||||
- **CrooksAndLiars**
|
||||
- **Crunchyroll**
|
||||
- **crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
- **CSNNE**
|
||||
- **CSpan**: C-SPAN
|
||||
@@ -247,7 +252,9 @@
|
||||
- **EchoMsk**
|
||||
- **egghead:course**: egghead.io course
|
||||
- **egghead:lesson**: egghead.io lesson
|
||||
- **ehftv**
|
||||
- **eHow**
|
||||
- **EinsUndEinsTV**
|
||||
- **Einthusan**
|
||||
- **eitb.tv**
|
||||
- **EllenTube**
|
||||
@@ -265,6 +272,7 @@
|
||||
- **EsriVideo**
|
||||
- **Europa**
|
||||
- **EveryonesMixtape**
|
||||
- **EWETV**
|
||||
- **ExpoTV**
|
||||
- **Expressen**
|
||||
- **ExtremeTube**
|
||||
@@ -290,7 +298,6 @@
|
||||
- **Foxgay**
|
||||
- **foxnews**: Fox News and Fox Business Video
|
||||
- **foxnews:article**
|
||||
- **foxnews:insider**
|
||||
- **FoxSports**
|
||||
- **france2.fr:generation-what**
|
||||
- **FranceCulture**
|
||||
@@ -303,6 +310,9 @@
|
||||
- **Freesound**
|
||||
- **freespeech.org**
|
||||
- **FreshLive**
|
||||
- **FrontendMasters**
|
||||
- **FrontendMastersCourse**
|
||||
- **FrontendMastersLesson**
|
||||
- **Funimation**
|
||||
- **FunkChannel**
|
||||
- **FunkMix**
|
||||
@@ -310,6 +320,7 @@
|
||||
- **Fusion**
|
||||
- **Fux**
|
||||
- **FXNetworks**
|
||||
- **Gaia**
|
||||
- **GameInformer**
|
||||
- **GameOne**
|
||||
- **gameone:playlist**
|
||||
@@ -322,6 +333,7 @@
|
||||
- **Gfycat**
|
||||
- **GiantBomb**
|
||||
- **Giga**
|
||||
- **GlattvisionTV**
|
||||
- **Glide**: Glide mobile video messages (glide.me)
|
||||
- **Globo**
|
||||
- **GloboArticle**
|
||||
@@ -349,9 +361,10 @@
|
||||
- **hitbox**
|
||||
- **hitbox:live**
|
||||
- **HitRecord**
|
||||
- **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau
|
||||
- **HornBunny**
|
||||
- **HotNewHipHop**
|
||||
- **HotStar**
|
||||
- **hotstar**
|
||||
- **hotstar:playlist**
|
||||
- **Howcast**
|
||||
- **HowStuffWorks**
|
||||
@@ -359,18 +372,22 @@
|
||||
- **HRTiPlaylist**
|
||||
- **Huajiao**: 花椒直播
|
||||
- **HuffPost**: Huffington Post
|
||||
- **Hungama**
|
||||
- **HungamaSong**
|
||||
- **Hypem**
|
||||
- **Iconosquare**
|
||||
- **ign.com**
|
||||
- **imdb**: Internet Movie Database trailers
|
||||
- **imdb:list**: Internet Movie Database lists
|
||||
- **Imgur**
|
||||
- **ImgurAlbum**
|
||||
- **imgur:album**
|
||||
- **imgur:gallery**
|
||||
- **Ina**
|
||||
- **Inc**
|
||||
- **IndavideoEmbed**
|
||||
- **InfoQ**
|
||||
- **Instagram**
|
||||
- **instagram:tag**: Instagram hashtag search
|
||||
- **instagram:user**: Instagram user profile
|
||||
- **Internazionale**
|
||||
- **InternetVideoArchive**
|
||||
@@ -403,6 +420,7 @@
|
||||
- **Ketnet**
|
||||
- **KhanAcademy**
|
||||
- **KickStarter**
|
||||
- **KinoPoisk**
|
||||
- **KonserthusetPlay**
|
||||
- **kontrtube**: KontrTube.ru - Труба зовёт
|
||||
- **KrasView**: Красвью
|
||||
@@ -423,6 +441,9 @@
|
||||
- **Le**: 乐视网
|
||||
- **Learnr**
|
||||
- **Lecture2Go**
|
||||
- **Lecturio**
|
||||
- **LecturioCourse**
|
||||
- **LecturioDeCourse**
|
||||
- **LEGO**
|
||||
- **Lemonde**
|
||||
- **Lenta**
|
||||
@@ -435,6 +456,9 @@
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
- **LineTV**
|
||||
- **linkedin:learning**
|
||||
- **linkedin:learning:course**
|
||||
- **LinuxAcademy**
|
||||
- **LiTV**
|
||||
- **LiveLeak**
|
||||
- **LiveLeakEmbed**
|
||||
@@ -453,6 +477,7 @@
|
||||
- **mailru:music**: Музыка@Mail.Ru
|
||||
- **mailru:music:search**: Музыка@Mail.Ru
|
||||
- **MakerTV**
|
||||
- **MallTV**
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
- **ManyVids**
|
||||
@@ -488,6 +513,7 @@
|
||||
- **Mixer:vod**
|
||||
- **MLB**
|
||||
- **Mnet**
|
||||
- **MNetTV**
|
||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||
- **Mofosex**
|
||||
- **Mojvideo**
|
||||
@@ -519,10 +545,10 @@
|
||||
- **Myvi**
|
||||
- **MyVidster**
|
||||
- **MyviEmbed**
|
||||
- **MyVisionTV**
|
||||
- **n-tv.de**
|
||||
- **natgeo**
|
||||
- **natgeo:episodeguide**
|
||||
- **natgeo:video**
|
||||
- **NationalGeographicTV**
|
||||
- **Naver**
|
||||
- **NBA**
|
||||
- **NBC**
|
||||
@@ -544,6 +570,7 @@
|
||||
- **netease:program**: 网易云音乐 - 电台节目
|
||||
- **netease:singer**: 网易云音乐 - 歌手
|
||||
- **netease:song**: 网易云音乐
|
||||
- **NetPlus**
|
||||
- **Netzkino**
|
||||
- **Newgrounds**
|
||||
- **NewgroundsPlaylist**
|
||||
@@ -575,6 +602,7 @@
|
||||
- **Normalboots**
|
||||
- **NosVideo**
|
||||
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
|
||||
- **NovaEmbed**
|
||||
- **nowness**
|
||||
- **nowness:playlist**
|
||||
- **nowness:series**
|
||||
@@ -590,7 +618,9 @@
|
||||
- **NRKSkole**: NRK Skole
|
||||
- **NRKTV**: NRK TV and NRK Radio
|
||||
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
|
||||
- **NRKTVEpisode**
|
||||
- **NRKTVEpisodes**
|
||||
- **NRKTVSeason**
|
||||
- **NRKTVSeries**
|
||||
- **ntv.ru**
|
||||
- **Nuvid**
|
||||
@@ -617,6 +647,8 @@
|
||||
- **orf:iptv**: iptv.ORF.at
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
- **OsnatelTV**
|
||||
- **OutsideTV**
|
||||
- **PacktPub**
|
||||
- **PacktPubCourse**
|
||||
- **PandaTV**: 熊猫TV
|
||||
@@ -641,6 +673,7 @@
|
||||
- **Pinkbike**
|
||||
- **Pladform**
|
||||
- **play.fm**
|
||||
- **PlayPlusTV**
|
||||
- **PlaysTV**
|
||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||
- **Playvid**
|
||||
@@ -668,6 +701,8 @@
|
||||
- **PrimeShareTV**
|
||||
- **PromptFile**
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
- **puhutv**
|
||||
- **puhutv:serie**
|
||||
- **Puls4**
|
||||
- **Pyvideo**
|
||||
- **qqmusic**: QQ音乐
|
||||
@@ -675,6 +710,7 @@
|
||||
- **qqmusic:playlist**: QQ音乐 - 歌单
|
||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||
- **QuantumTV**
|
||||
- **Quickline**
|
||||
- **QuicklineLive**
|
||||
- **R7**
|
||||
@@ -690,6 +726,7 @@
|
||||
- **RaiPlayLive**
|
||||
- **RaiPlayPlaylist**
|
||||
- **RayWenderlich**
|
||||
- **RayWenderlichCourse**
|
||||
- **RBMARadio**
|
||||
- **RDS**: RDS.ca
|
||||
- **RedBullTV**
|
||||
@@ -741,6 +778,8 @@
|
||||
- **safari**: safaribooksonline.com online video
|
||||
- **safari:api**
|
||||
- **safari:course**: safaribooksonline.com online courses
|
||||
- **SAKTV**
|
||||
- **SaltTV**
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
- **SBS**: sbs.com.au
|
||||
@@ -790,13 +829,14 @@
|
||||
- **southpark.nl**
|
||||
- **southparkstudios.dk**
|
||||
- **SpankBang**
|
||||
- **SpankBangPlaylist**
|
||||
- **Spankwire**
|
||||
- **Spiegel**
|
||||
- **Spiegel:Article**: Articles on spiegel.de
|
||||
- **Spiegeltv**
|
||||
- **sport.francetvinfo.fr**
|
||||
- **Sport5**
|
||||
- **SportBoxEmbed**
|
||||
- **SportBox**
|
||||
- **SportDeutschland**
|
||||
- **SpringboardPlatform**
|
||||
- **Sprout**
|
||||
@@ -814,6 +854,7 @@
|
||||
- **StretchInternet**
|
||||
- **SunPorno**
|
||||
- **SVT**
|
||||
- **SVTPage**
|
||||
- **SVTPlay**: SVT Play and Öppet arkiv
|
||||
- **SVTSeries**
|
||||
- **SWRMediathek**
|
||||
@@ -826,6 +867,8 @@
|
||||
- **TastyTrade**
|
||||
- **TBS**
|
||||
- **TDSLifeway**
|
||||
- **Teachable**
|
||||
- **TeachableCourse**
|
||||
- **teachertube**: teachertube.com videos
|
||||
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||
- **TeachingChannel**
|
||||
@@ -834,6 +877,7 @@
|
||||
- **techtv.mit.edu**
|
||||
- **ted**
|
||||
- **Tele13**
|
||||
- **Tele5**
|
||||
- **TeleBruxelles**
|
||||
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
|
||||
- **Telegraaf**
|
||||
@@ -857,6 +901,8 @@
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **ThisOldHouse**
|
||||
- **TikTok**
|
||||
- **TikTokUser**
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **TMZ**
|
||||
- **TMZArticle**
|
||||
@@ -870,6 +916,7 @@
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
- **TrailerAddict** (Currently broken)
|
||||
- **Trilulilu**
|
||||
- **TruNews**
|
||||
- **TruTV**
|
||||
- **Tube8**
|
||||
- **TubiTv**
|
||||
@@ -885,7 +932,6 @@
|
||||
- **TV2**
|
||||
- **tv2.hu**
|
||||
- **TV2Article**
|
||||
- **TV3**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TV5MondePlus**: TV5MONDE+
|
||||
- **TVA**
|
||||
@@ -899,13 +945,17 @@
|
||||
- **TVNet**
|
||||
- **TVNoe**
|
||||
- **TVNow**
|
||||
- **TVNowList**
|
||||
- **TVNowAnnual**
|
||||
- **TVNowNew**
|
||||
- **TVNowSeason**
|
||||
- **TVNowShow**
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
- **TVPlayer**
|
||||
- **TVPlayHome**
|
||||
- **Tweakers**
|
||||
- **TwitCasting**
|
||||
- **twitch:chapter**
|
||||
- **twitch:clips**
|
||||
- **twitch:profile**
|
||||
@@ -930,8 +980,6 @@
|
||||
- **uol.com.br**
|
||||
- **uplynk**
|
||||
- **uplynk:preplay**
|
||||
- **Upskill**
|
||||
- **UpskillCourse**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **URPlay**
|
||||
- **USANetwork**
|
||||
@@ -950,6 +998,7 @@
|
||||
- **VevoPlaylist**
|
||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||
- **vh1.com**
|
||||
- **vhx:embed**
|
||||
- **Viafree**
|
||||
- **vice**
|
||||
- **vice:article**
|
||||
@@ -994,6 +1043,7 @@
|
||||
- **Vimple**: Vimple - one-click video hosting
|
||||
- **Vine**
|
||||
- **vine:user**
|
||||
- **Viqeo**
|
||||
- **Viu**
|
||||
- **viu:ott**
|
||||
- **viu:playlist**
|
||||
@@ -1011,7 +1061,6 @@
|
||||
- **Voot**
|
||||
- **VoxMedia**
|
||||
- **VoxMediaVolume**
|
||||
- **Vporn**
|
||||
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **Vrak**
|
||||
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
|
||||
@@ -1019,12 +1068,15 @@
|
||||
- **vrv**
|
||||
- **vrv:series**
|
||||
- **VShare**
|
||||
- **VTXTV**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **VVVVID**
|
||||
- **VyboryMos**
|
||||
- **Vzaar**
|
||||
- **Wakanim**
|
||||
- **Walla**
|
||||
- **WalyTV**
|
||||
- **washingtonpost**
|
||||
- **washingtonpost:article**
|
||||
- **wat.tv**
|
||||
@@ -1050,6 +1102,7 @@
|
||||
- **wrzuta.pl:playlist**
|
||||
- **WSJ**: Wall Street Journal
|
||||
- **WSJArticle**
|
||||
- **WWE**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
||||
@@ -1085,6 +1138,7 @@
|
||||
- **YouNowLive**
|
||||
- **YouNowMoment**
|
||||
- **YouPorn**
|
||||
- **YourPorn**
|
||||
- **YourUpload**
|
||||
- **youtube**: YouTube.com
|
||||
- **youtube:channel**: YouTube.com channels
|
||||
@@ -1108,3 +1162,4 @@
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **zingmp3**: mp3.zing.vn
|
||||
- **Zype**
|
||||
|
9
setup.py
9
setup.py
@@ -124,6 +124,8 @@ setup(
|
||||
'Development Status :: 5 - Production/Stable',
|
||||
'Environment :: Console',
|
||||
'License :: Public Domain',
|
||||
'Programming Language :: Python',
|
||||
'Programming Language :: Python :: 2',
|
||||
'Programming Language :: Python :: 2.6',
|
||||
'Programming Language :: Python :: 2.7',
|
||||
'Programming Language :: Python :: 3',
|
||||
@@ -132,6 +134,13 @@ setup(
|
||||
'Programming Language :: Python :: 3.4',
|
||||
'Programming Language :: Python :: 3.5',
|
||||
'Programming Language :: Python :: 3.6',
|
||||
'Programming Language :: Python :: 3.7',
|
||||
'Programming Language :: Python :: 3.8',
|
||||
'Programming Language :: Python :: Implementation',
|
||||
'Programming Language :: Python :: Implementation :: CPython',
|
||||
'Programming Language :: Python :: Implementation :: IronPython',
|
||||
'Programming Language :: Python :: Implementation :: Jython',
|
||||
'Programming Language :: Python :: Implementation :: PyPy',
|
||||
],
|
||||
|
||||
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
||||
|
@@ -7,6 +7,7 @@ import json
|
||||
import os.path
|
||||
import re
|
||||
import types
|
||||
import ssl
|
||||
import sys
|
||||
|
||||
import youtube_dl.extractor
|
||||
@@ -152,15 +153,27 @@ def expect_value(self, got, expected, field):
|
||||
isinstance(got, compat_str),
|
||||
'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got)))
|
||||
got = 'md5:' + md5(got)
|
||||
elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
|
||||
elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected):
|
||||
self.assertTrue(
|
||||
isinstance(got, (list, dict)),
|
||||
'Expected field %s to be a list or a dict, but it is of type %s' % (
|
||||
field, type(got).__name__))
|
||||
expected_num = int(expected.partition(':')[2])
|
||||
assertGreaterEqual(
|
||||
op, _, expected_num = expected.partition(':')
|
||||
expected_num = int(expected_num)
|
||||
if op == 'mincount':
|
||||
assert_func = assertGreaterEqual
|
||||
msg_tmpl = 'Expected %d items in field %s, but only got %d'
|
||||
elif op == 'maxcount':
|
||||
assert_func = assertLessEqual
|
||||
msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
|
||||
elif op == 'count':
|
||||
assert_func = assertEqual
|
||||
msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
|
||||
else:
|
||||
assert False
|
||||
assert_func(
|
||||
self, len(got), expected_num,
|
||||
'Expected %d items in field %s, but only got %d' % (expected_num, field, len(got)))
|
||||
msg_tmpl % (expected_num, field, len(got)))
|
||||
return
|
||||
self.assertEqual(
|
||||
expected, got,
|
||||
@@ -236,6 +249,20 @@ def assertGreaterEqual(self, got, expected, msg=None):
|
||||
self.assertTrue(got >= expected, msg)
|
||||
|
||||
|
||||
def assertLessEqual(self, got, expected, msg=None):
|
||||
if not (got <= expected):
|
||||
if msg is None:
|
||||
msg = '%r not less than or equal to %r' % (got, expected)
|
||||
self.assertTrue(got <= expected, msg)
|
||||
|
||||
|
||||
def assertEqual(self, got, expected, msg=None):
|
||||
if not (got == expected):
|
||||
if msg is None:
|
||||
msg = '%r not equal to %r' % (got, expected)
|
||||
self.assertTrue(got == expected, msg)
|
||||
|
||||
|
||||
def expect_warnings(ydl, warnings_re):
|
||||
real_warning = ydl.report_warning
|
||||
|
||||
@@ -244,3 +271,12 @@ def expect_warnings(ydl, warnings_re):
|
||||
real_warning(w)
|
||||
|
||||
ydl.report_warning = _report_warning
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
@@ -9,11 +9,30 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL, expect_dict, expect_value
|
||||
from youtube_dl.compat import compat_etree_fromstring
|
||||
from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
|
||||
from youtube_dl.compat import compat_etree_fromstring, compat_http_server
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
||||
import threading
|
||||
|
||||
|
||||
TEAPOT_RESPONSE_STATUS = 418
|
||||
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
|
||||
|
||||
|
||||
class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
||||
def do_GET(self):
|
||||
if self.path == '/teapot':
|
||||
self.send_response(TEAPOT_RESPONSE_STATUS)
|
||||
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
||||
self.end_headers()
|
||||
self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
|
||||
else:
|
||||
assert False
|
||||
|
||||
|
||||
class TestIE(InfoExtractor):
|
||||
@@ -42,6 +61,7 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
<meta content='Foo' property=og:foobar>
|
||||
<meta name="og:test1" content='foo > < bar'/>
|
||||
<meta name="og:test2" content="foo >//< bar"/>
|
||||
<meta property=og-test3 content='Ill-formatted opengraph'/>
|
||||
'''
|
||||
self.assertEqual(ie._og_search_title(html), 'Foo')
|
||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||
@@ -50,6 +70,7 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
|
||||
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
|
||||
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
|
||||
self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph')
|
||||
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
|
||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
|
||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
|
||||
@@ -478,7 +499,64 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}]
|
||||
)
|
||||
),
|
||||
(
|
||||
# https://github.com/rg3/youtube-dl/issues/18923
|
||||
# https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa
|
||||
'ted_18923',
|
||||
'http://hls.ted.com/talks/31241.m3u8',
|
||||
[{
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '600k-Audio',
|
||||
'vcodec': 'none',
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '68',
|
||||
'vcodec': 'none',
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '163',
|
||||
'acodec': 'none',
|
||||
'width': 320,
|
||||
'height': 180,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '481',
|
||||
'acodec': 'none',
|
||||
'width': 512,
|
||||
'height': 288,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '769',
|
||||
'acodec': 'none',
|
||||
'width': 512,
|
||||
'height': 288,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '984',
|
||||
'acodec': 'none',
|
||||
'width': 512,
|
||||
'height': 288,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '1255',
|
||||
'acodec': 'none',
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '1693',
|
||||
'acodec': 'none',
|
||||
'width': 853,
|
||||
'height': 480,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '2462',
|
||||
'acodec': 'none',
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}]
|
||||
),
|
||||
]
|
||||
|
||||
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
|
||||
@@ -743,6 +821,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||
for i in range(len(entries)):
|
||||
expect_dict(self, entries[i], expected_entries[i])
|
||||
|
||||
def test_response_with_expected_status_returns_content(self):
|
||||
# Checks for mitigations against the effects of
|
||||
# <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
|
||||
# manifest as `_download_webpage`, `_download_xml`, `_download_json`,
|
||||
# or the underlying `_download_webpage_handle` returning no content
|
||||
# when a response matches `expected_status`.
|
||||
|
||||
httpd = compat_http_server.HTTPServer(
|
||||
('127.0.0.1', 0), InfoExtractorTestRequestHandler)
|
||||
port = http_server_port(httpd)
|
||||
server_thread = threading.Thread(target=httpd.serve_forever)
|
||||
server_thread.daemon = True
|
||||
server_thread.start()
|
||||
|
||||
(content, urlh) = self.ie._download_webpage_handle(
|
||||
'http://127.0.0.1:%d/teapot' % port, None,
|
||||
expected_status=TEAPOT_RESPONSE_STATUS)
|
||||
self.assertEqual(content, TEAPOT_RESPONSE_BODY)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -239,6 +239,76 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
|
||||
|
||||
def test_format_selection_string_ops(self):
|
||||
formats = [
|
||||
{'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL},
|
||||
{'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
# equals (=)
|
||||
ydl = YDL({'format': '[format_id=abc-cba]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'abc-cba')
|
||||
|
||||
# does not equal (!=)
|
||||
ydl = YDL({'format': '[format_id!=abc-cba]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
|
||||
|
||||
ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'})
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
# starts with (^=)
|
||||
ydl = YDL({'format': '[format_id^=abc]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'abc-cba')
|
||||
|
||||
# does not start with (!^=)
|
||||
ydl = YDL({'format': '[format_id!^=abc]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
|
||||
|
||||
ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'})
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
# ends with ($=)
|
||||
ydl = YDL({'format': '[format_id$=cba]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'abc-cba')
|
||||
|
||||
# does not end with (!$=)
|
||||
ydl = YDL({'format': '[format_id!$=cba]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
|
||||
|
||||
ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'})
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
# contains (*=)
|
||||
ydl = YDL({'format': '[format_id*=bc-cb]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'abc-cba')
|
||||
|
||||
# does not contain (!*=)
|
||||
ydl = YDL({'format': '[format_id!*=bc-cb]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
|
||||
|
||||
ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'})
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
ydl = YDL({'format': '[format_id!*=-]'})
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
def test_youtube_format_selection(self):
|
||||
order = [
|
||||
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
|
||||
|
34
test/test_YoutubeDLCookieJar.py
Normal file
34
test/test_YoutubeDLCookieJar.py
Normal file
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.utils import YoutubeDLCookieJar
|
||||
|
||||
|
||||
class TestYoutubeDLCookieJar(unittest.TestCase):
|
||||
def test_keep_session_cookies(self):
|
||||
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
|
||||
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||
tf = tempfile.NamedTemporaryFile(delete=False)
|
||||
try:
|
||||
cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True)
|
||||
temp = tf.read().decode('utf-8')
|
||||
self.assertTrue(re.search(
|
||||
r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))
|
||||
self.assertTrue(re.search(
|
||||
r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpires0\s+YoutubeDLExpires0Value', temp))
|
||||
finally:
|
||||
tf.close()
|
||||
os.remove(tf.name)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -39,7 +39,7 @@ class TestCompat(unittest.TestCase):
|
||||
|
||||
def test_compat_expanduser(self):
|
||||
old_home = os.environ.get('HOME')
|
||||
test_str = 'C:\Documents and Settings\тест\Application Data'
|
||||
test_str = r'C:\Documents and Settings\тест\Application Data'
|
||||
compat_setenv('HOME', test_str)
|
||||
self.assertEqual(compat_expanduser('~'), test_str)
|
||||
compat_setenv('HOME', old_home or '')
|
||||
|
@@ -9,26 +9,16 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import try_rm
|
||||
from test.helper import http_server_port, try_rm
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server
|
||||
from youtube_dl.downloader.http import HttpFD
|
||||
from youtube_dl.utils import encodeFilename
|
||||
import ssl
|
||||
import threading
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
||||
|
||||
TEST_SIZE = 10 * 1024
|
||||
|
||||
|
||||
|
@@ -8,6 +8,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import http_server_port
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
||||
import ssl
|
||||
@@ -16,15 +17,6 @@ import threading
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
@@ -14,4 +14,4 @@ from youtube_dl.postprocessor import MetadataFromTitlePP
|
||||
class TestMetadataFromTitle(unittest.TestCase):
|
||||
def test_format_to_regex(self):
|
||||
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
|
||||
self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||
self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||
|
@@ -78,6 +78,7 @@ from youtube_dl.utils import (
|
||||
uppercase_escape,
|
||||
lowercase_escape,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
base_url,
|
||||
urljoin,
|
||||
urlencode_postdata,
|
||||
@@ -506,6 +507,18 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(urljoin('http://foo.de/', ''), None)
|
||||
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
|
||||
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
|
||||
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de')
|
||||
self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de')
|
||||
|
||||
def test_url_or_none(self):
|
||||
self.assertEqual(url_or_none(None), None)
|
||||
self.assertEqual(url_or_none(''), None)
|
||||
self.assertEqual(url_or_none('foo'), None)
|
||||
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
|
||||
self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de')
|
||||
self.assertEqual(url_or_none('http$://foo.de'), None)
|
||||
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
|
||||
self.assertEqual(url_or_none('//foo.de'), '//foo.de')
|
||||
|
||||
def test_parse_age_limit(self):
|
||||
self.assertEqual(parse_age_limit(None), None)
|
||||
@@ -717,6 +730,10 @@ class TestUtil(unittest.TestCase):
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
stripped = strip_jsonp('({"status": "success"});')
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
def test_uppercase_escape(self):
|
||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||
@@ -770,6 +787,10 @@ class TestUtil(unittest.TestCase):
|
||||
'vcodec': 'h264',
|
||||
'acodec': 'aac',
|
||||
})
|
||||
self.assertEqual(parse_codecs('av01.0.05M.08'), {
|
||||
'vcodec': 'av01.0.05M.08',
|
||||
'acodec': 'none',
|
||||
})
|
||||
|
||||
def test_escape_rfc3986(self):
|
||||
reserved = "!*'();:@&=+$,/?#[]"
|
||||
|
6
test/testdata/cookies/session_cookies.txt
vendored
Normal file
6
test/testdata/cookies/session_cookies.txt
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
# Netscape HTTP Cookie File
|
||||
# http://curl.haxx.se/rfc/cookie_spec.html
|
||||
# This is a generated file! Do not edit.
|
||||
|
||||
www.foobar.foobar FALSE / TRUE YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue
|
||||
www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value
|
28
test/testdata/m3u8/ted_18923.m3u8
vendored
Normal file
28
test/testdata/m3u8/ted_18923.m3u8
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
#EXTM3U
|
||||
#EXT-X-VERSION:4
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1255659,PROGRAM-ID=1,CODECS="avc1.42c01e,mp4a.40.2",RESOLUTION=640x360
|
||||
/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=163154,PROGRAM-ID=1,CODECS="avc1.42c00c,mp4a.40.2",RESOLUTION=320x180
|
||||
/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=481701,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
|
||||
/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=769968,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
|
||||
/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=984037,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
|
||||
/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1693925,PROGRAM-ID=1,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=853x480
|
||||
/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=2462469,PROGRAM-ID=1,CODECS="avc1.640028,mp4a.40.2",RESOLUTION=1280x720
|
||||
/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=68101,PROGRAM-ID=1,CODECS="mp4a.40.2",DEFAULT=YES
|
||||
/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=74298,PROGRAM-ID=1,CODECS="avc1.42c00c",RESOLUTION=320x180,URI="/videos/BorisHesser_2018S/video/64k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=216200,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/180k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=304717,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/320k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=350933,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/450k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=495850,PROGRAM-ID=1,CODECS="avc1.42c01e",RESOLUTION=640x360,URI="/videos/BorisHesser_2018S/video/600k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=810750,PROGRAM-ID=1,CODECS="avc1.4d401f",RESOLUTION=853x480,URI="/videos/BorisHesser_2018S/video/950k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=1273700,PROGRAM-ID=1,CODECS="avc1.640028",RESOLUTION=1280x720,URI="/videos/BorisHesser_2018S/video/1500k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
|
||||
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="600k",LANGUAGE="en",NAME="Audio",AUTOSELECT=YES,DEFAULT=YES,URI="/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b",BANDWIDTH=614400
|
@@ -82,12 +82,14 @@ from .utils import (
|
||||
sanitize_url,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
str_or_none,
|
||||
subtitles_filename,
|
||||
UnavailableVideoError,
|
||||
url_basename,
|
||||
version_tuple,
|
||||
write_json_file,
|
||||
write_string,
|
||||
YoutubeDLCookieJar,
|
||||
YoutubeDLCookieProcessor,
|
||||
YoutubeDLHandler,
|
||||
)
|
||||
@@ -305,8 +307,8 @@ class YoutubeDL(object):
|
||||
http_chunk_size.
|
||||
|
||||
The following options are used by the post processors:
|
||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||
otherwise prefer avconv.
|
||||
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
|
||||
otherwise prefer ffmpeg.
|
||||
postprocessor_args: A list of additional command-line arguments for the
|
||||
postprocessor.
|
||||
|
||||
@@ -558,7 +560,7 @@ class YoutubeDL(object):
|
||||
self.restore_console_title()
|
||||
|
||||
if self.params.get('cookiefile') is not None:
|
||||
self.cookiejar.save()
|
||||
self.cookiejar.save(ignore_discard=True, ignore_expires=True)
|
||||
|
||||
def trouble(self, message=None, tb=None):
|
||||
"""Determine action to take when a download problem appears.
|
||||
@@ -1062,21 +1064,24 @@ class YoutubeDL(object):
|
||||
if not m:
|
||||
STR_OPERATORS = {
|
||||
'=': operator.eq,
|
||||
'!=': operator.ne,
|
||||
'^=': lambda attr, value: attr.startswith(value),
|
||||
'$=': lambda attr, value: attr.endswith(value),
|
||||
'*=': lambda attr, value: value in attr,
|
||||
}
|
||||
str_operator_rex = re.compile(r'''(?x)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||
\s*(?P<value>[a-zA-Z0-9._-]+)
|
||||
\s*$
|
||||
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
|
||||
m = str_operator_rex.search(filter_spec)
|
||||
if m:
|
||||
comparison_value = m.group('value')
|
||||
op = STR_OPERATORS[m.group('op')]
|
||||
str_op = STR_OPERATORS[m.group('op')]
|
||||
if m.group('negation'):
|
||||
op = lambda attr, value: not str_op(attr, value)
|
||||
else:
|
||||
op = str_op
|
||||
|
||||
if not m:
|
||||
raise ValueError('Invalid filter specification %r' % filter_spec)
|
||||
@@ -2056,15 +2061,24 @@ class YoutubeDL(object):
|
||||
self.report_warning('Unable to remove downloaded original file')
|
||||
|
||||
def _make_archive_id(self, info_dict):
|
||||
video_id = info_dict.get('id')
|
||||
if not video_id:
|
||||
return
|
||||
# Future-proof against any change in case
|
||||
# and backwards compatibility with prior versions
|
||||
extractor = info_dict.get('extractor_key')
|
||||
extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
|
||||
if extractor is None:
|
||||
if 'id' in info_dict:
|
||||
extractor = info_dict.get('ie_key') # key in a playlist
|
||||
if extractor is None:
|
||||
return None # Incomplete video information
|
||||
return extractor.lower() + ' ' + info_dict['id']
|
||||
url = str_or_none(info_dict.get('url'))
|
||||
if not url:
|
||||
return
|
||||
# Try to find matching extractor for the URL and take its ie_key
|
||||
for ie in self._ies:
|
||||
if ie.suitable(url):
|
||||
extractor = ie.ie_key()
|
||||
break
|
||||
else:
|
||||
return
|
||||
return extractor.lower() + ' ' + video_id
|
||||
|
||||
def in_download_archive(self, info_dict):
|
||||
fn = self.params.get('download_archive')
|
||||
@@ -2072,7 +2086,7 @@ class YoutubeDL(object):
|
||||
return False
|
||||
|
||||
vid_id = self._make_archive_id(info_dict)
|
||||
if vid_id is None:
|
||||
if not vid_id:
|
||||
return False # Incomplete video information
|
||||
|
||||
try:
|
||||
@@ -2297,10 +2311,9 @@ class YoutubeDL(object):
|
||||
self.cookiejar = compat_cookiejar.CookieJar()
|
||||
else:
|
||||
opts_cookiefile = expand_path(opts_cookiefile)
|
||||
self.cookiejar = compat_cookiejar.MozillaCookieJar(
|
||||
opts_cookiefile)
|
||||
self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
|
||||
if os.access(opts_cookiefile, os.R_OK):
|
||||
self.cookiejar.load()
|
||||
self.cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||
|
||||
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
|
||||
if opts_proxy is not None:
|
||||
|
@@ -2,7 +2,10 @@ from __future__ import unicode_literals
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import urljoin
|
||||
from ..utils import (
|
||||
DownloadError,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class DashSegmentsFD(FragmentFD):
|
||||
@@ -57,6 +60,14 @@ class DashSegmentsFD(FragmentFD):
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
except DownloadError:
|
||||
# Don't retry fragment if error occurred during HTTP downloading
|
||||
# itself since it has own retry settings
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
break
|
||||
raise
|
||||
|
||||
if count > fragment_retries:
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
|
@@ -121,7 +121,11 @@ class CurlFD(ExternalFD):
|
||||
cmd += self._valueless_option('--silent', 'noprogress')
|
||||
cmd += self._valueless_option('--verbose', 'verbose')
|
||||
cmd += self._option('--limit-rate', 'ratelimit')
|
||||
cmd += self._option('--retry', 'retries')
|
||||
retry = self._option('--retry', 'retries')
|
||||
if len(retry) == 2:
|
||||
if retry[1] in ('inf', 'infinite'):
|
||||
retry[1] = '2147483647'
|
||||
cmd += retry
|
||||
cmd += self._option('--max-filesize', 'max_filesize')
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._option('--proxy', 'proxy')
|
||||
@@ -160,6 +164,12 @@ class WgetFD(ExternalFD):
|
||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._option('--limit-rate', 'ratelimit')
|
||||
retry = self._option('--tries', 'retries')
|
||||
if len(retry) == 2:
|
||||
if retry[1] in ('inf', 'infinite'):
|
||||
retry[1] = '0'
|
||||
cmd += retry
|
||||
cmd += self._option('--bind-address', 'source_address')
|
||||
cmd += self._option('--proxy', 'proxy')
|
||||
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
|
||||
|
@@ -75,10 +75,14 @@ class HlsFD(FragmentFD):
|
||||
fd.add_progress_hook(ph)
|
||||
return fd.real_download(filename, info_dict)
|
||||
|
||||
def is_ad_fragment(s):
|
||||
def is_ad_fragment_start(s):
|
||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or
|
||||
s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
|
||||
|
||||
def is_ad_fragment_end(s):
|
||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or
|
||||
s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
|
||||
|
||||
media_frags = 0
|
||||
ad_frags = 0
|
||||
ad_frag_next = False
|
||||
@@ -87,12 +91,13 @@ class HlsFD(FragmentFD):
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith('#'):
|
||||
if is_ad_fragment(line):
|
||||
ad_frags += 1
|
||||
if is_ad_fragment_start(line):
|
||||
ad_frag_next = True
|
||||
elif is_ad_fragment_end(line):
|
||||
ad_frag_next = False
|
||||
continue
|
||||
if ad_frag_next:
|
||||
ad_frag_next = False
|
||||
ad_frags += 1
|
||||
continue
|
||||
media_frags += 1
|
||||
|
||||
@@ -123,7 +128,6 @@ class HlsFD(FragmentFD):
|
||||
if line:
|
||||
if not line.startswith('#'):
|
||||
if ad_frag_next:
|
||||
ad_frag_next = False
|
||||
continue
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
@@ -196,8 +200,10 @@ class HlsFD(FragmentFD):
|
||||
'start': sub_range_start,
|
||||
'end': sub_range_start + int(splitted_byte_range[0]),
|
||||
}
|
||||
elif is_ad_fragment(line):
|
||||
elif is_ad_fragment_start(line):
|
||||
ad_frag_next = True
|
||||
elif is_ad_fragment_end(line):
|
||||
ad_frag_next = False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
|
@@ -17,25 +17,15 @@ from ..utils import (
|
||||
|
||||
class ACastIE(InfoExtractor):
|
||||
IE_NAME = 'acast'
|
||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:embed|www)\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<channel>[^/]+)/(?P<id>[^/#?]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
# test with one bling
|
||||
'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
|
||||
'md5': 'ada3de5a1e3a2a381327d749854788bb',
|
||||
'info_dict': {
|
||||
'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
|
||||
'ext': 'mp3',
|
||||
'title': '"Where Are You?": Taipei 101, Taiwan',
|
||||
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
||||
'timestamp': 1196172000,
|
||||
'upload_date': '20071127',
|
||||
'duration': 211,
|
||||
'creator': 'Concierge',
|
||||
'series': 'Condé Nast Traveler Podcast',
|
||||
'episode': '"Where Are You?": Taipei 101, Taiwan',
|
||||
}
|
||||
}, {
|
||||
# test with multiple blings
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
|
||||
'info_dict': {
|
||||
@@ -50,6 +40,12 @@ class ACastIE(InfoExtractor):
|
||||
'series': 'Spår',
|
||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -83,17 +79,27 @@ class ACastIE(InfoExtractor):
|
||||
|
||||
class ACastChannelIE(InfoExtractor):
|
||||
IE_NAME = 'acast:channel'
|
||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.acast.com/condenasttraveler',
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<id>[^/#?]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acast.com/todayinfocus',
|
||||
'info_dict': {
|
||||
'id': '50544219-29bb-499e-a083-6087f4cb7797',
|
||||
'title': 'Condé Nast Traveler Podcast',
|
||||
'description': 'md5:98646dee22a5b386626ae31866638fbd',
|
||||
'id': '4efc5294-5385-4847-98bd-519799ce5786',
|
||||
'title': 'Today in Focus',
|
||||
'description': 'md5:9ba5564de5ce897faeb12963f4537a64',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}
|
||||
_API_BASE_URL = 'https://www.acast.com/api/'
|
||||
'playlist_mincount': 35,
|
||||
}, {
|
||||
'url': 'http://play.acast.com/s/ft-banking-weekly',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_BASE_URL = 'https://play.acast.com/api/'
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
@classmethod
|
||||
@@ -106,7 +112,7 @@ class ACastChannelIE(InfoExtractor):
|
||||
channel_slug, note='Download page %d of channel data' % page)
|
||||
for cast in casts:
|
||||
yield self.url_result(
|
||||
'https://www.acast.com/%s/%s' % (channel_slug, cast['url']),
|
||||
'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']),
|
||||
'ACast', cast['id'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1325,8 +1325,8 @@ class AdobePassIE(InfoExtractor):
|
||||
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
headers = kwargs.get('headers', {})
|
||||
headers.update(self.geo_verification_headers())
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update(kwargs.get('headers', {}))
|
||||
kwargs['headers'] = headers
|
||||
return super(AdobePassIE, self)._download_webpage_handle(
|
||||
*args, **compat_kwargs(kwargs))
|
||||
|
@@ -7,6 +7,7 @@ from .turner import TurnerBaseIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -98,7 +99,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
if not video_id:
|
||||
entries = []
|
||||
for episode in video_data.get('archiveEpisodes', []):
|
||||
episode_url = episode.get('url')
|
||||
episode_url = url_or_none(episode.get('url'))
|
||||
if not episode_url:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
|
@@ -22,18 +22,19 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
|
||||
class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?P<domain>
|
||||
(?:history|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
fyi\.tv
|
||||
)/
|
||||
(?:
|
||||
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
|
||||
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
|
||||
specials/(?P<special_display_id>[^/]+)/full-special
|
||||
specials/(?P<special_display_id>[^/]+)/full-special|
|
||||
collections/[^/]+/(?P<collection_display_id>[^/]+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
@@ -80,6 +81,9 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
|
||||
'only_matching': True
|
||||
}]
|
||||
_DOMAIN_TO_REQUESTOR_ID = {
|
||||
'history.com': 'HISTORY',
|
||||
@@ -90,9 +94,9 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, show_path, movie_display_id, special_display_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = show_path or movie_display_id or special_display_id
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = show_path or movie_display_id or special_display_id or collection_display_id
|
||||
webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers())
|
||||
if show_path:
|
||||
url_parts = show_path.split('/')
|
||||
url_parts_len = len(url_parts)
|
||||
|
@@ -9,6 +9,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
@@ -304,7 +305,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
file_elements = video_element.findall(compat_xpath('./file'))
|
||||
one = len(file_elements) == 1
|
||||
for file_num, file_element in enumerate(file_elements, start=1):
|
||||
file_url = file_element.text
|
||||
file_url = url_or_none(file_element.text)
|
||||
if not file_url:
|
||||
continue
|
||||
key = file_element.get('key', '')
|
||||
|
@@ -43,10 +43,6 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id')
|
||||
|
||||
video_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
||||
@@ -58,7 +54,18 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
(lambda x: x['episodeDetail']['content']['data'],
|
||||
lambda x: x['videoDetail']['content']['data']), dict)
|
||||
ep_meta = ep_data.get('full_video', {})
|
||||
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
||||
|
||||
zype_id = ep_meta.get('zype_id')
|
||||
if zype_id:
|
||||
embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id
|
||||
ie_key = 'Zype'
|
||||
else:
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id')
|
||||
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
||||
embed_url = 'kaltura:%s:%s' % (partner_id, external_id)
|
||||
ie_key = 'Kaltura'
|
||||
|
||||
title = ep_data.get('title') or ep_meta.get('title')
|
||||
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
||||
@@ -72,8 +79,8 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, external_id),
|
||||
'ie_key': 'Kaltura',
|
||||
'url': embed_url,
|
||||
'ie_key': ie_key,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
|
@@ -3,11 +3,12 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -35,7 +36,7 @@ class AMPIE(InfoExtractor):
|
||||
media_thumbnail = [media_thumbnail]
|
||||
for thumbnail_data in media_thumbnail:
|
||||
thumbnail = thumbnail_data.get('@attributes', {})
|
||||
thumbnail_url = thumbnail.get('url')
|
||||
thumbnail_url = url_or_none(thumbnail.get('url'))
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
@@ -51,7 +52,7 @@ class AMPIE(InfoExtractor):
|
||||
media_subtitle = [media_subtitle]
|
||||
for subtitle_data in media_subtitle:
|
||||
subtitle = subtitle_data.get('@attributes', {})
|
||||
subtitle_href = subtitle.get('href')
|
||||
subtitle_href = url_or_none(subtitle.get('href'))
|
||||
if not subtitle_href:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
|
||||
@@ -65,7 +66,7 @@ class AMPIE(InfoExtractor):
|
||||
media_content = [media_content]
|
||||
for media_data in media_content:
|
||||
media = media_data.get('@attributes', {})
|
||||
media_url = media.get('url')
|
||||
media_url = url_or_none(media.get('url'))
|
||||
if not media_url:
|
||||
continue
|
||||
ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
|
||||
@@ -79,7 +80,7 @@ class AMPIE(InfoExtractor):
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
||||
'url': media['url'],
|
||||
'url': media_url,
|
||||
'tbr': int_or_none(media.get('bitrate')),
|
||||
'filesize': int_or_none(media.get('fileSize')),
|
||||
'ext': ext,
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
@@ -165,7 +166,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
}, fatal=False)
|
||||
if not playlist:
|
||||
continue
|
||||
stream_url = playlist.get('streamurl')
|
||||
stream_url = url_or_none(playlist.get('streamurl'))
|
||||
if stream_url:
|
||||
rtmp = re.search(
|
||||
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
|
||||
|
@@ -134,9 +134,33 @@ class AnvatoIE(InfoExtractor):
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
|
||||
}
|
||||
|
||||
_API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
|
||||
|
||||
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
|
||||
|
||||
_TESTS = [{
|
||||
# from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
|
||||
'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
|
||||
'info_dict': {
|
||||
'id': '4465496',
|
||||
'ext': 'mp4',
|
||||
'title': 'VIDEO: Humpback whale breaches right next to NH boat',
|
||||
'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
|
||||
'duration': 22,
|
||||
'timestamp': 1534855680,
|
||||
'upload_date': '20180821',
|
||||
'uploader': 'ANV',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
|
||||
'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(AnvatoIE, self).__init__(*args, **kwargs)
|
||||
self.__server_time = None
|
||||
@@ -169,7 +193,8 @@ class AnvatoIE(InfoExtractor):
|
||||
'api': {
|
||||
'anvrid': anvrid,
|
||||
'anvstk': md5_text('%s|%s|%d|%s' % (
|
||||
access_key, anvrid, server_time, self._ANVACK_TABLE[access_key])),
|
||||
access_key, anvrid, server_time,
|
||||
self._ANVACK_TABLE.get(access_key, self._API_KEY))),
|
||||
'anvts': server_time,
|
||||
},
|
||||
}
|
||||
@@ -284,5 +309,6 @@ class AnvatoIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
|
||||
if access_key not in self._ANVACK_TABLE:
|
||||
access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key]
|
||||
access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
|
||||
access_key) or access_key
|
||||
return self._get_anvato_videos(access_key, video_id)
|
||||
|
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -77,7 +78,7 @@ class AolIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
for rendition in video_data.get('renditions', []):
|
||||
video_url = rendition.get('url')
|
||||
video_url = url_or_none(rendition.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
ext = rendition.get('format')
|
||||
|
@@ -4,10 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -68,8 +68,8 @@ class APAIE(InfoExtractor):
|
||||
for source in sources:
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
source_url = source.get('file')
|
||||
if not source_url or not isinstance(source_url, compat_str):
|
||||
source_url = url_or_none(source.get('file'))
|
||||
if not source_url:
|
||||
continue
|
||||
ext = determine_ext(source_url)
|
||||
if ext == 'm3u8':
|
||||
|
@@ -4,66 +4,92 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AparatIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.aparat.com/v/wP8On',
|
||||
'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
|
||||
'info_dict': {
|
||||
'id': 'wP8On',
|
||||
'ext': 'mp4',
|
||||
'title': 'تیم گلکسی 11 - زومیت',
|
||||
'age_limit': 0,
|
||||
'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
|
||||
'duration': 231,
|
||||
'timestamp': 1387394859,
|
||||
'upload_date': '20131218',
|
||||
'view_count': int,
|
||||
},
|
||||
# 'skip': 'Extremely unreliable',
|
||||
}
|
||||
}, {
|
||||
# multiple formats
|
||||
'url': 'https://www.aparat.com/v/8dflw/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
webpage = self._download_webpage(
|
||||
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
||||
video_id)
|
||||
# Provides more metadata
|
||||
webpage = self._download_webpage(url, video_id, fatal=False)
|
||||
|
||||
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
|
||||
if not webpage:
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
webpage = self._download_webpage(
|
||||
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
||||
video_id)
|
||||
|
||||
file_list = self._parse_json(
|
||||
options = self._parse_json(
|
||||
self._search_regex(
|
||||
r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
|
||||
'file list'),
|
||||
r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
|
||||
webpage, 'options', group='value'),
|
||||
video_id)
|
||||
|
||||
player = options['plugins']['sabaPlayerPlugin']
|
||||
|
||||
formats = []
|
||||
for item in file_list[0]:
|
||||
file_url = item.get('file')
|
||||
if not file_url:
|
||||
continue
|
||||
ext = mimetype2ext(item.get('type'))
|
||||
label = item.get('label')
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'ext': ext,
|
||||
'format_id': label or ext,
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'(\d+)[pP]', label or '', 'height', default=None)),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
for sources in player['multiSRC']:
|
||||
for item in sources:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
file_url = url_or_none(item.get('src'))
|
||||
if not file_url:
|
||||
continue
|
||||
item_type = item.get('type')
|
||||
if item_type == 'application/vnd.apple.mpegurl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
ext = mimetype2ext(item.get('type'))
|
||||
label = item.get('label')
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'ext': ext,
|
||||
'format_id': 'http-%s' % (label or ext),
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'(\d+)[pP]', label or '', 'height',
|
||||
default=None)),
|
||||
})
|
||||
self._sort_formats(
|
||||
formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
return {
|
||||
if not info.get('title'):
|
||||
info['title'] = player['title']
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': self._family_friendly_search(webpage),
|
||||
'thumbnail': url_or_none(options.get('poster')),
|
||||
'duration': int_or_none(player.get('duration')),
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
|
@@ -5,23 +5,26 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .generic import GenericIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
qualities,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
qualities,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
|
||||
class ARDMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARD:mediathek'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
_VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
# available till 26.07.2022
|
||||
@@ -37,6 +40,9 @@ class ARDMediathekIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
@@ -48,8 +54,15 @@ class ARDMediathekIE(InfoExtractor):
|
||||
# audio
|
||||
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||
media_info = self._download_json(
|
||||
media_info_url, video_id, 'Downloading media JSON')
|
||||
@@ -100,7 +113,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
quality = stream.get('_quality')
|
||||
server = stream.get('_server')
|
||||
for stream_url in stream_urls:
|
||||
if not isinstance(stream_url, compat_str) or '//' not in stream_url:
|
||||
if not url_or_none(stream_url):
|
||||
continue
|
||||
ext = determine_ext(stream_url)
|
||||
if quality != 'auto' and ext in ('f4m', 'm3u8'):
|
||||
@@ -170,13 +183,18 @@ class ARDMediathekIE(InfoExtractor):
|
||||
title = self._html_search_regex(
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||
r'<h4 class="headline">(.*?)</h4>'],
|
||||
r'<h4 class="headline">(.*?)</h4>',
|
||||
r'<title[^>]*>(.*?)</title>'],
|
||||
webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'dcterms.abstract', webpage, 'description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'meta description')
|
||||
'description', webpage, 'meta description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_regex(
|
||||
r'<p\s+class="teasertext">(.+?)</p>',
|
||||
webpage, 'teaser text', default=None)
|
||||
|
||||
# Thumbnail is sometimes not present.
|
||||
# It is in the mobile version, but that seems to use a different URL
|
||||
@@ -282,3 +300,101 @@ class ARDIE(InfoExtractor):
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/[^/]+/(?:player|live)/(?P<video_id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^/?#]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
|
||||
'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
|
||||
'info_dict': {
|
||||
'display_id': 'die-robuste-roswita',
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'title': 'Tatort: Die robuste Roswita',
|
||||
'description': r're:^Der Mord.*trüber ist als die Ilm.',
|
||||
'duration': 5316,
|
||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/55/43/59/34/-1774185891/16x9/960?mandant=ard',
|
||||
'upload_date': '20180826',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
|
||||
data = self._parse_json(data_json, display_id)
|
||||
|
||||
res = {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
}
|
||||
formats = []
|
||||
subtitles = {}
|
||||
geoblocked = False
|
||||
for widget in data.values():
|
||||
if widget.get('_geoblocked') is True:
|
||||
geoblocked = True
|
||||
if '_duration' in widget:
|
||||
res['duration'] = int_or_none(widget['_duration'])
|
||||
if 'clipTitle' in widget:
|
||||
res['title'] = widget['clipTitle']
|
||||
if '_previewImage' in widget:
|
||||
res['thumbnail'] = widget['_previewImage']
|
||||
if 'broadcastedOn' in widget:
|
||||
res['timestamp'] = unified_timestamp(widget['broadcastedOn'])
|
||||
if 'synopsis' in widget:
|
||||
res['description'] = widget['synopsis']
|
||||
subtitle_url = url_or_none(widget.get('_subtitleUrl'))
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('de', []).append({
|
||||
'ext': 'ttml',
|
||||
'url': subtitle_url,
|
||||
})
|
||||
if '_quality' in widget:
|
||||
format_url = url_or_none(try_get(
|
||||
widget, lambda x: x['_stream']['json'][0]))
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url + '?hdcore=3.11.0',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
# HTTP formats are not available when geoblocked is True,
|
||||
# other formats are fine though
|
||||
if geoblocked:
|
||||
continue
|
||||
quality = str_or_none(widget.get('_quality'))
|
||||
formats.append({
|
||||
'format_id': ('http-' + quality) if quality else 'http',
|
||||
'url': format_url,
|
||||
'preference': 10, # Plain HTTP, that's nice
|
||||
})
|
||||
|
||||
if not formats and geoblocked:
|
||||
self.raise_geo_restricted(
|
||||
msg='This video is not available due to geoblocking',
|
||||
countries=['DE'])
|
||||
|
||||
self._sort_formats(formats)
|
||||
res.update({
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
return res
|
||||
|
@@ -8,7 +8,6 @@ from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
remove_end,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -34,19 +33,40 @@ class AsianCrushIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id,
|
||||
data=urlencode_postdata({
|
||||
'postid': video_id,
|
||||
'action': 'get_channel_kaltura_vars',
|
||||
}))
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
entry_id = data['entry_id']
|
||||
entry_id, partner_id, title = [None] * 3
|
||||
|
||||
vars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
|
||||
default='{}'), video_id, fatal=False)
|
||||
if vars:
|
||||
entry_id = vars.get('entry_id')
|
||||
partner_id = vars.get('partner_id')
|
||||
title = vars.get('vid_label')
|
||||
|
||||
if not entry_id:
|
||||
entry_id = self._search_regex(
|
||||
r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
|
||||
|
||||
player = self._download_webpage(
|
||||
'https://api.asiancrush.com/embeddedVideoPlayer', video_id,
|
||||
query={'id': entry_id})
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
r'entry_id["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', player,
|
||||
'kaltura id', group='id')
|
||||
|
||||
if not partner_id:
|
||||
partner_id = self._search_regex(
|
||||
r'/p(?:artner_id)?/(\d+)', player, 'partner id',
|
||||
default='513551')
|
||||
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (data['partner_id'], entry_id),
|
||||
ie=KalturaIE.ie_key(), video_id=entry_id,
|
||||
video_title=data.get('vid_label'))
|
||||
'kaltura:%s:%s' % (partner_id, kaltura_id),
|
||||
ie=KalturaIE.ie_key(), video_id=kaltura_id,
|
||||
video_title=title)
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(InfoExtractor):
|
||||
|
@@ -28,8 +28,10 @@ class ATVAtIE(InfoExtractor):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_data = self._parse_json(unescapeHTML(self._search_regex(
|
||||
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
|
||||
webpage, 'player data')), display_id)['config']['initial_video']
|
||||
[r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
|
||||
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
|
||||
webpage, 'player data', group='json')),
|
||||
display_id)['config']['initial_video']
|
||||
|
||||
video_id = video_data['id']
|
||||
video_title = video_data['title']
|
||||
|
@@ -62,7 +62,7 @@ class AudiomackIE(InfoExtractor):
|
||||
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
|
||||
# if so, pass the work off to the soundcloud extractor
|
||||
if SoundcloudIE.suitable(api_response['url']):
|
||||
return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
|
||||
return self.url_result(api_response['url'], SoundcloudIE.ie_key())
|
||||
|
||||
return {
|
||||
'id': compat_str(api_response.get('id', album_url_tag)),
|
||||
|
@@ -1,213 +1,86 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
strip_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class AZMedienBaseIE(InfoExtractor):
|
||||
def _kaltura_video(self, partner_id, entry_id):
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
|
||||
video_id=entry_id)
|
||||
|
||||
|
||||
class AZMedienIE(AZMedienBaseIE):
|
||||
class AZMedienIE(InfoExtractor):
|
||||
IE_DESC = 'AZ Medien videos'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
(?P<host>
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
[0-9]+-show-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-episode-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-segment-(?:[^/\#]+\#)?|
|
||||
\#
|
||||
)|
|
||||
\#
|
||||
[^/]+/
|
||||
(?P<id>
|
||||
[^/]+-(?P<article_id>\d+)
|
||||
)
|
||||
(?P<id>[^\#]+)
|
||||
(?:
|
||||
\#video=
|
||||
(?P<kaltura_id>
|
||||
[_0-9a-z]+
|
||||
)
|
||||
)?
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# URL with 'segment'
|
||||
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
|
||||
'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
|
||||
'info_dict': {
|
||||
'id': '1_2444peh4',
|
||||
'id': '1_anruz3wy',
|
||||
'ext': 'mp4',
|
||||
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
|
||||
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
|
||||
'uploader_id': 'TeleZ?ri',
|
||||
'upload_date': '20161218',
|
||||
'timestamp': 1482084490,
|
||||
'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
|
||||
'uploader_id': 'TVOnline',
|
||||
'upload_date': '20180930',
|
||||
'timestamp': 1538328802,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# URL with 'segment' and fragment:
|
||||
'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
|
||||
'only_matching': True
|
||||
}, {
|
||||
# URL with 'episode' and fragment:
|
||||
'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
|
||||
'only_matching': True
|
||||
}, {
|
||||
# URL with 'show' and fragment:
|
||||
'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
|
||||
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
|
||||
webpage, 'kaltura partner id')
|
||||
entry_id = self._html_search_regex(
|
||||
r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
|
||||
% re.escape(video_id), webpage, 'kaltura entry id', group='id')
|
||||
|
||||
return self._kaltura_video(partner_id, entry_id)
|
||||
|
||||
|
||||
class AZMedienPlaylistIE(AZMedienBaseIE):
|
||||
IE_DESC = 'AZ Medien playlists'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
(?P<id>[0-9]+-
|
||||
(?:
|
||||
show|
|
||||
topic|
|
||||
themen
|
||||
)-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-episode-[^/\#]+
|
||||
)?
|
||||
)$
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# URL with 'episode'
|
||||
'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
|
||||
'info_dict': {
|
||||
'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
|
||||
'title': 'News - Donnerstag, 15. Dezember 2016',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}, {
|
||||
# URL with 'themen'
|
||||
'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
|
||||
'info_dict': {
|
||||
'id': '258-themen-tele-m1-classics',
|
||||
'title': 'Tele M1 Classics',
|
||||
},
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
# URL with 'topic', contains nested playlists
|
||||
'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# URL with 'show' only
|
||||
'url': 'http://www.telezueri.ch/86-show-talktaeglich',
|
||||
'only_matching': True
|
||||
}]
|
||||
_PARTNER_ID = '1719221'
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
entry_id = mobj.group('kaltura_id')
|
||||
|
||||
entries = []
|
||||
if not entry_id:
|
||||
api_url = 'https://www.%s/api/pub/gql/%s' % (host, host.split('.')[0])
|
||||
payload = {
|
||||
'query': '''query VideoContext($articleId: ID!) {
|
||||
article: node(id: $articleId) {
|
||||
... on Article {
|
||||
mainAssetRelation {
|
||||
asset {
|
||||
... on VideoAsset {
|
||||
kalturaId
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}''',
|
||||
'variables': {'articleId': 'Article:%s' % mobj.group('article_id')},
|
||||
}
|
||||
json_data = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
data=json.dumps(payload).encode())
|
||||
entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId']
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id', default=None)
|
||||
|
||||
if partner_id:
|
||||
entries = [
|
||||
self._kaltura_video(partner_id, m.group('id'))
|
||||
for m in re.finditer(
|
||||
r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
# May contain nested playlists (e.g. [1]) thus no explicit
|
||||
# ie_key
|
||||
# 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen)
|
||||
self.url_result(urljoin(url, m.group('url')))
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]
|
||||
|
||||
title = self._search_regex(
|
||||
r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
webpage, 'title',
|
||||
default=strip_or_none(get_element_by_id(
|
||||
'video-title', webpage)), group='title')
|
||||
|
||||
return self.playlist_result(entries, show_id, title)
|
||||
|
||||
|
||||
class AZMedienShowPlaylistIE(AZMedienBaseIE):
|
||||
IE_DESC = 'AZ Medien show playlists'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
(?:
|
||||
all-episodes|
|
||||
alle-episoden
|
||||
)/
|
||||
(?P<id>[^/?#&]+)
|
||||
'''
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
|
||||
'info_dict': {
|
||||
'id': 'astrotalk',
|
||||
'title': 'TeleZüri: AstroTalk - alle episoden',
|
||||
'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
episodes = get_element_by_class('search-mobile-box', webpage)
|
||||
entries = [self.url_result(
|
||||
urljoin(url, m.group('url'))) for m in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
|
||||
title = self._og_search_title(webpage, fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
|
||||
ie=KalturaIE.ie_key(), video_id=entry_id)
|
||||
|
@@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
@@ -16,14 +15,18 @@ from ..utils import (
|
||||
int_or_none,
|
||||
KNOWN_EXTENSIONS,
|
||||
parse_filesize,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||
'md5': 'c557841d5e50261777a6585648adf439',
|
||||
@@ -35,13 +38,44 @@ class BandcampIE(InfoExtractor):
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}, {
|
||||
# free download
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
'md5': '0369ace6b939f0927e62c67a1a8d9fa7',
|
||||
'md5': '853e35bf34aa1d6fe2615ae612564b36',
|
||||
'info_dict': {
|
||||
'id': '2650410135',
|
||||
'ext': 'aiff',
|
||||
'title': 'Ben Prunty - Lanius (Battle)',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Ben Prunty',
|
||||
'timestamp': 1396508491,
|
||||
'upload_date': '20140403',
|
||||
'release_date': '20140403',
|
||||
'duration': 260.877,
|
||||
'track': 'Lanius (Battle)',
|
||||
'track_number': 1,
|
||||
'track_id': '2650410135',
|
||||
'artist': 'Ben Prunty',
|
||||
'album': 'FTL: Advanced Edition Soundtrack',
|
||||
},
|
||||
}, {
|
||||
# no free download, mp3 128
|
||||
'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire',
|
||||
'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7',
|
||||
'info_dict': {
|
||||
'id': '2584466013',
|
||||
'ext': 'mp3',
|
||||
'title': 'Mastodon - Hail to Fire',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Mastodon',
|
||||
'timestamp': 1322005399,
|
||||
'upload_date': '20111122',
|
||||
'release_date': '20040207',
|
||||
'duration': 120.79,
|
||||
'track': 'Hail to Fire',
|
||||
'track_number': 5,
|
||||
'track_id': '2584466013',
|
||||
'artist': 'Mastodon',
|
||||
'album': 'Call of the Mastodon',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -50,19 +84,23 @@ class BandcampIE(InfoExtractor):
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
thumbnail = self._html_search_meta('og:image', webpage, default=None)
|
||||
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
|
||||
if not m_download:
|
||||
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
|
||||
if m_trackinfo:
|
||||
json_code = m_trackinfo.group(1)
|
||||
data = json.loads(json_code)[0]
|
||||
track_id = compat_str(data['id'])
|
||||
|
||||
if not data.get('file'):
|
||||
raise ExtractorError('Not streamable', video_id=track_id, expected=True)
|
||||
track_id = None
|
||||
track = None
|
||||
track_number = None
|
||||
duration = None
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in data['file'].items():
|
||||
formats = []
|
||||
track_info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
|
||||
webpage, 'track info', default='{}'), title)
|
||||
if track_info:
|
||||
file_ = track_info.get('file')
|
||||
if isinstance(file_, dict):
|
||||
for format_id, format_url in file_.items():
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
ext, abr_str = format_id.split('-', 1)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
@@ -72,85 +110,110 @@ class BandcampIE(InfoExtractor):
|
||||
'acodec': ext,
|
||||
'abr': int_or_none(abr_str),
|
||||
})
|
||||
track = track_info.get('title')
|
||||
track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
|
||||
track_number = int_or_none(track_info.get('track_num'))
|
||||
duration = float_or_none(track_info.get('duration'))
|
||||
|
||||
self._sort_formats(formats)
|
||||
def extract(key):
|
||||
return self._search_regex(
|
||||
r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
|
||||
webpage, key, default=None, group='value')
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
'title': data['title'],
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration')),
|
||||
}
|
||||
else:
|
||||
raise ExtractorError('No free songs found')
|
||||
artist = extract('artist')
|
||||
album = extract('album_title')
|
||||
timestamp = unified_timestamp(
|
||||
extract('publish_date') or extract('album_publish_date'))
|
||||
release_date = unified_strdate(extract('album_release_date'))
|
||||
|
||||
download_link = m_download.group(1)
|
||||
video_id = self._search_regex(
|
||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
||||
webpage, 'video id')
|
||||
download_link = self._search_regex(
|
||||
r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'download link', default=None, group='url')
|
||||
if download_link:
|
||||
track_id = self._search_regex(
|
||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
||||
webpage, 'track id')
|
||||
|
||||
download_webpage = self._download_webpage(
|
||||
download_link, video_id, 'Downloading free downloads page')
|
||||
download_webpage = self._download_webpage(
|
||||
download_link, track_id, 'Downloading free downloads page')
|
||||
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
||||
'blob', group='blob'),
|
||||
video_id, transform_source=unescapeHTML)
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
||||
'blob', group='blob'),
|
||||
track_id, transform_source=unescapeHTML)
|
||||
|
||||
info = blob['digital_items'][0]
|
||||
info = try_get(
|
||||
blob, (lambda x: x['digital_items'][0],
|
||||
lambda x: x['download_items'][0]), dict)
|
||||
if info:
|
||||
downloads = info.get('downloads')
|
||||
if isinstance(downloads, dict):
|
||||
if not track:
|
||||
track = info.get('title')
|
||||
if not artist:
|
||||
artist = info.get('artist')
|
||||
if not thumbnail:
|
||||
thumbnail = info.get('thumb_url')
|
||||
|
||||
downloads = info['downloads']
|
||||
track = info['title']
|
||||
download_formats = {}
|
||||
download_formats_list = blob.get('download_formats')
|
||||
if isinstance(download_formats_list, list):
|
||||
for f in blob['download_formats']:
|
||||
name, ext = f.get('name'), f.get('file_extension')
|
||||
if all(isinstance(x, compat_str) for x in (name, ext)):
|
||||
download_formats[name] = ext.strip('.')
|
||||
|
||||
artist = info.get('artist')
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
for format_id, f in downloads.items():
|
||||
format_url = f.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
# Stat URL generation algorithm is reverse engineered from
|
||||
# download_*_bundle_*.js
|
||||
stat_url = update_url_query(
|
||||
format_url.replace('/download/', '/statdownload/'), {
|
||||
'.rand': int(time.time() * 1000 * random.random()),
|
||||
})
|
||||
format_id = f.get('encoding_name') or format_id
|
||||
stat = self._download_json(
|
||||
stat_url, track_id, 'Downloading %s JSON' % format_id,
|
||||
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
|
||||
fatal=False)
|
||||
if not stat:
|
||||
continue
|
||||
retry_url = url_or_none(stat.get('retry_url'))
|
||||
if not retry_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(retry_url, 'http:'),
|
||||
'ext': download_formats.get(format_id),
|
||||
'format_id': format_id,
|
||||
'format_note': f.get('description'),
|
||||
'filesize': parse_filesize(f.get('size_mb')),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
download_formats = {}
|
||||
for f in blob['download_formats']:
|
||||
name, ext = f.get('name'), f.get('file_extension')
|
||||
if all(isinstance(x, compat_str) for x in (name, ext)):
|
||||
download_formats[name] = ext.strip('.')
|
||||
|
||||
formats = []
|
||||
for format_id, f in downloads.items():
|
||||
format_url = f.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
# Stat URL generation algorithm is reverse engineered from
|
||||
# download_*_bundle_*.js
|
||||
stat_url = update_url_query(
|
||||
format_url.replace('/download/', '/statdownload/'), {
|
||||
'.rand': int(time.time() * 1000 * random.random()),
|
||||
})
|
||||
format_id = f.get('encoding_name') or format_id
|
||||
stat = self._download_json(
|
||||
stat_url, video_id, 'Downloading %s JSON' % format_id,
|
||||
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
|
||||
fatal=False)
|
||||
if not stat:
|
||||
continue
|
||||
retry_url = stat.get('retry_url')
|
||||
if not isinstance(retry_url, compat_str):
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(retry_url, 'http:'),
|
||||
'ext': download_formats.get(format_id),
|
||||
'format_id': format_id,
|
||||
'format_note': f.get('description'),
|
||||
'filesize': parse_filesize(f.get('size_mb')),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
|
||||
if not duration:
|
||||
duration = float_or_none(self._html_search_meta(
|
||||
'duration', webpage, default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': track_id,
|
||||
'title': title,
|
||||
'thumbnail': info.get('thumb_url') or thumbnail,
|
||||
'uploader': info.get('artist'),
|
||||
'artist': artist,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': artist,
|
||||
'timestamp': timestamp,
|
||||
'release_date': release_date,
|
||||
'duration': duration,
|
||||
'track': track,
|
||||
'track_number': track_number,
|
||||
'track_id': track_id,
|
||||
'artist': artist,
|
||||
'album': album,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -306,7 +369,7 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in show['audio_stream'].items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
for known_ext in KNOWN_EXTENSIONS:
|
||||
if known_ext in format_id:
|
||||
|
@@ -1,8 +1,9 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
import re
|
||||
import xml
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -17,6 +18,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
@@ -29,7 +31,7 @@ from ..compat import (
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_ID_REGEX = r'[pbw][\da-z]{7}'
|
||||
_ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?bbc\.co\.uk/
|
||||
@@ -236,6 +238,12 @@ class BBCCoUkIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bbc.co.uk/programmes/m00005xn',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
@@ -304,7 +312,13 @@ class BBCCoUkIE(InfoExtractor):
|
||||
def _get_subtitles(self, media, programme_id):
|
||||
subtitles = {}
|
||||
for connection in self._extract_connections(media):
|
||||
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
||||
cc_url = url_or_none(connection.get('href'))
|
||||
if not cc_url:
|
||||
continue
|
||||
captions = self._download_xml(
|
||||
cc_url, programme_id, 'Downloading captions', fatal=False)
|
||||
if not isinstance(captions, xml.etree.ElementTree.Element):
|
||||
continue
|
||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||
subtitles[lang] = [
|
||||
{
|
||||
@@ -778,6 +792,26 @@ class BBCIE(BBCCoUkIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# window.__PRELOADED_STATE__
|
||||
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
||||
'info_dict': {
|
||||
'id': 'b0b9z4vz',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prom 6: An American in Paris and Turangalila',
|
||||
'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
|
||||
'uploader': 'Radio 3',
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
|
||||
'info_dict': {
|
||||
'id': 'p06w9tws',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:2fabf12a726603193a2879a055f72514',
|
||||
'description': 'Learn English words and phrases from this story',
|
||||
},
|
||||
'add_ie': [BBCCoUkIE.ie_key()],
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -928,6 +962,15 @@ class BBCIE(BBCCoUkIE):
|
||||
if entries:
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
|
||||
group_id = self._search_regex(
|
||||
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
||||
webpage, 'group id', default=None)
|
||||
if playlist_id:
|
||||
return self.url_result(
|
||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||
ie=BBCCoUkIE.ie_key())
|
||||
|
||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
programme_id = self._search_regex(
|
||||
[r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
|
||||
@@ -1000,6 +1043,36 @@ class BBCIE(BBCCoUkIE):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
preload_state = self._parse_json(self._search_regex(
|
||||
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
if preload_state:
|
||||
current_programme = preload_state.get('programmes', {}).get('current') or {}
|
||||
programme_id = current_programme.get('id')
|
||||
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
synopses = current_programme.get('synopses') or {}
|
||||
network = current_programme.get('network') or {}
|
||||
duration = int_or_none(
|
||||
current_programme.get('duration', {}).get('value'))
|
||||
thumbnail = None
|
||||
image_url = current_programme.get('image_url')
|
||||
if image_url:
|
||||
thumbnail = image_url.replace('{recipe}', '1920x1920')
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': dict_get(synopses, ('long', 'medium', 'short')),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': network.get('short_title'),
|
||||
'uploader_id': network.get('id'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
bbc3_config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
||||
|
@@ -1,15 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_chr,
|
||||
compat_ord,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
urljoin,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,29 +31,9 @@ class BeegIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
cpl_url = self._search_regex(
|
||||
r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
|
||||
webpage, 'cpl', default=None, group='url')
|
||||
|
||||
cpl_url = urljoin(url, cpl_url)
|
||||
|
||||
beeg_version, beeg_salt = [None] * 2
|
||||
|
||||
if cpl_url:
|
||||
cpl = self._download_webpage(
|
||||
self._proto_relative_url(cpl_url), video_id,
|
||||
'Downloading cpl JS', fatal=False)
|
||||
if cpl:
|
||||
beeg_version = int_or_none(self._search_regex(
|
||||
r'beeg_version\s*=\s*([^\b]+)', cpl,
|
||||
'beeg version', default=None)) or self._search_regex(
|
||||
r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
|
||||
beeg_salt = self._search_regex(
|
||||
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
|
||||
default=None, group='beeg_salt')
|
||||
|
||||
beeg_version = beeg_version or '2185'
|
||||
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
|
||||
beeg_version = self._search_regex(
|
||||
r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
|
||||
default='1546225636701')
|
||||
|
||||
for api_path in ('', 'api.'):
|
||||
video = self._download_json(
|
||||
@@ -68,37 +43,6 @@ class BeegIE(InfoExtractor):
|
||||
if video:
|
||||
break
|
||||
|
||||
def split(o, e):
|
||||
def cut(s, x):
|
||||
n.append(s[:x])
|
||||
return s[x:]
|
||||
n = []
|
||||
r = len(o) % e
|
||||
if r > 0:
|
||||
o = cut(o, r)
|
||||
while len(o) > e:
|
||||
o = cut(o, e)
|
||||
n.append(o)
|
||||
return n
|
||||
|
||||
def decrypt_key(key):
|
||||
# Reverse engineered from http://static.beeg.com/cpl/1738.js
|
||||
a = beeg_salt
|
||||
e = compat_urllib_parse_unquote(key)
|
||||
o = ''.join([
|
||||
compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)
|
||||
for n in range(len(e))])
|
||||
return ''.join(split(o, 3)[::-1])
|
||||
|
||||
def decrypt_url(encrypted_url):
|
||||
encrypted_url = self._proto_relative_url(
|
||||
encrypted_url.replace('{DATA_MARKERS}', ''), 'https:')
|
||||
key = self._search_regex(
|
||||
r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None)
|
||||
if not key:
|
||||
return encrypted_url
|
||||
return encrypted_url.replace(key, decrypt_key(key))
|
||||
|
||||
formats = []
|
||||
for format_id, video_url in video.items():
|
||||
if not video_url:
|
||||
@@ -108,18 +52,20 @@ class BeegIE(InfoExtractor):
|
||||
if not height:
|
||||
continue
|
||||
formats.append({
|
||||
'url': decrypt_url(video_url),
|
||||
'url': self._proto_relative_url(
|
||||
video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
|
||||
'format_id': format_id,
|
||||
'height': int(height),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video['title']
|
||||
video_id = video.get('id') or video_id
|
||||
video_id = compat_str(video.get('id') or video_id)
|
||||
display_id = video.get('code')
|
||||
description = video.get('desc')
|
||||
series = video.get('ps_name')
|
||||
|
||||
timestamp = parse_iso8601(video.get('date'), ' ')
|
||||
timestamp = unified_timestamp(video.get('date'))
|
||||
duration = int_or_none(video.get('duration'))
|
||||
|
||||
tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
|
||||
@@ -129,6 +75,7 @@ class BeegIE(InfoExtractor):
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'series': series,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'tags': tags,
|
||||
|
@@ -93,8 +93,8 @@ class BiliBiliIE(InfoExtractor):
|
||||
}]
|
||||
}]
|
||||
|
||||
_APP_KEY = '84956560bc028eb7'
|
||||
_BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
|
||||
_APP_KEY = 'iVGUTjsxvpLeuDCf'
|
||||
_BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
|
||||
|
||||
def _report_error(self, result):
|
||||
if 'message' in result:
|
||||
|
129
youtube_dl/extractor/bitchute.py
Normal file
129
youtube_dl/extractor/bitchute.py
Normal file
@@ -0,0 +1,129 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
orderedSet,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BitChuteIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
|
||||
'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
|
||||
'info_dict': {
|
||||
'id': 'szoMrox2JEI',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fuck bitches get money',
|
||||
'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Victoria X Rave',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
|
||||
})
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
|
||||
webpage, 'title', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'title',
|
||||
default=None) or self._og_search_description(webpage)
|
||||
|
||||
format_urls = []
|
||||
for mobj in re.finditer(
|
||||
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
|
||||
format_urls.append(mobj.group('url'))
|
||||
format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
|
||||
|
||||
formats = [
|
||||
{'url': format_url}
|
||||
for format_url in orderedSet(format_urls)]
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:image:src', webpage, 'thumbnail')
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage,
|
||||
'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class BitChuteChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.bitchute.com/channel/victoriaxrave/',
|
||||
'playlist_mincount': 185,
|
||||
'info_dict': {
|
||||
'id': 'victoriaxrave',
|
||||
},
|
||||
}
|
||||
|
||||
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
|
||||
|
||||
def _entries(self, channel_id):
|
||||
channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
|
||||
offset = 0
|
||||
for page_num in itertools.count(1):
|
||||
data = self._download_json(
|
||||
'%sextend/' % channel_url, channel_id,
|
||||
'Downloading channel page %d' % page_num,
|
||||
data=urlencode_postdata({
|
||||
'csrfmiddlewaretoken': self._TOKEN,
|
||||
'name': '',
|
||||
'offset': offset,
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'Referer': channel_url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Cookie': 'csrftoken=%s' % self._TOKEN,
|
||||
})
|
||||
if data.get('success') is False:
|
||||
break
|
||||
html = data.get('html')
|
||||
if not html:
|
||||
break
|
||||
video_ids = re.findall(
|
||||
r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
|
||||
html)
|
||||
if not video_ids:
|
||||
break
|
||||
offset += len(video_ids)
|
||||
for video_id in video_ids:
|
||||
yield self.url_result(
|
||||
'https://www.bitchute.com/video/%s' % video_id,
|
||||
ie=BitChuteIE.ie_key(), video_id=video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
return self.playlist_result(
|
||||
self._entries(channel_id), playlist_id=channel_id)
|
@@ -4,8 +4,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BreakIE(InfoExtractor):
|
||||
@@ -55,8 +57,8 @@ class BreakIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for video in content:
|
||||
video_url = video.get('url')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(video.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
bitrate = int_or_none(self._search_regex(
|
||||
r'(\d+)_kbps', video_url, 'tbr', default=None))
|
||||
|
@@ -1,8 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
import struct
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .adobepass import AdobePassIE
|
||||
@@ -310,6 +312,10 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
|
||||
expected=True)
|
||||
|
||||
def _brightcove_new_url_result(self, publisher_id, video_id):
|
||||
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
|
||||
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
||||
|
||||
def _get_video_info(self, video_id, query, referer=None):
|
||||
headers = {}
|
||||
linkBase = query.get('linkBaseURL')
|
||||
@@ -323,6 +329,28 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
|
||||
'error message', default=None)
|
||||
if error_msg is not None:
|
||||
publisher_id = query.get('publisherId')
|
||||
if publisher_id and publisher_id[0].isdigit():
|
||||
publisher_id = publisher_id[0]
|
||||
if not publisher_id:
|
||||
player_key = query.get('playerKey')
|
||||
if player_key and ',' in player_key[0]:
|
||||
player_key = player_key[0]
|
||||
else:
|
||||
player_id = query.get('playerID')
|
||||
if player_id and player_id[0].isdigit():
|
||||
player_page = self._download_webpage(
|
||||
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||
video_id, headers=headers, fatal=False)
|
||||
if player_page:
|
||||
player_key = self._search_regex(
|
||||
r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
|
||||
player_page, 'player key', fatal=False)
|
||||
if player_key:
|
||||
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
||||
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
||||
if publisher_id:
|
||||
return self._brightcove_new_url_result(publisher_id, video_id)
|
||||
raise ExtractorError(
|
||||
'brightcove said: %s' % error_msg, expected=True)
|
||||
|
||||
@@ -444,8 +472,12 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
else:
|
||||
return ad_info
|
||||
|
||||
if 'url' not in info and not info.get('formats'):
|
||||
raise ExtractorError('Unable to extract video url for %s' % video_id)
|
||||
if not info.get('url') and not info.get('formats'):
|
||||
uploader_id = info.get('uploader_id')
|
||||
if uploader_id:
|
||||
info.update(self._brightcove_new_url_result(uploader_id, video_id))
|
||||
else:
|
||||
raise ExtractorError('Unable to extract video url for %s' % video_id)
|
||||
return info
|
||||
|
||||
|
||||
@@ -572,7 +604,8 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
container = source.get('container')
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
src = source.get('src')
|
||||
if ext == 'ism' or container == 'WVM':
|
||||
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
|
||||
if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
|
||||
continue
|
||||
elif ext == 'm3u8' or container == 'M2TS':
|
||||
if not src:
|
||||
@@ -629,6 +662,14 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'format_id': build_format_id('rtmp'),
|
||||
})
|
||||
formats.append(f)
|
||||
if not formats:
|
||||
# for sonyliv.com DRM protected videos
|
||||
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
|
||||
if s3_source_url:
|
||||
formats.append({
|
||||
'url': s3_source_url,
|
||||
'format_id': 'source',
|
||||
})
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if not formats and errors:
|
||||
|
@@ -2,10 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ class CamModelsIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cammodels.com/cam/AutumnKnight/',
|
||||
'only_matching': True,
|
||||
'age_limit': 18
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -56,8 +57,8 @@ class CamModelsIE(InfoExtractor):
|
||||
for media in encodings:
|
||||
if not isinstance(media, dict):
|
||||
continue
|
||||
media_url = media.get('location')
|
||||
if not media_url or not isinstance(media_url, compat_str):
|
||||
media_url = url_or_none(media.get('location'))
|
||||
if not media_url:
|
||||
continue
|
||||
|
||||
format_id_list = [format_id]
|
||||
@@ -93,4 +94,5 @@ class CamModelsIE(InfoExtractor):
|
||||
'title': self._live_title(user_id),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
'age_limit': 18
|
||||
}
|
||||
|
@@ -20,6 +20,7 @@ class CamTubeIE(InfoExtractor):
|
||||
'duration': 1274,
|
||||
'timestamp': 1528018608,
|
||||
'upload_date': '20180603',
|
||||
'age_limit': 18
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -66,4 +67,5 @@ class CamTubeIE(InfoExtractor):
|
||||
'like_count': like_count,
|
||||
'creator': creator,
|
||||
'formats': formats,
|
||||
'age_limit': 18
|
||||
}
|
||||
|
@@ -25,6 +25,7 @@ class CamWithHerIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'uploader': 'MileenaK',
|
||||
'upload_date': '20160322',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -84,4 +85,5 @@ class CamWithHerIE(InfoExtractor):
|
||||
'comment_count': comment_count,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'age_limit': 18
|
||||
}
|
||||
|
@@ -11,6 +11,7 @@ from ..utils import (
|
||||
strip_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
@@ -248,9 +249,13 @@ class VrtNUIE(GigyaBaseIE):
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
info = self._search_json_ld(webpage, display_id, default={})
|
||||
|
||||
# title is optional here since it may be extracted by extractor
|
||||
# that is delegated from here
|
||||
title = strip_or_none(self._html_search_regex(
|
||||
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
|
||||
webpage, 'title').strip()
|
||||
webpage, 'title', default=None))
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?ms)<div class="content__description">(.+?)</div>',
|
||||
@@ -295,7 +300,7 @@ class VrtNUIE(GigyaBaseIE):
|
||||
# the first one
|
||||
video_id = list(video.values())[0].get('videoid')
|
||||
|
||||
return {
|
||||
return merge_dicts(info, {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
@@ -307,4 +312,4 @@ class VrtNUIE(GigyaBaseIE):
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'release_date': release_date,
|
||||
}
|
||||
})
|
||||
|
@@ -82,6 +82,12 @@ class CarambaTVPageIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
videomore_url = VideomoreIE._extract_url(webpage)
|
||||
if not videomore_url:
|
||||
videomore_id = self._search_regex(
|
||||
r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id',
|
||||
default=None)
|
||||
if videomore_id:
|
||||
videomore_url = 'videomore:%s' % videomore_id
|
||||
if videomore_url:
|
||||
title = self._og_search_title(webpage)
|
||||
return {
|
||||
|
@@ -1,20 +1,19 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CartoonNetworkIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html',
|
||||
'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
|
||||
'info_dict': {
|
||||
'id': '8a250ab04ed07e6c014ef3f1e2f9016c',
|
||||
'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Starfire the Cat Lady',
|
||||
'description': 'Robin decides to become a cat so that Starfire will finally love him.',
|
||||
'title': 'How to Draw Upgrade',
|
||||
'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -25,18 +24,39 @@ class CartoonNetworkIE(TurnerBaseIE):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups()
|
||||
query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id
|
||||
return self._extract_cvp_info(
|
||||
'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
|
||||
'secure': {
|
||||
'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
|
||||
'tokenizer_src': 'https://token.vgtf.net/token/token_mobile',
|
||||
},
|
||||
}, {
|
||||
|
||||
def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
|
||||
metadata_re = ''
|
||||
if content_re:
|
||||
metadata_re = r'|video_metadata\.content_' + content_re
|
||||
return self._search_regex(
|
||||
r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re),
|
||||
webpage, name, fatal=fatal)
|
||||
|
||||
media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
|
||||
title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)
|
||||
|
||||
info = self._extract_ngtv_info(
|
||||
media_id, {'networkId': 'cartoonnetwork'}, {
|
||||
'url': url,
|
||||
'site_name': 'CartoonNetwork',
|
||||
'auth_required': self._search_regex(
|
||||
r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);',
|
||||
webpage, 'auth required', default='false') == 'true',
|
||||
'auth_required': find_field('authType', 'auth type') != 'unauth',
|
||||
})
|
||||
|
||||
series = find_field(
|
||||
'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
|
||||
info.update({
|
||||
'id': media_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta('description', webpage),
|
||||
'series': series,
|
||||
'episode': title,
|
||||
})
|
||||
|
||||
for field in ('season', 'episode'):
|
||||
field_name = field + 'Number'
|
||||
info[field + '_number'] = int_or_none(find_field(
|
||||
field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))
|
||||
|
||||
return info
|
||||
|
@@ -4,13 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -53,8 +53,8 @@ class CCMAIE(InfoExtractor):
|
||||
media_url = media['media']['url']
|
||||
if isinstance(media_url, list):
|
||||
for format_ in media_url:
|
||||
format_url = format_.get('file')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_.get('file'))
|
||||
if not format_url:
|
||||
continue
|
||||
label = format_.get('label')
|
||||
f = parse_resolution(label)
|
||||
|
@@ -108,7 +108,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
|
||||
for user_agent in (None, USER_AGENTS['Safari']):
|
||||
req = sanitized_Request(
|
||||
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||
data=urlencode_postdata(data))
|
||||
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
|
142
youtube_dl/extractor/ciscolive.py
Normal file
142
youtube_dl/extractor/ciscolive.py
Normal file
@@ -0,0 +1,142 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class CiscoLiveBaseIE(InfoExtractor):
|
||||
# These appear to be constant across all Cisco Live presentations
|
||||
# and are not tied to any user session or event
|
||||
RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s'
|
||||
RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz'
|
||||
RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s'
|
||||
|
||||
HEADERS = {
|
||||
'Origin': 'https://ciscolive.cisco.com',
|
||||
'rfApiProfileId': RAINFOCUS_API_PROFILE_ID,
|
||||
'rfWidgetId': RAINFOCUS_WIDGET_ID,
|
||||
}
|
||||
|
||||
def _call_api(self, ep, rf_id, query, referrer, note=None):
|
||||
headers = self.HEADERS.copy()
|
||||
headers['Referer'] = referrer
|
||||
return self._download_json(
|
||||
self.RAINFOCUS_API_URL % ep, rf_id, note=note,
|
||||
data=urlencode_postdata(query), headers=headers)
|
||||
|
||||
def _parse_rf_item(self, rf_item):
|
||||
event_name = rf_item.get('eventName')
|
||||
title = rf_item['title']
|
||||
description = clean_html(rf_item.get('abstract'))
|
||||
presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName'])
|
||||
bc_id = rf_item['videos'][0]['url']
|
||||
bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id
|
||||
duration = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length']))
|
||||
location = try_get(rf_item, lambda x: x['times'][0]['room'])
|
||||
|
||||
if duration:
|
||||
duration = duration * 60
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': bc_url,
|
||||
'ie_key': 'BrightcoveNew',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'creator': presenter_name,
|
||||
'location': location,
|
||||
'series': event_name,
|
||||
}
|
||||
|
||||
|
||||
class CiscoLiveSessionIE(CiscoLiveBaseIE):
|
||||
_VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/\??[^#]*#/session/(?P<id>[^/?&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs',
|
||||
'md5': 'c98acf395ed9c9f766941c70f5352e22',
|
||||
'info_dict': {
|
||||
'id': '5803694304001',
|
||||
'ext': 'mp4',
|
||||
'title': '13 Smart Automations to Monitor Your Cisco IOS Network',
|
||||
'description': 'md5:ec4a436019e09a918dec17714803f7cc',
|
||||
'timestamp': 1530305395,
|
||||
'upload_date': '20180629',
|
||||
'uploader_id': '5647924234001',
|
||||
'location': '16B Mezz.',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
rf_id = self._match_id(url)
|
||||
rf_result = self._call_api('session', rf_id, {'id': rf_id}, url)
|
||||
return self._parse_rf_item(rf_result['items'][0])
|
||||
|
||||
|
||||
class CiscoLiveSearchIE(CiscoLiveBaseIE):
|
||||
_VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/'
|
||||
_TESTS = [{
|
||||
'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/',
|
||||
'info_dict': {
|
||||
'title': 'Search query',
|
||||
},
|
||||
'playlist_count': 5,
|
||||
}, {
|
||||
'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url)
|
||||
|
||||
@staticmethod
|
||||
def _check_bc_id_exists(rf_item):
|
||||
return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None
|
||||
|
||||
def _entries(self, query, url):
|
||||
query['size'] = 50
|
||||
query['from'] = 0
|
||||
for page_num in itertools.count(1):
|
||||
results = self._call_api(
|
||||
'search', None, query, url,
|
||||
'Downloading search JSON page %d' % page_num)
|
||||
sl = try_get(results, lambda x: x['sectionList'][0], dict)
|
||||
if sl:
|
||||
results = sl
|
||||
items = results.get('items')
|
||||
if not items or not isinstance(items, list):
|
||||
break
|
||||
for item in items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
if not self._check_bc_id_exists(item):
|
||||
continue
|
||||
yield self._parse_rf_item(item)
|
||||
size = int_or_none(results.get('size'))
|
||||
if size is not None:
|
||||
query['size'] = size
|
||||
total = int_or_none(results.get('total'))
|
||||
if total is not None and query['from'] + query['size'] > total:
|
||||
break
|
||||
query['from'] += query['size']
|
||||
|
||||
def _real_extract(self, url):
|
||||
query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
query['type'] = 'session'
|
||||
return self.playlist_result(
|
||||
self._entries(query, url), playlist_title='Search query')
|
@@ -1,19 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
_translation_table = {
|
||||
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
|
||||
'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
|
||||
'y': 'l', 'z': 'i',
|
||||
'$': ':', '&': '.', '(': '=', '^': '&', '=': '/',
|
||||
}
|
||||
|
||||
|
||||
def _decode(s):
|
||||
return ''.join(_translation_table.get(c, c) for c in s)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CliphunterIE(InfoExtractor):
|
||||
@@ -60,14 +51,14 @@ class CliphunterIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for format_id, f in gexo_files.items():
|
||||
video_url = f.get('url')
|
||||
video_url = url_or_none(f.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
fmt = f.get('fmt')
|
||||
height = f.get('h')
|
||||
format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
|
||||
formats.append({
|
||||
'url': _decode(video_url),
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'width': int_or_none(f.get('w')),
|
||||
'height': int_or_none(height),
|
||||
|
@@ -1,15 +1,19 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class ClypIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://clyp.it/ojz2wfah',
|
||||
'md5': '1d4961036c41247ecfdcc439c0cddcbb',
|
||||
'info_dict': {
|
||||
@@ -21,13 +25,34 @@ class ClypIE(InfoExtractor):
|
||||
'timestamp': 1443515251,
|
||||
'upload_date': '20150929',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d',
|
||||
'info_dict': {
|
||||
'id': 'b04p1odi',
|
||||
'ext': 'mp3',
|
||||
'title': 'GJ! (Reward Edit)',
|
||||
'description': 'Metal Resistance (THE ONE edition)',
|
||||
'duration': 177.789,
|
||||
'timestamp': 1528241278,
|
||||
'upload_date': '20180605',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
token = qs.get('token', [None])[0]
|
||||
|
||||
query = {}
|
||||
if token:
|
||||
query['token'] = token
|
||||
|
||||
metadata = self._download_json(
|
||||
'https://api.clyp.it/%s' % audio_id, audio_id)
|
||||
'https://api.clyp.it/%s' % audio_id, audio_id, query=query)
|
||||
|
||||
formats = []
|
||||
for secure in ('', 'Secure'):
|
||||
@@ -45,7 +70,7 @@ class ClypIE(InfoExtractor):
|
||||
title = metadata['Title']
|
||||
description = metadata.get('Description')
|
||||
duration = float_or_none(metadata.get('Duration'))
|
||||
timestamp = parse_iso8601(metadata.get('DateCreated'))
|
||||
timestamp = unified_timestamp(metadata.get('DateCreated'))
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
|
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
@@ -34,3 +35,32 @@ class CNBCIE(InfoExtractor):
|
||||
{'force_smil_url': True}),
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
|
||||
class CNBCVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
|
||||
'info_dict': {
|
||||
'id': '7000031301',
|
||||
'ext': 'mp4',
|
||||
'title': "Trump: I don't necessarily agree with raising rates",
|
||||
'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
|
||||
'timestamp': 1531958400,
|
||||
'upload_date': '20180719',
|
||||
'uploader': 'NBCU-CNBC',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
|
||||
'video id')
|
||||
return self.url_result(
|
||||
'http://video.cnbc.com/gallery/?video=%s' % video_id,
|
||||
CNBCIE.ie_key())
|
||||
|
@@ -119,11 +119,7 @@ class CNNBlogsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': cnn_url,
|
||||
'ie_key': CNNIE.ie_key(),
|
||||
}
|
||||
return self.url_result(cnn_url, CNNIE.ie_key())
|
||||
|
||||
|
||||
class CNNArticleIE(InfoExtractor):
|
||||
@@ -145,8 +141,4 @@ class CNNArticleIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': 'http://cnn.com/video/?/video/' + cnn_url,
|
||||
'ie_key': CNNIE.ie_key(),
|
||||
}
|
||||
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
|
||||
|
@@ -52,6 +52,7 @@ from ..utils import (
|
||||
GeoUtils,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
JSON_LD_RE,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_codecs,
|
||||
@@ -68,6 +69,7 @@ from ..utils import (
|
||||
update_url_query,
|
||||
urljoin,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
@@ -210,6 +212,11 @@ class InfoExtractor(object):
|
||||
If not explicitly set, calculated from timestamp.
|
||||
uploader_id: Nickname or id of the video uploader.
|
||||
uploader_url: Full URL to a personal webpage of the video uploader.
|
||||
channel: Full name of the channel the video is uploaded on.
|
||||
Note that channel fields may or may not repeat uploader
|
||||
fields. This depends on a particular extractor.
|
||||
channel_id: Id of the channel.
|
||||
channel_url: Full URL to a channel webpage.
|
||||
location: Physical location where the video was filmed.
|
||||
subtitles: The available subtitles as a dictionary in the format
|
||||
{tag: subformats}. "tag" is usually a language code, and
|
||||
@@ -599,6 +606,11 @@ class InfoExtractor(object):
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
if isinstance(err, compat_urllib_error.HTTPError):
|
||||
if self.__can_accept_status_code(err, expected_status):
|
||||
# Retain reference to error to prevent file object from
|
||||
# being closed before it can be read. Works around the
|
||||
# effects of <https://bugs.python.org/issue15002>
|
||||
# introduced in Python 3.4.1.
|
||||
err.fp._error = err
|
||||
return err.fp
|
||||
|
||||
if errnote is False:
|
||||
@@ -1046,7 +1058,7 @@ class InfoExtractor(object):
|
||||
@staticmethod
|
||||
def _og_regexes(prop):
|
||||
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
|
||||
property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
|
||||
property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
|
||||
% {'prop': re.escape(prop)})
|
||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||
return [
|
||||
@@ -1149,8 +1161,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||
json_ld = self._search_regex(
|
||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
||||
html, 'JSON-LD', group='json_ld', **kwargs)
|
||||
JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
|
||||
default = kwargs.get('default', NO_DEFAULT)
|
||||
if not json_ld:
|
||||
return default if default is not NO_DEFAULT else {}
|
||||
@@ -1208,10 +1219,10 @@ class InfoExtractor(object):
|
||||
def extract_video_object(e):
|
||||
assert e['@type'] == 'VideoObject'
|
||||
info.update({
|
||||
'url': e.get('contentUrl'),
|
||||
'url': url_or_none(e.get('contentUrl')),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
|
||||
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
@@ -1228,17 +1239,30 @@ class InfoExtractor(object):
|
||||
if expected_type is not None and expected_type != item_type:
|
||||
return info
|
||||
if item_type in ('TVEpisode', 'Episode'):
|
||||
episode_name = unescapeHTML(e.get('name'))
|
||||
info.update({
|
||||
'episode': unescapeHTML(e.get('name')),
|
||||
'episode': episode_name,
|
||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
})
|
||||
if not info.get('title') and episode_name:
|
||||
info['title'] = episode_name
|
||||
part_of_season = e.get('partOfSeason')
|
||||
if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
|
||||
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
||||
info.update({
|
||||
'season': unescapeHTML(part_of_season.get('name')),
|
||||
'season_number': int_or_none(part_of_season.get('seasonNumber')),
|
||||
})
|
||||
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
|
||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||
elif item_type == 'Movie':
|
||||
info.update({
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('dateCreated')),
|
||||
})
|
||||
elif item_type in ('Article', 'NewsArticle'):
|
||||
info.update({
|
||||
'timestamp': parse_iso8601(e.get('datePublished')),
|
||||
@@ -1575,6 +1599,7 @@ class InfoExtractor(object):
|
||||
# References:
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
|
||||
# 2. https://github.com/rg3/youtube-dl/issues/12211
|
||||
# 3. https://github.com/rg3/youtube-dl/issues/18923
|
||||
|
||||
# We should try extracting formats only from master playlists [1, 4.3.4],
|
||||
# i.e. playlists that describe available qualities. On the other hand
|
||||
@@ -1646,11 +1671,16 @@ class InfoExtractor(object):
|
||||
rendition = stream_group[0]
|
||||
return rendition.get('NAME') or stream_group_id
|
||||
|
||||
# parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have the
|
||||
# chance to detect video only formats when EXT-X-STREAM-INF tags
|
||||
# precede EXT-X-MEDIA tags in HLS manifest such as [3].
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-MEDIA:'):
|
||||
extract_media(line)
|
||||
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||
last_stream_inf = parse_m3u8_attributes(line)
|
||||
elif line.startswith('#EXT-X-MEDIA:'):
|
||||
extract_media(line)
|
||||
elif line.startswith('#') or not line.strip():
|
||||
continue
|
||||
else:
|
||||
@@ -1701,9 +1731,9 @@ class InfoExtractor(object):
|
||||
# However, this is not always respected, for example, [2]
|
||||
# contains EXT-X-STREAM-INF tag which references AUDIO
|
||||
# rendition group but does not have CODECS and despite
|
||||
# referencing audio group an audio group, it represents
|
||||
# a complete (with audio and video) format. So, for such cases
|
||||
# we will ignore references to rendition groups and treat them
|
||||
# referencing an audio group it represents a complete
|
||||
# (with audio and video) format. So, for such cases we will
|
||||
# ignore references to rendition groups and treat them
|
||||
# as complete formats.
|
||||
if audio_group_id and codecs and f.get('vcodec') != 'none':
|
||||
audio_group = groups.get(audio_group_id)
|
||||
@@ -1859,9 +1889,7 @@ class InfoExtractor(object):
|
||||
'height': height,
|
||||
})
|
||||
formats.extend(m3u8_formats)
|
||||
continue
|
||||
|
||||
if src_ext == 'f4m':
|
||||
elif src_ext == 'f4m':
|
||||
f4m_url = src_url
|
||||
if not f4m_params:
|
||||
f4m_params = {
|
||||
@@ -1871,9 +1899,13 @@ class InfoExtractor(object):
|
||||
f4m_url += '&' if '?' in f4m_url else '?'
|
||||
f4m_url += compat_urllib_parse_urlencode(f4m_params)
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||
continue
|
||||
|
||||
if src_url.startswith('http') and self._is_valid_url(src, video_id):
|
||||
elif src_ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src_url, video_id, mpd_id='dash', fatal=False))
|
||||
elif re.search(r'\.ism/[Mm]anifest', src_url):
|
||||
formats.extend(self._extract_ism_formats(
|
||||
src_url, video_id, ism_id='mss', fatal=False))
|
||||
elif src_url.startswith('http') and self._is_valid_url(src, video_id):
|
||||
http_count += 1
|
||||
formats.append({
|
||||
'url': src_url,
|
||||
@@ -1884,7 +1916,6 @@ class InfoExtractor(object):
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
continue
|
||||
|
||||
return formats
|
||||
|
||||
@@ -2106,7 +2137,21 @@ class InfoExtractor(object):
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
|
||||
def prepare_template(template_name, identifiers):
|
||||
t = representation_ms_info[template_name]
|
||||
tmpl = representation_ms_info[template_name]
|
||||
# First of, % characters outside $...$ templates
|
||||
# must be escaped by doubling for proper processing
|
||||
# by % operator string formatting used further (see
|
||||
# https://github.com/rg3/youtube-dl/issues/16867).
|
||||
t = ''
|
||||
in_template = False
|
||||
for c in tmpl:
|
||||
t += c
|
||||
if c == '$':
|
||||
in_template = not in_template
|
||||
elif c == '%' and not in_template:
|
||||
t += c
|
||||
# Next, $...$ templates are translated to their
|
||||
# %(...) counterparts to be used with % operator
|
||||
t = t.replace('$RepresentationID$', representation_id)
|
||||
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
|
||||
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
|
||||
@@ -2437,6 +2482,8 @@ class InfoExtractor(object):
|
||||
media_info['subtitles'].setdefault(lang, []).append({
|
||||
'url': absolute_url(src),
|
||||
})
|
||||
for f in media_info['formats']:
|
||||
f.setdefault('http_headers', {})['Referer'] = base_url
|
||||
if media_info['formats'] or media_info['subtitles']:
|
||||
entries.append(media_info)
|
||||
return entries
|
||||
@@ -2586,7 +2633,7 @@ class InfoExtractor(object):
|
||||
'id': this_video_id,
|
||||
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
@@ -2613,12 +2660,9 @@ class InfoExtractor(object):
|
||||
for source in jwplayer_sources_data:
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
source_url = self._proto_relative_url(source.get('file'))
|
||||
if not source_url:
|
||||
continue
|
||||
if base_url:
|
||||
source_url = compat_urlparse.urljoin(base_url, source_url)
|
||||
if source_url in urls:
|
||||
source_url = urljoin(
|
||||
base_url, self._proto_relative_url(source.get('file')))
|
||||
if not source_url or source_url in urls:
|
||||
continue
|
||||
urls.append(source_url)
|
||||
source_type = source.get('type') or ''
|
||||
|
@@ -1,19 +1,20 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals, division
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
@@ -50,6 +51,21 @@ class CrackleIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_MEDIA_FILE_SLOTS = {
|
||||
'360p.mp4': {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
'480p.mp4': {
|
||||
'width': 768,
|
||||
'height': 432,
|
||||
},
|
||||
'480p_1mbps.mp4': {
|
||||
'width': 852,
|
||||
'height': 480,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -61,13 +77,16 @@ class CrackleIE(InfoExtractor):
|
||||
|
||||
for country in countries:
|
||||
try:
|
||||
# Authorization generation algorithm is reverse engineered from:
|
||||
# https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
|
||||
media_detail_url = 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country)
|
||||
timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
|
||||
h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([media_detail_url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
|
||||
media = self._download_json(
|
||||
'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
|
||||
% (video_id, country), video_id,
|
||||
'Downloading media JSON as %s' % country,
|
||||
'Unable to download media JSON', query={
|
||||
'disableProtocols': 'true',
|
||||
'format': 'json'
|
||||
media_detail_url, video_id, 'Downloading media JSON as %s' % country,
|
||||
'Unable to download media JSON', headers={
|
||||
'Accept': 'application/json',
|
||||
'Authorization': '|'.join([h, timestamp, '117', '1']),
|
||||
})
|
||||
except ExtractorError as e:
|
||||
# 401 means geo restriction, trying next country
|
||||
@@ -86,8 +105,8 @@ class CrackleIE(InfoExtractor):
|
||||
for e in media['MediaURLs']:
|
||||
if e.get('UseDRM') is True:
|
||||
continue
|
||||
format_url = e.get('Path')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(e.get('Path'))
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
@@ -97,6 +116,20 @@ class CrackleIE(InfoExtractor):
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
elif format_url.endswith('.ism/Manifest'):
|
||||
formats.extend(self._extract_ism_formats(
|
||||
format_url, video_id, ism_id='mss', fatal=False))
|
||||
else:
|
||||
mfs_path = e.get('Type')
|
||||
mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
|
||||
if not mfs_info:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': 'http-' + mfs_path.split('.')[0],
|
||||
'width': mfs_info['width'],
|
||||
'height': mfs_info['height'],
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = media.get('Description')
|
||||
@@ -124,8 +157,8 @@ class CrackleIE(InfoExtractor):
|
||||
for cc_file in cc_files:
|
||||
if not isinstance(cc_file, dict):
|
||||
continue
|
||||
cc_url = cc_file.get('Path')
|
||||
if not cc_url or not isinstance(cc_url, compat_str):
|
||||
cc_url = url_or_none(cc_file.get('Path'))
|
||||
if not cc_url:
|
||||
continue
|
||||
lang = cc_file.get('Locale') or 'en'
|
||||
subtitles.setdefault(lang, []).append({'url': cc_url})
|
||||
|
@@ -3,11 +3,13 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree as etree
|
||||
import zlib
|
||||
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from .vrv import VRVIE
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_etree_fromstring,
|
||||
@@ -18,6 +20,8 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bytes_to_intlist,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
@@ -26,7 +30,6 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
extract_attributes,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_cbc_decrypt,
|
||||
@@ -43,7 +46,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
data['req'] = 'RpcApi' + method
|
||||
data = compat_urllib_parse_urlencode(data).encode('utf-8')
|
||||
return self._download_xml(
|
||||
'http://www.crunchyroll.com/xml/',
|
||||
'https://www.crunchyroll.com/xml/',
|
||||
video_id, note, fatal=False, data=data, headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
@@ -139,8 +142,9 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
|
||||
|
||||
|
||||
class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
IE_NAME = 'crunchyroll'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
'info_dict': {
|
||||
@@ -148,7 +152,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
|
||||
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
|
||||
'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Yomiuri Telecasting Corporation (YTV)',
|
||||
'upload_date': '20131013',
|
||||
'url': 're:(?!.*&)',
|
||||
@@ -221,7 +225,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'info_dict': {
|
||||
'id': '535080',
|
||||
'ext': 'mp4',
|
||||
'title': '11eyes Episode 1 – Piros éjszaka - Red Night',
|
||||
'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
|
||||
'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
|
||||
'uploader': 'Marvelous AQL Inc.',
|
||||
'upload_date': '20091021',
|
||||
@@ -262,6 +266,12 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
# Just test metadata extraction
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/media-723735',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMAT_IDS = {
|
||||
@@ -392,7 +402,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'Downloading subtitles for ' + sub_name, data={
|
||||
'subtitle_script_id': sub_id,
|
||||
})
|
||||
if sub_doc is None:
|
||||
if not isinstance(sub_doc, etree.Element):
|
||||
continue
|
||||
sid = sub_doc.get('id')
|
||||
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
|
||||
@@ -434,13 +444,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
if 'To view this, please log in to verify you are 18 or older.' in webpage:
|
||||
self.raise_login_required()
|
||||
|
||||
media = self._parse_json(self._search_regex(
|
||||
r'vilos\.config\.media\s*=\s*({.+?});',
|
||||
webpage, 'vilos media', default='{}'), video_id)
|
||||
media_metadata = media.get('metadata') or {}
|
||||
|
||||
language = self._search_regex(
|
||||
r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
|
||||
webpage, 'language', default=None, group='lang')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
|
||||
webpage, 'video_title')
|
||||
video_title = re.sub(r' {2,}', ' ', video_title)
|
||||
video_description = self._parse_json(self._html_search_regex(
|
||||
video_description = (self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
|
||||
webpage, 'description', default='{}'), video_id).get('description')
|
||||
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
|
||||
if video_description:
|
||||
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
||||
video_upload_date = self._html_search_regex(
|
||||
@@ -453,92 +472,113 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
[r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
|
||||
webpage, 'video_uploader', fatal=False)
|
||||
|
||||
available_fmts = []
|
||||
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
|
||||
attrs = extract_attributes(a)
|
||||
href = attrs.get('href')
|
||||
if href and '/freetrial' in href:
|
||||
continue
|
||||
available_fmts.append(fmt)
|
||||
if not available_fmts:
|
||||
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
|
||||
available_fmts = re.findall(p, webpage)
|
||||
if available_fmts:
|
||||
break
|
||||
video_encode_ids = []
|
||||
formats = []
|
||||
for fmt in available_fmts:
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt + 'p'
|
||||
stream_infos = []
|
||||
streamdata = self._call_rpc_api(
|
||||
'VideoPlayer_GetStandardConfig', video_id,
|
||||
'Downloading media info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_quality': stream_quality,
|
||||
'current_page': url,
|
||||
})
|
||||
if streamdata is not None:
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
if stream_info is not None:
|
||||
for stream in media.get('streams', []):
|
||||
audio_lang = stream.get('audio_lang')
|
||||
hardsub_lang = stream.get('hardsub_lang')
|
||||
vrv_formats = self._extract_vrv_formats(
|
||||
stream.get('url'), video_id, stream.get('format'),
|
||||
audio_lang, hardsub_lang)
|
||||
for f in vrv_formats:
|
||||
if not hardsub_lang:
|
||||
f['preference'] = 1
|
||||
language_preference = 0
|
||||
if audio_lang == language:
|
||||
language_preference += 1
|
||||
if hardsub_lang == language:
|
||||
language_preference += 1
|
||||
if language_preference:
|
||||
f['language_preference'] = language_preference
|
||||
formats.extend(vrv_formats)
|
||||
if not formats:
|
||||
available_fmts = []
|
||||
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
|
||||
attrs = extract_attributes(a)
|
||||
href = attrs.get('href')
|
||||
if href and '/freetrial' in href:
|
||||
continue
|
||||
available_fmts.append(fmt)
|
||||
if not available_fmts:
|
||||
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
|
||||
available_fmts = re.findall(p, webpage)
|
||||
if available_fmts:
|
||||
break
|
||||
if not available_fmts:
|
||||
available_fmts = self._FORMAT_IDS.keys()
|
||||
video_encode_ids = []
|
||||
|
||||
for fmt in available_fmts:
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt + 'p'
|
||||
stream_infos = []
|
||||
streamdata = self._call_rpc_api(
|
||||
'VideoPlayer_GetStandardConfig', video_id,
|
||||
'Downloading media info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_quality': stream_quality,
|
||||
'current_page': url,
|
||||
})
|
||||
if isinstance(streamdata, etree.Element):
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
if stream_info is not None:
|
||||
stream_infos.append(stream_info)
|
||||
stream_info = self._call_rpc_api(
|
||||
'VideoEncode_GetStreamInfo', video_id,
|
||||
'Downloading stream info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_encode_quality': stream_quality,
|
||||
})
|
||||
if isinstance(stream_info, etree.Element):
|
||||
stream_infos.append(stream_info)
|
||||
stream_info = self._call_rpc_api(
|
||||
'VideoEncode_GetStreamInfo', video_id,
|
||||
'Downloading stream info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_encode_quality': stream_quality,
|
||||
})
|
||||
if stream_info is not None:
|
||||
stream_infos.append(stream_info)
|
||||
for stream_info in stream_infos:
|
||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||
if video_encode_id in video_encode_ids:
|
||||
continue
|
||||
video_encode_ids.append(video_encode_id)
|
||||
for stream_info in stream_infos:
|
||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||
if video_encode_id in video_encode_ids:
|
||||
continue
|
||||
video_encode_ids.append(video_encode_id)
|
||||
|
||||
video_file = xpath_text(stream_info, './file')
|
||||
if not video_file:
|
||||
continue
|
||||
if video_file.startswith('http'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
|
||||
video_url = xpath_text(stream_info, './host')
|
||||
if not video_url:
|
||||
continue
|
||||
metadata = stream_info.find('./metadata')
|
||||
format_info = {
|
||||
'format': video_format,
|
||||
'height': int_or_none(xpath_text(metadata, './height')),
|
||||
'width': int_or_none(xpath_text(metadata, './width')),
|
||||
}
|
||||
|
||||
if '.fplive.net/' in video_url:
|
||||
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
|
||||
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||
netloc='v.lvlt.crcdn.net',
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
|
||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||
format_info.update({
|
||||
'format_id': 'http-' + video_format,
|
||||
'url': direct_video_url,
|
||||
})
|
||||
formats.append(format_info)
|
||||
video_file = xpath_text(stream_info, './file')
|
||||
if not video_file:
|
||||
continue
|
||||
if video_file.startswith('http'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
|
||||
format_info.update({
|
||||
'format_id': 'rtmp-' + video_format,
|
||||
'url': video_url,
|
||||
'play_path': video_file,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(format_info)
|
||||
self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
|
||||
video_url = xpath_text(stream_info, './host')
|
||||
if not video_url:
|
||||
continue
|
||||
metadata = stream_info.find('./metadata')
|
||||
format_info = {
|
||||
'format': video_format,
|
||||
'height': int_or_none(xpath_text(metadata, './height')),
|
||||
'width': int_or_none(xpath_text(metadata, './width')),
|
||||
}
|
||||
|
||||
if '.fplive.net/' in video_url:
|
||||
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
|
||||
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||
netloc='v.lvlt.crcdn.net',
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
|
||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||
format_info.update({
|
||||
'format_id': 'http-' + video_format,
|
||||
'url': direct_video_url,
|
||||
})
|
||||
formats.append(format_info)
|
||||
continue
|
||||
|
||||
format_info.update({
|
||||
'format_id': 'rtmp-' + video_format,
|
||||
'url': video_url,
|
||||
'play_path': video_file,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(format_info)
|
||||
self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps'))
|
||||
|
||||
metadata = self._call_rpc_api(
|
||||
'VideoPlayer_GetMediaMetadata', video_id,
|
||||
@@ -546,16 +586,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'media_id': video_id,
|
||||
})
|
||||
|
||||
subtitles = self.extract_subtitles(video_id, webpage)
|
||||
subtitles = {}
|
||||
for subtitle in media.get('subtitles', []):
|
||||
subtitle_url = subtitle.get('url')
|
||||
if not subtitle_url:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': subtitle.get('format', 'ass'),
|
||||
})
|
||||
if not subtitles:
|
||||
subtitles = self.extract_subtitles(video_id, webpage)
|
||||
|
||||
# webpage provide more accurate data than series_title from XML
|
||||
series = self._html_search_regex(
|
||||
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
|
||||
webpage, 'series', fatal=False)
|
||||
season = xpath_text(metadata, 'series_title')
|
||||
|
||||
episode = xpath_text(metadata, 'episode_title')
|
||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||
season = episode = episode_number = duration = thumbnail = None
|
||||
|
||||
if isinstance(metadata, etree.Element):
|
||||
season = xpath_text(metadata, 'series_title')
|
||||
episode = xpath_text(metadata, 'episode_title')
|
||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||
duration = float_or_none(media_metadata.get('duration'), 1000)
|
||||
thumbnail = xpath_text(metadata, 'episode_image_url')
|
||||
|
||||
if not episode:
|
||||
episode = media_metadata.get('title')
|
||||
if not episode_number:
|
||||
episode_number = int_or_none(media_metadata.get('episode_number'))
|
||||
if not thumbnail:
|
||||
thumbnail = media_metadata.get('thumbnail', {}).get('url')
|
||||
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
||||
@@ -565,7 +627,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'thumbnail': xpath_text(metadata, 'episode_image_url'),
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
'series': series,
|
||||
@@ -580,7 +643,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
IE_NAME = 'crunchyroll:playlist'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
|
||||
|
@@ -46,8 +46,24 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||
self._handle_errors(result)
|
||||
self._auth_token = result['message']['auth_token']
|
||||
|
||||
def _extract_media_info(self, media):
|
||||
video_id = compat_str(media['id'])
|
||||
|
||||
class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||
IE_NAME = 'curiositystream'
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://app.curiositystream.com/video/2',
|
||||
'md5': '262bb2f257ff301115f1973540de8983',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'ext': 'mp4',
|
||||
'title': 'How Did You Develop The Internet?',
|
||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
media = self._call_api('media/' + video_id, video_id)
|
||||
title = media['title']
|
||||
|
||||
formats = []
|
||||
@@ -114,38 +130,21 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||
IE_NAME = 'curiositystream'
|
||||
_VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://app.curiositystream.com/video/2',
|
||||
'md5': '262bb2f257ff301115f1973540de8983',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'ext': 'mp4',
|
||||
'title': 'How Did You Develop The Internet?',
|
||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
media = self._call_api('media/' + video_id, video_id)
|
||||
return self._extract_media_info(media)
|
||||
|
||||
|
||||
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||
IE_NAME = 'curiositystream:collection'
|
||||
_VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://app.curiositystream.com/collection/2',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'title': 'Curious Minds: The Internet',
|
||||
'description': 'How is the internet shaping our lives in the 21st Century?',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}
|
||||
'playlist_mincount': 17,
|
||||
}, {
|
||||
'url': 'https://curiositystream.com/series/2',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
collection_id = self._match_id(url)
|
||||
@@ -153,7 +152,10 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||
'collections/' + collection_id, collection_id)
|
||||
entries = []
|
||||
for media in collection.get('media', []):
|
||||
entries.append(self._extract_media_info(media))
|
||||
media_id = compat_str(media.get('id'))
|
||||
entries.append(self.url_result(
|
||||
'https://curiositystream.com/video/' + media_id,
|
||||
CuriosityStreamIE.ie_key(), media_id))
|
||||
return self.playlist_result(
|
||||
entries, collection_id,
|
||||
collection.get('title'), collection.get('description'))
|
||||
|
@@ -3,8 +3,12 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -40,10 +44,15 @@ class CWTVIE(InfoExtractor):
|
||||
'duration': 1263,
|
||||
'series': 'Whose Line Is It Anyway?',
|
||||
'season_number': 11,
|
||||
'season': '11',
|
||||
'episode_number': 20,
|
||||
'upload_date': '20151006',
|
||||
'timestamp': 1444107300,
|
||||
'age_limit': 14,
|
||||
'uploader': 'CWTV',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
|
||||
@@ -58,60 +67,31 @@ class CWTVIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = None
|
||||
formats = []
|
||||
for partner in (154, 213):
|
||||
vdata = self._download_json(
|
||||
'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/%d?format=json' % (video_id, partner), video_id, fatal=False)
|
||||
if not vdata:
|
||||
continue
|
||||
video_data = vdata
|
||||
for quality, quality_data in vdata.get('videos', {}).items():
|
||||
quality_url = quality_data.get('uri')
|
||||
if not quality_url:
|
||||
continue
|
||||
if quality == 'variantplaylist':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
quality_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
tbr = int_or_none(quality_data.get('bitrate'))
|
||||
format_id = 'http' + ('-%d' % tbr if tbr else '')
|
||||
if self._is_valid_url(quality_url, video_id, format_id):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': quality_url,
|
||||
'tbr': tbr,
|
||||
})
|
||||
video_metadata = video_data['assetFields']
|
||||
ism_url = video_metadata.get('smoothStreamingUrl')
|
||||
if ism_url:
|
||||
formats.extend(self._extract_ism_formats(
|
||||
ism_url, video_id, ism_id='mss', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
data = self._download_json(
|
||||
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
|
||||
video_id)
|
||||
if data.get('result') != 'ok':
|
||||
raise ExtractorError(data['msg'], expected=True)
|
||||
video_data = data['video']
|
||||
title = video_data['title']
|
||||
mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
|
||||
|
||||
thumbnails = [{
|
||||
'url': image['uri'],
|
||||
'width': image.get('width'),
|
||||
'height': image.get('height'),
|
||||
} for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
|
||||
|
||||
subtitles = {
|
||||
'en': [{
|
||||
'url': video_metadata['UnicornCcUrl'],
|
||||
}],
|
||||
} if video_metadata.get('UnicornCcUrl') else None
|
||||
season = str_or_none(video_data.get('season'))
|
||||
episode = str_or_none(video_data.get('episode'))
|
||||
if episode and season:
|
||||
episode = episode.lstrip(season)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': video_metadata['title'],
|
||||
'description': video_metadata.get('description'),
|
||||
'duration': int_or_none(video_metadata.get('duration')),
|
||||
'series': video_metadata.get('seriesName'),
|
||||
'season_number': int_or_none(video_metadata.get('seasonNumber')),
|
||||
'season': video_metadata.get('seasonName'),
|
||||
'episode_number': int_or_none(video_metadata.get('episodeNumber')),
|
||||
'timestamp': parse_iso8601(video_data.get('startTime')),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'title': title,
|
||||
'url': smuggle_url(mpx_url, {'force_smil_url': True}),
|
||||
'description': video_data.get('description_long'),
|
||||
'duration': int_or_none(video_data.get('duration_secs')),
|
||||
'series': video_data.get('series_name'),
|
||||
'season_number': int_or_none(season),
|
||||
'episode_number': int_or_none(episode),
|
||||
'timestamp': parse_iso8601(video_data.get('start_time')),
|
||||
'age_limit': parse_age_limit(video_data.get('rating')),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
@@ -49,6 +49,9 @@ class DailyMailIE(InfoExtractor):
|
||||
'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
|
||||
|
||||
video_sources = self._download_json(sources_url, video_id)
|
||||
body = video_sources.get('body')
|
||||
if body:
|
||||
video_sources = body
|
||||
|
||||
formats = []
|
||||
for rendition in video_sources['renditions']:
|
||||
|
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import functools
|
||||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
@@ -16,11 +17,16 @@ from ..utils import (
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
OnDemandPagedList,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
mimetype2ext,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -144,7 +150,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
view_count_str = self._search_regex(
|
||||
@@ -167,10 +174,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
|
||||
webpage, 'player v5', default=None)
|
||||
if player_v5:
|
||||
player = self._parse_json(player_v5, video_id)
|
||||
metadata = player['metadata']
|
||||
player = self._parse_json(player_v5, video_id, fatal=False) or {}
|
||||
metadata = try_get(player, lambda x: x['metadata'], dict)
|
||||
if not metadata:
|
||||
metadata_url = url_or_none(try_get(
|
||||
player, lambda x: x['context']['metadata_template_url1']))
|
||||
if metadata_url:
|
||||
metadata_url = metadata_url.replace(':videoId', video_id)
|
||||
else:
|
||||
metadata_url = update_url_query(
|
||||
'https://www.dailymotion.com/player/metadata/video/%s'
|
||||
% video_id, {
|
||||
'embedder': url,
|
||||
'integration': 'inline',
|
||||
'GK_PV5_NEON': '1',
|
||||
})
|
||||
metadata = self._download_json(
|
||||
metadata_url, video_id, 'Downloading metadata JSON')
|
||||
|
||||
if metadata.get('error', {}).get('type') == 'password_protected':
|
||||
if try_get(metadata, lambda x: x['error']['type']) == 'password_protected':
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if password:
|
||||
r = int(metadata['id'][1:], 36)
|
||||
@@ -342,17 +364,93 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
IE_NAME = 'dailymotion:playlist'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
|
||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
||||
'info_dict': {
|
||||
'title': 'SPORT',
|
||||
'id': 'xv4bw_nqtv_sport',
|
||||
'id': 'xv4bw',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _fetch_page(self, playlist_id, authorizaion, page):
|
||||
page += 1
|
||||
videos = self._download_json(
|
||||
'https://graphql.api.dailymotion.com',
|
||||
playlist_id, 'Downloading page %d' % page,
|
||||
data=json.dumps({
|
||||
'query': '''{
|
||||
collection(xid: "%s") {
|
||||
videos(first: %d, page: %d) {
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
xid
|
||||
url
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % (playlist_id, self._PAGE_SIZE, page)
|
||||
}).encode(), headers={
|
||||
'Authorization': authorizaion,
|
||||
'Origin': 'https://www.dailymotion.com',
|
||||
})['data']['collection']['videos']
|
||||
for edge in videos['edges']:
|
||||
node = edge['node']
|
||||
yield self.url_result(
|
||||
node['url'], DailymotionIE.ie_key(), node['xid'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
api = self._parse_json(self._search_regex(
|
||||
r'__PLAYER_CONFIG__\s*=\s*({.+?});',
|
||||
webpage, 'player config'), playlist_id)['context']['api']
|
||||
auth = self._download_json(
|
||||
api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
|
||||
playlist_id, data=urlencode_postdata({
|
||||
'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
|
||||
'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
|
||||
'grant_type': 'client_credentials',
|
||||
}))
|
||||
authorizaion = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, playlist_id, authorizaion), self._PAGE_SIZE)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id,
|
||||
self._og_search_title(webpage))
|
||||
|
||||
|
||||
class DailymotionUserIE(DailymotionBaseInfoExtractor):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
'id': 'nqtv',
|
||||
'title': 'Rémi Gaillard',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||
'info_dict': {
|
||||
'id': 'UnderProject',
|
||||
'title': 'UnderProject',
|
||||
},
|
||||
'playlist_mincount': 1800,
|
||||
'expected_warnings': [
|
||||
'Stopped at duplicated page',
|
||||
],
|
||||
'skip': 'Takes too long time',
|
||||
}]
|
||||
|
||||
def _extract_entries(self, id):
|
||||
video_ids = set()
|
||||
@@ -378,43 +476,6 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'entries': self._extract_entries(playlist_id),
|
||||
}
|
||||
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
'id': 'nqtv',
|
||||
'title': 'Rémi Gaillard',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||
'info_dict': {
|
||||
'id': 'UnderProject',
|
||||
'title': 'UnderProject',
|
||||
},
|
||||
'playlist_mincount': 1800,
|
||||
'expected_warnings': [
|
||||
'Stopped at duplicated page',
|
||||
],
|
||||
'skip': 'Takes too long time',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user = mobj.group('user')
|
||||
|
@@ -5,13 +5,16 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DctpTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# 4x3
|
||||
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
||||
'info_dict': {
|
||||
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
|
||||
@@ -19,37 +22,55 @@ class DctpTvIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'title': 'Videoinstallation für eine Kaufhausfassade',
|
||||
'description': 'Kurzfilm',
|
||||
'upload_date': '20110407',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 71.24,
|
||||
'timestamp': 1302172322,
|
||||
'upload_date': '20110407',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# 16x9
|
||||
'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
version = self._download_json(
|
||||
'%s/version.json' % self._BASE_URL, display_id,
|
||||
'Downloading version JSON')
|
||||
|
||||
video_id = self._html_search_meta(
|
||||
'DC.identifier', webpage, 'video id',
|
||||
default=None) or self._search_regex(
|
||||
r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
|
||||
restapi_base = '%s/%s/restapi' % (
|
||||
self._BASE_URL, version['version_name'])
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
info = self._download_json(
|
||||
'%s/slugs/%s.json' % (restapi_base, display_id), display_id,
|
||||
'Downloading video info JSON')
|
||||
|
||||
media = self._download_json(
|
||||
'%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])),
|
||||
display_id, 'Downloading media JSON')
|
||||
|
||||
uuid = media['uuid']
|
||||
title = media['title']
|
||||
ratio = '16x9' if media.get('is_wide') else '4x3'
|
||||
play_path = 'mp4:%s_dctp_0500_%s.m4v' % (uuid, ratio)
|
||||
|
||||
servers = self._download_json(
|
||||
'http://www.dctp.tv/streaming_servers/', display_id,
|
||||
note='Downloading server list', fatal=False)
|
||||
note='Downloading server list JSON', fatal=False)
|
||||
|
||||
if servers:
|
||||
endpoint = next(
|
||||
server['endpoint']
|
||||
for server in servers
|
||||
if isinstance(server.get('endpoint'), compat_str) and
|
||||
if url_or_none(server.get('endpoint')) and
|
||||
'cloudfront' in server['endpoint'])
|
||||
else:
|
||||
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
|
||||
@@ -60,27 +81,35 @@ class DctpTvIE(InfoExtractor):
|
||||
formats = [{
|
||||
'url': endpoint,
|
||||
'app': app,
|
||||
'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
|
||||
'play_path': play_path,
|
||||
'page_url': url,
|
||||
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
|
||||
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-110.swf',
|
||||
'ext': 'flv',
|
||||
}]
|
||||
|
||||
description = self._html_search_meta('DC.description', webpage)
|
||||
upload_date = unified_strdate(
|
||||
self._html_search_meta('DC.date.created', webpage))
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
duration = float_or_none(self._search_regex(
|
||||
r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
|
||||
default=None), scale=1000)
|
||||
thumbnails = []
|
||||
images = media.get('images')
|
||||
if isinstance(images, list):
|
||||
for image in images:
|
||||
if not isinstance(image, dict):
|
||||
continue
|
||||
image_url = url_or_none(image.get('url'))
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': image_url,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'id': uuid,
|
||||
'display_id': display_id,
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'title': title,
|
||||
'alt_title': media.get('subtitle'),
|
||||
'description': media.get('description') or media.get('teaser'),
|
||||
'timestamp': unified_timestamp(media.get('created')),
|
||||
'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -17,16 +17,29 @@ from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>
|
||||
discovery|
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
animalplanet|
|
||||
ahctv|
|
||||
destinationamerica|
|
||||
sciencechannel|
|
||||
tlc|
|
||||
velocity
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<site>
|
||||
(?:www\.)?
|
||||
(?:
|
||||
discovery|
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
animalplanet|
|
||||
ahctv|
|
||||
destinationamerica|
|
||||
sciencechannel|
|
||||
tlc|
|
||||
velocity
|
||||
)|
|
||||
watch\.
|
||||
(?:
|
||||
hgtv|
|
||||
foodnetwork|
|
||||
travelchannel|
|
||||
diynetwork|
|
||||
cookingchanneltv|
|
||||
motortrend
|
||||
)
|
||||
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
|
||||
@@ -71,7 +84,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
|
||||
if not access_token:
|
||||
access_token = self._download_json(
|
||||
'https://www.%s.com/anonymous' % site, display_id, query={
|
||||
'https://%s.com/anonymous' % site, display_id, query={
|
||||
'authRel': 'authorization',
|
||||
'client_id': try_get(
|
||||
react_data, lambda x: x['application']['apiClientId'],
|
||||
@@ -81,11 +94,12 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
})['access_token']
|
||||
|
||||
try:
|
||||
headers = self.geo_verification_headers()
|
||||
headers['Authorization'] = 'Bearer ' + access_token
|
||||
|
||||
stream = self._download_json(
|
||||
'https://api.discovery.com/v1/streaming/video/' + video_id,
|
||||
display_id, headers={
|
||||
'Authorization': 'Bearer ' + access_token,
|
||||
})
|
||||
display_id, headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||
e_description = self._parse_json(
|
||||
|
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
@@ -12,6 +11,7 @@ from ..utils import (
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -69,9 +69,8 @@ class DiscoveryGoBaseIE(InfoExtractor):
|
||||
captions = stream.get('captions')
|
||||
if isinstance(captions, list):
|
||||
for caption in captions:
|
||||
subtitle_url = caption.get('fileUrl')
|
||||
if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
|
||||
not subtitle_url.startswith('http')):
|
||||
subtitle_url = url_or_none(caption.get('fileUrl'))
|
||||
if not subtitle_url or not subtitle_url.startswith('http'):
|
||||
continue
|
||||
lang = caption.get('fileLang', 'en')
|
||||
ext = determine_ext(subtitle_url)
|
||||
|
@@ -21,6 +21,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
USER_AGENTS,
|
||||
)
|
||||
|
||||
@@ -310,9 +311,11 @@ class DPlayItIE(InfoExtractor):
|
||||
|
||||
if not info:
|
||||
info_url = self._search_regex(
|
||||
r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
|
||||
webpage, 'info url')
|
||||
(r'playback_json_url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'url\s*[:=]\s*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'),
|
||||
webpage, 'info url', group='url')
|
||||
|
||||
info_url = urljoin(url, info_url)
|
||||
video_id = info_url.rpartition('/')[-1]
|
||||
|
||||
try:
|
||||
@@ -322,6 +325,8 @@ class DPlayItIE(InfoExtractor):
|
||||
'dplayit_token').value,
|
||||
'Referer': url,
|
||||
})
|
||||
if isinstance(info, compat_str):
|
||||
info = self._parse_json(info, display_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
|
||||
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
|
||||
@@ -337,6 +342,7 @@ class DPlayItIE(InfoExtractor):
|
||||
formats = self._extract_m3u8_formats(
|
||||
hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
series = self._html_search_regex(
|
||||
r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
|
||||
|
@@ -7,7 +7,6 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -17,6 +16,7 @@ from ..utils import (
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -139,8 +139,8 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
for sub in subs:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url or not isinstance(sub_url, compat_str):
|
||||
sub_url = url_or_none(sub.get('url'))
|
||||
if not sub_url:
|
||||
continue
|
||||
subtitles.setdefault(
|
||||
sub.get('code') or sub.get('language') or 'en', []).append({
|
||||
@@ -163,8 +163,8 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
for format_id, format_dict in download_assets.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_dict.get('url'))
|
||||
if not format_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
|
@@ -4,7 +4,9 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
@@ -65,6 +67,9 @@ class DrTuberIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = int_or_none(video_data.get('duration')) or parse_duration(
|
||||
video_data.get('duration_format'))
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
|
||||
r'<title>([^<]+)\s*@\s+DrTuber',
|
||||
@@ -103,4 +108,5 @@ class DrTuberIE(InfoExtractor):
|
||||
'comment_count': comment_count,
|
||||
'categories': categories,
|
||||
'age_limit': self._rta_search(webpage),
|
||||
'duration': duration,
|
||||
}
|
||||
|
@@ -1,15 +1,25 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import binascii
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
float_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
str_or_none,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -20,23 +30,31 @@ class DRTVIE(InfoExtractor):
|
||||
IE_NAME = 'drtv'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
|
||||
'md5': '7ae17b4e18eb5d29212f424a7511c184',
|
||||
'md5': '25e659cccc9a2ed956110a299fdf5983',
|
||||
'info_dict': {
|
||||
'id': 'klassen-darlig-taber-10',
|
||||
'ext': 'mp4',
|
||||
'title': 'Klassen - Dårlig taber (10)',
|
||||
'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
|
||||
'timestamp': 1471991907,
|
||||
'upload_date': '20160823',
|
||||
'timestamp': 1539085800,
|
||||
'upload_date': '20181009',
|
||||
'duration': 606.84,
|
||||
'series': 'Klassen',
|
||||
'season': 'Klassen I',
|
||||
'season_number': 1,
|
||||
'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b',
|
||||
'episode': 'Episode 10',
|
||||
'episode_number': 10,
|
||||
'release_year': 2016,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
# embed
|
||||
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
|
||||
'info_dict': {
|
||||
'id': 'christiania-pusher-street-ryddes-drdkrjpo',
|
||||
'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6',
|
||||
'ext': 'mp4',
|
||||
'title': 'LIVE Christianias rydning af Pusher Street er i gang',
|
||||
'title': 'christiania pusher street ryddes drdkrjpo',
|
||||
'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
|
||||
'timestamp': 1472800279,
|
||||
'upload_date': '20160902',
|
||||
@@ -45,17 +63,18 @@ class DRTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
# with SignLanguage formats
|
||||
'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
|
||||
'info_dict': {
|
||||
'id': 'historien-om-danmark-stenalder',
|
||||
'ext': 'mp4',
|
||||
'title': 'Historien om Danmark: Stenalder (1)',
|
||||
'title': 'Historien om Danmark: Stenalder',
|
||||
'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
|
||||
'timestamp': 1490401996,
|
||||
'upload_date': '20170325',
|
||||
'duration': 3502.04,
|
||||
'timestamp': 1546628400,
|
||||
'upload_date': '20190104',
|
||||
'duration': 3502.56,
|
||||
'formats': 'mincount:20',
|
||||
},
|
||||
'params': {
|
||||
@@ -74,20 +93,26 @@ class DRTVIE(InfoExtractor):
|
||||
|
||||
video_id = self._search_regex(
|
||||
(r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||
r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
|
||||
webpage, 'video id')
|
||||
r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
|
||||
webpage, 'video id', default=None)
|
||||
|
||||
programcard = self._download_json(
|
||||
'http://www.dr.dk/mu/programcard/expanded/%s' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
data = programcard['Data'][0]
|
||||
if not video_id:
|
||||
video_id = compat_urllib_parse_unquote(self._search_regex(
|
||||
r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
|
||||
webpage, 'urn'))
|
||||
|
||||
title = remove_end(self._og_search_title(
|
||||
webpage, default=None), ' | TV | DR') or data['Title']
|
||||
data = self._download_json(
|
||||
'https://www.dr.dk/mu-online/api/1.4/programcard/%s' % video_id,
|
||||
video_id, 'Downloading video JSON', query={'expanded': 'true'})
|
||||
|
||||
title = str_or_none(data.get('Title')) or re.sub(
|
||||
r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
|
||||
self._og_search_title(webpage))
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or data.get('Description')
|
||||
|
||||
timestamp = parse_iso8601(data.get('CreatedTime'))
|
||||
timestamp = unified_timestamp(
|
||||
data.get('PrimaryBroadcastStartTime') or data.get('SortDateTime'))
|
||||
|
||||
thumbnail = None
|
||||
duration = None
|
||||
@@ -97,24 +122,62 @@ class DRTVIE(InfoExtractor):
|
||||
formats = []
|
||||
subtitles = {}
|
||||
|
||||
for asset in data['Assets']:
|
||||
assets = []
|
||||
primary_asset = data.get('PrimaryAsset')
|
||||
if isinstance(primary_asset, dict):
|
||||
assets.append(primary_asset)
|
||||
secondary_assets = data.get('SecondaryAssets')
|
||||
if isinstance(secondary_assets, list):
|
||||
for secondary_asset in secondary_assets:
|
||||
if isinstance(secondary_asset, dict):
|
||||
assets.append(secondary_asset)
|
||||
|
||||
def hex_to_bytes(hex):
|
||||
return binascii.a2b_hex(hex.encode('ascii'))
|
||||
|
||||
def decrypt_uri(e):
|
||||
n = int(e[2:10], 16)
|
||||
a = e[10 + n:]
|
||||
data = bytes_to_intlist(hex_to_bytes(e[10:10 + n]))
|
||||
key = bytes_to_intlist(hashlib.sha256(
|
||||
('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest())
|
||||
iv = bytes_to_intlist(hex_to_bytes(a))
|
||||
decrypted = aes_cbc_decrypt(data, key, iv)
|
||||
return intlist_to_bytes(
|
||||
decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0]
|
||||
|
||||
for asset in assets:
|
||||
kind = asset.get('Kind')
|
||||
if kind == 'Image':
|
||||
thumbnail = asset.get('Uri')
|
||||
thumbnail = url_or_none(asset.get('Uri'))
|
||||
elif kind in ('VideoResource', 'AudioResource'):
|
||||
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
|
||||
restricted_to_denmark = asset.get('RestrictedToDenmark')
|
||||
asset_target = asset.get('Target')
|
||||
for link in asset.get('Links', []):
|
||||
uri = link.get('Uri')
|
||||
if not uri:
|
||||
encrypted_uri = link.get('EncryptedUri')
|
||||
if not encrypted_uri:
|
||||
continue
|
||||
try:
|
||||
uri = decrypt_uri(encrypted_uri)
|
||||
except Exception:
|
||||
self.report_warning(
|
||||
'Unable to decrypt EncryptedUri', video_id)
|
||||
continue
|
||||
uri = url_or_none(uri)
|
||||
if not uri:
|
||||
continue
|
||||
target = link.get('Target')
|
||||
format_id = target or ''
|
||||
preference = None
|
||||
if asset_target in ('SpokenSubtitles', 'SignLanguage'):
|
||||
if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
|
||||
preference = -1
|
||||
format_id += '-%s' % asset_target
|
||||
elif asset_target == 'Default':
|
||||
preference = 1
|
||||
else:
|
||||
preference = None
|
||||
if target == 'HDS':
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
|
||||
@@ -140,19 +203,22 @@ class DRTVIE(InfoExtractor):
|
||||
'vcodec': 'none' if kind == 'AudioResource' else None,
|
||||
'preference': preference,
|
||||
})
|
||||
subtitles_list = asset.get('SubtitlesList')
|
||||
if isinstance(subtitles_list, list):
|
||||
LANGS = {
|
||||
'Danish': 'da',
|
||||
}
|
||||
for subs in subtitles_list:
|
||||
if not subs.get('Uri'):
|
||||
continue
|
||||
lang = subs.get('Language') or 'da'
|
||||
subtitles.setdefault(LANGS.get(lang, lang), []).append({
|
||||
'url': subs['Uri'],
|
||||
'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
|
||||
})
|
||||
subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist')
|
||||
if isinstance(subtitles_list, list):
|
||||
LANGS = {
|
||||
'Danish': 'da',
|
||||
}
|
||||
for subs in subtitles_list:
|
||||
if not isinstance(subs, dict):
|
||||
continue
|
||||
sub_uri = url_or_none(subs.get('Uri'))
|
||||
if not sub_uri:
|
||||
continue
|
||||
lang = subs.get('Language') or 'da'
|
||||
subtitles.setdefault(LANGS.get(lang, lang), []).append({
|
||||
'url': sub_uri,
|
||||
'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
|
||||
})
|
||||
|
||||
if not formats and restricted_to_denmark:
|
||||
self.raise_geo_restricted(
|
||||
@@ -170,6 +236,13 @@ class DRTVIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'series': str_or_none(data.get('SeriesTitle')),
|
||||
'season': str_or_none(data.get('SeasonTitle')),
|
||||
'season_number': int_or_none(data.get('SeasonNumber')),
|
||||
'season_id': str_or_none(data.get('SeasonUrn')),
|
||||
'episode': str_or_none(data.get('EpisodeTitle')),
|
||||
'episode_number': int_or_none(data.get('EpisodeNumber')),
|
||||
'release_year': int_or_none(data.get('ProductionYear')),
|
||||
}
|
||||
|
||||
|
||||
|
@@ -15,16 +15,16 @@ from ..utils import (
|
||||
class DTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
|
||||
_TEST = {
|
||||
'url': 'https://d.tube/#!/v/benswann/zqd630em',
|
||||
'md5': 'a03eaa186618ffa7a3145945543a251e',
|
||||
'url': 'https://d.tube/#!/v/broncnutz/x380jtr1',
|
||||
'md5': '9f29088fa08d699a7565ee983f56a06e',
|
||||
'info_dict': {
|
||||
'id': 'zqd630em',
|
||||
'id': 'x380jtr1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Reality Check: FDA\'s Disinformation Campaign on Kratom',
|
||||
'description': 'md5:700d164e066b87f9eac057949e4227c2',
|
||||
'uploader_id': 'benswann',
|
||||
'upload_date': '20180222',
|
||||
'timestamp': 1519328958,
|
||||
'title': 'Lefty 3-Rings is Back Baby!! NCAA Picks',
|
||||
'description': 'md5:60be222088183be3a42f196f34235776',
|
||||
'uploader_id': 'broncnutz',
|
||||
'upload_date': '20190107',
|
||||
'timestamp': 1546854054,
|
||||
},
|
||||
'params': {
|
||||
'format': '480p',
|
||||
@@ -48,7 +48,7 @@ class DTubeIE(InfoExtractor):
|
||||
def canonical_url(h):
|
||||
if not h:
|
||||
return None
|
||||
return 'https://ipfs.io/ipfs/' + h
|
||||
return 'https://video.dtube.top/ipfs/' + h
|
||||
|
||||
formats = []
|
||||
for q in ('240', '480', '720', '1080', ''):
|
||||
@@ -59,7 +59,7 @@ class DTubeIE(InfoExtractor):
|
||||
try:
|
||||
self.to_screen('%s: Checking %s video format URL' % (video_id, format_id))
|
||||
self._downloader._opener.open(video_url, timeout=5).close()
|
||||
except timeout as e:
|
||||
except timeout:
|
||||
self.to_screen(
|
||||
'%s: %s URL is invalid, skipping' % (video_id, format_id))
|
||||
continue
|
||||
|
@@ -4,14 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -177,7 +175,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
video_id, 'Downloading mp4 JSON', fatal=False)
|
||||
if mp4_data:
|
||||
for format_id, format_url in mp4_data.get('data', {}).items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
height = int_or_none(format_id)
|
||||
if height is not None and m3u8_formats_dict.get(height):
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -34,8 +35,8 @@ class EggheadCourseIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
for lesson in lessons:
|
||||
lesson_url = lesson.get('http_url')
|
||||
if not lesson_url or not isinstance(lesson_url, compat_str):
|
||||
lesson_url = url_or_none(lesson.get('http_url'))
|
||||
if not lesson_url:
|
||||
continue
|
||||
lesson_id = lesson.get('id')
|
||||
if lesson_id:
|
||||
@@ -95,7 +96,8 @@ class EggheadLessonIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for _, format_url in lesson['media_urls'].items():
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
|
@@ -9,8 +9,10 @@ from ..utils import (
|
||||
encode_base_n,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,10 +26,16 @@ class EpornerIE(InfoExtractor):
|
||||
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
|
||||
'ext': 'mp4',
|
||||
'title': 'Infamous Tiffany Teen Strip Tease Video',
|
||||
'description': 'md5:764f39abf932daafa37485eb46efa152',
|
||||
'timestamp': 1232520922,
|
||||
'upload_date': '20090121',
|
||||
'duration': 1838,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'proxy': '127.0.0.1:8118'
|
||||
}
|
||||
}, {
|
||||
# New (May 2016) URL layout
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||
@@ -82,8 +90,8 @@ class EpornerIE(InfoExtractor):
|
||||
for format_id, format_dict in formats_dict.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
src = format_dict.get('src')
|
||||
if not isinstance(src, compat_str) or not src.startswith('http'):
|
||||
src = url_or_none(format_dict.get('src'))
|
||||
if not src or not src.startswith('http'):
|
||||
continue
|
||||
if kind == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
@@ -103,12 +111,15 @@ class EpornerIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = parse_duration(self._html_search_meta('duration', webpage))
|
||||
json_ld = self._search_json_ld(webpage, display_id, default={})
|
||||
|
||||
duration = parse_duration(self._html_search_meta(
|
||||
'duration', webpage, default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
return merge_dicts(json_ld, {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
@@ -116,4 +127,4 @@ class EpornerIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
}
|
||||
})
|
||||
|
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@@ -11,7 +13,13 @@ from ..utils import (
|
||||
|
||||
|
||||
class ExpressenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?expressen\.se/tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?expressen\.se/
|
||||
(?:(?:tvspelare/video|videoplayer/embed)/)?
|
||||
tv/(?:[^/]+/)*
|
||||
(?P<id>[^/?#&]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
|
||||
'md5': '2fbbe3ca14392a6b1b36941858d33a45',
|
||||
@@ -28,8 +36,21 @@ class ExpressenIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url') for mobj in re.finditer(
|
||||
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
|
@@ -54,6 +54,7 @@ from .appletrailers import (
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .arkena import ArkenaIE
|
||||
from .ard import (
|
||||
ARDBetaMediathekIE,
|
||||
ARDIE,
|
||||
ARDMediathekIE,
|
||||
)
|
||||
@@ -87,11 +88,7 @@ from .awaan import (
|
||||
AWAANLiveIE,
|
||||
AWAANSeasonIE,
|
||||
)
|
||||
from .azmedien import (
|
||||
AZMedienIE,
|
||||
AZMedienPlaylistIE,
|
||||
AZMedienShowPlaylistIE,
|
||||
)
|
||||
from .azmedien import AZMedienIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||
@@ -118,6 +115,10 @@ from .bilibili import (
|
||||
BiliBiliBangumiIE,
|
||||
)
|
||||
from .biobiochiletv import BioBioChileTVIE
|
||||
from .bitchute import (
|
||||
BitChuteIE,
|
||||
BitChuteChannelIE,
|
||||
)
|
||||
from .biqle import BIQLEIE
|
||||
from .bleacherreport import (
|
||||
BleacherReportIE,
|
||||
@@ -193,6 +194,10 @@ from .chirbit import (
|
||||
ChirbitProfileIE,
|
||||
)
|
||||
from .cinchcast import CinchcastIE
|
||||
from .ciscolive import (
|
||||
CiscoLiveSessionIE,
|
||||
CiscoLiveSearchIE,
|
||||
)
|
||||
from .cjsw import CJSWIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clippit import ClippitIE
|
||||
@@ -204,7 +209,10 @@ from .cloudy import CloudyIE
|
||||
from .clubic import ClubicIE
|
||||
from .clyp import ClypIE
|
||||
from .cmt import CMTIE
|
||||
from .cnbc import CNBCIE
|
||||
from .cnbc import (
|
||||
CNBCIE,
|
||||
CNBCVideoIE,
|
||||
)
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNBlogsIE,
|
||||
@@ -373,7 +381,6 @@ from .foxgay import FoxgayIE
|
||||
from .foxnews import (
|
||||
FoxNewsIE,
|
||||
FoxNewsArticleIE,
|
||||
FoxNewsInsiderIE,
|
||||
)
|
||||
from .foxsports import FoxSportsIE
|
||||
from .franceculture import FranceCultureIE
|
||||
@@ -391,6 +398,11 @@ from .francetv import (
|
||||
from .freesound import FreesoundIE
|
||||
from .freespeech import FreespeechIE
|
||||
from .freshlive import FreshLiveIE
|
||||
from .frontendmasters import (
|
||||
FrontendMastersIE,
|
||||
FrontendMastersLessonIE,
|
||||
FrontendMastersCourseIE
|
||||
)
|
||||
from .funimation import FunimationIE
|
||||
from .funk import (
|
||||
FunkMixIE,
|
||||
@@ -399,6 +411,7 @@ from .funk import (
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
from .gaia import GaiaIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gameone import (
|
||||
GameOneIE,
|
||||
@@ -439,6 +452,7 @@ from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hgtv import HGTVComShowIE
|
||||
from .hketv import HKETVIE
|
||||
from .hidive import HiDiveIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
@@ -457,6 +471,10 @@ from .hrti import (
|
||||
)
|
||||
from .huajiao import HuajiaoIE
|
||||
from .huffpost import HuffPostIE
|
||||
from .hungama import (
|
||||
HungamaIE,
|
||||
HungamaSongIE,
|
||||
)
|
||||
from .hypem import HypemIE
|
||||
from .iconosquare import IconosquareIE
|
||||
from .ign import (
|
||||
@@ -471,12 +489,17 @@ from .imdb import (
|
||||
from .imgur import (
|
||||
ImgurIE,
|
||||
ImgurAlbumIE,
|
||||
ImgurGalleryIE,
|
||||
)
|
||||
from .ina import InaIE
|
||||
from .inc import IncIE
|
||||
from .indavideo import IndavideoEmbedIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE, InstagramUserIE
|
||||
from .instagram import (
|
||||
InstagramIE,
|
||||
InstagramUserIE,
|
||||
InstagramTagIE,
|
||||
)
|
||||
from .internazionale import InternazionaleIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
@@ -512,6 +535,7 @@ from .keezmovies import KeezMoviesIE
|
||||
from .ketnet import KetnetIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .keek import KeekIE
|
||||
from .konserthusetplay import KonserthusetPlayIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
@@ -530,6 +554,7 @@ from .la7 import LA7IE
|
||||
from .laola1tv import (
|
||||
Laola1TvEmbedIE,
|
||||
Laola1TvIE,
|
||||
EHFTVIE,
|
||||
ITTFIE,
|
||||
)
|
||||
from .lci import LCIIE
|
||||
@@ -539,6 +564,11 @@ from .lcp import (
|
||||
)
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lecturio import (
|
||||
LecturioIE,
|
||||
LecturioCourseIE,
|
||||
LecturioDeCourseIE,
|
||||
)
|
||||
from .leeco import (
|
||||
LeIE,
|
||||
LePlaylistIE,
|
||||
@@ -559,6 +589,11 @@ from .limelight import (
|
||||
LimelightChannelListIE,
|
||||
)
|
||||
from .line import LineTVIE
|
||||
from .linkedin import (
|
||||
LinkedInLearningIE,
|
||||
LinkedInLearningCourseIE,
|
||||
)
|
||||
from .linuxacademy import LinuxAcademyIE
|
||||
from .litv import LiTVIE
|
||||
from .liveleak import (
|
||||
LiveLeakIE,
|
||||
@@ -585,6 +620,7 @@ from .mailru import (
|
||||
MailRuMusicSearchIE,
|
||||
)
|
||||
from .makertv import MakerTVIE
|
||||
from .malltv import MallTVIE
|
||||
from .mangomolo import (
|
||||
MangomoloVideoIE,
|
||||
MangomoloLiveIE,
|
||||
@@ -660,8 +696,7 @@ from .myvi import (
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import (
|
||||
NationalGeographicVideoIE,
|
||||
NationalGeographicIE,
|
||||
NationalGeographicEpisodeGuideIE,
|
||||
NationalGeographicTVIE,
|
||||
)
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
@@ -732,7 +767,10 @@ from .nonktube import NonkTubeIE
|
||||
from .noovo import NoovoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
from .nova import NovaIE
|
||||
from .nova import (
|
||||
NovaEmbedIE,
|
||||
NovaIE,
|
||||
)
|
||||
from .novamov import (
|
||||
AuroraVidIE,
|
||||
CloudTimeIE,
|
||||
@@ -764,7 +802,9 @@ from .nrk import (
|
||||
NRKSkoleIE,
|
||||
NRKTVIE,
|
||||
NRKTVDirekteIE,
|
||||
NRKTVEpisodeIE,
|
||||
NRKTVEpisodesIE,
|
||||
NRKTVSeasonIE,
|
||||
NRKTVSeriesIE,
|
||||
)
|
||||
from .ntvde import NTVDeIE
|
||||
@@ -799,6 +839,7 @@ from .orf import (
|
||||
ORFOE1IE,
|
||||
ORFIPTVIE,
|
||||
)
|
||||
from .outsidetv import OutsideTVIE
|
||||
from .packtpub import (
|
||||
PacktPubIE,
|
||||
PacktPubCourseIE,
|
||||
@@ -827,6 +868,7 @@ from .piksel import PikselIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .pladform import PladformIE
|
||||
from .playfm import PlayFMIE
|
||||
from .playplustv import PlayPlusTVIE
|
||||
from .plays import PlaysTVIE
|
||||
from .playtvak import PlaytvakIE
|
||||
from .playvid import PlayvidIE
|
||||
@@ -854,6 +896,10 @@ from .pornhub import (
|
||||
from .pornotube import PornotubeIE
|
||||
from .pornovoisines import PornoVoisinesIE
|
||||
from .pornoxo import PornoXOIE
|
||||
from .puhutv import (
|
||||
PuhuTVIE,
|
||||
PuhuTVSerieIE,
|
||||
)
|
||||
from .presstv import PressTVIE
|
||||
from .primesharetv import PrimeShareTVIE
|
||||
from .promptfile import PromptFileIE
|
||||
@@ -885,7 +931,10 @@ from .rai import (
|
||||
RaiPlayPlaylistIE,
|
||||
RaiIE,
|
||||
)
|
||||
from .raywenderlich import RayWenderlichIE
|
||||
from .raywenderlich import (
|
||||
RayWenderlichIE,
|
||||
RayWenderlichCourseIE,
|
||||
)
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redbulltv import RedBullTVIE
|
||||
@@ -1011,7 +1060,10 @@ from .southpark import (
|
||||
SouthParkEsIE,
|
||||
SouthParkNlIE
|
||||
)
|
||||
from .spankbang import SpankBangIE
|
||||
from .spankbang import (
|
||||
SpankBangIE,
|
||||
SpankBangPlaylistIE,
|
||||
)
|
||||
from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
@@ -1021,7 +1073,7 @@ from .spike import (
|
||||
)
|
||||
from .stitcher import StitcherIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
@@ -1041,6 +1093,7 @@ from .stretchinternet import StretchInternetIE
|
||||
from .sunporno import SunPornoIE
|
||||
from .svt import (
|
||||
SVTIE,
|
||||
SVTPageIE,
|
||||
SVTPlayIE,
|
||||
SVTSeriesIE,
|
||||
)
|
||||
@@ -1055,6 +1108,10 @@ from .tass import TassIE
|
||||
from .tastytrade import TastyTradeIE
|
||||
from .tbs import TBSIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachable import (
|
||||
TeachableIE,
|
||||
TeachableCourseIE,
|
||||
)
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
TeacherTubeUserIE,
|
||||
@@ -1063,6 +1120,7 @@ from .teachingchannel import TeachingChannelIE
|
||||
from .teamcoco import TeamcocoIE
|
||||
from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
from .tele5 import Tele5IE
|
||||
from .tele13 import Tele13IE
|
||||
from .telebruxelles import TeleBruxellesIE
|
||||
from .telecinco import TelecincoIE
|
||||
@@ -1092,6 +1150,10 @@ from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .thisoldhouse import ThisOldHouseIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .tiktok import (
|
||||
TikTokIE,
|
||||
TikTokUserIE,
|
||||
)
|
||||
from .tinypic import TinyPicIE
|
||||
from .tmz import (
|
||||
TMZIE,
|
||||
@@ -1110,6 +1172,7 @@ from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trunews import TruNewsIE
|
||||
from .trutv import TruTVIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tubitv import TubiTvIE
|
||||
@@ -1129,7 +1192,6 @@ from .tv2 import (
|
||||
TV2ArticleIE,
|
||||
)
|
||||
from .tv2hu import TV2HuIE
|
||||
from .tv3 import TV3IE
|
||||
from .tv4 import TV4IE
|
||||
from .tv5mondeplus import TV5MondePlusIE
|
||||
from .tva import TVAIE
|
||||
@@ -1148,23 +1210,27 @@ from .tvnet import TVNetIE
|
||||
from .tvnoe import TVNoeIE
|
||||
from .tvnow import (
|
||||
TVNowIE,
|
||||
TVNowListIE,
|
||||
TVNowNewIE,
|
||||
TVNowSeasonIE,
|
||||
TVNowAnnualIE,
|
||||
TVNowShowIE,
|
||||
)
|
||||
from .tvp import (
|
||||
TVPEmbedIE,
|
||||
TVPIE,
|
||||
TVPSeriesIE,
|
||||
TVPWebsiteIE,
|
||||
)
|
||||
from .tvplay import (
|
||||
TVPlayIE,
|
||||
ViafreeIE,
|
||||
TVPlayHomeIE,
|
||||
)
|
||||
from .tvplayer import TVPlayerIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .twentythreevideo import TwentyThreeVideoIE
|
||||
from .twitcasting import TwitCastingIE
|
||||
from .twitch import (
|
||||
TwitchVideoIE,
|
||||
TwitchChapterIE,
|
||||
@@ -1198,10 +1264,6 @@ from .uplynk import (
|
||||
UplynkIE,
|
||||
UplynkPreplayIE,
|
||||
)
|
||||
from .upskill import (
|
||||
UpskillIE,
|
||||
UpskillCourseIE,
|
||||
)
|
||||
from .urort import UrortIE
|
||||
from .urplay import URPlayIE
|
||||
from .usanetwork import USANetworkIE
|
||||
@@ -1270,6 +1332,7 @@ from .vimeo import (
|
||||
VimeoReviewIE,
|
||||
VimeoUserIE,
|
||||
VimeoWatchLaterIE,
|
||||
VHXEmbedIE,
|
||||
)
|
||||
from .vimple import VimpleIE
|
||||
from .vine import (
|
||||
@@ -1280,6 +1343,7 @@ from .viki import (
|
||||
VikiIE,
|
||||
VikiChannelIE,
|
||||
)
|
||||
from .viqeo import ViqeoIE
|
||||
from .viu import (
|
||||
ViuIE,
|
||||
ViuPlaylistIE,
|
||||
@@ -1304,7 +1368,6 @@ from .voxmedia import (
|
||||
VoxMediaVolumeIE,
|
||||
VoxMediaIE,
|
||||
)
|
||||
from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vrak import VrakIE
|
||||
from .vrv import (
|
||||
@@ -1318,6 +1381,7 @@ from .vuclip import VuClipIE
|
||||
from .vvvvid import VVVVIDIE
|
||||
from .vyborymos import VyboryMosIE
|
||||
from .vzaar import VzaarIE
|
||||
from .wakanim import WakanimIE
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import (
|
||||
WashingtonPostIE,
|
||||
@@ -1356,6 +1420,7 @@ from .wsj import (
|
||||
WSJIE,
|
||||
WSJArticleIE,
|
||||
)
|
||||
from .wwe import WWEIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xfileshare import XFileShareIE
|
||||
@@ -1405,6 +1470,7 @@ from .younow import (
|
||||
YouNowMomentIE,
|
||||
)
|
||||
from .youporn import YouPornIE
|
||||
from .yourporn import YourPornIE
|
||||
from .yourupload import YourUploadIE
|
||||
from .youtube import (
|
||||
YoutubeIE,
|
||||
@@ -1428,10 +1494,24 @@ from .youtube import (
|
||||
from .zapiks import ZapiksIE
|
||||
from .zaq1 import Zaq1IE
|
||||
from .zattoo import (
|
||||
BBVTVIE,
|
||||
EinsUndEinsTVIE,
|
||||
EWETVIE,
|
||||
GlattvisionTVIE,
|
||||
MNetTVIE,
|
||||
MyVisionTVIE,
|
||||
NetPlusIE,
|
||||
OsnatelTVIE,
|
||||
QuantumTVIE,
|
||||
QuicklineIE,
|
||||
QuicklineLiveIE,
|
||||
SaltTVIE,
|
||||
SAKTVIE,
|
||||
VTXTVIE,
|
||||
WalyTVIE,
|
||||
ZattooIE,
|
||||
ZattooLiveIE,
|
||||
)
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import ZingMp3IE
|
||||
from .zype import ZypeIE
|
||||
|
@@ -20,6 +20,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
parse_count,
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
@@ -56,7 +57,7 @@ class FacebookIE(InfoExtractor):
|
||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||
|
||||
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'
|
||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||
@@ -75,7 +76,7 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '274175099429670',
|
||||
'ext': 'mp4',
|
||||
'title': 'Asif Nawab Butt posted a video to his Timeline.',
|
||||
'title': 're:^Asif Nawab Butt posted a video',
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
@@ -133,7 +134,7 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': '0d9813160b146b3bc8744e006027fcc6',
|
||||
'md5': '9571fae53d4165bbbadb17a94651dcdc',
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
@@ -142,6 +143,7 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20161030',
|
||||
'uploader': 'CNN',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
@@ -149,7 +151,7 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1417995061575415',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
|
||||
'title': 'md5:1db063d6a8c13faa8da727817339c857',
|
||||
'timestamp': 1486648217,
|
||||
'upload_date': '20170209',
|
||||
'uploader': 'Yaroslav Korpan',
|
||||
@@ -176,7 +178,7 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1396382447100162',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
|
||||
'title': 'md5:19a428bbde91364e3de815383b54a235',
|
||||
'timestamp': 1486035494,
|
||||
'upload_date': '20170202',
|
||||
'uploader': 'Elisabeth Ahtn',
|
||||
@@ -353,7 +355,6 @@ class FacebookIE(InfoExtractor):
|
||||
tahoe_data = self._download_webpage(
|
||||
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
|
||||
data=urlencode_postdata({
|
||||
'__user': 0,
|
||||
'__a': 1,
|
||||
'__pc': self._search_regex(
|
||||
r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
|
||||
@@ -361,6 +362,9 @@ class FacebookIE(InfoExtractor):
|
||||
'__rev': self._search_regex(
|
||||
r'client_revision["\']\s*:\s*(\d+),', webpage,
|
||||
'client revision', default='3944515'),
|
||||
'fb_dtsg': self._search_regex(
|
||||
r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
|
||||
webpage, 'dtsg token', default=''),
|
||||
}),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
@@ -426,6 +430,10 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp', default=None))
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
view_count = parse_count(self._search_regex(
|
||||
r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
|
||||
default=None))
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
@@ -433,6 +441,7 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
return webpage, info_dict
|
||||
|
@@ -10,6 +10,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -88,8 +89,8 @@ class FirstTVIE(InfoExtractor):
|
||||
formats = []
|
||||
path = None
|
||||
for f in item.get('mbr', []):
|
||||
src = f.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
src = url_or_none(f.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
tbr = int_or_none(self._search_regex(
|
||||
r'_(\d{3,})\.mp4', src, 'tbr', default=None))
|
||||
|
@@ -3,15 +3,45 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FourTubeBaseIE(InfoExtractor):
|
||||
_TKN_HOST = 'tkn.kodicdn.com'
|
||||
|
||||
def _extract_formats(self, url, video_id, media_id, sources):
|
||||
token_url = 'https://%s/%s/desktop/%s' % (
|
||||
self._TKN_HOST, media_id, '+'.join(sources))
|
||||
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
tokens = self._download_json(token_url, video_id, data=b'', headers={
|
||||
'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
|
||||
'Referer': url,
|
||||
})
|
||||
formats = [{
|
||||
'url': tokens[format]['token'],
|
||||
'format_id': format + 'p',
|
||||
'resolution': format + 'p',
|
||||
'quality': int(format),
|
||||
} for format in sources]
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
|
||||
@@ -68,21 +98,7 @@ class FourTubeBaseIE(InfoExtractor):
|
||||
media_id = params[0]
|
||||
sources = ['%s' % p for p in params[2]]
|
||||
|
||||
token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format(
|
||||
media_id, '+'.join(sources))
|
||||
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
tokens = self._download_json(token_url, video_id, data=b'', headers={
|
||||
'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
|
||||
'Referer': url,
|
||||
})
|
||||
formats = [{
|
||||
'url': tokens[format]['token'],
|
||||
'format_id': format + 'p',
|
||||
'resolution': format + 'p',
|
||||
'quality': int(format),
|
||||
} for format in sources]
|
||||
self._sort_formats(formats)
|
||||
formats = self._extract_formats(url, video_id, media_id, sources)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -164,6 +180,7 @@ class FuxIE(FourTubeBaseIE):
|
||||
class PornTubeIE(FourTubeBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?porntube\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
|
||||
_URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s'
|
||||
_TKN_HOST = 'tkn.porntube.com'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759',
|
||||
'info_dict': {
|
||||
@@ -171,13 +188,32 @@ class PornTubeIE(FourTubeBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Teen couple doing anal',
|
||||
'uploader': 'Alexy',
|
||||
'uploader_id': 'Alexy',
|
||||
'uploader_id': '91488',
|
||||
'upload_date': '20150606',
|
||||
'timestamp': 1433595647,
|
||||
'duration': 5052,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.porntube.com/videos/squirting-teen-ballerina-ecg_1331406',
|
||||
'info_dict': {
|
||||
'id': '1331406',
|
||||
'ext': 'mp4',
|
||||
'title': 'Squirting Teen Ballerina on ECG',
|
||||
'uploader': 'Exploited College Girls',
|
||||
'uploader_id': '665',
|
||||
'channel': 'Exploited College Girls',
|
||||
'channel_id': '665',
|
||||
'upload_date': '20130920',
|
||||
'timestamp': 1379685485,
|
||||
'duration': 851,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
@@ -191,6 +227,55 @@ class PornTubeIE(FourTubeBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, display_id = mobj.group('id', 'display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video = self._parse_json(
|
||||
self._search_regex(
|
||||
r'INITIALSTATE\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||
webpage, 'data', group='value'), video_id,
|
||||
transform_source=lambda x: compat_urllib_parse_unquote(
|
||||
compat_b64decode(x).decode('utf-8')))['page']['video']
|
||||
|
||||
title = video['title']
|
||||
media_id = video['mediaId']
|
||||
sources = [compat_str(e['height'])
|
||||
for e in video['encodings'] if e.get('height')]
|
||||
formats = self._extract_formats(url, video_id, media_id, sources)
|
||||
|
||||
thumbnail = url_or_none(video.get('masterThumb'))
|
||||
uploader = try_get(video, lambda x: x['user']['username'], compat_str)
|
||||
uploader_id = str_or_none(try_get(
|
||||
video, lambda x: x['user']['id'], int))
|
||||
channel = try_get(video, lambda x: x['channel']['name'], compat_str)
|
||||
channel_id = str_or_none(try_get(
|
||||
video, lambda x: x['channel']['id'], int))
|
||||
like_count = int_or_none(video.get('likes'))
|
||||
dislike_count = int_or_none(video.get('dislikes'))
|
||||
view_count = int_or_none(video.get('playsQty'))
|
||||
duration = int_or_none(video.get('durationInSeconds'))
|
||||
timestamp = unified_timestamp(video.get('publishedAt'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader or channel,
|
||||
'uploader_id': uploader_id or channel_id,
|
||||
'channel': channel,
|
||||
'channel_id': channel_id,
|
||||
'timestamp': timestamp,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'view_count': view_count,
|
||||
'duration': duration,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
||||
|
||||
class PornerBrosIE(FourTubeBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
|
||||
|
@@ -1,17 +1,20 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from .uplynk import UplynkPreplayIE
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -31,6 +34,7 @@ class FOXIE(AdobePassIE):
|
||||
'upload_date': '20170901',
|
||||
'creator': 'FOX',
|
||||
'series': 'Gotham',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -44,48 +48,54 @@ class FOXIE(AdobePassIE):
|
||||
'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_HOME_PAGE_URL = 'https://www.fox.com/'
|
||||
_API_KEY = 'abdcbed02c124d393b39e818a4312055'
|
||||
_access_token = None
|
||||
|
||||
def _call_api(self, path, video_id, data=None):
|
||||
headers = {
|
||||
'X-Api-Key': self._API_KEY,
|
||||
}
|
||||
if self._access_token:
|
||||
headers['Authorization'] = 'Bearer ' + self._access_token
|
||||
return self._download_json(
|
||||
'https://api2.fox.com/v2.0/' + path,
|
||||
video_id, data=data, headers=headers)
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._access_token:
|
||||
mvpd_auth = self._get_cookies(self._HOME_PAGE_URL).get('mvpd-auth')
|
||||
if mvpd_auth:
|
||||
self._access_token = (self._parse_json(compat_urllib_parse_unquote(
|
||||
mvpd_auth.value), None, fatal=False) or {}).get('accessToken')
|
||||
if not self._access_token:
|
||||
self._access_token = self._call_api(
|
||||
'login', None, json.dumps({
|
||||
'deviceId': compat_str(uuid.uuid4()),
|
||||
}).encode())['accessToken']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id,
|
||||
video_id, headers={
|
||||
'apikey': 'abdcbed02c124d393b39e818a4312055',
|
||||
'Content-Type': 'application/json',
|
||||
'Referer': url,
|
||||
})
|
||||
video = self._call_api('vodplayer/' + video_id, video_id)
|
||||
|
||||
title = video['name']
|
||||
release_url = video['videoRelease']['url']
|
||||
|
||||
description = video.get('description')
|
||||
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
|
||||
video.get('duration')) or parse_duration(video.get('duration'))
|
||||
timestamp = unified_timestamp(video.get('datePublished'))
|
||||
rating = video.get('contentRating')
|
||||
age_limit = parse_age_limit(rating)
|
||||
release_url = video['url']
|
||||
m3u8_url = self._download_json(release_url, video_id)['playURL']
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
data = try_get(
|
||||
video, lambda x: x['trackingData']['properties'], dict) or {}
|
||||
|
||||
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
|
||||
video.get('duration')) or parse_duration(video.get('duration'))
|
||||
timestamp = unified_timestamp(video.get('datePublished'))
|
||||
creator = data.get('brand') or data.get('network') or video.get('network')
|
||||
|
||||
series = video.get('seriesName') or data.get(
|
||||
'seriesName') or data.get('show')
|
||||
season_number = int_or_none(video.get('seasonNumber'))
|
||||
episode = video.get('name')
|
||||
episode_number = int_or_none(video.get('episodeNumber'))
|
||||
release_year = int_or_none(video.get('releaseYear'))
|
||||
|
||||
if data.get('authRequired'):
|
||||
resource = self._get_mvpd_resource(
|
||||
'fbc-fox', title, video.get('guid'), rating)
|
||||
release_url = update_url_query(
|
||||
release_url, {
|
||||
'auth': self._extract_mvpd_auth(
|
||||
url, video_id, 'fbc-fox', resource)
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
for doc_rel in video.get('documentReleases', []):
|
||||
@@ -98,36 +108,19 @@ class FOXIE(AdobePassIE):
|
||||
}]
|
||||
break
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'description': video.get('description'),
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'age_limit': age_limit,
|
||||
'age_limit': parse_age_limit(video.get('contentRating')),
|
||||
'creator': creator,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'release_year': release_year,
|
||||
'season_number': int_or_none(video.get('seasonNumber')),
|
||||
'episode': video.get('name'),
|
||||
'episode_number': int_or_none(video.get('episodeNumber')),
|
||||
'release_year': int_or_none(video.get('releaseYear')),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
urlh = self._request_webpage(HEADRequest(release_url), video_id)
|
||||
video_url = compat_str(urlh.geturl())
|
||||
|
||||
if UplynkPreplayIE.suitable(video_url):
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': video_url,
|
||||
'ie_key': UplynkPreplayIE.ie_key(),
|
||||
})
|
||||
else:
|
||||
m3u8_url = self._download_json(release_url, video_id)['playURL']
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
return info
|
||||
|
@@ -58,6 +58,14 @@ class FoxNewsIE(AMPIE):
|
||||
},
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
@@ -68,21 +76,41 @@ class FoxNewsIE(AMPIE):
|
||||
|
||||
|
||||
class FoxNewsArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:insider\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
|
||||
IE_NAME = 'foxnews:article'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# data-video-id
|
||||
'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
|
||||
'md5': '62aa5a781b308fdee212ebb6f33ae7ef',
|
||||
'md5': '83d44e1aff1433e7a29a7b537d1700b5',
|
||||
'info_dict': {
|
||||
'id': '5116295019001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Trump and Clinton asked to defend positions on Iraq War',
|
||||
'description': 'Veterans react on \'The Kelly File\'',
|
||||
'timestamp': 1473299755,
|
||||
'timestamp': 1473301045,
|
||||
'upload_date': '20160908',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# iframe embed
|
||||
'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
|
||||
'info_dict': {
|
||||
'id': '5748266721001',
|
||||
'ext': 'flv',
|
||||
'title': 'Kyle Kashuv has a positive message for the Trump White House',
|
||||
'description': 'Marjory Stoneman Douglas student disagrees with classmates.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 229,
|
||||
'timestamp': 1520594670,
|
||||
'upload_date': '20180309',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@@ -90,51 +118,10 @@ class FoxNewsArticleIE(InfoExtractor):
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
|
||||
webpage, 'video ID', group='id')
|
||||
webpage, 'video ID', group='id', default=None)
|
||||
if video_id:
|
||||
return self.url_result(
|
||||
'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key())
|
||||
|
||||
return self.url_result(
|
||||
'http://video.foxnews.com/v/' + video_id,
|
||||
FoxNewsIE.ie_key())
|
||||
|
||||
|
||||
class FoxNewsInsiderIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)'
|
||||
IE_NAME = 'foxnews:insider'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||
'md5': 'a10c755e582d28120c62749b4feb4c0c',
|
||||
'info_dict': {
|
||||
'id': '5099377331001',
|
||||
'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||
'ext': 'mp4',
|
||||
'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive',
|
||||
'description': 'Is campus censorship getting out of control?',
|
||||
'timestamp': 1472168725,
|
||||
'upload_date': '20160825',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FoxNewsIE.ie_key()],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': FoxNewsIE.ie_key(),
|
||||
'url': embed_url,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key())
|
||||
|
@@ -1,43 +1,33 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class FoxSportsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.foxsports.com/tennessee/video/432609859715',
|
||||
'md5': 'b49050e955bebe32c301972e4012ac17',
|
||||
'info_dict': {
|
||||
'id': 'bwduI3X_TgUB',
|
||||
'id': '432609859715',
|
||||
'ext': 'mp4',
|
||||
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
|
||||
'description': 'Courtney Lee talks about Memphis being focused.',
|
||||
'upload_date': '20150423',
|
||||
'timestamp': 1429761109,
|
||||
# TODO: fix timestamp
|
||||
'upload_date': '19700101', # '20150423',
|
||||
# 'timestamp': 1429761109,
|
||||
'uploader': 'NEWA-FNG-FOXSPORTS',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
config = self._parse_json(
|
||||
self._html_search_regex(
|
||||
r"""class="[^"]*(?:fs-player|platformPlayer-wrapper)[^"]*".+?data-player-config='([^']+)'""",
|
||||
webpage, 'data player config'),
|
||||
video_id)
|
||||
|
||||
return self.url_result(smuggle_url(update_url_query(
|
||||
config['releaseURL'], {
|
||||
'mbr': 'true',
|
||||
'switch': 'http',
|
||||
}), {'force_smil_url': True}))
|
||||
return self.url_result(
|
||||
'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed')
|
||||
|
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
|
||||
@@ -115,14 +116,13 @@ class FranceTVIE(InfoExtractor):
|
||||
|
||||
def sign(manifest_url, manifest_id):
|
||||
for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
|
||||
signed_url = self._download_webpage(
|
||||
signed_url = url_or_none(self._download_webpage(
|
||||
'https://%s/esi/TA' % host, video_id,
|
||||
'Downloading signed %s manifest URL' % manifest_id,
|
||||
fatal=False, query={
|
||||
'url': manifest_url,
|
||||
})
|
||||
if (signed_url and isinstance(signed_url, compat_str) and
|
||||
re.search(r'^(?:https?:)?//', signed_url)):
|
||||
}))
|
||||
if signed_url:
|
||||
return signed_url
|
||||
return manifest_url
|
||||
|
||||
@@ -271,7 +271,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
catalogue = None
|
||||
video_id = self._search_regex(
|
||||
r'data-main-video=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
r'(?:data-main-video\s*=|videoId\s*:)\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
webpage, 'video id', default=None, group='id')
|
||||
|
||||
if not video_id:
|
||||
|
@@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class FreespeechIE(InfoExtractor):
|
||||
@@ -27,8 +28,4 @@ class FreespeechIE(InfoExtractor):
|
||||
r'data-video-url="([^"]+)"',
|
||||
webpage, 'youtube url')
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': youtube_url,
|
||||
'ie_key': 'Youtube',
|
||||
}
|
||||
return self.url_result(youtube_url, YoutubeIE.ie_key())
|
||||
|
263
youtube_dl/extractor/frontendmasters.py
Normal file
263
youtube_dl/extractor/frontendmasters.py
Normal file
@@ -0,0 +1,263 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class FrontendMastersBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://api.frontendmasters.com/v1/kabuki'
|
||||
_LOGIN_URL = 'https://frontendmasters.com/login/'
|
||||
|
||||
_NETRC_MACHINE = 'frontendmasters'
|
||||
|
||||
_QUALITIES = {
|
||||
'low': {'width': 480, 'height': 360},
|
||||
'mid': {'width': 1280, 'height': 720},
|
||||
'high': {'width': 1920, 'height': 1080}
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form.update({
|
||||
'username': username,
|
||||
'password': password
|
||||
})
|
||||
|
||||
post_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
|
||||
'post_url', default=self._LOGIN_URL, group='url')
|
||||
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
response = self._download_webpage(
|
||||
post_url, None, 'Logging in', data=urlencode_postdata(login_form),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
# Successful login
|
||||
if any(p in response for p in (
|
||||
'wp-login.php?action=logout', '>Logout')):
|
||||
return
|
||||
|
||||
error = self._html_search_regex(
|
||||
r'class=(["\'])(?:(?!\1).)*\bMessageAlert\b(?:(?!\1).)*\1[^>]*>(?P<error>[^<]+)<',
|
||||
response, 'error message', default=None, group='error')
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
|
||||
def _download_course(self, course_name, url):
|
||||
return self._download_json(
|
||||
'%s/courses/%s' % (self._API_BASE, course_name), course_name,
|
||||
'Downloading course JSON', headers={'Referer': url})
|
||||
|
||||
@staticmethod
|
||||
def _extract_chapters(course):
|
||||
chapters = []
|
||||
lesson_elements = course.get('lessonElements')
|
||||
if isinstance(lesson_elements, list):
|
||||
chapters = [url_or_none(e) for e in lesson_elements if url_or_none(e)]
|
||||
return chapters
|
||||
|
||||
@staticmethod
|
||||
def _extract_lesson(chapters, lesson_id, lesson):
|
||||
title = lesson.get('title') or lesson_id
|
||||
display_id = lesson.get('slug')
|
||||
description = lesson.get('description')
|
||||
thumbnail = lesson.get('thumbnail')
|
||||
|
||||
chapter_number = None
|
||||
index = lesson.get('index')
|
||||
element_index = lesson.get('elementIndex')
|
||||
if (isinstance(index, int) and isinstance(element_index, int) and
|
||||
index < element_index):
|
||||
chapter_number = element_index - index
|
||||
chapter = (chapters[chapter_number - 1]
|
||||
if chapter_number - 1 < len(chapters) else None)
|
||||
|
||||
duration = None
|
||||
timestamp = lesson.get('timestamp')
|
||||
if isinstance(timestamp, compat_str):
|
||||
mobj = re.search(
|
||||
r'(?P<start>\d{1,2}:\d{1,2}:\d{1,2})\s*-(?P<end>\s*\d{1,2}:\d{1,2}:\d{1,2})',
|
||||
timestamp)
|
||||
if mobj:
|
||||
duration = parse_duration(mobj.group('end')) - parse_duration(
|
||||
mobj.group('start'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'frontendmasters:%s' % lesson_id,
|
||||
'ie_key': FrontendMastersIE.ie_key(),
|
||||
'id': lesson_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'chapter': chapter,
|
||||
'chapter_number': chapter_number,
|
||||
}
|
||||
|
||||
|
||||
class FrontendMastersIE(FrontendMastersBaseIE):
|
||||
_VALID_URL = r'(?:frontendmasters:|https?://api\.frontendmasters\.com/v\d+/kabuki/video/)(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://api.frontendmasters.com/v1/kabuki/video/a2qogef6ba',
|
||||
'md5': '7f161159710d6b7016a4f4af6fcb05e2',
|
||||
'info_dict': {
|
||||
'id': 'a2qogef6ba',
|
||||
'ext': 'mp4',
|
||||
'title': 'a2qogef6ba',
|
||||
},
|
||||
'skip': 'Requires FrontendMasters account credentials',
|
||||
}, {
|
||||
'url': 'frontendmasters:a2qogef6ba',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lesson_id = self._match_id(url)
|
||||
|
||||
source_url = '%s/video/%s/source' % (self._API_BASE, lesson_id)
|
||||
|
||||
formats = []
|
||||
for ext in ('webm', 'mp4'):
|
||||
for quality in ('low', 'mid', 'high'):
|
||||
resolution = self._QUALITIES[quality].copy()
|
||||
format_id = '%s-%s' % (ext, quality)
|
||||
format_url = self._download_json(
|
||||
source_url, lesson_id,
|
||||
'Downloading %s source JSON' % format_id, query={
|
||||
'f': ext,
|
||||
'r': resolution['height'],
|
||||
}, headers={
|
||||
'Referer': url,
|
||||
}, fatal=False)['url']
|
||||
|
||||
if not format_url:
|
||||
continue
|
||||
|
||||
f = resolution.copy()
|
||||
f.update({
|
||||
'url': format_url,
|
||||
'ext': ext,
|
||||
'format_id': format_id,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {
|
||||
'en': [{
|
||||
'url': '%s/transcripts/%s.vtt' % (self._API_BASE, lesson_id),
|
||||
}]
|
||||
}
|
||||
|
||||
return {
|
||||
'id': lesson_id,
|
||||
'title': lesson_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles
|
||||
}
|
||||
|
||||
|
||||
class FrontendMastersLessonIE(FrontendMastersPageBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<course_name>[^/]+)/(?P<lesson_name>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'https://frontendmasters.com/courses/web-development/tools',
|
||||
'info_dict': {
|
||||
'id': 'a2qogef6ba',
|
||||
'display_id': 'tools',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tools',
|
||||
'description': 'md5:82c1ea6472e88ed5acd1829fe992e4f7',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'chapter': 'Introduction',
|
||||
'chapter_number': 1,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires FrontendMasters account credentials',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
course_name, lesson_name = mobj.group('course_name', 'lesson_name')
|
||||
|
||||
course = self._download_course(course_name, url)
|
||||
|
||||
lesson_id, lesson = next(
|
||||
(video_id, data)
|
||||
for video_id, data in course['lessonData'].items()
|
||||
if data.get('slug') == lesson_name)
|
||||
|
||||
chapters = self._extract_chapters(course)
|
||||
return self._extract_lesson(chapters, lesson_id, lesson)
|
||||
|
||||
|
||||
class FrontendMastersCourseIE(FrontendMastersPageBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'https://frontendmasters.com/courses/web-development/',
|
||||
'info_dict': {
|
||||
'id': 'web-development',
|
||||
'title': 'Introduction to Web Development',
|
||||
'description': 'md5:9317e6e842098bf725d62360e52d49a6',
|
||||
},
|
||||
'playlist_count': 81,
|
||||
'skip': 'Requires FrontendMasters account credentials',
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if FrontendMastersLessonIE.suitable(url) else super(
|
||||
FrontendMastersBaseIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
course_name = self._match_id(url)
|
||||
|
||||
course = self._download_course(course_name, url)
|
||||
|
||||
chapters = self._extract_chapters(course)
|
||||
|
||||
lessons = sorted(
|
||||
course['lessonData'].values(), key=lambda data: data['index'])
|
||||
|
||||
entries = []
|
||||
for lesson in lessons:
|
||||
lesson_name = lesson.get('slug')
|
||||
if not lesson_name:
|
||||
continue
|
||||
lesson_id = lesson.get('hash') or lesson.get('statsId')
|
||||
entries.append(self._extract_lesson(chapters, lesson_id, lesson))
|
||||
|
||||
title = course.get('title')
|
||||
description = course.get('description')
|
||||
|
||||
return self.playlist_result(entries, course_name, title, description)
|
@@ -1,6 +1,9 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
@@ -87,7 +90,7 @@ class FunimationIE(InfoExtractor):
|
||||
|
||||
video_id = title_data.get('id') or self._search_regex([
|
||||
r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
|
||||
r'<iframe[^>]+src="/player/(\d+)"',
|
||||
r'<iframe[^>]+src="/player/(\d+)',
|
||||
], webpage, 'video_id', default=None)
|
||||
if not video_id:
|
||||
player_url = self._html_search_meta([
|
||||
@@ -108,8 +111,10 @@ class FunimationIE(InfoExtractor):
|
||||
if self._TOKEN:
|
||||
headers['Authorization'] = 'Token %s' % self._TOKEN
|
||||
sources = self._download_json(
|
||||
'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id,
|
||||
video_id, headers=headers)['items']
|
||||
'https://www.funimation.com/api/showexperience/%s/' % video_id,
|
||||
video_id, headers=headers, query={
|
||||
'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]),
|
||||
})['items']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
error = self._parse_json(e.cause.read(), video_id)['errors'][0]
|
||||
|
@@ -1,10 +1,12 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .nexx import NexxIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
@@ -12,6 +14,19 @@ from ..utils import (
|
||||
|
||||
|
||||
class FunkBaseIE(InfoExtractor):
|
||||
_HEADERS = {
|
||||
'Accept': '*/*',
|
||||
'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4',
|
||||
}
|
||||
_AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
|
||||
|
||||
@staticmethod
|
||||
def _make_headers(referer):
|
||||
headers = FunkBaseIE._HEADERS.copy()
|
||||
headers['Referer'] = referer
|
||||
return headers
|
||||
|
||||
def _make_url_result(self, video):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
@@ -48,19 +63,19 @@ class FunkMixIE(FunkBaseIE):
|
||||
|
||||
lists = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/curation/curatedLists/',
|
||||
mix_id, headers={
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbC12Mi4wIiwic2NvcGUiOiJzdGF0aWMtY29udGVudC1hcGksY3VyYXRpb24tc2VydmljZSxzZWFyY2gtYXBpIn0.SGCC1IXHLtZYoo8PvRKlU2gXH1su8YSu47sB3S4iXBI',
|
||||
'Referer': url,
|
||||
}, query={
|
||||
mix_id, headers=self._make_headers(url), query={
|
||||
'size': 100,
|
||||
})['result']['lists']
|
||||
})['_embedded']['curatedListList']
|
||||
|
||||
metas = next(
|
||||
l for l in lists
|
||||
if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
|
||||
video = next(
|
||||
meta['videoDataDelegate']
|
||||
for meta in metas if meta.get('alias') == alias)
|
||||
for meta in metas
|
||||
if try_get(
|
||||
meta, lambda x: x['videoDataDelegate']['alias'],
|
||||
compat_str) == alias)
|
||||
|
||||
return self._make_url_result(video)
|
||||
|
||||
@@ -104,25 +119,53 @@ class FunkChannelIE(FunkBaseIE):
|
||||
channel_id = mobj.group('id')
|
||||
alias = mobj.group('alias')
|
||||
|
||||
headers = {
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
|
||||
'Referer': url,
|
||||
}
|
||||
headers = self._make_headers(url)
|
||||
|
||||
video = None
|
||||
|
||||
by_id_list = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/byIdList', channel_id,
|
||||
headers=headers, query={
|
||||
'ids': alias,
|
||||
}, fatal=False)
|
||||
if by_id_list:
|
||||
video = try_get(by_id_list, lambda x: x['result'][0], dict)
|
||||
# Id-based channels are currently broken on their side: webplayer
|
||||
# tries to process them via byChannelAlias endpoint and fails
|
||||
# predictably.
|
||||
for page_num in itertools.count():
|
||||
by_channel_alias = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
|
||||
% channel_id,
|
||||
'Downloading byChannelAlias JSON page %d' % (page_num + 1),
|
||||
headers=headers, query={
|
||||
'filterFsk': 'false',
|
||||
'sort': 'creationDate,desc',
|
||||
'size': 100,
|
||||
'page': page_num,
|
||||
}, fatal=False)
|
||||
if not by_channel_alias:
|
||||
break
|
||||
video_list = try_get(
|
||||
by_channel_alias, lambda x: x['_embedded']['videoList'], list)
|
||||
if not video_list:
|
||||
break
|
||||
try:
|
||||
video = next(r for r in video_list if r.get('alias') == alias)
|
||||
break
|
||||
except StopIteration:
|
||||
pass
|
||||
if not try_get(
|
||||
by_channel_alias, lambda x: x['_links']['next']):
|
||||
break
|
||||
|
||||
if not video:
|
||||
by_id_list = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/byIdList',
|
||||
channel_id, 'Downloading byIdList JSON', headers=headers,
|
||||
query={
|
||||
'ids': alias,
|
||||
}, fatal=False)
|
||||
if by_id_list:
|
||||
video = try_get(by_id_list, lambda x: x['result'][0], dict)
|
||||
|
||||
if not video:
|
||||
results = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
|
||||
headers=headers, query={
|
||||
'https://www.funk.net/api/v3.0/content/videos/filter',
|
||||
channel_id, 'Downloading filter JSON', headers=headers, query={
|
||||
'channelId': channel_id,
|
||||
'size': 100,
|
||||
})['result']
|
||||
|
98
youtube_dl/extractor/gaia.py
Normal file
98
youtube_dl/extractor/gaia.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class GaiaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gaia\.com/video/(?P<id>[^/?]+).*?\bfullplayer=(?P<type>feature|preview)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=feature',
|
||||
'info_dict': {
|
||||
'id': '89356',
|
||||
'ext': 'mp4',
|
||||
'title': 'Connecting with Universal Consciousness',
|
||||
'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
|
||||
'upload_date': '20151116',
|
||||
'timestamp': 1447707266,
|
||||
'duration': 936,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=preview',
|
||||
'info_dict': {
|
||||
'id': '89351',
|
||||
'ext': 'mp4',
|
||||
'title': 'Connecting with Universal Consciousness',
|
||||
'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
|
||||
'upload_date': '20151116',
|
||||
'timestamp': 1447707266,
|
||||
'duration': 53,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, vtype = re.search(self._VALID_URL, url).groups()
|
||||
node_id = self._download_json(
|
||||
'https://brooklyn.gaia.com/pathinfo', display_id, query={
|
||||
'path': 'video/' + display_id,
|
||||
})['id']
|
||||
node = self._download_json(
|
||||
'https://brooklyn.gaia.com/node/%d' % node_id, node_id)
|
||||
vdata = node[vtype]
|
||||
media_id = compat_str(vdata['nid'])
|
||||
title = node['title']
|
||||
|
||||
media = self._download_json(
|
||||
'https://brooklyn.gaia.com/media/' + media_id, media_id)
|
||||
formats = self._extract_m3u8_formats(
|
||||
media['mediaUrls']['bcHLS'], media_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
text_tracks = media.get('textTracks', {})
|
||||
for key in ('captions', 'subtitles'):
|
||||
for lang, sub_url in text_tracks.get(key, {}).items():
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
|
||||
fivestar = node.get('fivestar', {})
|
||||
fields = node.get('fields', {})
|
||||
|
||||
def get_field_value(key, value_key='value'):
|
||||
return try_get(fields, lambda x: x[key][0][value_key])
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': strip_or_none(get_field_value('body') or get_field_value('teaser')),
|
||||
'timestamp': int_or_none(node.get('created')),
|
||||
'subtitles': subtitles,
|
||||
'duration': int_or_none(vdata.get('duration')),
|
||||
'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])),
|
||||
'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])),
|
||||
'comment_count': int_or_none(node.get('comment_count')),
|
||||
'series': try_get(node, lambda x: x['series']['title'], compat_str),
|
||||
'season_number': int_or_none(get_field_value('season')),
|
||||
'season_id': str_or_none(get_field_value('series_nid', 'nid')),
|
||||
'episode_number': int_or_none(get_field_value('episode')),
|
||||
}
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GameSpotIE(OnceIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article|review)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
|
||||
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
|
||||
@@ -41,6 +41,9 @@ class GameSpotIE(OnceIE):
|
||||
}, {
|
||||
'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.gamespot.com/reviews/gears-of-war-review/1900-6161188/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -47,7 +47,7 @@ from .nbc import NBCSportsVPlayerIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .rutv import RUTVIE
|
||||
from .tvc import TVCIE
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .sportbox import SportBoxIE
|
||||
from .smotri import SmotriIE
|
||||
from .myvi import MyviIE
|
||||
from .condenast import CondeNastIE
|
||||
@@ -109,8 +109,13 @@ from .vice import ViceIE
|
||||
from .xfileshare import XFileShareIE
|
||||
from .cloudflarestream import CloudflareStreamIE
|
||||
from .peertube import PeerTubeIE
|
||||
from .teachable import TeachableIE
|
||||
from .indavideo import IndavideoEmbedIE
|
||||
from .apa import APAIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .viqeo import ViqeoIE
|
||||
from .expressen import ExpressenIE
|
||||
from .zype import ZypeIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -1394,17 +1399,6 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SVT embed
|
||||
{
|
||||
'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
|
||||
'info_dict': {
|
||||
'id': '2900353',
|
||||
'ext': 'flv',
|
||||
'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
|
||||
'duration': 27,
|
||||
'age_limit': 0,
|
||||
},
|
||||
},
|
||||
# Crooks and Liars embed
|
||||
{
|
||||
'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
|
||||
@@ -2069,6 +2063,44 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'TODO: fix nested playlists processing in tests',
|
||||
},
|
||||
{
|
||||
# Viqeo embeds
|
||||
'url': 'https://viqeo.tv/',
|
||||
'info_dict': {
|
||||
'id': 'viqeo',
|
||||
'title': 'All-new video platform',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
},
|
||||
{
|
||||
# Zype embed
|
||||
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
||||
'info_dict': {
|
||||
'id': '5b400b834b32992a310622b9',
|
||||
'ext': 'mp4',
|
||||
'title': 'Smoky Barbecue Favorites',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||
},
|
||||
'add_ie': [ZypeIE.ie_key()],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# videojs embed
|
||||
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
||||
'info_dict': {
|
||||
'id': 'shell',
|
||||
'ext': 'mp4',
|
||||
'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
|
||||
'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest'],
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@@ -2165,10 +2197,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
if url.startswith('//'):
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': self.http_scheme() + url,
|
||||
}
|
||||
return self.url_result(self.http_scheme() + url)
|
||||
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
if not parsed_url.scheme:
|
||||
@@ -2620,9 +2649,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(tvc_url, 'TVC')
|
||||
|
||||
# Look for embedded SportBox player
|
||||
sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
|
||||
sportbox_urls = SportBoxIE._extract_urls(webpage)
|
||||
if sportbox_urls:
|
||||
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
|
||||
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
|
||||
|
||||
# Look for embedded XHamster player
|
||||
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
|
||||
@@ -3007,7 +3036,7 @@ class GenericIE(InfoExtractor):
|
||||
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
|
||||
|
||||
# Look for Mediaset embeds
|
||||
mediaset_urls = MediasetIE._extract_urls(webpage)
|
||||
mediaset_urls = MediasetIE._extract_urls(self, webpage)
|
||||
if mediaset_urls:
|
||||
return self.playlist_from_matches(
|
||||
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
|
||||
@@ -3081,6 +3110,10 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
|
||||
|
||||
teachable_url = TeachableIE._extract_url(webpage, url)
|
||||
if teachable_url:
|
||||
return self.url_result(teachable_url)
|
||||
|
||||
indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
|
||||
if indavideo_urls:
|
||||
return self.playlist_from_matches(
|
||||
@@ -3091,13 +3124,33 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
apa_urls, video_id, video_title, ie=APAIE.ie_key())
|
||||
|
||||
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
|
||||
foxnews_urls = FoxNewsIE._extract_urls(webpage)
|
||||
if foxnews_urls:
|
||||
return self.playlist_from_matches(
|
||||
foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
|
||||
|
||||
sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer(
|
||||
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
|
||||
webpage)]
|
||||
if sharevideos_urls:
|
||||
return self.playlist_from_matches(
|
||||
sharevideos_urls, video_id, video_title)
|
||||
|
||||
viqeo_urls = ViqeoIE._extract_urls(webpage)
|
||||
if viqeo_urls:
|
||||
return self.playlist_from_matches(
|
||||
viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
|
||||
|
||||
expressen_urls = ExpressenIE._extract_urls(webpage)
|
||||
if expressen_urls:
|
||||
return self.playlist_from_matches(
|
||||
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
|
||||
|
||||
zype_urls = ZypeIE._extract_urls(webpage)
|
||||
if zype_urls:
|
||||
return self.playlist_from_matches(
|
||||
zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
|
||||
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
if entries:
|
||||
@@ -3119,9 +3172,13 @@ class GenericIE(InfoExtractor):
|
||||
jwplayer_data = self._find_jwplayer_data(
|
||||
webpage, video_id, transform_source=js_to_json)
|
||||
if jwplayer_data:
|
||||
info = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||
return merge_dicts(info, info_dict)
|
||||
try:
|
||||
info = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||
return merge_dicts(info, info_dict)
|
||||
except ExtractorError:
|
||||
# See https://github.com/rg3/youtube-dl/pull/16735
|
||||
pass
|
||||
|
||||
# Video.js embed
|
||||
mobj = re.search(
|
||||
|
@@ -53,7 +53,7 @@ class GfycatIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
gfy = self._download_json(
|
||||
'http://gfycat.com/cajax/get/%s' % video_id,
|
||||
'https://api.gfycat.com/v1/gfycats/%s' % video_id,
|
||||
video_id, 'Downloading video info')
|
||||
if 'error' in gfy:
|
||||
raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
|
||||
|
@@ -72,7 +72,7 @@ class GloboIE(InfoExtractor):
|
||||
return
|
||||
|
||||
try:
|
||||
self._download_json(
|
||||
glb_id = (self._download_json(
|
||||
'https://login.globo.com/api/authentication', None, data=json.dumps({
|
||||
'payload': {
|
||||
'email': email,
|
||||
@@ -81,7 +81,9 @@ class GloboIE(InfoExtractor):
|
||||
},
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json; charset=utf-8',
|
||||
})
|
||||
}) or {}).get('glbId')
|
||||
if glb_id:
|
||||
self._set_cookie('.globo.com', 'GLBID', glb_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
resp = self._parse_json(e.cause.read(), None)
|
||||
|
@@ -25,18 +25,19 @@ class GoIE(AdobePassIE):
|
||||
},
|
||||
'watchdisneychannel': {
|
||||
'brand': '004',
|
||||
'requestor_id': 'Disney',
|
||||
'resource_id': 'Disney',
|
||||
},
|
||||
'watchdisneyjunior': {
|
||||
'brand': '008',
|
||||
'requestor_id': 'DisneyJunior',
|
||||
'resource_id': 'DisneyJunior',
|
||||
},
|
||||
'watchdisneyxd': {
|
||||
'brand': '009',
|
||||
'requestor_id': 'DisneyXD',
|
||||
'resource_id': 'DisneyXD',
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
|
||||
% '|'.join(list(_SITE_INFO.keys()) + ['disneynow'])
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
|
||||
'info_dict': {
|
||||
@@ -62,6 +63,14 @@ class GoIE(AdobePassIE):
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# brand 004
|
||||
'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# brand 008
|
||||
'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_videos(self, brand, video_id='-1', show_id='-1'):
|
||||
@@ -72,14 +81,23 @@ class GoIE(AdobePassIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
site_info = self._SITE_INFO[sub_domain]
|
||||
brand = site_info['brand']
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
site_info = self._SITE_INFO.get(sub_domain, {})
|
||||
brand = site_info.get('brand')
|
||||
if not video_id or not site_info:
|
||||
webpage = self._download_webpage(url, display_id or video_id)
|
||||
video_id = self._search_regex(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', default=None)
|
||||
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id',
|
||||
default=None)
|
||||
if not site_info:
|
||||
brand = self._search_regex(
|
||||
(r'data-brand=\s*["\']\s*(\d+)',
|
||||
r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
|
||||
default='004')
|
||||
site_info = next(
|
||||
si for _, si in self._SITE_INFO.items()
|
||||
if si.get('brand') == brand)
|
||||
if not video_id:
|
||||
# show extraction works for Disney, DisneyJunior and DisneyXD
|
||||
# ABC and Freeform has different layout
|
||||
@@ -112,8 +130,8 @@ class GoIE(AdobePassIE):
|
||||
'device': '001',
|
||||
}
|
||||
if video_data.get('accesslevel') == '1':
|
||||
requestor_id = site_info['requestor_id']
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id = site_info.get('requestor_id', 'DisneyChannels')
|
||||
resource = site_info.get('resource_id') or self._get_mvpd_resource(
|
||||
requestor_id, title, video_id, None)
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
|
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@@ -14,8 +15,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class Go90IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?go90\.com/videos/(?P<id>[0-9a-zA-Z]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?go90\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.go90.com/videos/84BUqjLpf9D',
|
||||
'md5': 'efa7670dbbbf21a7b07b360652b24a32',
|
||||
'info_dict': {
|
||||
@@ -27,15 +28,31 @@ class Go90IE(InfoExtractor):
|
||||
'upload_date': '20170411',
|
||||
'age_limit': 14,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.go90.com/embed/261MflWkD3N',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'https://www.go90.com/api/view/items/' + video_id,
|
||||
video_id, headers={
|
||||
|
||||
try:
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Content-Type': 'application/json; charset=utf-8',
|
||||
}, data=b'{"client":"web","device_type":"pc"}')
|
||||
})
|
||||
video_data = self._download_json(
|
||||
'https://www.go90.com/api/view/items/' + video_id, video_id,
|
||||
headers=headers, data=b'{"client":"web","device_type":"pc"}')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
message = self._parse_json(e.cause.read().decode(), None)['error']['message']
|
||||
if 'region unavailable' in message:
|
||||
self.raise_geo_restricted(countries=['US'])
|
||||
raise ExtractorError(message, expected=True)
|
||||
raise
|
||||
|
||||
if video_data.get('requires_drm'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
main_video_asset = video_data['main_video_asset']
|
||||
|
@@ -8,6 +8,7 @@ from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@@ -80,8 +81,8 @@ class HiDiveIE(InfoExtractor):
|
||||
bitrates = rendition.get('bitrates')
|
||||
if not isinstance(bitrates, dict):
|
||||
continue
|
||||
m3u8_url = bitrates.get('hls')
|
||||
if not isinstance(m3u8_url, compat_str):
|
||||
m3u8_url = url_or_none(bitrates.get('hls'))
|
||||
if not m3u8_url:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
@@ -93,9 +94,8 @@ class HiDiveIE(InfoExtractor):
|
||||
if not isinstance(cc_file, list) or len(cc_file) < 3:
|
||||
continue
|
||||
cc_lang = cc_file[0]
|
||||
cc_url = cc_file[2]
|
||||
if not isinstance(cc_lang, compat_str) or not isinstance(
|
||||
cc_url, compat_str):
|
||||
cc_url = url_or_none(cc_file[2])
|
||||
if not isinstance(cc_lang, compat_str) or not cc_url:
|
||||
continue
|
||||
subtitles.setdefault(cc_lang, []).append({
|
||||
'url': cc_url,
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user