release 2014.04.02

[youtube] feeds: Also look for the html in the 'content_html' field (fixes #2671 )
[comedycentral] Change XPath .//guid to ./guid (fixes #2668 )
2025-07-21 21:01:59 -05:00 · 2014-04-02 14:26:34 +02:00 · 2014-04-02 14:13:08 +02:00 · 2014-04-01 21:38:07 +02:00 · 2014-04-01 21:29:40 +02:00 · 2014-04-01 20:30:22 +02:00
81 changed files with 2844 additions and 1065 deletions
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -3,3 +3,5 @@ include test/*.py
 include test/*.json
 include youtube-dl.bash-completion
 include youtube-dl.1
+recursive-include docs *
+prune docs/_build
--- a/Makefile
+++ b/Makefile
@ -72,8 +72,9 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
 		--exclude '__pycache' \
 		--exclude '.git' \
 		--exclude 'testdata' \
+		--exclude 'docs/_build' \
 		-- \
-		bin devscripts test youtube_dl \
+		bin devscripts test youtube_dl docs \
 		CHANGELOG LICENSE README.md README.txt \
 		Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
 		youtube-dl
--- a/README.md
+++ b/README.md
@ -28,6 +28,9 @@ which means you can modify it, redistribute it or use it however you like.
    --user-agent UA                  specify a custom user agent
    --referer REF                    specify a custom referer, use if the video
                                     access is restricted to one domain
+    --add-header FIELD:VALUE         specify a custom HTTP header and its value,
+                                     separated by a colon ':'. You can use this
+                                     option multiple times
    --list-extractors                List all supported extractors and the URLs
                                     they would handle
    --extractor-descriptions         Output descriptions of all supported
@ -62,6 +65,7 @@ which means you can modify it, redistribute it or use it however you like.
                                     configuration in ~/.config/youtube-dl.conf
                                     (%APPDATA%/youtube-dl/config.txt on
                                     Windows)
+    --encoding ENCODING              Force the specified encoding (experimental)

 ## Video Selection:
    --playlist-start NUMBER          playlist video to start at (default is 1)
@ -166,6 +170,7 @@ which means you can modify it, redistribute it or use it however you like.

 ## Verbosity / Simulation Options:
    -q, --quiet                      activates quiet mode
+    --no-warnings                    Ignore warnings
    -s, --simulate                   do not download the video and do not write
                                     anything to disk
    --skip-download                  do not download the video
@ -177,7 +182,9 @@ which means you can modify it, redistribute it or use it however you like.
    --get-duration                   simulate, quiet but print video length
    --get-filename                   simulate, quiet but print output filename
    --get-format                     simulate, quiet but print output format
-    -j, --dump-json                  simulate, quiet but print JSON information
+    -j, --dump-json                  simulate, quiet but print JSON information.
+                                     See --output for a description of available
+                                     keys.
    --newline                        output progress bar as new lines
    --no-progress                    do not print progress bar
    --console-title                  display progress in console titlebar
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@ -22,6 +22,12 @@ fi

 if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
 version="$1"
+major_version=$(echo "$version" | sed -n 's#^\([0-9]*\.[0-9]*\.[0-9]*\).*#\1#p')
+if test "$major_version" '!=' "$(date '+%Y.%m.%d')"; then
+    echo "$version does not start with today's date!"
+    exit 1
+fi
+
 if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi
 if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi
 useless_files=$(find youtube_dl -type f -not -name '*.py')
--- a/docs/.gitignore
+++ b/docs/.gitignore
@ -0,0 +1 @@
+_build/
--- a/docs/Makefile
+++ b/docs/Makefile
@ -0,0 +1,177 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+# SPHINXOPTS:  extra options added to every sphinx-build invocation.
+SPHINXOPTS    =
+# SPHINXBUILD: the sphinx-build executable to run.
+SPHINXBUILD   = sphinx-build
+# PAPER:       "a4" or "letter" selects the matching PAPEROPT_* value below;
+#              when empty, no paper-size option is passed.
+PAPER         =
+# BUILDDIR:    directory below which all build output is written.
+BUILDDIR      = _build
+
+# User-friendly check for sphinx-build
+# SPHINXBUILD is assigned with `=` in this file, which takes precedence over a
+# value from the environment, so the hint tells the user to pass it on the
+# make command line instead.
+ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
+$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then pass SPHINXBUILD=/full/path/to/sphinx-build on the make command line to point to the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+endif
+
+# Internal variables.
+# The paper option is looked up through the computed name
+# $(PAPEROPT_$(PAPER)); with PAPER empty the lookup expands to nothing.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+# Options shared by all builders except gettext: doctree directory, paper
+# size, user-supplied options and "." as the source directory.
+ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+# Every target in this file names a command, not a file, so all of them are
+# declared phony (including latexpdfja, texinfo, info, xml and pseudoxml).
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes linkcheck doctest xml pseudoxml
+
+# help: print the list of available targets. It is the first rule in the
+# file, so a plain `make` runs it.
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html       to make standalone HTML files"
+	@echo "  dirhtml    to make HTML files named index.html in directories"
+	@echo "  singlehtml to make a single large HTML file"
+	@echo "  pickle     to make pickle files"
+	@echo "  json       to make JSON files"
+	@echo "  htmlhelp   to make HTML files and a HTML help project"
+	@echo "  qthelp     to make HTML files and a qthelp project"
+	@echo "  devhelp    to make HTML files and a Devhelp project"
+	@echo "  epub       to make an epub"
+	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
+	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
+	@echo "  text       to make text files"
+	@echo "  man        to make manual pages"
+	@echo "  texinfo    to make Texinfo files"
+	@echo "  info       to make Texinfo files and run them through makeinfo"
+	@echo "  gettext    to make PO message catalogs"
+	@echo "  changes    to make an overview of all changed/added/deprecated items"
+	@echo "  xml        to make Docutils-native XML files"
+	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
+	@echo "  linkcheck  to check all external links for integrity"
+	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
+
+# clean: remove everything below $(BUILDDIR). The guard stops an empty
+# BUILDDIR (e.g. `make BUILDDIR= clean`) from turning the recipe into
+# `rm -rf /*`.
+clean:
+	$(if $(strip $(BUILDDIR)),,$(error BUILDDIR is empty; refusing to run rm -rf /*))
+	rm -rf $(BUILDDIR)/*
+
+# html: run the Sphinx "html" builder. $@ is the target name, which is both
+# the builder name and the output subdirectory.
+html:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+# dirhtml: run the Sphinx "dirhtml" builder ($@ is builder name and output
+# subdirectory).
+dirhtml:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+# singlehtml: run the Sphinx "singlehtml" builder ($@ is builder name and
+# output subdirectory).
+singlehtml:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+# pickle: run the Sphinx "pickle" builder ($@ is builder name and output
+# subdirectory).
+pickle:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+
+# json: run the Sphinx "json" builder ($@ is builder name and output
+# subdirectory).
+json:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+
+# htmlhelp: run the Sphinx "htmlhelp" builder ($@ is builder name and output
+# subdirectory).
+htmlhelp:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+# qthelp: run the Sphinx "qthelp" builder and print the follow-up commands.
+# The inner double quotes are escaped so the shell prints them instead of
+# treating them as string delimiters.
+qthelp:
+	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+	@echo
+	@echo "Build finished; now you can run \"qcollectiongenerator\" with the" \
+	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/youtube-dl.qhcp"
+	@echo "To view the help file:"
+	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/youtube-dl.qhc"
+
+# devhelp: run the Sphinx "devhelp" builder ($@ is builder name and output
+# subdirectory), then print how to install the result. $$HOME reaches the
+# shell as $HOME.
+devhelp:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished."
+	@echo "To view the help file:"
+	@echo "# mkdir -p $$HOME/.local/share/devhelp/youtube-dl"
+	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/youtube-dl"
+	@echo "# devhelp"
+
+# epub: run the Sphinx "epub" builder ($@ is builder name and output
+# subdirectory).
+epub:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+# latex: run the Sphinx "latex" builder ($@ is builder name and output
+# subdirectory); the generated sources are not compiled here.
+latex:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+	@echo "Run \`make' in that directory to run these through (pdf)latex" \
+	      "(use \`make latexpdf' here to do that automatically)."
+
+# latexpdf: generate the LaTeX sources, then build the all-pdf target of the
+# Makefile in $(BUILDDIR)/latex through a sub-make.
+latexpdf:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through pdflatex..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+# latexpdfja: generate the LaTeX sources, then build the all-pdf-ja target of
+# the Makefile in $(BUILDDIR)/latex through a sub-make. The closing message
+# names the tools announced two lines earlier (platex and dvipdfmx).
+latexpdfja:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through platex and dvipdfmx..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
+	@echo "platex and dvipdfmx finished; the PDF files are in $(BUILDDIR)/latex."
+
+# text: run the Sphinx "text" builder ($@ is builder name and output
+# subdirectory).
+text:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+# man: run the Sphinx "man" builder ($@ is builder name and output
+# subdirectory).
+man:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+# texinfo: run the Sphinx "texinfo" builder ($@ is builder name and output
+# subdirectory); makeinfo is not run here.
+texinfo:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+	@echo "Run \`make' in that directory to run these through makeinfo" \
+	      "(use \`make info' here to do that automatically)."
+
+# info: generate the Texinfo sources, then build the info target of the
+# Makefile in $(BUILDDIR)/texinfo. The sub-make is started through $(MAKE)
+# (as latexpdf does) so that flags such as -n and -j are passed on.
+info:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo "Running Texinfo files through makeinfo..."
+	$(MAKE) -C $(BUILDDIR)/texinfo info
+	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+# gettext: run the Sphinx "gettext" builder ($@) with the i18n option set;
+# the catalogs go to $(BUILDDIR)/locale.
+gettext:
+	$(SPHINXBUILD) -b $@ $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+	@echo
+	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+# changes: run the Sphinx "changes" builder ($@ is builder name and output
+# subdirectory).
+changes:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "The overview file is in $(BUILDDIR)/changes."
+
+# linkcheck: run the Sphinx "linkcheck" builder ($@ is builder name and
+# output subdirectory).
+linkcheck:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Link check complete; look for any errors in the above output " \
+	      "or in $(BUILDDIR)/linkcheck/output.txt."
+
+# doctest: run the Sphinx "doctest" builder ($@ is builder name and output
+# subdirectory).
+doctest:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo "Testing of doctests in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/doctest/output.txt."
+
+# xml: run the Sphinx "xml" builder ($@ is builder name and output
+# subdirectory).
+xml:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
+
+# pseudoxml: run the Sphinx "pseudoxml" builder ($@ is builder name and
+# output subdirectory).
+pseudoxml:
+	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
+	@echo
+	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
--- a/docs/conf.py
+++ b/docs/conf.py
@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+#
+# youtube-dl documentation build configuration file, created by
+# sphinx-quickstart on Fri Mar 14 21:05:43 2014.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys
+import os
+# Put the repository root (the parent of this directory) on sys.path so that youtube_dl can be imported
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+# -- General configuration ------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    'sphinx.ext.autodoc',
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'youtube-dl'
+copyright = u'2014, Ricardo Garcia Gonzalez'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short version. The full youtube_dl version string is used here, and again for |release| below.
+import youtube_dl
+version = youtube_dl.__version__
+# The full version, including alpha/beta/rc tags.
+release = version
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = ['_build']
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+html_theme = 'default'
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'youtube-dldoc'
--- a/docs/index.rst
+++ b/docs/index.rst
@ -0,0 +1,23 @@
+Welcome to youtube-dl's documentation!
+======================================
+
+*youtube-dl* is a command-line program to download videos from YouTube.com and more sites.
+It can also be used in Python code.
+
+Developer guide
+---------------
+
+This section contains information for using *youtube-dl* from Python programs.
+
+.. toctree::
+    :maxdepth: 2
+
+    module_guide
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
--- a/docs/module_guide.rst
+++ b/docs/module_guide.rst
@ -0,0 +1,67 @@
+Using the ``youtube_dl`` module
+===============================
+
+When using the ``youtube_dl`` module, you start by creating an instance of :class:`YoutubeDL` and adding all the available extractors:
+
+.. code-block:: python
+
+    >>> from youtube_dl import YoutubeDL
+    >>> ydl = YoutubeDL()
+    >>> ydl.add_default_info_extractors()
+
+Extracting video information
+----------------------------
+
+You use the :meth:`YoutubeDL.extract_info` method for getting the video information, which returns a dictionary:
+
+.. code-block:: python
+
+    >>> info = ydl.extract_info('http://www.youtube.com/watch?v=BaW_jenozKc', download=False)
+    [youtube] Setting language
+    [youtube] BaW_jenozKc: Downloading webpage
+    [youtube] BaW_jenozKc: Downloading video info webpage
+    [youtube] BaW_jenozKc: Extracting video information
+    >>> info['title']
+    'youtube-dl test video "\'/\\ä↭𝕐'
+    >>> info['height'], info['width']
+    (720, 1280)
+
+If you want to download or play the video, you can get its URL:
+
+.. code-block:: python
+
+    >>> info['url']
+    'https://...'
+
+Extracting playlist information
+-------------------------------
+
+The playlist information is extracted in a similar way, but the dictionary is a bit different:
+
+.. code-block:: python
+
+    >>> playlist = ydl.extract_info('http://www.ted.com/playlists/13/open_source_open_world', download=False)
+    [TED] open_source_open_world: Downloading playlist webpage
+    ...
+    >>> playlist['title']
+    'Open-source, open world'
+
+
+
+You can access the videos in the playlist with the ``entries`` field:
+
+.. code-block:: python
+
+    >>> for video in playlist['entries']:
+    ...     print('Video #%d: %s' % (video['playlist_index'], video['title']))
+
+    Video #1: How Arduino is open-sourcing imagination
+    Video #2: The year open data went worldwide
+    Video #3: Massive-scale online collaboration
+    Video #4: The art of asking
+    Video #5: How cognitive surplus will change the world
+    Video #6: The birth of Wikipedia
+    Video #7: Coding a better government
+    Video #8: The era of open innovation
+    Video #9: The currency of the new economy is trust
+
--- a/test/helper.py
+++ b/test/helper.py
@ -9,7 +9,10 @@ import sys

 import youtube_dl.extractor
 from youtube_dl import YoutubeDL
-from youtube_dl.utils import preferredencoding
+from youtube_dl.utils import (
+    compat_str,
+    preferredencoding,
+)


 def get_params(override=None):
@ -83,3 +86,45 @@ def gettestcases():


 md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
+
+
+def expect_info_dict(self, expected_dict, got_dict):
+    """Assert that the info dict got_dict satisfies the test definition.
+
+    self          -- the test case whose assert* methods are called
+    expected_dict -- maps a field name to its expectation:
+                     * a string starting with 're:' is a regular expression
+                       that must match the (string) value from its start
+                     * a type means the value must be an instance of it
+                     * a string starting with 'md5:' is compared with
+                       'md5:' + md5(value)
+                     * anything else must be equal to the value
+    got_dict      -- the info dict that was actually produced
+
+    Afterwards the mandatory fields are checked, and the test fails if
+    got_dict has a checkable field that expected_dict does not cover (a
+    ready-to-paste "info_dict" is written to stderr in that case).
+    """
+    for info_field, expected in expected_dict.items():
+        got = got_dict.get(info_field)
+        if isinstance(expected, compat_str) and expected.startswith('re:'):
+            match_str = expected[len('re:'):]
+            match_rex = re.compile(match_str)
+
+            self.assertTrue(
+                isinstance(got, compat_str) and match_rex.match(got),
+                u'field %s (value: %r) should match %r' % (info_field, got, match_str))
+        elif isinstance(expected, type):
+            self.assertTrue(isinstance(got, expected),
+                u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
+        else:
+            # Hash only values md5() can handle. A missing field (None) or
+            # another object without .encode is compared as is, so that
+            # assertEqual reports the mismatch instead of md5() raising
+            # AttributeError.
+            if (isinstance(expected, compat_str) and expected.startswith('md5:')
+                    and hasattr(got, 'encode')):
+                got = 'md5:' + md5(got)
+            self.assertEqual(expected, got,
+                u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+
+    # Check for the presence of mandatory fields
+    for key in ('id', 'url', 'title', 'ext'):
+        self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
+    # Check for mandatory fields that are automatically set by YoutubeDL
+    for key in ['webpage_url', 'extractor', 'extractor_key']:
+        self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
+
+    # Are checkable fields missing from the test case definition?
+    # (strings of 250 or more characters are shown as their md5)
+    test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
+        for key, value in got_dict.items()
+        if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
+    missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
+    if missing_keys:
+        sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
+        self.assertFalse(
+            missing_keys,
+            'Missing keys in test definition: %s' % (
+                ', '.join(sorted(missing_keys))))
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -143,5 +143,16 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
        self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])

+    def test_ComedyCentralShows(self):
+        # Each cc.com show URL is checked against ['ComedyCentralShows'].
+        show_urls = (
+            'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
+            'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
+            'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
+        )
+        for show_url in show_urls:
+            self.assertMatch(show_url, ['ComedyCentralShows'])
+
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_download.py
+++ b/test/test_download.py
@ -9,16 +9,16 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from test.helper import (
    get_params,
    gettestcases,
-    try_rm,
+    expect_info_dict,
    md5,
-    report_warning
+    try_rm,
+    report_warning,
 )


 import hashlib
 import io
 import json
-import re
 import socket

 import youtube_dl.YoutubeDL
@ -135,40 +135,8 @@ def generator(test_case):
                    self.assertEqual(md5_for_file, tc['md5'])
                with io.open(info_json_fn, encoding='utf-8') as infof:
                    info_dict = json.load(infof)
-                for (info_field, expected) in tc.get('info_dict', {}).items():
-                    if isinstance(expected, compat_str) and expected.startswith('re:'):
-                        got = info_dict.get(info_field)
-                        match_str = expected[len('re:'):]
-                        match_rex = re.compile(match_str)

-                        self.assertTrue(
-                            isinstance(got, compat_str) and match_rex.match(got),
-                            u'field %s (value: %r) should match %r' % (info_field, got, match_str))
-                    elif isinstance(expected, type):
-                        got = info_dict.get(info_field)
-                        self.assertTrue(isinstance(got, expected),
-                            u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
-                    else:
-                        if isinstance(expected, compat_str) and expected.startswith('md5:'):
-                            got = 'md5:' + md5(info_dict.get(info_field))
-                        else:
-                            got = info_dict.get(info_field)
-                        self.assertEqual(expected, got,
-                            u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
-
-                # Check for the presence of mandatory fields
-                for key in ('id', 'url', 'title', 'ext'):
-                    self.assertTrue(key in info_dict.keys() and info_dict[key])
-                # Check for mandatory fields that are automatically set by YoutubeDL
-                for key in ['webpage_url', 'extractor', 'extractor_key']:
-                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
-
-                # If checkable fields are missing from the test case, print the info_dict
-                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
-                    for key, value in info_dict.items()
-                    if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
-                if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
-                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
+                expect_info_dict(self, tc.get('info_dict', {}), info_dict)
        finally:
            try_rm_tcs_files()

--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@ -9,8 +9,10 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from test.helper import FakeYDL
-
+from test.helper import (
+    expect_info_dict,
+    FakeYDL,
+)

 from youtube_dl.extractor import (
    AcademicEarthCourseIE,
@ -37,6 +39,9 @@ from youtube_dl.extractor import (
    GoogleSearchIE,
    GenericIE,
    TEDIE,
+    ToypicsUserIE,
+    XTubeUserIE,
+    InstagramUserIE,
 )


@ -269,5 +274,46 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['title'], 'Who are the hackers?')
        self.assertTrue(len(result['entries']) >= 6)

+    def test_toypics_user(self):
+        # The user page must come back as a playlist with the user name as id
+        # and at least 17 entries.
+        ydl = FakeYDL()
+        extractor = ToypicsUserIE(ydl)
+        playlist = extractor.extract('http://videos.toypics.net/Mikey')
+        self.assertIsPlaylist(playlist)
+        self.assertEqual(playlist['id'], 'Mikey')
+        entry_count = len(playlist['entries'])
+        self.assertTrue(entry_count >= 17)
+
+    def test_xtube_user(self):
+        # The profile page must come back as a playlist with the user name as
+        # id and at least 155 entries.
+        ydl = FakeYDL()
+        extractor = XTubeUserIE(ydl)
+        playlist = extractor.extract('http://www.xtube.com/community/profile.php?user=greenshowers')
+        self.assertIsPlaylist(playlist)
+        self.assertEqual(playlist['id'], 'greenshowers')
+        entry_count = len(playlist['entries'])
+        self.assertTrue(entry_count >= 155)
+
+    def test_InstagramUser(self):
+        # Extract the user's playlist, then complete one known entry the way
+        # the test would see it after YoutubeDL processing and compare it
+        # with the expected fields.
+        dl = FakeYDL()
+        ie = InstagramUserIE(dl)
+        result = ie.extract('http://instagram.com/porsche')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'porsche')
+        self.assertTrue(len(result['entries']) >= 2)
+        test_video = next(
+            e for e in result['entries']
+            if e['id'] == '614605558512799803_462752227')
+        dl.add_default_extra_info(test_video, ie, '(irrelevant URL)')
+        dl.process_video_result(test_video, download=False)
+        EXPECTED = {
+            'id': '614605558512799803_462752227',
+            'ext': 'mp4',
+            'title': '#Porsche Intelligent Performance.',
+            # Raw string: '\.' is not a valid escape sequence in a normal
+            # string literal; the resulting value is unchanged.
+            'thumbnail': r're:^https?://.*\.jpg',
+            'uploader': 'Porsche',
+            'uploader_id': 'porsche',
+            'timestamp': 1387486713,
+            'upload_date': '20131219',
+        }
+        expect_info_dict(self, EXPECTED, test_video)
+
+
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 # Various small unit tests
 import io
+import json
 import xml.etree.ElementTree

 #from youtube_dl.utils import htmlentity_transform
@ -35,6 +36,8 @@ from youtube_dl.utils import (
    url_basename,
    urlencode_postdata,
    xpath_with_ns,
+    parse_iso8601,
+    strip_jsonp,
 )

 if sys.version_info < (3, 0):
@ -266,5 +269,16 @@ class TestUtil(unittest.TestCase):
        data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
        self.assertTrue(isinstance(data, bytes))

+    def test_parse_iso8601(self):
+        # Three spellings of the same instant must give the same timestamp.
+        for date_str in (
+                '2014-03-23T23:04:26+0100',
+                '2014-03-23T22:04:26+0000',
+                '2014-03-23T22:04:26Z'):
+            self.assertEqual(parse_iso8601(date_str), 1395612266)
+
+    def test_strip_jsonp(self):
+        # What strip_jsonp returns for a "cb (...);" wrapper must parse as JSON.
+        jsonp = 'cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);'
+        parsed = json.loads(strip_jsonp(jsonp))
+        self.assertEqual(parsed, [{"id": "532cb", "x": 3}])
+
+
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -8,6 +8,7 @@ import datetime
 import errno
 import io
 import json
+import locale
 import os
 import platform
 import re
@ -94,6 +95,7 @@ class YoutubeDL(object):
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
+    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
@ -158,6 +160,7 @@ class YoutubeDL(object):
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
+    encoding:          Use this encoding instead of the system-specified.

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
@ -376,6 +379,8 @@ class YoutubeDL(object):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
+            if self.params.get('no_warnings'):
+                return
            if self._err_file.isatty() and os.name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
            else:
@ -512,13 +517,7 @@ class YoutubeDL(object):
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
-                self.add_extra_info(ie_result,
-                    {
-                        'extractor': ie.IE_NAME,
-                        'webpage_url': url,
-                        'webpage_url_basename': url_basename(url),
-                        'extractor_key': ie.ie_key(),
-                    })
+                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
@ -537,6 +536,14 @@ class YoutubeDL(object):
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)

+    def add_default_extra_info(self, ie_result, ie, url):
+        """Pass the extractor name and key, the webpage URL and its basename
+        to add_extra_info() for ie_result."""
+        default_info = {
+            'extractor': ie.IE_NAME,
+            'webpage_url': url,
+            'webpage_url_basename': url_basename(url),
+            'extractor_key': ie.ie_key(),
+        }
+        self.add_extra_info(ie_result, default_info)
+
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
@ -1195,6 +1202,9 @@ class YoutubeDL(object):
    def print_debug_header(self):
        if not self.params.get('verbose'):
            return
+
+        write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' %
+                 (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding()))
        write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            sp = subprocess.Popen(
@ -1259,3 +1269,19 @@ class YoutubeDL(object):
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
+
+    def encode(self, s):
+        """Return s as bytes, encoded with the encoding from get_encoding().
+
+        Bytes are returned unchanged. A UnicodeEncodeError is re-raised after
+        a hint about the --encoding option has been appended to its reason.
+        """
+        if not isinstance(s, bytes):
+            try:
+                s = s.encode(self.get_encoding())
+            except UnicodeEncodeError as err:
+                err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
+                raise
+        return s
+
+    def get_encoding(self):
+        """Return the 'encoding' parameter, or preferredencoding() when that
+        parameter is None or missing."""
+        forced = self.params.get('encoding')
+        return preferredencoding() if forced is None else forced
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@ -51,6 +51,7 @@ __authors__  = (
    'David Wagner',
    'Juan C. Olivares',
    'Mattias Harrysson',
+    'phaer',
 )

 __license__ = 'Public Domain'
@ -227,6 +228,9 @@ def parseOpts(overrideArguments=None):
    general.add_option('--referer',
            dest='referer', help='specify a custom referer, use if the video access is restricted to one domain',
            metavar='REF', default=None)
+    general.add_option('--add-header',
+            dest='headers', help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', action="append",
+            metavar='FIELD:VALUE')
    general.add_option('--list-extractors',
            action='store_true', dest='list_extractors',
            help='List all supported extractors and the URLs they would handle', default=False)
@ -252,13 +256,17 @@ def parseOpts(overrideArguments=None):
    general.add_option(
        '--bidi-workaround', dest='bidi_workaround', action='store_true',
        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
-    general.add_option('--default-search',
-            dest='default_search', metavar='PREFIX',
-            help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
+    general.add_option(
+        '--default-search',
+        dest='default_search', metavar='PREFIX',
+        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
    general.add_option(
        '--ignore-config',
        action='store_true',
        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
+    general.add_option(
+        '--encoding', dest='encoding', metavar='ENCODING',
+        help='Force the specified encoding (experimental)')

    selection.add_option(
        '--playlist-start',
@ -361,6 +369,10 @@ def parseOpts(overrideArguments=None):

    verbosity.add_option('-q', '--quiet',
            action='store_true', dest='quiet', help='activates quiet mode', default=False)
+    verbosity.add_option(
+        '--no-warnings',
+        dest='no_warnings', action='store_true', default=False,
+        help='Ignore warnings')
    verbosity.add_option('-s', '--simulate',
            action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
    verbosity.add_option('--skip-download',
@ -388,7 +400,7 @@ def parseOpts(overrideArguments=None):
            help='simulate, quiet but print output format', default=False)
    verbosity.add_option('-j', '--dump-json',
            action='store_true', dest='dumpjson',
-            help='simulate, quiet but print JSON information', default=False)
+            help='simulate, quiet but print JSON information. See --output for a description of available keys.', default=False)
    verbosity.add_option('--newline',
            action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
    verbosity.add_option('--no-progress',
@ -532,8 +544,6 @@ def parseOpts(overrideArguments=None):
            write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
            write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
            write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
-            write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' %
-                         (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding()))

    return parser, opts, args

@ -556,6 +566,16 @@ def _real_main(argv=None):
    if opts.referer is not None:
        std_headers['Referer'] = opts.referer

+    # Custom HTTP headers, each given as FIELD:VALUE via --add-header (repeatable)
+    if opts.headers is not None:
+        for h in opts.headers:
+            if h.find(':', 1) < 0:
+                parser.error(u'wrong header formatting, it should be key:value, not "%s"'%h)
+            key, value = h.split(':', 1)  # first colon only: values such as URLs contain ':' themselves
+            if opts.verbose:
+                write_string(u'[debug] Adding header from command line option %s:%s\n'%(key, value))
+            std_headers[key] = value
+
    # Dump user agent
    if opts.dump_user_agent:
        compat_print(std_headers['User-Agent'])
@ -657,7 +677,7 @@ def _real_main(argv=None):
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)
-    if opts.default_search not in ('auto', None) and ':' not in opts.default_search:
+    if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')

    # Do not download videos when there are audio-only formats
@ -695,6 +715,7 @@ def _real_main(argv=None):
        'password': opts.password,
        'videopassword': opts.videopassword,
        'quiet': (opts.quiet or any_printing),
+        'no_warnings': opts.no_warnings,
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forceid': opts.getid,
@ -767,6 +788,7 @@ def _real_main(argv=None):
        'include_ads': opts.include_ads,
        'default_search': opts.default_search,
        'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
+        'encoding': opts.encoding,
    }

    with YoutubeDL(ydl_opts) as ydl:
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@ -13,8 +13,10 @@ class HlsFD(FileDownloader):
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

-        args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
-            '-bsf:a', 'aac_adtstoasc', tmpfilename]
+        args = [
+            '-y', '-i', url, '-f', 'mp4', '-c', 'copy',
+            '-bsf:a', 'aac_adtstoasc',
+            encodeFilename(tmpfilename, for_subprocess=True)]

        for program in ['avconv', 'ffmpeg']:
            try:
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@ -23,6 +23,8 @@ class HttpFD(FileDownloader):
        headers = {'Youtubedl-no-compression': 'True'}
        if 'user_agent' in info_dict:
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
+        if 'http_referer' in info_dict:
+            headers['Referer'] = info_dict['http_referer']
        basic_request = compat_urllib_request.Request(url, None, headers)
        request = compat_urllib_request.Request(url, None, headers)

--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -2,6 +2,7 @@ from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
 from .aftonbladet import AftonbladetIE
 from .anitube import AnitubeIE
+from .aol import AolIE
 from .aparat import AparatIE
 from .appletrailers import AppleTrailersIE
 from .archiveorg import ArchiveOrgIE
@ -13,6 +14,7 @@ from .arte import (
    ArteTVConcertIE,
    ArteTVFutureIE,
    ArteTVDDCIE,
+    ArteTVEmbedIE,
 )
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
@ -24,6 +26,7 @@ from .bloomberg import BloombergIE
 from .br import BRIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
+from .byutv import BYUtvIE
 from .c56 import C56IE
 from .canal13cl import Canal13clIE
 from .canalplus import CanalplusIE
@ -64,6 +67,7 @@ from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .eitb import EitbIE
 from .elpais import ElPaisIE
+from .engadget import EngadgetIE
 from .escapist import EscapistIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
@ -72,6 +76,7 @@ from .facebook import FacebookIE
 from .faz import FazIE
 from .firstpost import FirstpostIE
 from .firsttv import FirstTVIE
+from .fivemin import FiveMinIE
 from .fktv import (
    FKTVIE,
    FKTVPosteckeIE,
@ -109,7 +114,7 @@ from .imdb import (
 )
 from .ina import InaIE
 from .infoq import InfoQIE
-from .instagram import InstagramIE
+from .instagram import InstagramIE, InstagramUserIE
 from .internetvideoarchive import InternetVideoArchiveIE
 from .iprima import IPrimaIE
 from .ivi import (
@ -172,8 +177,11 @@ from .normalboots import NormalbootsIE
 from .novamov import NovaMovIE
 from .nowness import NownessIE
 from .nowvideo import NowVideoIE
+from .ntv import NTVIE
+from .oe1 import OE1IE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
+from .parliamentliveuk import ParliamentLiveUKIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .playvid import PlayvidIE
@ -191,6 +199,7 @@ from .ro220 import Ro220IE
 from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
+from .rts import RTSIE
 from .rutube import (
    RutubeIE,
    RutubeChannelIE,
@ -201,7 +210,6 @@ from .rutv import RUTVIE
 from .savefrom import SaveFromIE
 from .servingsys import ServingSysIE
 from .sina import SinaIE
-from .slashdot import SlashdotIE
 from .slideshare import SlideshareIE
 from .smotri import (
    SmotriIE,
@ -235,6 +243,7 @@ from .theplatform import ThePlatformIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
 from .toutv import TouTvIE
+from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
 from .trutube import TruTubeIE
@ -249,16 +258,17 @@ from .udemy import (
    UdemyCourseIE
 )
 from .unistra import UnistraIE
+from .urort import UrortIE
 from .ustream import UstreamIE, UstreamChannelIE
 from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vesti import VestiIE
 from .vevo import VevoIE
-from .vice import ViceIE
 from .viddler import ViddlerIE
 from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
+from .videolecturesnet import VideoLecturesNetIE
 from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
 from .vimeo import (
@ -273,16 +283,21 @@ from .vine import VineIE
 from .viki import VikiIE
 from .vk import VKIE
 from .vube import VubeIE
+from .washingtonpost import WashingtonPostIE
 from .wat import WatIE
-from .wdr import WDRIE
+from .wdr import (
+    WDRIE,
+    WDRMausIE,
+)
 from .weibo import WeiboIE
 from .wimp import WimpIE
 from .wistia import WistiaIE
 from .worldstarhiphop import WorldStarHipHopIE
+from .xbef import XBefIE
 from .xhamster import XHamsterIE
 from .xnxx import XNXXIE
 from .xvideos import XVideosIE
-from .xtube import XTubeIE
+from .xtube import XTubeUserIE, XTubeIE
 from .yahoo import (
    YahooIE,
    YahooNewsIE,
--- a/youtube_dl/extractor/addanime.py
+++ b/youtube_dl/extractor/addanime.py
@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@ -14,14 +16,14 @@ from ..utils import (
 class AddAnimeIE(InfoExtractor):

    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
-    IE_NAME = u'AddAnime'
    _TEST = {
-        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
-        u'file': u'24MR3YO5SAS9.mp4',
-        u'md5': u'72954ea10bc979ab5e2eb288b21425a0',
-        u'info_dict': {
-            u"description": u"One Piece 606",
-            u"title": u"One Piece 606"
+        'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
+        'md5': '72954ea10bc979ab5e2eb288b21425a0',
+        'info_dict': {
+            'id': '24MR3YO5SAS9',
+            'ext': 'mp4',
+            'description': 'One Piece 606',
+            'title': 'One Piece 606',
        }
    }

@ -38,10 +40,10 @@ class AddAnimeIE(InfoExtractor):
            redir_webpage = ee.cause.read().decode('utf-8')
            action = self._search_regex(
                r'<form id="challenge-form" action="([^"]+)"',
-                redir_webpage, u'Redirect form')
+                redir_webpage, 'Redirect form')
            vc = self._search_regex(
                r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
-                redir_webpage, u'redirect vc value')
+                redir_webpage, 'redirect vc value')
            av = re.search(
                r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
                redir_webpage)
@ -52,19 +54,19 @@ class AddAnimeIE(InfoExtractor):
            parsed_url = compat_urllib_parse_urlparse(url)
            av_val = av_res + len(parsed_url.netloc)
            confirm_url = (
-                parsed_url.scheme + u'://' + parsed_url.netloc +
+                parsed_url.scheme + '://' + parsed_url.netloc +
                action + '?' +
                compat_urllib_parse.urlencode({
                    'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
            self._download_webpage(
                confirm_url, video_id,
-                note=u'Confirming after redirect')
+                note='Confirming after redirect')
            webpage = self._download_webpage(url, video_id)

        formats = []
        for format_id in ('normal', 'hq'):
            rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
-            video_url = self._search_regex(rex, webpage, u'video file URLx',
+            video_url = self._search_regex(rex, webpage, 'video file URLx',
                                           fatal=False)
            if not video_url:
                continue
@ -72,14 +74,13 @@ class AddAnimeIE(InfoExtractor):
                'format_id': format_id,
                'url': video_url,
            })
-        if not formats:
-            raise ExtractorError(u'Cannot find any video format!')
+        self._sort_formats(formats)
        video_title = self._og_search_title(webpage)
        video_description = self._og_search_description(webpage)

        return {
            '_type': 'video',
-            'id':  video_id,
+            'id': video_id,
            'formats': formats,
            'title': video_title,
            'description': video_description
--- a/youtube_dl/extractor/aol.py
+++ b/youtube_dl/extractor/aol.py
@ -0,0 +1,28 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .fivemin import FiveMinIE
+
+
+class AolIE(InfoExtractor):
+    """Extractor for on.aol.com video pages.
+
+    The numeric id at the end of the URL path is handed to
+    FiveMinIE._build_result, whose return value is the extraction result.
+    """
+    IE_NAME = 'on.aol.com'
+    _VALID_URL = r'http://on\.aol\.com/video/.*-(?P<id>\d+)($|\?)'
+
+    _TEST = {
+        'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
+        'md5': '18ef68f48740e86ae94b98da815eec42',
+        'info_dict': {
+            'id': '518167793',
+            'ext': 'mp4',
+            'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
+        },
+        'add_ie': ['FiveMin'],
+    }
+
+    def _real_extract(self, url):
+        # No page is downloaded here: the id comes straight from the URL.
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        self.to_screen('Downloading 5min.com video %s' % video_id)
+        return FiveMinIE._build_result(video_id)
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@ -6,7 +6,6 @@ import json
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
-    determine_ext,
 )


@ -16,9 +15,10 @@ class AppleTrailersIE(InfoExtractor):
        "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
        "playlist": [
            {
-                "file": "manofsteel-trailer4.mov",
                "md5": "d97a8e575432dbcb81b7c3acb741f8a8",
                "info_dict": {
+                    "id": "manofsteel-trailer4",
+                    "ext": "mov",
                    "duration": 111,
                    "title": "Trailer 4",
                    "upload_date": "20130523",
@ -26,9 +26,10 @@ class AppleTrailersIE(InfoExtractor):
                },
            },
            {
-                "file": "manofsteel-trailer3.mov",
                "md5": "b8017b7131b721fb4e8d6f49e1df908c",
                "info_dict": {
+                    "id": "manofsteel-trailer3",
+                    "ext": "mov",
                    "duration": 182,
                    "title": "Trailer 3",
                    "upload_date": "20130417",
@ -36,9 +37,10 @@ class AppleTrailersIE(InfoExtractor):
                },
            },
            {
-                "file": "manofsteel-trailer.mov",
                "md5": "d0f1e1150989b9924679b441f3404d48",
                "info_dict": {
+                    "id": "manofsteel-trailer",
+                    "ext": "mov",
                    "duration": 148,
                    "title": "Trailer",
                    "upload_date": "20121212",
@ -46,15 +48,16 @@ class AppleTrailersIE(InfoExtractor):
                },
            },
            {
-                "file": "manofsteel-teaser.mov",
                "md5": "5fe08795b943eb2e757fa95cb6def1cb",
                "info_dict": {
+                    "id": "manofsteel-teaser",
+                    "ext": "mov",
                    "duration": 93,
                    "title": "Teaser",
                    "upload_date": "20120721",
                    "uploader_id": "wb",
                },
-            }
+            },
        ]
    }

@ -65,16 +68,16 @@ class AppleTrailersIE(InfoExtractor):
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')

-        playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
+        playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
        def fix_html(s):
-            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s)
+            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
            s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
            # The ' in the onClick attributes are not escaped, it couldn't be parsed
            # like: http://trailers.apple.com/trailers/wb/gravity/
            def _clean_json(m):
-                return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
+                return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
            s = re.sub(self._JSON_RE, _clean_json, s)
-            s = u'<html>' + s + u'</html>'
+            s = '<html>' + s + u'</html>'
            return s
        doc = self._download_xml(playlist_url, movie, transform_source=fix_html)

@ -82,7 +85,7 @@ class AppleTrailersIE(InfoExtractor):
        for li in doc.findall('./div/ul/li'):
            on_click = li.find('.//a').attrib['onClick']
            trailer_info_json = self._search_regex(self._JSON_RE,
-                on_click, u'trailer info')
+                on_click, 'trailer info')
            trailer_info = json.loads(trailer_info_json)
            title = trailer_info['title']
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
@ -98,8 +101,7 @@ class AppleTrailersIE(InfoExtractor):
            first_url = trailer_info['url']
            trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
            settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
-            settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json')
-            settings = json.loads(settings_json)
+            settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')

            formats = []
            for format in settings['metadata']['sizes']:
@ -107,7 +109,6 @@ class AppleTrailersIE(InfoExtractor):
                format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
                formats.append({
                    'url': format_url,
-                    'ext': determine_ext(format_url),
                    'format': format['type'],
                    'width': format['width'],
                    'height': int(format['height']),
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@ -2,7 +2,6 @@
 from __future__ import unicode_literals

 import re
-import json

 from .common import InfoExtractor
 from ..utils import (
@ -19,114 +18,41 @@ from ..utils import (
 # is different for each one. The videos usually expire in 7 days, so we can't
 # add tests.

-class ArteTvIE(InfoExtractor):
-    _VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
-    _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
-    _LIVE_URL = r'index-[0-9]+\.html$'

+class ArteTvIE(InfoExtractor):
+    _VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
    IE_NAME = 'arte.tv'

-    @classmethod
-    def suitable(cls, url):
-        return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL))
-
-    # TODO implement Live Stream
-    # from ..utils import compat_urllib_parse
-    # def extractLiveStream(self, url):
-    #     video_lang = url.split('/')[-4]
-    #     info = self.grep_webpage(
-    #         url,
-    #         r'src="(.*?/videothek_js.*?\.js)',
-    #         0,
-    #         [
-    #             (1, 'url', 'Invalid URL: %s' % url)
-    #         ]
-    #     )
-    #     http_host = url.split('/')[2]
-    #     next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url')))
-    #     info = self.grep_webpage(
-    #         next_url,
-    #         r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
-    #             '(http://.*?\.swf).*?' +
-    #             '(rtmp://.*?)\'',
-    #         re.DOTALL,
-    #         [
-    #             (1, 'path',   'could not extract video path: %s' % url),
-    #             (2, 'player', 'could not extract video player: %s' % url),
-    #             (3, 'url',    'could not extract video url: %s' % url)
-    #         ]
-    #     )
-    #     video_url = '%s/%s' % (info.get('url'), info.get('path'))
-
    def _real_extract(self, url):
-        mobj = re.match(self._VIDEOS_URL, url)
-        if mobj is not None:
-            id = mobj.group('id')
-            lang = mobj.group('lang')
-            return self._extract_video(url, id, lang)
+        mobj = re.match(self._VALID_URL, url)
+        lang = mobj.group('lang')
+        video_id = mobj.group('id')

-        mobj = re.match(self._LIVEWEB_URL, url)
-        if mobj is not None:
-            name = mobj.group('name')
-            lang = mobj.group('lang')
-            return self._extract_liveweb(url, name, lang)
-
-        if re.search(self._LIVE_URL, url) is not None:
-            raise ExtractorError('Arte live streams are not yet supported, sorry')
-            # self.extractLiveStream(url)
-            # return
-
-        raise ExtractorError('No video found')
-
-    def _extract_video(self, url, video_id, lang):
-        """Extract from videos.arte.tv"""
        ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
        ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
        ref_xml_doc = self._download_xml(
            ref_xml_url, video_id, note='Downloading metadata')
        config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
        config_xml_url = config_node.attrib['ref']
-        config_xml = self._download_webpage(
+        config = self._download_xml(
            config_xml_url, video_id, note='Downloading configuration')

-        video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
-        def _key(m):
-            quality = m.group('quality')
-            if quality == 'hd':
-                return 2
-            else:
-                return 1
-        # We pick the best quality
-        video_urls = sorted(video_urls, key=_key)
-        video_url = list(video_urls)[-1].group('url')
-        
-        title = self._html_search_regex(r'<name>(.*?)</name>', config_xml, 'title')
-        thumbnail = self._html_search_regex(r'<firstThumbnailUrl>(.*?)</firstThumbnailUrl>',
-                                            config_xml, 'thumbnail')
-        return {'id': video_id,
-                'title': title,
-                'thumbnail': thumbnail,
-                'url': video_url,
-                'ext': 'flv',
-                }
+        formats = [{
+            'format_id': q.attrib['quality'],
+            'url': q.text,
+            'ext': 'flv',
+            'quality': 2 if q.attrib['quality'] == 'hd' else 1,  # rank 'hd' above any other quality
+        } for q in config.findall('./urls/url')]
+        self._sort_formats(formats)

-    def _extract_liveweb(self, url, name, lang):
-        """Extract form http://liveweb.arte.tv/"""
-        webpage = self._download_webpage(url, name)
-        video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id')
-        config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
-                                            video_id, 'Downloading information')
-        event_doc = config_doc.find('event')
-        url_node = event_doc.find('video').find('urlHd')
-        if url_node is None:
-            url_node = event_doc.find('urlSd')
-
-        return {'id': video_id,
-                'title': event_doc.find('name%s' % lang.capitalize()).text,
-                'url': url_node.text.replace('MP4', 'mp4'),
-                'ext': 'flv',
-                'thumbnail': self._og_search_thumbnail(webpage),
-                }
+        title = config.find('.//name').text
+        thumbnail = config.find('.//firstThumbnailUrl').text
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }


 class ArteTVPlus7IE(InfoExtractor):
@ -152,9 +78,7 @@ class ArteTVPlus7IE(InfoExtractor):
        return self._extract_from_json_url(json_url, video_id, lang)

    def _extract_from_json_url(self, json_url, video_id, lang):
-        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
-        self.report_extraction(video_id)
-        info = json.loads(json_info)
+        info = self._download_json(json_url, video_id)
        player_info = info['videoJsonPlayer']

        info_dict = {
@ -176,6 +100,8 @@ class ArteTVPlus7IE(InfoExtractor):
                l = 'F'
            elif lang == 'de':
                l = 'A'
+            else:
+                l = lang
            regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
            return any(re.match(r, f['versionCode']) for r in regexes)
        # Some formats may not be in the same language as the url
@ -302,5 +228,25 @@ class ArteTVConcertIE(ArteTVPlus7IE):
            'ext': 'mp4',
            'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"',
            'upload_date': '20140128',
+            'description': 'md5:486eb08f991552ade77439fe6d82c305',
        },
    }
+
+
+class ArteTVEmbedIE(ArteTVPlus7IE):
+    """Extractor for www.arte.tv/playerv2/embed.php URLs.
+
+    The player JSON URL, language and video id are all taken from the
+    json_url query value of the embed URL and passed on to
+    _extract_from_json_url.
+    """
+    IE_NAME = 'arte.tv:embed'
+    _VALID_URL = r'''(?x)
+        http://www\.arte\.tv
+        /playerv2/embed\.php\?json_url=
+        (?P<json_url>
+            http://arte\.tv/papi/tvguide/videos/stream/player/
+            (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
+        )
+    '''
+
+    def _real_extract(self, url):
+        # Everything needed is captured by _VALID_URL; the embed page itself is not fetched here.
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        lang = mobj.group('lang')
+        json_url = mobj.group('json_url')
+        return self._extract_from_json_url(json_url, video_id, lang)
--- a/youtube_dl/extractor/auengine.py
+++ b/youtube_dl/extractor/auengine.py
@ -11,22 +11,24 @@ from ..utils import (


 class AUEngineIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?auengine\.com/embed\.php\?.*?file=(?P<id>[^&]+).*?'
+
    _TEST = {
        'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
-        'file': 'lfvlytY6.mp4',
        'md5': '48972bdbcf1a3a2f5533e62425b41d4f',
        'info_dict': {
+            'id': 'lfvlytY6',
+            'ext': 'mp4',
            'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]'
        }
    }
-    _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')
+
        webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
-                webpage, 'title')
+        title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', webpage, 'title')
        title = title.strip()
        links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
        links = map(compat_urllib_parse.unquote, links)
@ -39,14 +41,15 @@ class AUEngineIE(InfoExtractor):
            elif '/videos/' in link:
                video_url = link
        if not video_url:
-            raise ExtractorError(u'Could not find video URL')
+            raise ExtractorError('Could not find video URL')
        ext = '.' + determine_ext(video_url)
        if ext == title[-len(ext):]:
            title = title[:-len(ext)]

        return {
-            'id':        video_id,
-            'url':       video_url,
-            'title':     title,
+            'id': video_id,
+            'url': video_url,
+            'title': title,
            'thumbnail': thumbnail,
+            'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf',
        }
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@ -1,22 +1,21 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
-from .ooyala import OoyalaIE


 class BloombergIE(InfoExtractor):
    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'

    _TEST = {
-        u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
-        u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
-        u'info_dict': {
-            u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
-            u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
-        },
-        u'params': {
-            # Requires ffmpeg (m3u8 manifest)
-            u'skip_download': True,
+        'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+        'md5': '7bf08858ff7c203c870e8a6190e221e5',
+        'info_dict': {
+            'id': 'qurhIVlJSB6hzkVi229d8g',
+            'ext': 'flv',
+            'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
+            'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
        },
    }

@ -24,7 +23,16 @@ class BloombergIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
-        embed_code = self._search_regex(
-            r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
-            'embed code')
-        return OoyalaIE._build_url_result(embed_code)
+        f4m_url = self._search_regex(
+            r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
+            'f4m url')
+        title = re.sub(': Video$', '', self._og_search_title(webpage))
+
+        return {
+            'id': name.split('-')[-1],
+            'title': title,
+            'url': f4m_url,
+            'ext': 'flv',
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@ -87,7 +87,7 @@ class BrightcoveIE(InfoExtractor):
        object_str = object_str.replace('<--', '<!--')
        object_str = fix_xml_ampersands(object_str)

-        object_doc = xml.etree.ElementTree.fromstring(object_str)
+        object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))

        fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
        if fv_el is not None:
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@ -0,0 +1,65 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+)
+
+
+class BYUtvIE(InfoExtractor):
+    """Extractor for byutv.org ``/watch/<uuid>/<slug>`` episode pages.
+
+    The slug captured by ``_VALID_URL`` is used as the video id.  Metadata is
+    read from the ``episode: {...}`` JavaScript object embedded in the page;
+    episodes whose provider type is Ooyala are passed on to the Ooyala extractor.
+    """
+
+    # The dot in the host name is escaped so that it only matches literally.
+    _VALID_URL = r'^https?://(?:www\.)?byutv\.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
+    _TEST = {
+        'url': 'http://www.byutv.org/watch/44e80f7b-e3ba-43ba-8c51-b1fd96c94a79/granite-flats-talking',
+        'info_dict': {
+            'id': 'granite-flats-talking',
+            'ext': 'mp4',
+            'description': 'md5:1a7ae3e153359b7cc355ef3963441e5f',
+            'title': 'Talking',
+            'thumbnail': 're:^https?://.*promo.*'
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return a ``url_transparent`` result pointing at the Ooyala asset.
+
+        Raises ExtractorError if the episode's provider type is not Ooyala.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('video_id')
+
+        webpage = self._download_webpage(url, video_id)
+        episode_code = self._search_regex(
+            r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
+        # The embedded object is JavaScript, not JSON: rewrite
+        # `key: 'value'` pairs as `"key": "value"` before parsing.
+        episode_json = re.sub(
+            r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code)
+        ep = json.loads(episode_json)
+
+        if ep['providerType'] == 'Ooyala':
+            return {
+                '_type': 'url_transparent',
+                'ie_key': 'Ooyala',
+                'url': 'ooyala:%s' % ep['providerId'],
+                'id': video_id,
+                'title': ep['title'],
+                'description': ep.get('description'),
+                'thumbnail': ep.get('imageThumbnail'),
+            }
+        else:
+            # Report the value that was actually tested above.
+            raise ExtractorError('Unsupported provider %s' % ep['providerType'])
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@ -28,7 +28,7 @@ class CanalplusIE(InfoExtractor):
        video_id = mobj.groupdict().get('id')
        if video_id is None:
            webpage = self._download_webpage(url, mobj.group('path'))
-            video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
+            video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, u'video id')
        info_url = self._VIDEO_INFO_TEMPLATE % video_id
        doc = self._download_xml(info_url,video_id, 
                                           u'Downloading video info')
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/cinemassacre.py
@ -9,12 +9,12 @@ from ..utils import (


 class CinemassacreIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?'
+    _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
    _TESTS = [
        {
            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
            'file': '19911.mp4',
-            'md5': 'fde81fbafaee331785f58cd6c0d46190',
+            'md5': '782f8504ca95a0eba8fc9177c373eec7',
            'info_dict': {
                'upload_date': '20121110',
                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
@ -24,7 +24,7 @@ class CinemassacreIE(InfoExtractor):
        {
            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
            'file': '521be8ef82b16.mp4',
-            'md5': 'd72f10cd39eac4215048f62ab477a511',
+            'md5': 'dec39ee5118f8d9cc067f45f9cbe3a35',
            'info_dict': {
                'upload_date': '20131002',
                'title': 'The Mummy’s Hand (1940)',
@ -34,8 +34,9 @@ class CinemassacreIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')

-        webpage = self._download_webpage(url, None)  # Don't know video id yet
+        webpage = self._download_webpage(url, display_id)
        video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
        if not mobj:
@ -43,33 +44,36 @@ class CinemassacreIE(InfoExtractor):
        playerdata_url = mobj.group('embed_url')
        video_id = mobj.group('video_id')

-        video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|',
-            webpage, 'title')
-        video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>',
+        video_title = self._html_search_regex(
+            r'<title>(?P<title>.+?)\|', webpage, 'title')
+        video_description = self._html_search_regex(
+            r'<div class="entry-content">(?P<description>.+?)</div>',
            webpage, 'description', flags=re.DOTALL, fatal=False)
-        if len(video_description) == 0:
-            video_description = None

        playerdata = self._download_webpage(playerdata_url, video_id)

-        sd_url = self._html_search_regex(r'file: \'(?P<sd_file>[^\']+)\', label: \'SD\'', playerdata, 'sd_file')
-        hd_url = self._html_search_regex(r'file: \'(?P<hd_file>[^\']+)\', label: \'HD\'', playerdata, 'hd_file')
+        sd_url = self._html_search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
+        hd_url = self._html_search_regex(
+            r'file: \'([^\']+)\', label: \'HD\'', playerdata, 'hd_file',
+            default=None)
        video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)

-        formats = [
-            {
-                'url': sd_url,
-                'ext': 'mp4',
-                'format': 'sd',
-                'format_id': 'sd',
-            },
-            {
+        formats = [{
+            'url': sd_url,
+            'ext': 'mp4',
+            'format': 'sd',
+            'format_id': 'sd',
+            'quality': 1,
+        }]
+        if hd_url:
+            formats.append({
                'url': hd_url,
                'ext': 'mp4',
                'format': 'hd',
                'format_id': 'hd',
-            },
-        ]
+                'quality': 2,
+            })
+        self._sort_formats(formats)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/clipfish.py
+++ b/youtube_dl/extractor/clipfish.py
@ -1,22 +1,28 @@
+from __future__ import unicode_literals
+
 import re
 import time
 import xml.etree.ElementTree

 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    parse_duration,
+)


 class ClipfishIE(InfoExtractor):
-    IE_NAME = u'clipfish'
+    IE_NAME = 'clipfish'

    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
    _TEST = {
-        u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
-        u'file': u'3966754.mp4',
-        u'md5': u'2521cd644e862936cf2e698206e47385',
-        u'info_dict': {
-            u'title': u'FIFA 14 - E3 2013 Trailer',
-            u'duration': 82,
+        'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
+        'md5': '2521cd644e862936cf2e698206e47385',
+        'info_dict': {
+            'id': '3966754',
+            'ext': 'mp4',
+            'title': 'FIFA 14 - E3 2013 Trailer',
+            'duration': 82,
        },
        u'skip': 'Blocked in the US'
    }
@ -33,21 +39,10 @@ class ClipfishIE(InfoExtractor):
        video_url = doc.find('filename').text
        if video_url is None:
            xml_bytes = xml.etree.ElementTree.tostring(doc)
-            raise ExtractorError(u'Cannot find video URL in document %r' %
+            raise ExtractorError('Cannot find video URL in document %r' %
                                 xml_bytes)
        thumbnail = doc.find('imageurl').text
-        duration_str = doc.find('duration').text
-        m = re.match(
-            r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
-            duration_str)
-        if m:
-            duration = (
-                (int(m.group('hours')) * 60 * 60) +
-                (int(m.group('minutes')) * 60) +
-                (int(m.group('seconds')))
-            )
-        else:
-            duration = None
+        duration = parse_duration(doc.find('duration').text)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/clipsyndicate.py
+++ b/youtube_dl/extractor/clipsyndicate.py
@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@ -11,13 +13,14 @@ class ClipsyndicateIE(InfoExtractor):
    _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'

    _TEST = {
-        u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
-        u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
-        u'info_dict': {
-            u'id': u'4629301',
-            u'ext': u'mp4',
-            u'title': u'Brick Briscoe',
-            u'duration': 612,
+        'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
+        'md5': '4d7d549451bad625e0ff3d7bd56d776c',
+        'info_dict': {
+            'id': '4629301',
+            'ext': 'mp4',
+            'title': 'Brick Briscoe',
+            'duration': 612,
+            'thumbnail': 're:^https?://.+\.jpg',
        },
    }

@ -26,13 +29,13 @@ class ClipsyndicateIE(InfoExtractor):
        video_id = mobj.group('id')
        js_player = self._download_webpage(
            'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
-            video_id, u'Downlaoding player')
+            video_id, 'Downlaoding player')
        # it includes a required token
-        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
+        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars')

        pdoc = self._download_xml(
            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
-            video_id, u'Downloading video info',
+            video_id, 'Downloading video info',
            transform_source=fix_xml_ampersands)

        track_doc = pdoc.find('trackList/track')
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@ -7,8 +7,8 @@ from .mtv import MTVServicesInfoExtractor
 from ..utils import (
    compat_str,
    compat_urllib_parse,
-
    ExtractorError,
+    float_or_none,
    unified_strdate,
 )

@ -32,31 +32,34 @@ class ComedyCentralIE(MTVServicesInfoExtractor):


 class ComedyCentralShowsIE(InfoExtractor):
-    IE_DESC = 'The Daily Show / Colbert Report'
+    IE_DESC = 'The Daily Show / The Colbert Report'
    # urls can be abbreviations like :thedailyshow or :colbert
    # urls for episodes like:
    # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
    #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
-    _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
-                      |(https?://)?(www\.)?
-                          (?P<showname>thedailyshow|colbertnation)\.com/
+    _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
+                      |https?://(:www\.)?
+                          (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
                         (full-episodes/(?P<episode>.*)|
                          (?P<clip>
-                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
-                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
+                              (?:videos/[^/]+/(?P<videotitle>[^/?#]+))
+                              |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
+                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
+                          )|
                          (?P<interview>
-                              extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
-                     $"""
+                              extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
+                     (?:[?#].*|$)'''
    _TEST = {
-        'url': 'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
-        'file': '422212.mp4',
+        'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
        'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
        'info_dict': {
-            "upload_date": "20121214",
-            "description": "Kristen Stewart",
-            "uploader": "thedailyshow",
-            "title": "thedailyshow-kristen-stewart part 1"
+            'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
+            'ext': 'mp4',
+            'upload_date': '20121213',
+            'description': 'Kristen Stewart learns to let loose in "On the Road."',
+            'uploader': 'thedailyshow',
+            'title': 'thedailyshow kristen-stewart part 1',
        }
    }

@ -79,11 +82,6 @@ class ComedyCentralShowsIE(InfoExtractor):
        '400': (384, 216),
    }

-    @classmethod
-    def suitable(cls, url):
-        """Receives a URL and returns True if suitable for this IE."""
-        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
-
    @staticmethod
    def _transform_rtmp_url(rtmp_video_url):
        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
@ -99,14 +97,16 @@ class ComedyCentralShowsIE(InfoExtractor):

        if mobj.group('shortname'):
            if mobj.group('shortname') in ('tds', 'thedailyshow'):
-                url = 'http://www.thedailyshow.com/full-episodes/'
+                url = 'http://thedailyshow.cc.com/full-episodes/'
            else:
-                url = 'http://www.colbertnation.com/full-episodes/'
+                url = 'http://thecolbertreport.cc.com/full-episodes/'
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            assert mobj is not None

        if mobj.group('clip'):
-            if mobj.group('showname') == 'thedailyshow':
+            if mobj.group('videotitle'):
+                epTitle = mobj.group('videotitle')
+            elif mobj.group('showname') == 'thedailyshow':
                epTitle = mobj.group('tdstitle')
            else:
                epTitle = mobj.group('cntitle')
@ -120,9 +120,9 @@ class ComedyCentralShowsIE(InfoExtractor):
                epTitle = mobj.group('showname')
            else:
                epTitle = mobj.group('episode')
+        show_name = mobj.group('showname')

-        self.report_extraction(epTitle)
-        webpage,htmlHandle = self._download_webpage_handle(url, epTitle)
+        webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
        if dlNewest:
            url = htmlHandle.geturl()
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@ -130,71 +130,86 @@ class ComedyCentralShowsIE(InfoExtractor):
                raise ExtractorError('Invalid redirected URL: ' + url)
            if mobj.group('episode') == '':
                raise ExtractorError('Redirected URL is still not specific: ' + url)
-            epTitle = mobj.group('episode')
+            epTitle = mobj.group('episode').rpartition('/')[-1]

        mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
-
        if len(mMovieParams) == 0:
            # The Colbert Report embeds the information in a without
            # a URL prefix; so extract the alternate reference
            # and then add the URL prefix manually.

-            altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
+            altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
            if len(altMovieParams) == 0:
                raise ExtractorError('unable to find Flash URL in webpage ' + url)
            else:
                mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]

        uri = mMovieParams[0][1]
-        indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
-        idoc = self._download_xml(indexUrl, epTitle,
-                                          'Downloading show index',
-                                          'unable to download episode index')
+        # Correct cc.com in uri
+        uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.cc.com', uri)

-        results = []
+        index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri}))
+        idoc = self._download_xml(
+            index_url, epTitle,
+            'Downloading show index', 'Unable to download episode index')

-        itemEls = idoc.findall('.//item')
-        for partNum,itemEl in enumerate(itemEls):
-            mediaId = itemEl.findall('./guid')[0].text
-            shortMediaId = mediaId.split(':')[-1]
-            showId = mediaId.split(':')[-2].replace('.com', '')
-            officialTitle = itemEl.findall('./title')[0].text
-            officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)
+        title = idoc.find('./channel/title').text
+        description = idoc.find('./channel/description').text

-            configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
-                        compat_urllib_parse.urlencode({'uri': mediaId}))
-            cdoc = self._download_xml(configUrl, epTitle,
-                                               'Downloading configuration for %s' % shortMediaId)
+        entries = []
+        item_els = idoc.findall('.//item')
+        for part_num, itemEl in enumerate(item_els):
+            upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text)
+            thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
+
+            content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
+            duration = float_or_none(content.attrib.get('duration'))
+            mediagen_url = content.attrib['url']
+            guid = itemEl.find('./guid').text.rpartition(':')[-1]
+
+            cdoc = self._download_xml(
+                mediagen_url, epTitle,
+                'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))

            turls = []
            for rendition in cdoc.findall('.//rendition'):
                finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
                turls.append(finfo)

-            if len(turls) == 0:
-                self._downloader.report_error('unable to download ' + mediaId + ': No videos found')
-                continue
-
            formats = []
            for format, rtmp_video_url in turls:
                w, h = self._video_dimensions.get(format, (None, None))
                formats.append({
+                    'format_id': 'vhttp-%s' % format,
                    'url': self._transform_rtmp_url(rtmp_video_url),
                    'ext': self._video_extensions.get(format, 'mp4'),
-                    'format_id': format,
                    'height': h,
                    'width': w,
                })
+                formats.append({
+                    'format_id': 'rtmp-%s' % format,
+                    'url': rtmp_video_url,
+                    'ext': self._video_extensions.get(format, 'mp4'),
+                    'height': h,
+                    'width': w,
+                })
+                self._sort_formats(formats)

-            effTitle = showId + '-' + epTitle + ' part ' + compat_str(partNum+1)
-            results.append({
-                'id': shortMediaId,
+            virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
+            entries.append({
+                'id': guid,
+                'title': virtual_id,
                'formats': formats,
-                'uploader': showId,
-                'upload_date': officialDate,
-                'title': effTitle,
-                'thumbnail': None,
-                'description': compat_str(officialTitle),
+                'uploader': show_name,
+                'upload_date': upload_date,
+                'duration': duration,
+                'thumbnail': thumbnail,
+                'description': description,
            })

-        return results
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'title': show_name + ' ' + title,
+            'description': description,
+        }
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -74,7 +74,7 @@ class InfoExtractor(object):
                                 "http", "https", "rtsp", "rtmp", "m3u8" or so.
                    * preference Order number of this format. If this field is
                                 present and not None, the formats get sorted
-                                 by this field.
+                                 by this field, regardless of all other values.
                                 -1 for default (order by other properties),
                                 -2 or smaller for less than default.
                    * quality    Order number of the video quality of this
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@ -10,9 +10,9 @@ from ..utils import (


 class CSpanIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)'
+    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
    IE_DESC = 'C-SPAN'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
        'md5': '8e44ce11f0f725527daccc453f553eb0',
        'info_dict': {
@ -22,13 +22,24 @@ class CSpanIE(InfoExtractor):
            'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
        },
        'skip': 'Regularly fails on travis, for unknown reasons',
-    }
+    }, {
+        'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
+        # For whatever reason, the served video alternates between
+        # two different ones
+        #'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
+        'info_dict': {
+            'id': '340723',
+            'ext': 'mp4',
+            'title': 'International Health Care Models',
+            'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
+        }
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        page_id = mobj.group('id')
        webpage = self._download_webpage(url, page_id)
-        video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id')
+        video_id = self._search_regex(r'progid=\'?([0-9]+)\'?>', webpage, 'video id')

        description = self._html_search_regex(
            [
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@ -1,25 +1,28 @@
 # encoding: utf-8
+
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
-    determine_ext,
 )


 class DaumIE(InfoExtractor):
    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
-    IE_NAME = u'daum.net'
+    IE_NAME = 'daum.net'

    _TEST = {
-        u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
-        u'file': u'52554690.mp4',
-        u'info_dict': {
-            u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
-            u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
-            u'upload_date': u'20130831',
-            u'duration': 3868,
+        'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
+        'info_dict': {
+            'id': '52554690',
+            'ext': 'mp4',
+            'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
+            'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
+            'upload_date': '20130831',
+            'duration': 3868,
        },
    }

@ -30,14 +33,14 @@ class DaumIE(InfoExtractor):
        webpage = self._download_webpage(canonical_url, video_id)
        full_id = self._search_regex(
            r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
-            webpage, u'full id')
+            webpage, 'full id')
        query = compat_urllib_parse.urlencode({'vid': full_id})
        info = self._download_xml(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
-            u'Downloading video info')
+            'Downloading video info')
        urls = self._download_xml(
            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
-            video_id, u'Downloading video formats info')
+            video_id, 'Downloading video formats info')

        self.to_screen(u'%s: Getting video urls' % video_id)
        formats = []
@ -53,7 +56,6 @@ class DaumIE(InfoExtractor):
            format_url = url_doc.find('result/url').text
            formats.append({
                'url': format_url,
-                'ext': determine_ext(format_url),
                'format_id': profile,
            })

--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@ -10,9 +10,10 @@ class DiscoveryIE(InfoExtractor):
    _VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
    _TEST = {
        'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
-        'file': '614784.mp4',
        'md5': 'e12614f9ee303a6ccef415cb0793eba2',
        'info_dict': {
+            'id': '614784',
+            'ext': 'mp4',
            'title': 'MythBusters: Mission Impossible Outtakes',
            'description': ('Watch Jamie Hyneman and Adam Savage practice being'
                ' each other -- to the point of confusing Jamie\'s dog -- and '
@ -34,7 +35,7 @@ class DiscoveryIE(InfoExtractor):
        formats = []
        for f in info['mp4']:
            formats.append(
-                {'url': f['src'], r'ext': r'mp4', 'tbr': int(f['bitrate'][:-1])})
+                {'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])})

        return {
            'id': info['contentId'],
--- a/youtube_dl/extractor/ehow.py
+++ b/youtube_dl/extractor/ehow.py
@ -1,23 +1,25 @@
+from __future__ import unicode_literals
+
 import re

 from ..utils import (
    compat_urllib_parse,
-    determine_ext
 )
 from .common import InfoExtractor


 class EHowIE(InfoExtractor):
-    IE_NAME = u'eHow'
-    _VALID_URL = r'(?:https?://)?(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
+    IE_NAME = 'eHow'
+    _VALID_URL = r'https?://(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
    _TEST = {
-        u'url': u'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
-        u'file': u'12245069.flv',
-        u'md5': u'9809b4e3f115ae2088440bcb4efbf371',
-        u'info_dict': {
-            u"title": u"Hardwood Flooring Basics",
-            u"description": u"Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...",
-   			u"uploader": u"Erick Nathan"
+        'url': 'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
+        'md5': '9809b4e3f115ae2088440bcb4efbf371',
+        'info_dict': {
+            'id': '12245069',
+            'ext': 'flv',
+            'title': 'Hardwood Flooring Basics',
+            'description': 'Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...',
+            'uploader': 'Erick Nathan',
        }
    }

@ -26,21 +28,16 @@ class EHowIE(InfoExtractor):
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
-            webpage, u'video URL')
-        final_url = compat_urllib_parse.unquote(video_url)        
-        uploader = self._search_regex(r'<meta name="uploader" content="(.+?)" />',
-            webpage, u'uploader')
+            webpage, 'video URL')
+        final_url = compat_urllib_parse.unquote(video_url)
+        uploader = self._html_search_meta('uploader', webpage)
        title = self._og_search_title(webpage).replace(' | eHow', '')
-        ext = determine_ext(final_url)

        return {
-            '_type':       'video',
-            'id':          video_id,
-            'url':         final_url,
-            'ext':         ext,
-            'title':       title,
-            'thumbnail':   self._og_search_thumbnail(webpage),
+            'id': video_id,
+            'url': final_url,
+            'title': title,
+            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
-            'uploader':    uploader,
+            'uploader': uploader,
        }
-
--- a/youtube_dl/extractor/engadget.py
+++ b/youtube_dl/extractor/engadget.py
@ -0,0 +1,53 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .fivemin import FiveMinIE
+from ..utils import (
+    url_basename,
+)
+
+
+class EngadgetIE(InfoExtractor):
+    """Extractor for engadget.com pages that embed 5min videos.
+
+    ``/video/5min/<id>`` URLs resolve directly to one 5min result; any other
+    matching page is downloaded and every ``playList`` id found in an iframe
+    becomes an entry of a playlist result.
+    """
+
+    # Dots in the host name are escaped so that they only match literally.
+    _VALID_URL = r'''(?x)https?://www\.engadget\.com/
+        (?:video/5min/(?P<id>\d+)|
+            [\d/]+/.*?)
+        '''
+
+    _TEST = {
+        'url': 'http://www.engadget.com/video/5min/518153925/',
+        'md5': 'c6820d4828a5064447a4d9fc73f312c9',
+        'info_dict': {
+            'id': '518153925',
+            'ext': 'mp4',
+            'title': 'Samsung Galaxy Tab Pro 8.4 Review',
+        },
+        'add_ie': ['FiveMin'],
+    }
+
+    def _real_extract(self, url):
+        """Return a single 5min url result, or a playlist of them."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        if video_id is not None:
+            return FiveMinIE._build_result(video_id)
+        else:
+            # No id in the URL: collect the players embedded in the page.
+            title = url_basename(url)
+            webpage = self._download_webpage(url, title)
+            ids = re.findall(r'<iframe[^>]+?playList=(\d+)', webpage)
+            return {
+                '_type': 'playlist',
+                'title': title,
+                'entries': [FiveMinIE._build_result(vid) for vid in ids]
+            }
--- a/youtube_dl/extractor/fivemin.py
+++ b/youtube_dl/extractor/fivemin.py
@ -0,0 +1,72 @
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_str,
+)
+
+
+class FiveMinIE(InfoExtractor):
+    """Extractor for videos hosted by 5min.com.
+
+    Handles PlayerSeed.js embed URLs that carry a ``playList=<id>`` parameter
+    as well as the ``5min:<id>`` form produced by _build_result().
+    """
+
+    IE_NAME = '5min'
+    _VALID_URL = r'''(?x)
+        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(.*?&)?playList=|
+            5min:)
+        (?P<id>\d+)
+        '''
+
+    _TEST = {
+        # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
+        'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
+        'md5': '4f7b0b79bf1a470e5004f7112385941d',
+        'info_dict': {
+            'id': '518013791',
+            'ext': 'mp4',
+            'title': 'iPad Mini with Retina Display Review',
+        },
+    }
+
+    @classmethod
+    def _build_result(cls, video_id):
+        """Return a url result addressing ``5min:<video_id>`` to this extractor."""
+        return cls.url_result('5min:%s' % video_id, cls.ie_key())
+
+    def _real_extract(self, url):
+        """Download the video metadata and list one format per rendition."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        info = self._download_json(
+            'https://syn.5min.com/handlers/SenseHandler.ashx?func=GetResults&'
+            'playlist=%s&url=https' % video_id,
+            video_id)['binding'][0]
+
+        # Media URLs contain a second number: the video id without its last
+        # two digits, plus one; its last three digits are a path segment too.
+        second_id = compat_str(int(video_id[:-2]) + 1)
+        # Collect the advertised rendition IDs once instead of rescanning the
+        # list for every candidate quality.
+        available = set(r['ID'] for r in info['Renditions'])
+        formats = []
+        for quality, height in [(1, 320), (2, 480), (4, 720), (8, 1080)]:
+            if quality in available:
+                formats.append({
+                    'format_id': compat_str(quality),
+                    'url': 'http://avideos.5min.com/%s/%s/%s_%s.mp4' % (second_id[-3:], second_id, video_id, quality),
+                    'height': height,
+                })
+        # Hand the list to the base-class _sort_formats helper, matching what
+        # InstagramUserIE in this changeset does before returning 'formats'.
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': info['Title'],
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -25,6 +25,7 @@ from ..utils import (
 from .brightcove import BrightcoveIE
 from .ooyala import OoyalaIE
 from .rutv import RUTVIE
+from .smotri import SmotriIE


 class GenericIE(InfoExtractor):
@ -81,6 +82,17 @@ class GenericIE(InfoExtractor):
            },
            'add_ie': ['Brightcove'],
        },
+        {
+            'url': 'http://www.championat.com/video/football/v/87/87499.html',
+            'md5': 'fb973ecf6e4a78a67453647444222983',
+            'info_dict': {
+                'id': '3414141473001',
+                'ext': 'mp4',
+                'title': 'Видео. Удаление Дзагоева (ЦСКА)',
+                'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
+                'uploader': 'Championat',
+            },
+        },
        # Direct link to a video
        {
            'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
@ -102,6 +114,20 @@ class GenericIE(InfoExtractor):
                'title': '2cc213299525360.mov',  # that's what we get
            },
        },
+        # second style of embedded ooyala videos
+        {
+            'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html',
+            'info_dict': {
+                'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk',
+                'ext': 'mp4',
+                'title': 'Behind-the-scenes: Financial Review Sunday ',
+                'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.',
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
        # google redirect
        {
            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@ -171,7 +197,48 @@ class GenericIE(InfoExtractor):
                'uploader': 'Ze Frank',
                'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
            }
-        }
+        },
+        # nowvideo embed hidden behind percent encoding
+        {
+            'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
+            'md5': '2baf4ddd70f697d94b1c18cf796d5107',
+            'info_dict': {
+                'id': '06e53103ca9aa',
+                'ext': 'flv',
+                'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
+                'description': 'No description',
+            },
+        },
+        # arte embed
+        {
+            'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
+            'md5': '7653032cbb25bf6c80d80f217055fa43',
+            'info_dict': {
+                'id': '048195-004_PLUS7-F',
+                'ext': 'flv',
+                'title': 'X:enius',
+                'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
+                'upload_date': '20140320',
+            },
+            'params': {
+                'skip_download': 'Requires rtmpdump'
+            }
+        },
+        # smotri embed
+        {
+            'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml',
+            'md5': 'ec40048448e9284c9a1de77bb188108b',
+            'info_dict': {
+                'id': 'v27008541fad',
+                'ext': 'mp4',
+                'title': 'Крым и Севастополь вошли в состав России',
+                'description': 'md5:fae01b61f68984c7bd2fa741e11c3175',
+                'duration': 900,
+                'upload_date': '20140318',
+                'uploader': 'rbctv_2012_4',
+                'uploader_id': 'rbctv_2012_4',
+            },
+        },
    ]

    def report_download_webpage(self, video_id):
@ -260,13 +327,16 @@ class GenericIE(InfoExtractor):
        if not parsed_url.scheme:
            default_search = self._downloader.params.get('default_search')
            if default_search is None:
-                default_search = 'auto'
+                default_search = 'auto_warning'

-            if default_search == 'auto':
+            if default_search in ('auto', 'auto_warning'):
                if '/' in url:
                    self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
                    return self.url_result('http://' + url)
                else:
+                    if default_search == 'auto_warning':
+                        self._downloader.report_warning(
+                            'Falling back to youtube search for  %s . Set --default-search to "auto" to suppress this warning.' % url)
                    return self.url_result('ytsearch:' + url)
            else:
                assert ':' in default_search
@ -323,6 +393,11 @@ class GenericIE(InfoExtractor):
        except compat_xml_parse_error:
            pass

+        # Sometimes embedded video player is hidden behind percent encoding
+        # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
+        # Unescaping the whole page allows us to handle those cases in a generic way
+        webpage = compat_urllib_parse.unquote(webpage)
+
        # it's tempting to parse this further, but you would
        # have to take into account all the variations like
        #   Video Title - Site Name
@ -424,9 +499,10 @@ class GenericIE(InfoExtractor):
            return self.url_result(mobj.group('url'))

        # Look for Ooyala videos
-        mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage)
+        mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
+             re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
        if mobj is not None:
-            return OoyalaIE._build_url_result(mobj.group(1))
+            return OoyalaIE._build_url_result(mobj.group('ec'))

        # Look for Aparat videos
        mobj = re.search(r'<iframe src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
@ -488,6 +564,24 @@ class GenericIE(InfoExtractor):
        if rutv_url:
            return self.url_result(rutv_url, 'RUTV')

+        # Look for embedded TED player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'TED')
+
+        # Look for embedded arte.tv player
+        mobj = re.search(
+            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'ArteTVEmbed')
+
+        # Look for embedded smotri.com player
+        smotri_url = SmotriIE._extract_url(webpage)
+        if smotri_url:
+            return self.url_result(smotri_url, 'Smotri')
+
        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if mobj is None:
@ -500,12 +594,6 @@ class GenericIE(InfoExtractor):
            # Broaden the search a little bit: JWPlayer JS loader
            mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)

-        # Look for embedded TED player
-        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
-        if mobj is not None:
-            return self.url_result(mobj.group('url'), 'TED')
-
        if mobj is None:
            # Try to find twitter cards info
            mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
--- a/youtube_dl/extractor/huffpost.py
+++ b/youtube_dl/extractor/huffpost.py
@ -21,9 +21,10 @@ class HuffPostIE(InfoExtractor):

    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
-        'file': '52dd3e4b02a7602131000677.mp4',
        'md5': '55f5e8981c1c80a64706a44b74833de8',
        'info_dict': {
+            'id': '52dd3e4b02a7602131000677',
+            'ext': 'mp4',
            'title': 'Legalese It! with @MikeSacksHP',
            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more.  ',
            'duration': 1549,
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@ -1,10 +1,8 @@
+from __future__ import unicode_literals
+
 import re
-import json

 from .common import InfoExtractor
-from ..utils import (
-    determine_ext,
-)


 class IGNIE(InfoExtractor):
@ -14,52 +12,57 @@ class IGNIE(InfoExtractor):
    """

    _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)'
-    IE_NAME = u'ign.com'
+    IE_NAME = 'ign.com'

    _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
-    _DESCRIPTION_RE = [r'<span class="page-object-description">(.+?)</span>',
-                       r'id="my_show_video">.*?<p>(.*?)</p>',
-                       ]
+    _DESCRIPTION_RE = [
+        r'<span class="page-object-description">(.+?)</span>',
+        r'id="my_show_video">.*?<p>(.*?)</p>',
+    ]

    _TESTS = [
        {
-            u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
-            u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
-            u'md5': u'eac8bdc1890980122c3b66f14bdd02e9',
-            u'info_dict': {
-                u'title': u'The Last of Us Review',
-                u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c',
+            'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
+            'md5': 'eac8bdc1890980122c3b66f14bdd02e9',
+            'info_dict': {
+                'id': '8f862beef863986b2785559b9e1aa599',
+                'ext': 'mp4',
+                'title': 'The Last of Us Review',
+                'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
            }
        },
        {
-            u'url': u'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
-            u'playlist': [
+            'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
+            'playlist': [
                {
-                    u'file': u'5ebbd138523268b93c9141af17bec937.mp4',
-                    u'info_dict': {
-                        u'title': u'GTA 5 Video Review',
-                        u'description': u'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
+                    'info_dict': {
+                        'id': '5ebbd138523268b93c9141af17bec937',
+                        'ext': 'mp4',
+                        'title': 'GTA 5 Video Review',
+                        'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
                    },
                },
                {
-                    u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
-                    u'info_dict': {
-                        u'title': u'26 Twisted Moments from GTA 5 in Slow Motion',
-                        u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
+                    'info_dict': {
+                        'id': '638672ee848ae4ff108df2a296418ee2',
+                        'ext': 'mp4',
+                        'title': '26 Twisted Moments from GTA 5 in Slow Motion',
+                        'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
                    },
                },
            ],
-            u'params': {
-                u'skip_download': True,
+            'params': {
+                'skip_download': True,
            },
        },
    ]

    def _find_video_id(self, webpage):
-        res_id = [r'data-video-id="(.+?)"',
-                  r'<object id="vid_(.+?)"',
-                  r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
-                  ]
+        res_id = [
+            r'data-video-id="(.+?)"',
+            r'<object id="vid_(.+?)"',
+            r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
+        ]
        return self._search_regex(res_id, webpage, 'video id')

    def _real_extract(self, url):
@ -68,7 +71,7 @@ class IGNIE(InfoExtractor):
        page_type = mobj.group('type')
        webpage = self._download_webpage(url, name_or_id)
        if page_type == 'articles':
-            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
+            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, 'video url')
            return self.url_result(video_url, ie='IGN')
        elif page_type != 'video':
            multiple_urls = re.findall(
@ -80,41 +83,37 @@ class IGNIE(InfoExtractor):
        video_id = self._find_video_id(webpage)
        result = self._get_video_info(video_id)
        description = self._html_search_regex(self._DESCRIPTION_RE,
-                                              webpage, 'video description',
-                                              flags=re.DOTALL)
+            webpage, 'video description', flags=re.DOTALL)
        result['description'] = description
        return result

    def _get_video_info(self, video_id):
        config_url = self._CONFIG_URL_TEMPLATE % video_id
-        config = json.loads(self._download_webpage(config_url, video_id,
-                            u'Downloading video info'))
+        config = self._download_json(config_url, video_id)
        media = config['playlist']['media']
-        video_url = media['url']

-        return {'id': media['metadata']['videoId'],
-                'url': video_url,
-                'ext': determine_ext(video_url),
-                'title': media['metadata']['title'],
-                'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
-                }
+        return {
+            'id': media['metadata']['videoId'],
+            'url': media['url'],
+            'title': media['metadata']['title'],
+            'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
+        }


 class OneUPIE(IGNIE):
-    """Extractor for 1up.com, it uses the ign videos system."""
-
    _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
    IE_NAME = '1up.com'

    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'

    _TEST = {
-        u'url': u'http://gamevideos.1up.com/video/id/34976',
-        u'file': u'34976.mp4',
-        u'md5': u'68a54ce4ebc772e4b71e3123d413163d',
-        u'info_dict': {
-            u'title': u'Sniper Elite V2 - Trailer',
-            u'description': u'md5:5d289b722f5a6d940ca3136e9dae89cf',
+        'url': 'http://gamevideos.1up.com/video/id/34976',
+        'md5': '68a54ce4ebc772e4b71e3123d413163d',
+        'info_dict': {
+            'id': '34976',
+            'ext': 'mp4',
+            'title': 'Sniper Elite V2 - Trailer',
+            'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf',
        }
    }

@ -123,7 +122,6 @@ class OneUPIE(IGNIE):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        id = mobj.group('name_or_id')
        result = super(OneUPIE, self)._real_extract(url)
-        result['id'] = id
+        result['id'] = mobj.group('name_or_id')
        return result
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@ -3,6 +3,9 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+)


 class InstagramIE(InfoExtractor):
@ -37,3 +40,81 @ class InstagramIE(InfoExtractor):
            'uploader_id': uploader_id,
            'description': desc,
        }
+
+
+class InstagramUserIE(InfoExtractor):
+    """Playlist extractor for the videos posted by one Instagram user.
+
+    Walks the user's paginated ``/media`` JSON feed and keeps only the items
+    whose ``type`` is ``video``.
+    """
+
+    _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+    IE_DESC = 'Instagram user profile'
+    IE_NAME = 'instagram:user'
+
+    def _real_extract(self, url):
+        """Return a playlist dict with one entry per video item of the feed."""
+        mobj = re.match(self._VALID_URL, url)
+        uploader_id = mobj.group('username')
+
+        entries = []
+        page_count = 0
+        media_url = 'http://instagram.com/%s/media' % uploader_id
+        while True:
+            page = self._download_json(
+                media_url, uploader_id,
+                note='Downloading page %d ' % (page_count + 1),
+            )
+            page_count += 1
+
+            for it in page['items']:
+                if it.get('type') != 'video':
+                    continue
+                # A key may be present with a JSON null value, in which case
+                # dict.get() returns None rather than the {} default; `or {}`
+                # keeps the chained lookups below from raising AttributeError.
+                like_count = int_or_none((it.get('likes') or {}).get('count'))
+                user = it.get('user') or {}
+
+                formats = [{
+                    'format_id': k,
+                    'height': v.get('height'),
+                    'width': v.get('width'),
+                    'url': v['url'],
+                } for k, v in it['videos'].items()]
+                self._sort_formats(formats)
+
+                thumbnails_el = it.get('images') or {}
+                thumbnail = (thumbnails_el.get('thumbnail') or {}).get('url')
+
+                # Fall back to the item id when the caption or its text is missing, null or empty.
+                title = (it.get('caption') or {}).get('text') or it['id']
+
+                entries.append({
+                    'id': it['id'],
+                    'title': title,
+                    'formats': formats,
+                    'thumbnail': thumbnail,
+                    'webpage_url': it.get('link'),
+                    'uploader': user.get('full_name'),
+                    'uploader_id': user.get('username'),
+                    'like_count': like_count,
+                    'timestamp': int_or_none(it.get('created_time')),
+                })
+
+            # An empty page ends the loop; otherwise the next request starts
+            # from the id of the last item of the current page.
+            if not page['items']:
+                break
+            max_id = page['items'][-1]['id']
+            media_url = (
+                'http://instagram.com/%s/media?max_id=%s' % (
+                    uploader_id, max_id))
+
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'id': uploader_id,
+            'title': uploader_id,
+        }
--- a/youtube_dl/extractor/kickstarter.py
+++ b/youtube_dl/extractor/kickstarter.py
@ -1,37 +1,39 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor


 class KickStarterIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*'
+    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
    _TEST = {
-        u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location",
-        u"file": u"1404461844.mp4",
-        u"md5": u"c81addca81327ffa66c642b5d8b08cab",
-        u"info_dict": {
-            u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling",
+        'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location',
+        'md5': 'c81addca81327ffa66c642b5d8b08cab',
+        'info_dict': {
+            'id': '1404461844',
+            'ext': 'mp4',
+            'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
+            'description': 'A unique motocross documentary that examines the '
+                'life and mind of one of sports most elite athletes: Josh Grant.',
        },
    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')
-        webpage_src = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, video_id)

-        video_url = self._search_regex(r'data-video="(.*?)">',
-            webpage_src, u'video URL')
-        if 'mp4' in video_url:
-            ext = 'mp4'
-        else:
-            ext = 'flv'
-        video_title = self._html_search_regex(r"<title>(.*?)</title>",
-            webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip()
+        video_url = self._search_regex(r'data-video-url="(.*?)"',
+            webpage, 'video URL')
+        video_title = self._html_search_regex(r'<title>(.*?)</title>',
+            webpage, 'title').rpartition('— Kickstarter')[0].strip()

-        results = [{
-                    'id': video_id,
-                    'url': video_url,
-                    'title': video_title,
-                    'ext': ext,
-                    }]
-        return results
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': video_title,
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@ -9,104 +11,103 @@ from ..utils import (
    ExtractorError,
 )

-class MetacafeIE(InfoExtractor):
-    """Information Extractor for metacafe.com."""

-    _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
+class MetacafeIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
    _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
-    IE_NAME = u'metacafe'
+    IE_NAME = 'metacafe'
    _TESTS = [
-    # Youtube video
-    {
-        u"add_ie": ["Youtube"],
-        u"url":  u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
-        u"file":  u"_aUehQsCQtM.mp4",
-        u"info_dict": {
-            u"upload_date": u"20090102",
-            u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!",
-            u"description": u"md5:2439a8ef6d5a70e380c22f5ad323e5a8",
-            u"uploader": u"PBS",
-            u"uploader_id": u"PBS"
-        }
-    },
-    # Normal metacafe video
-    {
-        u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
-        u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad',
-        u'info_dict': {
-            u'id': u'11121940',
-            u'ext': u'mp4',
-            u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4',
-            u'uploader': u'ign',
-            u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
+        # Youtube video
+        {
+            'add_ie': ['Youtube'],
+            'url':  'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/',
+            'info_dict': {
+                'id': '_aUehQsCQtM',
+                'ext': 'mp4',
+                'upload_date': '20090102',
+                'title': 'The Electric Company | "Short I" | PBS KIDS GO!',
+                'description': 'md5:2439a8ef6d5a70e380c22f5ad323e5a8',
+                'uploader': 'PBS',
+                'uploader_id': 'PBS'
+            }
        },
-    },
-    # AnyClip video
-    {
-        u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
-        u"file": u"an-dVVXnuY7Jh77J.mp4",
-        u"info_dict": {
-            u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
-            u"uploader": u"anyclip",
-            u"description": u"md5:38c711dd98f5bb87acf973d573442e67",
+        # Normal metacafe video
+        {
+            'url': 'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
+            'md5': '6e0bca200eaad2552e6915ed6fd4d9ad',
+            'info_dict': {
+                'id': '11121940',
+                'ext': 'mp4',
+                'title': 'News: Stuff You Won\'t Do with Your PlayStation 4',
+                'uploader': 'ign',
+                'description': 'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
+            },
        },
-    },
-    # age-restricted video
-    {
-        u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
-        u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09',
-        u'info_dict': {
-            u'id': u'5186653',
-            u'ext': u'mp4',
-            u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
-            u'uploader': u'Dwayne Pipe',
-            u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b',
-            u'age_limit': 18,
+        # AnyClip video
+        {
+            'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/',
+            'info_dict': {
+                'id': 'an-dVVXnuY7Jh77J',
+                'ext': 'mp4',
+                'title': 'The Andromeda Strain (1971): Stop the Bomb Part 3',
+                'uploader': 'anyclip',
+                'description': 'md5:38c711dd98f5bb87acf973d573442e67',
+            },
        },
-    },
-    # cbs video
-    {
-        u'url': u'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/',
-        u'info_dict': {
-            u'id': u'0rOxMBabDXN6',
-            u'ext': u'flv',
-            u'title': u'Samsung Galaxy Note 2: Samsung\'s next-generation phablet',
-            u'description': u'md5:54d49fac53d26d5a0aaeccd061ada09d',
-            u'duration': 129,
+        # age-restricted video
+        {
+            'url': 'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
+            'md5': '98dde7c1a35d02178e8ab7560fe8bd09',
+            'info_dict': {
+                'id': '5186653',
+                'ext': 'mp4',
+                'title': 'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
+                'uploader': 'Dwayne Pipe',
+                'description': 'md5:950bf4c581e2c059911fa3ffbe377e4b',
+                'age_limit': 18,
+            },
        },
-        u'params': {
-            # rtmp download
-            u'skip_download': True,
+        # cbs video
+        {
+            'url': 'http://www.metacafe.com/watch/cb-8VD4r_Zws8VP/open_this_is_face_the_nation_february_9/',
+            'info_dict': {
+                'id': '8VD4r_Zws8VP',
+                'ext': 'flv',
+                'title': 'Open: This is Face the Nation, February 9',
+                'description': 'md5:8a9ceec26d1f7ed6eab610834cc1a476',
+                'duration': 96,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
        },
-    },
    ]

-
    def report_disclaimer(self):
-        """Report disclaimer retrieval."""
-        self.to_screen(u'Retrieving disclaimer')
+        self.to_screen('Retrieving disclaimer')

    def _real_initialize(self):
        # Retrieve disclaimer
        self.report_disclaimer()
-        self._download_webpage(self._DISCLAIMER, None, False, u'Unable to retrieve disclaimer')
+        self._download_webpage(self._DISCLAIMER, None, False, 'Unable to retrieve disclaimer')

        # Confirm age
        disclaimer_form = {
            'filters': '0',
            'submit': "Continue - I'm over 18",
-            }
+        }
        request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        self.report_age_confirmation()
-        self._download_webpage(request, None, False, u'Unable to confirm age')
+        self._download_webpage(request, None, False, 'Unable to confirm age')

    def _real_extract(self, url):
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)

        video_id = mobj.group(1)

@ -153,23 +154,24 @@ class MetacafeIE(InfoExtractor):
            else:
                mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
                if mobj is None:
-                    raise ExtractorError(u'Unable to extract media URL')
+                    raise ExtractorError('Unable to extract media URL')
                vardict = compat_parse_qs(mobj.group(1))
                if 'mediaData' not in vardict:
-                    raise ExtractorError(u'Unable to extract media URL')
-                mobj = re.search(r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
+                    raise ExtractorError('Unable to extract media URL')
+                mobj = re.search(
+                    r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
                if mobj is None:
-                    raise ExtractorError(u'Unable to extract media URL')
+                    raise ExtractorError('Unable to extract media URL')
                mediaURL = mobj.group('mediaURL').replace('\\/', '/')
                video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
                video_ext = determine_ext(video_url)

-        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
+        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, 'title')
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        video_uploader = self._html_search_regex(
                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
-                webpage, u'uploader nickname', fatal=False)
+                webpage, 'uploader nickname', fatal=False)

        if re.search(r'"contentRating":"restricted"', webpage) is not None:
            age_limit = 18
@ -177,14 +179,12 @@ class MetacafeIE(InfoExtractor):
            age_limit = 0

        return {
-            '_type':    'video',
-            'id':       video_id,
-            'url':      video_url,
+            'id': video_id,
+            'url': video_url,
            'description': description,
            'uploader': video_uploader,
-            'upload_date':  None,
-            'title':    video_title,
+            'title': video_title,
            'thumbnail':thumbnail,
-            'ext':      video_ext,
+            'ext': video_ext,
            'age_limit': age_limit,
        }
--- a/youtube_dl/extractor/metacritic.py
+++ b/youtube_dl/extractor/metacritic.py
@ -13,8 +13,9 @@ class MetacriticIE(InfoExtractor):

    _TEST = {
        'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
-        'file': '3698222.mp4',
        'info_dict': {
+            'id': '3698222',
+            'ext': 'mp4',
            'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            'duration': 221,
--- a/youtube_dl/extractor/mooshare.py
+++ b/youtube_dl/extractor/mooshare.py
@ -14,7 +14,7 @@ from ..utils import (
 class MooshareIE(InfoExtractor):
    IE_NAME = 'mooshare'
    IE_DESC = 'Mooshare.biz'
-    _VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})'
+    _VALID_URL = r'http://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'

    _TESTS = [
        {
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@ -6,12 +6,13 @@ from .common import InfoExtractor


 class NBAIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
+    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
    _TEST = {
        'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
-        'file': u'0021200253-okc-bkn-recap.nba.mp4',
        'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
        'info_dict': {
+            'id': '0021200253-okc-bkn-recap.nba',
+            'ext': 'mp4',
            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
            'title': 'Thunder vs. Nets',
        },
@ -19,7 +20,7 @@ class NBAIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

@ -33,7 +34,6 @@ class NBAIE(InfoExtractor):
        return {
            'id': shortened_video_id,
            'url': video_url,
-            'ext': 'mp4',
            'title': title,
            'description': description,
        }
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@ -1,12 +1,10 @@
 # encoding: utf-8
+from __future__ import unicode_literals

 import re
-import socket

 from .common import InfoExtractor
 from ..utils import (
-    compat_http_client,
-    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
@ -18,57 +16,54 @@ from ..utils import (


 class NiconicoIE(InfoExtractor):
-    IE_NAME = u'niconico'
-    IE_DESC = u'ニコニコ動画'
+    IE_NAME = 'niconico'
+    IE_DESC = 'ニコニコ動画'

    _TEST = {
-        u'url': u'http://www.nicovideo.jp/watch/sm22312215',
-        u'file': u'sm22312215.mp4',
-        u'md5': u'd1a75c0823e2f629128c43e1212760f9',
-        u'info_dict': {
-            u'title': u'Big Buck Bunny',
-            u'uploader': u'takuya0301',
-            u'uploader_id': u'2698420',
-            u'upload_date': u'20131123',
-            u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
+        'url': 'http://www.nicovideo.jp/watch/sm22312215',
+        'md5': 'd1a75c0823e2f629128c43e1212760f9',
+        'info_dict': {
+            'id': 'sm22312215',
+            'ext': 'mp4',
+            'title': 'Big Buck Bunny',
+            'uploader': 'takuya0301',
+            'uploader_id': '2698420',
+            'upload_date': '20131123',
+            'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
        },
-        u'params': {
-            u'username': u'ydl.niconico@gmail.com',
-            u'password': u'youtube-dl',
+        'params': {
+            'username': 'ydl.niconico@gmail.com',
+            'password': 'youtube-dl',
        },
    }

    _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
    _NETRC_MACHINE = 'niconico'
-    # If True it will raise an error if no login info is provided
-    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        self._login()

    def _login(self):
        (username, password) = self._get_login_info()
-        # No authentication to be performed
        if username is None:
-            if self._LOGIN_REQUIRED:
-                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
-            return False
+            # Login is required
+            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        # Log in
        login_form_strs = {
-            u'mail': username,
-            u'password': password,
+            'mail': username,
+            'password': password,
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
-        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
+        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
        request = compat_urllib_request.Request(
-            u'https://secure.nicovideo.jp/secure/login', login_data)
+            'https://secure.nicovideo.jp/secure/login', login_data)
        login_results = self._download_webpage(
-            request, u'', note=u'Logging in', errnote=u'Unable to log in')
+            request, None, note='Logging in', errnote='Unable to log in')
        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
-            self._downloader.report_warning(u'unable to log in: bad username or password')
+            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

@ -82,12 +77,12 @@ class NiconicoIE(InfoExtractor):

        video_info = self._download_xml(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
-            note=u'Downloading video info page')
+            note='Downloading video info page')

        # Get flv info
        flv_info_webpage = self._download_webpage(
-            u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
-            video_id, u'Downloading flv info')
+            'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
+            video_id, 'Downloading flv info')
        video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]

        # Start extracting information
@ -106,22 +101,22 @@ class NiconicoIE(InfoExtractor):
        url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
        try:
            user_info = self._download_xml(
-                url, video_id, note=u'Downloading user information')
+                url, video_id, note='Downloading user information')
            video_uploader = user_info.find('.//nickname').text
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
+        except ExtractorError as err:
+            self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err))

        return {
-            'id':          video_id,
-            'url':         video_real_url,
-            'title':       video_title,
-            'ext':         video_extension,
-            'format':      video_format,
-            'thumbnail':   video_thumbnail,
+            'id': video_id,
+            'url': video_real_url,
+            'title': video_title,
+            'ext': video_extension,
+            'format': video_format,
+            'thumbnail': video_thumbnail,
            'description': video_description,
-            'uploader':    video_uploader,
+            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
-            'view_count':  video_view_count,
+            'view_count': video_view_count,
            'webpage_url': video_webpage_url,
        }
--- a/youtube_dl/extractor/ntv.py
+++ b/youtube_dl/extractor/ntv.py
@ -0,0 +1,157 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    unescapeHTML
+)
+
+
+class NTVIE(InfoExtractor):
+    # Extractor for ntv.ru pages. The numeric video id is scraped from the
+    # page markup, then title/description/duration and the RTMP play paths
+    # are read from the player XML served under /vi<id>/.
+    _VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'
+
+    _TESTS = [
+        {
+            'url': 'http://www.ntv.ru/novosti/863142/',
+            'info_dict': {
+                'id': '746000',
+                'ext': 'flv',
+                'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
+                'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
+                'duration': 136,
+            },
+            'params': {
+                    # rtmp download
+                    'skip_download': True,
+                },
+        },
+        {
+            'url': 'http://www.ntv.ru/video/novosti/750370/',
+            'info_dict': {
+                'id': '750370',
+                'ext': 'flv',
+                'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
+                'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
+                'duration': 172,
+            },
+            'params': {
+                    # rtmp download
+                    'skip_download': True,
+                },
+        },
+        {
+            'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
+            'info_dict': {
+                'id': '747480',
+                'ext': 'flv',
+                'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
+                'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
+                'duration': 1496,
+            },
+            'params': {
+                    # rtmp download
+                    'skip_download': True,
+                },
+        },
+        {
+            'url': 'http://www.ntv.ru/kino/Koma_film',
+            'info_dict': {
+                'id': '750783',
+                'ext': 'flv',
+                'title': 'Остросюжетный фильм «Кома»  4 апреля вечером на НТВ',
+                'description': 'Остросюжетный фильм «Кома»  4 апреля вечером на НТВ',
+                'duration': 28,
+            },
+            'params': {
+                    # rtmp download
+                    'skip_download': True,
+                },
+        },
+        {
+            'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
+            'info_dict': {
+                'id': '751482',
+                'ext': 'flv',
+                'title': '«Дело врачей»: «Деревце жизни»',
+                'description': '«Дело врачей»: «Деревце жизни»',
+                'duration': 2590,
+            },
+            'params': {
+                    # rtmp download
+                    'skip_download': True,
+                },
+        },
+    ]
+
+    # Patterns tried in order against the page; group 1 is the numeric id
+    # used to request the player XML.
+    _VIDEO_ID_REGEXES = [
+        r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
+        r'<video embed=[^>]+><id>(\d+)</id>',
+        r'<video restriction[^>]+><key>(\d+)</key>'
+    ]
+
+    def _real_extract(self, url):
+        """Return the info dict for the video embedded in an ntv.ru page.
+
+        Raises ExtractorError when none of _VIDEO_ID_REGEXES matches the
+        downloaded page.
+        """
+        # The URL "id" is only a path fragment; it labels the page download
+        # and the error message until the real numeric id is found.
+        page_id = re.match(self._VALID_URL, url).group('id')
+
+        page = self._download_webpage(url, page_id, 'Downloading page')
+
+        # Start from None so a stale URL match can never be mistaken for a
+        # successful id match.
+        id_match = None
+        for pattern in self._VIDEO_ID_REGEXES:
+            id_match = re.search(pattern, page)
+            if id_match:
+                break
+
+        if id_match is None:
+            raise ExtractorError('No media links available for %s' % page_id)
+
+        video_id = id_match.group(1)
+
+        player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
+        title = unescapeHTML(player.find('./data/title').text)
+        description = unescapeHTML(player.find('./data/description').text)
+
+        video = player.find('./data/video')
+        # The id reported by the XML replaces the one scraped from the page.
+        video_id = video.find('./id').text
+        thumbnail = video.find('./splash').text
+        duration = int(video.find('./totaltime').text)
+        view_count = int(video.find('./views').text)
+        puid22 = video.find('./puid22').text
+
+        # puid22 selects the RTMP application; unknown values use 'video1'.
+        apps = {
+            '4': 'video1',
+            '7': 'video2',
+        }
+        app = apps.get(puid22, apps['4'])
+
+        formats = []
+        # Each variant is described by a "<prefix>file" element and an
+        # optional "<prefix>size" element.
+        for format_id in ['', 'hi', 'webm']:
+            file_el = video.find('./%sfile' % format_id)
+            if file_el is None:
+                continue
+            fmt = {
+                'url': 'rtmp://media.ntv.ru/%s' % app,
+                'app': app,
+                'play_path': file_el.text,
+                'rtmp_conn': 'B:1',
+                'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
+                'page_url': 'http://www.ntv.ru',
+                'flash_ver': 'LNX 11,2,202,341',
+                'rtmp_live': True,
+                'ext': 'flv',
+            }
+            # find() returns None when the size element is absent; only
+            # report a filesize when the XML actually provides one.
+            size_el = video.find('./%ssize' % format_id)
+            if size_el is not None and size_el.text:
+                fmt['filesize'] = int(size_el.text)
+            formats.append(fmt)
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'view_count': view_count,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/oe1.py
+++ b/youtube_dl/extractor/oe1.py
@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import calendar
+import datetime
+import re
+
+from .common import InfoExtractor
+
+# audios on oe1.orf.at are only available for 7 days, so we can't
+# add tests.
+
+
+class OE1IE(InfoExtractor):
+    IE_DESC = 'oe1.orf.at'
+    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
+
+    def _real_extract(self, url):
+        """Build the info dict from the 'item' entry of the programme's konsole JSON."""
+        show_id = re.match(self._VALID_URL, url).group('id')
+
+        konsole_url = 'http://oe1.orf.at/programm/%s/konsole' % show_id
+        data = self._download_json(konsole_url, show_id)
+        item = data['item']
+
+        # day_label and time are joined and parsed as 'DD.MM.YYYY HH:MM'.
+        broadcast_label = '%s %s' % (item['day_label'], item['time'])
+        broadcast_dt = datetime.datetime.strptime(
+            broadcast_label, '%d.%m.%Y %H:%M')
+        # The parsed datetime is naive, so timegm() reads its fields as UTC.
+        # NOTE(review): the site presumably reports local time - confirm.
+        unix_timestamp = calendar.timegm(broadcast_dt.utctimetuple())
+
+        return {
+            'id': show_id,
+            'title': item['title'],
+            'url': item['url_stream'],
+            'ext': 'mp3',
+            'description': item.get('info'),
+            'timestamp': unix_timestamp
+        }
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@ -1,20 +1,23 @@
+from __future__ import unicode_literals
 import re
 import json

 from .common import InfoExtractor
 from ..utils import unescapeHTML

+
 class OoyalaIE(InfoExtractor):
-    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
+    _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'

    _TEST = {
        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
-        u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
-        u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
-        u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
-        u'info_dict': {
-            u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
-            u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+        'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+        'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
+        'info_dict': {
+            'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+            'ext': 'mp4',
+            'title': 'Explaining Data Recovery from Hard Drives and SSDs',
+            'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
        },
    }

@ -28,13 +31,14 @@ class OoyalaIE(InfoExtractor):
            ie=cls.ie_key())

    def _extract_result(self, info, more_info):
-        return {'id': info['embedCode'],
-                'ext': 'mp4',
-                'title': unescapeHTML(info['title']),
-                'url': info.get('ipad_url') or info['url'],
-                'description': unescapeHTML(more_info['description']),
-                'thumbnail': more_info['promo'],
-                }
+        return {
+            'id': info['embedCode'],
+            'ext': 'mp4',
+            'title': unescapeHTML(info['title']),
+            'url': info.get('ipad_url') or info['url'],
+            'description': unescapeHTML(more_info['description']),
+            'thumbnail': more_info['promo'],
+        }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@ -42,22 +46,23 @@ class OoyalaIE(InfoExtractor):
        player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
        player = self._download_webpage(player_url, embedCode)
        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
-                                        player, u'mobile player url')
+                                        player, 'mobile player url')
        mobile_player = self._download_webpage(mobile_url, embedCode)
        videos_info = self._search_regex(
            r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
-            mobile_player, u'info').replace('\\"','"')
-        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
+            mobile_player, 'info').replace('\\"','"')
+        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"')
        videos_info = json.loads(videos_info)
        videos_more_info =json.loads(videos_more_info)

        if videos_more_info.get('lineup'):
            videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
-            return {'_type': 'playlist',
-                    'id': embedCode,
-                    'title': unescapeHTML(videos_more_info['title']),
-                    'entries': videos,
-                    }
+            return {
+                '_type': 'playlist',
+                'id': embedCode,
+                'title': unescapeHTML(videos_more_info['title']),
+                'entries': videos,
+            }
        else:
            return self._extract_result(videos_info[0], videos_more_info)
        
--- a/youtube_dl/extractor/parliamentliveuk.py
+++ b/youtube_dl/extractor/parliamentliveuk.py
@ -0,0 +1,53 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class ParliamentLiveUKIE(InfoExtractor):
+    IE_NAME = 'parliamentlive.tv'
+    IE_DESC = 'UK parliament videos'
+    _VALID_URL = r'https?://www\.parliamentlive\.tv/Main/Player\.aspx\?(?:[^&]+&)*?meetingId=(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://www.parliamentlive.tv/Main/Player.aspx?meetingId=15121&player=windowsmedia',
+        'info_dict': {
+            'id': '15121',
+            'ext': 'asf',
+            'title': 'hoc home affairs committee, 18 mar 2014.pm',
+            'description': 'md5:033b3acdf83304cd43946b2d5e5798d1',
+        },
+        'params': {
+            'skip_download': True,  # Requires mplayer (mms)
+        }
+    }
+
+    def _real_extract(self, url):
+        """Resolve the stream URL via the page's ASX metadata and scrape title/description."""
+        video_id = re.match(self._VALID_URL, url).group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        # The embed tag named "MediaPlayer" points at an ASX document whose
+        # first REF element carries the stream address in its HREF attribute.
+        asx_url = self._html_search_regex(
+            r'embed.*?src="([^"]+)" name="MediaPlayer"', webpage,
+            'metadata URL')
+        asx_doc = self._download_xml(
+            asx_url, video_id, 'Downloading ASX metadata')
+        ref_node = asx_doc.find('.//REF')
+        video_url = ref_node.attrib['HREF']
+
+        # Two adjacent quoted arguments of setClipDetails() are captured as a
+        # single group; the '", "' between them is rewritten to ', '.
+        raw_title = self._search_regex(
+            r'''(?x)player\.setClipDetails\(
+                (?:(?:[0-9]+|"[^"]+"),\s*){2}
+                "([^"]+",\s*"[^"]+)"
+                ''',
+            webpage, 'title')
+        title = raw_title.replace('", "', ', ')
+
+        description = self._html_search_regex(
+            r'(?s)<span id="MainContentPlaceHolder_CaptionsBlock_WitnessInfo">(.*?)</span>',
+            webpage, 'description')
+
+        return {
+            'id': video_id,
+            'ext': 'asf',
+            'url': video_url,
+            'title': title,
+            'description': description,
+        }
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@ -8,6 +8,7 @@ from ..utils import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urllib_parse,
+    str_to_int,
 )
 from ..aes import (
    aes_decrypt_text
@ -27,6 +28,12 @@ class PornHubIE(InfoExtractor):
        }
    }

+    def _extract_count(self, pattern, webpage, name):
+        """Search webpage for pattern (non-fatal) and convert a non-empty match with str_to_int.
+
+        A falsy search result is returned unchanged.
+        """
+        raw_count = self._html_search_regex(
+            pattern, webpage, '%s count' % name, fatal=False)
+        return str_to_int(raw_count) if raw_count else raw_count
+
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
@ -37,11 +44,19 @@ class PornHubIE(InfoExtractor):
        webpage = self._download_webpage(req, video_id)

        video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
-        video_uploader = self._html_search_regex(r'<b>From: </b>(?:\s|<[^>]*>)*(.+?)<', webpage, 'uploader', fatal=False)
+        video_uploader = self._html_search_regex(
+            r'(?s)<div class="video-info-row">\s*From:&nbsp;.+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
+            webpage, 'uploader', fatal=False)
        thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
        if thumbnail:
            thumbnail = compat_urllib_parse.unquote(thumbnail)

+        view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
+        like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
+        dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
+        comment_count = self._extract_count(
+            r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
+
        video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
        if webpage.find('"encrypted":true') != -1:
            password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
@ -77,6 +92,10 @@ class PornHubIE(InfoExtractor):
            'uploader': video_uploader,
            'title': video_title,
            'thumbnail': thumbnail,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'comment_count': comment_count,
            'formats': formats,
            'age_limit': 18,
        }
--- a/youtube_dl/extractor/pyvideo.py
+++ b/youtube_dl/extractor/pyvideo.py
@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import os

@ -5,45 +7,50 @@ from .common import InfoExtractor


 class PyvideoIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
-    _TESTS = [{
-        u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
-        u'file': u'24_4WWkSmNo.mp4',
-        u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
-        u'info_dict': {
-            u"title": u"Become a logging expert in 30 minutes",
-            u"description": u"md5:9665350d466c67fb5b1598de379021f7",
-            u"upload_date": u"20130320",
-            u"uploader": u"NextDayVideo",
-            u"uploader_id": u"NextDayVideo",
+    _VALID_URL = r'http://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
+
+    _TESTS = [
+        {
+            'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
+            'md5': 'de317418c8bc76b1fd8633e4f32acbc6',
+            'info_dict': {
+                'id': '24_4WWkSmNo',
+                'ext': 'mp4',
+                'title': 'Become a logging expert in 30 minutes',
+                'description': 'md5:9665350d466c67fb5b1598de379021f7',
+                'upload_date': '20130320',
+                'uploader': 'NextDayVideo',
+                'uploader_id': 'NextDayVideo',
+            },
+            'add_ie': ['Youtube'],
        },
-        u'add_ie': ['Youtube'],
-    },
-    {
-        u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
-        u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
-        u'info_dict': {
-            u'id': u'2542',
-            u'ext': u'm4v',
-            u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
+        {
+            'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
+            'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
+            'info_dict': {
+                'id': '2542',
+                'ext': 'm4v',
+                'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
+            },
        },
-    },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
-        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)

+        webpage = self._download_webpage(url, video_id)
+
+        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
        if m_youtube is not None:
            return self.url_result(m_youtube.group(1), 'Youtube')

-        title = self._html_search_regex(r'<div class="section">.*?<h3>([^>]+?)</h3>',
-            webpage, u'title', flags=re.DOTALL)
-        video_url = self._search_regex([r'<source src="(.*?)"',
-            r'<dt>Download</dt>.*?<a href="(.+?)"'],
-            webpage, u'video url', flags=re.DOTALL)
+        title = self._html_search_regex(
+            r'<div class="section">.*?<h3>([^>]+?)</h3>', webpage, 'title', flags=re.DOTALL)
+        video_url = self._search_regex(
+            [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
+            webpage, 'video url', flags=re.DOTALL)
+
        return {
            'id': video_id,
            'title': os.path.splitext(title)[0],
--- a/youtube_dl/extractor/radiofrance.py
+++ b/youtube_dl/extractor/radiofrance.py
@ -1,4 +1,6 @@
 # coding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@ -6,16 +8,17 @@ from .common import InfoExtractor

 class RadioFranceIE(InfoExtractor):
    _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
-    IE_NAME = u'radiofrance'
+    IE_NAME = 'radiofrance'

    _TEST = {
-        u'url': u'http://maison.radiofrance.fr/radiovisions/one-one',
-        u'file': u'one-one.ogg',
-        u'md5': u'bdbb28ace95ed0e04faab32ba3160daf',
-        u'info_dict': {
-            u"title": u"One to one",
-            u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
-            u"uploader": u"Thomas Hercouët",
+        'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
+        'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
+        'info_dict': {
+            'id': 'one-one',
+            'ext': 'ogg',
+            "title": "One to one",
+            "description": "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
+            "uploader": "Thomas Hercouët",
        },
    }

@ -24,27 +27,28 @@ class RadioFranceIE(InfoExtractor):
        video_id = m.group('id')

        webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, u'title')
+        title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
        description = self._html_search_regex(
            r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
-            webpage, u'description', fatal=False)
+            webpage, 'description', fatal=False)
        uploader = self._html_search_regex(
            r'<div class="credit">&nbsp;&nbsp;&copy;&nbsp;(.*?)</div>',
-            webpage, u'uploader', fatal=False)
+            webpage, 'uploader', fatal=False)

        formats_str = self._html_search_regex(
            r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
-            webpage, u'audio URLs')
+            webpage, 'audio URLs')
        formats = [
            {
                'format_id': fm[0],
                'url': fm[1],
                'vcodec': 'none',
+                'preference': i,
            }
-            for fm in
-            re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)
+            for i, fm in
+            enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
        ]
-        # No sorting, we don't know any more about these formats
+        self._sort_formats(formats)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/roxwel.py
+++ b/youtube_dl/extractor/roxwel.py
@ -1,5 +1,6 @@
+from __future__ import unicode_literals
+
 import re
-import json

 from .common import InfoExtractor
 from ..utils import unified_strdate, determine_ext
@ -9,41 +10,44 @@ class RoxwelIE(InfoExtractor):
    _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'

    _TEST = {
-        u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html',
-        u'file': u'passionpittakeawalklive.flv',
-        u'md5': u'd9dea8360a1e7d485d2206db7fe13035',
-        u'info_dict': {
-            u'title': u'Take A Walk (live)',
-            u'uploader': u'Passion Pit',
-            u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
+        'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html',
+        'info_dict': {
+            'id': 'passionpittakeawalklive',
+            'ext': 'flv',
+            'title': 'Take A Walk (live)',
+            'uploader': 'Passion Pit',
+            'uploader_id': 'passionpit',
+            'upload_date': '20120928',
+            'description': 'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
        },
-        u'skip': u'Requires rtmpdump',
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        filename = mobj.group('filename')
        info_url = 'http://www.roxwel.com/api/videos/%s' % filename
-        info_page = self._download_webpage(info_url, filename,
-                                           u'Downloading video info')
+        info = self._download_json(info_url, filename)

-        self.report_extraction(filename)
-        info = json.loads(info_page)
        rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')])
        best_rate = rtmp_rates[-1]
        url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
-        rtmp_url = self._download_webpage(url_page_url, filename, u'Downloading video url')
+        rtmp_url = self._download_webpage(url_page_url, filename, 'Downloading video url')
        ext = determine_ext(rtmp_url)
        if ext == 'f4v':
            rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)

-        return {'id': filename,
-                'title': info['title'],
-                'url': rtmp_url,
-                'ext': 'flv',
-                'description': info['description'],
-                'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
-                'uploader': info['artist'],
-                'uploader_id': info['artistname'],
-                'upload_date': unified_strdate(info['dbdate']),
-                }
+        return {
+            'id': filename,
+            'title': info['title'],
+            'url': rtmp_url,
+            'ext': 'flv',
+            'description': info['description'],
+            'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
+            'uploader': info['artist'],
+            'uploader_id': info['artistname'],
+            'upload_date': unified_strdate(info['dbdate']),
+        }
--- a/youtube_dl/extractor/rts.py
+++ b/youtube_dl/extractor/rts.py
@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+    unescapeHTML,
+)
+
+
+class RTSIE(InfoExtractor):
+    """Extractor for videos in the rts.ch TV archives."""
+
+    IE_DESC = 'RTS.ch'
+    _VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html'
+
+    _TEST = {
+        'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
+        'md5': '753b877968ad8afaeddccc374d4256a5',
+        'info_dict': {
+            'id': '3449373',
+            'ext': 'mp4',
+            'duration': 1488,
+            'title': 'Les Enfants Terribles',
+            'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
+            'uploader': 'Divers',
+            'upload_date': '19680921',
+            'timestamp': -40280400,
+            # Raw string: '\.' is a regex escape, not a string-literal escape
+            'thumbnail': r're:^https?://.*\.image'
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the archive video addressed by url.
+
+        The numeric id is taken from the URL; all metadata comes from the JSON at
+        http://www.rts.ch/a/<id>.html?f=json/article (under 'video' -> 'JSONinfo').
+        'title' and 'streams' must be present there; the other fields are optional.
+        """
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('id')
+
+        all_info = self._download_json(
+            'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
+        info = all_info['video']['JSONinfo']
+
+        upload_timestamp = parse_iso8601(info.get('broadcast_date'))
+        duration = parse_duration(info.get('duration'))
+        thumbnail = unescapeHTML(info.get('preview_image_url'))
+        formats = [{
+            'format_id': fid,
+            'url': furl,
+            # Bitrate is read from a "-<n>k." part of the stream URL when present
+            'tbr': int_or_none(self._search_regex(
+                r'-([0-9]+)k\.', furl, 'bitrate', default=None)),
+        } for fid, furl in info['streams'].items()]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': info['title'],
+            'description': info.get('intro'),
+            'duration': duration,
+            'uploader': info.get('programName'),
+            'timestamp': upload_timestamp,
+            'thumbnail': thumbnail,
+        }
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@ -2,7 +2,6 @@
 from __future__ import unicode_literals

 import re
-import json
 import itertools

 from .common import InfoExtractor
@ -20,8 +19,9 @@ class RutubeIE(InfoExtractor):

    _TEST = {
        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
-        'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4',
        'info_dict': {
+            'id': '3eac3b4561676c17df9132a9a1e62e3e',
+            'ext': 'mp4',
            'title': 'Раненный кенгуру забежал в аптеку',
            'description': 'http://www.ntdtv.ru ',
            'duration': 80,
@ -38,15 +38,15 @@ class RutubeIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        
-        api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id,
-                                              video_id, 'Downloading video JSON')
-        video = json.loads(api_response)
-        
-        api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
-                                              video_id, 'Downloading trackinfo JSON')
-        trackinfo = json.loads(api_response)
-        
+
+        video = self._download_json(
+            'http://rutube.ru/api/video/%s/?format=json' % video_id,
+            video_id, 'Downloading video JSON')
+
+        trackinfo = self._download_json(
+            'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
+            video_id, 'Downloading trackinfo JSON')
+
        # Some videos don't have the author field
        author = trackinfo.get('author') or {}
        m3u8_url = trackinfo['video_balancer'].get('m3u8')
@ -79,10 +79,9 @@ class RutubeChannelIE(InfoExtractor):
    def _extract_videos(self, channel_id, channel_title=None):
        entries = []
        for pagenum in itertools.count(1):
-            api_response = self._download_webpage(
+            page = self._download_json(
                self._PAGE_TEMPLATE % (channel_id, pagenum),
                channel_id, 'Downloading page %s' % pagenum)
-            page = json.loads(api_response)
            results = page['results']
            if not results:
                break
@ -108,10 +107,9 @@ class RutubeMovieIE(RutubeChannelIE):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        movie_id = mobj.group('id')
-        api_response = self._download_webpage(
+        movie = self._download_json(
            self._MOVIE_TEMPLATE % movie_id, movie_id,
            'Downloading movie JSON')
-        movie = json.loads(api_response)
        movie_name = movie['name']
        return self._extract_videos(movie_id, movie_name)

--- a/youtube_dl/extractor/slashdot.py
+++ b/youtube_dl/extractor/slashdot.py
@ -1,24 +0,0 @@
-import re
-
-from .common import InfoExtractor
-
-
-class SlashdotIE(InfoExtractor):
-    _VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'
-
-    _TEST = {
-        u'add_ie': ['Ooyala'],
-        u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
-        u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
-        u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
-        u'info_dict': {
-            u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator',
-        },
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
-        ooyala_url = self._search_regex(r'<script src="(.*?)"', webpage, 'ooyala url')
-        return self.url_result(ooyala_url, 'Ooyala')
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@ -13,22 +13,24 @@ from ..utils import (
    compat_urllib_request,
    ExtractorError,
    url_basename,
+    int_or_none,
 )


 class SmotriIE(InfoExtractor):
    IE_DESC = 'Smotri.com'
    IE_NAME = 'smotri'
-    _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'
+    _VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
    _NETRC_MACHINE = 'smotri'

    _TESTS = [
        # real video id 2610366
        {
            'url': 'http://smotri.com/video/view/?id=v261036632ab',
-            'file': 'v261036632ab.mp4',
            'md5': '2a7b08249e6f5636557579c368040eb9',
            'info_dict': {
+                'id': 'v261036632ab',
+                'ext': 'mp4',
                'title': 'катастрофа с камер видеонаблюдения',
                'uploader': 'rbc2008',
                'uploader_id': 'rbc08',
@ -40,9 +42,10 @@ class SmotriIE(InfoExtractor):
        # real video id 57591
        {
            'url': 'http://smotri.com/video/view/?id=v57591cb20',
-            'file': 'v57591cb20.flv',
            'md5': '830266dfc21f077eac5afd1883091bcd',
            'info_dict': {
+                'id': 'v57591cb20',
+                'ext': 'flv',
                'title': 'test',
                'uploader': 'Support Photofile@photofile',
                'uploader_id': 'support-photofile',
@ -54,9 +57,10 @@ class SmotriIE(InfoExtractor):
        # video-password
        {
            'url': 'http://smotri.com/video/view/?id=v1390466a13c',
-            'file': 'v1390466a13c.mp4',
            'md5': 'f6331cef33cad65a0815ee482a54440b',
            'info_dict': {
+                'id': 'v1390466a13c',
+                'ext': 'mp4',
                'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
                'uploader': 'timoxa40',
                'uploader_id': 'timoxa40',
@ -71,9 +75,10 @@ class SmotriIE(InfoExtractor):
        # age limit + video-password
        {
            'url': 'http://smotri.com/video/view/?id=v15408898bcf',
-            'file': 'v15408898bcf.flv',
            'md5': '91e909c9f0521adf5ee86fbe073aad70',
            'info_dict': {
+                'id': 'v15408898bcf',
+                'ext': 'flv',
                'title': 'этот ролик не покажут по ТВ',
                'uploader': 'zzxxx',
                'uploader_id': 'ueggb',
@ -85,7 +90,22 @@ class SmotriIE(InfoExtractor):
            'params': {
                'videopassword': '333'
            }
-        }
+        },
+        # swf player
+        {
+            'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
+            'md5': '4d47034979d9390d14acdf59c4935bc2',
+            'info_dict': {
+                'id': 'v9188090500',
+                'ext': 'mp4',
+                'title': 'Shakira - Don\'t Bother',
+                'uploader': 'HannahL',
+                'uploader_id': 'lisaha95',
+                'upload_date': '20090331',
+                'description': 'Shakira - Don\'t Bother, видео Shakira - Don\'t Bother',
+                'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
+            },
+        },
    ]

    _SUCCESS = 0
@ -93,6 +113,21 @@ class SmotriIE(InfoExtractor):
    _PASSWORD_DETECTED = 2
    _VIDEO_NOT_FOUND = 3

+    @classmethod
+    def _extract_url(cls, webpage):
+        """Return the URL of a smotri video embedded in webpage, or None if absent."""
+        mobj = re.search(  # Flash <embed>; closing quote (\1) kept out of the url group
+            r'<embed[^>]*?\ssrc=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?)\1', webpage)
+        if mobj is not None:
+            return mobj.group('url')
+        # Otherwise look for the video_file / video_image / video_id div triple
+        mobj = re.search(
+            r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
+                    <div\s+class="video_image">[^<]+</div>\s*
+                    <div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
+        if mobj is not None:
+            return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')
+
    def _search_meta(self, name, html, display_name=None):
        if display_name is None:
            display_name = name
@ -134,7 +169,7 @@ class SmotriIE(InfoExtractor):

        # Video JSON does not provide enough meta data
        # We will extract some from the video web page instead
-        video_page_url = 'http://' + mobj.group('url')
+        video_page_url = 'http://smotri.com/video/view/?id=%s' % video_id
        video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')

        # Warning if video is unavailable
@ -222,7 +257,7 @@ class SmotriIE(InfoExtractor):
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'duration': video_duration,
-            'view_count': video_view_count,
+            'view_count': int_or_none(video_view_count),
            'age_limit': 18 if adult_content else 0,
            'video_page_url': video_page_url
        }
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@ -100,7 +100,7 @@ class SoundcloudIE(InfoExtractor):

    def report_resolve(self, video_id):
        """Report information extraction."""
-        self.to_screen(u'%s: Resolving id' % video_id)
+        self.to_screen('%s: Resolving id' % video_id)

    @classmethod
    def _resolv_url(cls, url):
@ -124,45 +124,46 @@ class SoundcloudIE(InfoExtractor):
            'description': info['description'],
            'thumbnail': thumbnail,
        }
+        formats = []
        if info.get('downloadable', False):
            # We can build a direct link to the song
            format_url = (
                'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
                    track_id, self._CLIENT_ID))
-            result['formats'] = [{
+            formats.append({
                'format_id': 'download',
                'ext': info.get('original_format', 'mp3'),
                'url': format_url,
                'vcodec': 'none',
-            }]
-        else:
-            # We have to retrieve the url
-            streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
-                'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
-            stream_json = self._download_webpage(
-                streams_url,
-                track_id, 'Downloading track url')
+                'preference': 10,
+            })

-            formats = []
-            format_dict = json.loads(stream_json)
-            for key, stream_url in format_dict.items():
-                if key.startswith(u'http'):
-                    formats.append({
-                        'format_id': key,
-                        'ext': ext,
-                        'url': stream_url,
-                        'vcodec': 'none',
-                    })
-                elif key.startswith(u'rtmp'):
-                    # The url doesn't have an rtmp app, we have to extract the playpath
-                    url, path = stream_url.split('mp3:', 1)
-                    formats.append({
-                        'format_id': key,
-                        'url': url,
-                        'play_path': 'mp3:' + path,
-                        'ext': ext,
-                        'vcodec': 'none',
-                    })
+        # We have to retrieve the url
+        streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
+            'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
+        stream_json = self._download_webpage(
+            streams_url,
+            track_id, 'Downloading track url')
+
+        format_dict = json.loads(stream_json)
+        for key, stream_url in format_dict.items():
+            if key.startswith('http'):
+                formats.append({
+                    'format_id': key,
+                    'ext': ext,
+                    'url': stream_url,
+                    'vcodec': 'none',
+                })
+            elif key.startswith('rtmp'):
+                # The url doesn't have an rtmp app, we have to extract the playpath
+                url, path = stream_url.split('mp3:', 1)
+                formats.append({
+                    'format_id': key,
+                    'url': url,
+                    'play_path': 'mp3:' + path,
+                    'ext': ext,
+                    'vcodec': 'none',
+                })

            if not formats:
                # We fallback to the stream_url in the original info, this
@ -188,7 +189,7 @@ class SoundcloudIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)

        track_id = mobj.group('track_id')
        token = None
@ -226,7 +227,7 @@ class SoundcloudSetIE(SoundcloudIE):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)

        # extract uploader (which is in the url)
        uploader = mobj.group(1)
@ -243,7 +244,7 @@ class SoundcloudSetIE(SoundcloudIE):
        info = json.loads(info_json)
        if 'errors' in info:
            for err in info['errors']:
-                self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(err['error_message']))
+                self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
            return

        self.report_extraction(full_title)
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@ -18,12 +18,14 @@ class TEDIE(SubtitlesInfoExtractor):
            (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
            |
            ((?P<type_talk>talks)) # We have a simple talk
+            |
+            (?P<type_watch>watch)/[^/]+/[^/]+
        )
        (/lang/(.*?))? # The url may contain the language
-        /(?P<name>\w+) # Here goes the name and then ".html"
+        /(?P<name>[\w-]+) # Here goes the name and then ".html"
        .*)$
        '''
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
        'md5': '4ea1dada91e4174b53dac2bb8ace429d',
        'info_dict': {
@ -36,7 +38,17 @@ class TEDIE(SubtitlesInfoExtractor):
                'actively fooling us.'),
            'uploader': 'Dan Dennett',
        }
-    }
+    }, {
+        'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
+        'md5': '226f4fb9c62380d11b7995efa4c87994',
+        'info_dict': {
+            'id': 'vishal-sikka-the-beauty-and-power-of-algorithms',
+            'ext': 'mp4',
+            'title': 'Vishal Sikka: The beauty and power of algorithms',
+            'thumbnail': 're:^https?://.+\.jpg',
+            'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
+        }
+    }]

    _FORMATS_PREFERENCE = {
        'low': 1,
@ -57,6 +69,8 @@ class TEDIE(SubtitlesInfoExtractor):
        name = m.group('name')
        if m.group('type_talk'):
            return self._talk_info(url, name)
+        elif m.group('type_watch'):
+            return self._watch_info(url, name)
        else:
            return self._playlist_videos_info(url, name)

@ -123,3 +137,26 @@ class TEDIE(SubtitlesInfoExtractor):
        else:
            self._downloader.report_warning(u'video doesn\'t have subtitles')
            return {}
+
+    def _watch_info(self, url, name):
+        webpage = self._download_webpage(url, name)
+
+        config_json = self._html_search_regex(
+            r"data-config='([^']+)", webpage, 'config')
+        config = json.loads(config_json)
+        video_url = config['video']['url']
+        thumbnail = config.get('image', {}).get('url')
+
+        title = self._html_search_regex(
+            r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", webpage, 'title')
+        description = self._html_search_regex(
+            r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
+            webpage, 'description', fatal=False)
+
+        return {
+            'id': name,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'description': description,
+        }
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@ -1,33 +1,37 @@
 # coding: utf-8
+from __future__ import unicode_literals

-import json
 import re

 from .common import InfoExtractor

+
 class TF1IE(InfoExtractor):
    """TF1 uses the wat.tv player."""
-    _VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
+    _VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html'
    _TEST = {
-        u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
-        u'file': u'10635995.mp4',
-        u'md5': u'2e378cc28b9957607d5e88f274e637d8',
-        u'info_dict': {
-            u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
-            u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
+        'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
+        'info_dict': {
+            'id': '10635995',
+            'ext': 'mp4',
+            'title': 'Citroën Grand C4 Picasso 2013 : présentation officielle',
+            'description': 'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
+        },
+        'params': {
+            # Sometimes wat serves the whole file with the --test option
+            'skip_download': True,
        },
-        u'skip': u'Sometimes wat serves the whole file with the --test option',
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        id = mobj.group(1)
-        webpage = self._download_webpage(url, id)
-        embed_url = self._html_search_regex(r'"(https://www.wat.tv/embedframe/.*?)"',
-                                webpage, 'embed url')
-        embed_page = self._download_webpage(embed_url, id, u'Downloading embed player page')
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        embed_url = self._html_search_regex(
+            r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
+        embed_page = self._download_webpage(embed_url, video_id,
+            'Downloading embed player page')
        wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
-        wat_info = self._download_webpage('http://www.wat.tv/interface/contentv3/%s' % wat_id, id, u'Downloading Wat info')
-        wat_info = json.loads(wat_info)['media']
-        wat_url = wat_info['url']
-        return self.url_result(wat_url, 'Wat')
+        wat_info = self._download_json(
+            'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
+        return self.url_result(wat_info['media']['url'], 'Wat')
--- a/youtube_dl/extractor/toypics.py
+++ b/youtube_dl/extractor/toypics.py
@ -0,0 +1,88 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class ToypicsIE(InfoExtractor):
+    """Extract a single video from a videos.toypics.net view page."""
+
+    IE_DESC = 'Toypics video'
+    _VALID_URL = r'http://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
+    _TEST = {
+        'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
+        'md5': '16e806ad6d6f58079d210fe30985e08b',
+        'info_dict': {
+            'id': '514',
+            'ext': 'mp4',
+            'title': 'Chance-Bulge\'d, 2',
+            'age_limit': 18,
+            'uploader': 'kidsune',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Scrape the video URL, title and uploader name from the view page at url."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        page = self._download_webpage(url, video_id)
+        video_url = self._html_search_regex(
+            r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"', page, 'video URL')
+        title = self._html_search_regex(
+            r'<title>Toypics - ([^<]+)</title>', page, 'title')
+        username = self._html_search_regex(
+            r'toypics.net/([^/"]+)" class="user-name">', page, 'username')
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'uploader': username,
+            # Every video is reported as adults-only
+            'age_limit': 18,
+        }
+
+
+class ToypicsUserIE(InfoExtractor):
+    """Extract all public videos of a videos.toypics.net user as a playlist."""
+
+    IE_DESC = 'Toypics user profile'
+    _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
+
+    def _real_extract(self, url):
+        """Page through the user's public listing and return a playlist of view URLs."""
+        mobj = re.match(self._VALID_URL, url)
+        username = mobj.group('username')
+
+        profile_page = self._download_webpage(
+            url, username, note='Retrieving profile page')
+
+        video_count = int(self._search_regex(
+            r'public/">Public Videos \(([0-9]+)\)</a></li>', profile_page,
+            'video count'))
+
+        PAGE_SIZE = 8
+        urls = []
+        # Ceiling division: listing pages needed to cover video_count entries
+        page_count = (video_count + PAGE_SIZE - 1) // PAGE_SIZE
+        for n in range(1, page_count + 1):
+            # Build the listing URL from the username rather than from url,
+            # which may carry a query string (allowed by _VALID_URL)
+            lpage_url = 'http://videos.toypics.net/%s/public/%d' % (username, n)
+            lpage = self._download_webpage(
+                lpage_url, username,
+                note='Downloading page %d/%d' % (n, page_count))
+            urls.extend(
+                re.findall(
+                    r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">',
+                    lpage))
+
+        return {
+            '_type': 'playlist',
+            'id': username,
+            'entries': [{
+                '_type': 'url',
+                'url': eurl,
+                'ie_key': 'Toypics',
+            } for eurl in urls]
+        }
--- a/youtube_dl/extractor/urort.py
+++ b/youtube_dl/extractor/urort.py
@ -0,0 +1,68 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    unified_strdate,
+)
+
+
+class UrortIE(InfoExtractor):
+    """Extract every track of a band page on urort.p3.no as a playlist."""
+
+    IE_DESC = 'NRK P3 Urørt'
+    _VALID_URL = r'https?://(?:www\.)?urort\.p3\.no/#!/Band/(?P<id>[^/]+)$'
+
+    _TEST = {
+        'url': 'https://urort.p3.no/#!/Band/Gerilja',
+        'md5': '5ed31a924be8a05e47812678a86e127b',
+        'info_dict': {
+            'id': '33124-4',
+            'ext': 'mp3',
+            'title': 'The Bomb',
+            # Raw string: '\.' is a regex escape, not a string-literal escape
+            'thumbnail': r're:^https?://.+\.jpg',
+            'like_count': int,
+            'uploader': 'Gerilja',
+            'uploader_id': 'Gerilja',
+            'upload_date': '20100323',
+        },
+        'params': {
+            'matchtitle': '^The Bomb$',  # To test, we want just one video
+        }
+    }
+
+    def _real_extract(self, url):
+        """Query the TrackDtos endpoint for the band named in url.
+
+        Each track in the JSON response becomes one playlist entry; the band
+        name from the URL is used as both playlist id and playlist title.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+
+        fstr = compat_urllib_parse.quote("InternalBandUrl eq '%s'" % playlist_id)
+        json_url = 'http://urort.p3.no/breeze/urort/TrackDtos?$filter=' + fstr
+        songs = self._download_json(json_url, playlist_id)
+
+        entries = [{
+            'id': '%d-%s' % (s['BandId'], s['$id']),
+            'title': s['Title'],
+            'url': s['TrackUrl'],
+            'ext': 'mp3',
+            'uploader_id': playlist_id,
+            'uploader': s.get('BandName', playlist_id),
+            'like_count': s.get('LikeCount'),
+            'thumbnail': 'http://urort.p3.no/cloud/images/%s' % s['Image'],
+            'upload_date': unified_strdate(s.get('Released')),
+        } for s in songs]
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': playlist_id,
+            'entries': entries,
+        }
--- a/youtube_dl/extractor/veoh.py
+++ b/youtube_dl/extractor/veoh.py
@ -4,26 +4,99 @@ import re
 import json

 from .common import InfoExtractor
-from ..utils import compat_urllib_request
+from ..utils import (
+    compat_urllib_request,
+    int_or_none,
+)


 class VeohIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/v(?P<id>\d*)'
+    _VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)'

-    _TEST = {
-        'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
-        'file': '56314296.mp4',
-        'md5': '620e68e6a3cff80086df3348426c9ca3',
-        'info_dict': {
-            'title': 'Straight Backs Are Stronger',
-            'uploader': 'LUMOback',
-            'description': 'At LUMOback, we believe straight backs are stronger.  The LUMOback Posture & Movement Sensor:  It gently vibrates when you slouch, inspiring improved posture and mobility.  Use the app to track your data and improve your posture over time. ',
+    _TESTS = [
+        {
+            'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
+            'md5': '620e68e6a3cff80086df3348426c9ca3',
+            'info_dict': {
+                'id': '56314296',
+                'ext': 'mp4',
+                'title': 'Straight Backs Are Stronger',
+                'uploader': 'LUMOback',
+                'description': 'At LUMOback, we believe straight backs are stronger.  The LUMOback Posture & Movement Sensor:  It gently vibrates when you slouch, inspiring improved posture and mobility.  Use the app to track your data and improve your posture over time. ',
+            },
+        },
+        {
+            'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
+            'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
+            'info_dict': {
+                'id': '27701988',
+                'ext': 'mp4',
+                'title': 'Chile workers cover up to avoid skin damage',
+                'description': 'md5:2bd151625a60a32822873efc246ba20d',
+                'uploader': 'afp-news',
+                'duration': 123,
+            },
+        },
+        {
+            'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
+            'md5': '4fde7b9e33577bab2f2f8f260e30e979',
+            'note': 'Embedded ooyala video',
+            'info_dict': {
+                'id': '69525809',
+                'ext': 'mp4',
+                'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
+                'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
+                'uploader': 'newsy-videos',
+            },
+        },
+    ]
+
+    def _extract_formats(self, source):
+        formats = []
+        link = source.get('aowPermalink')
+        if link:
+            formats.append({
+                'url': link,
+                'ext': 'mp4',
+                'format_id': 'aow',
+            })
+        link = source.get('fullPreviewHashLowPath')
+        if link:
+            formats.append({
+                'url': link,
+                'format_id': 'low',
+            })
+        link = source.get('fullPreviewHashHighPath')
+        if link:
+            formats.append({
+                'url': link,
+                'format_id': 'high',
+            })
+        return formats
+
+    def _extract_video(self, source):
+        return {
+            'id': source.get('videoId'),
+            'title': source.get('title'),
+            'description': source.get('description'),
+            'thumbnail': source.get('highResImage') or source.get('medResImage'),
+            'uploader': source.get('username'),
+            'duration': int_or_none(source.get('length')),
+            'view_count': int_or_none(source.get('views')),
+            'age_limit': 18 if source.get('isMature') == 'true' or source.get('isSexy') == 'true' else 0,
+            'formats': self._extract_formats(source),
        }
-    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
+
+        if video_id.startswith('v'):
+            rsp = self._download_xml(
+                r'http://www.veoh.com/api/findByPermalink?permalink=%s' % video_id, video_id, 'Downloading video XML')
+            if rsp.get('stat') == 'ok':
+                return self._extract_video(rsp.find('./videoList/video'))
+
        webpage = self._download_webpage(url, video_id)
        age_limit = 0
        if 'class="adultwarning-container"' in webpage:
@ -33,24 +106,16 @@ class VeohIE(InfoExtractor):
            request.add_header('Cookie', 'confirmedAdult=true')
            webpage = self._download_webpage(request, video_id)

-        m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage)
+        m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|"|\?)', webpage)
        if m_youtube is not None:
            youtube_id = m_youtube.group(1)
            self.to_screen('%s: detected Youtube video.' % video_id)
            return self.url_result(youtube_id, 'Youtube')

-        self.report_extraction(video_id)
-        info = self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info')
-        info = json.loads(info)
-        video_url = info.get('fullPreviewHashHighPath') or info.get('fullPreviewHashLowPath')
+        info = json.loads(
+            self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info').replace('\\\'', '\''))

-        return {
-            'id': info['videoId'],
-            'title': info['title'],
-            'url': video_url,
-            'uploader': info['username'],
-            'thumbnail': info.get('highResImage') or info.get('medResImage'),
-            'description': info['description'],
-            'view_count': info['views'],
-            'age_limit': age_limit,
-        }
+        video = self._extract_video(info)
+        video['age_limit'] = age_limit
+
+        return video
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@ -1,38 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from .ooyala import OoyalaIE
-from ..utils import ExtractorError
-
-
-class ViceIE(InfoExtractor):
-    _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'
-
-    _TEST = {
-        u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
-        u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4',
-        u'info_dict': {
-            u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
-        },
-        u'params': {
-            # Requires ffmpeg (m3u8 manifest)
-            u'skip_download': True,
-        },
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        name = mobj.group('name')
-        webpage = self._download_webpage(url, name)
-        try:
-            ooyala_url = self._og_search_video_url(webpage)
-        except ExtractorError:
-            try:
-                embed_code = self._search_regex(
-                    r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage,
-                    u'ooyala embed code')
-                ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
-            except ExtractorError:
-                raise ExtractorError(u'The page doesn\'t contain a video', expected=True)
-        return self.url_result(ooyala_url, ie='Ooyala')
-
--- a/youtube_dl/extractor/videolecturesnet.py
+++ b/youtube_dl/extractor/videolecturesnet.py
@ -0,0 +1,70 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    find_xpath_attr,
+    int_or_none,
+    parse_duration,
+    unified_strdate,
+)
+
+
+class VideoLecturesNetIE(InfoExtractor):
+    """Extractor for lectures on videolectures.net.
+
+    All metadata and media URLs are read from the SMIL document of the
+    lecture's first video part (`/<id>/video/1/smil.xml`).
+    """
+    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
+    IE_NAME = 'videolectures.net'
+
+    _TEST = {
+        'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
+        'info_dict': {
+            'id': 'promogram_igor_mekjavic_eng',
+            'ext': 'mp4',
+            'title': 'Automatics, robotics and biocybernetics',
+            'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+            'upload_date': '20130627',
+            'duration': 565,
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the lecture addressed by `url`.
+
+        The title and date <meta> elements are required; the abstract and
+        the thumbnail image are optional and become None when absent.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id
+        smil = self._download_xml(smil_url, video_id)
+
+        title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']
+        description_el = find_xpath_attr(smil, './/meta', 'name', 'abstract')
+        description = (
+            None if description_el is None
+            else description_el.attrib['content'])
+        upload_date = unified_strdate(
+            find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content'])
+
+        switch = smil.find('.//switch')
+        duration = parse_duration(switch.attrib.get('dur'))
+        thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail')
+        thumbnail = (
+            None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
+
+        formats = []
+        for v in switch.findall('./video'):
+            # Only sources flagged as plain HTTP are offered as formats.
+            if v.attrib.get('proto') != 'http':
+                continue
+            # systemBitrate is optional: dividing a missing value used to
+            # raise TypeError, so scale it only when it is present.
+            bitrate = int_or_none(v.attrib.get('systemBitrate'))
+            formats.append({
+                'url': v.attrib['src'],
+                'width': int_or_none(v.attrib.get('width')),
+                'height': int_or_none(v.attrib.get('height')),
+                'filesize': int_or_none(v.attrib.get('size')),
+                'tbr': None if bitrate is None else bitrate / 1000.0,
+                'ext': v.attrib.get('ext'),
+            })
+        # Order the formats worst-to-best like the other extractors do,
+        # instead of relying on the order of the SMIL document.
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'upload_date': upload_date,
+            'duration': duration,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@ -16,7 +16,7 @@ from ..utils import (

 class VKIE(InfoExtractor):
    IE_NAME = 'vk.com'
-    _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
+    _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
    _NETRC_MACHINE = 'vk'

    _TESTS = [
--- a/youtube_dl/extractor/washingtonpost.py
+++ b/youtube_dl/extractor/washingtonpost.py
@ -0,0 +1,103 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    strip_jsonp,
+)
+
+
+class WashingtonPostIE(InfoExtractor):
+    """Turns a washingtonpost.com article into a playlist of its embedded videos."""
+    _VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
+    _TEST = {
+        'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
+        'playlist': [{
+            'md5': 'c3f4b4922ffa259243f68e928db2db8c',
+            'info_dict': {
+                'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
+                'ext': 'mp4',
+                'title': 'Breaking Points: The Paper Mine',
+                'duration': 1287,
+                'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
+                'uploader': 'The Washington Post',
+                'timestamp': 1395527908,
+                'upload_date': '20140322',
+            },
+        }, {
+            'md5': 'f645a07652c2950cd9134bb852c5f5eb',
+            'info_dict': {
+                'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
+                'ext': 'mp4',
+                'title': 'The town bureaucracy sustains',
+                'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
+                'duration': 2217,
+                'timestamp': 1395528005,
+                'upload_date': '20140322',
+                'uploader': 'The Washington Post',
+            },
+        }]
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist with one entry per `data-video-uuid` found on the page."""
+        page_id = re.match(self._VALID_URL, url).group('id')
+
+        webpage = self._download_webpage(url, page_id)
+        title = self._og_search_title(webpage)
+        uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
+        total = len(uuids)
+        # Stream type as reported by the site -> downloader protocol.
+        protocol_by_type = {
+            'MP4': 'http',
+            'F4F': 'f4m',
+        }
+
+        entries = []
+        for index, uuid in enumerate(uuids):
+            info_url = 'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid
+            progress_note = 'Downloading information of video %d/%d' % (index + 1, total)
+            vinfo = self._download_json(
+                info_url, page_id,
+                transform_source=strip_jsonp,
+                note=progress_note)[0]['contentConfig']
+            uploader = vinfo.get('credits', {}).get('source')
+            timestamp = int_or_none(
+                vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000)
+
+            formats = []
+            for s in vinfo.get('streams', []):
+                stream_type = s.get('type')
+                width = s.get('width')
+                # A width of exactly 0 marks a stream without a video track.
+                has_video = width != 0
+                if width:
+                    format_id = '%s-%s-%s' % (stream_type, width, s.get('bitrate'))
+                else:
+                    format_id = stream_type
+                formats.append({
+                    'format_id': format_id,
+                    'vbr': s.get('bitrate') if has_video else None,
+                    'width': width,
+                    'height': s.get('height'),
+                    'acodec': s.get('audioCodec'),
+                    'vcodec': s.get('videoCodec') if has_video else 'none',
+                    'filesize': s.get('fileSize'),
+                    'url': s.get('url'),
+                    'ext': 'mp4',
+                    'protocol': protocol_by_type.get(stream_type),
+                })
+
+            source_media_url = vinfo.get('sourceMediaURL')
+            if source_media_url:
+                formats.append({
+                    'format_id': 'source_media',
+                    'url': source_media_url,
+                })
+            self._sort_formats(formats)
+
+            entries.append({
+                'id': uuid,
+                'title': vinfo['title'],
+                'description': vinfo.get('blurb'),
+                'uploader': uploader,
+                'formats': formats,
+                'duration': int_or_none(vinfo.get('videoDuration'), 100),
+                'timestamp': timestamp,
+            })
+
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'id': page_id,
+            'title': title,
+        }
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@ -1,37 +1,37 @@
 # coding: utf-8
+from __future__ import unicode_literals

-import json
 import re

 from .common import InfoExtractor
-
 from ..utils import (
    unified_strdate,
 )


 class WatIE(InfoExtractor):
-    _VALID_URL=r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
+    _VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
    IE_NAME = 'wat.tv'
    _TEST = {
-        u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
-        u'file': u'10631273.mp4',
-        u'md5': u'd8b2231e1e333acd12aad94b80937e19',
-        u'info_dict': {
-            u'title': u'World War Z - Philadelphia VOST',
-            u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
+        'url': 'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
+        'info_dict': {
+            'id': '10631273',
+            'ext': 'mp4',
+            'title': 'World War Z - Philadelphia VOST',
+            'description': 'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
+        },
+        'params': {
+            # Sometimes wat serves the whole file with the --test option
+            'skip_download': True,
        },
-        u'skip': u'Sometimes wat serves the whole file with the --test option',
    }
-    
+
    def download_video_info(self, real_id):
        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
-        info = self._download_webpage('http://www.wat.tv/interface/contentv3/' + real_id, real_id, 'Downloading video info')
-        info = json.loads(info)
+        info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id)
        return info['media']

-
    def _real_extract(self, url):
        def real_id_for_chapter(chapter):
            return chapter['tc_start'].split('-')[0]
@ -56,17 +56,17 @@ class WatIE(InfoExtractor):
            entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
            return self.playlist_result(entries, real_id, video_info['title'])

+        upload_date = None
+        if 'date_diffusion' in first_chapter:
+            upload_date = unified_strdate(first_chapter['date_diffusion'])
        # Otherwise we can continue and extract just one part, we have to use
        # the short id for getting the video url
-        info = {'id': real_id,
-                'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
-                'ext': 'mp4',
-                'title': first_chapter['title'],
-                'thumbnail': first_chapter['preview'],
-                'description': first_chapter['description'],
-                'view_count': video_info['views'],
-                }
-        if 'date_diffusion' in first_chapter:
-            info['upload_date'] = unified_strdate(first_chapter['date_diffusion'])
-
-        return info
+        return {
+            'id': real_id,
+            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
+            'title': first_chapter['title'],
+            'thumbnail': first_chapter['preview'],
+            'description': first_chapter['description'],
+            'view_count': video_info['views'],
+            'upload_date': upload_date,
+        }
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@ -4,9 +4,10 @@ import re

 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
+    compat_parse_qs,
    compat_urlparse,
    determine_ext,
+    unified_strdate,
 )


@ -111,4 +112,85 @@ class WDRIE(InfoExtractor):
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
-        }
+        }
+
+
+class WDRMausIE(InfoExtractor):
+    """Extractor for wdrmaus.de pages ("Sendung mit der Maus").
+
+    The RTMP URL and the thumbnail are read from the query string of the
+    page's "startVideo" link.  When the site's URL translation table can be
+    downloaded, an additional HTTP format is derived from it.
+    """
+    # Raw strings: the patterns contain backslash escapes such as `\.`.
+    _VALID_URL = r'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
+    IE_DESC = 'Sendung mit der Maus'
+    _TESTS = [{
+        'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
+        'info_dict': {
+            'id': 'aktuelle-sendung',
+            'ext': 'mp4',
+            'thumbnail': r're:^http://.+\.jpg',
+            'upload_date': 're:^[0-9]{8}$',
+            'title': 're:^[0-9.]{10} - Aktuelle Sendung$',
+        }
+    }, {
+        'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5',
+        'md5': '3b1227ca3ed28d73ec5737c65743b2a3',
+        'info_dict': {
+            'id': '40_jahre_maus',
+            'ext': 'mp4',
+            'thumbnail': r're:^http://.+\.jpg',
+            'upload_date': '20131007',
+            'title': '12.03.2011 - 40 Jahre Maus',
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the wdrmaus.de page at `url`.
+
+        The title is "<air date> - <page heading>"; the upload date comes
+        from the `dc.date` meta tag.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        param_code = self._html_search_regex(
+            r'<a href="\?startVideo=1&amp;([^"]+)"', webpage, 'parameters')
+
+        title_date = self._search_regex(
+            r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>',
+            webpage, 'air date')
+        title_str = self._html_search_regex(
+            r'<h1>(.*?)</h1>', webpage, 'title')
+        title = '%s - %s' % (title_date, title_str)
+        upload_date = unified_strdate(
+            self._html_search_meta('dc.date', webpage))
+
+        fields = compat_parse_qs(param_code)
+        video_url = fields['firstVideo'][0]
+        thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0])
+
+        formats = [{
+            'format_id': 'rtmp',
+            'url': video_url,
+        }]
+
+        # Best effort: without the table only the RTMP format is offered.
+        jscode = self._download_webpage(
+            'http://www.wdrmaus.de/codebase/js/extended-medien.min.js',
+            video_id, fatal=False,
+            note='Downloading URL translation table',
+            errnote='Could not download URL translation table')
+        if jscode:
+            for m in re.finditer(
+                    r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}",
+                    jscode):
+                stream_prefix = m.group('stream')
+                if video_url.startswith(stream_prefix):
+                    # Swap only the leading prefix that was just matched;
+                    # str.replace would also rewrite any later occurrence.
+                    http_url = m.group('dl') + video_url[len(stream_prefix):]
+                    formats.append({
+                        'format_id': 'http',
+                        'url': http_url,
+                    })
+                    break
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+        }
+
+# TODO test _1
--- a/youtube_dl/extractor/worldstarhiphop.py
+++ b/youtube_dl/extractor/worldstarhiphop.py
@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@ -7,14 +9,14 @@ class WorldStarHipHopIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
    _TEST = {
        "url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",
-        "file": "wshh6a7q1ny0G34ZwuIO.mp4",
        "md5": "9d04de741161603bf7071bbf4e883186",
        "info_dict": {
+            "id": "wshh6a7q1ny0G34ZwuIO",
+            "ext": "mp4",
            "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
        }
    }

-
    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')
@ -23,41 +25,32 @@ class WorldStarHipHopIE(InfoExtractor):

        m_vevo_id = re.search(r'videoId=(.*?)&amp?',
                              webpage_src)
-
        if m_vevo_id is not None:
-            self.to_screen(u'Vevo video detected:')
            return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')

-        video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
-            webpage_src, u'video URL')
+        video_url = self._search_regex(
+            r'so\.addVariable\("file","(.*?)"\)', webpage_src, 'video URL')

        if 'youtube' in video_url:
-            self.to_screen(u'Youtube video detected:')
            return self.url_result(video_url, ie='Youtube')

-        if 'mp4' in video_url:
-            ext = 'mp4'
-        else:
-            ext = 'flv'
-
-        video_title = self._html_search_regex(r"<title>(.*)</title>",
-            webpage_src, u'title')
+        video_title = self._html_search_regex(
+            r"<title>(.*)</title>", webpage_src, 'title')

        # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
-        thumbnail = self._html_search_regex(r'rel="image_src" href="(.*)" />',
-            webpage_src, u'thumbnail', fatal=False)
-
+        thumbnail = self._html_search_regex(
+            r'rel="image_src" href="(.*)" />', webpage_src, 'thumbnail',
+            fatal=False)
        if not thumbnail:
            _title = r"""candytitles.*>(.*)</span>"""
            mobj = re.search(_title, webpage_src)
            if mobj is not None:
                video_title = mobj.group(1)

-        results = [{
-                    'id': video_id,
-                    'url' : video_url,
-                    'title' : video_title,
-                    'thumbnail' : thumbnail,
-                    'ext' : ext,
-                    }]
-        return results
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': video_title,
+            'thumbnail': thumbnail,
+        }
+
--- a/youtube_dl/extractor/xbef.py
+++ b/youtube_dl/extractor/xbef.py
@ -0,0 +1,50 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+)
+
+
+class XBefIE(InfoExtractor):
+    """Extractor for single videos on xbef.com."""
+    _VALID_URL = r'http://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://xbef.com/video/5119-glamourous-lesbians-smoking-drinking-and-fucking',
+        'md5': 'a478b565baff61634a98f5e5338be995',
+        'info_dict': {
+            'id': '5119',
+            'ext': 'mp4',
+            'title': 'md5:7358a9faef8b7b57acda7c04816f170e',
+            'age_limit': 18,
+            'thumbnail': 're:^http://.*\.jpg',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Resolve the player config of an xbef.com video page into an info dict."""
+        video_id = re.match(self._VALID_URL, url).group('id')
+
+        page = self._download_webpage(url, video_id)
+        page_title = self._html_search_regex(
+            r'<h1[^>]*>(.*?)</h1>', page, 'title')
+
+        # The endpoint answers with the config location in percent-encoded
+        # form, so it has to be unquoted before it can be fetched.
+        encoded_location = self._download_webpage(
+            'http://xbef.com/Main/GetVideoURLEncoded/%s' % video_id, video_id,
+            note='Retrieving config URL')
+        player_config = self._download_xml(
+            compat_urllib_parse.unquote(encoded_location), video_id,
+            note='Retrieving config')
+
+        return {
+            'id': video_id,
+            'url': player_config.find('./file').text,
+            'title': page_title,
+            'thumbnail': player_config.find('./image').text,
+            'age_limit': 18,
+        }
+
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@ -1,11 +1,10 @@
 from __future__ import unicode_literals

-import os
 import re
+import json

 from .common import InfoExtractor
 from ..utils import (
-    compat_urllib_parse_urlparse,
    compat_urllib_request,
    parse_duration,
    str_to_int,
@ -42,7 +41,6 @@ class XTubeIE(InfoExtractor):
            r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
        video_description = self._html_search_regex(
            r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False)
-        video_url = self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
        duration = parse_duration(self._html_search_regex(
            r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False))
        view_count = self._html_search_regex(
@ -54,12 +52,18 @@ class XTubeIE(InfoExtractor):
        if comment_count:
            comment_count = str_to_int(comment_count)

-        path = compat_urllib_parse_urlparse(video_url).path
-        extension = os.path.splitext(path)[1][1:]
-        format = path.split('/')[5].split('_')[:2]
-        format[0] += 'p'
-        format[1] += 'k'
-        format = "-".join(format)
+        player_quality_option = json.loads(self._html_search_regex(
+            r'playerQualityOption = ({.+?});', webpage, 'player quality option'))
+
+        QUALITIES = ['3gp', 'mp4_normal', 'mp4_high', 'flv', 'mp4_ultra', 'mp4_720', 'mp4_1080']
+        formats = [
+            {
+                'url': furl,
+                'format_id': format_id,
+                'preference': QUALITIES.index(format_id) if format_id in QUALITIES else -1,
+            } for format_id, furl in player_quality_option.items()
+        ]
+        self._sort_formats(formats)

        return {
            'id': video_id,
@ -69,9 +73,42 @@ class XTubeIE(InfoExtractor):
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
-            'url': video_url,
-            'ext': extension,
-            'format': format,
-            'format_id': format,
+            'formats': formats,
            'age_limit': 18,
        }
+
+class XTubeUserIE(InfoExtractor):
+    """Turns an XTube user profile into a playlist of that user's videos.
+
+    The video count is read from the profile page, then the paginated
+    listing is walked and every `addthis:url` found is handed to the XTube
+    extractor.
+    """
+    IE_DESC = 'XTube user profile'
+    _VALID_URL = r'https?://(?:www\.)?xtube\.com/community/profile\.php\?(.*?)user=(?P<username>[^&#]+)(?:$|[&#])'
+
+    def _real_extract(self, url):
+        """Return a playlist of URL entries for the profile addressed by `url`."""
+        mobj = re.match(self._VALID_URL, url)
+        username = mobj.group('username')
+
+        profile_page = self._download_webpage(
+            url, username, note='Retrieving profile page')
+
+        # The username is taken verbatim from the URL; escape it so that
+        # characters such as '.' or '+' are matched literally.
+        video_count = int(self._search_regex(
+            r'<strong>%s\'s Videos \(([0-9]+)\)</strong>' % re.escape(username),
+            profile_page, 'video count'))
+
+        PAGE_SIZE = 25
+        urls = []
+        # Ceiling division.  The previous "+ PAGE_SIZE + 1" requested one
+        # page too many in most cases, including exact multiples of
+        # PAGE_SIZE and profiles without videos.
+        page_count = (video_count + PAGE_SIZE - 1) // PAGE_SIZE
+        for n in range(1, page_count + 1):
+            lpage_url = 'http://www.xtube.com/user_videos.php?page=%d&u=%s' % (n, username)
+            lpage = self._download_webpage(
+                lpage_url, username,
+                note='Downloading page %d/%d' % (n, page_count))
+            urls.extend(
+                re.findall(r'addthis:url="([^"]+)"', lpage))
+
+        return {
+            '_type': 'playlist',
+            'id': username,
+            'entries': [{
+                '_type': 'url',
+                'url': eurl,
+                'ie_key': 'XTube',
+            } for eurl in urls]
+        }
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -7,13 +7,13 @@ import itertools
 import json
 import os.path
 import re
-import string
 import struct
 import traceback
 import zlib

 from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
+from ..jsinterp import JSInterpreter
 from ..utils import (
    compat_chr,
    compat_parse_qs,
@ -176,32 +176,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):


        # 3d videos
-        '82': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
-        '83': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
-        '84': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
-        '85': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': '3D', 'preference': -20},
-        '100': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
-        '101': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
-        '102': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
+        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
+        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
+        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
+        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
+        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
+        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
+        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},

        # Apple HTTP Live Streaming
-        '92': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
-        '93': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'HLS', 'preference': -10},
-        '94': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'HLS', 'preference': -10},
-        '95': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'HLS', 'preference': -10},
-        '96': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'HLS', 'preference': -10},
-        '132': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
-        '151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10},
+        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
+        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
+        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
+        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
+        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
+        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
+        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},

        # DASH mp4 video
-        '133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
@ -215,13 +215,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
-        '242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40},
-        '243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40},
-        '244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
-        '245': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
-        '246': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
-        '247': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': 'DASH webm', 'preference': -40},
-        '248': {'ext': 'webm', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH webm', 'preference': -40},
+        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH webm', 'preference': -40},
+        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH webm', 'preference': -40},
+        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
+        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
+        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
+        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH webm', 'preference': -40},
+        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH webm', 'preference': -40},

        # Dash webm audio
        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
@ -438,113 +438,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    def _parse_sig_js(self, jscode):
        funcname = self._search_regex(
            r'signature=([a-zA-Z]+)', jscode,
-            u'Initial JS player signature function name')
+             u'Initial JS player signature function name')

-        functions = {}
-
-        def argidx(varname):
-            return string.lowercase.index(varname)
-
-        def interpret_statement(stmt, local_vars, allow_recursion=20):
-            if allow_recursion < 0:
-                raise ExtractorError(u'Recursion limit reached')
-
-            if stmt.startswith(u'var '):
-                stmt = stmt[len(u'var '):]
-            ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
-                             r'=(?P<expr>.*)$', stmt)
-            if ass_m:
-                if ass_m.groupdict().get('index'):
-                    def assign(val):
-                        lvar = local_vars[ass_m.group('out')]
-                        idx = interpret_expression(ass_m.group('index'),
-                                                   local_vars, allow_recursion)
-                        assert isinstance(idx, int)
-                        lvar[idx] = val
-                        return val
-                    expr = ass_m.group('expr')
-                else:
-                    def assign(val):
-                        local_vars[ass_m.group('out')] = val
-                        return val
-                    expr = ass_m.group('expr')
-            elif stmt.startswith(u'return '):
-                assign = lambda v: v
-                expr = stmt[len(u'return '):]
-            else:
-                raise ExtractorError(
-                    u'Cannot determine left side of statement in %r' % stmt)
-
-            v = interpret_expression(expr, local_vars, allow_recursion)
-            return assign(v)
-
-        def interpret_expression(expr, local_vars, allow_recursion):
-            if expr.isdigit():
-                return int(expr)
-
-            if expr.isalpha():
-                return local_vars[expr]
-
-            m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
-            if m:
-                member = m.group('member')
-                val = local_vars[m.group('in')]
-                if member == 'split("")':
-                    return list(val)
-                if member == 'join("")':
-                    return u''.join(val)
-                if member == 'length':
-                    return len(val)
-                if member == 'reverse()':
-                    return val[::-1]
-                slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
-                if slice_m:
-                    idx = interpret_expression(
-                        slice_m.group('idx'), local_vars, allow_recursion-1)
-                    return val[idx:]
-
-            m = re.match(
-                r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
-            if m:
-                val = local_vars[m.group('in')]
-                idx = interpret_expression(m.group('idx'), local_vars,
-                                           allow_recursion-1)
-                return val[idx]
-
-            m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
-            if m:
-                a = interpret_expression(m.group('a'),
-                                         local_vars, allow_recursion)
-                b = interpret_expression(m.group('b'),
-                                         local_vars, allow_recursion)
-                return a % b
-
-            m = re.match(
-                r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
-            if m:
-                fname = m.group('func')
-                if fname not in functions:
-                    functions[fname] = extract_function(fname)
-                argvals = [int(v) if v.isdigit() else local_vars[v]
-                           for v in m.group('args').split(',')]
-                return functions[fname](argvals)
-            raise ExtractorError(u'Unsupported JS expression %r' % expr)
-
-        def extract_function(funcname):
-            func_m = re.search(
-                r'function ' + re.escape(funcname) +
-                r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
-                jscode)
-            argnames = func_m.group('args').split(',')
-
-            def resf(args):
-                local_vars = dict(zip(argnames, args))
-                for stmt in func_m.group('code').split(';'):
-                    res = interpret_statement(stmt, local_vars)
-                return res
-            return resf
-
-        initial_function = extract_function(funcname)
+        jsi = JSInterpreter(jscode)
+        initial_function = jsi.extract_function(funcname)
        return lambda s: initial_function([s])

    def _parse_sig_swf(self, file_contents):
@ -1549,7 +1446,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                break

            more = self._download_json(
-                'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
+                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+                'Downloading page #%s' % page_num,
+                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

@ -1712,7 +1611,7 @@ class YoutubeUserIE(InfoExtractor):

 class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
-    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
+    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'
@ -1723,9 +1622,12 @@ class YoutubeSearchIE(SearchInfoExtractor):
        video_ids = []
        pagenum = 0
        limit = n
+        PAGE_SIZE = 50

-        while (50 * pagenum) < limit:
-            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
+        while (PAGE_SIZE * pagenum) < limit:
+            result_url = self._API_URL % (
+                compat_urllib_parse.quote_plus(query.encode('utf-8')),
+                (PAGE_SIZE * pagenum) + 1)
            data_json = self._download_webpage(
                result_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (pagenum + 1),
@ -1836,11 +1738,10 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
        feed_entries = []
        paging = 0
        for i in itertools.count(1):
-            info = self._download_webpage(self._FEED_TEMPLATE % paging,
+            info = self._download_json(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
-            info = json.loads(info)
-            feed_html = info['feed_html']
+            feed_html = info.get('feed_html') or info.get('content_html')
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@ -0,0 +1,116 @@
+from __future__ import unicode_literals
+
+import re
+
+from .utils import (
+    ExtractorError,
+)
+
+
+class JSInterpreter(object):
+    def __init__(self, code):
+        self.code = code
+        self._functions = {}
+
+    def interpret_statement(self, stmt, local_vars, allow_recursion=20):
+        if allow_recursion < 0:
+            raise ExtractorError('Recursion limit reached')
+
+        if stmt.startswith('var '):
+            stmt = stmt[len('var '):]
+        ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
+                         r'=(?P<expr>.*)$', stmt)
+        if ass_m:
+            if ass_m.groupdict().get('index'):
+                def assign(val):
+                    lvar = local_vars[ass_m.group('out')]
+                    idx = self.interpret_expression(
+                        ass_m.group('index'), local_vars, allow_recursion)
+                    assert isinstance(idx, int)
+                    lvar[idx] = val
+                    return val
+                expr = ass_m.group('expr')
+            else:
+                def assign(val):
+                    local_vars[ass_m.group('out')] = val
+                    return val
+                expr = ass_m.group('expr')
+        elif stmt.startswith('return '):
+            assign = lambda v: v
+            expr = stmt[len('return '):]
+        else:
+            raise ExtractorError(
+                'Cannot determine left side of statement in %r' % stmt)
+
+        v = self.interpret_expression(expr, local_vars, allow_recursion)
+        return assign(v)
+
+    def interpret_expression(self, expr, local_vars, allow_recursion):
+        if expr.isdigit():
+            return int(expr)
+
+        if expr.isalpha():
+            return local_vars[expr]
+
+        m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
+        if m:
+            member = m.group('member')
+            val = local_vars[m.group('in')]
+            if member == 'split("")':
+                return list(val)
+            if member == 'join("")':
+                return u''.join(val)
+            if member == 'length':
+                return len(val)
+            if member == 'reverse()':
+                return val[::-1]
+            slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
+            if slice_m:
+                idx = self.interpret_expression(
+                    slice_m.group('idx'), local_vars, allow_recursion - 1)
+                return val[idx:]
+
+        m = re.match(
+            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
+        if m:
+            val = local_vars[m.group('in')]
+            idx = self.interpret_expression(
+                m.group('idx'), local_vars, allow_recursion - 1)
+            return val[idx]
+
+        m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
+        if m:
+            a = self.interpret_expression(
+                m.group('a'), local_vars, allow_recursion)
+            b = self.interpret_expression(
+                m.group('b'), local_vars, allow_recursion)
+            return a % b
+
+        m = re.match(
+            r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
+        if m:
+            fname = m.group('func')
+            if fname not in self._functions:
+                self._functions[fname] = self.extract_function(fname)
+            argvals = [int(v) if v.isdigit() else local_vars[v]
+                       for v in m.group('args').split(',')]
+            return self._functions[fname](argvals)
+        raise ExtractorError('Unsupported JS expression %r' % expr)
+
+    def extract_function(self, funcname):
+        func_m = re.search(
+            (r'(?:function %s|%s\s*=\s*function)' % (
+                re.escape(funcname), re.escape(funcname))) +
+            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+            self.code)
+        if func_m is None:
+            raise ExtractorError('Could not find JS function %r' % funcname)
+        argnames = func_m.group('args').split(',')
+
+        def resf(args):
+            local_vars = dict(zip(argnames, args))
+            for stmt in func_m.group('code').split(';'):
+                res = self.interpret_statement(stmt, local_vars)
+            return res
+        return resf
+
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@ -53,8 +53,9 @@ class FFmpegPostProcessor(PostProcessor):

        if self._downloader.params.get('verbose', False):
            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
-        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        stdout,stderr = p.communicate()
+        bcmd = [self._downloader.encode(c) for c in cmd]
+        p = subprocess.Popen(bcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        stdout, stderr = p.communicate()
        if p.returncode != 0:
            stderr = stderr.decode('utf-8', 'replace')
            msg = stderr.strip().split('\n')[-1]
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

+import calendar
 import contextlib
 import ctypes
 import datetime
@ -501,13 +502,13 @@ def orderedSet(iterable):
            res.append(el)
    return res

-def unescapeHTML(s):
-    """
-    @param s a string
-    """
-    assert type(s) == type(u'')

-    result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
+def unescapeHTML(s):
+    if s is None:
+        return None
+    assert type(s) == compat_str
+
+    result = re.sub(r'(?u)&(.+?);', htmlentity_transform, s)
    return result


@ -538,7 +539,6 @@ def encodeFilename(s, for_subprocess=False):
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')

-
 def decodeOption(optval):
    if optval is None:
        return optval
@ -761,8 +761,37 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    https_response = http_response


+def parse_iso8601(date_str):
+    """ Return a UNIX timestamp from the given date """
+
+    if date_str is None:
+        return None
+
+    m = re.search(
+        r'Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$',
+        date_str)
+    if not m:
+        timezone = datetime.timedelta()
+    else:
+        date_str = date_str[:-len(m.group(0))]
+        if not m.group('sign'):
+            timezone = datetime.timedelta()
+        else:
+            sign = 1 if m.group('sign') == '+' else -1
+            timezone = datetime.timedelta(
+                hours=sign * int(m.group('hours')),
+                minutes=sign * int(m.group('minutes')))
+
+    dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone
+    return calendar.timegm(dt.timetuple())
+
+
 def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD"""
+
+    if date_str is None:
+        return None
+
    upload_date = None
    #Replace commas
    date_str = date_str.replace(',', ' ')
@ -1122,11 +1151,11 @@ def setproctitle(title):
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
-    title = title
-    buf = ctypes.create_string_buffer(len(title) + 1)
-    buf.value = title.encode('utf-8')
+    title_bytes = title.encode('utf-8')
+    buf = ctypes.create_string_buffer(len(title_bytes))
+    buf.value = title_bytes
    try:
-        libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
+        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this

@ -1151,12 +1180,16 @@ def int_or_none(v, scale=1):
    return v if v is None else (int(v) // scale)


+def float_or_none(v, scale=1):
+    return v if v is None else (float(v) / scale)
+
+
 def parse_duration(s):
    if s is None:
        return None

    m = re.match(
-        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s)
+        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
    if not m:
        return None
    res = int(m.group('secs'))
@ -1231,8 +1264,8 @@ class PagedList(object):

 def uppercase_escape(s):
    return re.sub(
-        r'\\U([0-9a-fA-F]{8})',
-        lambda m: compat_chr(int(m.group(1), base=16)), s)
+        r'\\U[0-9a-fA-F]{8}',
+        lambda m: m.group(0).decode('unicode-escape'), s)

 try:
    struct.pack(u'!I', 0)
@ -1298,3 +1331,7 @@ US_RATINGS = {
    'R': 16,
    'NC': 18,
 }
+
+
+def strip_jsonp(code):
+    return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@

-__version__ = '2014.03.21.1'
+__version__ = '2014.04.02'
Author	SHA1	Message	Date
Philipp Hagemeister	d2b194607c	release 2014.04.02	2014-04-02 14:26:34 +02:00
Jaime Marquínez Ferrándiz	f6177462db	[youtube] feeds: Also look for the html in the 'content_html' field (fixes #2671 )	2014-04-02 14:13:08 +02:00
Jaime Marquínez Ferrándiz	9ddaf4ef8c	[comedycentral] Change XPath .//guid to ./guid (fixes #2668 ) It fails to find the element in python 2.6 and it's not required, the element is a direct child of the item node.	2014-04-01 21:38:07 +02:00
Jaime Marquínez Ferrándiz	97b5573848	[comedycentral] Update test title for `34cbc7ee8d`	2014-04-01 21:29:40 +02:00
Jaime Marquínez Ferrándiz	18c95c1ab0	[rutube] Use _download_json	2014-04-01 20:30:22 +02:00
Sergey M․	0479c625a4	[brightcove] Encode object_str with utf-8	2014-04-01 20:17:35 +07:00
Sergey M․	f659951e22	[vk] Support optional dash for oid in embedded links	2014-04-01 19:38:42 +07:00
Philipp Hagemeister	5853a7316e	release 2014.04.01.3	2014-04-01 13:17:15 +02:00
Philipp Hagemeister	a612753db9	[utils] Correct decoding of large unicode codepoints in uppercase_escape (Fixes #2664 )	2014-04-01 13:17:07 +02:00
Philipp Hagemeister	c8fc3fb524	release 2014.04.01.2	2014-04-01 05:57:15 +02:00
Philipp Hagemeister	5912c639df	[youtube] Transform google's JSON dialect (fixes #2663 )	2014-04-01 05:56:56 +02:00
Philipp Hagemeister	017e4dd58c	release 2014.04.01.1	2014-04-01 00:25:17 +02:00
Philipp Hagemeister	651486621d	[comedycentral] Allow URLs with query parts (fixes #2661 )	2014-04-01 00:25:11 +02:00
Philipp Hagemeister	28d9032c88	release 2014.04.01	2014-04-01 00:02:39 +02:00
Philipp Hagemeister	16f4eb723a	[comedycentral] Add support for /videos URLs (Fixes #2660 )	2014-04-01 00:02:32 +02:00
Sergey M․	1cbd410620	[pyvideo] Modernize	2014-03-31 19:31:48 +07:00
Philipp Hagemeister	d41ac5f5dc	release 2014.03.30.1	2014-03-30 15:57:47 +02:00
Philipp Hagemeister	9c1fc022ae	[generic] Warn before fallback to automatic search	2014-03-30 15:57:35 +02:00
Philipp Hagemeister	83d548ef0f	[youtube] Encode ytsearch query	2014-03-30 15:57:35 +02:00
Sergey M․	c72477bd32	[rutube] Modernize	2014-03-30 15:35:07 +07:00
Philipp Hagemeister	9a7b072e38	[wdr] Add support for more wdrmaus subpages	2014-03-30 07:42:35 +02:00
Philipp Hagemeister	cbc4a6cc7e	release 2014.03.30	2014-03-30 07:25:48 +02:00
Philipp Hagemeister	cd7481a39e	[wdr] Add support for wdrmaus.de (Fixes #2651 )	2014-03-30 07:25:42 +02:00
Philipp Hagemeister	acd213ed6d	Remove unusued imports	2014-03-30 07:16:07 +02:00
Philipp Hagemeister	77ffa95701	[jsinterp] Better error messages	2014-03-30 07:15:14 +02:00
Philipp Hagemeister	2b25cb5d76	[youtube] Move JavaScript interpreter into its own module	2014-03-30 07:02:58 +02:00
Philipp Hagemeister	62fec3b2ff	Add new --encoding option (Fixes #2650 )	2014-03-30 06:08:22 +02:00
Jaime Marquínez Ferrándiz	e79162558e	[wat] Modernize	2014-03-29 15:15:16 +01:00
Jaime Marquínez Ferrándiz	2da67107ee	[tf1] Modernize	2014-03-29 15:05:15 +01:00
Jaime Marquínez Ferrándiz	2ff7f8975e	[nba] Modernize	2014-03-29 14:57:48 +01:00
Jaime Marquínez Ferrándiz	87a2566048	[metacritic] Modernize test	2014-03-29 14:57:48 +01:00
Jaime Marquínez Ferrándiz	986f56736b	[roxwel] Modernize	2014-03-29 14:57:44 +01:00
Jaime Marquínez Ferrándiz	2583a0308b	[huffpost] Modernize test	2014-03-29 14:35:45 +01:00
Jaime Marquínez Ferrándiz	40c716d2a2	[ign] Modernize	2014-03-29 14:34:03 +01:00
Jaime Marquínez Ferrándiz	79bfd01001	[kickstarter] Fix extraction, extract more info and modernize	2014-03-29 14:22:28 +01:00
Jaime Marquínez Ferrándiz	f2bcdd8e02	[discovery] modernize	2014-03-29 14:22:27 +01:00
Philipp Hagemeister	8c5850eeb4	release 2014.03.29	2014-03-29 14:01:53 +01:00
Philipp Hagemeister	bd3e077a2d	Merge branch 'master' of github.com:rg3/youtube-dl	2014-03-29 14:01:19 +01:00
Jaime Marquínez Ferrándiz	7e70ac36b3	[bloomberg] Fix extraction (fixes #2154 ) Stop using the OoyalaIE, extract the f4m url instead.	2014-03-29 11:55:12 +01:00
Philipp Hagemeister	2cc0082dc0	Credit @phaer for OE1 (#2646 )	2014-03-29 10:11:32 +01:00
Sergey M․	056b56688a	[ntv] Simplify	2014-03-29 15:55:03 +07:00
Philipp Hagemeister	b17418313f	[oe1] Simplify (#2646 )	2014-03-28 23:23:58 +01:00
Philipp Hagemeister	e9a6fd6a68	Merge remote-tracking branch 'phaer/add-oe1-support'	2014-03-28 23:21:58 +01:00
Philipp Hagemeister	bf30f3bd9d	release 2014.03.28	2014-03-28 23:14:54 +01:00
Philipp Hagemeister	330edf2d84	Mention where to find keys in --dump-json (Fixes #2648 )	2014-03-28 23:13:03 +01:00
Philipp Hagemeister	43f775e4ca	[comedycentral] Duration can now be a float (Fixes #2647 )	2014-03-28 23:06:34 +01:00
Sergey M․	8f6562448c	[ntv] Move app guess outside formats loop	2014-03-28 23:09:56 +07:00
Sergey M․	263f4b514b	[ntv] Add support for ntv.ru (Closes #2581 )	2014-03-28 23:01:08 +07:00
phaer	f0da3f1ef9	[oe1] Add support for oe1.orf.at.	2014-03-28 17:57:25 +02:00
Sergey M․	cb3ac1c610	[smotri] Modernize and add support for emdebbed videos (Closes #2585 )	2014-03-28 19:58:49 +07:00
Sergey M․	8efd15f477	[canalplus] Fix video id extraction (Closes #2645 )	2014-03-28 18:47:15 +07:00
Jaime Marquínez Ferrándiz	d26ebe990f	[ehow] Modernize	2014-03-27 21:23:02 +01:00
Jaime Marquínez Ferrándiz	28acf5500a	[appletrailers] Modernize	2014-03-27 21:10:51 +01:00
Jaime Marquínez Ferrándiz	214c22c704	[niconico] Modernize	2014-03-27 21:01:09 +01:00
Sergey M․	8cdafb47b9	[mooshare] Add support for URLs starting with 'www'	2014-03-27 19:08:35 +07:00
Philipp Hagemeister	0dae5083f1	[urort] Add date	2014-03-27 02:56:23 +01:00
Philipp Hagemeister	4c89bbd22c	release 2014.03.27.1	2014-03-27 02:52:06 +01:00
Philipp Hagemeister	e2b06e76c1	[urort] Add extractor (Fixes #2634 )	2014-03-27 02:51:50 +01:00
Philipp Hagemeister	e9c076c317	[clipsyndicate] Modernize	2014-03-27 02:30:00 +01:00
Philipp Hagemeister	6c072e7d25	release 2014.03.27	2014-03-27 02:22:57 +01:00
Philipp Hagemeister	ac6c104871	[ted] Add support for watch/ URLs (Fixes #2637 )	2014-03-27 02:22:40 +01:00
Philipp Hagemeister	69c01a9f68	[comedycentral] Add a testcase for extended-interviews URLs (#2636 )	2014-03-27 02:02:48 +01:00
Philipp Hagemeister	e55213ce35	Merge remote-tracking branch 'malept/tds-extended-interviews'	2014-03-27 02:02:18 +01:00
Mark Lee	24a2aac445	[comedycentral] fix TDS extended interviews The new website broke the URL format. Added "playlist" as a valid ID keyword.	2014-03-26 10:51:02 -07:00
Jaime Marquínez Ferrándiz	98acdc895b	Merge remote-tracking branch 'dstftw/download-referer-header' (closes #2628 )	2014-03-26 15:20:11 +01:00
Jaime Marquínez Ferrándiz	bd3b5b8b10	[slashdot] Remove extractor The generic ooyala detection works fine.	2014-03-26 15:09:14 +01:00
Jaime Marquínez Ferrándiz	9a90636805	[vice] Remove extractor The generic ooyala detection works fine.	2014-03-26 15:03:34 +01:00
Sergey M․	6a66ae96ed	[cspan] Roll back unfinished rtmp support	2014-03-26 19:51:54 +07:00
Jaime Marquínez Ferrándiz	2c8a4ba6b5	Makefile: include the docs in the tarball	2014-03-26 12:01:08 +01:00
Philipp Hagemeister	ad8915b729	Add --no-warnings option (Fixes #2630 )	2014-03-26 00:43:46 +01:00
Philipp Hagemeister	34cbc7ee8d	[comedycentral] Better titles	2014-03-25 23:46:51 +01:00
Sergey M․	a59e40a1ea	Replace 'referer' with 'http_referer'	2014-03-25 21:53:26 +07:00
Sergey M․	ad0a75db6b	[auengine] Add referer	2014-03-25 21:22:41 +07:00
Sergey M․	1d0e49e1c7	Use explicitly set Referer header for downloading	2014-03-25 21:22:27 +07:00
Sergey M․	b4461b6ebe	[auengine] Modernize	2014-03-25 21:16:10 +07:00
Philipp Hagemeister	80959224fe	release 2014.03.25.1	2014-03-25 14:27:40 +01:00
Philipp Hagemeister	865cbf4fc5	[comedycentral] Correct uri (Fixes #2627 )	2014-03-25 14:27:23 +01:00
Philipp Hagemeister	196f061cac	release 2014.03.25	2014-03-25 04:01:13 +01:00
Philipp Hagemeister	99b380c33b	[comedycentral] Fix thedailyshow / thecolbertreport (Fixes #2600 , #2596 )	2014-03-25 04:00:57 +01:00
Philipp Hagemeister	02e4482e22	release 2014.03.24.5	2014-03-24 23:23:38 +01:00
Philipp Hagemeister	b8a792de80	Merge remote-tracking branch 'origin/master' into HEAD Conflicts: youtube_dl/extractor/arte.py	2014-03-24 23:23:17 +01:00
Philipp Hagemeister	fac55558ad	[washingtonpost] Add extractor (Fixes #2622 )	2014-03-24 23:21:20 +01:00
Jaime Marquínez Ferrándiz	b2799ff96d	[arte] Fix videos.arte.tv extraction	2014-03-24 22:38:51 +01:00
Philipp Hagemeister	7a249480b4	[arte] Fix video.arte.tv extractor	2014-03-24 22:34:03 +01:00
Philipp Hagemeister	f605128d13	[rts] Add thumbnail support	2014-03-24 22:32:04 +01:00
Philipp Hagemeister	ba40a74666	[clipfish] Modernize	2014-03-24 22:30:32 +01:00
Philipp Hagemeister	fb8ae2d438	release 2014.03.24.4	2014-03-24 22:03:51 +01:00
Philipp Hagemeister	893f8832b5	[arte] Add support for embedded videos (Fixes #2620 )	2014-03-24 22:01:47 +01:00
Philipp Hagemeister	878d11ec29	[arte] Add support for multiple formats	2014-03-24 21:36:26 +01:00
Philipp Hagemeister	515bbe4b5b	[arte] Remove liveweb support liveweb.arte.tv is no longer functional, everything has moved to concert	2014-03-24 21:31:19 +01:00
Philipp Hagemeister	75f2e25ba9	[downloader/hls] Encode filename (Fixes #2609 )	2014-03-24 21:23:05 +01:00
Philipp Hagemeister	0d466d34a3	release 2014.03.24.3	2014-03-24 17:12:42 +01:00
Philipp Hagemeister	6949d81095	[byutv] Add support (Fixes #2612 )	2014-03-24 17:12:15 +01:00
Philipp Hagemeister	f847ca02d3	[addanime] Modernize	2014-03-24 16:39:53 +01:00
Philipp Hagemeister	510243ba58	release 2014.03.24.2	2014-03-24 15:00:47 +01:00
Sergey M․	b540697a8a	[veoh] Improve extraction, fix youtube extraction (Closes #2616 )	2014-03-24 20:53:03 +07:00
Philipp Hagemeister	0d3641e589	[cinemassacre] Fix #2815	2014-03-24 13:43:13 +01:00
Philipp Hagemeister	72546c831e	Merge pull request #2553 from anisse/master Add an option to specify custom HTTP headers	2014-03-24 10:42:58 +01:00
Philipp Hagemeister	d26db9269d	release 2014.03.24.1	2014-03-24 10:25:58 +01:00
Philipp Hagemeister	4c0941853a	[devscripts/release] Check version number	2014-03-24 10:25:49 +01:00
Philipp Hagemeister	c11726364e	release 2014.03.24	2014-03-24 10:17:35 +01:00
Philipp Hagemeister	c577d735c6	release 2013.03.24.2	2014-03-24 02:24:31 +01:00
Philipp Hagemeister	9f0375f61a	release 2013.03.24.1	2014-03-24 02:22:12 +01:00
Philipp Hagemeister	5e114e4bfe	[soundcloud] Always add streaming formats	2014-03-24 02:21:17 +01:00
Philipp Hagemeister	83622b6d2f	[soundcloud] Simplify string literals	2014-03-24 02:15:31 +01:00
Philipp Hagemeister	3d87426c2d	release 2013.03.24	2014-03-24 01:42:14 +01:00
Philipp Hagemeister	ce328530a9	Merge remote-tracking branch 'origin/master'	2014-03-24 01:42:11 +01:00
Philipp Hagemeister	f70daac108	[RTS] Add extractor (Fixes #2608 )	2014-03-24 01:41:14 +01:00
Philipp Hagemeister	912b38b428	[instagram] Fix info_dict key name	2014-03-24 01:40:09 +01:00
Philipp Hagemeister	6e25c58ed7	Merge pull request #2567 from jaimeMF/sphinx-docs Add initial sphinx docs	2014-03-24 00:50:32 +01:00
Philipp Hagemeister	51fb2e98d2	[radiofrance] Modernize	2014-03-23 17:43:33 +01:00
Philipp Hagemeister	38d63d846e	[extractor/common] Clarify preference key in formats	2014-03-23 17:41:43 +01:00
Philipp Hagemeister	07cec9776e	release 2014.03.23	2014-03-23 16:06:41 +01:00
Philipp Hagemeister	ea38e55fff	[instagram] Add support for user profiles (Fixes #2606 )	2014-03-23 16:06:07 +01:00
Philipp Hagemeister	257cfebfe6	[test] Move expect_info_dict out of test_download	2014-03-23 15:52:21 +01:00
Philipp Hagemeister	6eefe53329	[utils] Simplify setproctitle	2014-03-23 14:28:22 +01:00
Philipp Hagemeister	1986025d2b	[xbef] (Add extractor)	2014-03-23 14:04:36 +01:00
Philipp Hagemeister	c9aa111b4f	[worldstarhiphop] Modernize	2014-03-23 13:49:15 +01:00
Philipp Hagemeister	bfcb6e3917	Merge remote-tracking branch 'fiocfun/xtube-user-extractor'	2014-03-23 13:36:14 +01:00
Sergey M․	2c1396073e	[metacafe] Remove accidently inserted comment string	2014-03-23 05:16:02 +07:00
Sergey M․	401983c6a0	[metacafe] More modernize	2014-03-23 05:13:15 +07:00
Sergey M․	391dc3ee07	[metacafe] Replace cbs test	2014-03-23 05:08:11 +07:00
Sergey M․	be3b8fa30f	[metacafe] Modernize	2014-03-23 05:05:31 +07:00
fiocfun	9f5809b3e8	[xtube] user playlist extractor	2014-03-23 00:16:35 +06:00
Sergey M․	0320ddc192	[pornhub] Fix uploader extraction and extract counts	2014-03-22 21:30:22 +07:00
Philipp Hagemeister	56dd55721c	Remove unused imports and clarify variable names	2014-03-22 15:17:32 +01:00
Philipp Hagemeister	231f76b530	[toypics] Separate user and video extraction (#2601 )	2014-03-22 15:15:01 +01:00
Philipp Hagemeister	55442a7812	Merge remote-tracking branch 'fiocfun/toypics-support'	2014-03-22 14:24:44 +01:00
Philipp Hagemeister	43b81eb98a	[youtube] Remove useless resolution fields from format definitions These can be - and are - calculated automatically by the YoutubeDL core.	2014-03-22 14:22:41 +01:00
Philipp Hagemeister	bfd718793c	Merge remote-tracking branch 'hurda/patch-1'	2014-03-22 14:21:04 +01:00
Philipp Hagemeister	a9c2896e22	Make missing test definition fields an error If the result is not testable (for example, because a description changes often), either pass in a type or a regular expression (a string starting with 're:')	2014-03-22 14:20:07 +01:00
hurda	278229d195	itag 160 is 144p, not 192p	2014-03-22 12:15:45 +01:00
Philipp Hagemeister	fa154d1dbe	[videolectures.net] Make description optional	2014-03-22 12:10:56 +01:00
Jaime Marquínez Ferrándiz	7e2ede9891	[generic] Run TED detection before JW Player detection Otherwise it overwrittes the `mobj` variable.	2014-03-22 10:20:44 +01:00
fiocfun	74af99fc2f	toypics.net support	2014-03-22 04:07:44 +06:00
Jaime Marquínez Ferrándiz	0f2a2ba14b	Merge remote-tracking branch 'dstftw/generic-webpage-unescape' Conflicts: youtube_dl/extractor/generic.py	2014-03-21 22:14:24 +01:00
Jaime Marquínez Ferrándiz	e24b5a8610	[ooyala] Modernize	2014-03-21 21:55:51 +01:00
Jaime Marquínez Ferrándiz	750f9020ae	[generic] Recognize more Ooyala embedded videos (#2569 )	2014-03-21 21:51:33 +01:00
Jaime Marquínez Ferrándiz	f82863851e	Add an extractor for on.aol.com	2014-03-21 19:54:44 +01:00
Jaime Marquínez Ferrándiz	933a5b3792	Add extractor for Engadget and 5min (closes #2465 ) engadget.com uses the generic 5min.com service.	2014-03-21 19:13:46 +01:00
Sergey M․	aa488e1385	[xtube] Fix formats extraction	2014-03-21 23:58:40 +07:00
Philipp Hagemeister	d77650525d	release 2014.03.21.5	2014-03-21 14:52:57 +01:00
Philipp Hagemeister	3e50c29984	release 2014.03.21.4	2014-03-21 14:38:55 +01:00
Philipp Hagemeister	64e7ad6045	[videolectures] (New extractor)	2014-03-21 14:38:41 +01:00
Philipp Hagemeister	23f4a93bb4	[daum] Modernize	2014-03-21 14:38:41 +01:00
Jaime Marquínez Ferrándiz	6f13b055f1	[cspan] Fix typo in a comment	2014-03-21 08:01:20 +01:00
Philipp Hagemeister	1f91bd15c3	release 2014.03.21.3	2014-03-21 02:10:35 +01:00
Philipp Hagemeister	11a15be4ce	[cspan] Add support for newer videos (Fixes #2577 )	2014-03-21 02:10:24 +01:00
Philipp Hagemeister	14e17e18cb	release 2014.03.21.2	2014-03-21 01:42:45 +01:00
Philipp Hagemeister	1b124d1942	[parliamentliveuk] Add extractor	2014-03-21 01:42:28 +01:00
Anisse Astier	410afb2003	Add an option to specify custom HTTP headers	2014-03-17 16:40:41 +01:00
Jaime Marquínez Ferrándiz	685052fc7b	Add initial sphinx docs With an initial guide for using youtube_dl from python programs.	2014-03-15 19:08:09 +01:00
Sergey M․	d95e35d659	[generic] Add nowvideo test hidden behind percent encoding	2014-03-15 04:39:53 +07:00
Sergey M․	1439073049	[generic] Add comment for unescaping webpage contents	2014-03-15 04:38:49 +07:00
Sergey M	1f7659dbe9	[generic] Unescape webpage contents	2014-03-15 04:21:17 +07:00