release 2014.03.07.1

[facebook] Fix login process
The login was broken and did not work in Python 3. Also use `_download_webpage` instead of `compat_urllib_request.urlopen`.
2025-12-13 09:32:42 +01:00 · 2014-03-07 15:59:10 +01:00 · 2014-03-07 15:25:33 +01:00 · 2014-03-07 06:41:48 +01:00 · 2014-03-07 06:56:48 +07:00
5 changed files with 45 additions and 13 deletions
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -33,6 +33,7 @@ from youtube_dl.utils import (
    unified_strdate,
    unsmuggle_url,
    url_basename,
+    urlencode_postdata,
    xpath_with_ns,
 )

@@ -261,5 +262,9 @@ class TestUtil(unittest.TestCase):
            bam''')
        self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])

+    def test_urlencode_postdata(self):
+        data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
+        self.assertTrue(isinstance(data, bytes))
+
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -11,6 +11,7 @@ from ..utils import (
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
+    urlencode_postdata,

    ExtractorError,
 )
@@ -51,8 +52,8 @@ class FacebookIE(InfoExtractor):

        login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
        login_page_req.add_header('Cookie', 'locale=en_US')
-        self.report_login()
-        login_page = self._download_webpage(login_page_req, None, note=False,
+        login_page = self._download_webpage(login_page_req, None,
+            note='Downloading login page',
            errnote='Unable to download login page')
        lsd = self._search_regex(
            r'<input type="hidden" name="lsd" value="([^"]*)"',
@@ -70,23 +71,25 @@ class FacebookIE(InfoExtractor):
            'timezone': '-60',
            'trynum': '1',
            }
-        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
+        request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        try:
-            login_results = compat_urllib_request.urlopen(request).read()
+            login_results = self._download_webpage(request, None,
+                note='Logging in', errnote='unable to fetch login page')
            if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
                self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                return

            check_form = {
-                'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, 'fb_dtsg'),
+                'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
                'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
                'name_action_selected': 'dont_save',
-                'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, 'continue'),
+                'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
            }
-            check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
+            check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
            check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
-            check_response = compat_urllib_request.urlopen(check_req).read()
+            check_response = self._download_webpage(check_req, None,
+                note='Confirming login')
            if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
                self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -8,7 +8,8 @@ from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
-    ExtractorError
+    ExtractorError,
+    int_or_none,
 )


@@ -19,7 +20,7 @@ class LyndaIE(SubtitlesInfoExtractor):
    _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
    _NETRC_MACHINE = 'lynda'

-    _SUCCESSFUL_LOGIN_REGEX = r'<a href="https://www.lynda.com/home/userAccount/ChangeContactInfo.aspx" data-qa="eyebrow_account_menu">My account'
+    _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
    _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'

    ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
@@ -55,13 +56,29 @@ class LyndaIE(SubtitlesInfoExtractor):
        duration = video_json['DurationInSeconds']
        title = video_json['Title']

-        formats = [{'url': fmt['Url'],
+        formats = []
+
+        fmts = video_json.get('Formats')
+        if fmts:
+            formats.extend([
+                {
+                    'url': fmt['Url'],
                    'ext': fmt['Extension'],
                    'width': fmt['Width'],
                    'height': fmt['Height'],
                    'filesize': fmt['FileSize'],
                    'format_id': str(fmt['Resolution'])
-                    } for fmt in video_json['Formats']]
+                } for fmt in fmts])
+
+        prioritized_streams = video_json.get('PrioritizedStreams')
+        if prioritized_streams:
+            formats.extend([
+                {
+                    'url': video_url,
+                    'width': int_or_none(format_id),
+                    'format_id': format_id,
+                } for format_id, video_url in prioritized_streams['0'].items()
+            ])

        self._sort_formats(formats)

@@ -179,6 +196,9 @@ class LyndaCourseIE(InfoExtractor):
        videos = []
        (username, _) = self._get_login_info()

+        # Might want to extract the videos right here from video['Formats'], as it seems
+        # that 'Formats' is no longer provided by the single-video API
+
        for chapter in course_json['Chapters']:
            for video in chapter['Videos']:
                if username is None and video['HasAccess'] is False:
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1263,3 +1263,7 @@ def read_batch_urls(batch_fd):

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
+
+
+def urlencode_postdata(*args, **kargs):
+    return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.03.06'
+__version__ = '2014.03.07.1'
Author	SHA1	Message	Date
Philipp Hagemeister	dae313e725	release 2014.03.07.1	2014-03-07 15:59:10 +01:00
Jaime Marquínez Ferrándiz	b74fa8cd2c	[facebook] Fix login process It was broken and didn't work in python 3. And use `_download_webpage` instead of `compat_urllib_request.urlopen`.	2014-03-07 15:25:33 +01:00
Philipp Hagemeister	94eae04c94	release 2014.03.07	2014-03-07 06:41:48 +01:00
Sergey M․	16ff7ebc77	[lynda] Fix successful login regex and fix formats extraction (Closes #2520 )	2014-03-07 06:56:48 +07:00