More features added

sanketh95 · May 18, 2015 · 5e0881c · 5e0881c
1 parent 6424624
commit 5e0881c
Show file tree

Hide file tree

Showing 5 changed files with 40 additions and 8 deletions.
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -14,4 +14,8 @@ Added support for windows
 
 --------------------VERSION 1.0.2----------------------
 1. Fixed a bug where -d option required the directory given as option to exist. Now the script creates the directory if it doesn't exist.
-2. The directory path given for -d can now be relative and the script takes care of converting it to absolute path.
+2. The directory path given for -d can now be relative and the script takes care of converting it to absolute path.
+
+--------------------VERSION 1.1.0------------------------
+1. Script can now download lecture slides (pdfs and ppts).
+2. Saves the data.json file prior to downloading.
diff --git a/README.md b/README.md
@@ -64,6 +64,7 @@ Run `export HTTP_PROXY=http://user:password@address:port` and `export HTTPS_PROX
 * The script also downloads the subtitles automatically and saves them in *Subs* folder. So when you play the videos using VLC, the subs are automatically loaded.
 * The script saves a *data.json* file in the course directory. This has all the information required to fetch the videos. So even if you lose some videos or if you forget the course name, as long as you have the *data.json* file, you can always re-download the lost videos.
 * The script saves the session cookies so you don't have to log in every time you run it.
+* The script fetches the lecture slides (pdfs and ppts) and saves them in the 'Other Files' folder.
 
 
 ## <a name="full-usage"></a>Full Usage

diff --git a/README.rst b/README.rst
@@ -110,6 +110,8 @@ Features
    all the information required to fetch the videos. So even if you lose
    some videos or if you forget the course name, as long as you have the
    *data.json* file, you can always re-download the lost videos.
+-  The script saves the session cookies so you don't have to log in every time you run it.
+-  The script fetches the lecture slides (pdfs and ppts) and saves them in the 'Other Files' folder.
 
 Full Usage
 ----------

diff --git a/coursera_offline.py b/coursera_offline.py
@@ -35,6 +35,7 @@
 DATA_FILE = 'data.json'
 COOKIE_FILE = 'cookie.cookies'
 COURSE_DIR = os.getcwd()
+OTHER_DIR = 'Other Files'
 
 class Downloader(threading.Thread):
     """Instance of threading.Thread class.
@@ -67,6 +68,21 @@ def run(self):
 
         print 'Download finished for %s' % absolute_path(self.savepath)
 
+def get_vid_sub_links(anchor_elems):
+    """Sort the anchors of one lecture list item by what their href points to.
+
+    anchor_elems: iterable of anchor elements, each wrappable by pq().
+
+    Returns a tuple (vid_link, sub_link, other_links):
+      vid_link    - href containing 'download.mp4', or None if absent
+      sub_link    - href containing both 'subtitles' and 'format=srt',
+                    or None if absent
+      other_links - list of hrefs containing '.pdf' or '.ppt' (the latter
+                    also covers '.pptx'); empty if there are none
+    If several anchors match the video or subtitle pattern, the last one wins.
+    """
+    vid_link = None
+    sub_link = None
+    other_links = []
+    for anchor_elem in anchor_elems:
+        href = pq(anchor_elem).attr('href')
+        if not href:
+            # attr() gives None for an anchor without an href; skip it rather
+            # than crash on None, which would abort the whole course parse.
+            continue
+        if 'subtitles' in href and 'format=srt' in href:
+            sub_link = href
+        elif 'download.mp4' in href:
+            vid_link = href
+        elif '.pdf' in href or '.ppt' in href:
+            other_links.append(href)
+    return vid_link, sub_link, other_links
+
 def exit_with_message(msg):
     # Print the msg and exit the script
     print msg
@@ -242,6 +258,7 @@ def download(parsed_json, cookie):
         week_count += 1
         create_folder(folder_name)
         create_folder(os.path.join(folder_name, SUB_DIR))
+        create_folder(os.path.join(folder_name, OTHER_DIR))
         count = 0
         for vid_info in sub_json['links']:
 
@@ -269,6 +286,15 @@ def download(parsed_json, cookie):
                 d = Downloader(suburl, sub_path, cookie, True)
                 threads.append(d)
 
+            for other_link in vid_info['other_links']:
+                other_title = other_link.split('/')[-1]
+                other_path = os.path.join(folder_name, OTHER_DIR, str(count) + '-' + other_title)
+                if path_exists(other_path):
+                    print 'Skipping %s' % other_path
+                else:
+                    p = Downloader(other_link, other_path, cookie)
+                    threads.append(p)
+
     for thread in threads:
         thread.start()
 
@@ -336,8 +362,8 @@ def main():
 
     if not args.file:
         parsed_json = get_course_info(shortname, cookie_logged_in)    
-    download(parsed_json, cookie_logged_in)
     save_data_file(parsed_json)
+    download(parsed_json, cookie_logged_in)
     if args.auto is not None:
         schedule_synch(args.auto, args.email, args.password)        
 
@@ -389,10 +415,9 @@ def get_course_info(shortname, cookie):
             for list_item in list_items:
                 list_elem = pq(list_item)
                 anchor_elems = list_elem('a')
-                vid_title = pq(anchor_elems[0]).text()
-                vid_link = pq(anchor_elems[len(anchor_elems) - 1]).attr('href')
-                sub_link = pq(anchor_elems[len(anchor_elems) - 2]).attr('href')
-                parsed_json['links'].append({'title':vid_title, 'link':vid_link, 'sub_link':sub_link})
+                vid_title = pq(anchor_elems[0]).text()  
+                vid_link, sub_link, other_links = get_vid_sub_links(anchor_elems)
+                parsed_json['links'].append({'title':vid_title, 'link':vid_link, 'sub_link':sub_link, 'other_links': other_links})
             course_info_json['data'].append(parsed_json)
     except Exception, e:    
         exit_with_message('Invalid HTML file receieved')

diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 setup(
 	name = "coursera_offline",
 
-	version = "1.0.2",
+	version = "1.1.0",
 
 	author="Sanketh Mopuru",
 	author_email="[email protected]",
@@ -28,7 +28,7 @@
 		"docutils>=0.3"
 	],
 
-	keywords = "coursera offline download lecture lectures video videos",
+	keywords = "coursera offline download lecture lectures videos and slides",
 
 	scripts=['coursera_offline.py']
 	)