Skip to content

Commit

Permalink
More features added
Browse files Browse the repository at this point in the history
  • Loading branch information
sanketh95 committed May 18, 2015
1 parent 6424624 commit 5e0881c
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 8 deletions.
6 changes: 5 additions & 1 deletion CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,8 @@ Added support for windows

--------------------VERSION 1.0.2----------------------
1. Fixed a bug where -d option required the directory given as option to exist. Now the script creates the directory if it doesn't exist.
2. The directory path given for -d can now be relative and the script takes care of converting it to absolute path.
2. The directory path given for -d can now be relative and the script takes care of converting it to absolute path.

--------------------VERSION 1.1.0------------------------
1. Script can now download lecture slides (pdfs and ppts).
2. Saves the data.json file prior to downloading.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ Run `export HTTP_PROXY=http://user:password@address:port` and `export HTTPS_PROX
* The script also downloads the subtitles automatically and saves them in the *Subs* folder. So when you play the videos using VLC, the subs are automatically loaded.
* The script saves a *data.json* file in the course directory. This has all the information required to fetch the videos. So even if you lose some videos or if you forget the course name, as long as you have the *data.json* file, you can always re-download the lost videos.
* The script saves the session cookies so you don't have to log in every time you run it.
* The script fetches the lecture slides (pdfs and ppts) and saves them in the 'Other Files' folder.


## <a name="full-usage"></a>Full Usage
Expand Down
2 changes: 2 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ Features
all the information required to fetch the videos. So even if you lose
some videos or if you forget the course name, as long as you have the
*data.json* file, you can always re-download the lost videos.
- The script saves the session cookies so you don't have to log in every time you run it.
- The script fetches the lecture slides (pdfs and ppts) and saves them in the 'Other Files' folder.

Full Usage
----------
Expand Down
35 changes: 30 additions & 5 deletions coursera_offline.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
DATA_FILE = 'data.json'
COOKIE_FILE = 'cookie.cookies'
COURSE_DIR = os.getcwd()
OTHER_DIR = 'Other Files'

class Downloader(threading.Thread):
"""Instance of threading.Thread class.
Expand Down Expand Up @@ -67,6 +68,21 @@ def run(self):

print 'Download finished for %s' % absolute_path(self.savepath)

def get_vid_sub_links(anchor_elems):
    """Sort the anchors of one lecture entry into video, subtitle and slide links.

    Each anchor's ``href`` is classified by substring:

    * contains both ``subtitles`` and ``format=srt`` -> subtitle link
    * contains ``download.mp4``                      -> video link
    * contains ``.pdf`` or ``.ppt`` (so also ``.pptx``) -> extra file link

    Args:
        anchor_elems: iterable of anchor elements, each wrapped with ``pq``
            to read its ``href`` attribute.

    Returns:
        A ``(vid_link, sub_link, other_links)`` tuple. ``vid_link`` and
        ``sub_link`` are the last matching href of their kind, or ``None``
        when no anchor matched. ``other_links`` is a list (possibly empty)
        of every slide/document href in the order encountered.
    """
    vid_link = None
    sub_link = None
    other_links = []
    for anchor_elem in anchor_elems:
        href = pq(anchor_elem).attr('href')
        if not href:
            # An anchor without an href has nothing to download; skipping it
            # avoids an AttributeError on None that would abort the caller.
            continue
        if 'subtitles' in href and 'format=srt' in href:
            sub_link = href
        elif 'download.mp4' in href:
            vid_link = href
        elif '.pdf' in href or '.ppt' in href:
            # '.ppt' also matches '.pptx', so both PowerPoint formats are kept.
            other_links.append(href)
    return vid_link, sub_link, other_links

def exit_with_message(msg):
# Print the msg and exit the script
print msg
Expand Down Expand Up @@ -242,6 +258,7 @@ def download(parsed_json, cookie):
week_count += 1
create_folder(folder_name)
create_folder(os.path.join(folder_name, SUB_DIR))
create_folder(os.path.join(folder_name, OTHER_DIR))
count = 0
for vid_info in sub_json['links']:

Expand Down Expand Up @@ -269,6 +286,15 @@ def download(parsed_json, cookie):
d = Downloader(suburl, sub_path, cookie, True)
threads.append(d)

for other_link in vid_info['other_links']:
other_title = other_link.split('/')[-1]
other_path = os.path.join(folder_name, OTHER_DIR, str(count) + '-' + other_title)
if path_exists(other_path):
print 'Skipping %s' % other_path
else:
p = Downloader(other_link, other_path, cookie)
threads.append(p)

for thread in threads:
thread.start()

Expand Down Expand Up @@ -336,8 +362,8 @@ def main():

if not args.file:
parsed_json = get_course_info(shortname, cookie_logged_in)
download(parsed_json, cookie_logged_in)
save_data_file(parsed_json)
download(parsed_json, cookie_logged_in)
if args.auto is not None:
schedule_synch(args.auto, args.email, args.password)

Expand Down Expand Up @@ -389,10 +415,9 @@ def get_course_info(shortname, cookie):
for list_item in list_items:
list_elem = pq(list_item)
anchor_elems = list_elem('a')
vid_title = pq(anchor_elems[0]).text()
vid_link = pq(anchor_elems[len(anchor_elems) - 1]).attr('href')
sub_link = pq(anchor_elems[len(anchor_elems) - 2]).attr('href')
parsed_json['links'].append({'title':vid_title, 'link':vid_link, 'sub_link':sub_link})
vid_title = pq(anchor_elems[0]).text()
vid_link, sub_link, other_links = get_vid_sub_links(anchor_elems)
parsed_json['links'].append({'title':vid_title, 'link':vid_link, 'sub_link':sub_link, 'other_links': other_links})
course_info_json['data'].append(parsed_json)
except Exception, e:
exit_with_message('Invalid HTML file receieved')
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
setup(
name = "coursera_offline",

version = "1.0.2",
version = "1.1.0",

author="Sanketh Mopuru",
author_email="[email protected]",
Expand All @@ -28,7 +28,7 @@
"docutils>=0.3"
],

keywords = "coursera offline download lecture lectures video videos",
keywords = "coursera offline download lecture lectures videos and slides",

scripts=['coursera_offline.py']
)

0 comments on commit 5e0881c

Please sign in to comment.