-
Notifications
You must be signed in to change notification settings - Fork 0
/
bandcamp_downloader.py
210 lines (166 loc) · 7.23 KB
/
bandcamp_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import calendar
import html
import logging
import os
import time
import urllib.request

import mutagen
from mutagen.id3 import ID3, TIT2, TALB, TPE1, TRCK, APIC, TDRC
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.remote_connection import LOGGER
from webdriver_manager.chrome import ChromeDriverManager
# ID3 info:
# APIC: picture
# TIT2: title
# TPE1: artist
# TRCK: track number
# TALB: album
# TDRC: year
def valid_name(name: str):
    """Return *name* with every filename-unsafe character swapped for a space.

    The unsafe set is backslash, slash, colon, asterisk, question mark,
    double quote, angle brackets and the pipe character.
    """
    forbidden = r'\/:*?\"<>|'
    return ''.join(' ' if char in forbidden else char for char in name)
# Chrome options handed to the webdriver that the __main__ block creates.
options = Options()
for _chrome_switch in (
    'headless',
    '--mute-audio',
    '--disable-extensions',
    '--disable-gpu',
    'log-level=3',
):
    options.add_argument(_chrome_switch)
# Exclude the 'enable-logging' switch (presumably to keep the console quiet).
options.add_experimental_option('excludeSwitches', ['enable-logging'])
def _parse_track_listing(listing: str):
    """Split the track-listing paragraph into ``(number, title)`` pairs.

    Every line of *listing* is cut at its first ``'. '``: the part before it
    is kept as the track number, the rest as the title. HTML entities left
    in the title (``&amp;``, ``&#39;``, ...) are decoded.
    """
    tracks = []
    for line in listing.split('\n'):
        num, _, title = line.partition('. ')
        # Some titles still carry a second two-character "NN. " prefix; drop it.
        if title[2:4] == '. ':
            title = title[4:]
        tracks.append((num, html.unescape(title)))
    return tracks


def _format_release_date(credits_line: str) -> str:
    """Convert the first credits line into a sortable ``YYYY.MM.DD`` string.

    The line is presumably of the form ``"released March 5, 2021"``; the
    first nine characters are discarded and the remainder is parsed as
    ``"<month name> <day>, <year>"``. Month and day are both zero-padded to
    two digits so that folder names sort chronologically.

    Raises:
        IndexError: if the line has no ``', '`` or no day component.
        ValueError: if the month name is not in ``calendar.month_name``.
    """
    date = credits_line[9:]
    parts = date.split(', ')
    year = parts[1]
    month = str(list(calendar.month_name).index(date.split(' ')[0])).zfill(2)
    day = parts[0].split(' ')[1].zfill(2)
    return f'{year}.{month}.{day}'


def download_album(link: str):
    """Download, name and tag every track of the Bandcamp album at *link*.

    Uses the module-level ``browser`` driver. A folder named after the
    release date and album title is created in the current working
    directory; the cover is saved there as ``cover.jpg`` and each track as
    an mp3 carrying ID3 tags (track number, title, album, artist, year,
    cover). The working directory is switched to that folder while
    downloading and is restored afterwards, even if an error occurs.

    Raises:
        RuntimeError: if no audio source can be downloaded for a track.
    """
    browser.get(link)
    time.sleep(1.5)

    # Play and pause the first song to initialize the player.
    play_button = browser.find_element(By.CLASS_NAME, 'playbutton')
    play_button.click()
    play_button.click()

    # The description meta tag holds the album blurb and the track listing,
    # separated by blank lines.
    print('Downloading album info')
    description = browser.find_element(By.CSS_SELECTOR, "[name='description']").get_attribute('content').strip()
    description = description.split('\n\n')
    tracks = _parse_track_listing(description[1])
    # The year is taken from the last four characters of the first paragraph.
    year = description[0][-4:]

    # Find the album release date on Bandcamp.
    credits_line = browser.find_element(By.CSS_SELECTOR, "[class='tralbumData tralbum-credits']").text.strip().split('\n')[0]
    date = _format_release_date(credits_line)

    # Make a new directory with the album's name.
    album_name = browser.find_element(By.CSS_SELECTOR, "[id='name-section'] [class='trackTitle']").text.strip()
    album_folder_name = valid_name(f'[{date}] - {album_name} [128K]')
    print(f'Making new directory: {album_folder_name}')
    os.makedirs(album_folder_name, exist_ok=True)
    os.chdir(album_folder_name)
    try:
        # Download and save album cover.
        print('Downloading album cover')
        album_cover_link = browser.find_element(By.CSS_SELECTOR, "[rel='image_src']").get_attribute('href')
        urllib.request.urlretrieve(album_cover_link, 'cover.jpg')
        with open('cover.jpg', 'rb') as cover_file:
            imagedata = cover_file.read()

        # Needed to get all of the album's mp3s.
        next_track = browser.find_element(By.CSS_SELECTOR, "[aria-label='Next track']")
        # Pad file-name numbers to the width of the highest track number.
        number_width = len(str(len(tracks)))

        for i, (num, raw_title) in enumerate(tracks):
            track_num = str(i + 1).zfill(number_width) + ' '
            title = valid_name(raw_title)
            artist = browser.find_element(By.CSS_SELECTOR, "[class='title']").text.split(' - ')[0]
            # Move a "feat. X" suffix from the title over to the artist.
            feat_parts = title.split(' feat. ')
            if len(feat_parts) > 1:
                artist = f'{artist} feat. {feat_parts[1]}'
                title = feat_parts[0]

            # Download and name the mp3 file.
            full_track_filename = f'{track_num}{valid_name(artist)} - {valid_name(title)}.mp3'
            print(f'Downloading {full_track_filename} (Artist: {artist})')
            # The <audio> element's position inside <body> varies, so try
            # each known position until one yields a downloadable source.
            for position in ('19', '18', '17'):
                try:
                    mp3 = browser.find_element(By.CSS_SELECTOR, f'body > audio:nth-child({position})').get_attribute('src')
                    urllib.request.urlretrieve(mp3, full_track_filename)
                    break
                except (ValueError, NoSuchElementException):
                    continue
            else:
                raise RuntimeError(f'Could not find an audio source for {full_track_filename}')

            # Add tags.
            print(f'Adding tags and cover to {full_track_filename}')
            try:
                tags = ID3(full_track_filename)
            except mutagen.id3.ID3NoHeaderError:
                tags = ID3()

            # Those tags are different in each iteration.
            tags['TRCK'] = TRCK(encoding=3, text=num)
            # Prefer the part after "Artist - " in the listing; otherwise
            # fall back to the title used for the file name.
            title_parts = raw_title.split(' - ')
            tag_title = title_parts[1] if len(title_parts) > 1 else title
            tags['TIT2'] = TIT2(encoding=3, text=tag_title)
            tags['TPE1'] = TPE1(encoding=3, text=artist)

            # Those tags are constant in each iteration.
            tags['TALB'] = TALB(encoding=3, text=album_name)
            tags['TDRC'] = TDRC(encoding=3, text=year)
            tags['APIC'] = APIC(3, 'image/jpeg', 3, 'Cover', imagedata)

            # Save tags.
            tags.save(full_track_filename, v2_version=3)

            # Change the track so the next lookup yields a different link.
            if i + 1 < len(tracks):
                next_track.click()

        print(f'Finished downloading {album_name}')
        print()
    finally:
        # Always step back out of the album folder.
        os.chdir('..')
if __name__ == '__main__':
    # Script entry point: repeatedly ask for a Bandcamp link and download
    # either that single album or every album linked from an artist/label
    # page. Everything is stored below ./downloads.
    browser = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    LOGGER.setLevel(logging.WARNING)
    os.makedirs('downloads', exist_ok=True)
    os.chdir('downloads')
    try:
        while True:
            link = input('Paste Bandcamp artist/label link here: ')
            while 'bandcamp.com' not in link:
                link = input('This is not a bandcamp link, try again: ')
            if 'album' in link:
                # download_album() loads the page itself.
                download_album(link)
            else:
                browser.get(link)
                time.sleep(1.5)
                # Collect the hrefs first: download_album() navigates away,
                # after which these elements can no longer be queried.
                anchors = browser.find_elements(By.XPATH, '//*[@id=\"pgBd\"]/div[2]/ol/li/a')
                album_links = []
                for anchor in anchors:
                    href = anchor.get_attribute('href')
                    album_links.append(href)
                    print(href)
                print(f'Downloading {len(album_links)} albums')
                for actual_link in album_links:
                    download_album(actual_link)
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C or end of input is the only way out of the prompt loop.
        print()
    finally:
        # Shut the driver down on every exit path so no Chrome process is left behind.
        browser.quit()