71 lines
2.2 KiB
Python
71 lines
2.2 KiB
Python
import time
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
import tqdm
|
|
import xmltodict
|
|
from tqdm import tqdm
|
|
|
|
|
|
def download_mp3(url: str, filename: str, ouputdir: Path):
    """Stream the resource at *url* into ``ouputdir / filename``.

    The body is written chunk by chunk as it arrives, so the whole file is
    never held in memory. Redirects are followed.

    Args:
        url: Address of the audio file to fetch.
        filename: Name of the file to create inside *ouputdir*.
        ouputdir: Directory that receives the file (parameter name kept
            as-is, typo included, so existing keyword callers keep working).

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    target = ouputdir / filename
    with httpx.stream("GET", url, follow_redirects=True) as response:
        # Check the status before touching the disk: otherwise an error page
        # would be saved under an .mp3 name, and a failed request would still
        # leave an empty file behind.
        response.raise_for_status()
        with open(target, "wb") as f:
            for chunk in response.iter_bytes():
                f.write(chunk)
|
|
|
|
|
|
def get_audio_urls(urls:list) -> list[tuple[str, Path]]:
    """Gather the episode entries of every feed in *urls* into one flat list.

    Each feed is handed to ``parse_book_url`` together with its 1-based
    position in *urls*; the per-feed results are concatenated in feed order.
    The feed URL and the number of entries found are printed as progress
    output.

    NOTE(review): the annotation promises ``Path`` as the second tuple
    element, but ``parse_book_url`` builds it as a plain ``str`` filename.
    """
    collected = []
    for book_num, feed_url in enumerate(urls, 1):
        print(feed_url)
        entries = parse_book_url(book_num, feed_url)
        print(f'url count for book: {len(entries)}')
        collected.extend(entries)
    return collected
|
|
|
|
def parse_book_url(book_num, url):
    """Fetch one RSS feed and list the audio enclosure of each of its items.

    Args:
        book_num: Value prefixed to every generated filename.
        url: Address of the RSS feed.

    Returns:
        A list of ``(enclosure_url, filename)`` tuples in feed order, where
        ``filename`` is ``"{book_num} {title}.mp3"``. The list is empty when
        the channel contains no items.

    Raises:
        httpx.HTTPStatusError: If the feed request returns a 4xx/5xx status.
        KeyError: If the document lacks ``rss``/``channel``, or an item lacks
            ``title`` or ``enclosure``.
    """
    # Follow redirects like download_mp3 does, and fail loudly on an HTTP
    # error instead of handing an error page to the XML parser.
    response = httpx.get(url, follow_redirects=True)
    response.raise_for_status()
    feed = xmltodict.parse(response.text)

    items = feed['rss']['channel'].get('item') or []
    # xmltodict represents a lone <item> as a dict rather than a one-element
    # list; iterating that dict would walk its keys, so normalise it here.
    if isinstance(items, dict):
        items = [items]

    audio_urls = []
    for item in items:
        # The title comes from the feed and becomes part of a filesystem
        # path; neutralise path separators so it cannot escape (or fail to
        # resolve inside) the output directory.
        title = str(item['title']).replace('/', '-').replace('\\', '-')
        entry = (item['enclosure'].get(r'@url'), f'{book_num} {title}.mp3')
        print('\t', entry)
        audio_urls.append(entry)

    return audio_urls
|
|
|
|
def main():
    """Download every episode of the feeds listed below into ``./downloads``.

    Steps: resolve each feed into ``(url, filename)`` entries via
    ``get_audio_urls``, create the output directory if needed, download all
    entries concurrently on a thread pool while showing a progress bar, and
    finally print the elapsed download time.
    """
    urls = [
        "https://feeds.transistor.fm/immerse-beginnings-8-week-plan",
        "https://feeds.transistor.fm/immerse-kingdoms-8-week-plan",
        "https://feeds.transistor.fm/immerse-chronicles-8-week-plan",
        "https://feeds.transistor.fm/immerse-poets-8-week-plan",
        "https://feeds.transistor.fm/immerse-prophets-8-week-plan",
        "https://feeds.transistor.fm/immerse-messiah-8-week-plan",
    ]

    output_dir = Path("./downloads").absolute()  # Adjust for tmpfs or other directory
    output_dir.mkdir(parents=True, exist_ok=True)
    audio_urls = get_audio_urls(urls)

    print(len(audio_urls))

    start_time = time.time()

    def fetch(entry):
        # entry is a (url, filename) pair as returned by get_audio_urls.
        return download_mp3(*entry, output_dir)

    # Downloads are I/O-bound, so a thread pool lets them overlap.
    with ThreadPoolExecutor() as executor:
        # The bar advances once per completed download, so its total is the
        # number of episodes (audio_urls), not the number of feeds (urls).
        # list() drains the lazy map and re-raises any download error here.
        list(tqdm(executor.map(fetch, audio_urls), total=len(audio_urls)))

    print(f"Completed in {time.time() - start_time:.2f} seconds")
|
|
|
|
# Script entry point: start the downloader only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|