"""Download every audio enclosure from a fixed list of podcast RSS feeds.

Each feed is parsed for its ``<item>`` entries, and the enclosure of every
item is saved as an ``.mp3`` file in ``./downloads`` using a thread pool.
"""

import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

import httpx
import tqdm
import xmltodict
# NOTE: rebinds the name ``tqdm`` from the module to the progress-bar class,
# so this line must stay after ``import tqdm``.
from tqdm import tqdm

# Path separators in a feed-supplied title would make ``output_dir / name``
# point into another (possibly nonexistent) directory.
_PATH_SEPARATORS = str.maketrans({"/": "_", "\\": "_"})


def download_mp3(url: str, filename: str, ouputdir: Path) -> None:
    """Stream ``url`` into the file ``ouputdir / filename``.

    Args:
        url: Address of the audio file; redirects are followed.
        filename: Name of the file to create inside ``ouputdir``.
        ouputdir: Existing directory that receives the file. (The parameter
            name is kept as-is for compatibility with existing callers.)

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        httpx.RequestError: If the request itself fails.
        OSError: If the output file cannot be written.
    """
    # Open the stream and check the status *before* creating the file, so a
    # failed request neither leaves an empty file behind nor saves an error
    # page under an .mp3 name.
    with httpx.stream("GET", url, follow_redirects=True) as response:
        response.raise_for_status()
        with open(ouputdir / filename, "wb") as f:
            for chunk in response.iter_bytes():
                f.write(chunk)


def get_audio_urls(urls: list) -> list[tuple[str, str]]:
    """Collect ``(audio_url, filename)`` pairs from every feed in ``urls``.

    Feeds are numbered from 1 in the order given; that number is passed to
    ``parse_book_url`` and ends up as the filename prefix.

    Args:
        urls: RSS feed URLs, one per book.

    Returns:
        All ``(audio_url, filename)`` pairs, in feed order.
    """
    audio_urls = []
    for book_num, book_url in enumerate(urls, start=1):
        print(book_url)
        days = parse_book_url(book_num, book_url)
        print(f'url count for book: {len(days)}')
        audio_urls.extend(days)
    return audio_urls


def parse_book_url(book_num, url):
    """Fetch one RSS feed and return its ``(audio_url, filename)`` pairs.

    Args:
        book_num: Number used as the prefix of every generated filename.
        url: URL of the RSS feed.

    Returns:
        A list of ``(enclosure_url, "<book_num> <title>.mp3")`` tuples, one
        per feed item that has an enclosure URL.

    Raises:
        httpx.HTTPStatusError: If the feed request returns a 4xx/5xx status.
        httpx.RequestError: If the feed request itself fails.
    """
    response = httpx.get(url, follow_redirects=True)
    response.raise_for_status()
    feed = xmltodict.parse(response.text)

    items = feed['rss']['channel'].get('item') or []
    # xmltodict yields a single dict (not a list) when the channel holds
    # exactly one <item>; normalise so the loop always sees item dicts.
    if isinstance(items, dict):
        items = [items]

    audio_urls = []
    for item in items:
        title = item['title']
        enclosure_url = (item.get('enclosure') or {}).get('@url')
        if not enclosure_url:
            print('\t', f'skipping {title!r}: no enclosure url')
            continue
        safe_title = str(title).translate(_PATH_SEPARATORS)
        entry = (enclosure_url, f'{book_num} {safe_title}.mp3')
        print('\t', entry)
        audio_urls.append(entry)
    return audio_urls


def main():
    """Download all episodes of the configured feeds into ``./downloads``."""
    urls = [
        "https://feeds.transistor.fm/immerse-beginnings-8-week-plan",
        "https://feeds.transistor.fm/immerse-kingdoms-8-week-plan",
        "https://feeds.transistor.fm/immerse-chronicles-8-week-plan",
        "https://feeds.transistor.fm/immerse-poets-8-week-plan",
        "https://feeds.transistor.fm/immerse-prophets-8-week-plan",
        "https://feeds.transistor.fm/immerse-messiah-8-week-plan",
    ]

    output_dir = Path("./downloads").absolute()  # Adjust for tmpfs or other directory
    output_dir.mkdir(parents=True, exist_ok=True)

    audio_urls = get_audio_urls(urls)
    print(len(audio_urls))

    start_time = time.time()
    failures = []

    # Downloads are I/O-bound, so a thread pool lets the waits overlap.
    with ThreadPoolExecutor() as executor:
        futures = {
            executor.submit(download_mp3, audio_url, filename, output_dir): filename
            for audio_url, filename in audio_urls
        }
        # The bar total is the number of downloads (not the number of feeds),
        # and it advances as each download finishes.
        for future in tqdm(as_completed(futures), total=len(futures)):
            try:
                future.result()
            except (httpx.HTTPError, OSError) as exc:
                # Record the failure and keep going so that one bad episode
                # does not abort the remaining downloads.
                failures.append((futures[future], exc))

    for filename, exc in failures:
        print(f"FAILED {filename}: {exc}")
    print(f"Completed in {time.time() - start_time:.2f} seconds")


if __name__ == "__main__":
    main()