first commit

This commit is contained in:
2025-09-14 23:51:39 -04:00
commit 825cbf2a5f
4 changed files with 257 additions and 0 deletions

70
main.py Normal file
View File

@@ -0,0 +1,70 @@
import time
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
import httpx
import tqdm
import xmltodict
from tqdm import tqdm
def download_mp3(url: str, filename: str, ouputdir: Path):
    """Stream the file at ``url`` to ``ouputdir / filename``.

    The response body is written chunk by chunk, so the whole file is never
    held in memory. Redirects are followed.

    Args:
        url: Address of the audio file to fetch.
        filename: Name of the file to create inside ``ouputdir``.
        ouputdir: Existing directory that receives the download.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        httpx.HTTPError: On other transport-level failures.
        OSError: If the destination cannot be written.
    """
    target = ouputdir / filename
    # Write to a sibling ".part" file first so that a failed or interrupted
    # transfer never leaves a truncated file under the final name.
    partial = target.with_name(target.name + '.part')
    try:
        with httpx.stream("GET", url, follow_redirects=True) as response:
            # Fail loudly instead of saving an HTML error page as "audio".
            response.raise_for_status()
            with open(partial, 'wb') as f:
                for chunk in response.iter_bytes():
                    f.write(chunk)
        partial.replace(target)
    finally:
        # No-op after a successful rename; removes the leftover on failure.
        partial.unlink(missing_ok=True)
def get_audio_urls(urls: list) -> list[tuple[str, str]]:
    """Collect the download entries of every feed in ``urls``.

    Each feed is numbered by its 1-based position in ``urls``; that number is
    handed to ``parse_book_url`` together with the feed URL. The feed URL and
    the number of entries found for it are printed as progress output.

    Args:
        urls: Feed URLs, in the order the books should be numbered.

    Returns:
        One flat list with the entries of all feeds, in feed order. Each
        entry is the ``(audio_url, filename)`` tuple produced by
        ``parse_book_url``.
    """
    audio_urls = []
    for book_num, book_url in enumerate(urls, start=1):
        print(book_url)
        days = parse_book_url(book_num, book_url)
        print(f'url count for book: {len(days)}')
        audio_urls.extend(days)
    return audio_urls
def parse_book_url(book_num, url):
    """Fetch one RSS feed and list the audio files it references.

    Args:
        book_num: Number placed at the start of every generated filename.
        url: Address of the RSS feed.

    Returns:
        A list of ``(audio_url, filename)`` tuples in feed order, where
        ``filename`` is ``"<book_num> <item title>.mp3"``. Items without an
        enclosure URL are reported and skipped.

    Raises:
        httpx.HTTPStatusError: If the feed request returns a 4xx/5xx status.
        KeyError: If the document has no ``rss``/``channel`` element.
    """
    response = httpx.get(url, follow_redirects=True)
    # Do not try to parse an error page as a feed.
    response.raise_for_status()
    feed = xmltodict.parse(response.text)

    channel = feed['rss']['channel'] or {}
    items = channel.get('item') or []
    # xmltodict yields a single dict (not a list) when the feed contains
    # exactly one <item>; normalise so the loop always sees a list of items.
    if isinstance(items, dict):
        items = [items]

    audio_urls = []
    for item in items:
        title = item['title']
        audio_url = (item.get('enclosure') or {}).get(r'@url')
        if not audio_url:
            print('\t', f'skipping {title!r}: no enclosure url')
            continue
        # A "/" in the title would be read as a directory separator when the
        # file is created, so replace it to keep the name a single component.
        filename = f'{book_num} {title}.mp3'.replace('/', '-')
        entry = (audio_url, filename)
        print('\t', entry)
        audio_urls.append(entry)
    return audio_urls
def main():
    """Download every episode of the configured feeds into ``./downloads``.

    The feed URLs are resolved to ``(audio_url, filename)`` entries first, then
    the files are fetched concurrently on a thread pool while a progress bar
    tracks completed downloads. The total elapsed download time is printed at
    the end.
    """
    urls = [
        "https://feeds.transistor.fm/immerse-beginnings-8-week-plan",
        "https://feeds.transistor.fm/immerse-kingdoms-8-week-plan",
        "https://feeds.transistor.fm/immerse-chronicles-8-week-plan",
        "https://feeds.transistor.fm/immerse-poets-8-week-plan",
        "https://feeds.transistor.fm/immerse-prophets-8-week-plan",
        "https://feeds.transistor.fm/immerse-messiah-8-week-plan",
    ]
    output_dir = Path("./downloads").absolute()  # Adjust for tmpfs or other directory
    output_dir.mkdir(parents=True, exist_ok=True)

    audio_urls = get_audio_urls(urls)
    print(len(audio_urls))

    start_time = time.time()
    # Downloads are I/O-bound, so a thread pool lets the transfers overlap.
    with ThreadPoolExecutor() as executor:
        downloads = executor.map(
            lambda data: download_mp3(*data, output_dir),
            audio_urls,
        )
        # The bar advances once per finished file, so its total must be the
        # number of audio files, not the number of feeds.
        # Consuming the iterator also re-raises the first download error.
        for _ in tqdm(downloads, total=len(audio_urls)):
            pass
    print(f"Completed in {time.time() - start_time:.2f} seconds")
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()