first commit
This commit is contained in:
70
main.py
Normal file
70
main.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import tqdm
|
||||
import xmltodict
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def download_mp3(url: str, filename: str, ouputdir: Path) -> None:
    """Stream the resource at ``url`` into the file ``ouputdir / filename``.

    The body is written chunk by chunk, so the whole file is never held in
    memory at once.

    Args:
        url: Address of the file to fetch; HTTP redirects are followed.
        filename: Name of the file to create inside ``ouputdir``.
        ouputdir: Directory that receives the download.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    with httpx.stream("GET", url, follow_redirects=True) as response:
        # Check the status before touching the disk: otherwise an error page
        # would be saved under an .mp3 name, and a request that fails up
        # front would still leave an empty file behind.
        response.raise_for_status()
        with open(ouputdir / filename, 'wb') as f:
            for chunk in response.iter_bytes():
                f.write(chunk)
|
||||
|
||||
|
||||
def get_audio_urls(urls: list[str]) -> list[tuple[str, str]]:
    """Gather the audio entries of every feed listed in ``urls``.

    Feeds are numbered from 1 in the order given; that number and the feed
    URL are handed to ``parse_book_url``, and the per-feed results are
    concatenated in feed order. Progress is printed to stdout.

    Args:
        urls: Feed URLs, one per book.

    Returns:
        A flat list of ``(audio_url, filename)`` tuples. The filename is a
        plain string, not a ``Path``.
    """
    audio_urls = []
    for book_num, book_url in enumerate(urls, start=1):
        print(book_url)
        days = parse_book_url(book_num, book_url)
        print(f'url count for book: {len(days)}')
        audio_urls.extend(days)

    return audio_urls
|
||||
|
||||
def parse_book_url(book_num, url):
    """Fetch the RSS feed at ``url`` and list its audio enclosures.

    Args:
        book_num: Number prefixed to every generated filename.
        url: Address of the RSS feed to download and parse.

    Returns:
        A list of ``(enclosure_url, filename)`` tuples, one per feed item,
        where ``filename`` is ``"<book_num> <item title>.mp3"``.

    Raises:
        httpx.HTTPStatusError: If the feed request returns a 4xx/5xx status.
    """
    response = httpx.get(url, follow_redirects=True)
    # Do not try to parse an error page as a feed.
    response.raise_for_status()
    feed = xmltodict.parse(response.text)

    # xmltodict yields a list only for repeated elements: a channel with one
    # <item> gives a single dict, and one with no items has no key at all.
    items = feed['rss']['channel'].get('item') or []
    if isinstance(items, dict):
        items = [items]

    audio_urls = []
    for item in items:
        # A path separator in the title would make the filename point into a
        # subdirectory that does not exist, so replace it.
        title = str(item['title']).replace('/', '-').replace('\\', '-')
        entry = (item['enclosure'].get(r'@url'), f'{book_num} {title}.mp3')
        print('\t', entry)
        audio_urls.append(entry)

    return audio_urls
|
||||
|
||||
def main():
    """Download every audio file referenced by the configured feeds.

    Resolves the feeds into ``(url, filename)`` pairs via ``get_audio_urls``,
    creates ``./downloads`` if needed, fetches the files concurrently with a
    thread pool, and prints the number of files and the elapsed time.
    """
    urls = [
        "https://feeds.transistor.fm/immerse-beginnings-8-week-plan",
        "https://feeds.transistor.fm/immerse-kingdoms-8-week-plan",
        "https://feeds.transistor.fm/immerse-chronicles-8-week-plan",
        "https://feeds.transistor.fm/immerse-poets-8-week-plan",
        "https://feeds.transistor.fm/immerse-prophets-8-week-plan",
        "https://feeds.transistor.fm/immerse-messiah-8-week-plan",
    ]

    output_dir = Path("./downloads").absolute()  # Adjust for tmpfs or other directory
    output_dir.mkdir(parents=True, exist_ok=True)
    audio_urls = get_audio_urls(urls)

    print(len(audio_urls))

    start_time = time.time()

    # Fan the downloads out over a thread pool; the work is I/O-bound.
    with ThreadPoolExecutor() as executor:
        downloads = executor.map(
            lambda data: download_mp3(*data, output_dir), audio_urls
        )
        # The bar counts files, so its total is the number of audio entries,
        # not the number of feeds. Draining the iterator also re-raises any
        # exception a download hit in its worker thread.
        for _ in tqdm(downloads, total=len(audio_urls)):
            pass

    print(f"Completed in {time.time() - start_time:.2f} seconds")
|
||||
|
||||
# Run the downloader only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user