61 lines
1.5 KiB
Python
61 lines
1.5 KiB
Python
import json
|
|
import multiprocessing
|
|
|
|
import httpx
|
|
|
|
from pathlib import Path
|
|
|
|
# URL templates; ``{code}`` is filled in from each sermon record via str.format.
sermon_transcript_url = 'https://www.gty.org/library/print/sermons-library/{code}'
audio_download_url = "https://cdn.gty.org/sermons/High/{code}.mp3"

# Local destination directories for downloaded files.
local_audio = Path('/run/media/bear/data/audio/')
# NOTE(review): 'transcrips' looks like a typo for 'transcripts'; left
# unchanged because renaming would change where files are looked up and written.
local_transcript = Path('/run/media/bear/data/transcrips')
|
|
|
|
def download_transcript(sermon):
    """Download the transcript page for one sermon, unless already saved.

    The page is requested from ``sermon_transcript_url`` formatted with the
    fields of *sermon* and stored as ``<code>.html`` inside
    ``local_transcript``.

    Args:
        sermon: Mapping describing one sermon; it must contain a ``'code'``
            key, which is used for both the URL and the file name.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Outer double quotes keep this valid on Python versions before 3.12,
    # where an f-string cannot reuse its own quote character inside ``{}``.
    transcript = local_transcript / f"{sermon['code']}.html"
    transcript.parent.mkdir(exist_ok=True, parents=True)

    if transcript.exists():
        print(f'Skipping {transcript.name}')
        return

    response = httpx.get(sermon_transcript_url.format(**sermon))
    if response.status_code != httpx.codes.OK:
        # Never persist an error page: an existing file is treated as
        # "already downloaded" by the check above on every later run.
        print(f'Failed {transcript.name}: HTTP {response.status_code}')
        return

    # Write to a sibling temp file and rename, so an interrupted run cannot
    # leave a truncated file that would later be skipped as complete.
    partial = transcript.with_name(transcript.name + '.part')
    partial.write_text(response.text, encoding='utf-8')
    partial.replace(transcript)
    print(f'Downloaded {transcript.name}')
|
|
|
|
|
|
def download_audio(sermon):
    """Download the MP3 for one sermon, unless already saved.

    The file is requested from ``audio_download_url`` formatted with the
    fields of *sermon* and stored as ``<code>.mp3`` inside ``local_audio``.

    Args:
        sermon: Mapping describing one sermon; it must contain a ``'code'``
            key, which is used for both the URL and the file name.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Outer double quotes keep this valid on Python versions before 3.12,
    # where an f-string cannot reuse its own quote character inside ``{}``.
    audio = local_audio / f"{sermon['code']}.mp3"
    audio.parent.mkdir(exist_ok=True, parents=True)

    if audio.exists():
        print(f'Skipping {audio.name}')
        return

    # Download into a sibling temp file and rename at the end, so an
    # interrupted run cannot leave a truncated .mp3 that later runs skip.
    partial = audio.with_name(audio.name + '.part')

    # Stream the body in chunks rather than holding a whole MP3 in memory
    # in every worker process.
    with httpx.stream('GET', audio_download_url.format(**sermon), timeout=60) as response:
        if response.status_code != httpx.codes.OK:
            # Never persist an error body as audio: an existing file is
            # treated as "already downloaded" on every later run.
            print(f'Failed {audio.name}: HTTP {response.status_code}')
            return

        with open(partial, 'wb') as f:
            for chunk in response.iter_bytes():
                f.write(chunk)

    partial.replace(audio)
    print(f'Downloaded {audio.name}')
|
|
|
|
|
|
def main():
    """Download the audio for every sermon listed under ``./api_data``.

    Each regular file directly inside ``./api_data`` is parsed as JSON, and
    every entry of the parsed value is passed to ``download_audio`` through
    a process pool.
    """
    # Sorted for a reproducible processing order; directories are skipped
    # because they cannot be read as JSON index files.
    index_files = sorted(
        path for path in Path('./api_data').glob('*') if path.is_file()
    )

    # One pool for the whole run instead of starting and tearing down a new
    # set of worker processes for every index file.
    with multiprocessing.Pool() as pool:
        for index in index_files:
            sermons = json.loads(index.read_text(encoding='utf-8'))
            pool.map(download_audio, sermons)
            # Transcript downloads are currently disabled; to enable them:
            # pool.map(download_transcript, sermons)


if __name__ == '__main__':
    main()