init
This commit is contained in:
61
downloader.py
Normal file
61
downloader.py
Normal file
@@ -0,0 +1,61 @@
|
||||
import json
|
||||
import multiprocessing
|
||||
|
||||
import httpx
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
# URL templates; ``{code}`` is filled in with a sermon's code.
sermon_transcript_url = 'https://www.gty.org/library/print/sermons-library/{code}'
audio_download_url = "https://cdn.gty.org/sermons/High/{code}.mp3"

# Local download directories.
local_audio = Path('/run/media/bear/data/audio/')
# NOTE(review): the directory name is spelled "transcrips"; it is kept as-is
# so files already downloaded there are still found — confirm before renaming.
local_transcript = Path('/run/media/bear/data/transcrips')
|
||||
|
||||
def download_transcript(sermon):
    """Download the printable transcript page for one sermon.

    The page is saved as ``<code>.html`` inside ``local_transcript``. A
    transcript that already exists on disk is skipped, so the script can be
    re-run without fetching everything again.

    Args:
        sermon: Mapping describing a sermon. Only its ``'code'`` entry is
            used, both to build the URL and to name the local file.

    A failed request (network error or non-2xx response) is reported on
    stdout and leaves no ``.html`` file behind, so a later run retries it.
    """
    code = sermon['code']
    transcript = local_transcript / f'{code}.html'
    transcript.parent.mkdir(exist_ok=True, parents=True)

    if transcript.exists():
        print(f'Skipping {transcript.name}')
        return

    try:
        response = httpx.get(sermon_transcript_url.format(code=code))
        # Without this check an error page would be stored as the transcript
        # and then skipped forever by the exists() test above.
        response.raise_for_status()
    except httpx.HTTPError as exc:
        print(f'Failed {transcript.name}: {exc}')
        return

    # Write to a sibling temp file and rename, so an interrupted run never
    # leaves a truncated file under the final name.
    partial = transcript.with_name(f'{transcript.name}.part')
    partial.write_text(response.text, encoding='utf-8')
    partial.replace(transcript)
    print(f'Downloaded {transcript.name}')
|
||||
|
||||
|
||||
def download_audio(sermon):
    """Download the MP3 recording for one sermon.

    The audio is saved as ``<code>.mp3`` inside ``local_audio``. A recording
    that already exists on disk is skipped, so the script can be re-run
    without fetching everything again.

    Args:
        sermon: Mapping describing a sermon. Only its ``'code'`` entry is
            used, both to build the URL and to name the local file.

    A failed request (network error or non-2xx response) is reported on
    stdout and leaves no ``.mp3`` file behind, so a later run retries it.
    """
    code = sermon['code']
    audio = local_audio / f'{code}.mp3'
    audio.parent.mkdir(exist_ok=True, parents=True)

    if audio.exists():
        print(f'Skipping {audio.name}')
        return

    # Download into a sibling temp file and rename at the end, so an
    # interrupted transfer never leaves a truncated file under the final name.
    partial = audio.with_name(f'{audio.name}.part')
    url = audio_download_url.format(code=code)
    try:
        # Stream to disk instead of holding the whole recording in memory.
        with httpx.stream('GET', url, timeout=60) as response:
            # Without this check an error page would be stored as the MP3
            # and then skipped forever by the exists() test above.
            response.raise_for_status()
            with open(partial, 'wb') as f:
                for chunk in response.iter_bytes():
                    f.write(chunk)
    except httpx.HTTPError as exc:
        partial.unlink(missing_ok=True)
        print(f'Failed {audio.name}: {exc}')
        return

    partial.replace(audio)
    print(f'Downloaded {audio.name}')
|
||||
|
||||
|
||||
def main():
    """Download the audio for every sermon listed under ``./api_data``.

    Every entry matched by ``./api_data/*`` is read as JSON and its elements
    are handed to ``download_audio`` through a process pool. Each element is
    expected to be a sermon mapping with a ``'code'`` entry.
    """
    # One pool for the whole run; workers are reused across index files
    # instead of being spawned and torn down for each one.
    with multiprocessing.Pool() as pool:
        for index in Path('./api_data').glob('*'):
            sermons = json.loads(index.read_text())
            pool.map(download_audio, sermons)
            # Transcripts are not fetched by default. To fetch them as well:
            # pool.map(download_transcript, sermons)
|
||||
|
||||
|
||||
|
||||
|
||||
# Run the downloader only when executed as a script, not on import. The guard
# also keeps multiprocessing workers that re-import this module from calling
# main() again.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user