init

2025-07-19 20:38:39 -04:00
commit 9e20e634e2
55 changed files with 79094 additions and 0 deletions
--- a/downloader.py
+++ b/downloader.py
@@ -0,0 +1,61 @@
+import json
+import multiprocessing
+
+import httpx
+
+from pathlib import Path
+
+sermon_transcript_url = 'https://www.gty.org/library/print/sermons-library/{code}'
+audio_download_url = "https://cdn.gty.org/sermons/High/{code}.mp3"
+
+local_audio = Path(f'/run/media/bear/data/audio/')
+local_transcript = Path(f'/run/media/bear/data/transcrips')
+
+def download_transcript(sermon):
+    transcript = local_transcript / f'{sermon['code']}.html'
+    transcript.parent.mkdir(exist_ok=True, parents=True)
+
+    if transcript.exists():
+        print(f'Skipping {transcript.name}')
+        return
+
+    response = httpx.get(sermon_transcript_url.format(**sermon))
+
+    transcript.write_text(response.text)
+    print(f'Downloaded {transcript.name}')
+
+
+def download_audio(sermon):
+    # if sermon['fileName'] == '':
+    #     sermon['fileName'] = f'{sermon['code']}.mp3'
+
+
+    audio = local_audio / f'{sermon['code']}.mp3'
+    audio.parent.mkdir(exist_ok=True, parents=True)
+
+    if audio.exists():
+        print(f'Skipping {audio.name}')
+        return
+
+    response = httpx.get(audio_download_url.format(**sermon), timeout=60)
+    with open(audio, 'wb') as f:
+        f.write(response.content)
+
+    print(f'Downloaded {audio.name}')
+
+
+def main():
+    for index in list(Path('./api_data').glob('*')):
+        sermons = json.loads(index.read_text())
+
+        with multiprocessing.Pool() as p:
+            p.map(download_audio, sermons)
+
+        # with multiprocessing.Pool() as p:
+        #     p.map(download_transcript, sermons)
+
+
+
+
+# Run the downloader only when executed as a script, not when imported.
+if __name__ == '__main__':
+    main()