90 lines
2.0 KiB
Python
90 lines
2.0 KiB
Python
import json
|
|
import multiprocessing
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
from loguru import logger
|
|
|
|
|
|
# Endpoint passed to get_codes(); its JSON response is read for 'possibleCodes'.
info_url = 'https://ca.gty.org/api/Library/GetInitialSectionInformation/2/US'
# URL template for one page of resources; get_api_data() fills in {code} and {page}.
api_url = "https://ca.gty.org/api/Library/GetResources/2/code/{code}/none/none/en/{page}/US"
|
|
|
|
|
|
def get_codes(url):
    """Return the 'value' of every entry under 'possibleCodes' at ``url``.

    Args:
        url: Address to GET; the response body must be JSON containing a
            'possibleCodes' list of objects that each have a 'value' key.

    Returns:
        A list with one 'value' per entry, in response order.

    Raises:
        httpx.HTTPStatusError: If the response status is an error
            (raised by ``raise_for_status``).
    """
    response = httpx.get(url)
    response.raise_for_status()

    payload = response.json()
    return [entry['value'] for entry in payload['possibleCodes']]
|
|
|
|
def get_sermon_data(code):
    """Download every sermon record for ``code`` and cache it as JSON.

    The first page is fetched to learn the reported total. When
    ``new_sermon_check`` reports that the cached file already matches that
    total, nothing else is downloaded. Otherwise further pages are requested
    until the collected count reaches the total or a page comes back empty,
    and the combined list is written to ``./api_data/{code}.json``.

    Args:
        code: Section code substituted into the API URL and used as the
            cache file name.

    Returns:
        The list of collected sermon items, or ``None`` when the cache is
        already up to date.

    Raises:
        KeyError: If a response lacks 'totalNumberOfRecords' or 'items'.
        Any exception raised by ``get_api_data`` propagates unchanged.
    """
    print(f'{code=}')

    # Start with page 1
    page = 1
    data = get_api_data(code, page)

    total_sermons = data['totalNumberOfRecords']

    if new_sermon_check(code, total_sermons) is False:
        print('\tNo new sermons')
        return None

    all_sermons = list(data['items'])

    print(f'\t{total_sermons=}')
    print(f'\t{len(all_sermons)=}')

    # Continue with the next page if needed.
    while len(all_sermons) < total_sermons:
        page += 1
        print(f'\t{page=}')
        sermons = list(get_api_data(code, page)['items'])

        if not sermons:
            # An empty page means the API has nothing more to give even
            # though the reported total was not reached; stop instead of
            # requesting pages forever.
            print('NO MORE SERMONS')
            break

        all_sermons += sermons
        print(f'\t{len(all_sermons)=}')

    out_path = Path(f'./api_data/{code}.json')
    # The cache directory may not exist on a first run.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # Serialize before touching the file so a serialization failure cannot
    # leave a truncated cache behind; write_text also closes the handle.
    out_path.write_text(json.dumps(all_sermons, indent=4))

    return all_sermons
|
|
|
|
|
|
def get_api_data(code, page):
    """Fetch one page of resources for ``code`` and return the decoded JSON.

    Args:
        code: Value substituted for ``{code}`` in ``api_url``.
        page: Value substituted for ``{page}`` in ``api_url``.

    Returns:
        The parsed JSON body of the response.

    Raises:
        httpx.HTTPStatusError: If the response status is an error
            (raised by ``raise_for_status``).
    """
    page_url = api_url.format(code=code, page=page)

    with httpx.Client() as session:
        reply = session.get(page_url)
        reply.raise_for_status()
        payload = reply.json()

    return payload
|
|
|
|
|
|
def new_sermon_check(code, total_sermons):
    """Report whether the cached data for ``code`` needs to be refreshed.

    Reads ``./api_data/{code}.json`` and compares the number of cached
    entries with ``total_sermons``.

    Args:
        code: Section code used as the cache file name.
        total_sermons: Record count to compare the cache against.

    Returns:
        ``True`` when the cache file is missing, cannot be parsed, or holds
        a different number of entries than ``total_sermons``; ``False`` when
        the counts match.
    """
    data_path = Path(f'./api_data/{code}.json')

    try:
        cached = json.loads(data_path.read_text())
    except FileNotFoundError:
        print('\tNO FILE')
        return True
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses. A truncated or corrupt cache (e.g. left behind by an
        # interrupted write) should trigger a re-download rather than crash
        # every subsequent run.
        print('\tUNREADABLE FILE')
        return True

    return len(cached) != total_sermons
|
|
|
|
def main():
    """Fetch and cache sermon data for every code listed at ``info_url``.

    Codes are processed one after another in the order ``get_codes``
    returns them.
    """
    # NOTE: a parallel variant -- multiprocessing.Pool(multiprocessing.cpu_count())
    # with p.map(get_sermon_data, codes) -- is intentionally left disabled;
    # the sequential loop below is the active code path.
    codes = get_codes(info_url)
    for code in codes:
        get_sermon_data(code)
|
|
|
|
# Call main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|