"""Download sermon metadata from the ca.gty.org library API.

For every code listed by the section-information endpoint, all pages of
resources are fetched and stored as ``./api_data/<code>.json``. A code is
skipped when the cached file already holds as many records as the API
currently reports.
"""
import json
import multiprocessing
from pathlib import Path

import httpx
from loguru import logger

info_url = 'https://ca.gty.org/api/Library/GetInitialSectionInformation/2/US'
api_url = "https://ca.gty.org/api/Library/GetResources/2/code/{code}/none/none/en/{page}/US"


def get_codes(url):
    """Return the ``value`` of every entry under ``possibleCodes`` at *url*.

    Raises:
        httpx.HTTPStatusError: if the server answers with an error status.
    """
    r = httpx.get(url)
    r.raise_for_status()
    return [x['value'] for x in r.json()['possibleCodes']]


def get_sermon_data(code):
    """Fetch every sermon record for *code* and cache it as JSON on disk.

    The first page is always requested because it carries the total record
    count. If the cached file already contains that many records, nothing
    else is downloaded.

    Args:
        code: One of the values returned by ``get_codes``; it is inserted
            into the API URL and used as the cache file name.

    Returns:
        The list of all downloaded items, or ``None`` when the cache is
        already up to date.
    """
    print(f'{code=}')

    # Start with page 1
    page = 1
    data = get_api_data(code, page)
    total_sermons = data['totalNumberOfRecords']

    if not new_sermon_check(code, total_sermons):
        print('\tNo new sermons')
        return

    all_sermons = list(data['items'])
    print(f'\t{total_sermons=}')
    print(f'\t{len(all_sermons)=}')

    # Continue with the next page if needed.
    while len(all_sermons) < total_sermons:
        page += 1
        print(f'\t{page=}')
        data = get_api_data(code, page)
        sermons = list(data['items'])
        if not sermons:
            # The API ran out of items before reaching the advertised total;
            # stop here instead of requesting empty pages forever.
            print('NO MORE SERMONS')
            break
        all_sermons += sermons
        print(f'\t{len(all_sermons)=}')

    out_path = Path(f'./api_data/{code}.json')
    # Create the cache directory on first run; without it the write below
    # would raise FileNotFoundError.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # The context manager guarantees the file is flushed and closed.
    with out_path.open('w', encoding='utf-8') as fh:
        json.dump(all_sermons, fh, indent=4)
    return all_sermons


def get_api_data(code, page):
    """Return the decoded JSON body of one resource page for *code*.

    Raises:
        httpx.HTTPStatusError: if the server answers with an error status.
    """
    with httpx.Client() as client:
        response = client.get(api_url.format(code=code, page=page))
        response.raise_for_status()
        return response.json()


def new_sermon_check(code, total_sermons):
    """Tell whether the cache for *code* is missing or out of date.

    Returns:
        ``True`` when ``./api_data/<code>.json`` does not exist or when the
        number of records stored in it differs from *total_sermons*;
        ``False`` otherwise.
    """
    data_path = Path(f'./api_data/{code}.json')
    if not data_path.exists():
        print('\tNO FILE')
        return True
    data = json.loads(data_path.read_text(encoding='utf-8'))
    return len(data) != total_sermons


def main():
    """Refresh the cache for every available code, one code at a time."""
    for code in get_codes(info_url):
        get_sermon_data(code)

    # Parallel variant, currently disabled:
    # codes = get_codes(info_url)
    #
    # with multiprocessing.Pool(multiprocessing.cpu_count()) as p:
    #     p.map(get_sermon_data, codes)


if __name__ == "__main__":
    main()