init
This commit is contained in:
89
indexer.py
Normal file
89
indexer.py
Normal file
@@ -0,0 +1,89 @@
|
||||
import json
|
||||
import multiprocessing
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from loguru import logger
|
||||
|
||||
|
||||
# Endpoint queried once per run; its JSON response carries the
# 'possibleCodes' list that drives the indexing loop.
info_url = "https://ca.gty.org/api/Library/GetInitialSectionInformation/2/US"

# Paged resource endpoint template; ``{code}`` and ``{page}`` are filled in
# with ``str.format`` for each request.
api_url = "https://ca.gty.org/api/Library/GetResources/2/code/{code}/none/none/en/{page}/US"
|
||||
|
||||
|
||||
def get_codes(url):
    """Return the ``value`` of every entry under ``possibleCodes`` at *url*.

    Performs a GET request against *url*, raises if the response status
    indicates an error, and decodes the body as JSON.
    """
    response = httpx.get(url)
    response.raise_for_status()

    payload = response.json()
    return [entry['value'] for entry in payload['possibleCodes']]
|
||||
|
||||
def get_sermon_data(code):
    """Download every item for *code* and cache the result as JSON.

    The first page is always fetched because it reports the total record
    count. That count is handed to ``new_sermon_check``; when the check
    reports nothing new, the function returns early without writing.
    Otherwise further pages are requested until the collected item count
    reaches the reported total or a page comes back empty.

    The collected items are written to ``./api_data/{code}.json`` (the
    directory is created if it does not exist yet).

    Args:
        code: Code substituted into the paged API URL and used as the
            cache file's stem.

    Returns:
        The list of collected items, or ``None`` when the cache was
        reported as up to date.
    """
    print(f'{code=}')

    # Start with page 1
    page = 1
    data = get_api_data(code, page)

    total_sermons = data['totalNumberOfRecords']

    if not new_sermon_check(code, total_sermons):
        print('\tNo new sermons')
        return None

    all_sermons = list(data['items'])

    print(f'\t{total_sermons=}')
    print(f'\t{len(all_sermons)=}')

    # Continue with the next page if needed.
    while len(all_sermons) < total_sermons:
        page += 1
        print(f'\t{page=}')
        data = get_api_data(code, page)
        sermons = list(data['items'])

        # An empty page means the API has nothing further to offer, even if
        # the reported total was not reached; stop rather than loop forever.
        if not sermons:
            print('NO MORE SERMONS')
            break

        all_sermons += sermons
        print(f'\t{len(all_sermons)=}')

    out_path = Path(f'./api_data/{code}.json')
    # Without this, a fresh checkout fails only after all pages were fetched.
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # The context manager guarantees the file is flushed and closed, so the
    # cache is never left half-written by a lingering open handle.
    with out_path.open('w') as fh:
        json.dump(all_sermons, fh, indent=4)

    return all_sermons
|
||||
|
||||
|
||||
def get_api_data(code, page):
    """Fetch one page of the resource listing for *code* as decoded JSON.

    The request URL is built from the module-level ``api_url`` template.
    An error status on the response raises via ``raise_for_status``.
    """
    url = api_url.format(code=code, page=page)

    with httpx.Client() as session:
        reply = session.get(url)
        reply.raise_for_status()
        return reply.json()
|
||||
|
||||
|
||||
def new_sermon_check(code, total_sermons):
    """Report whether the cached data for *code* needs to be refreshed.

    Reads ``./api_data/{code}.json`` and compares the number of cached
    entries with *total_sermons*.

    Args:
        code: Code whose cache file should be inspected.
        total_sermons: Record count to compare the cache against.

    Returns:
        ``True`` when the cache file is missing, cannot be parsed as JSON,
        or holds a different number of entries than *total_sermons*;
        ``False`` when the counts match.
    """
    data_path = Path(f'./api_data/{code}.json')

    try:
        raw = data_path.read_text()
    except FileNotFoundError:
        print('\tNO FILE')
        return True

    try:
        cached = json.loads(raw)
    except json.JSONDecodeError:
        # A truncated or corrupt cache (e.g. from an interrupted write) must
        # trigger a re-download instead of aborting every subsequent run.
        print('\tUNREADABLE FILE')
        return True

    return len(cached) != total_sermons
|
||||
|
||||
def main():
    """Look up the available codes and index each of them in turn."""
    codes = get_codes(info_url)
    for code in codes:
        get_sermon_data(code)

    # Disabled parallel variant, kept for reference:
    # codes = get_codes(info_url)
    #
    # with multiprocessing.Pool(multiprocessing.cpu_count()) as p:
    #     p.map(get_sermon_data, codes)
|
||||
|
||||
# Script entry point: run the indexer only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user