import asyncio
import math
import os

import aiohttp
import backoff
import pandas as pd
# Endpoint that place-match requests are POSTed to.
API_URL = "https://api.reprompt.io/v2/placematch"
# Token sent in the Authorization header; None when REPROMPT_API_KEY is unset.
API_KEY = os.environ.get("REPROMPT_API_KEY")
# HTTP statuses for which a failed request is retried instead of abandoned.
_RETRYABLE_STATUSES = frozenset({429, 500, 502, 503})


def _should_give_up(exc):
    """Return True when *exc* is an HTTP error response that is not retryable."""
    return (
        isinstance(exc, aiohttp.ClientResponseError)
        and exc.status not in _RETRYABLE_STATUSES
    )


def _coordinate_or_none(value):
    """Return *value* as a finite float, or None when it is missing.

    None, blank strings, NaN and +/-inf all count as missing. NaN matters
    because pandas uses it for empty cells, it is truthy, and it would be
    serialized as the non-standard JSON token ``NaN``.

    Raises:
        ValueError / TypeError: if *value* cannot be converted by ``float()``.
    """
    if value is None:
        return None
    if isinstance(value, str) and not value.strip():
        return None
    number = float(value)
    return number if math.isfinite(number) else None


@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientError,
    max_tries=5,
    giveup=_should_give_up,
)
async def match_place(session, name, address, lat=None, lon=None):
    """POST one place to the match API and return the decoded JSON response.

    Args:
        session: aiohttp client session used to send the request.
        name: Place name, sent as ``place.name``.
        address: Full address string, sent as ``place.full_address``.
        lat: Optional latitude. Sent only when both ``lat`` and ``lon`` are
            present and finite.
        lon: Optional longitude; see ``lat``.

    Returns:
        The response body parsed by ``resp.json()``.

    Raises:
        aiohttp.ClientError: when the request still fails after up to 5
            attempts, or immediately for an HTTP error status outside
            ``_RETRYABLE_STATUSES``.
        ValueError, TypeError: if ``lat`` or ``lon`` is not convertible to
            float.
    """
    place = {"name": name, "full_address": address}

    latitude = _coordinate_or_none(lat)
    longitude = _coordinate_or_none(lon)
    # Compare against None rather than relying on truthiness: 0.0 is a valid
    # coordinate (equator / prime meridian) and must not be dropped.
    if latitude is not None and longitude is not None:
        place["latitude"] = latitude
        place["longitude"] = longitude

    payload = {"place": place, "match_sources": ["overture", "foursquare"]}
    async with session.post(API_URL, json=payload) as resp:
        resp.raise_for_status()
        return await resp.json()
# Column order of the CSV written by match_dataset.
_OUTPUT_COLUMNS = ['name', 'overture_id', 'foursquare_id']


def _value_or_none(value):
    """Return None for pandas missing markers (None/NaN/NA), else *value*."""
    return None if pd.isna(value) else value


def _format_address(row):
    """Join the row's non-missing address, city and state fields with ', '."""
    parts = (
        row['business_address'],
        row['business_city'],
        row['business_state'],
    )
    # Skip empty cells so the address never contains the literal text "nan".
    return ", ".join(str(part) for part in parts if not pd.isna(part))


def _first_id_by_source(matches):
    """Return {'overture': id, 'foursquare': id} for the first match of each.

    Source is inferred from the ID's shape: IDs containing '-' are treated as
    Overture (UUID-style) and 24-character IDs as Foursquare. Entries without
    a string ``place_id`` are ignored.
    """
    first_ids = {}
    for match in matches:
        place_id = match.get('place_id')
        if not isinstance(place_id, str):
            continue
        if '-' in place_id:
            first_ids.setdefault('overture', place_id)
        elif len(place_id) == 24:
            first_ids.setdefault('foursquare', place_id)
    return first_ids


async def match_dataset(csv_file, *, output_file='matched.csv', limit=1000,
                        max_concurrency=10):
    """Match the places listed in a CSV file and write the matched IDs to CSV.

    The input must provide the columns ``business_name``,
    ``business_address``, ``business_city`` and ``business_state``;
    ``business_latitude`` and ``business_longitude`` are used when present.
    Each row is sent through ``match_place``. Rows whose request fails are
    reported on stdout and left out of the output.

    Args:
        csv_file: Path (or buffer) accepted by ``pandas.read_csv``.
        output_file: Destination of the result CSV, with columns ``name``,
            ``overture_id`` and ``foursquare_id``.
        limit: Maximum number of leading rows to process; None for all rows.
        max_concurrency: Maximum number of requests in flight at once.
    """
    df = pd.read_csv(csv_file)
    if limit is not None:
        df = df.head(limit)

    semaphore = asyncio.Semaphore(max_concurrency)
    headers = {"Authorization": f"Bearer {API_KEY}"}

    async def match_row(session, row):
        """Match a single row; return its result dict, or None on failure."""
        async with semaphore:
            try:
                resp = await match_place(
                    session,
                    row['business_name'],
                    _format_address(row),
                    # Empty CSV cells arrive as NaN; pass None instead.
                    _value_or_none(row.get('business_latitude')),
                    _value_or_none(row.get('business_longitude')),
                )
                ids = _first_id_by_source(resp.get('results') or [])
                return {
                    'name': row['business_name'],
                    'overture_id': ids.get('overture'),
                    'foursquare_id': ids.get('foursquare'),
                }
            except Exception as e:
                # Per-row boundary: one bad row must not abort the batch.
                # .get() keeps the handler itself from raising KeyError.
                print(f"Error: {row.get('business_name')}: {e}")
                return None

    async with aiohttp.ClientSession(headers=headers) as session:
        tasks = [match_row(session, row) for _, row in df.iterrows()]
        outcomes = await asyncio.gather(*tasks)

    results = [outcome for outcome in outcomes if outcome is not None]
    # Explicit columns keep the header row even when nothing matched.
    pd.DataFrame(results, columns=_OUTPUT_COLUMNS).to_csv(
        output_file, index=False
    )
    print(f"Matched {len(results)} places")
if __name__ == "__main__":
    # Run the batch only when executed as a script, so that importing this
    # module to reuse its functions does not start sending API requests.
    asyncio.run(match_dataset('restaurants.csv'))