Snowpark Method Guide
Transforming Event Data into ML-Ready Features using Snowpark and Python
Calling the Features API from Snowflake to calculate features
Python Code
from snowflake.snowpark import Session
from predicthq import Client
import pandas as pd
# Settings passed to the Snowflake session builder.
# Replace every <placeholder> with the value for your own account.
connection_parameters = dict(
    account="<your_account>",
    user="<your_username>",
    password="<your_password>",
    role="<your_role>",
    warehouse="<your_warehouse>",
    database="<your_database>",
    schema="<your_schema>",
)

# Open the Snowpark session that the rest of the script reads from and writes to
session = Session.builder.configs(connection_parameters).create()

# PredictHQ API client, authenticated with your access token
phq = Client(access_token="your_predicthq_access_token")
# Fetch location data from the Snowflake table into a pandas DataFrame
locations_df = session.table("SAVED_LOCATIONS").to_pandas()

# Collects one dict per feature record returned by the API, across all locations
all_data = []

# Issue one Features API request per saved location.
# The row index is not needed, so it is discarded.
for _, location in locations_df.iterrows():
    # The radius is sent as a single string: the value immediately followed by its unit
    radius_with_unit = f"{location['RADIUS']}{location['RADIUS_UNIT']}"

    # Call the Features API for this location over its configured date range
    for feature in phq.features.obtain_features(
        active__gte=str(location['DATE_START']),
        active__lte=str(location['DATE_END']),
        location__geo={
            "lon": location['LON'],
            "lat": location['LAT'],
            "radius": radius_with_unit
        },
        phq_attendance_sports__stats=["sum"],
        phq_attendance_conferences__stats=["sum"]
        # add more ML features here like phq_attendance_community, phq_attendance_concerts,
        # phq_attendance_expos, phq_attendance_festivals, phq_attendance_performing_arts,
        # and so on.
    ):
        # getattr(..., 'sum', 0) falls back to 0 when the stats object
        # carries no 'sum' attribute for that feature.
        data_point = {
            'location': location['LOCATION'],
            'date': feature.date,
            'phq_attendance_conferences': getattr(feature.phq_attendance_conferences.stats, 'sum', 0),
            'phq_attendance_sports': getattr(feature.phq_attendance_sports.stats, 'sum', 0)
        }
        all_data.append(data_point)
# Persist the collected features and always release the session afterwards,
# even if the write or the verification query fails.
try:
    # Convert all collected data to a pandas DataFrame
    results_df = pd.DataFrame(all_data)

    if results_df.empty:
        # With no collected rows the frame has no columns, so there is
        # nothing meaningful to append to the table.
        print("No features were returned; nothing to write.")
    else:
        # Convert the pandas DataFrame to a Snowpark DataFrame
        snow_df = session.create_dataframe(results_df)

        # Append the results to the existing Snowflake table
        snow_df.write.mode("append").save_as_table("ML_FEATURES_FOR_LOCATIONS")

        # show() prints the rows itself and returns None, so it must not be
        # wrapped in print() (that would emit a trailing "None").
        session.table("ML_FEATURES_FOR_LOCATIONS").show()
finally:
    # Close the session
    session.close()
Table Output
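| location | date | phq_attendance_conferences | phq_attendance_sports |
| --- | --- | --- | --- |
| … | … | 231 | 19329 |
| … | … | 666 | 12312 |
| … | … | 215 | 0 |
| … | … | 87 | 23246 |
| … | … | 395 | 19448 |

(The location and date values for each row were not preserved in this excerpt.)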
Refer back to Main Guide
Last updated
Was this helpful?