import datetime

import datarobot as dr
import pandas as pd
import snowflake.connector

# ---------------------------------------------------------------------------
# Snowflake connection settings
# ---------------------------------------------------------------------------
# The three credential values below are placeholder strings; substitute your
# own account identifier, user name and password before running.
SNOW_ACCOUNT = 'my_creds.SNOW_ACCOUNT'
SNOW_USER = 'my_creds.SNOW_USER'
SNOW_PASS = 'my_creds.SNOW_PASS'
SNOW_DB = 'TITANIC'
SNOW_SCHEMA = 'PUBLIC'

# ---------------------------------------------------------------------------
# DataRobot API settings
# ---------------------------------------------------------------------------
DR_API_TOKEN = 'YOUR API TOKEN'
# If DataRobot is installed locally, swap app.datarobot.com for the
# application host of your own cluster.
DR_ENDPOINT = 'https://app.datarobot.com/api/v2'
# HTTP headers carrying the API token, for direct REST calls.
DR_HEADERS = {
    'Content-Type': 'application/json',
    'Authorization': 'token %s' % DR_API_TOKEN,
}
# Query that pulls the full training table.
sql = "select * from titanic.public.passengers_training"

# Connection settings gathered in one mapping and handed to the connector.
snow_conn_args = {
    'user': SNOW_USER,
    'password': SNOW_PASS,
    'account': SNOW_ACCOUNT,
    'database': SNOW_DB,
    'schema': SNOW_SCHEMA,
    'protocol': 'https',
    'application': 'DATAROBOT',
}

# Open the Snowflake connection and obtain a cursor from it.
ctx = snowflake.connector.connect(**snow_conn_args)
cur = ctx.cursor()

# Run the query, then load the result set into a pandas DataFrame.
cur.execute(sql)
df = cur.fetch_pandas_all()

# Preview of the first rows; as a bare expression this is only displayed in an
# interactive / notebook session.
df.head()
# Feature engineering: add a TOTAL_FAMILY_SIZE column computed as
# SIBSP + PARCH + 1 (the +1 presumably counts the passenger themselves).
df['TOTAL_FAMILY_SIZE'] = df['SIBSP'].add(df['PARCH']).add(1)

# Preview the frame with the new column (displayed only in an interactive /
# notebook session).
df.head()
その後、DataRobotにデータが送信され、新しいモデリングプロジェクトが開始されます。
# Configure the DataRobot client. DR_ENDPOINT is the endpoint constant defined
# with the other DataRobot parameters in this file; the previously used name
# DR_MODELING_ENDPOINT is not defined anywhere here and raised NameError.
dr.Client(token=DR_API_TOKEN, endpoint=DR_ENDPOINT)

# Create the project from the DataFrame. The name carries a minute-resolution
# timestamp so that repeated runs produce distinguishable projects.
now = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M')
project_name = 'Titanic_Survival_{}'.format(now)
proj = dr.Project.create(sourcedata=df, project_name=project_name)

# Further work can continue through the Python API or in the GUI; print a link
# to the new project. Slicing off the last 6 characters drops the trailing
# 'api/v2' from the endpoint, leaving the application base URL.
print(DR_ENDPOINT[:-6] + 'projects/{}'.format(proj.id))