id | era | feature1 | . . . | feature310 | target |
---|---|---|---|---|---|
n2b2e3dd163cb422 | era1 | 0.75 | . . . | 0.00 | 0.25 |
n177021a571c94c8 | era1 | 1.00 | . . . | 0.25 | 0.75 |
n7830fa4c0cd8466 | era1 | 0.25 | . . . | 1.00 | 0.00 |
nc584a184cee941b | era1 | 0.25 | . . . | 0.00 | 1.00 |
nc5ab8667901946a | era1 | 0.75 | . . . | 0.25 | 0.25 |
n84e624e4714a7ca | era1 | 0.00 | . . . | 0.75 | 1.00 |
#!/usr/bin/env python
"""Example model on Numerai data using an XGBoost regressor.

Reads ``numerai_training_data.csv`` and ``numerai_tournament_data.csv`` from
the current working directory, fits an ``XGBRegressor`` on every column whose
name contains ``"feature"``, and writes the tournament predictions to
``predictions.csv`` with the columns ``id`` and ``prediction``.
"""
import pandas as pd
from xgboost import XGBRegressor

# training data contains features and targets
training_data = pd.read_csv("numerai_training_data.csv").set_index("id")
# tournament data contains features only
tournament_data = pd.read_csv("numerai_tournament_data.csv").set_index("id")
feature_names = [f for f in training_data.columns if "feature" in f]

# train a model to make predictions on tournament data
model = XGBRegressor(
    max_depth=5,
    learning_rate=0.01,
    n_estimators=2000,
    colsample_bytree=0.1,
)
model.fit(training_data[feature_names], training_data["target"])

# write predictions to a CSV file keyed by id.
# predict() returns a plain NumPy array, which has no to_csv(); wrap it in a
# DataFrame that reuses the tournament index so each row keeps its id.
predictions = pd.DataFrame(
    {"prediction": model.predict(tournament_data[feature_names])},
    index=tournament_data.index,
)
predictions.to_csv("predictions.csv")
id | prediction |
---|---|
n60dffdaceb7e467 | 0.25 |
nadaeef0214b84a8 | 1.00 |
nb13883520a4344f | 0.25 |
n423766c5a4fa42a | 0.75 |
n252b14301e46a31 | 0.25 |
n75a5baf93a624cc | 0.00 |
n2ff91086716e413 | 1.00 |