Module rulevetting.api.modeling
Modeling
- run many models from imodels
- extract stable rules: screen for high predictive accuracy and examine which rules are kept (see the sketch after this list)
- build a stable-rules model (e.g. using RuleFit or CORELS)
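As a sketch of the first two steps, the snippet below fits a few rule-based models from imodels and screens them by ROC AUC on the tune split. It assumes the binary label lives in an 'outcome' column (as fit_models below assumes); the helper name screen_imodels and the particular model choices are illustrative, not part of this module.

# Illustrative sketch (not part of this module): screen several imodels
# estimators by ROC AUC on the tune split.
import pandas as pd
from sklearn.metrics import roc_auc_score
from imodels import BoostedRulesClassifier, FIGSClassifier, RuleFitClassifier


def screen_imodels(df_train: pd.DataFrame, df_tune: pd.DataFrame, label: str = 'outcome') -> pd.Series:
    X_train, y_train = df_train.drop(columns=[label]).values, df_train[label].values
    X_tune, y_tune = df_tune.drop(columns=[label]).values, df_tune[label].values

    candidates = {
        'rulefit': RuleFitClassifier(),
        'figs': FIGSClassifier(),
        'boosted_rules': BoostedRulesClassifier(),
    }
    aucs = {}
    for name, model in candidates.items():
        model.fit(X_train, y_train)
        # prefer probabilistic scores when the estimator provides them
        if hasattr(model, 'predict_proba'):
            scores = model.predict_proba(X_tune)[:, 1]
        else:
            scores = model.predict(X_tune)
        aucs[name] = roc_auc_score(y_tune, scores)
    return pd.Series(aucs).sort_values(ascending=False)

Rules that appear in the models surviving this screen are the candidates for the final stable-rules model.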
"""
# Modeling
- run many models from [imodels](https://github.com/csinva/imodels)
- extract stable rules: screen for high predictive accuracy and examine which rules are kept
- build a stable-rules model (e.g. using RuleFit or CORELS)
"""
import importlib
import random
import numpy as np
import pandas as pd
from autogluon.tabular import TabularDataset, TabularPredictor
import rulevetting
import rulevetting.api.util
def fit_models(train_data: pd.DataFrame, tune_data: pd.DataFrame, interpretable: bool = True):
    """Note: don't use this function; it still depends on experimental autogluon dependencies

    Parameters
    ----------
    train_data
    tune_data
    interpretable: bool
        Whether to fit interpretable models or standard models

    Returns
    -------
    predictor
    """
    # wrap the dataframes for autogluon
    train_data = TabularDataset(train_data)
    tune_data = TabularDataset(tune_data)

    # predict the binary 'outcome' column, selecting models by ROC AUC
    predictor = TabularPredictor(label='outcome',
                                 path=rulevetting.AUTOGLUON_CACHE_PATH,
                                 eval_metric='roc_auc')
    kwargs = dict(
        verbosity=2,
        time_limit=30,  # seconds allowed for fitting
    )
    if interpretable:
        # fit only autogluon's interpretable (rule-based) models
        predictor.fit(train_data, tuning_data=tune_data, presets='interpretable', **kwargs)
        print(predictor.interpretable_models_summary())
    else:
        predictor.fit(train_data, tuning_data=tune_data, **kwargs)
    return predictor
if __name__ == '__main__':
    # fit models on every project registered with rulevetting
    project_ids = rulevetting.api.util.get_project_ids()
    for project_id in project_ids:
        # seed for reproducibility
        np.random.seed(0)
        random.seed(0)
        print('fitting on', project_id)

        # dynamically import the project's Dataset class and load its cached splits
        project_module_name = f'rulevetting.projects.{project_id}.dataset'
        module = importlib.import_module(project_module_name)
        dset = module.Dataset()
        df_train, df_tune, df_test = dset.get_data(load_csvs=True)

        predictor = fit_models(df_train, df_tune)
        print(predictor)
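To make the last step concrete, the sketch below fits a single RuleFitClassifier on one project's splits, mirroring the __main__ loop above. The use of feature_names in fit and of print(model) to display the learned rules follow imodels' usual conventions but are assumptions here, not part of this module.

# Illustrative sketch (not part of this module): build one stable-rules model
# with imodels' RuleFit for a single project.
import importlib
from sklearn.metrics import accuracy_score
from imodels import RuleFitClassifier
import rulevetting.api.util

project_id = rulevetting.api.util.get_project_ids()[0]  # pick any available project
module = importlib.import_module(f'rulevetting.projects.{project_id}.dataset')
df_train, df_tune, df_test = module.Dataset().get_data(load_csvs=True)

X_train, y_train = df_train.drop(columns=['outcome']), df_train['outcome']
X_tune, y_tune = df_tune.drop(columns=['outcome']), df_tune['outcome']

model = RuleFitClassifier()
model.fit(X_train.values, y_train.values, feature_names=list(X_train.columns))
print('tune accuracy:', accuracy_score(y_tune, model.predict(X_tune.values)))
print(model)  # imodels estimators generally print their learned rules (assumption)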
Functions
def fit_models(train_data: pandas.core.frame.DataFrame, tune_data: pandas.core.frame.DataFrame, interpretable: bool = True)
Note: don't use this function; it still depends on experimental autogluon dependencies
Parameters
train_data
tune_data
interpretable : bool
    Whether to fit interpretable models or standard models
Returns
predictor
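If the experimental interpretable-autogluon dependencies are installed, a call might look like the sketch below; predictor.leaderboard is the standard TabularPredictor API for ranking the fitted models.

# Illustrative sketch: fit autogluon models on one project's splits and rank
# them on the tune split (only if the experimental dependencies are installed).
import importlib
import rulevetting.api.util
from rulevetting.api.modeling import fit_models

project_id = rulevetting.api.util.get_project_ids()[0]
module = importlib.import_module(f'rulevetting.projects.{project_id}.dataset')
df_train, df_tune, df_test = module.Dataset().get_data(load_csvs=True)

# interpretable=False fits autogluon's standard model zoo rather than the
# experimental interpretable presets
predictor = fit_models(df_train, df_tune, interpretable=False)
print(predictor.leaderboard(df_tune))  # rank fitted models on the tune split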