ensembles
This commit is contained in:
137
ensemble_methods/paramter_ensemble.py
Normal file
137
ensemble_methods/paramter_ensemble.py
Normal file
@ -0,0 +1,137 @@
|
||||
import csv
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
from sklearn import metrics
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
# Candidate decision thresholds: 0.10, 0.15, ..., 0.85 (16 values, step 0.05).
# Rounded to two decimals so the values format cleanly inside result keys.
decision_boundrys = [round(percent / 100, 2) for percent in range(10, 90, 5)]
|
||||
|
||||
|
||||
def accumulate_uars(output_folders):
    """Compute per-decision-boundary UAR scores for every ensemble checkpoint.

    Walks ``output_folders/<model_type>/<param_config>/`` and searches each
    parameter configuration recursively for entries named
    ``ensemble_checkpoints``.  Every such file is unpickled; its ``'labels'``
    entry is removed and used as ground truth, and its ``'weights'`` entry is
    read by index, one score per label.  For each threshold in the
    module-level ``decision_boundrys`` the scores are binarised
    (``score > threshold``) and the macro-averaged recall over classes 0 and 1
    is stored under the key ``weights_decb_<threshold>``.  The resulting dict
    is pickled to ``weights_uar_dict_decb`` next to the checkpoint.  When the
    checkpoint has no usable ``'weights'`` entry the written dict is empty.

    Args:
        output_folders: ``pathlib.Path`` of the directory that holds one
            sub-directory per model type.

    Raises:
        KeyError: If a checkpoint has no ``'labels'`` entry.
    """
    for model_type in output_folders.iterdir():
        # Plain files next to the model folders would make iterdir() below
        # raise NotADirectoryError; they are not model folders, so skip them.
        if not model_type.is_dir():
            continue
        for param_config in model_type.iterdir():
            if not param_config.is_dir():
                continue
            for ensemble_file in param_config.rglob('ensemble_checkpoints'):
                # NOTE(security): pickle.load executes arbitrary code from the
                # file; only run this on checkpoints you produced yourself.
                with ensemble_file.open('rb') as f:
                    loaded_ensemble_file = pickle.load(f)
                labels = loaded_ensemble_file.pop('labels')

                # The scores do not depend on the decision boundary, so look
                # them up once instead of once per boundary.  A KeyError here
                # would have repeated for every boundary anyway.
                try:
                    weights = loaded_ensemble_file['weights']
                    scores = [weights[i] for i in range(len(labels))]
                except KeyError:
                    scores = None

                uar_dict = dict()
                if scores is not None:
                    for decision_boundry in decision_boundrys:
                        decisions = [score > decision_boundry for score in scores]
                        uar_dict[f'weights_decb_{decision_boundry}'] = metrics.recall_score(
                            labels, decisions, labels=[0, 1], average='macro',
                            sample_weight=None, zero_division='warn')

                # Written even when empty, exactly as before: the file's
                # presence is what later steps search for.
                with (ensemble_file.parent / 'weights_uar_dict_decb').open('wb') as ef:
                    pickle.dump(uar_dict, ef, protocol=pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
|
||||
def _read_weights_csv(csv_path):
    """Parse a two-column ``prediction,label`` CSV into two lists of floats."""
    predictions, labels = [], []
    with csv_path.open('r') as f:
        f.readline()  # The first line is the header row.
        for row in f:
            row = row.strip()
            if not row:
                # A blank (e.g. trailing) line would otherwise crash float('').
                continue
            prediction, label = [float(x) for x in row.split(',')]
            predictions.append(prediction)
            labels.append(label)
    return predictions, labels


def _ensemble_votes(prediction_lists, n_samples, decision_boundry):
    """Return per-sample (mean-vote, majority-vote) decisions of an ensemble."""
    n_members = len(prediction_lists)
    mean_votes, majority_votes = [], []
    for i in range(n_samples):
        scores = [predictions[i] for predictions in prediction_lists]
        # Mean vote: threshold the average score of all members.
        mean_votes.append(int(sum(scores) / n_members > decision_boundry))
        # Majority vote: strictly more than half of the members must exceed
        # the threshold, so a tie counts as a negative decision.
        positives = sum(score > decision_boundry for score in scores)
        majority_votes.append(positives > n_members // 2)
    return mean_votes, majority_votes


def accumulate_predictions_along_paramter_within_model(outpath):
    """Ensemble predictions across parameter configurations per model type.

    For every model-type directory in ``outpath`` each parameter-configuration
    sub-directory is searched recursively for ``weights_uar_dict_decb`` marker
    files.  The ``weights.csv`` next to each marker (header row, then
    ``prediction,label`` rows) is read; markers without that file are skipped.
    Predictions are grouped by the name of the directory two levels above the
    marker and, inside a group, by parameter-configuration name.  The labels of
    the first file read are used as ground truth for the whole model type.

    For each threshold in the module-level ``decision_boundrys`` every group is
    combined once by mean vote and once by majority vote, the macro-averaged
    recall over classes 0 and 1 is computed per group, and the mean and
    standard deviation over the groups are stored.  The results are written
    into the model-type directory as the pickle ``parameter_ensemble_uar_dict``
    and as ``parameter_ensemble_uar_dict.csv``.

    Model types without any readable predictions are skipped, so no output
    files are written for them.

    Args:
        outpath: ``pathlib.Path`` of the directory that holds one
            sub-directory per model type.

    Raises:
        IndexError: If a prediction list is shorter than the label list.
    """
    for model_type in outpath.iterdir():
        # Plain files would make iterdir() below raise NotADirectoryError.
        if not model_type.is_dir():
            continue

        version_dicts = defaultdict(dict)
        labels = None
        for parameter_configuration in model_type.iterdir():
            if not parameter_configuration.is_dir():
                continue
            for version in parameter_configuration.rglob('weights_uar_dict_decb'):
                try:
                    predictions, version_labels = _read_weights_csv(version.parent / 'weights.csv')
                except FileNotFoundError:
                    continue
                if labels is None:
                    # Only the first file's labels are kept as ground truth.
                    labels = version_labels
                version_dicts[version.parent.parent.name][parameter_configuration.name] = predictions

        # Nothing to ensemble: without this guard np.mean([]) would emit NaN
        # results (plus a RuntimeWarning) and write meaningless output files.
        if not version_dicts or not labels:
            continue

        final_dict = dict()
        for decision_boundry in decision_boundrys:
            uars = {'mean': [], 'majority': []}
            for version_dict in version_dicts.values():
                mean_votes, majority_votes = _ensemble_votes(
                    list(version_dict.values()), len(labels), decision_boundry)
                for vote, decisions in (('mean', mean_votes), ('majority', majority_votes)):
                    uars[vote].append(
                        metrics.recall_score(labels, decisions, labels=[0, 1], average='macro',
                                             sample_weight=None, zero_division='warn'))
            for vote in ['mean', 'majority']:
                final_dict[f'weights_decb_{decision_boundry}_{vote}_vote_mean_uar'] = np.mean(uars[vote])
                final_dict[f'weights_decb_{decision_boundry}_{vote}_vote_std_uar'] = np.std(uars[vote])

        with (model_type / 'parameter_ensemble_uar_dict').open('wb') as f:
            pickle.dump(final_dict, f, pickle.HIGHEST_PROTOCOL)

        # newline='' lets the csv module control line endings itself, as the
        # csv documentation requires for files handed to a writer.
        with (model_type / 'parameter_ensemble_uar_dict.csv').open('w', newline='') as f:
            headers = ['model_type', 'vote', 'decision_boundry', 'mean/std', 'value']
            writer = csv.DictWriter(f, delimiter=',', lineterminator='\n', fieldnames=headers)
            writer.writeheader()
            for result_key, value in final_dict.items():
                # Key layout: weights_decb_<boundary>_<vote>_vote_<mean|std>_uar
                splited_key = result_key.split('_')
                writer.writerow({
                    'model_type': model_type.name,
                    'vote': splited_key[3],
                    'decision_boundry': splited_key[2],
                    'mean/std': splited_key[5],
                    'value': value,
                })
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Results are read from (and written to) the 'output' folder located in
    # the parent of the current working directory.
    outpath = Path().absolute().parent / 'output'

    # Only the disabled steps below consume these two values.  They stay
    # commented out with them so the script does not walk the whole output
    # tree for a list that is never used.
    # config_filename = 'config.ini'
    # output_folders_path = list(outpath.rglob('outputs'))

    # Accumulate the Predictions
    # accumulate_predictions(config_filename, output_folders_path)

    # Accumulate the UARS
    # accumulate_uars(outpath)

    # Find the Best UARS per paramter and Model and combine predictions
    accumulate_predictions_along_paramter_within_model(outpath)

    # Gather Results to final CSV
    # gather_results(config_filename, outpath)
|
Reference in New Issue
Block a user