diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_0.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_0.h5 new file mode 100644 index 0000000000000000000000000000000000000000..2959c83aab384cb6fd27cf8e9466028b4a1cecc5 Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_0.h5 differ diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_1.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_1.h5 new file mode 100644 index 0000000000000000000000000000000000000000..b378bb559011d7d804948e37b7004975f01c44b1 Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_1.h5 differ diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_2.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_2.h5 new file mode 100644 index 0000000000000000000000000000000000000000..c411ab9d834d3ddec27535faf0270c5749ec732e Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_2.h5 differ diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_3.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_3.h5 new file mode 100644 index 0000000000000000000000000000000000000000..6767b621e8964e21b2df966a0e81864c76974b90 Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_3.h5 differ diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_4.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_4.h5 new file mode 100644 index 0000000000000000000000000000000000000000..78ea4ced87fd35db7c494c592fa1d39baea0b8b8 Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_4.h5 differ diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_5.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_5.h5 new file mode 100644 index 0000000000000000000000000000000000000000..12b30c9d94828806f76160977c4e1bc49a39dd4c Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_5.h5 differ diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_6.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_6.h5 new file mode 100644 index 0000000000000000000000000000000000000000..1c7a8bd20e54530d2f452aaf2f91c6547989f7fb Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_6.h5 differ diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_7.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_7.h5 new file mode 100644 index 0000000000000000000000000000000000000000..61944c29ce16547d8ca894dd4d7b52c213153df7 Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_7.h5 differ diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores.npy b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores.npy new file mode 100644 index 0000000000000000000000000000000000000000..4b9c97e2ed1fb18ec9e7d92a209f757e7923887c Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores.npy differ diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores.txt b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores.txt new file mode 100644 index 0000000000000000000000000000000000000000..7b03d81da7c9e19cb40d0ab07526ff2717098fe1 --- /dev/null +++ b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores.txt @@ -0,0 
+1,34 @@ +Fold 0: +[0.8911138923654568, 0.8053691275167785, 0.8218390804597702, 0.6759474091260634, 0.49026345933562426, 0.5414634146341464, 0.8926968584251326, 0.6567685589519651, 0.665083135391924] (f1-score by class) +[0.8917835671342685, 0.8163265306122449, 0.7918050941306756, 0.6731361675908811, 0.42971887550200805, 0.4652137468566639, 0.9121227280306821, 0.6516464471403813, 0.7494646680942184] (f1 score (custom) by class) +Fold 1: +[0.9430496019595835, 0.7955390334572491, 0.8764044943820225, 0.6267348429510592, 0.4487334137515078, 0.5780240073868883, 0.9217391304347826, 0.6927480916030534, 0.5359477124183006] (f1-score by class) +[0.9454813359528488, 0.7985074626865671, 0.8590308370044053, 0.6886035313001605, 0.37697608431293067, 0.5227120908483633, 0.9260621666360125, 0.6638624725676664, 0.5099502487562189] (f1 score (custom) by class) +Fold 2: +[0.9236812570145904, 0.886021505376344, 0.8148148148148148, 0.609009009009009, 0.33070866141732286, 0.4550499445061043, 0.9151805132666376, 0.6416144745998609, 0.7478632478632479] (f1-score by class) +[0.9369307832422586, 0.871404399323181, 0.8560311284046692, 0.6083513318934485, 0.26717557251908397, 0.3833208676140613, 0.916855499389925, 0.6847890671420083, 0.7383966244725738] (f1 score (custom) by class) +Fold 3: +[0.8853333333333333, 0.852910052910053, 0.85625, 0.6520423600605144, 0.5238095238095238, 0.4774774774774775, 0.9065713008493518, 0.6469534050179212, 0.5333333333333333] (f1-score by class) +[0.9019288236892149, 0.8722943722943723, 0.865992414664981, 0.6574130567419159, 0.5126771066368382, 0.45532646048109965, 0.8891616976499473, 0.6215564738292011, 0.5037037037037037] (f1 score (custom) by class) +Fold 4: +[0.9075844486934354, 0.9157769869513642, 0.8726287262872628, 0.6608695652173913, 0.39397741530740277, 0.691970802919708, 0.8963752665245203, 0.6635338345864662, 0.6190476190476191] (f1-score by class) +[0.9079316500892629, 0.90994813767091, 0.9096045197740112, 0.6763972944108223, 0.37776708373435997, 0.6482494529540481, 0.9167829727843685, 0.6308077197998571, 0.6190476190476191] (f1 score (custom) by class) +Fold 5: +[0.9178683385579938, 0.8285356695869838, 0.846441947565543, 0.6748654880860876, 0.4360400444938821, 0.5719360568383659, 0.8780926675663517, 0.6996402877697842, 0.582716049382716] (f1-score by class) +[0.9182137481184145, 0.8242031872509961, 0.8407738095238095, 0.6540524433849821, 0.4115917681646367, 0.4972205064854849, 0.8467811903522471, 0.7158630842841369, 0.6203995793901157] (f1 score (custom) by class) +Fold 6: +[0.8995010691375623, 0.8513341804320204, 0.8838709677419355, 0.6583184257602862, 0.48705882352941177, 0.5072463768115942, 0.9056437389770723, 0.6950092421441775, 0.6325301204819277] (f1-score by class) +[0.8937677053824362, 0.8230958230958231, 0.8477722772277227, 0.6424581005586593, 0.4370777027027027, 0.48380726698262244, 0.9066031073446328, 0.7123910572186434, 0.621301775147929] (f1 score (custom) by class) +Fold 7: +[0.9263301500682128, 0.8578371810449574, 0.909952606635071, 0.6057046979865772, 0.4444444444444444, 0.5838509316770186, 0.9132340052585451, 0.7061266874350987, 0.518796992481203] (f1-score by class) +[0.9301369863013699, 0.8547215496368039, 0.9393346379647749, 0.625866851595007, 0.3828547648772368, 0.5042918454935622, 0.923431407302375, 0.6738010305192231, 0.5646481178396072] (f1 score (custom) by class) + + + ==> Score by CV: +{'fold_0': 0.7156161040229845, 'fold_1': 0.7132133698160497, 'fold_2': 0.7026603808742147, 'fold_3': 0.703853420754612, 'fold_4': 0.7357516295039077, 'fold_5': 
0.715126283316412, 'fold_6': 0.7245014383351098, 'fold_7': 0.7184752996701254} (f1-score)
+{'fold_0': 0.7090242027880027, 'fold_1': 0.6990206922294637, 'fold_2': 0.695917252666801, 'fold_3': 0.6977837899656971, 'fold_4': 0.7329484944739176, 'fold_5': 0.7032332574394248, 'fold_6': 0.7075860906290191, 'fold_7': 0.7110096879477734} (f1-score (custom))
+
+ ==> Average score CV:
+CV f1-score: 0.716149740786677 (+/- 0.010015674854875221)
+CV f1-score (custom): 0.7070654335175124 (+/- 0.011064764613146013)
+
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores_custom.npy b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores_custom.npy
new file mode 100644
index 0000000000000000000000000000000000000000..a3ef8da123c5333fb30afbe1661edf8a5b53a428
Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores_custom.npy differ
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/stage_1_4.py b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/stage_1_4.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb0eba0ad58d4d9f4d33ccac0571d6f273027e8c
--- /dev/null
+++ b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/stage_1_4.py
@@ -0,0 +1,209 @@
+"""
+Inception-ResNet LSTM model (stage_1_4)
+"""
+from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
+from sklearn.model_selection import StratifiedKFold
+import os
+from utils import encode_labels, custom_multiclass_f1, multiclass_f1
+import shutil
+import ntpath
+from collections import Counter
+import numpy as np
+from keras.initializers import glorot_uniform, he_normal
+from preprocess_and_segmentation import load_data, segment_all_dict_data, reshape_segmented_arrays
+from preprocessor import preprocess_input_data
+from utils import encode_labels
+import tensorflow.keras as keras
+import pandas as pd
+# import sys # This gives an error on the CSC server
+from model_architecture_V01 import Inc_ResNet_LSTM_v02
+
+
+def cross_validation(arr_of_segments, arr_of_labels, arr_of_IDs, ids_labels):
+    """ Subject cross-validation """
+
+    global epochs, batch_size, n_folds
+
+    # split the subjects on n folds keeping balance
+    ids = ids_labels['subject']
+    skf = StratifiedKFold(n_splits=n_folds, random_state=None, shuffle=True)
+    subj_folds = [(ids[test_index]) for train_index, test_index in skf.split(ids_labels['subject'],
+                                                                             ids_labels['label']
+                                                                             )]
+
+    # true labels of each subject
+    subject_labels = {ID: None for ID in list(ids)}
+    for ID, label in zip(arr_of_IDs, arr_of_labels):
+        subject_labels[ID[0]] = label
+
+    # to save the predictions of each subject
+    subject_predictions = {ID: [] for ID in list(ids)}
+
+    # to save the f1-score of each fold
+    scores = {}
+    scores_custom = {}
+
+    for i, validation_fold in enumerate(subj_folds):
+        print(f"\n\nFold {i} ------------------------------------------------- \n")
+
+        # selector
+        selector = np.isin(arr_of_IDs.squeeze(), validation_fold)
+
+        # validation
+        arr_seg_validation = arr_of_segments[selector]
+        arr_labels_validation = arr_of_labels[selector]
+        arr_IDs_validation = arr_of_IDs[selector]
+
+        # train
+        arr_seg_train = arr_of_segments[np.invert(selector)]
+        arr_labels_train = arr_of_labels[np.invert(selector)]
+        arr_IDs_train = arr_of_IDs[np.invert(selector)]
+
+        # TODO
+        # Up-balance 'STE' (3x)
+        add_to_input = []
+        add_to_labels = []
+        add_to_IDs = []
+        for j in range(len(arr_labels_train)):
+            if arr_labels_train[j][8] == 1:
+                add_to_input.append(arr_seg_train[j])
+                add_to_labels.append(arr_labels_train[j])
+                add_to_IDs.append(arr_IDs_train[j])
+
+        arr_seg_train_balanced = np.concatenate([add_to_input, arr_seg_train, add_to_input])
+        arr_labels_train_balanced = np.concatenate([add_to_labels, arr_labels_train, add_to_labels])
+        arr_IDs_train_balanced = np.concatenate([add_to_IDs, arr_IDs_train, add_to_IDs])
+
+        # Build model
+        model = Inc_ResNet_LSTM_v02(segment_size, 12, classes=9)
+
+        # TODO
+        # callbacks
+        earlyStopping = keras.callbacks.EarlyStopping(monitor='val_categorical_accuracy', patience=10, verbose=0, mode='max')
+        mcp_save = keras.callbacks.ModelCheckpoint(os.path.join(experiments_dir, experiment_name, f"model_fold_{i}.h5"),
+                                                   save_best_only=True, monitor='val_categorical_accuracy', mode='max')
+        reduce_lr_loss = keras.callbacks.ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.1, patience=7, verbose=1,
+                                                           min_delta=1e-4,
+                                                           mode='max')
+
+        # model.summary()
+
+        model.fit(arr_seg_train_balanced, arr_labels_train_balanced, epochs=epochs, batch_size=batch_size,
+                  verbose=1, validation_data=(arr_seg_validation, arr_labels_validation), shuffle=True,
+                  callbacks=[earlyStopping, mcp_save, reduce_lr_loss])
+
+        # re-load best model
+        del model
+        model = keras.models.load_model(os.path.join(experiments_dir, experiment_name, f"model_fold_{i}.h5"))
+        _, accuracy = model.evaluate(arr_seg_validation, arr_labels_validation, batch_size=batch_size, verbose=1)
+        predictions = model.predict(arr_seg_validation, verbose=1)
+
+        # print fold results
+        print("Accuracy:", accuracy)
+
+        f1_score, f1_score_list = multiclass_f1(arr_labels_validation, predictions, return_list=True)
+        print("\nf1 score:", f1_score)
+        print(f1_score_list)
+
+        f1_score_custom, f1_score_custom_list = custom_multiclass_f1(arr_labels_validation, predictions,
+                                                                     return_list=True)
+        print("\nf1 score (custom):", f1_score_custom)
+        print(f1_score_custom_list)
+
+        # save predictions
+        for ID, pred in zip(arr_IDs_validation, predictions):
+            subject_predictions[ID[0]].append(pred)
+
+        # save f1-score
+        scores[f"fold_{i}"] = f1_score
+        scores_custom[f"fold_{i}"] = f1_score_custom
+
+        # save f1-score list (text file):
+        with open(os.path.join(experiments_dir, experiment_name, "scores.txt"), 'a') as f:
+            f.write(f"Fold {str(i)}:\n"
+                    f"{str(f1_score_list)} (f1-score by class) \n"
+                    f"{str(f1_score_custom_list)} (f1 score (custom) by class) \n")
+
+    # Average f1-score
+    m, s = np.mean([v for v in scores.values()]), np.std([v for v in scores.values()])
+    m_c, s_c = np.mean([v for v in scores_custom.values()]), np.std([v for v in scores_custom.values()])
+
+    # save labels (to disk)
+    np.save(os.path.join(experiments_dir, experiment_name, "subject_labels.npy"), subject_labels)
+
+    # save predictions (to disk)
+    np.save(os.path.join(experiments_dir, experiment_name, "subject_predictions.npy"), subject_predictions)
+
+    # save f1-scores (to disk)
+    np.save(os.path.join(experiments_dir, experiment_name, "scores.npy"), scores)
+    np.save(os.path.join(experiments_dir, experiment_name, "scores_custom.npy"), scores_custom)
+
+    print("\n==========================================================\n")
+    print(f"CV f1-score: {str(m)} (+/- {str(s)}) \nCV f1-score (custom): {str(m_c)} (+/- {str(s_c)})")
+
+    # save f1-scores (text file)
+    with open(os.path.join(experiments_dir, experiment_name, "scores.txt"), 'a') as f:
+        f.write("\n\n ==> Score by CV:")
+        f.write(f"\n{str(scores)} (f1-score) \n{str(scores_custom)} (f1-score (custom))")
+        f.write("\n\n ==> Average score CV:")
+        f.write(f"\nCV f1-score: {str(m)} (+/- {str(s)}) \nCV f1-score (custom): {str(m_c)} (+/- {str(s_c)})\n\n")
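+
+
+# --- Editorial sketch: hypothetical helper, not part of the original flow. ---
+# The dictionaries saved above survive the np.save/np.load round-trip via
+# pickle, so the CV summary reported in scores.txt can be re-derived offline:
+def recompute_cv_summary(experiment_path):
+    """Recompute mean/std of the per-fold f1-scores from scores.npy (sketch)."""
+    scores = np.load(os.path.join(experiment_path, "scores.npy"), allow_pickle=True).item()
+    values = list(scores.values())
+    return np.mean(values), np.std(values)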
+
+
+if __name__ == '__main__':
+
+    # Config
+    experiment_name = "stage_1_4"
+    experiments_dir = "experiments_stage_1"
+    data_dir = 'data/train_balanced'
+    segment_size = 2000
+    overlap = 0.5
+    epochs = 50 # ???
+    batch_size = 54 # ???
+    n_folds = 8
+
+    # create directory for the experiment
+    if not os.path.exists(os.path.join(experiments_dir, experiment_name)):
+        os.makedirs(os.path.join(experiments_dir, experiment_name))
+    else:
+        raise NameError(f"An experiment with the name '{experiment_name}' already exists"
+                        f" in the '{experiments_dir}' directory.")
+
+    # save a copy of the script
+    shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__)))
+
+    # This gives an error on the CSC server when trying to import sys
+    # # Log stdout
+    # log_file = os.path.join(experiments_dir, experiment_name, 'logfile.log')
+    # sys.stdout = Logger(log_file)
+
+    # load data
+    data = load_data(data_dir)
+
+    # create array with the label of each subject (used to keep the labels
+    # balanced across the folds of the cross-validation)
+    dic_labels = {}
+    for k, v in data.items():
+        dic_labels[k] = data[k]['info']['Dx']
+
+    ids_labels = pd.Series(dic_labels).reset_index()
+    ids_labels.columns = ['subject', 'label']
+
+    # pre-process signals
+    data = preprocess_input_data(data)
+
+    # segment signal
+    data = segment_all_dict_data(data, segment_size, overlap)
+
+    arr_of_segments, arr_of_labels, arr_of_IDs = reshape_segmented_arrays(data,
+                                                                          shuffle_IDs=True,
+                                                                          # Do not shuffle the segments to keep the
+                                                                          # order in time of the predictions
+                                                                          shuffle_segments=False,
+                                                                          segment_standardization_flag=True)
+
+    # Encode labels
+    arr_of_labels = np.array([i[0]['Dx'] for i in arr_of_labels])
+    arr_of_labels = encode_labels(arr_of_labels)
+
+    # Cross-validation
+    cross_validation(arr_of_segments, arr_of_labels, arr_of_IDs, ids_labels)
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/subject_labels.npy b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/subject_labels.npy
new file mode 100644
index 0000000000000000000000000000000000000000..ca852a733ab0b171f547084b3c77fd2b3efb5e33
Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/subject_labels.npy differ
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/subject_predictions.npy b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/subject_predictions.npy
new file mode 100644
index 0000000000000000000000000000000000000000..051028cba050ff83874ce665f36a9211433a16dd
Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/subject_predictions.npy differ
diff --git a/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/best_models/stage_1_1_fold_2.h5 b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/best_models/stage_1_1_fold_2.h5
new file mode 100644
index 0000000000000000000000000000000000000000..18dde3762a4317602359deaed2951431ebe2e7af
Binary files /dev/null and b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/best_models/stage_1_1_fold_2.h5 differ
diff --git a/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/best_models/stage_1_2_fold_4.h5 b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/best_models/stage_1_2_fold_4.h5
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/info.txt 
b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/info.txt new file mode 100644 index 0000000000000000000000000000000000000000..81e6d45a173c0831f1ca2f2cb2be14cf8d3b47dd --- /dev/null +++ b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/info.txt @@ -0,0 +1,21 @@ +Model stage 1 1: experiments_stage_1/stage_1_1_001_baseline +Model stage 2 2: experiments_stage_1/stage_1_3_002_Inc_ResNet_LSTM_v01 + + +Stage 1 1 f1-score: 0.7166549481041105 +[0.9266709928617781, 0.8609566184649611, 0.8327402135231317, 0.6370967741935484, 0.43922018348623854, 0.5284872298624754, 0.9198123743578289, 0.6876687668766877, 0.6172413793103448] + +Stage 1 1 f1-score (custom): 0.7104303349036974 +[0.9288409002211526, 0.8609566184649611, 0.8187543736878936, 0.6624182458494047, 0.3898615635179153, 0.477797513321492, 0.9109007255353034, 0.7059693217519867, 0.6383737517831669] + +Stage 1 2 f1-score: 0.7049611543306996 +[0.9364461738002594, 0.8802292263610315, 0.8833922261484098, 0.6249481972648155, 0.40916530278232405, 0.5281954887218046, 0.930981256890849, 0.6598639455782312, 0.49142857142857144] + +Stage 1 2 f1-score (custom): 0.692913257056548 +[0.9390037716217974, 0.8644754615038271, 0.8722958827634334, 0.6394165535956581, 0.3749250149970006, 0.49108703250611674, 0.929137323943662, 0.6470364017533828, 0.47884187082405344] + +Stage 2 f1-score: 0.7946091040231611 +[0.9327902240325866, 0.8833922261484098, 0.9032258064516129, 0.7572254335260116, 0.6129032258064516, 0.8104089219330854, 0.9301075268817204, 0.75, 0.5714285714285714] + +Stage 2 f1-score (custom): 0.7783335126220615 +[0.9362224039247752, 0.8704735376044568, 0.8860759493670886, 0.7293986636971047, 0.6129032258064516, 0.7910014513788098, 0.9316101238556812, 0.7448036951501155, 0.5025125628140703] \ No newline at end of file diff --git a/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/model_stage_2.h5 b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/model_stage_2.h5 new file mode 100644 index 0000000000000000000000000000000000000000..4df7054805998fdef5d5a520544505cd2224eee5 Binary files /dev/null and b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/model_stage_2.h5 differ diff --git a/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/score_stage_2.txt b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/score_stage_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..f712106003d11ae68ab8dd372d2ed7f7e776b674 --- /dev/null +++ b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/score_stage_2.txt @@ -0,0 +1,2 @@ +f1-score: 0.7946091040231611 + f1-score (custom): 0.7783335126220615 \ No newline at end of file diff --git a/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/stage_2.py b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/stage_2.py new file mode 100644 index 0000000000000000000000000000000000000000..e5858046220dd1db58660259993cc9b433a66bc6 --- /dev/null +++ b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/stage_2.py @@ -0,0 +1,244 @@ +""" +LSTM model (stage_2) +""" + +from keras import Input, Model +import numpy as np +import os +from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau +from keras.layers import Dense, LSTM, Dropout, LeakyReLU, BatchNormalization, Masking, Bidirectional +from keras.models import load_model +from keras.optimizers import Adam +import tensorflow.keras as keras +from logger import Logger +from preprocess_and_segmentation import load_data, segment_all_dict_data, 
reshape_segmented_arrays +from preprocessor import preprocess_input_data +from utils import custom_multiclass_f1, split_train_validation_part_2, multiclass_f1, encode_labels +import shutil +import ntpath +# import sys + + +def build_model(n_timesteps, n_features, n_outputs): + # model + input = Input(shape=(n_timesteps, n_features), dtype='float32') + x = Masking(mask_value=0.)(input) + x = Bidirectional(LSTM(units=30, return_sequences=True))(x) + x = BatchNormalization()(x) + x = LeakyReLU()(x) + x = Bidirectional(LSTM(units=30))(x) + x = BatchNormalization()(x) + x = LeakyReLU()(x) + output = Dense(n_outputs, activation='sigmoid')(x) + + model = Model(inputs=input, outputs=output) + # opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False) + model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['categorical_accuracy']) + model.summary() + + return model + + +if __name__ == '__main__': + + # Config + experiment_name = "stage_2_002_s11_s13" + experiments_dir = "experiments_stage_2" + + data_dir_1 = 'experiments_stage_1/stage_1_1_001_baseline' + data_dir_2 = 'experiments_stage_1/stage_1_3_002_Inc_ResNet_LSTM_v01' + data_test_dir = 'data/test_balanced' + + labels_file = 'subject_labels.npy' + predictions_file = 'subject_predictions.npy' + scores_file = 'scores_custom.npy' + + segment_size = 2000 + overlap = 0.5 + epochs = 30 + batch_size = 18 + n_timesteps = 120 + + # create directory for the experiment + if not os.path.exists(os.path.join(experiments_dir, experiment_name, 'best_models')): + os.makedirs(os.path.join(experiments_dir, experiment_name, 'best_models')) + else: + raise NameError(f"Already exist an experiment with the name '{experiment_name}'" + f" in the '{experiments_dir}' directory.") + + # save a copy of the script + shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__))) + + # # Log stdout + # log_file = os.path.join(experiments_dir, experiment_name, 'logfile.log') + # sys.stdout = Logger(log_file) + + subject_labels = np.load(os.path.join(data_dir_1, labels_file), allow_pickle=True).item() + subject_predictions = np.load(os.path.join(data_dir_1, predictions_file), allow_pickle=True).item() + scores_1 = np.load(os.path.join(data_dir_1, scores_file), allow_pickle=True).item() + + subject_labels_2 = np.load(os.path.join(data_dir_1, labels_file), allow_pickle=True).item() + subject_predictions_2 = np.load(os.path.join(data_dir_2, predictions_file), allow_pickle=True).item() + scores_2 = np.load(os.path.join(data_dir_2, scores_file), allow_pickle=True).item() + + # pad inputs + subject_predictions_padded = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions.keys()} + for k, v in subject_predictions.items(): + subject_predictions_padded[k][-len(v):, :] = v + + # pad inputs to the longest input sequence + subject_predictions_padded_2 = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions_2.keys()} + for k, v in subject_predictions_2.items(): + subject_predictions_padded_2[k][-len(v):, :] = v + + # concatenate predictions of stages 1 + for k, v in subject_predictions.items(): + subject_predictions_padded[k] = np.concatenate([subject_predictions_padded[k], subject_predictions_padded_2[k]], + axis=1) + + ################################################################################################################### + ## Stage 1 on test + + # Get the best stage_1 models + best_fold_1 = [(k, v) for k, v in sorted(scores_1.items(), key=lambda item: item[1], reverse=True)][0][0] + best_fold_2 = [(k, 
v) for k, v in sorted(scores_2.items(), key=lambda item: item[1], reverse=True)][0][0]
+
+    # Models stage 1
+    model_stage_1_1 = load_model(os.path.join(data_dir_1, f"model_{best_fold_1}.h5")) # TODO save
+    model_stage_1_2 = keras.models.load_model(os.path.join(data_dir_2, f"model_{best_fold_2}.h5")) # TODO save
+
+    # save best models
+    model_stage_1_1.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_1_{best_fold_1}.h5"))
+    model_stage_1_2.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_2_{best_fold_2}.h5"))
+
+    # Load test data
+    data_test = load_data(data_test_dir)
+    data_test = preprocess_input_data(data_test)
+    data_test = segment_all_dict_data(data_test, segment_size, overlap)
+    arr_of_segments, arr_of_labels, arr_of_IDs = reshape_segmented_arrays(data_test,
+                                                                          shuffle_IDs=False,
+                                                                          # Do not shuffle the segments to keep the
+                                                                          # order in time of the predictions
+                                                                          shuffle_segments=False,
+                                                                          segment_standardization_flag=True)
+    # Encode labels
+    arr_of_labels = np.array([i[0]['Dx'] for i in arr_of_labels])
+    arr_of_labels = encode_labels(arr_of_labels)
+
+    # Predictions stages 1
+    predictions_s1_1 = model_stage_1_1.predict(arr_of_segments, verbose=1)
+    predictions_s1_2 = model_stage_1_2.predict(arr_of_segments, verbose=1)
+
+    # Score stages 1
+    f1_score_s1_1, f1_score_p1_list = multiclass_f1(arr_of_labels, predictions_s1_1, return_list=True)
+    f1_score_custom_s1_1, f1_score_custom_s1_list_1 = custom_multiclass_f1(arr_of_labels, predictions_s1_1,
+                                                                           return_list=True)
+    print("\nStage 1 1 f1-score: ", f1_score_s1_1)
+    print(f1_score_p1_list)
+    print("\nStage 1 1 f1-score (custom):", f1_score_custom_s1_1)
+    print(f1_score_custom_s1_list_1, "\n\n")
+
+    f1_score_s1_2, f1_score_p1_list_2 = multiclass_f1(arr_of_labels, predictions_s1_2, return_list=True)
+    f1_score_custom_s1_2, f1_score_custom_s1_list_2 = custom_multiclass_f1(arr_of_labels, predictions_s1_2,
+                                                                           return_list=True)
+    print("\nStage 1 2 f1-score: ", f1_score_s1_2)
+    print(f1_score_p1_list_2)
+    print("\nStage 1 2 f1-score (custom):", f1_score_custom_s1_2)
+    print(f1_score_custom_s1_list_2, "\n\n")
+
+    # concatenate predictions of stages 1
+    predictions_stages_1 = np.concatenate([predictions_s1_1, predictions_s1_2], axis=-1)
+
+    # Group by subject & padding:
+
+    # true labels of each subject
+    subject_labels_test = {ID: None for ID in list(np.unique(arr_of_IDs))}
+    for ID, label in zip(arr_of_IDs, arr_of_labels):
+        subject_labels_test[ID[0]] = label
+
+    # stages 1 predictions for each subject
+    subject_predictions_test = {ID: [] for ID in list(np.unique(arr_of_IDs))}
+    for ID, pred in zip(arr_of_IDs, predictions_stages_1):
+        subject_predictions_test[ID[0]].append(pred)
+
+    # pad inputs
+    subject_predictions_padded_test = {k: np.zeros((n_timesteps, 18)) for k in subject_predictions_test.keys()}
+    for k, v in subject_predictions_test.items():
+        subject_predictions_padded_test[k][-len(v):, :] = v
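+
+    # --- Editorial sketch: hypothetical sanity check, not in the original. ---
+    # Each row of a stage-2 input holds one segment's concatenated stage-1
+    # outputs (2 models x 9 class probabilities = 18 features), left-padded
+    # with zero rows up to n_timesteps so the Masking layer in build_model
+    # can skip the padding.
+    for k, v in subject_predictions_padded_test.items():
+        assert v.shape == (n_timesteps, 18), f"unexpected shape for subject {k}"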
+
+    # convert to array
+    X_val, y_val, _, _ = split_train_validation_part_2(subject_predictions_padded_test, subject_labels_test, split=0)
+
+    ## end stage 1 on test
+    ###################################################################################################################
+    # Stage 2
+
+    # convert to array
+    X_train, y_train, _, _ = split_train_validation_part_2(subject_predictions_padded, subject_labels, split=0)
+
+    # Model
+    model_stage_2 = build_model(n_timesteps, 18, 9)
+
+    # callbacks
+    earlyStopping = EarlyStopping(monitor='val_categorical_accuracy', patience=16, verbose=0, mode='max')
+    mcp_save = ModelCheckpoint(os.path.join(experiments_dir, experiment_name, f"model_stage_2.h5"),
+                               save_best_only=True, monitor='val_categorical_accuracy', mode='max')
+    reduce_lr_loss = ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.1, patience=10, verbose=1,
+                                       min_delta=1e-4,
+                                       mode='max')
+
+    # train stage 2
+    model_stage_2.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=2, shuffle=True,
+                      validation_data=(X_val, y_val),
+                      callbacks=[earlyStopping, mcp_save, reduce_lr_loss])
+
+    # reloading the best model
+    del model_stage_2
+    model_stage_2 = load_model(os.path.join(experiments_dir, experiment_name, f"model_stage_2.h5"))
+
+    # final predictions
+    _, accuracy = model_stage_2.evaluate(X_val, y_val, verbose=1)
+    final_predictions = model_stage_2.predict(X_val, verbose=1)
+
+    print(f"\nAccuracy: {accuracy}")
+
+    score, score_list = multiclass_f1(y_val, final_predictions, return_list=True)
+    print(f"\nf1-score: {score}")
+    print(score_list)
+
+    # f1-score
+    score_custom, score_custom_list = custom_multiclass_f1(y_val, final_predictions, return_list=True)
+    print(f"\nf1-score (custom): {score_custom}")
+    print(score_custom_list)
+
+    # save f1-score
+    with open(os.path.join(experiments_dir, experiment_name, "score_stage_2.txt"), 'w') as f:
+        f.write(f"f1-score: {str(score)} \n f1-score (custom): {str(score_custom)}")
+
+    # Save info and results test
+    with open(os.path.join(experiments_dir, experiment_name, "info.txt"), 'w') as f:
+        f.write(f"Model stage 1 1: {data_dir_1}\n")
+        f.write(f"Model stage 1 2: {data_dir_2}\n")
+
+        f.write(f"\n\nStage 1 1 f1-score: {str(f1_score_s1_1)}\n")
+        f.write(str(f1_score_p1_list))
+        f.write(f"\n\nStage 1 1 f1-score (custom): {str(f1_score_custom_s1_1)}\n")
+        f.write(str(f1_score_custom_s1_list_1))
+
+        f.write(f"\n\nStage 1 2 f1-score: {str(f1_score_s1_2)}\n")
+        f.write(str(f1_score_p1_list_2))
+        f.write(f"\n\nStage 1 2 f1-score (custom): {str(f1_score_custom_s1_2)}\n")
+        f.write(str(f1_score_custom_s1_list_2))
+
+        f.write(f"\n\nStage 2 f1-score: {str(score)}\n")
+        f.write(str(score_list))
+        f.write(f"\n\nStage 2 f1-score (custom): {str(score_custom)}\n")
+        f.write(str(score_custom_list))
+
+    # from sklearn.metrics import multilabel_confusion_matrix
+    #
+    #
+    # pred = np.where(predictions > 0.5, 1, 0)
+    # true = y_validation.copy()
+    #
+    # confusion = multilabel_confusion_matrix(true, pred)
diff --git a/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_1_fold_2.h5 b/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_1_fold_2.h5
new file mode 100644
index 0000000000000000000000000000000000000000..f825de1f0356c4cfcd14d8b9093c0992f62b5328
Binary files /dev/null and b/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_1_fold_2.h5 differ
diff --git a/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_2_fold_5.h5 b/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_2_fold_5.h5
new file mode 100644
index 0000000000000000000000000000000000000000..463f938c92a2fbc9604e066bc7f386184c3505a0
Binary files /dev/null and b/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_2_fold_5.h5 differ
diff --git a/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_3_fold_4.h5 b/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_3_fold_4.h5
new file mode 100644
index 0000000000000000000000000000000000000000..d46bd21445c681d3eba34f535418bfd9e1dc23f9
Binary files /dev/null and 
b/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_3_fold_4.h5 differ diff --git a/experiments_stage_2/stage_2_3x_s11_s12_s14/info.txt b/experiments_stage_2/stage_2_3x_s11_s12_s14/info.txt new file mode 100644 index 0000000000000000000000000000000000000000..9ab7d4d2df08f69f917a07be630d59e5cdffcbfd --- /dev/null +++ b/experiments_stage_2/stage_2_3x_s11_s12_s14/info.txt @@ -0,0 +1,28 @@ +Model stage 1 1: experiments_stage_1/stage_1_1_001_baseline +Model stage 2 2: experiments_stage_1/stage_1_2_001_baseline +Model stage 2 3: experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02 + + +Stage 1 1 f1-score: 0.7166549481041105 +[0.9266709928617781, 0.8609566184649611, 0.8327402135231317, 0.6370967741935484, 0.43922018348623854, 0.5284872298624754, 0.9198123743578289, 0.6876687668766877, 0.6172413793103448] + +Stage 1 1 f1-score (custom): 0.7104303349036974 +[0.9288409002211526, 0.8609566184649611, 0.8187543736878936, 0.6624182458494047, 0.3898615635179153, 0.477797513321492, 0.9109007255353034, 0.7059693217519867, 0.6383737517831669] + +Stage 1 2 f1-score: 0.671673171827851 +[0.8952380952380953, 0.8505481823427582, 0.8208955223880597, 0.6498516320474778, 0.37857577601947656, 0.5265188042430087, 0.8833333333333333, 0.6826503923278117, 0.3574468085106383] + +Stage 1 2 f1-score (custom): 0.6551974169203674 +[0.8721007289595759, 0.8318284424379232, 0.7840342124019958, 0.7088525651399903, 0.3232176262731241, 0.481651376146789, 0.8559892328398385, 0.7140251687032646, 0.32507739938080493] + +Stage 1 3 f1-score: 0.7083476597834296 +[0.8952380952380953, 0.8505481823427582, 0.8208955223880597, 0.6498516320474778, 0.37857577601947656, 0.5265188042430087, 0.8833333333333333, 0.6826503923278117, 0.3574468085106383] + +Stage 1 3 f1-score (custom): 0.7002310123150498 +[0.940453074433657, 0.8579148840351273, 0.8671328671328671, 0.6732059020791415, 0.40076335877862596, 0.5090183836281651, 0.9345300950369588, 0.6986736409490005, 0.42038690476190477] + +Stage 2 f1-score: 0.7997068957793295 +[0.9357429718875502, 0.8601398601398601, 0.8888888888888888, 0.770949720670391, 0.6666666666666666, 0.8208955223880597, 0.9413333333333334, 0.7833333333333333, 0.5294117647058824] + +Stage 2 f1-score (custom): 0.7832666870847034 +[0.9471544715447154, 0.8529819694868238, 0.8547008547008547, 0.7582417582417582, 0.6333333333333333, 0.7994186046511628, 0.9473966720343532, 0.7993197278911565, 0.45685279187817257] \ No newline at end of file diff --git a/experiments_stage_2/stage_2_3x_s11_s12_s14/model_stage_2.h5 b/experiments_stage_2/stage_2_3x_s11_s12_s14/model_stage_2.h5 new file mode 100644 index 0000000000000000000000000000000000000000..69cf19e7725afdac02f3f308004651ffeca2f0ee Binary files /dev/null and b/experiments_stage_2/stage_2_3x_s11_s12_s14/model_stage_2.h5 differ diff --git a/experiments_stage_2/stage_2_3x_s11_s12_s14/score_stage_2.txt b/experiments_stage_2/stage_2_3x_s11_s12_s14/score_stage_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..0d0abc498ca6d268a548156d648888cc08f98232 --- /dev/null +++ b/experiments_stage_2/stage_2_3x_s11_s12_s14/score_stage_2.txt @@ -0,0 +1,2 @@ +f1-score: 0.7997068957793295 + f1-score (custom): 0.7832666870847034 \ No newline at end of file diff --git a/experiments_stage_2/stage_2_3x_s11_s12_s14/stage_2_3x.py b/experiments_stage_2/stage_2_3x_s11_s12_s14/stage_2_3x.py new file mode 100644 index 0000000000000000000000000000000000000000..5840d3174d8ba4768d96cf149756ffbb84902abf --- /dev/null +++ b/experiments_stage_2/stage_2_3x_s11_s12_s14/stage_2_3x.py @@ -0,0 
+1,275 @@ +""" +LSTM model (stage_2) +""" + +from keras import Input, Model +import numpy as np +import os +from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau +from keras.layers import Dense, LSTM, Dropout, LeakyReLU, BatchNormalization, Masking, Bidirectional +from keras.models import load_model +from keras.optimizers import Adam +import tensorflow.keras as keras +from logger import Logger +from preprocess_and_segmentation import load_data, segment_all_dict_data, reshape_segmented_arrays +from preprocessor import preprocess_input_data +from utils import custom_multiclass_f1, split_train_validation_part_2, multiclass_f1, encode_labels +import shutil +import ntpath +# import sys + + +def build_model(n_timesteps, n_features, n_outputs): + # model + input = Input(shape=(n_timesteps, n_features), dtype='float32') + x = Masking(mask_value=0.)(input) + x = Bidirectional(LSTM(units=30, return_sequences=True))(x) + x = BatchNormalization()(x) + x = LeakyReLU()(x) + x = Bidirectional(LSTM(units=30))(x) + x = BatchNormalization()(x) + x = LeakyReLU()(x) + output = Dense(n_outputs, activation='sigmoid')(x) + + model = Model(inputs=input, outputs=output) + # opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False) + model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['categorical_accuracy']) + model.summary() + + return model + + +if __name__ == '__main__': + + # Config + experiment_name = "stage_2_3x_s11_s12_s14" + experiments_dir = "experiments_stage_2" + + data_dir_1 = 'experiments_stage_1/stage_1_1_001_baseline' + data_dir_2 = 'experiments_stage_1/stage_1_2_001_baseline' + data_dir_3 = 'experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02' + data_test_dir = 'data/test_balanced' + + labels_file = 'subject_labels.npy' + predictions_file = 'subject_predictions.npy' + scores_file = 'scores_custom.npy' + + segment_size = 2000 + overlap = 0.5 + epochs = 30 + batch_size = 18 + n_timesteps = 120 + n_features = 27 + n_outputs = 9 + + # create directory for the experiment + if not os.path.exists(os.path.join(experiments_dir, experiment_name, 'best_models')): + os.makedirs(os.path.join(experiments_dir, experiment_name, 'best_models')) + else: + raise NameError(f"Already exist an experiment with the name '{experiment_name}'" + f" in the '{experiments_dir}' directory.") + + # save a copy of the script + shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__))) + + # # Log stdout + # log_file = os.path.join(experiments_dir, experiment_name, 'logfile.log') + # sys.stdout = Logger(log_file) + + subject_labels = np.load(os.path.join(data_dir_1, labels_file), allow_pickle=True).item() + subject_predictions = np.load(os.path.join(data_dir_1, predictions_file), allow_pickle=True).item() + scores_1 = np.load(os.path.join(data_dir_1, scores_file), allow_pickle=True).item() + + # subject_labels_2 = np.load(os.path.join(data_dir_2, labels_file), allow_pickle=True).item() # TODO + subject_predictions_2 = np.load(os.path.join(data_dir_2, predictions_file), allow_pickle=True).item() + scores_2 = np.load(os.path.join(data_dir_2, scores_file), allow_pickle=True).item() + + # subject_labels_3 = np.load(os.path.join(data_dir_1, labels_file), allow_pickle=True).item() # TODO + subject_predictions_3 = np.load(os.path.join(data_dir_3, predictions_file), allow_pickle=True).item() + scores_3 = np.load(os.path.join(data_dir_3, scores_file), allow_pickle=True).item() + + # pad inputs + subject_predictions_padded = {k: np.zeros((n_timesteps, 
9)) for k in subject_predictions.keys()} + for k, v in subject_predictions.items(): + subject_predictions_padded[k][-len(v):, :] = v + + subject_predictions_padded_2 = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions_2.keys()} + for k, v in subject_predictions_2.items(): + subject_predictions_padded_2[k][-len(v):, :] = v + + subject_predictions_padded_3 = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions_3.keys()} + for k, v in subject_predictions_3.items(): + subject_predictions_padded_3[k][-len(v):, :] = v + + # concatenate predictions of stages 1 + for k, v in subject_predictions.items(): + subject_predictions_padded[k] = np.concatenate([subject_predictions_padded[k], + subject_predictions_padded_2[k], + subject_predictions_padded_3[k]], axis=1) + + ################################################################################################################### + ## Stage 1 on test + + # Get the best stage_1 models + best_fold_1 = [(k, v) for k, v in sorted(scores_1.items(), key=lambda item: item[1], reverse=True)][0][0] + best_fold_2 = [(k, v) for k, v in sorted(scores_2.items(), key=lambda item: item[1], reverse=True)][0][0] + best_fold_3 = [(k, v) for k, v in sorted(scores_3.items(), key=lambda item: item[1], reverse=True)][0][0] + + # Models stage 1 + model_stage_1_1 = load_model(os.path.join(data_dir_1, f"model_{best_fold_1}.h5")) + model_stage_1_2 = keras.models.load_model(os.path.join(data_dir_2, f"model_{best_fold_2}.h5")) + model_stage_1_3 = keras.models.load_model(os.path.join(data_dir_3, f"model_{best_fold_3}.h5")) + + # save best models + model_stage_1_1.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_1_{best_fold_1}.h5")) + model_stage_1_2.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_2_{best_fold_2}.h5")) + model_stage_1_3.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_3_{best_fold_3}.h5")) + + # Load test data + data_test = load_data(data_test_dir) + data_test = preprocess_input_data(data_test) + data_test = segment_all_dict_data(data_test, segment_size, overlap) + arr_of_segments, arr_of_labels, arr_of_IDs = reshape_segmented_arrays(data_test, + shuffle_IDs=False, + # Do not shuffle the segments to keep the + # order in time of the predictions + shuffle_segments=False, + segment_standardization_flag=True) + # Encode labels + arr_of_labels = np.array([i[0]['Dx'] for i in arr_of_labels]) + arr_of_labels = encode_labels(arr_of_labels) + + # Predictions stages 1 + predictions_s1_1 = model_stage_1_1.predict(arr_of_segments, verbose=1) + predictions_s1_2 = model_stage_1_2.predict(arr_of_segments, verbose=1) + predictions_s1_3 = model_stage_1_3.predict(arr_of_segments, verbose=1) + + # Score stages 1 + f1_score_s1_1, f1_score_p1_list = multiclass_f1(arr_of_labels, predictions_s1_1, return_list=True) + f1_score_custom_s1_1, f1_score_custom_s1_list_1 = custom_multiclass_f1(arr_of_labels, predictions_s1_1, + return_list=True) + print("\nStage 1 1 f1-score: ", f1_score_s1_1) + print(f1_score_p1_list) + print("\nStage 1 1 f1-score (custom):", f1_score_custom_s1_1) + print(f1_score_custom_s1_list_1, "\n\n") + + f1_score_s1_2, f1_score_p1_list_2 = multiclass_f1(arr_of_labels, predictions_s1_2, return_list=True) + f1_score_custom_s1_2, f1_score_custom_s1_list_2 = custom_multiclass_f1(arr_of_labels, predictions_s1_2, + return_list=True) + print("\nStage 1 2 f1-score: ", f1_score_s1_2) + print(f1_score_p1_list_2) + print("\nStage 1 2 f1-score (custom):", f1_score_custom_s1_2) 
+    print(f1_score_custom_s1_list_2, "\n\n")
+
+    f1_score_s1_3, f1_score_p1_list_3 = multiclass_f1(arr_of_labels, predictions_s1_3, return_list=True)
+    f1_score_custom_s1_3, f1_score_custom_s1_list_3 = custom_multiclass_f1(arr_of_labels, predictions_s1_3,
+                                                                           return_list=True)
+    print("\nStage 1 3 f1-score: ", f1_score_s1_3)
+    print(f1_score_p1_list_3)
+    print("\nStage 1 3 f1-score (custom):", f1_score_custom_s1_3)
+    print(f1_score_custom_s1_list_3, "\n\n")
+
+    # concatenate predictions of stages 1
+    predictions_stages_1 = np.concatenate([predictions_s1_1, predictions_s1_2, predictions_s1_3], axis=-1)
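+
+    # --- Editorial sketch: hypothetical check, not in the original script. ---
+    # The axis=-1 concatenation makes each segment's feature vector
+    # [model_1 p(9) | model_2 p(9) | model_3 p(9)], i.e. 27 = n_features wide:
+    assert predictions_stages_1.shape[1] == 3 * predictions_s1_1.shape[1] == n_features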
f.write(f"Model stage 2 3: {data_dir_3}\n") + + f.write(f"\n\nStage 1 1 f1-score: {str(f1_score_s1_1)}\n") + f.write(str(f1_score_p1_list)) + f.write(f"\n\nStage 1 1 f1-score (custom): {str(f1_score_custom_s1_1)}\n") + f.write(str(f1_score_custom_s1_list_1)) + + f.write(f"\n\nStage 1 2 f1-score: {str(f1_score_s1_2)}\n") + f.write(str(f1_score_p1_list_2)) + f.write(f"\n\nStage 1 2 f1-score (custom): {str(f1_score_custom_s1_2)}\n") + f.write(str(f1_score_custom_s1_list_2)) + + f.write(f"\n\nStage 1 3 f1-score: {str(f1_score_s1_3)}\n") + f.write(str(f1_score_p1_list_2)) + f.write(f"\n\nStage 1 3 f1-score (custom): {str(f1_score_custom_s1_3)}\n") + f.write(str(f1_score_custom_s1_list_3)) + + f.write(f"\n\nStage 2 f1-score: {str(score)}\n") + f.write(str(score_list)) + f.write(f"\n\nStage 2 f1-score (custom): {str(score_custom)}\n") + f.write(str(score_custom_list)) + + # from sklearn.metrics import multilabel_confusion_matrix + # + # + # pred = np.where(predictions > 0.5, 1, 0) + # true = y_validation.copy() + # + # confusion = multilabel_confusion_matrix(true, pred) diff --git a/experiments_stage_2/stage_2_s11_s12_baseline_repetition/best_models/stage_1_1_fold_2.h5 b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/best_models/stage_1_1_fold_2.h5 new file mode 100644 index 0000000000000000000000000000000000000000..dcad1eba8fa5b4e468c14c4be567001417a0c3b9 Binary files /dev/null and b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/best_models/stage_1_1_fold_2.h5 differ diff --git a/experiments_stage_2/stage_2_s11_s12_baseline_repetition/best_models/stage_1_2_fold_5.h5 b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/best_models/stage_1_2_fold_5.h5 new file mode 100644 index 0000000000000000000000000000000000000000..5b4926447f6777a701686171778e152cbbf90f8b Binary files /dev/null and b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/best_models/stage_1_2_fold_5.h5 differ diff --git a/experiments_stage_2/stage_2_s11_s12_baseline_repetition/info.txt b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/info.txt new file mode 100644 index 0000000000000000000000000000000000000000..98ed3d43b84953f3bbd7f3da63d67f2dc7808eac --- /dev/null +++ b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/info.txt @@ -0,0 +1,21 @@ +Model stage 1 1: experiments_stage_1/stage_1_1_001_baseline +Model stage 2 2: experiments_stage_1/stage_1_2_001_baseline + + +Stage 1 1 f1-score: 0.7166549481041105 +[0.9266709928617781, 0.8609566184649611, 0.8327402135231317, 0.6370967741935484, 0.43922018348623854, 0.5284872298624754, 0.9198123743578289, 0.6876687668766877, 0.6172413793103448] + +Stage 1 1 f1-score (custom): 0.7104303349036974 +[0.9288409002211526, 0.8609566184649611, 0.8187543736878936, 0.6624182458494047, 0.3898615635179153, 0.477797513321492, 0.9109007255353034, 0.7059693217519867, 0.6383737517831669] + +Stage 1 2 f1-score: 0.671673171827851 +[0.8952380952380953, 0.8505481823427582, 0.8208955223880597, 0.6498516320474778, 0.37857577601947656, 0.5265188042430087, 0.8833333333333333, 0.6826503923278117, 0.3574468085106383] + +Stage 1 2 f1-score (custom): 0.6551974169203674 +[0.8721007289595759, 0.8318284424379232, 0.7840342124019958, 0.7088525651399903, 0.3232176262731241, 0.481651376146789, 0.8559892328398385, 0.7140251687032646, 0.32507739938080493] + +Stage 2 f1-score: 0.8030992668569547 +[0.9354838709677419, 0.852112676056338, 0.8913043478260869, 0.7590027700831025, 0.6942148760330579, 0.8072727272727273, 0.9345794392523364, 0.764179104477612, 0.5897435897435898] + +Stage 
2 f1-score (custom): 0.7921984779368064 +[0.9446254071661238, 0.8414464534075105, 0.8686440677966102, 0.7502738225629791, 0.6840390879478827, 0.7985611510791367, 0.9398496240601504, 0.7467911318553092, 0.5555555555555556] \ No newline at end of file diff --git a/experiments_stage_2/stage_2_s11_s12_baseline_repetition/model_stage_2.h5 b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/model_stage_2.h5 new file mode 100644 index 0000000000000000000000000000000000000000..cde7305fb601f8f975d561dd4f6620fb8f6a2b02 Binary files /dev/null and b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/model_stage_2.h5 differ diff --git a/experiments_stage_2/stage_2_s11_s12_baseline_repetition/score_stage_2.txt b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/score_stage_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..1238ea5072cb1fb02ce2834e2c3a9a2595c14b2c --- /dev/null +++ b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/score_stage_2.txt @@ -0,0 +1,2 @@ +f1-score: 0.8030992668569547 + f1-score (custom): 0.7921984779368064 \ No newline at end of file diff --git a/experiments_stage_2/stage_2_s11_s12_baseline_repetition/stage_2.py b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/stage_2.py new file mode 100644 index 0000000000000000000000000000000000000000..15595de9152e82f18b719b327575b2efece9c470 --- /dev/null +++ b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/stage_2.py @@ -0,0 +1,247 @@ +""" +LSTM model (stage_2) +""" + +from keras import Input, Model +import numpy as np +import os +from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau +from keras.layers import Dense, LSTM, Dropout, LeakyReLU, BatchNormalization, Masking, Bidirectional +from keras.models import load_model +from keras.optimizers import Adam +import tensorflow.keras as keras +from logger import Logger +from preprocess_and_segmentation import load_data, segment_all_dict_data, reshape_segmented_arrays +from preprocessor import preprocess_input_data +from utils import custom_multiclass_f1, split_train_validation_part_2, multiclass_f1, encode_labels +import shutil +import ntpath +# import sys + + +def build_model(n_timesteps, n_features, n_outputs): + # model + input = Input(shape=(n_timesteps, n_features), dtype='float32') + x = Masking(mask_value=0.)(input) + x = Bidirectional(LSTM(units=30, return_sequences=True))(x) + x = BatchNormalization()(x) + x = LeakyReLU()(x) + x = Bidirectional(LSTM(units=30))(x) + x = BatchNormalization()(x) + x = LeakyReLU()(x) + output = Dense(n_outputs, activation='sigmoid')(x) + + model = Model(inputs=input, outputs=output) + # opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False) + model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['categorical_accuracy']) + model.summary() + + return model + + +if __name__ == '__main__': + + # Config + experiment_name = "stage_2_s11_s12_repetition" + experiments_dir = "experiments_stage_2" + + data_dir_1 = 'experiments_stage_1/stage_1_1_001_baseline' + data_dir_2 = 'experiments_stage_1/stage_1_2_001_baseline' + data_test_dir = 'data/test_balanced' + + labels_file = 'subject_labels.npy' + predictions_file = 'subject_predictions.npy' + scores_file = 'scores_custom.npy' + + segment_size = 2000 + overlap = 0.5 + epochs = 30 + batch_size = 18 + n_timesteps = 120 + n_features = 18 + n_outputs = 9 + + # create directory for the experiment + if not os.path.exists(os.path.join(experiments_dir, experiment_name, 'best_models')): + 
os.makedirs(os.path.join(experiments_dir, experiment_name, 'best_models')) + else: + raise NameError(f"Already exist an experiment with the name '{experiment_name}'" + f" in the '{experiments_dir}' directory.") + + # save a copy of the script + shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__))) + + # # Log stdout + # log_file = os.path.join(experiments_dir, experiment_name, 'logfile.log') + # sys.stdout = Logger(log_file) + + subject_labels = np.load(os.path.join(data_dir_1, labels_file), allow_pickle=True).item() + subject_predictions = np.load(os.path.join(data_dir_1, predictions_file), allow_pickle=True).item() + scores_1 = np.load(os.path.join(data_dir_1, scores_file), allow_pickle=True).item() + + # subject_labels_2 = np.load(os.path.join(data_dir_2, labels_file), allow_pickle=True).item() # TODO + subject_predictions_2 = np.load(os.path.join(data_dir_2, predictions_file), allow_pickle=True).item() + scores_2 = np.load(os.path.join(data_dir_2, scores_file), allow_pickle=True).item() + + # pad inputs + subject_predictions_padded = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions.keys()} + for k, v in subject_predictions.items(): + subject_predictions_padded[k][-len(v):, :] = v + + subject_predictions_padded_2 = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions_2.keys()} + for k, v in subject_predictions_2.items(): + subject_predictions_padded_2[k][-len(v):, :] = v + + # concatenate predictions of stages 1 + for k, v in subject_predictions.items(): + subject_predictions_padded[k] = np.concatenate([subject_predictions_padded[k], subject_predictions_padded_2[k]], + axis=1) + + ################################################################################################################### + ## Stage 1 on test + + # Get the best stage_1 models + best_fold_1 = [(k, v) for k, v in sorted(scores_1.items(), key=lambda item: item[1], reverse=True)][0][0] + best_fold_2 = [(k, v) for k, v in sorted(scores_2.items(), key=lambda item: item[1], reverse=True)][0][0] + + # Models stage 1 + model_stage_1_1 = load_model(os.path.join(data_dir_1, f"model_{best_fold_1}.h5")) + model_stage_1_2 = keras.models.load_model(os.path.join(data_dir_2, f"model_{best_fold_2}.h5")) + + # save best models + model_stage_1_1.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_1_{best_fold_1}.h5")) + model_stage_1_2.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_2_{best_fold_2}.h5")) + + # Load test data + data_test = load_data(data_test_dir) + data_test = preprocess_input_data(data_test) + data_test = segment_all_dict_data(data_test, segment_size, overlap) + arr_of_segments, arr_of_labels, arr_of_IDs = reshape_segmented_arrays(data_test, + shuffle_IDs=False, + # Do not shuffle the segments to keep the + # order in time of the predictions + shuffle_segments=False, + segment_standardization_flag=True) + # Encode labels + arr_of_labels = np.array([i[0]['Dx'] for i in arr_of_labels]) + arr_of_labels = encode_labels(arr_of_labels) + + # Predictions stages 1 + predictions_s1_1 = model_stage_1_1.predict(arr_of_segments, verbose=1) + predictions_s1_2 = model_stage_1_2.predict(arr_of_segments, verbose=1) + + # Score stages 1 + f1_score_s1_1, f1_score_p1_list = multiclass_f1(arr_of_labels, predictions_s1_1, return_list=True) + f1_score_custom_s1_1, f1_score_custom_s1_list_1 = custom_multiclass_f1(arr_of_labels, predictions_s1_1, + return_list=True) + print("\nStage 1 1 f1-score: ", f1_score_s1_1) + 
print(f1_score_p1_list) + print("\nStage 1 1 f1-score (custom):", f1_score_custom_s1_1) + print(f1_score_custom_s1_list_1, "\n\n") + + f1_score_s1_2, f1_score_p1_list_2 = multiclass_f1(arr_of_labels, predictions_s1_2, return_list=True) + f1_score_custom_s1_2, f1_score_custom_s1_list_2 = custom_multiclass_f1(arr_of_labels, predictions_s1_2, + return_list=True) + print("\nStage 1 2 f1-score: ", f1_score_s1_2) + print(f1_score_p1_list_2) + print("\nStage 1 2 f1-score (custom):", f1_score_custom_s1_2) + print(f1_score_custom_s1_list_2, "\n\n") + + # concatenate predictions of stages 1 + predictions_stages_1 = np.concatenate([predictions_s1_1, predictions_s1_2], axis=-1) + + # Group by subject & padding: + + # true labels of each subject + subject_labels_test = {ID: None for ID in list(np.unique(arr_of_IDs))} + for ID, label in zip(arr_of_IDs, arr_of_labels): + subject_labels_test[ID[0]] = label + + # stages 1 predictions for each subject + subject_predictions_test = {ID: [] for ID in list(np.unique(arr_of_IDs))} + for ID, pred in zip(arr_of_IDs, predictions_stages_1): + subject_predictions_test[ID[0]].append(pred) + + # pad inputs + subject_predictions_padded_test = {k: np.zeros((n_timesteps, 18)) for k in subject_predictions_test.keys()} + for k, v in subject_predictions_test.items(): + subject_predictions_padded_test[k][-len(v):, :] = v + + # convert to array + X_val, y_val, _, _ = split_train_validation_part_2(subject_predictions_padded_test, subject_labels_test, split=0, + n_variables=n_features) + + ## end stage 1 on test + ################################################################################################################### + # Stage 2 + + # convert to array + X_train, y_train, _, _ = split_train_validation_part_2(subject_predictions_padded, subject_labels, split=0, + n_variables=n_features) + + # Model + model_stage_2 = build_model(n_timesteps, n_features, n_outputs) + + # callbacks + earlyStopping = EarlyStopping(monitor='val_categorical_accuracy', patience=16, verbose=0, mode='max') + mcp_save = ModelCheckpoint(os.path.join(experiments_dir, experiment_name, f"model_stage_2.h5"), + save_best_only=True, monitor='val_categorical_accuracy', mode='max') + reduce_lr_loss = ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.1, patience=10, verbose=1, + epsilon=1e-4, + mode='max') + + # train stage 2 + model_stage_2.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=2, shuffle=True, + validation_data=(X_val, y_val), + callbacks=[earlyStopping, mcp_save, reduce_lr_loss]) + + # reloading the best model + del model_stage_2 + model_stage_2 = load_model(os.path.join(experiments_dir, experiment_name, f"model_stage_2.h5")) + + # final predictions + _, accuracy = model_stage_2.evaluate(X_val, y_val, verbose=1) + final_predictions = model_stage_2.predict(X_val, verbose=1) + + print(f"\nAccuracy: {accuracy}") + + score, score_list = multiclass_f1(y_val, final_predictions, return_list=True) + print(f"\nf1-score: {score}") + print(score_list) + + # f1-score + score_custom, score_custom_list = custom_multiclass_f1(y_val, final_predictions, return_list=True) + print(f"\nf1-score (custom): {score_custom}") + print(score_custom_list) + + # save f1-score + with open(os.path.join(experiments_dir, experiment_name, "score_stage_2.txt"), 'w') as f: + f.write(f"f1-score: {str(score)} \n f1-score (custom): {str(score_custom)}") + + # Save info and results test + with open(os.path.join(experiments_dir, experiment_name, "info.txt"), 'w') as f: + f.write(f"Model stage 1 1: 
{data_dir_1}\n") + f.write(f"Model stage 2 2: {data_dir_2}\n") + + f.write(f"\n\nStage 1 1 f1-score: {str(f1_score_s1_1)}\n") + f.write(str(f1_score_p1_list)) + f.write(f"\n\nStage 1 1 f1-score (custom): {str(f1_score_custom_s1_1)}\n") + f.write(str(f1_score_custom_s1_list_1)) + + f.write(f"\n\nStage 1 2 f1-score: {str(f1_score_s1_2)}\n") + f.write(str(f1_score_p1_list_2)) + f.write(f"\n\nStage 1 2 f1-score (custom): {str(f1_score_custom_s1_2)}\n") + f.write(str(f1_score_custom_s1_list_2)) + + f.write(f"\n\nStage 2 f1-score: {str(score)}\n") + f.write(str(score_list)) + f.write(f"\n\nStage 2 f1-score (custom): {str(score_custom)}\n") + f.write(str(score_custom_list)) + + # from sklearn.metrics import multilabel_confusion_matrix + # + # + # pred = np.where(predictions > 0.5, 1, 0) + # true = y_validation.copy() + # + # confusion = multilabel_confusion_matrix(true, pred) diff --git a/stage_1_3_Inc_ResNet_LSTM_v01.py b/stage_1_3_Inc_ResNet_LSTM_v01.py index ec61ae833a68fc5a3b1e1d41b0b0b992d2c94cfa..18addd3b6192eccdc8106bab9ebf8401279dbade 100644 --- a/stage_1_3_Inc_ResNet_LSTM_v01.py +++ b/stage_1_3_Inc_ResNet_LSTM_v01.py @@ -31,7 +31,6 @@ def cross_validation(arr_of_segments, arr_of_labels, arr_of_IDs, ids_labels): ids_labels['label'] )] - # true labels of each subject subject_labels = {ID: None for ID in list(ids)} for ID, label in zip(arr_of_IDs, arr_of_labels): @@ -153,7 +152,7 @@ def cross_validation(arr_of_segments, arr_of_labels, arr_of_IDs, ids_labels): if __name__ == '__main__': # Config - experiment_name = "stage_1_3_002_Inc_ResNet_LSTM_v01" + experiment_name = "stage_1_3_Inc_ResNet_LSTM_v01" experiments_dir = "experiments_stage_1" data_dir = 'data/train_balanced' segment_size = 2000 @@ -165,12 +164,12 @@ if __name__ == '__main__': # create directory for the experiment if not os.path.exists(os.path.join(experiments_dir, experiment_name)): os.makedirs(os.path.join(experiments_dir, experiment_name)) - # else: - # raise NameError(f"Already exist an experiment with the name '{experiment_name}'" - # f" in the '{experiments_dir}' directory.") + else: + raise NameError(f"Already exist an experiment with the name '{experiment_name}'" + f" in the '{experiments_dir}' directory.") - # # save a copy of the script - # shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__))) + # save a copy of the script + shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__))) # This gives an error on the CSC server when trying to import sys # # Log stdout diff --git a/stage_1_4_Inc_ResNet_LSTM_v02.py b/stage_1_4_Inc_ResNet_LSTM_v02.py new file mode 100644 index 0000000000000000000000000000000000000000..a17703be2f9c045939d4479293af15a2231a8157 --- /dev/null +++ b/stage_1_4_Inc_ResNet_LSTM_v02.py @@ -0,0 +1,209 @@ +""" +Residual network model (stage_1_2) +""" +from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping +from sklearn.model_selection import StratifiedKFold +import os +from utils import encode_labels, custom_multiclass_f1, multiclass_f1 +import shutil +import ntpath +from collections import Counter +import numpy as np +from keras.initializers import glorot_uniform, he_normal +from preprocess_and_segmentation import load_data, segment_all_dict_data, reshape_segmented_arrays +from preprocessor import preprocess_input_data +from utils import encode_labels +import tensorflow.keras as keras +import pandas as pd +# import sys # This gives an error on the CSC server +from model_architecture_V01 import 
Inc_ResNet_LSTM_v02 + + +def cross_validation(arr_of_segments, arr_of_labels, arr_of_IDs, ids_labels): + """ Subject cross-validation """ + + global epochs, batch_size, n_folds + + # split the subjects into n folds, keeping the label balance + ids = ids_labels['subject'] + skf = StratifiedKFold(n_splits=n_folds, random_state=None, shuffle=True) + subj_folds = [(ids[test_index]) for train_index, test_index in skf.split(ids_labels['subject'], + ids_labels['label'] + )] + + # true labels of each subject + subject_labels = {ID: None for ID in list(ids)} + for ID, label in zip(arr_of_IDs, arr_of_labels): + subject_labels[ID[0]] = label + + # to save the predictions of each subject + subject_predictions = {ID: [] for ID in list(ids)} + + # to save the f1-score of each fold + scores = {} + scores_custom = {} + + for i, validation_fold in enumerate(subj_folds): + print(f"\n\nFold {i} ------------------------------------------------- \n") + + # selector + selector = np.isin(arr_of_IDs.squeeze(), validation_fold) + + # validation + arr_seg_validation = arr_of_segments[selector] + arr_labels_validation = arr_of_labels[selector] + arr_IDs_validation = arr_of_IDs[selector] + + # train + arr_seg_train = arr_of_segments[np.invert(selector)] + arr_labels_train = arr_of_labels[np.invert(selector)] + arr_IDs_train = arr_of_IDs[np.invert(selector)] + + # TODO + # Up-balance 'STE' (3x) + add_to_input = [] + add_to_labels = [] + add_to_IDs = [] + for j in range(len(arr_labels_train)): + if arr_labels_train[j][8] == 1: + add_to_input.append(arr_seg_train[j]) + add_to_labels.append(arr_labels_train[j]) + add_to_IDs.append(arr_IDs_train[j]) + + arr_seg_train_balanced = np.concatenate([add_to_input, arr_seg_train, add_to_input]) + arr_labels_train_balanced = np.concatenate([add_to_labels, arr_labels_train, add_to_labels]) + arr_IDs_train_balanced = np.concatenate([add_to_IDs, arr_IDs_train, add_to_IDs]) + + # Build model + model = Inc_ResNet_LSTM_v02(segment_size, 12, classes=9) + + # TODO + # callbacks + earlyStopping = keras.callbacks.EarlyStopping(monitor='val_categorical_accuracy', patience=10, verbose=0, mode='max') + mcp_save = keras.callbacks.ModelCheckpoint(os.path.join(experiments_dir, experiment_name, f"model_fold_{i}.h5"), + save_best_only=True, monitor='val_categorical_accuracy', mode='max') + reduce_lr_loss = keras.callbacks.ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.1, patience=7, verbose=1, + epsilon=1e-4, + mode='max') + + # model.summary() + + model.fit(arr_seg_train_balanced, arr_labels_train_balanced, epochs=epochs, batch_size=batch_size, + verbose=1, validation_data=(arr_seg_validation, arr_labels_validation), shuffle=True, + callbacks=[earlyStopping, mcp_save, reduce_lr_loss]) + + # re-load best model + del model + model = keras.models.load_model(os.path.join(experiments_dir, experiment_name, f"model_fold_{i}.h5")) + _, accuracy = model.evaluate(arr_seg_validation, arr_labels_validation, batch_size=batch_size, verbose=1) + predictions = model.predict(arr_seg_validation, verbose=1) + + # print fold results + print("Accuracy:", accuracy) + + f1_score, f1_score_list = multiclass_f1(arr_labels_validation, predictions, return_list=True) + print("\nf1 score:", f1_score) + print(f1_score_list) + + f1_score_custom, f1_score_custom_list = custom_multiclass_f1(arr_labels_validation, predictions, + return_list=True) + print("\nf1 score (custom):", f1_score_custom) + print(f1_score_custom_list) + + # save predictions + for ID, pred in zip(arr_IDs_validation, predictions): + 
subject_predictions[ID[0]].append(pred) + + # save f1-score + scores[f"fold_{i}"] = f1_score + scores_custom[f"fold_{i}"] = f1_score_custom + + # save f1-score list (text file): + with open(os.path.join(experiments_dir, experiment_name, "scores.txt"), 'a') as f: + f.write(f"Fold {str(i)}:\n" + f"{str(f1_score_list)} (f1-score by class) \n" + f"{str(f1_score_custom_list)} (f1 score (custom) by class) \n") + + # Average f1 score + m, s = np.mean([v for v in scores.values()]), np.std([v for v in scores.values()]) + m_c, s_c = np.mean([v for v in scores_custom.values()]), np.std([v for v in scores_custom.values()]) + + # save labels (to disk) + np.save(os.path.join(experiments_dir, experiment_name, "subject_labels.npy"), subject_labels) + + # save predictions (to disk) + np.save(os.path.join(experiments_dir, experiment_name, "subject_predictions.npy"), subject_predictions) + + # save f1-scores (to disk) + np.save(os.path.join(experiments_dir, experiment_name, "scores.npy"), scores) + np.save(os.path.join(experiments_dir, experiment_name, "scores_custom.npy"), scores_custom) + + print("\n==========================================================\n") + print(f"CV f1-score: {str(m)} (+/- {str(s)}) \nCV f1-score (custom): {str(m_c)} (+/- {str(s_c)})") + + # save f1-scores (text file) + with open(os.path.join(experiments_dir, experiment_name, "scores.txt"), 'a') as f: + f.write("\n\n ==> Score by CV:") + f.write(f"\n{str(scores)} (f1-score) \n{str(scores_custom)} (f1-score (custom))") + f.write("\n\n ==> Average score CV:") + f.write(f"\nCV f1-score: {str(m)} (+/- {str(s)}) \nCV f1-score (custom): {str(m_c)} (+/- {str(s_c)})\n\n") + + +if __name__ == '__main__': + + # Config + experiment_name = "stage_1_4_moreEpochs" + experiments_dir = "experiments_stage_1" + data_dir = 'data/train_balanced' + segment_size = 2000 + overlap = 0.5 + epochs = 80 # ??? + batch_size = 54 # ??? 
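+    # number of subject-wise CV folds; cross_validation above saves one checkpoint per fold (model_fold_{i}.h5)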
+ n_folds = 8 + + # create directory for the experiment + if not os.path.exists(os.path.join(experiments_dir, experiment_name)): + os.makedirs(os.path.join(experiments_dir, experiment_name)) + else: + raise NameError(f"An experiment named '{experiment_name}' already exists" + f" in the '{experiments_dir}' directory.") + + # save a copy of the script + shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__))) + + # This gives an error on the CSC server when trying to import sys + # # Log stdout + # log_file = os.path.join(experiments_dir, experiment_name, 'logfile.log') + # sys.stdout = Logger(log_file) + + # load data + data = load_data(data_dir) + + # create an array with the label of each subject (used to keep the labels balanced + # across the folds of the cross-validation) + dic_labels = {} + for k, v in data.items(): + dic_labels[k] = data[k]['info']['Dx'] + + ids_labels = pd.Series(dic_labels).reset_index() + ids_labels.columns = ['subject', 'label'] + + # pre-process signals + data = preprocess_input_data(data) + + # segment signal + data = segment_all_dict_data(data, segment_size, overlap) + + arr_of_segments, arr_of_labels, arr_of_IDs = reshape_segmented_arrays(data, + shuffle_IDs=True, + # Do not shuffle the segments to keep the + # order in time of the predictions + shuffle_segments=False, + segment_standardization_flag=True) + + # Encode labels + arr_of_labels = np.array([i[0]['Dx'] for i in arr_of_labels]) + arr_of_labels = encode_labels(arr_of_labels) + + # Cross-validation + cross_validation(arr_of_segments, arr_of_labels, arr_of_IDs, ids_labels) diff --git a/stage_2.py b/stage_2.py index 4af884c25c90fd7ad646bf57cebf7a11702b09c8..90c9c4e61d4f8c6a5ceb69090640526db6b7e8d5 100644 --- a/stage_2.py +++ b/stage_2.py @@ -42,7 +42,7 @@ def build_model(n_timesteps, n_features, n_outputs): if __name__ == '__main__': # Config - experiment_name = "stage_2_001_baseline_CSC_CPU" + experiment_name = "stage_2_s11_s12_repetition" experiments_dir = "experiments_stage_2" data_dir_1 = 'experiments_stage_1/stage_1_1_001_baseline' @@ -58,6 +58,8 @@ if __name__ == '__main__': epochs = 30 batch_size = 18 n_timesteps = 120 + n_features = 18 + n_outputs = 9 # create directory for the experiment if not os.path.exists(os.path.join(experiments_dir, experiment_name, 'best_models')): @@ -66,8 +68,8 @@ if __name__ == '__main__': raise NameError(f"Already exist an experiment with the name '{experiment_name}'" f" in the '{experiments_dir}' directory.") - # # save a copy of the script - # shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__))) + # save a copy of the script + shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__))) # # Log stdout # log_file = os.path.join(experiments_dir, experiment_name, 'logfile.log') @@ -77,7 +79,7 @@ if __name__ == '__main__': subject_predictions = np.load(os.path.join(data_dir_1, predictions_file), allow_pickle=True).item() scores_1 = np.load(os.path.join(data_dir_1, scores_file), allow_pickle=True).item() - subject_labels_2 = np.load(os.path.join(data_dir_1, labels_file), allow_pickle=True).item() + # subject_labels_2 = np.load(os.path.join(data_dir_2, labels_file), allow_pickle=True).item() # TODO subject_predictions_2 = np.load(os.path.join(data_dir_2, predictions_file), allow_pickle=True).item() scores_2 = np.load(os.path.join(data_dir_2, scores_file), allow_pickle=True).item() @@ -86,7 +88,6 @@ if __name__ == '__main__': for k, v in 
subject_predictions.items(): subject_predictions_padded[k][-len(v):, :] = v - # pad inputs to the longest input sequence subject_predictions_padded_2 = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions_2.keys()} for k, v in subject_predictions_2.items(): subject_predictions_padded_2[k][-len(v):, :] = v @@ -104,8 +105,8 @@ if __name__ == '__main__': best_fold_2 = [(k, v) for k, v in sorted(scores_2.items(), key=lambda item: item[1], reverse=True)][0][0] # Models stage 1 - model_stage_1_1 = load_model(os.path.join(data_dir_1, f"model_{best_fold_1}.h5")) # TODO save - model_stage_1_2 = keras.models.load_model(os.path.join(data_dir_2, f"model_{best_fold_2}.h5")) # TODO save + model_stage_1_1 = load_model(os.path.join(data_dir_1, f"model_{best_fold_1}.h5")) + model_stage_1_2 = keras.models.load_model(os.path.join(data_dir_2, f"model_{best_fold_2}.h5")) # save best models model_stage_1_1.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_1_{best_fold_1}.h5")) @@ -133,9 +134,9 @@ if __name__ == '__main__': f1_score_s1_1, f1_score_p1_list = multiclass_f1(arr_of_labels, predictions_s1_1, return_list=True) f1_score_custom_s1_1, f1_score_custom_s1_list_1 = custom_multiclass_f1(arr_of_labels, predictions_s1_1, return_list=True) - print("\nStage 1 f1-score: ", f1_score_s1_1) + print("\nStage 1 1 f1-score: ", f1_score_s1_1) print(f1_score_p1_list) - print("\nStage 1 f1-score (custom):", f1_score_custom_s1_1) + print("\nStage 1 1 f1-score (custom):", f1_score_custom_s1_1) print(f1_score_custom_s1_list_1, "\n\n") f1_score_s1_2, f1_score_p1_list_2 = multiclass_f1(arr_of_labels, predictions_s1_2, return_list=True) @@ -162,33 +163,24 @@ if __name__ == '__main__': subject_predictions_test[ID[0]].append(pred) # pad inputs - subject_predictions_padded_test = {k: np.zeros((n_timesteps, 18)) for k in subject_predictions_test.keys()} + subject_predictions_padded_test = {k: np.zeros((n_timesteps, n_features)) for k in subject_predictions_test.keys()} for k, v in subject_predictions_test.items(): subject_predictions_padded_test[k][-len(v):, :] = v # convert to array - X_val, y_val, _, _ = split_train_validation_part_2(subject_predictions_padded_test, subject_labels_test, split=0) + X_val, y_val, _, _ = split_train_validation_part_2(subject_predictions_padded_test, subject_labels_test, split=0, + n_variables=n_features) ## end stage 1 on test - ################################################################################################################### - - # TODO - # del model_stage_1_1 - # del model_stage_1_2 - # del arr_of_segments - # del arr_of_labels - # del arr_of_IDs - # del subject_predictions - # del data_test - ################################################################################################################### # Stage 2 # convert to array - X_train, y_train, _, _ = split_train_validation_part_2(subject_predictions_padded, subject_labels, split=0) + X_train, y_train, _, _ = split_train_validation_part_2(subject_predictions_padded, subject_labels, split=0, + n_variables=n_features) # Model - model_stage_2 = build_model(n_timesteps, 18, 9) + model_stage_2 = build_model(n_timesteps, n_features, n_outputs) # callbacks earlyStopping = EarlyStopping(monitor='val_categorical_accuracy', patience=16, verbose=0, mode='max') diff --git a/stage_2_3x.py b/stage_2_3x.py new file mode 100644 index 0000000000000000000000000000000000000000..5840d3174d8ba4768d96cf149756ffbb84902abf --- /dev/null +++ b/stage_2_3x.py @@ -0,0 +1,275 @@ +""" +LSTM model (stage_2) +""" + 
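+# Stage-2 ensemble over three stage-1 models: their per-segment predictions (9 classes each) are stacked into 27 features per timestep, grouped by subject, zero-padded to n_timesteps, and fed to the bidirectional LSTM defined below.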
+from keras import Input, Model +import numpy as np +import os +from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau +from keras.layers import Dense, LSTM, Dropout, LeakyReLU, BatchNormalization, Masking, Bidirectional +from keras.models import load_model +from keras.optimizers import Adam +import tensorflow.keras as keras +from logger import Logger +from preprocess_and_segmentation import load_data, segment_all_dict_data, reshape_segmented_arrays +from preprocessor import preprocess_input_data +from utils import custom_multiclass_f1, split_train_validation_part_2, multiclass_f1, encode_labels +import shutil +import ntpath +# import sys + + +def build_model(n_timesteps, n_features, n_outputs): + # model + input = Input(shape=(n_timesteps, n_features), dtype='float32') + x = Masking(mask_value=0.)(input) + x = Bidirectional(LSTM(units=30, return_sequences=True))(x) + x = BatchNormalization()(x) + x = LeakyReLU()(x) + x = Bidirectional(LSTM(units=30))(x) + x = BatchNormalization()(x) + x = LeakyReLU()(x) + output = Dense(n_outputs, activation='sigmoid')(x) + + model = Model(inputs=input, outputs=output) + # opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False) + model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['categorical_accuracy']) + model.summary() + + return model + + +if __name__ == '__main__': + + # Config + experiment_name = "stage_2_3x_s11_s12_s14" + experiments_dir = "experiments_stage_2" + + data_dir_1 = 'experiments_stage_1/stage_1_1_001_baseline' + data_dir_2 = 'experiments_stage_1/stage_1_2_001_baseline' + data_dir_3 = 'experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02' + data_test_dir = 'data/test_balanced' + + labels_file = 'subject_labels.npy' + predictions_file = 'subject_predictions.npy' + scores_file = 'scores_custom.npy' + + segment_size = 2000 + overlap = 0.5 + epochs = 30 + batch_size = 18 + n_timesteps = 120 + n_features = 27 + n_outputs = 9 + + # create directory for the experiment + if not os.path.exists(os.path.join(experiments_dir, experiment_name, 'best_models')): + os.makedirs(os.path.join(experiments_dir, experiment_name, 'best_models')) + else: + raise NameError(f"An experiment named '{experiment_name}' already exists" + f" in the '{experiments_dir}' directory.") + + # save a copy of the script + shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__))) + + # # Log stdout + # log_file = os.path.join(experiments_dir, experiment_name, 'logfile.log') + # sys.stdout = Logger(log_file) + + subject_labels = np.load(os.path.join(data_dir_1, labels_file), allow_pickle=True).item() + subject_predictions = np.load(os.path.join(data_dir_1, predictions_file), allow_pickle=True).item() + scores_1 = np.load(os.path.join(data_dir_1, scores_file), allow_pickle=True).item() + + # subject_labels_2 = np.load(os.path.join(data_dir_2, labels_file), allow_pickle=True).item() # TODO + subject_predictions_2 = np.load(os.path.join(data_dir_2, predictions_file), allow_pickle=True).item() + scores_2 = np.load(os.path.join(data_dir_2, scores_file), allow_pickle=True).item() + + # subject_labels_3 = np.load(os.path.join(data_dir_3, labels_file), allow_pickle=True).item() # TODO + subject_predictions_3 = np.load(os.path.join(data_dir_3, predictions_file), allow_pickle=True).item() + scores_3 = np.load(os.path.join(data_dir_3, scores_file), allow_pickle=True).item() + + # pad inputs + subject_predictions_padded = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions.keys()} + 
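+    # right-align each subject's sequence of segment predictions in a fixed (n_timesteps, 9) window; the leading all-zero rows are ignored later by the Masking(mask_value=0.) layer in build_model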
for k, v in subject_predictions.items(): + subject_predictions_padded[k][-len(v):, :] = v + + subject_predictions_padded_2 = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions_2.keys()} + for k, v in subject_predictions_2.items(): + subject_predictions_padded_2[k][-len(v):, :] = v + + subject_predictions_padded_3 = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions_3.keys()} + for k, v in subject_predictions_3.items(): + subject_predictions_padded_3[k][-len(v):, :] = v + + # concatenate predictions of stages 1 + for k, v in subject_predictions.items(): + subject_predictions_padded[k] = np.concatenate([subject_predictions_padded[k], + subject_predictions_padded_2[k], + subject_predictions_padded_3[k]], axis=1) + + ################################################################################################################### + ## Stage 1 on test + + # Get the best stage_1 models + best_fold_1 = [(k, v) for k, v in sorted(scores_1.items(), key=lambda item: item[1], reverse=True)][0][0] + best_fold_2 = [(k, v) for k, v in sorted(scores_2.items(), key=lambda item: item[1], reverse=True)][0][0] + best_fold_3 = [(k, v) for k, v in sorted(scores_3.items(), key=lambda item: item[1], reverse=True)][0][0] + + # Models stage 1 + model_stage_1_1 = load_model(os.path.join(data_dir_1, f"model_{best_fold_1}.h5")) + model_stage_1_2 = keras.models.load_model(os.path.join(data_dir_2, f"model_{best_fold_2}.h5")) + model_stage_1_3 = keras.models.load_model(os.path.join(data_dir_3, f"model_{best_fold_3}.h5")) + + # save best models + model_stage_1_1.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_1_{best_fold_1}.h5")) + model_stage_1_2.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_2_{best_fold_2}.h5")) + model_stage_1_3.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_3_{best_fold_3}.h5")) + + # Load test data + data_test = load_data(data_test_dir) + data_test = preprocess_input_data(data_test) + data_test = segment_all_dict_data(data_test, segment_size, overlap) + arr_of_segments, arr_of_labels, arr_of_IDs = reshape_segmented_arrays(data_test, + shuffle_IDs=False, + # Do not shuffle the segments to keep the + # order in time of the predictions + shuffle_segments=False, + segment_standardization_flag=True) + # Encode labels + arr_of_labels = np.array([i[0]['Dx'] for i in arr_of_labels]) + arr_of_labels = encode_labels(arr_of_labels) + + # Predictions stages 1 + predictions_s1_1 = model_stage_1_1.predict(arr_of_segments, verbose=1) + predictions_s1_2 = model_stage_1_2.predict(arr_of_segments, verbose=1) + predictions_s1_3 = model_stage_1_3.predict(arr_of_segments, verbose=1) + + # Score stages 1 + f1_score_s1_1, f1_score_p1_list = multiclass_f1(arr_of_labels, predictions_s1_1, return_list=True) + f1_score_custom_s1_1, f1_score_custom_s1_list_1 = custom_multiclass_f1(arr_of_labels, predictions_s1_1, + return_list=True) + print("\nStage 1 1 f1-score: ", f1_score_s1_1) + print(f1_score_p1_list) + print("\nStage 1 1 f1-score (custom):", f1_score_custom_s1_1) + print(f1_score_custom_s1_list_1, "\n\n") + + f1_score_s1_2, f1_score_p1_list_2 = multiclass_f1(arr_of_labels, predictions_s1_2, return_list=True) + f1_score_custom_s1_2, f1_score_custom_s1_list_2 = custom_multiclass_f1(arr_of_labels, predictions_s1_2, + return_list=True) + print("\nStage 1 2 f1-score: ", f1_score_s1_2) + print(f1_score_p1_list_2) + print("\nStage 1 2 f1-score (custom):", f1_score_custom_s1_2) + print(f1_score_custom_s1_list_2, "\n\n") 
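+    # evaluate the third stage-1 model with the same metrics before stacking its predictions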
+ + f1_score_s1_3, f1_score_p1_list_3 = multiclass_f1(arr_of_labels, predictions_s1_3, return_list=True) + f1_score_custom_s1_3, f1_score_custom_s1_list_3 = custom_multiclass_f1(arr_of_labels, predictions_s1_3, + return_list=True) + print("\nStage 1 3 f1-score: ", f1_score_s1_3) + print(f1_score_p1_list_3) + print("\nStage 1 3 f1-score (custom):", f1_score_custom_s1_3) + print(f1_score_custom_s1_list_3, "\n\n") + + # concatenate predictions of stages 1 + predictions_stages_1 = np.concatenate([predictions_s1_1, predictions_s1_2, predictions_s1_3], axis=-1) + + # Group by subject & padding: + + # true labels of each subject + subject_labels_test = {ID: None for ID in list(np.unique(arr_of_IDs))} + for ID, label in zip(arr_of_IDs, arr_of_labels): + subject_labels_test[ID[0]] = label + + # stages 1 predictions for each subject + subject_predictions_test = {ID: [] for ID in list(np.unique(arr_of_IDs))} + for ID, pred in zip(arr_of_IDs, predictions_stages_1): + subject_predictions_test[ID[0]].append(pred) + + # pad inputs + subject_predictions_padded_test = {k: np.zeros((n_timesteps, n_features)) for k in subject_predictions_test.keys()} + for k, v in subject_predictions_test.items(): + subject_predictions_padded_test[k][-len(v):, :] = v + + # convert to array + X_val, y_val, _, _ = split_train_validation_part_2(subject_predictions_padded_test, subject_labels_test, + n_variables=n_features, split=0) + + ## end stage 1 on test + ################################################################################################################### + # Stage 2 + + # convert to array + X_train, y_train, _, _ = split_train_validation_part_2(subject_predictions_padded, subject_labels, + n_variables=n_features, split=0) + + # Model + model_stage_2 = build_model(n_timesteps, n_features, n_outputs) + + # callbacks + earlyStopping = EarlyStopping(monitor='val_categorical_accuracy', patience=16, verbose=0, mode='max') + mcp_save = ModelCheckpoint(os.path.join(experiments_dir, experiment_name, f"model_stage_2.h5"), + save_best_only=True, monitor='val_categorical_accuracy', mode='max') + reduce_lr_loss = ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.1, patience=10, verbose=1, + epsilon=1e-4, + mode='max') + + # train stage 2 + model_stage_2.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=2, shuffle=True, + validation_data=(X_val, y_val), + callbacks=[earlyStopping, mcp_save, reduce_lr_loss]) + + # reloading the best model + del model_stage_2 + model_stage_2 = load_model(os.path.join(experiments_dir, experiment_name, f"model_stage_2.h5")) + + # final predictions + _, accuracy = model_stage_2.evaluate(X_val, y_val, verbose=1) + final_predictions = model_stage_2.predict(X_val, verbose=1) + + print(f"\nAccuracy: {accuracy}") + + score, score_list = multiclass_f1(y_val, final_predictions, return_list=True) + print(f"\nf1-score: {score}") + print(score_list) + + # f1-score + score_custom, score_custom_list = custom_multiclass_f1(y_val, final_predictions, return_list=True) + print(f"\nf1-score (custom): {score_custom}") + print(score_custom_list) + + # save f1-score + with open(os.path.join(experiments_dir, experiment_name, "score_stage_2.txt"), 'w') as f: + f.write(f"f1-score: {str(score)} \n f1-score (custom): {str(score_custom)}") + + # Save info and results test + with open(os.path.join(experiments_dir, experiment_name, "info.txt"), 'w') as f: + f.write(f"Model stage 1 1: {data_dir_1}\n") + f.write(f"Model stage 1 2: {data_dir_2}\n") + f.write(f"Model stage 1 3: {data_dir_3}\n") 
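+        # log the same per-model and stage-2 scores that were printed above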
+ + f.write(f"\n\nStage 1 1 f1-score: {str(f1_score_s1_1)}\n") + f.write(str(f1_score_p1_list)) + f.write(f"\n\nStage 1 1 f1-score (custom): {str(f1_score_custom_s1_1)}\n") + f.write(str(f1_score_custom_s1_list_1)) + + f.write(f"\n\nStage 1 2 f1-score: {str(f1_score_s1_2)}\n") + f.write(str(f1_score_p1_list_2)) + f.write(f"\n\nStage 1 2 f1-score (custom): {str(f1_score_custom_s1_2)}\n") + f.write(str(f1_score_custom_s1_list_2)) + + f.write(f"\n\nStage 1 3 f1-score: {str(f1_score_s1_3)}\n") + f.write(str(f1_score_p1_list_2)) + f.write(f"\n\nStage 1 3 f1-score (custom): {str(f1_score_custom_s1_3)}\n") + f.write(str(f1_score_custom_s1_list_3)) + + f.write(f"\n\nStage 2 f1-score: {str(score)}\n") + f.write(str(score_list)) + f.write(f"\n\nStage 2 f1-score (custom): {str(score_custom)}\n") + f.write(str(score_custom_list)) + + # from sklearn.metrics import multilabel_confusion_matrix + # + # + # pred = np.where(predictions > 0.5, 1, 0) + # true = y_validation.copy() + # + # confusion = multilabel_confusion_matrix(true, pred) diff --git a/utils.py b/utils.py index 061faf1ba0cfafc15d92079e9417310b21d9d7ed..92a0ecaee805c131d2fe600a8f7691bb78101b3f 100644 --- a/utils.py +++ b/utils.py @@ -112,11 +112,11 @@ def split_data_train_test(): # draft -def split_train_validation_part_2(subject_predictions, subject_labels, split=0.33): +def split_train_validation_part_2(subject_predictions, subject_labels, n_variables=18, split=0.33): """ Splits train/validation sets for the model_1_part_2""" n_timesteps = [len(v) for v in subject_predictions.values()][0] - n_variables = 18 + # n_variables = 18 n_outputs = 9 assert len(subject_labels) == len(subject_predictions), "Labels and predictions have different shapes"