diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_0.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_0.h5
new file mode 100644
index 0000000000000000000000000000000000000000..2959c83aab384cb6fd27cf8e9466028b4a1cecc5
Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_0.h5 differ
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_1.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_1.h5
new file mode 100644
index 0000000000000000000000000000000000000000..b378bb559011d7d804948e37b7004975f01c44b1
Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_1.h5 differ
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_2.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_2.h5
new file mode 100644
index 0000000000000000000000000000000000000000..c411ab9d834d3ddec27535faf0270c5749ec732e
Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_2.h5 differ
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_3.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_3.h5
new file mode 100644
index 0000000000000000000000000000000000000000..6767b621e8964e21b2df966a0e81864c76974b90
Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_3.h5 differ
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_4.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_4.h5
new file mode 100644
index 0000000000000000000000000000000000000000..78ea4ced87fd35db7c494c592fa1d39baea0b8b8
Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_4.h5 differ
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_5.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_5.h5
new file mode 100644
index 0000000000000000000000000000000000000000..12b30c9d94828806f76160977c4e1bc49a39dd4c
Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_5.h5 differ
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_6.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_6.h5
new file mode 100644
index 0000000000000000000000000000000000000000..1c7a8bd20e54530d2f452aaf2f91c6547989f7fb
Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_6.h5 differ
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_7.h5 b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_7.h5
new file mode 100644
index 0000000000000000000000000000000000000000..61944c29ce16547d8ca894dd4d7b52c213153df7
Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/model_fold_7.h5 differ
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores.npy b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores.npy
new file mode 100644
index 0000000000000000000000000000000000000000..4b9c97e2ed1fb18ec9e7d92a209f757e7923887c
Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores.npy differ
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores.txt b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7b03d81da7c9e19cb40d0ab07526ff2717098fe1
--- /dev/null
+++ b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores.txt
@@ -0,0 +1,34 @@
+Fold 0:
+[0.8911138923654568, 0.8053691275167785, 0.8218390804597702, 0.6759474091260634, 0.49026345933562426, 0.5414634146341464, 0.8926968584251326, 0.6567685589519651, 0.665083135391924] (f1-score by class) 
+[0.8917835671342685, 0.8163265306122449, 0.7918050941306756, 0.6731361675908811, 0.42971887550200805, 0.4652137468566639, 0.9121227280306821, 0.6516464471403813, 0.7494646680942184] (f1 score (custom) by class) 
+Fold 1:
+[0.9430496019595835, 0.7955390334572491, 0.8764044943820225, 0.6267348429510592, 0.4487334137515078, 0.5780240073868883, 0.9217391304347826, 0.6927480916030534, 0.5359477124183006] (f1-score by class) 
+[0.9454813359528488, 0.7985074626865671, 0.8590308370044053, 0.6886035313001605, 0.37697608431293067, 0.5227120908483633, 0.9260621666360125, 0.6638624725676664, 0.5099502487562189] (f1 score (custom) by class) 
+Fold 2:
+[0.9236812570145904, 0.886021505376344, 0.8148148148148148, 0.609009009009009, 0.33070866141732286, 0.4550499445061043, 0.9151805132666376, 0.6416144745998609, 0.7478632478632479] (f1-score by class) 
+[0.9369307832422586, 0.871404399323181, 0.8560311284046692, 0.6083513318934485, 0.26717557251908397, 0.3833208676140613, 0.916855499389925, 0.6847890671420083, 0.7383966244725738] (f1 score (custom) by class) 
+Fold 3:
+[0.8853333333333333, 0.852910052910053, 0.85625, 0.6520423600605144, 0.5238095238095238, 0.4774774774774775, 0.9065713008493518, 0.6469534050179212, 0.5333333333333333] (f1-score by class) 
+[0.9019288236892149, 0.8722943722943723, 0.865992414664981, 0.6574130567419159, 0.5126771066368382, 0.45532646048109965, 0.8891616976499473, 0.6215564738292011, 0.5037037037037037] (f1 score (custom) by class) 
+Fold 4:
+[0.9075844486934354, 0.9157769869513642, 0.8726287262872628, 0.6608695652173913, 0.39397741530740277, 0.691970802919708, 0.8963752665245203, 0.6635338345864662, 0.6190476190476191] (f1-score by class) 
+[0.9079316500892629, 0.90994813767091, 0.9096045197740112, 0.6763972944108223, 0.37776708373435997, 0.6482494529540481, 0.9167829727843685, 0.6308077197998571, 0.6190476190476191] (f1 score (custom) by class) 
+Fold 5:
+[0.9178683385579938, 0.8285356695869838, 0.846441947565543, 0.6748654880860876, 0.4360400444938821, 0.5719360568383659, 0.8780926675663517, 0.6996402877697842, 0.582716049382716] (f1-score by class) 
+[0.9182137481184145, 0.8242031872509961, 0.8407738095238095, 0.6540524433849821, 0.4115917681646367, 0.4972205064854849, 0.8467811903522471, 0.7158630842841369, 0.6203995793901157] (f1 score (custom) by class) 
+Fold 6:
+[0.8995010691375623, 0.8513341804320204, 0.8838709677419355, 0.6583184257602862, 0.48705882352941177, 0.5072463768115942, 0.9056437389770723, 0.6950092421441775, 0.6325301204819277] (f1-score by class) 
+[0.8937677053824362, 0.8230958230958231, 0.8477722772277227, 0.6424581005586593, 0.4370777027027027, 0.48380726698262244, 0.9066031073446328, 0.7123910572186434, 0.621301775147929] (f1 score (custom) by class) 
+Fold 7:
+[0.9263301500682128, 0.8578371810449574, 0.909952606635071, 0.6057046979865772, 0.4444444444444444, 0.5838509316770186, 0.9132340052585451, 0.7061266874350987, 0.518796992481203] (f1-score by class) 
+[0.9301369863013699, 0.8547215496368039, 0.9393346379647749, 0.625866851595007, 0.3828547648772368, 0.5042918454935622, 0.923431407302375, 0.6738010305192231, 0.5646481178396072] (f1 score (custom) by class) 
+
+
+ ==> Score by CV:
+{'fold_0': 0.7156161040229845, 'fold_1': 0.7132133698160497, 'fold_2': 0.7026603808742147, 'fold_3': 0.703853420754612, 'fold_4': 0.7357516295039077, 'fold_5': 0.715126283316412, 'fold_6': 0.7245014383351098, 'fold_7': 0.7184752996701254} (f1-score) 
+{'fold_0': 0.7090242027880027, 'fold_1': 0.6990206922294637, 'fold_2': 0.695917252666801, 'fold_3': 0.6977837899656971, 'fold_4': 0.7329484944739176, 'fold_5': 0.7032332574394248, 'fold_6': 0.7075860906290191, 'fold_7': 0.7110096879477734} (f1-score (custom))
+
+ ==> Average score CV:
+CV f1-score: 0.716149740786677 (+/- 0.010015674854875221) 
+CV f1-score (custom): 0.7070654335175124 (+/- 0.011064764613146013)
+
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores_custom.npy b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores_custom.npy
new file mode 100644
index 0000000000000000000000000000000000000000..a3ef8da123c5333fb30afbe1661edf8a5b53a428
Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/scores_custom.npy differ
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/stage_1_4.py b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/stage_1_4.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb0eba0ad58d4d9f4d33ccac0571d6f273027e8c
--- /dev/null
+++ b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/stage_1_4.py
@@ -0,0 +1,209 @@
+"""
+Inception-ResNet + LSTM model (stage_1_4)
+"""
+from sklearn.model_selection import StratifiedKFold
+import os
+from utils import encode_labels, custom_multiclass_f1, multiclass_f1
+import shutil
+import ntpath
+import numpy as np
+from preprocess_and_segmentation import load_data, segment_all_dict_data, reshape_segmented_arrays
+from preprocessor import preprocess_input_data
+import tensorflow.keras as keras
+import pandas as pd
+# import sys  # This gives an error on the CSC server
+from model_architecture_V01 import Inc_ResNet_LSTM_v02
+
+
+def cross_validation(arr_of_segments, arr_of_labels, arr_of_IDs, ids_labels):
+    """ Subject cross-validation """
+
+    global epochs, batch_size, n_folds
+
+    # split the subjects into n folds, keeping the label distribution balanced
+    ids = ids_labels['subject']
+    skf = StratifiedKFold(n_splits=n_folds, random_state=None, shuffle=True)
+    subj_folds = [ids[test_index] for _, test_index in skf.split(ids_labels['subject'], ids_labels['label'])]
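+    # each fold holds out a disjoint set of subjects, so no subject contributes
+    # segments to both the training and the validation side of a split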
+
+    # true labels of each subject
+    subject_labels = {ID: None for ID in list(ids)}
+    for ID, label in zip(arr_of_IDs, arr_of_labels):
+        subject_labels[ID[0]] = label
+
+    # to save the predictions of each subject
+    subject_predictions = {ID: [] for ID in list(ids)}
+
+    # to save the f1-score of each fold
+    scores = {}
+    scores_custom = {}
+
+    for i, validation_fold in enumerate(subj_folds):
+        print(f"\n\nFold {i} ------------------------------------------------- \n")
+
+        # selector
+        selector = np.isin(arr_of_IDs.squeeze(), validation_fold)
+
+        # validation
+        arr_seg_validation = arr_of_segments[selector]
+        arr_labels_validation = arr_of_labels[selector]
+        arr_IDs_validation = arr_of_IDs[selector]
+
+        # train
+        arr_seg_train = arr_of_segments[np.invert(selector)]
+        arr_labels_train = arr_of_labels[np.invert(selector)]
+        arr_IDs_train = arr_of_IDs[np.invert(selector)]
+
+        # TODO
+        # Up-balance the 'STE' class (3x) by duplicating its segments in the training set
+        add_to_input = []
+        add_to_labels = []
+        add_to_IDs = []
+        for j in range(len(arr_labels_train)):
+            if arr_labels_train[j][8] == 1:
+                add_to_input.append(arr_seg_train[j])
+                add_to_labels.append(arr_labels_train[j])
+                add_to_IDs.append(arr_IDs_train[j])
+
+        arr_seg_train_balanced = np.concatenate([add_to_input, arr_seg_train, add_to_input])
+        arr_labels_train_balanced = np.concatenate([add_to_labels, arr_labels_train, add_to_labels])
+        arr_IDs_train_balanced = np.concatenate([add_to_IDs, arr_IDs_train, add_to_IDs])
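+        # the 'STE' segments now appear three times in the training arrays
+        # (the original plus the prepended and appended copies)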
+
+        # Build model
+        model = Inc_ResNet_LSTM_v02(segment_size, 12, classes=9)
+
+        # TODO
+        # callbacks
+        earlyStopping = keras.callbacks.EarlyStopping(monitor='val_categorical_accuracy', patience=10, verbose=0, mode='max')
+        mcp_save = keras.callbacks.ModelCheckpoint(os.path.join(experiments_dir, experiment_name, f"model_fold_{i}.h5"),
+                                                   save_best_only=True, monitor='val_categorical_accuracy', mode='max')
+        reduce_lr_loss = keras.callbacks.ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.1, patience=7,
+                                                           verbose=1, min_delta=1e-4, mode='max')
+
+        # model.summary()
+
+        model.fit(arr_seg_train_balanced, arr_labels_train_balanced, epochs=epochs, batch_size=batch_size,
+                  verbose=1, validation_data=(arr_seg_validation, arr_labels_validation), shuffle=True,
+                  callbacks=[earlyStopping, mcp_save, reduce_lr_loss])
+
+        # re-load best model
+        del model
+        model = keras.models.load_model(os.path.join(experiments_dir, experiment_name, f"model_fold_{i}.h5"))
+        _, accuracy = model.evaluate(arr_seg_validation, arr_labels_validation, batch_size=batch_size, verbose=1)
+        predictions = model.predict(arr_seg_validation, verbose=1)
+
+        # print fold results
+        print("Accuracy:", accuracy)
+
+        f1_score, f1_score_list = multiclass_f1(arr_labels_validation, predictions, return_list=True)
+        print("\nf1 score:", f1_score)
+        print(f1_score_list)
+
+        f1_score_custom, f1_score_custom_list = custom_multiclass_f1(arr_labels_validation, predictions,
+                                                                     return_list=True)
+        print("\nf1 score (custom):", f1_score_custom)
+        print(f1_score_custom_list)
+
+        # save predictions
+        for ID, pred in zip(arr_IDs_validation, predictions):
+            subject_predictions[ID[0]].append(pred)
+
+        # save f1-score
+        scores[f"fold_{i}"] = f1_score
+        scores_custom[f"fold_{i}"] = f1_score_custom
+
+        # save f1-score list (text file):
+        with open(os.path.join(experiments_dir, experiment_name, "scores.txt"), 'a') as f:
+            f.write(f"Fold {str(i)}:\n"
+                    f"{str(f1_score_list)} (f1-score by class) \n"
+                    f"{str(f1_score_custom_list)} (f1 score (custom) by class) \n")
+
+    # Average f1-score across folds
+    m, s = np.mean(list(scores.values())), np.std(list(scores.values()))
+    m_c, s_c = np.mean(list(scores_custom.values())), np.std(list(scores_custom.values()))
+
+    # save labels (to disk)
+    np.save(os.path.join(experiments_dir, experiment_name, "subject_labels.npy"), subject_labels)
+
+    # save predictions (to disk)
+    np.save(os.path.join(experiments_dir, experiment_name, "subject_predictions.npy"), subject_predictions)
+
+    # save f1-scores (to disk)
+    np.save(os.path.join(experiments_dir, experiment_name, "scores.npy"), scores)
+    np.save(os.path.join(experiments_dir, experiment_name, "scores_custom.npy"), scores_custom)
+
+    print("\n==========================================================\n")
+    print(f"CV f1-score: {str(m)} (+/- {str(s)}) \nCV f1-score (custom): {str(m_c)} (+/- {str(s_c)})")
+
+    # save f1-scores (text file)
+    with open(os.path.join(experiments_dir, experiment_name, "scores.txt"), 'a') as f:
+        f.write("\n\n ==> Score by CV:")
+        f.write(f"\n{str(scores)} (f1-score) \n{str(scores_custom)} (f1-score (custom))")
+        f.write("\n\n ==> Average score CV:")
+        f.write(f"\nCV f1-score: {str(m)} (+/- {str(s)}) \nCV f1-score (custom): {str(m_c)} (+/- {str(s_c)})\n\n")
+
+
+if __name__ == '__main__':
+
+    # Config
+    experiment_name = "stage_1_4"
+    experiments_dir = "experiments_stage_1"
+    data_dir = 'data/train_balanced'
+    segment_size = 2000
+    overlap = 0.5
+    epochs = 50  # ???
+    batch_size = 54  # ???
+    n_folds = 8
+
+    # create directory for the experiment
+    if not os.path.exists(os.path.join(experiments_dir, experiment_name)):
+        os.makedirs(os.path.join(experiments_dir, experiment_name))
+    else:
+        raise NameError(f"Already exist an experiment with the name '{experiment_name}'"
+                        f" in the '{experiments_dir}' directory.")
+
+    # save a copy of the script
+    shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__)))
+
+    # This gives an error on the CSC server when trying to import sys
+    # # Log stdout
+    # log_file = os.path.join(experiments_dir, experiment_name, 'logfile.log')
+    # sys.stdout = Logger(log_file)
+
+    # load data
+    data = load_data(data_dir)
+
+    # create an array with the label of each subject (used to keep the label
+    # distribution balanced across the cross-validation folds)
+    dic_labels = {}
+    for k, v in data.items():
+        dic_labels[k] = data[k]['info']['Dx']
+
+    ids_labels = pd.Series(dic_labels).reset_index()
+    ids_labels.columns = ['subject', 'label']
+
+    # pre-process signals
+    data = preprocess_input_data(data)
+
+    # segment signal
+    data = segment_all_dict_data(data, segment_size, overlap)
+
+    arr_of_segments, arr_of_labels, arr_of_IDs = reshape_segmented_arrays(data,
+                                                                          shuffle_IDs=True,
+                                                                          # Do not shuffle the segments to keep the
+                                                                          # order in time of the predictions
+                                                                          shuffle_segments=False,
+                                                                          segment_standardization_flag=True)
+
+    # Encode labels
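+    # (each label becomes a 9-dimensional binary vector, one position per class)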
+    arr_of_labels = np.array([i[0]['Dx'] for i in arr_of_labels])
+    arr_of_labels = encode_labels(arr_of_labels)
+
+    # Cross-validation
+    cross_validation(arr_of_segments, arr_of_labels, arr_of_IDs, ids_labels)
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/subject_labels.npy b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/subject_labels.npy
new file mode 100644
index 0000000000000000000000000000000000000000..ca852a733ab0b171f547084b3c77fd2b3efb5e33
Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/subject_labels.npy differ
diff --git a/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/subject_predictions.npy b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/subject_predictions.npy
new file mode 100644
index 0000000000000000000000000000000000000000..051028cba050ff83874ce665f36a9211433a16dd
Binary files /dev/null and b/experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02/subject_predictions.npy differ
diff --git a/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/best_models/stage_1_1_fold_2.h5 b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/best_models/stage_1_1_fold_2.h5
new file mode 100644
index 0000000000000000000000000000000000000000..18dde3762a4317602359deaed2951431ebe2e7af
Binary files /dev/null and b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/best_models/stage_1_1_fold_2.h5 differ
diff --git a/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/best_models/stage_1_2_fold_4.h5 b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/best_models/stage_1_2_fold_4.h5
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/info.txt b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/info.txt
new file mode 100644
index 0000000000000000000000000000000000000000..81e6d45a173c0831f1ca2f2cb2be14cf8d3b47dd
--- /dev/null
+++ b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/info.txt
@@ -0,0 +1,21 @@
+Model stage 1 1: experiments_stage_1/stage_1_1_001_baseline
+Model stage 1 2: experiments_stage_1/stage_1_3_002_Inc_ResNet_LSTM_v01
+
+
+Stage 1 1 f1-score: 0.7166549481041105
+[0.9266709928617781, 0.8609566184649611, 0.8327402135231317, 0.6370967741935484, 0.43922018348623854, 0.5284872298624754, 0.9198123743578289, 0.6876687668766877, 0.6172413793103448]
+
+Stage 1 1 f1-score (custom): 0.7104303349036974
+[0.9288409002211526, 0.8609566184649611, 0.8187543736878936, 0.6624182458494047, 0.3898615635179153, 0.477797513321492, 0.9109007255353034, 0.7059693217519867, 0.6383737517831669]
+
+Stage 1 2 f1-score: 0.7049611543306996
+[0.9364461738002594, 0.8802292263610315, 0.8833922261484098, 0.6249481972648155, 0.40916530278232405, 0.5281954887218046, 0.930981256890849, 0.6598639455782312, 0.49142857142857144]
+
+Stage 1 2 f1-score (custom): 0.692913257056548
+[0.9390037716217974, 0.8644754615038271, 0.8722958827634334, 0.6394165535956581, 0.3749250149970006, 0.49108703250611674, 0.929137323943662, 0.6470364017533828, 0.47884187082405344]
+
+Stage 2 f1-score: 0.7946091040231611
+[0.9327902240325866, 0.8833922261484098, 0.9032258064516129, 0.7572254335260116, 0.6129032258064516, 0.8104089219330854, 0.9301075268817204, 0.75, 0.5714285714285714]
+
+Stage 2 f1-score (custom): 0.7783335126220615
+[0.9362224039247752, 0.8704735376044568, 0.8860759493670886, 0.7293986636971047, 0.6129032258064516, 0.7910014513788098, 0.9316101238556812, 0.7448036951501155, 0.5025125628140703]
\ No newline at end of file
diff --git a/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/model_stage_2.h5 b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/model_stage_2.h5
new file mode 100644
index 0000000000000000000000000000000000000000..4df7054805998fdef5d5a520544505cd2224eee5
Binary files /dev/null and b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/model_stage_2.h5 differ
diff --git a/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/score_stage_2.txt b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/score_stage_2.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f712106003d11ae68ab8dd372d2ed7f7e776b674
--- /dev/null
+++ b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/score_stage_2.txt
@@ -0,0 +1,2 @@
+f1-score: 0.7946091040231611 
+ f1-score (custom): 0.7783335126220615
\ No newline at end of file
diff --git a/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/stage_2.py b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/stage_2.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5858046220dd1db58660259993cc9b433a66bc6
--- /dev/null
+++ b/experiments_stage_2/stage_2_002_s11_s13_Inc_ResNet_LSTM_v01/stage_2.py
@@ -0,0 +1,244 @@
+"""
+LSTM model (stage_2)
+"""
+
+from keras import Input, Model
+import numpy as np
+import os
+from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
+from keras.layers import Dense, LSTM, Dropout, LeakyReLU, BatchNormalization, Masking, Bidirectional
+from keras.models import load_model
+from keras.optimizers import Adam
+import tensorflow.keras as keras
+from logger import Logger
+from preprocess_and_segmentation import load_data, segment_all_dict_data, reshape_segmented_arrays
+from preprocessor import preprocess_input_data
+from utils import custom_multiclass_f1, split_train_validation_part_2, multiclass_f1, encode_labels
+import shutil
+import ntpath
+# import sys
+
+
+def build_model(n_timesteps, n_features, n_outputs):
+    # model
+    inputs = Input(shape=(n_timesteps, n_features), dtype='float32')
+    x = Masking(mask_value=0.)(inputs)
+    x = Bidirectional(LSTM(units=30, return_sequences=True))(x)
+    x = BatchNormalization()(x)
+    x = LeakyReLU()(x)
+    x = Bidirectional(LSTM(units=30))(x)
+    x = BatchNormalization()(x)
+    x = LeakyReLU()(x)
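+    # sigmoid outputs with binary cross-entropy treat the 9 classes as independent
+    # labels (multi-label) rather than as a single softmax distribution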
+    output = Dense(n_outputs, activation='sigmoid')(x)
+
+    model = Model(inputs=inputs, outputs=output)
+    # opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
+    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['categorical_accuracy'])
+    model.summary()
+
+    return model
+
+
+if __name__ == '__main__':
+
+    # Config
+    experiment_name = "stage_2_002_s11_s13"
+    experiments_dir = "experiments_stage_2"
+
+    data_dir_1 = 'experiments_stage_1/stage_1_1_001_baseline'
+    data_dir_2 = 'experiments_stage_1/stage_1_3_002_Inc_ResNet_LSTM_v01'
+    data_test_dir = 'data/test_balanced'
+
+    labels_file = 'subject_labels.npy'
+    predictions_file = 'subject_predictions.npy'
+    scores_file = 'scores_custom.npy'
+
+    segment_size = 2000
+    overlap = 0.5
+    epochs = 30
+    batch_size = 18
+    n_timesteps = 120
+
+    # create directory for the experiment
+    if not os.path.exists(os.path.join(experiments_dir, experiment_name, 'best_models')):
+        os.makedirs(os.path.join(experiments_dir, experiment_name, 'best_models'))
+    else:
+        raise NameError(f"Already exist an experiment with the name '{experiment_name}'"
+                        f" in the '{experiments_dir}' directory.")
+
+    # save a copy of the script
+    shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__)))
+
+    # # Log stdout
+    # log_file = os.path.join(experiments_dir, experiment_name, 'logfile.log')
+    # sys.stdout = Logger(log_file)
+
+    subject_labels = np.load(os.path.join(data_dir_1, labels_file), allow_pickle=True).item()
+    subject_predictions = np.load(os.path.join(data_dir_1, predictions_file), allow_pickle=True).item()
+    scores_1 = np.load(os.path.join(data_dir_1, scores_file), allow_pickle=True).item()
+
+    subject_labels_2 = np.load(os.path.join(data_dir_2, labels_file), allow_pickle=True).item()
+    subject_predictions_2 = np.load(os.path.join(data_dir_2, predictions_file), allow_pickle=True).item()
+    scores_2 = np.load(os.path.join(data_dir_2, scores_file), allow_pickle=True).item()
+
+    # pad inputs
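+    # (sequences shorter than n_timesteps are right-aligned; the leading zeros are
+    # skipped by the Masking layer of the stage-2 model)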
+    subject_predictions_padded = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions.keys()}
+    for k, v in subject_predictions.items():
+        subject_predictions_padded[k][-len(v):, :] = v
+
+    # pad the second model's predictions to the same fixed length
+    subject_predictions_padded_2 = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions_2.keys()}
+    for k, v in subject_predictions_2.items():
+        subject_predictions_padded_2[k][-len(v):, :] = v
+
+    # concatenate the two stage-1 models' predictions along the feature axis
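+    # (after this loop each subject has an (n_timesteps, 18) matrix: 9 class
+    # probabilities from each of the two stage-1 models at every time step)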
+    for k, v in subject_predictions.items():
+        subject_predictions_padded[k] = np.concatenate([subject_predictions_padded[k], subject_predictions_padded_2[k]],
+                                                       axis=1)
+
+    ###################################################################################################################
+    ## Stage 1 on test
+
+    # Get the best stage_1 models
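+    # (for each stage-1 experiment, the fold whose model scored the highest custom f1)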
+    best_fold_1 = max(scores_1, key=scores_1.get)
+    best_fold_2 = max(scores_2, key=scores_2.get)
+
+    # Models stage 1
+    model_stage_1_1 = load_model(os.path.join(data_dir_1, f"model_{best_fold_1}.h5"))
+    model_stage_1_2 = keras.models.load_model(os.path.join(data_dir_2, f"model_{best_fold_2}.h5"))
+    
+    # save best models
+    model_stage_1_1.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_1_{best_fold_1}.h5"))
+    model_stage_1_2.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_2_{best_fold_2}.h5"))
+
+    # Load test data
+    data_test = load_data(data_test_dir)
+    data_test = preprocess_input_data(data_test)
+    data_test = segment_all_dict_data(data_test, segment_size, overlap)
+    arr_of_segments, arr_of_labels, arr_of_IDs = reshape_segmented_arrays(data_test,
+                                                                          shuffle_IDs=False,
+                                                                          # Do not shuffle the segments to keep the
+                                                                          # order in time of the predictions
+                                                                          shuffle_segments=False,
+                                                                          segment_standardization_flag=True)
+    # Encode labels
+    arr_of_labels = np.array([i[0]['Dx'] for i in arr_of_labels])
+    arr_of_labels = encode_labels(arr_of_labels)
+
+    # Predictions stages 1
+    predictions_s1_1 = model_stage_1_1.predict(arr_of_segments, verbose=1)
+    predictions_s1_2 = model_stage_1_2.predict(arr_of_segments, verbose=1)
+
+    # Score stages 1
+    f1_score_s1_1, f1_score_p1_list = multiclass_f1(arr_of_labels, predictions_s1_1, return_list=True)
+    f1_score_custom_s1_1, f1_score_custom_s1_list_1 = custom_multiclass_f1(arr_of_labels, predictions_s1_1,
+                                                                       return_list=True)
+    print("\nStage 1 f1-score: ", f1_score_s1_1)
+    print(f1_score_p1_list)
+    print("\nStage 1 f1-score (custom):", f1_score_custom_s1_1)
+    print(f1_score_custom_s1_list_1, "\n\n")
+
+    f1_score_s1_2, f1_score_p1_list_2 = multiclass_f1(arr_of_labels, predictions_s1_2, return_list=True)
+    f1_score_custom_s1_2, f1_score_custom_s1_list_2 = custom_multiclass_f1(arr_of_labels, predictions_s1_2,
+                                                                       return_list=True)
+    print("\nStage 1 2 f1-score: ", f1_score_s1_2)
+    print(f1_score_p1_list_2)
+    print("\nStage 1 2 f1-score (custom):", f1_score_custom_s1_2)
+    print(f1_score_custom_s1_list_2, "\n\n")
+
+    # concatenate the two stage-1 models' per-segment predictions (18 features per segment)
+    predictions_stages_1 = np.concatenate([predictions_s1_1, predictions_s1_2], axis=-1)
+    
+    # Group by subject & padding:
+
+    # true labels of each subject
+    subject_labels_test = {ID: None for ID in list(np.unique(arr_of_IDs))}
+    for ID, label in zip(arr_of_IDs, arr_of_labels):
+        subject_labels_test[ID[0]] = label
+
+    # stages 1 predictions for each subject
+    subject_predictions_test = {ID: [] for ID in list(np.unique(arr_of_IDs))}
+    for ID, pred in zip(arr_of_IDs, predictions_stages_1):
+        subject_predictions_test[ID[0]].append(pred)
+
+    # pad inputs
+    subject_predictions_padded_test = {k: np.zeros((n_timesteps, 18)) for k in subject_predictions_test.keys()}
+    for k, v in subject_predictions_test.items():
+        subject_predictions_padded_test[k][-len(v):, :] = v
+
+    # convert to array
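+    # (split=0: nothing is held out here; the whole test set comes back as a single
+    # (X, y) pair and is used as the stage-2 validation data)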
+    X_val, y_val, _, _ = split_train_validation_part_2(subject_predictions_padded_test, subject_labels_test, split=0)
+
+    ## end stage 1 on test
+    ###################################################################################################################
+    # Stage 2
+
+    # convert to array
+    X_train, y_train, _, _ = split_train_validation_part_2(subject_predictions_padded, subject_labels, split=0)
+
+    # Model
+    model_stage_2 = build_model(n_timesteps, 18, 9)
+
+    # callbacks
+    earlyStopping = EarlyStopping(monitor='val_categorical_accuracy', patience=16, verbose=0, mode='max')
+    mcp_save = ModelCheckpoint(os.path.join(experiments_dir, experiment_name, "model_stage_2.h5"),
+                               save_best_only=True, monitor='val_categorical_accuracy', mode='max')
+    reduce_lr_loss = ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.1, patience=10, verbose=1,
+                                       min_delta=1e-4, mode='max')
+
+    # train stage 2
+    model_stage_2.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=2, shuffle=True,
+              validation_data=(X_val, y_val),
+              callbacks=[earlyStopping, mcp_save, reduce_lr_loss])
+
+    # reloading the best model
+    del model_stage_2
+    model_stage_2 = load_model(os.path.join(experiments_dir, experiment_name, f"model_stage_2.h5"))
+
+    # final predictions
+    _, accuracy = model_stage_2.evaluate(X_val, y_val, verbose=1)
+    final_predictions = model_stage_2.predict(X_val, verbose=1)
+
+    print(f"\nAccuracy: {accuracy}")
+
+    score, score_list = multiclass_f1(y_val, final_predictions, return_list=True)
+    print(f"\nf1-score: {score}")
+    print(score_list)
+
+    # f1-score
+    score_custom, score_custom_list = custom_multiclass_f1(y_val, final_predictions, return_list=True)
+    print(f"\nf1-score (custom): {score_custom}")
+    print(score_custom_list)
+
+    # save f1-score
+    with open(os.path.join(experiments_dir, experiment_name, "score_stage_2.txt"), 'w') as f:
+        f.write(f"f1-score: {str(score)} \n f1-score (custom): {str(score_custom)}")
+
+    # Save info and results test
+    with open(os.path.join(experiments_dir, experiment_name, "info.txt"), 'w') as f:
+        f.write(f"Model stage 1 1: {data_dir_1}\n")
+        f.write(f"Model stage 2 2: {data_dir_2}\n")
+
+        f.write(f"\n\nStage 1 1 f1-score: {str(f1_score_s1_1)}\n")
+        f.write(str(f1_score_p1_list))
+        f.write(f"\n\nStage 1 1 f1-score (custom): {str(f1_score_custom_s1_1)}\n")
+        f.write(str(f1_score_custom_s1_list_1))
+
+        f.write(f"\n\nStage 1 2 f1-score: {str(f1_score_s1_2)}\n")
+        f.write(str(f1_score_p1_list_2))
+        f.write(f"\n\nStage 1 2 f1-score (custom): {str(f1_score_custom_s1_2)}\n")
+        f.write(str(f1_score_custom_s1_list_2))
+
+        f.write(f"\n\nStage 2 f1-score: {str(score)}\n")
+        f.write(str(score_list))
+        f.write(f"\n\nStage 2 f1-score (custom): {str(score_custom)}\n")
+        f.write(str(score_custom_list))
+
+    # from sklearn.metrics import multilabel_confusion_matrix
+    #
+    #
+    # pred = np.where(predictions > 0.5, 1, 0)
+    # true = y_validation.copy()
+    #
+    # confusion = multilabel_confusion_matrix(true, pred)
diff --git a/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_1_fold_2.h5 b/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_1_fold_2.h5
new file mode 100644
index 0000000000000000000000000000000000000000..f825de1f0356c4cfcd14d8b9093c0992f62b5328
Binary files /dev/null and b/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_1_fold_2.h5 differ
diff --git a/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_2_fold_5.h5 b/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_2_fold_5.h5
new file mode 100644
index 0000000000000000000000000000000000000000..463f938c92a2fbc9604e066bc7f386184c3505a0
Binary files /dev/null and b/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_2_fold_5.h5 differ
diff --git a/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_3_fold_4.h5 b/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_3_fold_4.h5
new file mode 100644
index 0000000000000000000000000000000000000000..d46bd21445c681d3eba34f535418bfd9e1dc23f9
Binary files /dev/null and b/experiments_stage_2/stage_2_3x_s11_s12_s14/best_models/stage_1_3_fold_4.h5 differ
diff --git a/experiments_stage_2/stage_2_3x_s11_s12_s14/info.txt b/experiments_stage_2/stage_2_3x_s11_s12_s14/info.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9ab7d4d2df08f69f917a07be630d59e5cdffcbfd
--- /dev/null
+++ b/experiments_stage_2/stage_2_3x_s11_s12_s14/info.txt
@@ -0,0 +1,28 @@
+Model stage 1 1: experiments_stage_1/stage_1_1_001_baseline
+Model stage 1 2: experiments_stage_1/stage_1_2_001_baseline
+Model stage 1 3: experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02
+
+
+Stage 1 1 f1-score: 0.7166549481041105
+[0.9266709928617781, 0.8609566184649611, 0.8327402135231317, 0.6370967741935484, 0.43922018348623854, 0.5284872298624754, 0.9198123743578289, 0.6876687668766877, 0.6172413793103448]
+
+Stage 1 1 f1-score (custom): 0.7104303349036974
+[0.9288409002211526, 0.8609566184649611, 0.8187543736878936, 0.6624182458494047, 0.3898615635179153, 0.477797513321492, 0.9109007255353034, 0.7059693217519867, 0.6383737517831669]
+
+Stage 1 2 f1-score: 0.671673171827851
+[0.8952380952380953, 0.8505481823427582, 0.8208955223880597, 0.6498516320474778, 0.37857577601947656, 0.5265188042430087, 0.8833333333333333, 0.6826503923278117, 0.3574468085106383]
+
+Stage 1 2 f1-score (custom): 0.6551974169203674
+[0.8721007289595759, 0.8318284424379232, 0.7840342124019958, 0.7088525651399903, 0.3232176262731241, 0.481651376146789, 0.8559892328398385, 0.7140251687032646, 0.32507739938080493]
+
+Stage 1 3 f1-score: 0.7083476597834296
+[0.8952380952380953, 0.8505481823427582, 0.8208955223880597, 0.6498516320474778, 0.37857577601947656, 0.5265188042430087, 0.8833333333333333, 0.6826503923278117, 0.3574468085106383]
+
+Stage 1 3 f1-score (custom): 0.7002310123150498
+[0.940453074433657, 0.8579148840351273, 0.8671328671328671, 0.6732059020791415, 0.40076335877862596, 0.5090183836281651, 0.9345300950369588, 0.6986736409490005, 0.42038690476190477]
+
+Stage 2 f1-score: 0.7997068957793295
+[0.9357429718875502, 0.8601398601398601, 0.8888888888888888, 0.770949720670391, 0.6666666666666666, 0.8208955223880597, 0.9413333333333334, 0.7833333333333333, 0.5294117647058824]
+
+Stage 2 f1-score (custom): 0.7832666870847034
+[0.9471544715447154, 0.8529819694868238, 0.8547008547008547, 0.7582417582417582, 0.6333333333333333, 0.7994186046511628, 0.9473966720343532, 0.7993197278911565, 0.45685279187817257]
\ No newline at end of file
diff --git a/experiments_stage_2/stage_2_3x_s11_s12_s14/model_stage_2.h5 b/experiments_stage_2/stage_2_3x_s11_s12_s14/model_stage_2.h5
new file mode 100644
index 0000000000000000000000000000000000000000..69cf19e7725afdac02f3f308004651ffeca2f0ee
Binary files /dev/null and b/experiments_stage_2/stage_2_3x_s11_s12_s14/model_stage_2.h5 differ
diff --git a/experiments_stage_2/stage_2_3x_s11_s12_s14/score_stage_2.txt b/experiments_stage_2/stage_2_3x_s11_s12_s14/score_stage_2.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0d0abc498ca6d268a548156d648888cc08f98232
--- /dev/null
+++ b/experiments_stage_2/stage_2_3x_s11_s12_s14/score_stage_2.txt
@@ -0,0 +1,2 @@
+f1-score: 0.7997068957793295 
+ f1-score (custom): 0.7832666870847034
\ No newline at end of file
diff --git a/experiments_stage_2/stage_2_3x_s11_s12_s14/stage_2_3x.py b/experiments_stage_2/stage_2_3x_s11_s12_s14/stage_2_3x.py
new file mode 100644
index 0000000000000000000000000000000000000000..5840d3174d8ba4768d96cf149756ffbb84902abf
--- /dev/null
+++ b/experiments_stage_2/stage_2_3x_s11_s12_s14/stage_2_3x.py
@@ -0,0 +1,275 @@
+"""
+LSTM model (stage_2)
+"""
+
+from keras import Input, Model
+import numpy as np
+import os
+from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
+from keras.layers import Dense, LSTM, Dropout, LeakyReLU, BatchNormalization, Masking, Bidirectional
+from keras.models import load_model
+from keras.optimizers import Adam
+import tensorflow.keras as keras
+from logger import Logger
+from preprocess_and_segmentation import load_data, segment_all_dict_data, reshape_segmented_arrays
+from preprocessor import preprocess_input_data
+from utils import custom_multiclass_f1, split_train_validation_part_2, multiclass_f1, encode_labels
+import shutil
+import ntpath
+# import sys
+
+
+def build_model(n_timesteps, n_features, n_outputs):
+    # model
+    inputs = Input(shape=(n_timesteps, n_features), dtype='float32')
+    x = Masking(mask_value=0.)(inputs)
+    x = Bidirectional(LSTM(units=30, return_sequences=True))(x)
+    x = BatchNormalization()(x)
+    x = LeakyReLU()(x)
+    x = Bidirectional(LSTM(units=30))(x)
+    x = BatchNormalization()(x)
+    x = LeakyReLU()(x)
+    output = Dense(n_outputs, activation='sigmoid')(x)
+
+    model = Model(inputs=inputs, outputs=output)
+    # opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
+    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['categorical_accuracy'])
+    model.summary()
+
+    return model
+
+
+if __name__ == '__main__':
+
+    # Config
+    experiment_name = "stage_2_3x_s11_s12_s14"
+    experiments_dir = "experiments_stage_2"
+
+    data_dir_1 = 'experiments_stage_1/stage_1_1_001_baseline'
+    data_dir_2 = 'experiments_stage_1/stage_1_2_001_baseline'
+    data_dir_3 = 'experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02'
+    data_test_dir = 'data/test_balanced'
+
+    labels_file = 'subject_labels.npy'
+    predictions_file = 'subject_predictions.npy'
+    scores_file = 'scores_custom.npy'
+
+    segment_size = 2000
+    overlap = 0.5
+    epochs = 30
+    batch_size = 18
+    n_timesteps = 120
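+    # 27 features = 3 stage-1 models x 9 class probabilities per time step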
+    n_features = 27
+    n_outputs = 9
+
+    # create directory for the experiment
+    if not os.path.exists(os.path.join(experiments_dir, experiment_name, 'best_models')):
+        os.makedirs(os.path.join(experiments_dir, experiment_name, 'best_models'))
+    else:
+        raise NameError(f"Already exist an experiment with the name '{experiment_name}'"
+                        f" in the '{experiments_dir}' directory.")
+
+    # save a copy of the script
+    shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__)))
+
+    # # Log stdout
+    # log_file = os.path.join(experiments_dir, experiment_name, 'logfile.log')
+    # sys.stdout = Logger(log_file)
+
+    subject_labels = np.load(os.path.join(data_dir_1, labels_file), allow_pickle=True).item()
+    subject_predictions = np.load(os.path.join(data_dir_1, predictions_file), allow_pickle=True).item()
+    scores_1 = np.load(os.path.join(data_dir_1, scores_file), allow_pickle=True).item()
+
+    # subject_labels_2 = np.load(os.path.join(data_dir_2, labels_file), allow_pickle=True).item()  # TODO
+    subject_predictions_2 = np.load(os.path.join(data_dir_2, predictions_file), allow_pickle=True).item()
+    scores_2 = np.load(os.path.join(data_dir_2, scores_file), allow_pickle=True).item()
+
+    # subject_labels_3 = np.load(os.path.join(data_dir_1, labels_file), allow_pickle=True).item()  # TODO
+    subject_predictions_3 = np.load(os.path.join(data_dir_3, predictions_file), allow_pickle=True).item()
+    scores_3 = np.load(os.path.join(data_dir_3, scores_file), allow_pickle=True).item()
+
+    # pad inputs
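+    # (right-aligned: the zero padding at the front is masked out by the stage-2 model)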
+    subject_predictions_padded = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions.keys()}
+    for k, v in subject_predictions.items():
+        subject_predictions_padded[k][-len(v):, :] = v
+
+    subject_predictions_padded_2 = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions_2.keys()}
+    for k, v in subject_predictions_2.items():
+        subject_predictions_padded_2[k][-len(v):, :] = v
+
+    subject_predictions_padded_3 = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions_3.keys()}
+    for k, v in subject_predictions_3.items():
+        subject_predictions_padded_3[k][-len(v):, :] = v
+
+    # concatenate the three stage-1 models' predictions along the feature axis
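+    # (after this loop each subject has an (n_timesteps, 27) matrix combining the
+    # three stage-1 models' class probabilities)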
+    for k, v in subject_predictions.items():
+        subject_predictions_padded[k] = np.concatenate([subject_predictions_padded[k],
+                                                        subject_predictions_padded_2[k],
+                                                        subject_predictions_padded_3[k]], axis=1)
+
+    ###################################################################################################################
+    ## Stage 1 on test
+
+    # Get the best stage_1 models
+    best_fold_1 = max(scores_1, key=scores_1.get)
+    best_fold_2 = max(scores_2, key=scores_2.get)
+    best_fold_3 = max(scores_3, key=scores_3.get)
+
+    # Models stage 1
+    model_stage_1_1 = load_model(os.path.join(data_dir_1, f"model_{best_fold_1}.h5"))
+    model_stage_1_2 = keras.models.load_model(os.path.join(data_dir_2, f"model_{best_fold_2}.h5"))
+    model_stage_1_3 = keras.models.load_model(os.path.join(data_dir_3, f"model_{best_fold_3}.h5"))
+
+    # save best models
+    model_stage_1_1.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_1_{best_fold_1}.h5"))
+    model_stage_1_2.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_2_{best_fold_2}.h5"))
+    model_stage_1_3.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_3_{best_fold_3}.h5"))
+
+    # Load test data
+    data_test = load_data(data_test_dir)
+    data_test = preprocess_input_data(data_test)
+    data_test = segment_all_dict_data(data_test, segment_size, overlap)
+    arr_of_segments, arr_of_labels, arr_of_IDs = reshape_segmented_arrays(data_test,
+                                                                          shuffle_IDs=False,
+                                                                          # Do not shuffle the segments to keep the
+                                                                          # order in time of the predictions
+                                                                          shuffle_segments=False,
+                                                                          segment_standardization_flag=True)
+    # Encode labels
+    arr_of_labels = np.array([i[0]['Dx'] for i in arr_of_labels])
+    arr_of_labels = encode_labels(arr_of_labels)
+
+    # Predictions stages 1
+    predictions_s1_1 = model_stage_1_1.predict(arr_of_segments, verbose=1)
+    predictions_s1_2 = model_stage_1_2.predict(arr_of_segments, verbose=1)
+    predictions_s1_3 = model_stage_1_3.predict(arr_of_segments, verbose=1)
+
+    # Score stages 1
+    f1_score_s1_1, f1_score_p1_list = multiclass_f1(arr_of_labels, predictions_s1_1, return_list=True)
+    f1_score_custom_s1_1, f1_score_custom_s1_list_1 = custom_multiclass_f1(arr_of_labels, predictions_s1_1,
+                                                                       return_list=True)
+    print("\nStage 1 1 f1-score: ", f1_score_s1_1)
+    print(f1_score_p1_list)
+    print("\nStage 1 1 f1-score (custom):", f1_score_custom_s1_1)
+    print(f1_score_custom_s1_list_1, "\n\n")
+
+    f1_score_s1_2, f1_score_p1_list_2 = multiclass_f1(arr_of_labels, predictions_s1_2, return_list=True)
+    f1_score_custom_s1_2, f1_score_custom_s1_list_2 = custom_multiclass_f1(arr_of_labels, predictions_s1_2,
+                                                                       return_list=True)
+    print("\nStage 1 2 f1-score: ", f1_score_s1_2)
+    print(f1_score_p1_list_2)
+    print("\nStage 1 2 f1-score (custom):", f1_score_custom_s1_2)
+    print(f1_score_custom_s1_list_2, "\n\n")
+
+    f1_score_s1_3, f1_score_p1_list_3 = multiclass_f1(arr_of_labels, predictions_s1_3, return_list=True)
+    f1_score_custom_s1_3, f1_score_custom_s1_list_3 = custom_multiclass_f1(arr_of_labels, predictions_s1_3,
+                                                                           return_list=True)
+    print("\nStage 1 3 f1-score: ", f1_score_s1_3)
+    print(f1_score_p1_list_3)
+    print("\nStage 1 3 f1-score (custom):", f1_score_custom_s1_3)
+    print(f1_score_custom_s1_list_3, "\n\n")
+
+    # concatenate the three stage-1 models' per-segment predictions (27 features per segment)
+    predictions_stages_1 = np.concatenate([predictions_s1_1, predictions_s1_2, predictions_s1_3], axis=-1)
+
+    # Group by subject & padding:
+
+    # true labels of each subject
+    subject_labels_test = {ID: None for ID in list(np.unique(arr_of_IDs))}
+    for ID, label in zip(arr_of_IDs, arr_of_labels):
+        subject_labels_test[ID[0]] = label
+
+    # stages 1 predictions for each subject
+    subject_predictions_test = {ID: [] for ID in list(np.unique(arr_of_IDs))}
+    for ID, pred in zip(arr_of_IDs, predictions_stages_1):
+        subject_predictions_test[ID[0]].append(pred)
+
+    # pad inputs
+    subject_predictions_padded_test = {k: np.zeros((n_timesteps, n_features)) for k in subject_predictions_test.keys()}
+    for k, v in subject_predictions_test.items():
+        subject_predictions_padded_test[k][-len(v):, :] = v
+
+    # convert to array
+    X_val, y_val, _, _ = split_train_validation_part_2(subject_predictions_padded_test, subject_labels_test,
+                                                       n_variables=n_features, split=0)
+
+    ## end stage 1 on test
+    ###################################################################################################################
+    # Stage 2
+
+    # convert to array
+    X_train, y_train, _, _ = split_train_validation_part_2(subject_predictions_padded, subject_labels,
+                                                           n_variables=n_features, split=0)
+
+    # Model
+    model_stage_2 = build_model(n_timesteps, n_features, n_outputs)
+
+    # callbacks
+    earlyStopping = EarlyStopping(monitor='val_categorical_accuracy', patience=16, verbose=0, mode='max')
+    mcp_save = ModelCheckpoint(os.path.join(experiments_dir, experiment_name, "model_stage_2.h5"),
+                               save_best_only=True, monitor='val_categorical_accuracy', mode='max')
+    reduce_lr_loss = ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.1, patience=10, verbose=1,
+                                       min_delta=1e-4, mode='max')
+
+    # train stage 2
+    model_stage_2.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=2, shuffle=True,
+              validation_data=(X_val, y_val),
+              callbacks=[earlyStopping, mcp_save, reduce_lr_loss])
+
+    # reloading the best model
+    del model_stage_2
+    model_stage_2 = load_model(os.path.join(experiments_dir, experiment_name, f"model_stage_2.h5"))
+
+    # final predictions
+    _, accuracy = model_stage_2.evaluate(X_val, y_val, verbose=1)
+    final_predictions = model_stage_2.predict(X_val, verbose=1)
+
+    print(f"\nAccuracy: {accuracy}")
+
+    score, score_list = multiclass_f1(y_val, final_predictions, return_list=True)
+    print(f"\nf1-score: {score}")
+    print(score_list)
+
+    # f1-score
+    score_custom, score_custom_list = custom_multiclass_f1(y_val, final_predictions, return_list=True)
+    print(f"\nf1-score (custom): {score_custom}")
+    print(score_custom_list)
+
+    # save f1-score
+    with open(os.path.join(experiments_dir, experiment_name, "score_stage_2.txt"), 'w') as f:
+        f.write(f"f1-score: {str(score)} \n f1-score (custom): {str(score_custom)}")
+
+    # Save info and results test
+    with open(os.path.join(experiments_dir, experiment_name, "info.txt"), 'w') as f:
+        f.write(f"Model stage 1 1: {data_dir_1}\n")
+        f.write(f"Model stage 2 2: {data_dir_2}\n")
+        f.write(f"Model stage 2 3: {data_dir_3}\n")
+
+        f.write(f"\n\nStage 1 1 f1-score: {str(f1_score_s1_1)}\n")
+        f.write(str(f1_score_p1_list))
+        f.write(f"\n\nStage 1 1 f1-score (custom): {str(f1_score_custom_s1_1)}\n")
+        f.write(str(f1_score_custom_s1_list_1))
+
+        f.write(f"\n\nStage 1 2 f1-score: {str(f1_score_s1_2)}\n")
+        f.write(str(f1_score_p1_list_2))
+        f.write(f"\n\nStage 1 2 f1-score (custom): {str(f1_score_custom_s1_2)}\n")
+        f.write(str(f1_score_custom_s1_list_2))
+
+        f.write(f"\n\nStage 1 3 f1-score: {str(f1_score_s1_3)}\n")
+        f.write(str(f1_score_p1_list_3))
+        f.write(f"\n\nStage 1 3 f1-score (custom): {str(f1_score_custom_s1_3)}\n")
+        f.write(str(f1_score_custom_s1_list_3))
+
+        f.write(f"\n\nStage 2 f1-score: {str(score)}\n")
+        f.write(str(score_list))
+        f.write(f"\n\nStage 2 f1-score (custom): {str(score_custom)}\n")
+        f.write(str(score_custom_list))
+
+    # from sklearn.metrics import multilabel_confusion_matrix
+    #
+    #
+    # pred = np.where(predictions > 0.5, 1, 0)
+    # true = y_validation.copy()
+    #
+    # confusion = multilabel_confusion_matrix(true, pred)
diff --git a/experiments_stage_2/stage_2_s11_s12_baseline_repetition/best_models/stage_1_1_fold_2.h5 b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/best_models/stage_1_1_fold_2.h5
new file mode 100644
index 0000000000000000000000000000000000000000..dcad1eba8fa5b4e468c14c4be567001417a0c3b9
Binary files /dev/null and b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/best_models/stage_1_1_fold_2.h5 differ
diff --git a/experiments_stage_2/stage_2_s11_s12_baseline_repetition/best_models/stage_1_2_fold_5.h5 b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/best_models/stage_1_2_fold_5.h5
new file mode 100644
index 0000000000000000000000000000000000000000..5b4926447f6777a701686171778e152cbbf90f8b
Binary files /dev/null and b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/best_models/stage_1_2_fold_5.h5 differ
diff --git a/experiments_stage_2/stage_2_s11_s12_baseline_repetition/info.txt b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/info.txt
new file mode 100644
index 0000000000000000000000000000000000000000..98ed3d43b84953f3bbd7f3da63d67f2dc7808eac
--- /dev/null
+++ b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/info.txt
@@ -0,0 +1,21 @@
+Model stage 1 1: experiments_stage_1/stage_1_1_001_baseline
+Model stage 1 2: experiments_stage_1/stage_1_2_001_baseline
+
+
+Stage 1 1 f1-score: 0.7166549481041105
+[0.9266709928617781, 0.8609566184649611, 0.8327402135231317, 0.6370967741935484, 0.43922018348623854, 0.5284872298624754, 0.9198123743578289, 0.6876687668766877, 0.6172413793103448]
+
+Stage 1 1 f1-score (custom): 0.7104303349036974
+[0.9288409002211526, 0.8609566184649611, 0.8187543736878936, 0.6624182458494047, 0.3898615635179153, 0.477797513321492, 0.9109007255353034, 0.7059693217519867, 0.6383737517831669]
+
+Stage 1 2 f1-score: 0.671673171827851
+[0.8952380952380953, 0.8505481823427582, 0.8208955223880597, 0.6498516320474778, 0.37857577601947656, 0.5265188042430087, 0.8833333333333333, 0.6826503923278117, 0.3574468085106383]
+
+Stage 1 2 f1-score (custom): 0.6551974169203674
+[0.8721007289595759, 0.8318284424379232, 0.7840342124019958, 0.7088525651399903, 0.3232176262731241, 0.481651376146789, 0.8559892328398385, 0.7140251687032646, 0.32507739938080493]
+
+Stage 2 f1-score: 0.8030992668569547
+[0.9354838709677419, 0.852112676056338, 0.8913043478260869, 0.7590027700831025, 0.6942148760330579, 0.8072727272727273, 0.9345794392523364, 0.764179104477612, 0.5897435897435898]
+
+Stage 2 f1-score (custom): 0.7921984779368064
+[0.9446254071661238, 0.8414464534075105, 0.8686440677966102, 0.7502738225629791, 0.6840390879478827, 0.7985611510791367, 0.9398496240601504, 0.7467911318553092, 0.5555555555555556]
\ No newline at end of file
diff --git a/experiments_stage_2/stage_2_s11_s12_baseline_repetition/model_stage_2.h5 b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/model_stage_2.h5
new file mode 100644
index 0000000000000000000000000000000000000000..cde7305fb601f8f975d561dd4f6620fb8f6a2b02
Binary files /dev/null and b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/model_stage_2.h5 differ
diff --git a/experiments_stage_2/stage_2_s11_s12_baseline_repetition/score_stage_2.txt b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/score_stage_2.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1238ea5072cb1fb02ce2834e2c3a9a2595c14b2c
--- /dev/null
+++ b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/score_stage_2.txt
@@ -0,0 +1,2 @@
+f1-score: 0.8030992668569547 
+ f1-score (custom): 0.7921984779368064
\ No newline at end of file
diff --git a/experiments_stage_2/stage_2_s11_s12_baseline_repetition/stage_2.py b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/stage_2.py
new file mode 100644
index 0000000000000000000000000000000000000000..15595de9152e82f18b719b327575b2efece9c470
--- /dev/null
+++ b/experiments_stage_2/stage_2_s11_s12_baseline_repetition/stage_2.py
@@ -0,0 +1,247 @@
+"""
+LSTM model (stage_2)
+"""
+
+from keras import Input, Model
+import numpy as np
+import os
+from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
+from keras.layers import Dense, LSTM, Dropout, LeakyReLU, BatchNormalization, Masking, Bidirectional
+from keras.models import load_model
+from keras.optimizers import Adam
+import tensorflow.keras as keras
+from logger import Logger
+from preprocess_and_segmentation import load_data, segment_all_dict_data, reshape_segmented_arrays
+from preprocessor import preprocess_input_data
+from utils import custom_multiclass_f1, split_train_validation_part_2, multiclass_f1, encode_labels
+import shutil
+import ntpath
+# import sys
+
+
+def build_model(n_timesteps, n_features, n_outputs):
+    # model
+    inputs = Input(shape=(n_timesteps, n_features), dtype='float32')
+    x = Masking(mask_value=0.)(inputs)
+    x = Bidirectional(LSTM(units=30, return_sequences=True))(x)
+    x = BatchNormalization()(x)
+    x = LeakyReLU()(x)
+    x = Bidirectional(LSTM(units=30))(x)
+    x = BatchNormalization()(x)
+    x = LeakyReLU()(x)
+    output = Dense(n_outputs, activation='sigmoid')(x)
+
+    model = Model(inputs=inputs, outputs=output)
+    # opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
+    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['categorical_accuracy'])
+    model.summary()
+
+    return model
+
+
+if __name__ == '__main__':
+
+    # Config
+    experiment_name = "stage_2_s11_s12_repetition"
+    experiments_dir = "experiments_stage_2"
+
+    data_dir_1 = 'experiments_stage_1/stage_1_1_001_baseline'
+    data_dir_2 = 'experiments_stage_1/stage_1_2_001_baseline'
+    data_test_dir = 'data/test_balanced'
+
+    labels_file = 'subject_labels.npy'
+    predictions_file = 'subject_predictions.npy'
+    scores_file = 'scores_custom.npy'
+
+    segment_size = 2000
+    overlap = 0.5
+    epochs = 30
+    batch_size = 18
+    n_timesteps = 120
+    n_features = 18  # 2 stage-1 models x 9 class probabilities each
+    n_outputs = 9  # diagnosis classes
+
+    # create directory for the experiment
+    if not os.path.exists(os.path.join(experiments_dir, experiment_name, 'best_models')):
+        os.makedirs(os.path.join(experiments_dir, experiment_name, 'best_models'))
+    else:
+        raise NameError(f"An experiment named '{experiment_name}' already exists"
+                        f" in the '{experiments_dir}' directory.")
+
+    # save a copy of the script
+    shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__)))
+
+    # # Log stdout
+    # log_file = os.path.join(experiments_dir, experiment_name, 'logfile.log')
+    # sys.stdout = Logger(log_file)
+
+    subject_labels = np.load(os.path.join(data_dir_1, labels_file), allow_pickle=True).item()
+    subject_predictions = np.load(os.path.join(data_dir_1, predictions_file), allow_pickle=True).item()
+    scores_1 = np.load(os.path.join(data_dir_1, scores_file), allow_pickle=True).item()
+
+    # subject_labels_2 = np.load(os.path.join(data_dir_2, labels_file), allow_pickle=True).item()  # TODO
+    subject_predictions_2 = np.load(os.path.join(data_dir_2, predictions_file), allow_pickle=True).item()
+    scores_2 = np.load(os.path.join(data_dir_2, scores_file), allow_pickle=True).item()
+
+    # pad inputs: zero-pad each subject's prediction sequence at the front to n_timesteps rows
+    # (the stage-2 Masking layer skips the all-zero rows)
+    subject_predictions_padded = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions.keys()}
+    for k, v in subject_predictions.items():
+        subject_predictions_padded[k][-len(v):, :] = v
+
+    subject_predictions_padded_2 = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions_2.keys()}
+    for k, v in subject_predictions_2.items():
+        subject_predictions_padded_2[k][-len(v):, :] = v
+
+    # concatenate the two stage-1 prediction streams along the feature axis (9 + 9 = 18 features)
+    for k, v in subject_predictions.items():
+        subject_predictions_padded[k] = np.concatenate([subject_predictions_padded[k], subject_predictions_padded_2[k]],
+                                                       axis=1)
+
+    ###################################################################################################################
+    ## Stage 1 on test
+
+    # Get the best stage_1 models (the fold with the highest custom f1-score in each experiment)
+    best_fold_1 = [(k, v) for k, v in sorted(scores_1.items(), key=lambda item: item[1], reverse=True)][0][0]
+    best_fold_2 = [(k, v) for k, v in sorted(scores_2.items(), key=lambda item: item[1], reverse=True)][0][0]
+
+    # Models stage 1
+    model_stage_1_1 = load_model(os.path.join(data_dir_1, f"model_{best_fold_1}.h5"))
+    model_stage_1_2 = keras.models.load_model(os.path.join(data_dir_2, f"model_{best_fold_2}.h5"))
+    
+    # save best models
+    model_stage_1_1.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_1_{best_fold_1}.h5"))
+    model_stage_1_2.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_2_{best_fold_2}.h5"))
+
+    # Load test data
+    data_test = load_data(data_test_dir)
+    data_test = preprocess_input_data(data_test)
+    data_test = segment_all_dict_data(data_test, segment_size, overlap)
+    arr_of_segments, arr_of_labels, arr_of_IDs = reshape_segmented_arrays(data_test,
+                                                                          shuffle_IDs=False,
+                                                                          # Do not shuffle the segments, to preserve
+                                                                          # the temporal order of the predictions
+                                                                          shuffle_segments=False,
+                                                                          segment_standardization_flag=True)
+    # Encode labels
+    arr_of_labels = np.array([i[0]['Dx'] for i in arr_of_labels])
+    arr_of_labels = encode_labels(arr_of_labels)
+
+    # Predictions stages 1
+    predictions_s1_1 = model_stage_1_1.predict(arr_of_segments, verbose=1)
+    predictions_s1_2 = model_stage_1_2.predict(arr_of_segments, verbose=1)
+
+    # Score stages 1
+    f1_score_s1_1, f1_score_p1_list = multiclass_f1(arr_of_labels, predictions_s1_1, return_list=True)
+    f1_score_custom_s1_1, f1_score_custom_s1_list_1 = custom_multiclass_f1(arr_of_labels, predictions_s1_1,
+                                                                       return_list=True)
+    print("\nStage 1 1 f1-score: ", f1_score_s1_1)
+    print(f1_score_p1_list)
+    print("\nStage 1 1 f1-score (custom):", f1_score_custom_s1_1)
+    print(f1_score_custom_s1_list_1, "\n\n")
+
+    f1_score_s1_2, f1_score_p1_list_2 = multiclass_f1(arr_of_labels, predictions_s1_2, return_list=True)
+    f1_score_custom_s1_2, f1_score_custom_s1_list_2 = custom_multiclass_f1(arr_of_labels, predictions_s1_2,
+                                                                       return_list=True)
+    print("\nStage 1 2 f1-score: ", f1_score_s1_2)
+    print(f1_score_p1_list_2)
+    print("\nStage 1 2 f1-score (custom):", f1_score_custom_s1_2)
+    print(f1_score_custom_s1_list_2, "\n\n")
+
+    # concatenate predictions of stages 1
+    predictions_stages_1 = np.concatenate([predictions_s1_1, predictions_s1_2], axis=-1)
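+    # resulting shape: (n_segments, 18), with 9 class probabilities from each of the two stage-1 models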
+    
+    # Group the predictions by subject and pad:
+
+    # true labels of each subject
+    subject_labels_test = {ID: None for ID in list(np.unique(arr_of_IDs))}
+    for ID, label in zip(arr_of_IDs, arr_of_labels):
+        subject_labels_test[ID[0]] = label
+
+    # stages 1 predictions for each subject
+    subject_predictions_test = {ID: [] for ID in list(np.unique(arr_of_IDs))}
+    for ID, pred in zip(arr_of_IDs, predictions_stages_1):
+        subject_predictions_test[ID[0]].append(pred)
+
+    # pad inputs
+    subject_predictions_padded_test = {k: np.zeros((n_timesteps, n_features)) for k in subject_predictions_test.keys()}
+    for k, v in subject_predictions_test.items():
+        subject_predictions_padded_test[k][-len(v):, :] = v
+
+    # convert to array
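+    # with split=0 nothing is held out, so X_val/y_val below contain every test subject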
+    X_val, y_val, _, _ = split_train_validation_part_2(subject_predictions_padded_test, subject_labels_test, split=0,
+                                                       n_variables=n_features)
+
+    ## end stage 1 on test
+    ###################################################################################################################
+    # Stage 2
+
+    # convert to array
+    X_train, y_train, _, _ = split_train_validation_part_2(subject_predictions_padded, subject_labels, split=0,
+                                                           n_variables=n_features)
+
+    # Model
+    model_stage_2 = build_model(n_timesteps, n_features, n_outputs)
+
+    # callbacks
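+    # NOTE: ReduceLROnPlateau's 'epsilon' below is the pre-2.1.6 Keras name for this
+    # threshold; newer Keras releases call it 'min_delta'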
+    earlyStopping = EarlyStopping(monitor='val_categorical_accuracy', patience=16, verbose=0, mode='max')
+    mcp_save = ModelCheckpoint(os.path.join(experiments_dir, experiment_name, f"model_stage_2.h5"),
+                                               save_best_only=True, monitor='val_categorical_accuracy', mode='max')
+    reduce_lr_loss = ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.1, patience=10, verbose=1,
+                                                       epsilon=1e-4,
+                                                       mode='max')
+
+    # train stage 2
+    model_stage_2.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=2, shuffle=True,
+              validation_data=(X_val, y_val),
+              callbacks=[earlyStopping, mcp_save, reduce_lr_loss])
+
+    # reloading the best model
+    del model_stage_2
+    model_stage_2 = load_model(os.path.join(experiments_dir, experiment_name, f"model_stage_2.h5"))
+
+    # final predictions
+    _, accuracy = model_stage_2.evaluate(X_val, y_val, verbose=1)
+    final_predictions = model_stage_2.predict(X_val, verbose=1)
+
+    print(f"\nAccuracy: {accuracy}")
+
+    score, score_list = multiclass_f1(y_val, final_predictions, return_list=True)
+    print(f"\nf1-score: {score}")
+    print(score_list)
+
+    # f1-score
+    score_custom, score_custom_list = custom_multiclass_f1(y_val, final_predictions, return_list=True)
+    print(f"\nf1-score (custom): {score_custom}")
+    print(score_custom_list)
+
+    # save f1-score
+    with open(os.path.join(experiments_dir, experiment_name, "score_stage_2.txt"), 'w') as f:
+        f.write(f"f1-score: {str(score)} \n f1-score (custom): {str(score_custom)}")
+
+    # Save info and results test
+    with open(os.path.join(experiments_dir, experiment_name, "info.txt"), 'w') as f:
+        f.write(f"Model stage 1 1: {data_dir_1}\n")
+        f.write(f"Model stage 2 2: {data_dir_2}\n")
+
+        f.write(f"\n\nStage 1 1 f1-score: {str(f1_score_s1_1)}\n")
+        f.write(str(f1_score_p1_list))
+        f.write(f"\n\nStage 1 1 f1-score (custom): {str(f1_score_custom_s1_1)}\n")
+        f.write(str(f1_score_custom_s1_list_1))
+
+        f.write(f"\n\nStage 1 2 f1-score: {str(f1_score_s1_2)}\n")
+        f.write(str(f1_score_p1_list_2))
+        f.write(f"\n\nStage 1 2 f1-score (custom): {str(f1_score_custom_s1_2)}\n")
+        f.write(str(f1_score_custom_s1_list_2))
+
+        f.write(f"\n\nStage 2 f1-score: {str(score)}\n")
+        f.write(str(score_list))
+        f.write(f"\n\nStage 2 f1-score (custom): {str(score_custom)}\n")
+        f.write(str(score_custom_list))
+
+    # from sklearn.metrics import multilabel_confusion_matrix
+    #
+    #
+    # pred = np.where(predictions > 0.5, 1, 0)
+    # true = y_validation.copy()
+    #
+    # confusion = multilabel_confusion_matrix(true, pred)
diff --git a/stage_1_3_Inc_ResNet_LSTM_v01.py b/stage_1_3_Inc_ResNet_LSTM_v01.py
index ec61ae833a68fc5a3b1e1d41b0b0b992d2c94cfa..18addd3b6192eccdc8106bab9ebf8401279dbade 100644
--- a/stage_1_3_Inc_ResNet_LSTM_v01.py
+++ b/stage_1_3_Inc_ResNet_LSTM_v01.py
@@ -31,7 +31,6 @@ def cross_validation(arr_of_segments, arr_of_labels, arr_of_IDs, ids_labels):
                                                                              ids_labels['label']
                                                                              )]
 
-
     # true labels of each subject
     subject_labels = {ID: None for ID in list(ids)}
     for ID, label in zip(arr_of_IDs, arr_of_labels):
@@ -153,7 +152,7 @@ def cross_validation(arr_of_segments, arr_of_labels, arr_of_IDs, ids_labels):
 if __name__ == '__main__':
 
     # Config
-    experiment_name = "stage_1_3_002_Inc_ResNet_LSTM_v01"
+    experiment_name = "stage_1_3_Inc_ResNet_LSTM_v01"
     experiments_dir = "experiments_stage_1"
     data_dir = 'data/train_balanced'
     segment_size = 2000
@@ -165,12 +164,12 @@ if __name__ == '__main__':
     # create directory for the experiment
     if not os.path.exists(os.path.join(experiments_dir, experiment_name)):
         os.makedirs(os.path.join(experiments_dir, experiment_name))
-    # else:
-    #     raise NameError(f"Already exist an experiment with the name '{experiment_name}'"
-    #                     f" in the '{experiments_dir}' directory.")
+    else:
+        raise NameError(f"An experiment named '{experiment_name}' already exists"
+                        f" in the '{experiments_dir}' directory.")
 
-    # # save a copy of the script
-    # shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__)))
+    # save a copy of the script
+    shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__)))
 
     # This gives an error on the CSC server when trying to import sys
     # # Log stdout
diff --git a/stage_1_4_Inc_ResNet_LSTM_v02.py b/stage_1_4_Inc_ResNet_LSTM_v02.py
new file mode 100644
index 0000000000000000000000000000000000000000..a17703be2f9c045939d4479293af15a2231a8157
--- /dev/null
+++ b/stage_1_4_Inc_ResNet_LSTM_v02.py
@@ -0,0 +1,209 @@
+"""
+Inc_ResNet_LSTM_v02 model (stage_1_4)
+"""
+from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
+from sklearn.model_selection import StratifiedKFold
+import os
+from utils import encode_labels, custom_multiclass_f1, multiclass_f1
+import shutil
+import ntpath
+from collections import Counter
+import numpy as np
+from keras.initializers import glorot_uniform, he_normal
+from preprocess_and_segmentation import load_data, segment_all_dict_data, reshape_segmented_arrays
+from preprocessor import preprocess_input_data
+import tensorflow.keras as keras
+import pandas as pd
+# import sys  # This gives an error on the CSC server
+from model_architecture_V01 import Inc_ResNet_LSTM_v02
+
+
+def cross_validation(arr_of_segments, arr_of_labels, arr_of_IDs, ids_labels):
+    """ Subject cross-validation """
+
+    global epochs, batch_size, n_folds
+
+    # split the subjects into n_folds stratified folds so the label distribution stays balanced
+    ids = ids_labels['subject']
+    skf = StratifiedKFold(n_splits=n_folds, random_state=None, shuffle=True)
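+    # each entry of subj_folds holds only that fold's validation subject IDs; the
+    # training set is derived per fold by inverting the selector mask below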
+    subj_folds = [(ids[test_index]) for train_index, test_index in skf.split(ids_labels['subject'],
+                                                                             ids_labels['label']
+                                                                             )]
+
+    # true labels of each subject
+    subject_labels = {ID: None for ID in list(ids)}
+    for ID, label in zip(arr_of_IDs, arr_of_labels):
+        subject_labels[ID[0]] = label
+
+    # to save the predictions of each subject
+    subject_predictions = {ID: [] for ID in list(ids)}
+
+    # to save the f1-score of each fold
+    scores = {}
+    scores_custom = {}
+
+    for i, validation_fold in enumerate(subj_folds):
+        print(f"\n\nFold {i} ------------------------------------------------- \n")
+
+        # boolean mask: True for segments whose subject ID belongs to the validation fold
+        selector = np.isin(arr_of_IDs.squeeze(), validation_fold)
+
+        # validation
+        arr_seg_validation = arr_of_segments[selector]
+        arr_labels_validation = arr_of_labels[selector]
+        arr_IDs_validation = arr_of_IDs[selector]
+
+        # train
+        arr_seg_train = arr_of_segments[np.invert(selector)]
+        arr_labels_train = arr_of_labels[np.invert(selector)]
+        arr_IDs_train = arr_of_IDs[np.invert(selector)]
+
+        # TODO
+        # Up-balance 'STE' (3x): collect every training segment whose label is class
+        # index 8 ('STE'), then repeat those segments twice more below
+        add_to_input = []
+        add_to_labels = []
+        add_to_IDs = []
+        for j in range(len(arr_labels_train)):
+            if arr_labels_train[j][8] == 1:
+                add_to_input.append(arr_seg_train[j])
+                add_to_labels.append(arr_labels_train[j])
+                add_to_IDs.append(arr_IDs_train[j])
+
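+        # note: this assumes every training fold contains at least one STE segment,
+        # since np.concatenate raises an error when given an empty list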
+        arr_seg_train_balanced = np.concatenate([add_to_input, arr_seg_train, add_to_input])
+        arr_labels_train_balanced = np.concatenate([add_to_labels, arr_labels_train, add_to_labels])
+        arr_IDs_train_balanced = np.concatenate([add_to_IDs, arr_IDs_train, add_to_IDs])
+
+        # Build model
+        model = Inc_ResNet_LSTM_v02(segment_size, 12, classes=9)
+
+        # TODO
+        # callbacks
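+        # NOTE: ReduceLROnPlateau's 'epsilon' below is the pre-2.1.6 Keras name for this
+        # threshold; newer Keras releases call it 'min_delta'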
+        earlyStopping = keras.callbacks.EarlyStopping(monitor='val_categorical_accuracy', patience=10, verbose=0, mode='max')
+        mcp_save = keras.callbacks.ModelCheckpoint(os.path.join(experiments_dir, experiment_name, f"model_fold_{i}.h5"),
+                                                   save_best_only=True, monitor='val_categorical_accuracy', mode='max')
+        reduce_lr_loss = keras.callbacks.ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.1, patience=7, verbose=1,
+                                                           epsilon=1e-4,
+                                                           mode='max')
+
+        # model.summary()
+
+        model.fit(arr_seg_train_balanced, arr_labels_train_balanced, epochs=epochs, batch_size=batch_size,
+                  verbose=1, validation_data=(arr_seg_validation, arr_labels_validation), shuffle=True,
+                  callbacks=[earlyStopping, mcp_save, reduce_lr_loss])
+
+        # re-load best model
+        del model
+        model = keras.models.load_model(os.path.join(experiments_dir, experiment_name, f"model_fold_{i}.h5"))
+        _, accuracy = model.evaluate(arr_seg_validation, arr_labels_validation, batch_size=batch_size, verbose=1)
+        predictions = model.predict(arr_seg_validation, verbose=1)
+
+        # print fold results
+        print("Accuracy:", accuracy)
+
+        f1_score, f1_score_list = multiclass_f1(arr_labels_validation, predictions, return_list=True)
+        print("\nf1 score:", f1_score)
+        print(f1_score_list)
+
+        f1_score_custom, f1_score_custom_list = custom_multiclass_f1(arr_labels_validation, predictions,
+                                                                     return_list=True)
+        print("\nf1 score (custom):", f1_score_custom)
+        print(f1_score_custom_list)
+
+        # save predictions
+        for ID, pred in zip(arr_IDs_validation, predictions):
+            subject_predictions[ID[0]].append(pred)
+
+        # save f1-score
+        scores[f"fold_{i}"] = f1_score
+        scores_custom[f"fold_{i}"] = f1_score_custom
+
+        # save f1-score list (text file):
+        with open(os.path.join(experiments_dir, experiment_name, "scores.txt"), 'a') as f:
+            f.write(f"Fold {str(i)}:\n"
+                    f"{str(f1_score_list)} (f1-score by class) \n"
+                    f"{str(f1_score_custom_list)} (f1 score (custom) by class) \n")
+
+    # Average f1-score across folds
+    m, s = np.mean([v for v in scores.values()]), np.std([v for v in scores.values()])
+    m_c, s_c = np.mean([v for v in scores_custom.values()]), np.std([v for v in scores_custom.values()])
+
+    # save labels (to disk)
+    np.save(os.path.join(experiments_dir, experiment_name, "subject_labels.npy"), subject_labels)
+
+    # save predictions (to disk)
+    np.save(os.path.join(experiments_dir, experiment_name, "subject_predictions.npy"), subject_predictions)
+
+    # save f1-scores (to disk)
+    np.save(os.path.join(experiments_dir, experiment_name, "scores.npy"), scores)
+    np.save(os.path.join(experiments_dir, experiment_name, "scores_custom.npy"), scores_custom)
+
+    print("\n==========================================================\n")
+    print(f"CV f1-score: {str(m)} (+/- {str(s)}) \nCV f1-score (custom): {str(m_c)} (+/- {str(s_c)})")
+
+    # save f1-scores (text file)
+    with open(os.path.join(experiments_dir, experiment_name, "scores.txt"), 'a') as f:
+        f.write("\n\n ==> Score by CV:")
+        f.write(f"\n{str(scores)} (f1-score) \n{str(scores_custom)} (f1-score (custom))")
+        f.write("\n\n ==> Average score CV:")
+        f.write(f"\nCV f1-score: {str(m)} (+/- {str(s)}) \nCV f1-score (custom): {str(m_c)} (+/- {str(s_c)})\n\n")
+
+
+if __name__ == '__main__':
+
+    # Config
+    experiment_name = "stage_1_4_moreEpochs"
+    experiments_dir = "experiments_stage_1"
+    data_dir = 'data/train_balanced'
+    segment_size = 2000
+    overlap = 0.5
+    epochs = 80  # ???
+    batch_size = 54  # ???
+    n_folds = 8
+
+    # create directory for the experiment
+    if not os.path.exists(os.path.join(experiments_dir, experiment_name)):
+        os.makedirs(os.path.join(experiments_dir, experiment_name))
+    else:
+        raise NameError(f"An experiment named '{experiment_name}' already exists"
+                        f" in the '{experiments_dir}' directory.")
+
+    # save a copy of the script
+    shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__)))
+
+    # This gives an error on the CSC server when trying to import sys
+    # # Log stdout
+    # log_file = os.path.join(experiments_dir, experiment_name, 'logfile.log')
+    # sys.stdout = Logger(log_file)
+
+    # load data
+    data = load_data(data_dir)
+
+    # create an array with the label of each subject (used to keep the label
+    # distribution balanced across the cross-validation folds)
+    dic_labels = {}
+    for k, v in data.items():
+        dic_labels[k] = data[k]['info']['Dx']
+
+    ids_labels = pd.Series(dic_labels).reset_index()
+    ids_labels.columns = ['subject', 'label']
+
+    # pre-process signals
+    data = preprocess_input_data(data)
+
+    # segment signal
+    data = segment_all_dict_data(data, segment_size, overlap)
+
+    arr_of_segments, arr_of_labels, arr_of_IDs = reshape_segmented_arrays(data,
+                                                                          shuffle_IDs=True,
+                                                                          # Do not shuffle the segments, to preserve
+                                                                          # the temporal order of the predictions
+                                                                          shuffle_segments=False,
+                                                                          segment_standardization_flag=True)
+
+    # Encode labels
+    arr_of_labels = np.array([i[0]['Dx'] for i in arr_of_labels])
+    arr_of_labels = encode_labels(arr_of_labels)
+
+    # Cross-validation
+    cross_validation(arr_of_segments, arr_of_labels, arr_of_IDs, ids_labels)
diff --git a/stage_2.py b/stage_2.py
index 4af884c25c90fd7ad646bf57cebf7a11702b09c8..90c9c4e61d4f8c6a5ceb69090640526db6b7e8d5 100644
--- a/stage_2.py
+++ b/stage_2.py
@@ -42,7 +42,7 @@ def build_model(n_timesteps, n_features, n_outputs):
 if __name__ == '__main__':
 
     # Config
-    experiment_name = "stage_2_001_baseline_CSC_CPU"
+    experiment_name = "stage_2_s11_s12_repetition"
     experiments_dir = "experiments_stage_2"
 
     data_dir_1 = 'experiments_stage_1/stage_1_1_001_baseline'
@@ -58,6 +58,8 @@ if __name__ == '__main__':
     epochs = 30
     batch_size = 18
     n_timesteps = 120
+    n_features = 18  # 2 stage-1 models x 9 class probabilities each
+    n_outputs = 9  # diagnosis classes
 
     # create directory for the experiment
     if not os.path.exists(os.path.join(experiments_dir, experiment_name, 'best_models')):
@@ -66,8 +68,8 @@ if __name__ == '__main__':
         raise NameError(f"Already exist an experiment with the name '{experiment_name}'"
                         f" in the '{experiments_dir}' directory.")
 
-    # # save a copy of the script
-    # shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__)))
+    # save a copy of the script
+    shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__)))
 
     # # Log stdout
     # log_file = os.path.join(experiments_dir, experiment_name, 'logfile.log')
@@ -77,7 +79,7 @@ if __name__ == '__main__':
     subject_predictions = np.load(os.path.join(data_dir_1, predictions_file), allow_pickle=True).item()
     scores_1 = np.load(os.path.join(data_dir_1, scores_file), allow_pickle=True).item()
 
-    subject_labels_2 = np.load(os.path.join(data_dir_1, labels_file), allow_pickle=True).item()
+    # subject_labels_2 = np.load(os.path.join(data_dir_2, labels_file), allow_pickle=True).item()  # TODO
     subject_predictions_2 = np.load(os.path.join(data_dir_2, predictions_file), allow_pickle=True).item()
     scores_2 = np.load(os.path.join(data_dir_2, scores_file), allow_pickle=True).item()
 
@@ -86,7 +88,6 @@ if __name__ == '__main__':
     for k, v in subject_predictions.items():
         subject_predictions_padded[k][-len(v):, :] = v
 
-    # pad inputs to the longest input sequence
     subject_predictions_padded_2 = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions_2.keys()}
     for k, v in subject_predictions_2.items():
         subject_predictions_padded_2[k][-len(v):, :] = v
@@ -104,8 +105,8 @@ if __name__ == '__main__':
     best_fold_2 = [(k, v) for k, v in sorted(scores_2.items(), key=lambda item: item[1], reverse=True)][0][0]
 
     # Models stage 1
-    model_stage_1_1 = load_model(os.path.join(data_dir_1, f"model_{best_fold_1}.h5"))  # TODO save
-    model_stage_1_2 = keras.models.load_model(os.path.join(data_dir_2, f"model_{best_fold_2}.h5"))  # TODO save
+    model_stage_1_1 = load_model(os.path.join(data_dir_1, f"model_{best_fold_1}.h5"))
+    model_stage_1_2 = keras.models.load_model(os.path.join(data_dir_2, f"model_{best_fold_2}.h5"))
     
     # save best models
     model_stage_1_1.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_1_{best_fold_1}.h5"))
@@ -133,9 +134,9 @@ if __name__ == '__main__':
     f1_score_s1_1, f1_score_p1_list = multiclass_f1(arr_of_labels, predictions_s1_1, return_list=True)
     f1_score_custom_s1_1, f1_score_custom_s1_list_1 = custom_multiclass_f1(arr_of_labels, predictions_s1_1,
                                                                        return_list=True)
-    print("\nStage 1 f1-score: ", f1_score_s1_1)
+    print("\nStage 1 1 f1-score: ", f1_score_s1_1)
     print(f1_score_p1_list)
-    print("\nStage 1 f1-score (custom):", f1_score_custom_s1_1)
+    print("\nStage 1 1 f1-score (custom):", f1_score_custom_s1_1)
     print(f1_score_custom_s1_list_1, "\n\n")
 
     f1_score_s1_2, f1_score_p1_list_2 = multiclass_f1(arr_of_labels, predictions_s1_2, return_list=True)
@@ -162,33 +163,24 @@ if __name__ == '__main__':
         subject_predictions_test[ID[0]].append(pred)
 
     # pad inputs
-    subject_predictions_padded_test = {k: np.zeros((n_timesteps, 18)) for k in subject_predictions_test.keys()}
+    subject_predictions_padded_test = {k: np.zeros((n_timesteps, n_features)) for k in subject_predictions_test.keys()}
     for k, v in subject_predictions_test.items():
         subject_predictions_padded_test[k][-len(v):, :] = v
 
     # convert to array
-    X_val, y_val, _, _ = split_train_validation_part_2(subject_predictions_padded_test, subject_labels_test, split=0)
+    X_val, y_val, _, _ = split_train_validation_part_2(subject_predictions_padded_test, subject_labels_test, split=0,
+                                                       n_variables=n_features)
 
     ## end stage 1 on test
-    ###################################################################################################################
-
-    # TODO
-    # del model_stage_1_1
-    # del model_stage_1_2
-    # del arr_of_segments
-    # del arr_of_labels
-    # del arr_of_IDs
-    # del subject_predictions
-    # del data_test
-
     ###################################################################################################################
     # Stage 2
 
     # convert to array
-    X_train, y_train, _, _ = split_train_validation_part_2(subject_predictions_padded, subject_labels, split=0)
+    X_train, y_train, _, _ = split_train_validation_part_2(subject_predictions_padded, subject_labels, split=0,
+                                                           n_variables=n_features)
 
     # Model
-    model_stage_2 = build_model(n_timesteps, 18, 9)
+    model_stage_2 = build_model(n_timesteps, n_features, n_outputs)
 
     # callbacks
     earlyStopping = EarlyStopping(monitor='val_categorical_accuracy', patience=16, verbose=0, mode='max')
diff --git a/stage_2_3x.py b/stage_2_3x.py
new file mode 100644
index 0000000000000000000000000000000000000000..5840d3174d8ba4768d96cf149756ffbb84902abf
--- /dev/null
+++ b/stage_2_3x.py
@@ -0,0 +1,275 @@
+"""
+LSTM model (stage_2, 3x): combines the segment-level predictions of three stage-1 models into a subject-level diagnosis
+"""
+
+from keras import Input, Model
+import numpy as np
+import os
+from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
+from keras.layers import Dense, LSTM, Dropout, LeakyReLU, BatchNormalization, Masking, Bidirectional
+from keras.models import load_model
+from keras.optimizers import Adam
+import tensorflow.keras as keras
+from logger import Logger
+from preprocess_and_segmentation import load_data, segment_all_dict_data, reshape_segmented_arrays
+from preprocessor import preprocess_input_data
+from utils import custom_multiclass_f1, split_train_validation_part_2, multiclass_f1, encode_labels
+import shutil
+import ntpath
+# import sys
+
+
+def build_model(n_timesteps, n_features, n_outputs):
+    # two stacked bidirectional LSTMs over the masked (zero-padded) prediction sequence
+    inputs = Input(shape=(n_timesteps, n_features), dtype='float32')
+    x = Masking(mask_value=0.)(inputs)
+    x = Bidirectional(LSTM(units=30, return_sequences=True))(x)
+    x = BatchNormalization()(x)
+    x = LeakyReLU()(x)
+    x = Bidirectional(LSTM(units=30))(x)
+    x = BatchNormalization()(x)
+    x = LeakyReLU()(x)
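+    # sigmoid outputs with binary cross-entropy (compiled below) treat the 9 classes
+    # as independent, i.e. multi-label; 'categorical_accuracy' only compares the argmax,
+    # so it tracks the single most probable class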
+    output = Dense(n_outputs, activation='sigmoid')(x)
+
+    model = Model(inputs=inputs, outputs=output)
+    # opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
+    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['categorical_accuracy'])
+    model.summary()
+
+    return model
+
+
+if __name__ == '__main__':
+
+    # Config
+    experiment_name = "stage_2_3x_s11_s12_s14"
+    experiments_dir = "experiments_stage_2"
+
+    data_dir_1 = 'experiments_stage_1/stage_1_1_001_baseline'
+    data_dir_2 = 'experiments_stage_1/stage_1_2_001_baseline'
+    data_dir_3 = 'experiments_stage_1/stage_1_4_Inc_ResNet_LSTM_v02'
+    data_test_dir = 'data/test_balanced'
+
+    labels_file = 'subject_labels.npy'
+    predictions_file = 'subject_predictions.npy'
+    scores_file = 'scores_custom.npy'
+
+    segment_size = 2000
+    overlap = 0.5
+    epochs = 30
+    batch_size = 18
+    n_timesteps = 120  # fixed sequence length (segments per subject) that inputs are padded to
+    n_features = 27  # 3 stage-1 models x 9 class probabilities each
+    n_outputs = 9  # diagnosis classes
+
+    # create directory for the experiment
+    if not os.path.exists(os.path.join(experiments_dir, experiment_name, 'best_models')):
+        os.makedirs(os.path.join(experiments_dir, experiment_name, 'best_models'))
+    else:
+        raise NameError(f"An experiment named '{experiment_name}' already exists"
+                        f" in the '{experiments_dir}' directory.")
+
+    # save a copy of the script
+    shutil.copy(__file__, os.path.join(experiments_dir, experiment_name, ntpath.basename(__file__)))
+
+    # # Log stdout
+    # log_file = os.path.join(experiments_dir, experiment_name, 'logfile.log')
+    # sys.stdout = Logger(log_file)
+
+    subject_labels = np.load(os.path.join(data_dir_1, labels_file), allow_pickle=True).item()
+    subject_predictions = np.load(os.path.join(data_dir_1, predictions_file), allow_pickle=True).item()
+    scores_1 = np.load(os.path.join(data_dir_1, scores_file), allow_pickle=True).item()
+
+    # subject_labels_2 = np.load(os.path.join(data_dir_2, labels_file), allow_pickle=True).item()  # TODO
+    subject_predictions_2 = np.load(os.path.join(data_dir_2, predictions_file), allow_pickle=True).item()
+    scores_2 = np.load(os.path.join(data_dir_2, scores_file), allow_pickle=True).item()
+
+    # subject_labels_3 = np.load(os.path.join(data_dir_3, labels_file), allow_pickle=True).item()  # TODO
+    subject_predictions_3 = np.load(os.path.join(data_dir_3, predictions_file), allow_pickle=True).item()
+    scores_3 = np.load(os.path.join(data_dir_3, scores_file), allow_pickle=True).item()
+
+    # pad inputs: zero-pad each subject's prediction sequence at the front to n_timesteps rows
+    # (the stage-2 Masking layer skips the all-zero rows)
+    subject_predictions_padded = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions.keys()}
+    for k, v in subject_predictions.items():
+        subject_predictions_padded[k][-len(v):, :] = v
+
+    subject_predictions_padded_2 = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions_2.keys()}
+    for k, v in subject_predictions_2.items():
+        subject_predictions_padded_2[k][-len(v):, :] = v
+
+    subject_predictions_padded_3 = {k: np.zeros((n_timesteps, 9)) for k in subject_predictions_3.keys()}
+    for k, v in subject_predictions_3.items():
+        subject_predictions_padded_3[k][-len(v):, :] = v
+
+    # concatenate the three stage-1 prediction streams along the feature axis (9 + 9 + 9 = 27 features)
+    for k, v in subject_predictions.items():
+        subject_predictions_padded[k] = np.concatenate([subject_predictions_padded[k],
+                                                        subject_predictions_padded_2[k],
+                                                        subject_predictions_padded_3[k]], axis=1)
+
+    ###################################################################################################################
+    ## Stage 1 on test
+
+    # Get the best stage_1 models (the fold with the highest custom f1-score in each experiment)
+    best_fold_1 = [(k, v) for k, v in sorted(scores_1.items(), key=lambda item: item[1], reverse=True)][0][0]
+    best_fold_2 = [(k, v) for k, v in sorted(scores_2.items(), key=lambda item: item[1], reverse=True)][0][0]
+    best_fold_3 = [(k, v) for k, v in sorted(scores_3.items(), key=lambda item: item[1], reverse=True)][0][0]
+
+    # Models stage 1
+    model_stage_1_1 = load_model(os.path.join(data_dir_1, f"model_{best_fold_1}.h5"))
+    model_stage_1_2 = keras.models.load_model(os.path.join(data_dir_2, f"model_{best_fold_2}.h5"))
+    model_stage_1_3 = keras.models.load_model(os.path.join(data_dir_3, f"model_{best_fold_3}.h5"))
+
+    # save best models
+    model_stage_1_1.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_1_{best_fold_1}.h5"))
+    model_stage_1_2.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_2_{best_fold_2}.h5"))
+    model_stage_1_3.save(os.path.join(experiments_dir, experiment_name, 'best_models', f"stage_1_3_{best_fold_3}.h5"))
+
+    # Load test data
+    data_test = load_data(data_test_dir)
+    data_test = preprocess_input_data(data_test)
+    data_test = segment_all_dict_data(data_test, segment_size, overlap)
+    arr_of_segments, arr_of_labels, arr_of_IDs = reshape_segmented_arrays(data_test,
+                                                                          shuffle_IDs=False,
+                                                                          # Do not shuffle the segments, to preserve
+                                                                          # the temporal order of the predictions
+                                                                          shuffle_segments=False,
+                                                                          segment_standardization_flag=True)
+    # Encode labels
+    arr_of_labels = np.array([i[0]['Dx'] for i in arr_of_labels])
+    arr_of_labels = encode_labels(arr_of_labels)
+
+    # Predictions stages 1
+    predictions_s1_1 = model_stage_1_1.predict(arr_of_segments, verbose=1)
+    predictions_s1_2 = model_stage_1_2.predict(arr_of_segments, verbose=1)
+    predictions_s1_3 = model_stage_1_3.predict(arr_of_segments, verbose=1)
+
+    # Score stages 1
+    f1_score_s1_1, f1_score_p1_list = multiclass_f1(arr_of_labels, predictions_s1_1, return_list=True)
+    f1_score_custom_s1_1, f1_score_custom_s1_list_1 = custom_multiclass_f1(arr_of_labels, predictions_s1_1,
+                                                                       return_list=True)
+    print("\nStage 1 1 f1-score: ", f1_score_s1_1)
+    print(f1_score_p1_list)
+    print("\nStage 1 1 f1-score (custom):", f1_score_custom_s1_1)
+    print(f1_score_custom_s1_list_1, "\n\n")
+
+    f1_score_s1_2, f1_score_p1_list_2 = multiclass_f1(arr_of_labels, predictions_s1_2, return_list=True)
+    f1_score_custom_s1_2, f1_score_custom_s1_list_2 = custom_multiclass_f1(arr_of_labels, predictions_s1_2,
+                                                                       return_list=True)
+    print("\nStage 1 2 f1-score: ", f1_score_s1_2)
+    print(f1_score_p1_list_2)
+    print("\nStage 1 2 f1-score (custom):", f1_score_custom_s1_2)
+    print(f1_score_custom_s1_list_2, "\n\n")
+
+    f1_score_s1_3, f1_score_p1_list_3 = multiclass_f1(arr_of_labels, predictions_s1_3, return_list=True)
+    f1_score_custom_s1_3, f1_score_custom_s1_list_3 = custom_multiclass_f1(arr_of_labels, predictions_s1_3,
+                                                                           return_list=True)
+    print("\nStage 1 3 f1-score: ", f1_score_s1_3)
+    print(f1_score_p1_list_3)
+    print("\nStage 1 3 f1-score (custom):", f1_score_custom_s1_3)
+    print(f1_score_custom_s1_list_3, "\n\n")
+
+    # concatenate predictions of stages 1
+    predictions_stages_1 = np.concatenate([predictions_s1_1, predictions_s1_2, predictions_s1_3], axis=-1)
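+    # resulting shape: (n_segments, 27), with 9 class probabilities from each of the three stage-1 models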
+
+    # Group the predictions by subject and pad:
+
+    # true labels of each subject
+    subject_labels_test = {ID: None for ID in list(np.unique(arr_of_IDs))}
+    for ID, label in zip(arr_of_IDs, arr_of_labels):
+        subject_labels_test[ID[0]] = label
+
+    # stages 1 predictions for each subject
+    subject_predictions_test = {ID: [] for ID in list(np.unique(arr_of_IDs))}
+    for ID, pred in zip(arr_of_IDs, predictions_stages_1):
+        subject_predictions_test[ID[0]].append(pred)
+
+    # pad inputs
+    subject_predictions_padded_test = {k: np.zeros((n_timesteps, n_features)) for k in subject_predictions_test.keys()}
+    for k, v in subject_predictions_test.items():
+        subject_predictions_padded_test[k][-len(v):, :] = v
+
+    # convert to array
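+    # with split=0 nothing is held out, so X_val/y_val below contain every test subject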
+    X_val, y_val, _, _ = split_train_validation_part_2(subject_predictions_padded_test, subject_labels_test,
+                                                       n_variables=n_features, split=0)
+
+    ## end stage 1 on test
+    ###################################################################################################################
+    # Stage 2
+
+    # convert to array
+    X_train, y_train, _, _ = split_train_validation_part_2(subject_predictions_padded, subject_labels,
+                                                           n_variables=n_features, split=0)
+
+    # Model
+    model_stage_2 = build_model(n_timesteps, n_features, n_outputs)
+
+    # callbacks
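+    # NOTE: ReduceLROnPlateau's 'epsilon' below is the pre-2.1.6 Keras name for this
+    # threshold; newer Keras releases call it 'min_delta'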
+    earlyStopping = EarlyStopping(monitor='val_categorical_accuracy', patience=16, verbose=0, mode='max')
+    mcp_save = ModelCheckpoint(os.path.join(experiments_dir, experiment_name, f"model_stage_2.h5"),
+                                               save_best_only=True, monitor='val_categorical_accuracy', mode='max')
+    reduce_lr_loss = ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.1, patience=10, verbose=1,
+                                                       epsilon=1e-4,
+                                                       mode='max')
+
+    # train stage 2
+    model_stage_2.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=2, shuffle=True,
+              validation_data=(X_val, y_val),
+              callbacks=[earlyStopping, mcp_save, reduce_lr_loss])
+
+    # reloading the best model
+    del model_stage_2
+    model_stage_2 = load_model(os.path.join(experiments_dir, experiment_name, f"model_stage_2.h5"))
+
+    # final predictions
+    _, accuracy = model_stage_2.evaluate(X_val, y_val, verbose=1)
+    final_predictions = model_stage_2.predict(X_val, verbose=1)
+
+    print(f"\nAccuracy: {accuracy}")
+
+    score, score_list = multiclass_f1(y_val, final_predictions, return_list=True)
+    print(f"\nf1-score: {score}")
+    print(score_list)
+
+    # f1-score
+    score_custom, score_custom_list = custom_multiclass_f1(y_val, final_predictions, return_list=True)
+    print(f"\nf1-score (custom): {score_custom}")
+    print(score_custom_list)
+
+    # save f1-score
+    with open(os.path.join(experiments_dir, experiment_name, "score_stage_2.txt"), 'w') as f:
+        f.write(f"f1-score: {str(score)} \n f1-score (custom): {str(score_custom)}")
+
+    # Save info and results test
+    with open(os.path.join(experiments_dir, experiment_name, "info.txt"), 'w') as f:
+        f.write(f"Model stage 1 1: {data_dir_1}\n")
+        f.write(f"Model stage 2 2: {data_dir_2}\n")
+        f.write(f"Model stage 2 3: {data_dir_3}\n")
+
+        f.write(f"\n\nStage 1 1 f1-score: {str(f1_score_s1_1)}\n")
+        f.write(str(f1_score_p1_list))
+        f.write(f"\n\nStage 1 1 f1-score (custom): {str(f1_score_custom_s1_1)}\n")
+        f.write(str(f1_score_custom_s1_list_1))
+
+        f.write(f"\n\nStage 1 2 f1-score: {str(f1_score_s1_2)}\n")
+        f.write(str(f1_score_p1_list_2))
+        f.write(f"\n\nStage 1 2 f1-score (custom): {str(f1_score_custom_s1_2)}\n")
+        f.write(str(f1_score_custom_s1_list_2))
+
+        f.write(f"\n\nStage 1 3 f1-score: {str(f1_score_s1_3)}\n")
+        f.write(str(f1_score_p1_list_3))
+        f.write(f"\n\nStage 1 3 f1-score (custom): {str(f1_score_custom_s1_3)}\n")
+        f.write(str(f1_score_custom_s1_list_3))
+
+        f.write(f"\n\nStage 2 f1-score: {str(score)}\n")
+        f.write(str(score_list))
+        f.write(f"\n\nStage 2 f1-score (custom): {str(score_custom)}\n")
+        f.write(str(score_custom_list))
+
+    # from sklearn.metrics import multilabel_confusion_matrix
+    #
+    #
+    # pred = np.where(predictions > 0.5, 1, 0)
+    # true = y_validation.copy()
+    #
+    # confusion = multilabel_confusion_matrix(true, pred)
diff --git a/utils.py b/utils.py
index 061faf1ba0cfafc15d92079e9417310b21d9d7ed..92a0ecaee805c131d2fe600a8f7691bb78101b3f 100644
--- a/utils.py
+++ b/utils.py
@@ -112,11 +112,11 @@ def split_data_train_test():
 
 
 # draft
-def split_train_validation_part_2(subject_predictions, subject_labels, split=0.33):
+def split_train_validation_part_2(subject_predictions, subject_labels, n_variables=18, split=0.33):
     """ Splits train/validation sets for the model_1_part_2"""
 
     n_timesteps = [len(v) for v in subject_predictions.values()][0]
-    n_variables = 18
     n_outputs = 9
 
     assert len(subject_labels) == len(subject_predictions), "Labels and predictions have different shapes"