diff --git a/model3multi/training/own_singularity_trainer.sh b/model3multi/training/own_singularity_trainer.sh
index 0627edc41fb3c19c167ec0aef27c43321b7f46d8..fb4bf1ccc4be8ab15dd7ac711da7c4282c72063b 100644
--- a/model3multi/training/own_singularity_trainer.sh
+++ b/model3multi/training/own_singularity_trainer.sh
@@ -1,13 +1,13 @@
 #!/bin/bash
-#SBATCH --job-name=own_test
+#SBATCH --job-name=2x4test
 #SBATCH --account=project_2004600
 ##SBATCH --time=10:00:00
-#SBATCH --time=00:15:00
+#SBATCH --time=01:00:00
 #SBATCH --partition=gpumedium
 #SBATCH --nodes=2
 #SBATCH --mem=100G
 #SBATCH --cpus-per-task=10
-#SBATCH --gres=gpu:a100:4,nvme:200
+#SBATCH --gres=gpu:a100:4
 #SBATCH -o logs/%j.out
 #SBATCH -e logs/%j.err
 
@@ -21,74 +21,40 @@ GPUS=$(echo $SLURM_JOB_GPUS | tr -s ', ' '[\n*]' | wc -l)
 
 module load pdsh/2.31
 
-export PDSH_SSH_ARGS_APPEND="-v -o StrictHostKeyChecking=no" 
-
 ### CREATES HOSTFILE ###
-
 rm -f hostfile.txt
 # Create deepspeed hostfile.
-
 scontrol show hostnames "$SLURM_JOB_NODELIST" \
 	        | perl -pe 's/$/ slots=4/' \
 			            > "hostfile.txt"
-
-# `scontrol show hostnames` turns condenced nodelist
-# (e.g. "g[1102,1201]") into list of host names (e.g. "g1102\ng1102")
 MASTER_NODE=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
 
 cat hostfile.txt
 
-#export TMPDIR=/scratch/project_2004600/risto/
+export TMPDIR=/scratch/project_2004600/risto
 export HF_DATASETS_CACHE=$TMPDIR/"dataset_cache/"
-
 echo "Using TEMP-dir " $HF_DATASETS_CACHE
 
-#### MODEL ARGS ###
-
-MODEL_OUT=/scratch/project_2004600/risto/model_out_TEST/
-#MODEL_OUT=/scratch/project_2004600/risto/gpt2_large_grad_acc_100/
-#MODEL_CHECKPOINT=/scratch/project_2004600/risto/gpt-
-MODEL_CHECKPOINT='no'
-#BATCH_SIZE=16 # for medium if blocks are 500
-BATCH_SIZE=11 # blocks are 512
-EPOCHS=1
-DS_CONFIG=/projappl/project_2004600/risto/model3multi/training/ds_config.json
-#DS_CONFIG=/projappl/project_2004600/risto/testing/singularity_trainer/zero_stage_3.json
-TRAIN_SAMPLE_SIZE=50
-GRAD_ACC=100
-MODEL='gpt2'
-#TRAIN_DATA=$TMPDIR/gpt2-train/
-#EVAL_DATA=$TMPDIR/gpt2-eval/
-#EVAL_DATA=/scratch/project_2004600/FinBERT-data/batched-data/gpt2-eval/
-#TRAIN_DATA=/scratch/project_2004600/FinBERT-dataset-bal/prepared-dataset_t2/
-TRAIN_DATA=/scratch/project_2004600/FinBERT-data/train/
-EVAL_DATA=/scratch/project_2004600/FinBERT-data/batched-data/gpt2-eval/
-LR=0.0001
-
-#/###/MODEL ARGS ####
 
 export SING_IMAGE=/scratch/project_2004600/containers/deepspeed.sif
-
 export SING_FLAGS="$SING_FLAGS -B /appl/spack/v014/install-tree/gcc-4.8.5/pdsh-2.31-cdzt5w/bin/:/usr/local/sbin,/projappl/project_2004600/risto/model3multi/training/node_init.sh:/data/ --nv"
-#export SING_FLAGS=$SING_FLAGS"--nv"
-export NCCL_DEBUG=INFO
-
+#export NCCL_DEBUG=INFO
 
-singularity_wrapper exec which deepspeed
 
 echo "start running trainer script"
 singularity_wrapper exec deepspeed --hostfile=hostfile.txt --master_addr=$MASTER_NODE /projappl/project_2004600/risto/model3multi/training/trainer.py \
-    --train_data $TRAIN_DATA \
-	--eval_data $EVAL_DATA \
-	--lr $LR \
-    --train_sample_size $TRAIN_SAMPLE_SIZE \
-    --model_output_dir $MODEL_OUT \
-    --from_checkpoint $MODEL_CHECKPOINT \
-    --batch_size $BATCH_SIZE \
-    --epochs $EPOCHS \
-    --grad_acc $GRAD_ACC \
-    --model $MODEL \
+    --train_data /scratch/project_2004600/FinBERT-data/batched-data-bal/train/ \
+    --eval_data /scratch/project_2004600/FinBERT-data/batched-data-bal/eval/ \
+    --lr 5e-5 \
+    --tokenizer /projappl/project_2004600/risto/tokenizer_100/ \
+    --train_sample_size 2 \
+    --model_output_dir /scratch/project_2004600/risto/220921-mahti-gptsmall-epochs5-finbert10-bl1024/ \
+    --from_checkpoint 'no' \
+    --batch_size 11 \
+    --epochs 5 \
+    --grad_acc 1 \
+    --model 'gpt2' \
     --cache_dir $HF_DATASETS_CACHE \
-    --deepspeed --deepspeed_config $DS_CONFIG 
+    --deepspeed --deepspeed_config /projappl/project_2004600/risto/model3multi/training/ds_config.json
 
-seff $SLURM_JOBId
+seff $SLURM_JOBID
diff --git a/model3multi/training/trainer.py b/model3multi/training/trainer.py
index 2db3b664c4f5814b0d688cbab798485ad391c516..7714cb5b614113f59b17c3b5562b8c331b93e32b 100644
--- a/model3multi/training/trainer.py
+++ b/model3multi/training/trainer.py
@@ -77,6 +77,9 @@ if __name__ == '__main__':
                         help='number of training epochs')
     parser.add_argument('--model', type=str,
                        help='model configuration name')
+    parser.add_argument('--tokenizer', type=str,
+                       help='tokenizer path')
+
 
     parser.add_argument('--grad_acc', type=int,
                         help='number of gradient accumulation steps')
@@ -94,12 +97,12 @@ if __name__ == '__main__':
 
 
     num_workers = 40
-    block_size = 512
+    block_size = 1024
     overwrite_cache=False
     keep_in_memory=True
     
     logging.set_verbosity_debug()
-    tokenizer_path = '/projappl/project_2004600/risto/tokenizer/' 
+    tokenizer_path = cmd_args.tokenizer #'/projappl/project_2004600/risto/tokenizer/' 
 
     TRAIN_FILE_PATH =  cmd_args.train_data
     TRAIN_SAMPLE_SIZE = cmd_args.train_sample_size
@@ -179,7 +182,7 @@ if __name__ == '__main__':
     print(f"Eval files count: {len(eval_paths)}, evaluation files: ", eval_paths)
     start_time = perf_counter()
     dataset = load_dataset(
-#            cache_dir = cmd_args.cache_dir,
+            cache_dir = cmd_args.cache_dir,
             path = 'text', # path to loading script. 'text' is default
             data_files = {'train': train_paths, 'eval': eval_paths},
             # keep_in_memory=keep_in_memory