From d4465971e0ab7d72b72fd1244f07429204e9a352 Mon Sep 17 00:00:00 2001 From: Deepankar Chakroborty <deepankar.chakroborty@utu.fi> Date: Fri, 31 Jul 2020 13:24:28 +0300 Subject: [PATCH] Add CT_GA_count.R commit ec4f4915ebf6dfda264a5b58c0e180a47e948b99 Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi> Date: Fri Jul 31 13:23:36 2020 +0300 Removed testing code commit c1fddc713c1d18bd4272b2c455425931681dafe1 Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi> Date: Fri Jul 31 13:23:14 2020 +0300 Fixed subsetting to make it work commit a64df5c0d15cce8ce2ad78a019cafc9aa17b5ff0 Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi> Date: Fri Jul 31 13:16:28 2020 +0300 add CT_GA_count.R commit fe050d47c2dc9e61a13d4a362abf56dabfbecbd7 Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi> Date: Fri Jul 31 12:57:11 2020 +0300 Include usage instruction inside function body Include usage instruction inside function body, so on typing just the function name to display the R code, the instructions will appear. Also stated things clearly in the T&C commit fedbca4a7aa8fb436702accdf1f9b5ec7f066d60 Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi> Date: Thu Jul 30 12:50:20 2020 +0300 Updates 1. Improve readability 2. Manage differing lengths of breaks and labels. commit fb32fb9173c66c8862adb0ad41d82db9d128777b Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi> Date: Thu Jul 30 12:43:20 2020 +0300 skip.steps works as expected skip.steps = 1, now skips 1 observation. 
# Remaining patch metadata, carried over verbatim from the original patch text:
# commit 00a8bbcf04731c010e0a5989a805274609560188
# Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi>
# Date: Thu Jul 30 12:32:16 2020 +0300
#   add script to calculate breaks for axes in ggplot2
#   calculates breaks and labels for axes in ggplot2 with user defined gaps
# ---
# CT_GA_count.R | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++
# 1 file changed, 62 insertions(+)
# create mode 100644 CT_GA_count.R
# diff --git a/CT_GA_count.R b/CT_GA_count.R
# new file mode 100644
# index 0000000..3522047
# --- /dev/null
# +++ b/CT_GA_count.R
# @@ -0,0 +1,62 @@

CT_GA_count <- function(SampleID, Ref_Base, Alt_Base) {
  # #<---------------------------->
  # # You must include this section when:
  # # Distributing, Using and/or Modifying this code.
  # # Please read and abide by the terms of the included LICENSE.
  # # Copyright 2020, Deepankar Chakroborty, All rights reserved.
  # #
  # # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha)
  # # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues
  # # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE
  # #<---------------------------->

  # # PURPOSE:
  # # This function takes in three vectors (one element per mutation):
  # #     SampleID = Sample IDs (factor, character, or anything factor() accepts)
  # #     Ref_Base = Reference base
  # #     Alt_Base = Altered base that created the mutation
  # #
  # # It counts, per sample, how many C > T and G > A changes there are.
  # # The function returns a data frame with one row per sample:
  # #     SampleID = Sample ID (character)
  # #     Total    = Total number of mutations
  # #     CT       = C > T changes
  # #     GA       = G > A changes
  # #     Others   = All other types of transitions and transversions combined
  # #
  # # NOTES:
  # #   * If SampleID is a factor, every level gets a row, including levels
  # #     with no mutations (all counts 0). Otherwise one row is produced per
  # #     distinct value, in the sorted order chosen by factor().
  # #   * Mutations whose SampleID is NA are not attributed to any sample.
  # #   * A missing Ref_Base/Alt_Base never matches C > T or G > A, so such a
  # #     mutation is counted under Others.

  # Build the table through data.frame() so that inputs of incompatible
  # lengths are rejected (and compatible ones recycled) by data.frame() itself.
  mutations <- data.frame(SampleID,
                          Ref_Base,
                          Alt_Base,
                          stringsAsFactors = FALSE)

  # levels() is NULL for non-factors, so plain character IDs must be converted
  # before they can be enumerated. An existing factor is kept untouched so that
  # its (possibly unused) levels are preserved.
  sample_ids <- mutations$SampleID
  if (!is.factor(sample_ids)) {
    sample_ids <- factor(sample_ids)
  }

  ref <- as.character(mutations$Ref_Base)
  alt <- as.character(mutations$Alt_Base)

  # %in% never yields NA, so these masks are safe to use as logical indices.
  is_ct <- ref %in% "C" & alt %in% "T"
  is_ga <- ref %in% "G" & alt %in% "A"

  # table() on a factor reports every level (zero counts included) and ignores
  # NA entries; as.numeric() strips the table attributes and keeps the count
  # columns as plain doubles.
  count_per_sample <- function(ids) {
    as.numeric(table(ids))
  }

  total <- count_per_sample(sample_ids)
  ct <- count_per_sample(sample_ids[is_ct])
  ga <- count_per_sample(sample_ids[is_ga])

  data.frame(SampleID = levels(sample_ids),
             Total = total,
             CT = ct,
             GA = ga,
             Others = total - ct - ga,
             stringsAsFactors = FALSE)
}
# -- GitLab