From d4465971e0ab7d72b72fd1244f07429204e9a352 Mon Sep 17 00:00:00 2001
From: Deepankar Chakroborty <deepankar.chakroborty@utu.fi>
Date: Fri, 31 Jul 2020 13:24:28 +0300
Subject: [PATCH] Add CT_GA_count.R

commit ec4f4915ebf6dfda264a5b58c0e180a47e948b99
Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi>
Date:   Fri Jul 31 13:23:36 2020 +0300

    Removed testing code

commit c1fddc713c1d18bd4272b2c455425931681dafe1
Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi>
Date:   Fri Jul 31 13:23:14 2020 +0300

    Fixed subsetting to make it work

commit a64df5c0d15cce8ce2ad78a019cafc9aa17b5ff0
Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi>
Date:   Fri Jul 31 13:16:28 2020 +0300

    add CT_GA_count.R

commit fe050d47c2dc9e61a13d4a362abf56dabfbecbd7
Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi>
Date:   Fri Jul 31 12:57:11 2020 +0300

    Include usage instruction inside function body

    Include usage instructions inside the function body, so that typing just the function name (which prints its R code) also displays the instructions.
    Also stated things clearly in the T&C

commit fedbca4a7aa8fb436702accdf1f9b5ec7f066d60
Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi>
Date:   Thu Jul 30 12:50:20 2020 +0300

    Updates

    1. Improve readability
    2. Manage differing lengths of breaks and labels.

commit fb32fb9173c66c8862adb0ad41d82db9d128777b
Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi>
Date:   Thu Jul 30 12:43:20 2020 +0300

    skip.steps works as expected

    skip.steps = 1, now skips 1 observation.

commit 00a8bbcf04731c010e0a5989a805274609560188
Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi>
Date:   Thu Jul 30 12:32:16 2020 +0300

    add script to calculate breaks for axes in ggplot2

    calculates breaks and labels for axes in ggplot2 with user defined gaps
---
 CT_GA_count.R | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 CT_GA_count.R

diff --git a/CT_GA_count.R b/CT_GA_count.R
new file mode 100644
index 0000000..3522047
--- /dev/null
+++ b/CT_GA_count.R
@@ -0,0 +1,62 @@
+
+
+CT_GA_count <- function(SampleID,Ref_Base,Alt_Base){
+# #<---------------------------->
+# # You must include this section when:
+# # Distributing, Using and/or Modifying this code.
+# # Please read and abide by the terms of the included LICENSE.
+# # Copyright 2020, Deepankar Chakroborty, All rights reserved.
+# #
+# #  Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha)
+# #  Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues
+# #  License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE
+# #<---------------------------->
+
+# # PURPOSE:
+# # Counts, per sample, how many mutations are C > T or G > A changes.
+# #
+# # ARGUMENTS (parallel vectors, one element per mutation):
+# #     SampleID = Sample IDs (factor, character, or other atomic vector)
+# #     Ref_Base = Reference base
+# #     Alt_Base = Altered base that created the mutation
+# #
+# # RETURNS a data frame with one row per sample:
+# #    SampleID = Sample ID (character)
+# #    Total = Total number of mutations
+# #    CT = C > T changes
+# #    GA = G > A changes
+# #    Others = all other changes combined (Total - CT - GA)
+# #
+# # NOTES:
+# #    - Factor IDs keep their level order; unused levels get zero counts.
+# #    - Non-factor IDs are grouped by their sorted unique values.
+# #    - Mutations with an NA SampleID are dropped.
+# #    - Mutations with an NA base count towards Total and Others only.
+
+  # data.frame() validates/recycles the three vectors to a common length.
+  MutMatrix <- data.frame(SampleID,
+                          Ref_Base,
+                          Alt_Base,
+                          stringsAsFactors = FALSE)
+
+  # levels() is NULL for non-factors, so iterating over it silently
+  # returned zero rows for character IDs; convert to a factor first.
+  ids <- MutMatrix$SampleID
+  if (!is.factor(ids)) {
+    ids <- factor(ids)
+  }
+
+  # %in% (unlike ==) yields FALSE for NA, so NA bases never match.
+  is_ct <- MutMatrix$Ref_Base %in% "C" & MutMatrix$Alt_Base %in% "T"
+  is_ga <- MutMatrix$Ref_Base %in% "G" & MutMatrix$Alt_Base %in% "A"
+
+  # table() on a factor reports every level (including empty ones) in
+  # level order, so the three count vectors line up with levels(ids).
+  Total <- as.numeric(table(ids))
+  CT <- as.numeric(table(ids[is_ct]))
+  GA <- as.numeric(table(ids[is_ga]))
+  data.frame(SampleID = levels(ids),
+             Total = Total, CT = CT, GA = GA,
+             Others = Total - CT - GA,
+             stringsAsFactors = FALSE)
+}
-- 
GitLab