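# Source: GitLab web view of commit 875a4e68, authored by Deepankar Chakroborty.
# Commit message: "Upload to Gitlab UTU".
# The web view's navigation text ("Skip to content", "Snippets Groups Projects",
# "Browse files", "parents", "No related branches found", "No related tags found",
# "No related merge requests found") was captured together with the file and is
# not part of the R script.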
# Parse the per-position output files generated by bam-readcount.
# Each per-base field of a bam-readcount line is a colon-separated record whose
# sub-fields appear in this order:
#base:count:avg_mapping_quality:avg_basequality:avg_se_mapping_quality:num_plus_strand:num_minus_strand:avg_pos_as_fraction:avg_num_mismatches_as_fraction:avg_sum_mismatch_qualities:num_q2_containing_reads:avg_distance_to_q2_start_in_q2_reads:avg_clipped_length:avg_distance_to_effective_3p_end
# Start from an empty global workspace (removes every existing object).
rm(list=ls())
# Parse one tab-separated line of bam-readcount output into a data frame.
#
# x: a single character string. Fields 1-4 are taken as chr, pos, ref and dp;
#    every further field is a colon-separated per-base record.
# Returns a data frame with one row per per-base record. The first four
# columns (chr, pos, ref, dp) repeat the line-level values and the remaining
# columns hold the per-base sub-fields. All columns are character; numeric
# conversion is left to the caller. A line without any per-base record yields
# a zero-row data frame with the same columns.
bam_rc.parse <- function(x) {
  column.names <- c(
    "base", "count", "avg_mapping_quality", "avg_basequality",
    "avg_se_mapping_quality", "num_plus_strand", "num_minus_strand",
    "avg_pos_as_fraction", "avg_num_mismatches_as_fraction",
    "avg_sum_mismatch_qualities", "num_q2_containing_reads",
    "avg_distance_to_q2_start_in_q2_reads", "avg_clipped_length",
    "avg_distance_to_effective_3p_end"
  )
  # Split the line into its tab-separated fields.
  Row <- strsplit(x, split = "\t")[[1]]
  base.fields <- Row[-c(1:4)]

  # Nothing to expand: return an empty table instead of failing in rbind().
  if (length(base.fields) == 0) {
    empty <- as.data.frame(
      matrix(character(0), nrow = 0, ncol = 4 + length(column.names)),
      stringsAsFactors = FALSE
    )
    colnames(empty) <- c("chr", "pos", "ref", "dp", column.names)
    return(empty)
  }

  # One matrix row per base record. Base R only, so the function does not
  # depend on a pipe operator being attached when it is called.
  temp <- do.call(rbind, strsplit(base.fields, split = ":"))
  colnames(temp) <- column.names

  # Keep everything as character so a later as.numeric() sees the digits and
  # not factor level codes (the pre-R-4.0 data.frame default).
  data.frame(
    chr = Row[1], pos = Row[2], ref = Row[3], dp = Row[4],
    temp,
    stringsAsFactors = FALSE
  )
}
# Signed ratio between two paired numeric vectors (used for plus- vs
# minus-strand read counts).
#
# arg1, arg2: numeric vectors of equal length.
# Returns a numeric vector of the same length. For each pair:
#   NA              if either value is missing,
#   -(arg2 / arg1)  if arg1 <  arg2,
#     arg1 / arg2   otherwise (including equal values).
# Division by zero is not special-cased, so +/-Inf and NaN can occur.
# If the lengths differ, a message is emitted and a single NA is returned.
# Empty input returns an empty numeric vector.
strandBias <- function(arg1, arg2) {
  if (length(arg1) != length(arg2)) {
    error <- paste("Parameters with unequal lengths passed as argument!\narg1 =", length(arg1), "; arg2 =", length(arg2))
    message(error)
    return(NA)
  }
  SB <- rep(NA_real_, length(arg1))
  usable <- !is.na(arg1) & !is.na(arg2)
  # FALSE & NA is FALSE, so pairs with a missing value never enter a branch.
  second.larger <- usable & arg1 < arg2
  first.larger.or.equal <- usable & !second.larger
  SB[second.larger] <- (arg2[second.larger] / arg1[second.larger]) * -1
  SB[first.larger.or.equal] <- arg1[first.larger.or.equal] / arg2[first.larger.or.equal]
  SB
}
library(dplyr)
# Parse every bam-readcount output file in the run directory and write the
# parsed table under Parsed/ as both .RDS and .tsv.
setwd("~/BaseSpace/20180112 EGFR Trimmed HiDP/Bam-readcount")
files <- list.files(".", pattern = "bamcount")
# Ensure the output directory exists so the writes below cannot fail after a
# file has already been parsed.
dir.create("Parsed", showWarnings = FALSE)
for (file in files) {
  #file="EGFR.Lib.S1.bam.tsv"
  #file="EGFREGFP5.S1.bam.tsv"
  #file="EGFREGFP1.S1.bam.tsv"
  # One element per non-empty line of the input file.
  temp <- scan(file, what = "character", sep = "\n")
  dat <- lapply(temp, bam_rc.parse) %>% bind_rows()
  dat$chr <- as.character(dat$chr)
  # Go through as.character() first so a factor column is converted by its
  # labels rather than by its internal level codes.
  dat$pos <- as.numeric(as.character(dat$pos))
  dat$dp <- as.numeric(as.character(dat$dp))
  dat$count <- as.numeric(as.character(dat$count))
  # Columns 7-18 are the remaining per-base metrics. lapply() always returns
  # one element per column, whatever the number of rows.
  dat[7:18] <- lapply(dat[7:18], function(v) as.numeric(as.character(v)))
  # Drop the records whose base field is "=".
  dat <- dat[dat$base != "=", ]
  dat$strandbias <- strandBias(dat$num_plus_strand, dat$num_minus_strand)
  dat$AlleleFreq <- dat$count / dat$dp
  # Strip only a literal trailing ".tsv" (the pattern is escaped and anchored).
  saveRDS(dat, file = paste0("Parsed/parsed.", sub("\\.tsv$", "", file), ".RDS"))
  write.table(dat, file = paste0("Parsed/parsed.", file), sep = "\t",
              row.names = FALSE, col.names = TRUE, quote = FALSE, na = "NA")
}
# To parse VCF files generated from samtools mpileup & bcftools
rm(list = ls())
setwd("~/BaseSpace/20180112 EGFR Trimmed HiDP/vcf")
# file="20180112 EGFR_lib.EGFR.locus.HiDP.vcf"
file <- "20180112 EGFR.No.Lig.EGFR.locus.HiDP.vcf"
temptext <- readLines(file)
# Drop the "##" meta lines. A logical mask is used because negative indexing
# with an empty grep() result would discard every line.
temptext <- temptext[!grepl("^##", temptext)]
# library() stops immediately if the package is missing; require() would only
# return FALSE and fail later at progress_bar$new().
library(progress)
# Split the ";"-separated INFO entries into their own tab-separated fields.
temptext <- gsub(";", "\t", temptext)
# Discard the first 49 remaining lines.
# NOTE(review): the reason for the fixed count of 49 is not visible in this
# file - confirm that it still matches the input.
temptext <- temptext[-seq_len(49)]
# Size the progress bar on the lines that are actually parsed.
pb <- progress_bar$new(format = " Processing [:bar] :percent in :elapsed",
                       total = max(length(temptext), 1), clear = FALSE, width = 60)
# One row per remaining line; the columns are filled by the parsing loop.
parsing <- matrix(NA, length(temptext), 19)
colnames(parsing) <- c("chr","pos","dot1","Ref","Alt","zero","dot2","DP","I16","QS","VDB","SGB","RPB","MQB","MQSB","BQB","MQ0F","Format","DATA")
# Fill `parsing` line by line. Fields 1-10 are copied positionally; fields
# 11-19 are routed to the column named after the tag they start with.
info.tags <- c("VDB", "SGB", "RPB", "MQB", "MQSB", "BQB", "MQ0F")
# seq_along() yields no iterations for empty input, where seq(1, 0) would
# have produced the indices 1 and 0.
for (i in seq_along(temptext)) {
  words <- unlist(strsplit(temptext[i], "\t"))
  parsing[i, 1:10] <- words[1:10]
  for (j in seq(11, 19)) {
    word <- words[j]
    # Stop at the first missing or empty field.
    if (is.na(word) || word == "") {
      break
    }
    # None of the tags is a prefix of another, so at most one can match.
    hit <- info.tags[startsWith(word, info.tags)]
    if (length(hit) > 0) {
      parsing[i, hit[1]] <- word
    } else if (startsWith(word, "PL:AD")) {
      # The FORMAT field is followed directly by the sample's data field.
      parsing[i, "Format"] <- word
      parsing[i, "DATA"] <- words[j + 1]
    }
  }
  if (i %% 1000 == 0) {
    pb$tick(1000)
  }
}
# Split the sample DATA field ("PL:AD") into its two parts, drop the raw
# Format/DATA columns and save the parsed table as .RDS and .tsv.
temp <- data.frame(parsing, stringsAsFactors = FALSE)
# Vectorised split: no per-row growth of the result vectors, so no progress
# bar is needed either.
sample.parts <- strsplit(as.character(temp$DATA), ":", fixed = TRUE)
PL <- vapply(sample.parts, function(part) part[1], character(1))
AD <- vapply(sample.parts, function(part) part[2], character(1))
# Remove the two raw columns by name, then append the split values as the
# last two columns (PL, Allelic.Depth).
temp <- temp[, !(colnames(temp) %in% c("Format", "DATA")), drop = FALSE]
temp$PL <- PL
temp$Allelic.Depth <- AD
file.s <- paste0("Parsed/P-", file)
saveRDS(object = temp, file = paste0(file.s, ".RDS"))
write.table(temp, file = paste0(file.s, ".tsv"), sep = "\t",
            row.names = FALSE, col.names = TRUE, quote = FALSE, na = "NA")
# Turn the parsed VCF table into one row per alternate allele and save it
# under ../Variants/ as .RDS and .tsv.
bdir <- "~/BaseSpace/20180112 EGFR Trimmed HiDP/vcf/Parsed/"
setwd(bdir)
# rm(list=ls());file="P-20180112 EGFR_lib.EGFR.locus.HiDP.vcf.tsv"
rm(list = ls())
file <- "P-20180112 EGFR.No.Lig.EGFR.locus.HiDP.vcf.tsv"
#setwd(paste(bdir,dir,sep=""))
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/Personal/str.extra-for-R.R/str.extra.R")
# The table was written without quoting, so read it back without treating
# quote or comment characters as special.
tab <- read.table(file, sep = "\t", header = TRUE, as.is = TRUE,
                  stringsAsFactors = FALSE, quote = "", comment.char = "")
tab <- tab[c(-3, -6, -7)] # Removing zero and dot (.) columns
#Change Depth to numeric
tab$DP <- as.numeric(gsub("DP=", "", tab$DP))
# Subsetting to EGFR locus #chr7:55,084,725-55,277,031
# NOTE(review): both bounds are strict, so the two end positions themselves
# are excluded - confirm this is intended.
subtab <- subset(tab, tab$chr == "chr7" & tab$pos > 55084725 & tab$pos < 55277031)
#Subsetting to get Depth > 1000
# subtab=subset(subtab,subtab$DP>1000)
# Make New Only Mutation Matrix
# Each row can list several comma-separated alternate alleles. Expand to one
# row per allele in a single vectorised step instead of appending to six
# vectors inside a nested loop.
alt.list <- strsplit(as.character(subtab$Alt), ",", fixed = TRUE)
ad.list <- strsplit(as.character(subtab$Allelic.Depth), ",", fixed = TRUE)
n.alt <- lengths(alt.list)
row.idx <- rep(seq_len(nrow(subtab)), times = n.alt)
# The depth of the j-th alternate allele is entry j + 1 of Allelic.Depth.
alt.depth <- unlist(
  Map(function(depths, k) depths[seq_len(k) + 1], ad.list, n.alt),
  use.names = FALSE
)
SNV.matrix <- data.frame(
  chr = subtab$chr[row.idx],
  pos = subtab$pos[row.idx],
  ref = subtab$Ref[row.idx],
  alt = as.character(unlist(alt.list, use.names = FALSE)),
  DP = subtab$DP[row.idx],
  AD = as.numeric(alt.depth),
  stringsAsFactors = FALSE
)
rm(alt.list, ad.list, n.alt, row.idx, alt.depth)
# Drop the "<*>" placeholder allele rows.
submat <- subset(SNV.matrix, SNV.matrix$alt != "<*>")
# Escaped and anchored patterns: strip only the literal ".vcf.tsv" suffix and
# rewrite only the leading "P-".
fnm <- sub("^P-", "../Variants/V-", sub("\\.vcf\\.tsv$", "", file))
saveRDS(object = submat, file = paste0(fnm, ".RDS")) # use var=readRDS("FileName")
write.table(submat, file = paste0(fnm, ".tsv"), sep = "\t",
            row.names = FALSE, col.names = TRUE, quote = FALSE, na = "NA")
#Subsetting to get Depth > 1000
#subtab=subset(subtab,subtab$DP>1000)
setwd("~/BaseSpace/20180112 EGFR Trimmed HiDP/vcf/Variants/")
rm(list = ls())
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/Personal/str.extra-for-R.R/str.extra.R")

# Load one saved variant table, add variant frequency (VF, percent of depth),
# relative abundance (Rel.Ab, percent of all alternate reads) and a mutation
# identifier (MutID), then prefix every column name with `prefix`.
read.variant.table <- function(path, prefix) {
  calls <- readRDS(path)
  calls <- data.frame(
    calls,
    VF = (calls$AD * 100) / calls$DP,
    Rel.Ab = (calls$AD * 100) / sum(calls$AD),
    MutID = paste(calls$chr, ":", calls$pos, calls$ref, ">", calls$alt, sep = ""),
    stringsAsFactors = FALSE
  )
  colnames(calls) <- paste(prefix, colnames(calls), sep = "")
  calls
}

Library <- read.variant.table("V-20180112 EGFR_lib.EGFR.locus.HiDP.RDS", "Lib.")
NoLigand <- read.variant.table("V-20180112 EGFR.No.Lig.EGFR.locus.HiDP.RDS", "NoLig.")

# Every mutation seen in either sample; rows missing from one sample are NA
# on that sample's columns.
allMuts <- union(NoLigand$NoLig.MutID, Library$Lib.MutID)
library.rows <- Library[match(allMuts, Library$Lib.MutID), ]
idx <- match(allMuts, NoLigand$NoLig.MutID)
Mutation.Table <- data.frame(allMuts, library.rows, NoLigand[idx, ], stringsAsFactors = FALSE)
rm(Library, NoLigand, library.rows, read.variant.table)
# Signed fold change between two paired numeric vectors.
#
# arg1, arg2: numeric vectors of equal length (called in this file with the
#             library and no-ligand variant frequencies).
# Returns a numeric vector of the same length. For each pair:
#   NA              if either value is missing,
#     arg2 / arg1   if arg1 <  arg2,
#   -(arg1 / arg2)  otherwise (so equal values give -1).
# Division by zero is not special-cased, so +/-Inf and NaN can occur.
# If the lengths differ, a message is emitted and a single NA is returned.
# Empty input returns an empty numeric vector.
FoldChange <- function(arg1, arg2) {
  if (length(arg1) != length(arg2)) {
    error <- paste("Parameters with unequal lengths passed as argument!\narg1 =", length(arg1), "; arg2 =", length(arg2))
    message(error)
    return(NA)
  }
  FC <- rep(NA_real_, length(arg1))
  usable <- !is.na(arg1) & !is.na(arg2)
  # FALSE & NA is FALSE, so pairs with a missing value never enter a branch.
  increased <- usable & arg1 < arg2
  not.increased <- usable & !increased
  FC[increased] <- arg2[increased] / arg1[increased]
  FC[not.increased] <- (arg1[not.increased] / arg2[not.increased]) * -1
  FC
}
# Signed fold change between the library and no-ligand variant frequencies.
Mutation.Table$FC=FoldChange(Mutation.Table$Lib.VF,Mutation.Table$NoLig.VF)
# Columns 2,3,3,4,5,1 in that order; column 3 is deliberately taken twice.
# NOTE(review): presumably chr / start / end / ref / alt / id for the ANNOVAR
# input file written by the commented-out line below - confirm.
AnnoDF=Mutation.Table[,c(2,3,3,4,5,1)]
# write.table(AnnoDF,file = "../20180131.InputAnnovar.txt",col.names = F,quote = F,row.names = F)
#- run following line in annovar
# annotate_variation.pl -out EGFR.muts --build hg19 20180131.InputAnnovar.txt humandb
#- more annotations
# table_annovar.pl "20180131.InputAnnovar.txt" ~/NGS_Seq_Tools/annovar2/humandb -buildver hg19 -out EGFR.iSCREAM -remove -protocol refgene,gnomad_genome,gnomad_exome,avsnp150,clinvar_20170905 -operation g,f,f,f,f -nastring . -csvout -polish
rm(AnnoDF)
# Read the annotation table and give it the same MutID key format as allMuts.
var=read.table("../EGFR.iSCREAM.hg19_multianno.csv",sep=",",stringsAsFactors = F,header = T)
var$MutID=paste(var$Chr,":",var$Start,var$Ref,">",var$Alt,sep="")
# Align annotation rows to Mutation.Table; unmatched mutations get NA rows.
idx=match(Mutation.Table$allMuts,var$MutID)
# Annotation columns are selected by position (6 to 26).
Mutation.Table=data.frame(Mutation.Table,var[idx,c(6:26)])
# Provides annovarMutCodeFind(), which is defined outside this file.
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/EleniusGroup/DvsP.EGFR.HiDP/AnnovarMutCodeFind.R")
Mutation.Table$AAchange=annovarMutCodeFind(Mutation.Table$AAChange.refgene,isoform = "NM_005228")
# Strip the first and the last character of AAchange; what is left is
# converted to a number and stored as AAPos.
temp=gsub("^.","",x = Mutation.Table$AAchange);temp=gsub(".$","",x=temp)
Mutation.Table$AAPos=as.numeric(temp);rm(temp)
# NOTE(review): where ExonicFunc.refgene is NA the comparison is NA, and
# logical indexing with NA yields all-NA rows rather than dropping them.
Mutation.Table=Mutation.Table[Mutation.Table$ExonicFunc.refgene!="synonymous SNV",]
# Add BamReadCount
rm(var,idx)
# Library sample: build the MutID key, prefix every column with "bamRC.Lib."
# and align the rows to Mutation.Table by that key.
Lib.Count=readRDS("../../Bam-readcount/Parsed/parsed.EGFR.Lib.bamcount.RDS")
Lib.Count$MutID=paste(Lib.Count$chr,":",Lib.Count$pos,Lib.Count$ref,">",Lib.Count$base,sep="")
colnames(Lib.Count)=paste("bamRC.Lib.",colnames(Lib.Count),sep="")
idx=match(Mutation.Table$allMuts,Lib.Count$bamRC.Lib.MutID)
test=cbind.data.frame(Mutation.Table,Lib.Count[idx,])
# Same steps for the no-ligand sample, with the prefix "bamRC.NoLig.".
NoLig.Count=readRDS("../../Bam-readcount/Parsed/parsed.EGFR.NoLig.bamcount.RDS")
NoLig.Count$MutID=paste(NoLig.Count$chr,":",NoLig.Count$pos,NoLig.Count$ref,">",NoLig.Count$base,sep="")
colnames(NoLig.Count)=paste("bamRC.NoLig.",colnames(NoLig.Count),sep="")
idx=match(Mutation.Table$allMuts,NoLig.Count$bamRC.NoLig.MutID)
test=cbind.data.frame(test,NoLig.Count[idx,])
# Replace Mutation.Table with the combined table and drop the temporaries.
Mutation.Table=test;rm(test,Lib.Count,NoLig.Count,idx)
# write.table(Mutation.Table,file = "../20180206.Result.tsv",sep="\t",col.names = T,quote = F,row.names = F)
# saveRDS(Mutation.Table,file = "../20180206.Result.RDS")
#-------------------------
library(ggplot2)
# Sourced for the plot theme; theme_dc.plot.xaxis.regular used below is not
# defined in this file, so presumably it comes from here - confirm.
source("~/Seafile/My Library/Git/GitLab.UTU/Misc Code/Master ggplot2 Theme/theme.DC.plot.R")
# All data. Massive plot!!!
# Keep only rows without any NA, then drop unused factor levels.
FC.data.cut=droplevels.data.frame(na.exclude(Mutation.Table))
# View(FC.data.cut[FC.data.cut$AAchange%in%c("L858R","T790M","A702V","A1118E","L184P"),])
# str.extra(FC.data.cut)
# Columns to discard, identified by position.
# NOTE(review): this vector holds more indices than the 21 names listed in the
# "Removed columns" comment below - that listing may be out of date.
toRemove=c(10,11,12,13,14,19,21,23,26,28,29,30,31,32,33,34,36,38,39,40,seq(44,46),48,seq(50,52),seq(55,61),64,seq(65,67),69,seq(71,73),seq(76,82),85)
# Shows the names about to be removed (auto-printed only when run interactively).
colnames(FC.data.cut)[toRemove]
FC.data.cut=FC.data.cut[,-toRemove]
# Removed columns
# [1] "Lib.MutID" "NoLig.chr"
# [3] "NoLig.pos" "NoLig.ref"
# [5] "NoLig.alt" "NoLig.MutID"
# [7] "Func.refgene" "GeneDetail.refgene"
# [9] "Xref.refgene" "gnomAD_genome_AFR"
# [11] "gnomAD_genome_AMR" "gnomAD_genome_ASJ"
# [13] "gnomAD_genome_EAS" "gnomAD_genome_FIN"
# [15] "gnomAD_genome_NFE" "gnomAD_genome_OTH"
# [17] "CLINSIG" "CLNACC"
# [19] "CLNDSDB" "CLNDSDBID"
# [21] "MutID"
# Direction label derived from the sign of FC. Assigning the string
# "Decrease" turns the column into character, so the second comparison is a
# string comparison that relabels every remaining value as "Increase".
FC.data.cut$Direction=FC.data.cut$FC
FC.data.cut$Direction[FC.data.cut$Direction<0]="Decrease"
FC.data.cut$Direction[FC.data.cut$Direction!="Decrease"]="Increase"
FC.data.cut$Direction=as.factor(FC.data.cut$Direction)
# Scatter of no-ligand vs library strand bias for all rows.
# NOTE(review): ggtitle() is called twice in each plot below; presumably only
# the last title (with N) is shown - confirm.
ggplot(data = FC.data.cut,aes(x=bamRC.NoLig.strandbias,y=FC.data.cut$bamRC.Lib.strandbias))+geom_point(alpha=0.4,size=2)+theme_dc.plot.xaxis.regular+geom_vline(xintercept = 0)+geom_hline(yintercept = 0)+xlab("StrandBias NoLigand")+ylab("Strand Bias Plasmid Library")+ggtitle("Strand Bias distribution")+ggtitle(paste("Strand Bias distribution N=",length(FC.data.cut$allMuts)))
# removing infinite strand bias AA changes
# Drops a row when either sample's strand bias is +Inf or -Inf.
FC.data.cut=FC.data.cut[!((FC.data.cut$bamRC.NoLig.strandbias==Inf | FC.data.cut$bamRC.NoLig.strandbias==-Inf)|(FC.data.cut$bamRC.Lib.strandbias==Inf | FC.data.cut$bamRC.Lib.strandbias==-Inf)),]
ggplot(data = FC.data.cut,aes(x=bamRC.NoLig.strandbias,y=FC.data.cut$bamRC.Lib.strandbias))+geom_point(alpha=0.4,size=2)+theme_dc.plot.xaxis.regular+geom_vline(xintercept = 0)+geom_hline(yintercept = 0)+xlab("StrandBias NoLigand")+ylab("Strand Bias Plasmid Library")+ggtitle("Strand Bias distribution (Inf removed)")+ggtitle(paste("Strand Bias distribution N=",length(FC.data.cut$allMuts)))
# Keep rows whose strand bias lies within [-10, 10].
# NOTE(review): `&` binds tighter than `|`, so this keeps a row when EITHER
# sample is within range. If both samples were meant to be within range, the
# middle operator should be `&` - confirm the intent before changing it.
FC.data.cut=FC.data.cut[FC.data.cut$bamRC.NoLig.strandbias<=10&FC.data.cut$bamRC.NoLig.strandbias>=-10|FC.data.cut$bamRC.Lib.strandbias<=10&FC.data.cut$bamRC.Lib.strandbias>=-10,]
ggplot(data = FC.data.cut,aes(x=bamRC.NoLig.strandbias,y=FC.data.cut$bamRC.Lib.strandbias))+geom_point(alpha=0.4,size=2)+theme_dc.plot.xaxis.regular+geom_vline(xintercept = 0)+geom_hline(yintercept = 0)+xlab("StrandBias NoLigand")+ylab("Strand Bias Plasmid Library")+ggtitle(paste("Strand Bias distribution N=",length(FC.data.cut$allMuts)))
# write.table(FC.data.cut,file = "../20180206.Final.Table.tsv",sep="\t",col.names = T,quote = F,row.names = F)
# saveRDS(FC.data.cut,file = "../20180206.Final.table.RDS")
library(RColorBrewer)
# 20-step colour ramp from dark blue (#003366) to firebrick1; passed as the
# `colors` argument of the plotly plot further down.
cols=colorRampPalette(c("#003366","firebrick1"))(20)
#cols=colorRampPalette(c("#ffeaea","#ff3030"))(20)
# #temp=temp[order(temp$VF.NoLigand,decreasing = F),]
# p=ggplot(FC.data.cut,aes(y=FC,x=AAPos,size=NoLig.VF,color=NoLig.VF))+geom_point(alpha=0.9)+scale_colour_gradient(low="#003366",high="firebrick1")+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text.y= element_text(size = rel(1),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"),axis.text.x = element_text(angle = 90, hjust = 1,size = rel(0.75)))+ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)")+xlab("Amino Acid Position")+ylab("Fold Change")+scale_x_continuous(breaks = c(1,seq(50,1210,by = 50),1210))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC>25,],aes(label=FC.data.cut[FC.data.cut$FC>25,"AAchange"]),hjust=1.3,size=3)+scale_y_continuous(breaks=seq(-240,100,by = 20))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC<(-50),],aes(label=FC.data.cut[FC.data.cut$FC<(-50),"AAchange"]),hjust=1.3,size=3)
# pdf(file = "../FC.plot.pdf",width = 10,height = 7);print(p);dev.off()
#
# p=ggplot(FC.data.cut,aes(y=FC,x=AAPos,size=NoLig.VF,color=NoLig.VF))+geom_point(alpha=0.1)+scale_colour_gradient(low="#003366",high="firebrick1")+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text.y= element_text(size = rel(1),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"),axis.text.x = element_text(angle = 90, hjust = 1,size = rel(0.75)))+ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)")+xlab("Amino Acid Position")+ylab("Fold Change")+scale_x_continuous(breaks = c(1,seq(50,1210,by = 50),1210))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC>25,],aes(label=FC.data.cut[FC.data.cut$FC>25,"AAchange"]),hjust=1.3,size=3)+scale_y_continuous(breaks=seq(-240,100,by = 20))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC<(-50),],aes(label=FC.data.cut[FC.data.cut$FC<(-50),"AAchange"]),hjust=1.3,size=3)
# pdf(file = "../FC.plot.alpha.pdf",width = 10,height = 7);print(p);dev.off()
# ggplotly(p)
# p=ggplot(FC.data.cut,aes(y=FC,x=AAPos,size=NoLig.VF,color=NoLig.VF))+geom_point(alpha=0.9)+scale_colour_gradient(low="#003366",high="firebrick1")+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text.y= element_text(size = rel(1),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"),axis.text.x = element_text(angle = 90, hjust = 1,size = rel(0.75)))+ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)")+xlab("Amino Acid Position")+ylab("Fold Change")+scale_x_continuous(breaks = c(1,seq(50,1210,by = 50),1210))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC>25,],aes(label=FC.data.cut[FC.data.cut$FC>25,"AAchange"]),hjust=1.3,size=3)+scale_y_continuous(breaks=seq(-240,100,by = 20))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC<(-50),],aes(label=FC.data.cut[FC.data.cut$FC<(-50),"AAchange"]),hjust=1.3,size=3)+geom_rug(sides="r",alpha=0.5,size=0.2,color="black");p
# ggsave(plot = p,filename = "../FC.plot.w.rug.pdf",width = 10,height = 7);print(p);dev.off()
# Only FC not my signed FC.
# Plain (unsigned) fold change: no-ligand VF divided by library VF.
FC.data.cut$FC.reg <- (FC.data.cut$NoLig.VF / FC.data.cut$Lib.VF)
# Rows that get a text label. The label layer takes both its data and its
# labels from this one subset, so the two can never differ in length (the
# data used to be filtered on FC and the labels on FC.reg).
top.hits <- FC.data.cut[which(FC.data.cut$FC.reg > 25), ]
p <- ggplot(FC.data.cut, aes(y = FC.reg, x = AAPos, size = NoLig.VF, color = NoLig.VF)) +
  geom_point(alpha = 0.9) +
  scale_colour_gradient(low = "#003366", high = "firebrick1") +
  theme(
    panel.border = element_rect(linetype = 1, colour = "black", fill = NA, size = 0.15),
    panel.background = element_rect(fill = NA, colour = NA),
    axis.text.y = element_text(size = rel(1), color = "black"),
    legend.key = element_rect(fill = NA, colour = NA),
    axis.ticks = element_line(colour = "black"),
    axis.text.x = element_text(angle = 90, hjust = 1, size = rel(0.75))
  ) +
  ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)") +
  xlab("Amino Acid Position") +
  ylab("Fold Change") +
  scale_x_continuous(breaks = c(1, seq(50, 1210, by = 50), 1210)) +
  geom_text(data = top.hits, aes(label = AAchange), hjust = 1.3, size = 3) +
  geom_hline(yintercept = 0, color = "black") +
  # Dashed reference line at a fold change of 1 (no change).
  geom_hline(yintercept = 1, linetype = "dashed", color = "red", alpha = 0.5)
p
pdf(file = "../FCreg.plot.pdf", width = 10, height = 5)
print(p)
dev.off()
#
# p=ggplot(FC.data.cut,aes(y=FC.reg,x=AAPos,size=NoLig.VF,color=NoLig.VF))+geom_point(alpha=0.1)+scale_colour_gradient(low="#003366",high="firebrick1")+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text.y= element_text(size = rel(1),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"),axis.text.x = element_text(angle = 90, hjust = 1,size = rel(0.75)))+ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)")+xlab("Amino Acid Position")+ylab("Fold Change")+scale_x_continuous(breaks = c(1,seq(50,1210,by = 50),1210))+geom_text(data=FC.data.cut[FC.data.cut$FC>25,],aes(label=FC.data.cut[FC.data.cut$FC.reg>25,"AAchange"]),hjust=1.3,size=3)+geom_hline(yintercept = 0,color="black")+geom_hline(yintercept = 1,linetype="dashed",color="red",alpha=0.5);p
# pdf(file = "../FCreg.alpha.plot.pdf",width = 10,height = 5);print(p);dev.off()
# base=c(1,2,4,6,8)
# y.breaks=sort(c(0.01,0.1,base*1,base*10,100))
#
# p=ggplot(FC.data.cut,aes(y=log2(FC.reg),x=AAPos,size=NoLig.VF,color=NoLig.VF))+geom_point(alpha=0.9)+scale_colour_gradient(low="#003366",high="firebrick1")+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text.y= element_text(size = rel(1),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"),axis.text.x = element_text(angle = 90, hjust = 1,size = rel(0.75)))+ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)")+xlab("Amino Acid Position")+ylab("log2(Fold Change)")+scale_x_continuous(breaks = c(1,seq(50,1210,by = 50),1210))+geom_text(data=FC.data.cut[FC.data.cut$FC>25,],aes(label=FC.data.cut[FC.data.cut$FC.reg>25,"AAchange"]),hjust=1.3,size=3)+scale_y_continuous(breaks = log2(y.breaks),labels=c(0.01,0.1,1,rep("",4),10,rep("",4),100))+geom_hline(yintercept = 0,color="black");p
# pdf(file = "../FCreg.log.plot.pdf",width = 10,height = 7);print(p);dev.off()
# ggplotly(p)
# Browsable plot
# Interactive plotly version of the fold-change map with hover details.
mini <- FC.data.cut[, c("AAchange", "Lib.VF", "NoLig.VF", "Lib.AD", "NoLig.AD", "FC.reg", "AAPos")]
# The no-ligand variant frequency drives marker colour and size.
colnames(mini)[c(3)] <- c("VariantFrequency")
library(plotly)
# Axis settings shared by layout() below.
X.A <- list(
  title = "EGFR amino acid",
  showticklabels = TRUE,
  dtick = 100,
  ticklen = 5,
  gridcolor = toRGB("white"),
  gridwidth = 0.5,
  tickwidth = 1,
  range = c(-10, 1220),
  tickangle = 0,
  zerolinecolor = toRGB("black"),
  zerolinewidth = 1.5)
Y.A <- list(
  title = "Fold Change",
  showticklabels = TRUE,
  dtick = 25,
  ticklen = 5,
  gridcolor = toRGB("white"),
  gridwidth = 0.5,
  tickwidth = 1,
  zerolinecolor = toRGB("black"),
  zerolinewidth = 1.5)
# Hover text and title typos fixed: "Surving" -> "Surviving" and
# "iSREAM" -> "iSCREAM" (the spelling used for the annotation files).
p <- plot_ly(
  mini,
  alpha = 0.7,
  x = ~AAPos,
  y = ~FC.reg,
  color = ~VariantFrequency,
  size = ~VariantFrequency,
  mode = "text",
  text = ~paste('EGFR ', AAchange, '</br> FC: ', round(FC.reg, digits = 3), '</br>- - - -</br>Variant frequency</br>Library: ', round(Lib.VF, digits = 3), '</br>Surviving cells: ', round(VariantFrequency, digits = 3), '</br>- - - -</br>Number of reads</br>Library: ', Lib.AD, '</br>Surviving cells: ', NoLig.AD),
  colors = cols,
  sizes = c(50, 300)
) %>%
  add_markers() %>%
  layout(title = "EGFR iSCREAM", xaxis = X.A, yaxis = Y.A)
print(p)
#
#
# library(htmlwidgets)
# library(htmltools)
#
# p <- htmlwidgets::appendContent(p, htmltools::tags$input(id='inputText', value='L858R', ''), htmltools::tags$button(id='buttonSearch', 'Search'))
# p <- htmlwidgets::appendContent(p, htmltools::tags$script(HTML(
# 'document.getElementById("buttonSearch").addEventListener("click", function()
# {
# var i = 0;
# var j = 0;
# var found = [];
# var myDiv = document.getElementsByClassName("js-plotly-plot")[0]
# var data = JSON.parse(document.querySelectorAll("script[type=\'application/json\']")[0].innerHTML);
# for (i = 0 ;i < data.x.data.length; i += 1) {
# for (j = 0; j < data.x.data[i].text.length; j += 1) {
# if (data.x.data[i].text[j].indexOf(document.getElementById("inputText").value) !== -1) {
# found.push({curveNumber: i, pointNumber: j});
# }
# }
# }
# Plotly.Fx.hover(myDiv, found);
# }
# );')))
#
# htmlwidgets::saveWidget(p, paste('../FoldChangeRegulariSCREAM', ".html", sep=""))
# p
#
# # ,text=~paste('EGFR Mut: ',AAchange,'</br> Reads_Library: ',Lib.AD,'</br> VF_Lib: ',round(Lib.VF,digits = 3),'</br> Reads_NoLig: ',NoLig.AD,'</br> VF_NoLig: ',round(NoLig.VF,digits=3),'</br> FC: ',round(FC.reg,digits = 3))
# Fresh session for the animation frames: reload the saved mutation table,
# recompute the plain fold change and keep only the columns needed below.
setwd("~/BaseSpace/20180112 EGFR Trimmed HiDP/vcf/Variants/")
rm(list = ls())
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Git/GitLab.UTU/Personal/str.extra-for-R.R/str.extra.R")
FC.data.cut <- readRDS("../20180401.MutationTable.RDS")
# Plain fold change: no-ligand VF divided by library VF.
FC.data.cut$FC.reg <- FC.data.cut$NoLig.VF / FC.data.cut$Lib.VF
selected <- FC.data.cut[, c(
  "AAchange",
  "AAPos",
  "Lib.VF",
  "NoLig.VF",
  "FC.reg"
)]
# Build per-frame interpolation columns for an animation.
#
# FC:       numeric vector of fold changes; each is interpolated from 0 to
#           its value.
# VF.Begin: numeric vector of starting variant frequencies.
# VF.End:   numeric vector of final variant frequencies; each VF is
#           interpolated from VF.Begin to VF.End.
# frames:   number of frames (columns) per quantity, for a smooth animation.
#
# Returns a data frame with one row per input element and 2 * frames columns:
# FC.frame.1 ... FC.frame.<frames>, then VF.frame.1 ... VF.frame.<frames>.
# Stops with an error when the three inputs differ in length. Empty input
# returns a zero-row data frame with the same columns.
expandFrames <- function(FC, VF.Begin, VF.End, frames = 30) {
  if (length(VF.Begin) != length(FC) || length(VF.End) != length(FC)) {
    stop("FC, VF.Begin and VF.End must all have the same length", call. = FALSE)
  }
  DF <- matrix(data = 0, nrow = length(FC), ncol = frames)
  VF <- matrix(data = 0, nrow = length(VF.Begin), ncol = frames)
  # seq_along() yields no iterations for empty input, where seq(1, 0) would
  # have produced the indices 1 and 0.
  for (i in seq_along(FC)) {
    DF[i, ] <- seq(from = 0, to = FC[i], length.out = frames)
    VF[i, ] <- seq(from = VF.Begin[i], to = VF.End[i], length.out = frames)
  }
  DF <- as.data.frame(DF)
  colnames(DF) <- paste("FC.frame.", seq(1, frames, by = 1), sep = "")
  VF <- as.data.frame(VF)
  colnames(VF) <- paste("VF.frame.", seq(1, frames, by = 1), sep = "")
  cbind(DF, VF)
}
# Interpolate fold change and variant frequency over 60 animation frames.
# The column is named in full: `selected` has no "FC" column, so `selected$FC`
# only resolved through `$` partial matching onto "FC.reg".
DF <- expandFrames(FC = selected$FC.reg, VF.Begin = selected$Lib.VF, VF.End = selected$NoLig.VF, frames = 60)
# setwd("~/BaseSpace/20180112 EGFR Trimmed HiDP/vcf/Variants/")
#
# rm(list=ls())
# source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Git/GitLab.UTU/Personal/str.extra-for-R.R/str.extra.R")
# # saveRDS(Mutation.Table,file = "../20180206.Result.RDS")
#
# FC.data.cut=readRDS("../20180206.Final.table.RDS")
# selected=FC.data.cut[,c("AAchange","AAPos","Lib.VF","NoLig.VF","FC")]
#
# expandFrames=function(FC,VF.Begin,VF.End,frames=30){
# # FC is foldchange column
# # frames is the number of frames you want for a smooth animation
# DF=matrix(data = 0,nrow =length(FC),ncol = frames)
# VF=matrix(data = 0,nrow =length(VF.Begin),ncol = frames)
# for(i in seq(1,length(FC))){
# DF[i,]=seq(from = 0,to = FC[i],length.out = frames)
# VF[i,]=seq(from = VF.Begin[i],to = VF.End[i],length.out = frames)
# }
# DF=as.data.frame(DF)
# colnames(DF)=paste("FC.frame.",seq(1,frames,by = 1),sep="")
# VF=as.data.frame(VF)
# colnames(VF)=paste("VF.frame.",seq(1,frames,by = 1),sep="")
# out.DF=cbind(DF,VF)
# return(out.DF)
# }
#
# DF=expandFrames(FC = selected$FC,VF.Begin = selected$Lib.VF,VF.End = selected$NoLig.VF,frames = 60)
# selected=cbind(selected,DF)
# # FC = selected$FC;VF.Begin = selected$Lib.VF;VF.End = selected$NoLig.VF;frames = 20
#
# FC.data=selected[,1:5];colnames(FC.data)
# # [1] "AAchange" "AAPos" "Lib.VF" "NoLig.VF" "FC"
# FC.data$FC=rep(0,length(FC.data$FC))
# FC.data$VF=rep(0,length(FC.data$Lib.VF))
#
# frames=grep("FC.frame",colnames(selected))
# VF.frames=grep("VF.frame",colnames(selected))
# j=1
# VF.range=seq(0,5,by = 0.01)
# color.range=colorRampPalette(c("#003366","firebrick1","firebrick1","firebrick1","firebrick1","firebrick1","firebrick1"))(length(VF.range))
# for(i in frames){
# print(paste("Frame",i))
# FC.data$FC=selected[,i]
# FC.data$VF=selected[,VF.frames[j]];j=j+1
# p=ggplot(FC.data,aes(y=FC,x=AAPos,size=VF,color=VF))+geom_point(alpha=0.9)+scale_colour_gradientn(colours = color.range,values=VF.range)+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text.y= element_text(size = rel(1),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"),axis.text.x = element_text(angle = 90, hjust = 1,size = rel(0.75)))+ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)")+xlab("Amino Acid Position")+ylab("Fold Change")+scale_x_continuous(breaks = c(1,seq(50,1210,by = 50),1210))+geom_hline(yintercept = 0,color="black")+scale_y_continuous(breaks=seq(-240,100,by = 20),limits = c(-240,110))+geom_hline(yintercept = 0,color="black")
# ggsave(filename = paste("../Animated.FC/FC.plot.frame",stringr::str_pad(i,2,pad = "0"),".png",sep = ""),plot = p,dpi = 200,width = 10,height = 7)
# };j=1
# # Final frame
# FC.data.cut$VF=FC.data.cut$NoLig.VF
# p=ggplot(FC.data.cut,aes(y=FC,x=AAPos,size=VF,color=VF))+geom_point(alpha=0.9)+scale_colour_gradientn(colours = color.range,values=VF.range)+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text.y= element_text(size = rel(1),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"),axis.text.x = element_text(angle = 90, hjust = 1,size = rel(0.75)))+ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)")+xlab("Amino Acid Position")+ylab("Fold Change")+scale_x_continuous(breaks = c(1,seq(50,1210,by = 50),1210))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC>25,],aes(label=FC.data.cut[FC.data.cut$FC>25,"AAchange"]),hjust=1.3,size=3)+scale_y_continuous(breaks=seq(-240,100,by = 20),limits = c(-240,110))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC<(-50),],aes(label=FC.data.cut[FC.data.cut$FC<(-50),"AAchange"]),hjust=1.3,size=3)
# ggsave(filename = paste("../Animated.FC/FC.plot.frame",paste("../Animated.FC/FC.plot.frame",stringr::str_pad(i+1,2,pad = "0"),".png",sep = ""),".png",sep = ""),plot = p,dpi = 200,width = 10,height = 7)
#
#
# # run this: convert -delay 0.1 -loop 1 *.png animation.gif
#
# Load a saved workspace image. `Variants`, used below, is not created
# anywhere in this file, so presumably it comes from this image - confirm.
load("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/EleniusGroup/ERBB.Miscellaneous/Posssible mutants/EGFR/egfr_Variants.bin")
# Number of distinct values in the mutants and mutantprotein columns.
print(paste("Unique EGFR cDNA=",length(unique(Variants$mutants)),"Unique EGFR proteins=",length(unique(Variants$mutantprotein))))
# The first row's protein is used as the wild-type reference sequence.
EGFR.WT=Variants$mutantprotein[1]
# Keep only the rows whose protein differs from that reference.
non.syn=Variants[Variants$mutantprotein!=EGFR.WT,]
# Distinct counts among those rows (auto-printed only when run interactively).
length(unique(non.syn$mutants))+1 # +1 for WT
length(unique(non.syn$mutantprotein))+1 # +1 for WT
print(paste("Non-synonymous EGFR muts =",length(non.syn$mutants)))
#Subsetting to get Depth > 1000
#subtab=subset(subtab,subtab$DP>1000)
# Build Mutation.Table: one row per variant seen in the library and/or the
# no-ligand sample, with the per-sample columns side by side.
setwd("~/BaseSpace/20180112 EGFR Trimmed HiDP/vcf/Variants/")
rm(list = ls())
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/Personal/str.extra-for-R.R/str.extra.R")

# Library sample. Derived columns:
#   VF     = AD as a percentage of DP
#   Rel.Ab = AD as a percentage of the summed AD over all rows
#   MutID  = "<chr>:<pos><ref>><alt>"
Library <- readRDS("V-20180112 EGFR_lib.EGFR.locus.HiDP.RDS")
Library <- data.frame(
  Library,
  VF = (Library$AD * 100) / Library$DP,
  Rel.Ab = (Library$AD * 100) / sum(Library$AD),
  MutID = paste0(Library$chr, ":", Library$pos, Library$ref, ">", Library$alt),
  stringsAsFactors = FALSE
)
colnames(Library) <- paste0("Lib.", colnames(Library))

# No-ligand sample, prepared the same way but prefixed "NoLig.".
NoLigand <- readRDS("V-20180112 EGFR.No.Lig.EGFR.locus.HiDP.RDS")
NoLigand <- data.frame(
  NoLigand,
  VF = (NoLigand$AD * 100) / NoLigand$DP,
  Rel.Ab = (NoLigand$AD * 100) / sum(NoLigand$AD),
  MutID = paste0(NoLigand$chr, ":", NoLigand$pos, NoLigand$ref, ">", NoLigand$alt),
  stringsAsFactors = FALSE
)
colnames(NoLigand) <- paste0("NoLig.", colnames(NoLigand))

# Align both samples on the union of mutation IDs; a variant missing from one
# sample gets NA in that sample's columns (match() returns NA for it).
allMuts <- union(NoLigand$NoLig.MutID, Library$Lib.MutID)
idx <- match(allMuts, NoLigand$NoLig.MutID)
Mutation.Table <- data.frame(
  allMuts,
  Library[match(allMuts, Library$Lib.MutID), ],
  NoLigand[idx, ],
  stringsAsFactors = FALSE
)
rm(Library, NoLigand)
#
# #--------------
# allMuts=Mutation.Table$allMuts
# searchI=gregexpr(":",allMuts,fixed = T)
# start.posI=unlist(lapply(searchI, `[[`, 1),use.names = F)
# chr=substr(allMuts,0,start.posI-1)
#
# searchI=gregexpr(">",allMuts,fixed = T)
# start.posI=unlist(lapply(searchI, `[[`, 1),use.names = F)
# mut=substr(allMuts,start.posI-1,nchar(allMuts))
#
# searchI=gregexpr(">",mut,fixed = T)
# start.posI=unlist(lapply(searchI, `[[`, 1),use.names = F)
# ref=substr(mut,start.posI-1,start.posI-1)
# alt=substr(mut,start.posI+1,start.posI+1);rm(mut,searchI,start.posI)
#
# searchI=gregexpr(":",allMuts,fixed = T)
# start.pos=unlist(lapply(searchI, `[[`, 1),use.names = F)
# searchI=gregexpr(">",allMuts,fixed = T)
# end.pos=unlist(lapply(searchI, `[[`, 1),use.names = F)
# start=end=substr(allMuts,start.pos+1,end.pos-2);rm(searchI,start.pos,end.pos)
#
# AnnoDF=data.frame(chr,start,end,ref,alt,allMuts,stringsAsFactors = F);rm(chr,start,end,ref,alt)
#
# # write.table(AnnoDF,file = "../20180410.InputAnnovar.AlliSCREAM.txt",col.names = F,quote = F,row.names = F)
# # table_annovar.pl "20180410.InputAnnovar.Allvars.txt" ~/NGS_Seq_Tools/annovar2/humandb -buildver hg19 -out 20180410.EGFR.iSCREAM.AlliSCREAM -remove -protocol refgene,gnomad_genome,gnomad_exome,avsnp150,clinvar_20170905 -operation g,f,f,f,f -nastring . -csvout -polish
#-------------
# Filtering by depth: keep variants with DP >= 1000 in BOTH samples.
# Where the comparison evaluates to NA (e.g. a variant present in only one
# sample, whose columns for the other sample are NA), logical indexing does not
# drop the row but returns an all-NA row in its place.
message("Starting with ",length(Mutation.Table$allMuts))
filtered=Mutation.Table[Mutation.Table$Lib.DP>=1000&Mutation.Table$NoLig.DP>=1000,]
# NOTE(review): `AAchange` is only assigned later in this script (after the
# ANNOVAR annotation is joined). Unless the input tables already carry such a
# column, the "unique AAs=" part of this and the next two messages reports 0.
message(paste("Filtered out",(length(Mutation.Table$allMuts)-length(filtered$allMuts)),"for DP>=1000. Remaining =",length(filtered$allMuts),"unique AAs=",length(unique(filtered$AAchange))))
Mutation.Table=filtered
# Add BamReadCount: join the parsed bam-readcount tables of both samples onto
# Mutation.Table by mutation ID ("<chr>:<pos><ref>><base>"); every joined column
# is prefixed with "bamRC.Lib." or "bamRC.NoLig.".
Lib.Count=readRDS("../../Bam-readcount/Parsed/parsed.EGFR.Lib.bamcount.RDS")
Lib.Count$MutID=paste(Lib.Count$chr,":",Lib.Count$pos,Lib.Count$ref,">",Lib.Count$base,sep="")
colnames(Lib.Count)=paste("bamRC.Lib.",colnames(Lib.Count),sep="")
idx=match(Mutation.Table$allMuts,Lib.Count$bamRC.Lib.MutID)
test=cbind.data.frame(Mutation.Table,Lib.Count[idx,])
NoLig.Count=readRDS("../../Bam-readcount/Parsed/parsed.EGFR.NoLig.bamcount.RDS")
NoLig.Count$MutID=paste(NoLig.Count$chr,":",NoLig.Count$pos,NoLig.Count$ref,">",NoLig.Count$base,sep="")
colnames(NoLig.Count)=paste("bamRC.NoLig.",colnames(NoLig.Count),sep="")
idx=match(Mutation.Table$allMuts,NoLig.Count$bamRC.NoLig.MutID)
test=cbind.data.frame(test,NoLig.Count[idx,])
Mutation.Table=test;rm(test,Lib.Count,NoLig.Count,idx)
# Filtering based on strandBias.
# NOTE(review): assumes the parsed bam-readcount tables already contain a
# `strandbias` column - confirm against the parsing step.
# Step 1: drop variants whose strand bias is +Inf or -Inf in either sample.
filtered=Mutation.Table
filtered=filtered[!((filtered$bamRC.NoLig.strandbias==Inf | filtered$bamRC.NoLig.strandbias==-Inf)|(filtered$bamRC.Lib.strandbias==Inf | filtered$bamRC.Lib.strandbias==-Inf)),]
message(paste("Filtered out",(length(Mutation.Table$allMuts)-length(filtered$allMuts)),"for infinite SB. Remaining =",length(filtered$allMuts),"unique AAs=",length(unique(filtered$AAchange))))
Mutation.Table=filtered
# Step 2: strand bias window of [-10, 10]. `&` binds tighter than `|`, so this
# keeps a row when EITHER sample lies inside the window.
# NOTE(review): confirm that "either" rather than "both" samples is intended.
filtered=filtered[filtered$bamRC.NoLig.strandbias<=10&filtered$bamRC.NoLig.strandbias>=-10|filtered$bamRC.Lib.strandbias<=10&filtered$bamRC.Lib.strandbias>=-10,]
message(paste("Filtered out",(length(Mutation.Table$allMuts)-length(filtered$allMuts)),"for SB>10. Remaining =",length(filtered$allMuts),"unique AAs=",length(unique(filtered$AAchange))))
Mutation.Table=filtered
#--------------
# Split every mutation ID of the form "<chr>:<pos><ref>><alt>" into the columns
# of an ANNOVAR input table (chr, start, end, ref, alt, original ID).
allMuts <- Mutation.Table$allMuts
# Position of the first ":" and of the first ">" in each ID.
colon.at <- as.integer(regexpr(":", allMuts, fixed = TRUE))
arrow.at <- as.integer(regexpr(">", allMuts, fixed = TRUE))
AnnoDF <- data.frame(
  chr = substr(allMuts, 0, colon.at - 1),
  # the position sits between ":" and the reference base preceding ">"
  start = substr(allMuts, colon.at + 1, arrow.at - 2),
  end = substr(allMuts, colon.at + 1, arrow.at - 2),
  # single characters immediately before and after ">"
  ref = substr(allMuts, arrow.at - 1, arrow.at - 1),
  alt = substr(allMuts, arrow.at + 1, arrow.at + 1),
  allMuts,
  stringsAsFactors = FALSE
)
rm(colon.at, arrow.at)
# write.table(AnnoDF,file = "../20180401.InputAnnovar.Allvars.txt",col.names = F,quote = F,row.names = F)
# table_annovar.pl "20180401.InputAnnovar.Allvars.txt" ~/NGS_Seq_Tools/annovar2/humandb -buildver hg19 -out 20180401.EGFR.iSCREAM.Allvars -remove -protocol refgene,gnomad_genome,gnomad_exome,avsnp150,clinvar_20170905 -operation g,f,f,f,f -nastring . -csvout -polish
# The export above is disabled, so the table is discarded again.
rm(AnnoDF)
# Join the ANNOVAR annotation onto Mutation.Table by mutation ID, derive the
# protein / cDNA change and position, then filter to exonic, complete,
# non-synonymous variants. Each filter reports how many rows it removed.
var=read.table("../20180401.EGFR.iSCREAM.Allvars.hg19_multianno.csv",sep=",",stringsAsFactors = F,header = T)
var$MutID=paste(var$Chr,":",var$Start,var$Ref,">",var$Alt,sep="")
idx=match(Mutation.Table$allMuts,var$MutID)
# Only columns 6 to 33 of the annotation table are appended.
# NOTE(review): presumably these are the annotation columns following
# Chr/Start/End/Ref/Alt - verify against the CSV header.
Mutation.Table=data.frame(Mutation.Table,var[idx,c(6:33)]);rm(var)
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/EleniusGroup/DvsP.EGFR.HiDP/AnnovarMutCodeFind.R")
Mutation.Table$AAchange=annovarMutCodeFind(Mutation.Table$AAChange.refgene,isoform = "NM_005228")
# AAPos = AAchange with its first and last character removed, read as a number
# (anything that is not purely numeric after stripping becomes NA).
temp=gsub("^.","",x = Mutation.Table$AAchange);temp=gsub(".$","",x=temp)
Mutation.Table$AAPos=as.numeric(temp);rm(temp)
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/EleniusGroup/DvsP.EGFR.HiDP/Annovar_cDNA_Find.R")
Mutation.Table$cDNAchange=annovar_cDNA_Find(Mutation.Table$AAChange.refgene,isoform = "NM_005228")
# cDNAPos is derived from cDNAchange the same way as AAPos above.
temp=gsub("^.","",x = Mutation.Table$cDNAchange);temp=gsub(".$","",x=temp)
Mutation.Table$cDNAPos=as.numeric(temp);rm(temp)
# keeping exonic
filtered=Mutation.Table[Mutation.Table$Func.refgene=="exonic",] # because only looking at cDNA variants so exonic
message(paste("Filtered out",(length(Mutation.Table$allMuts)-length(filtered$allMuts)),"for exonic. Remaining =",length(filtered$allMuts),"unique AAs=",length(unique(filtered$AAchange))))
Mutation.Table=filtered
# Keeping common mutations: na.exclude() drops every row that has an NA in ANY
# column, which removes variants lacking data for one of the samples as well as
# rows with any other missing value (e.g. an AAPos/cDNAPos that failed to parse).
filtered=droplevels.data.frame(na.exclude(Mutation.Table))
message(paste("Filtered out",(length(Mutation.Table$allMuts)-length(filtered$allMuts)),"Uncommon between samples. Remaining =",length(filtered$allMuts),"unique AAs=",length(unique(filtered$AAchange))))
Mutation.Table=filtered
# temp=droplevels.data.frame(na.exclude(Mutation.Table))
# removing synonymous
# (the next assignment repeats the one above and has no further effect)
Mutation.Table=filtered
filtered=Mutation.Table[Mutation.Table$ExonicFunc.refgene!="synonymous SNV",]
message(paste("Filtered out",(length(Mutation.Table$allMuts)-length(filtered$allMuts)),"synonymous variants. Remaining =",length(filtered$allMuts),"unique AAs=",length(unique(filtered$AAchange))))
Mutation.Table=filtered
rm(filtered)
# saveRDS(object =Mutation.Table,file = "../20180401.MutationTable.RDS")
# write.table(x = Mutation.Table,file = "../20180401.MutationTable.tsv",sep="\t",row.names = F,col.names = T,quote = F)
load("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/EleniusGroup/ERBB.Miscellaneous/Posssible mutants/EGFR/egfr_Variants.bin")
print(paste(length(unique(Variants$mutants)),length(unique(Variants$mutantprotein))))
library(ggplot2)
#----------------------
setwd("~/BaseSpace/20180112 EGFR Trimmed HiDP/vcf")
rm(list=ls())
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/Personal/str.extra-for-R.R/str.extra.R")
Mutation.Table=readRDS("20180401.MutationTable.RDS")
Mutation.Table$FC.reg=(Mutation.Table$NoLig.VF/Mutation.Table$Lib.VF)
# source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Git/Gitlab.DC/SignedCalcs/SignedFoldChange.R")
# Mutation.Table$FC=FoldChange(Mutation.Table$Lib.VF,Mutation.Table$NoLig.VF)
# saveRDS(object = Mutation.Table,file = "20180411.MutationTable.iSCREAM.RDS")
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Git/GitLab.UTU/Misc Code/Master ggplot2 Theme/theme.DC.plot.R")
#----------------------------------
subdf=Mutation.Table[,c("allMuts","Lib.DP","NoLig.DP")]
subdf.long=reshape2::melt(data = subdf)
#ggplot(data=subdf.long,aes(x=value,group=variable))+geom_histogram(color="black",fill="cornflowerblue",binwidth = 2000)+facet_wrap(~variable)+theme.drugs.iscream+scale_x_continuous(breaks = seq(0,max(subdf.long$value),by=50000),labels=scales::comma)
y.breaks=seq(0,ceiling(max(subdf.long$value,na.rm = T)),by = 50000)
ggplot(data = subdf.long,aes(x=variable,y=value))+geom_jitter(height = 0,alpha=0.3,size=1,color="grey70")+geom_boxplot(outlier.color = NA,color="black",fill=NA)+scale_y_continuous(breaks = y.breaks,limits = c(0,max(subdf.long$value,na.rm = T)),labels = scales::comma)+ylab("Total Depth for each variant")+xlab("Sample")+theme.drugs.iscream
# ggsave(filename = "20180401.DP.box.pdf",width = 3,height = 4)
ggplot(data = subdf.long,aes(x=variable,y=value))+geom_jitter(height = 0,alpha=0.3,size=1,color="grey70")+geom_violin(color="black",fill=NA,trim = F,adjust=2,na.rm = T,scale = "width",width=0.8)+geom_boxplot(outlier.color = NA,color="black",fill=NA,width=0.1)+scale_y_continuous(breaks = y.breaks,limits = c(0,max(subdf.long$value,na.rm = T)),labels = scales::comma)+ylab("Total Depth for each variant")+xlab("Sample")+theme.drugs.iscream
# ggsave(filename = "20180401.DP.violin.pdf",width = 3,height = 4)
subdf=Mutation.Table[,c("allMuts","Lib.AD","NoLig.AD")]
subdf.long=reshape2::melt(data = subdf)
# subdf.long$value=subdf.long$value+1
base=c(1,2,4,6,8)
y.breaks=c(base*1,base*10,base*100,base*1000,10000)
ggplot(data = subdf.long,aes(x=variable,y=log10(value)))+geom_jitter(height = 0,alpha=0.3,size=1,color="grey70")+geom_boxplot(outlier.color = NA,color="black",fill=NA)+scale_y_continuous(breaks = log10(y.breaks),labels = c(1,rep("",4),10,rep("",4),100,rep("",4),1000,rep("",4),10000),limits = c(0,4.1))+ylab("Reads supporting each variant")+xlab("Sample")+theme.drugs.iscream
# # ggsave(filename = "20180401.AD.box.pdf",width = 3,height = 4)
ggplot(data = subdf.long,aes(x=variable,y=log10(value)))+geom_jitter(height = 0,alpha=0.3,size=1,color="grey70")+geom_violin(color="black",fill=NA,trim = F,adjust=2,na.rm = T,scale = "width",width=0.8)+geom_boxplot(outlier.color = NA,color="black",fill=NA,width=0.1)+scale_y_continuous(breaks = log10(y.breaks),labels = c(1,rep("",4),10,rep("",4),100,rep("",4),1000,rep("",4),10000),limits = c(0,4.1))+ylab("Reads supporting each variant")+xlab("Sample")+theme.drugs.iscream
# ggsave(filename = "20180401.AD.violin.pdf",width = 3,height = 4)
#-------------------------
# Fold change
# Interactive tallies: each `base=...;(dim(...))` line sets a threshold and
# prints the dimensions (rows, columns) of the subset meeting it.
# NOTE(review): the trailing "# <number>" comments are presumably the row counts
# recorded from an earlier run - they are not recomputed or checked here.
rm(subdf.long,subdf)
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Git/Gitlab.DC/SignedCalcs/SignedFoldChange.R")
subdf=Mutation.Table[,c("allMuts","AAchange","Lib.VF","NoLig.VF","FC.reg"),]
# FoldChange() is defined in the file sourced above.
# NOTE(review): presumably a signed fold change (negative for a decrease), as
# the symmetric negative/positive thresholds below suggest - confirm.
subdf$FC=FoldChange(subdf$Lib.VF,subdf$NoLig.VF)
# Variants whose FC lies within [-base, base]
base=5;(dim(subdf[subdf$FC>=(-1*base)&subdf$FC<=base,])) # 6912
base=10;(dim(subdf[subdf$FC>=(-1*base)&subdf$FC<=base,])) # 7068
base=15;(dim(subdf[subdf$FC>=(-1*base)&subdf$FC<=base,])) # 7123
base=20;(dim(subdf[subdf$FC>=(-1*base)&subdf$FC<=base,])) # 7138
base=25;(dim(subdf[subdf$FC>=(-1*base)&subdf$FC<=base,])) # 7149
base=30;(dim(subdf[subdf$FC>=(-1*base)&subdf$FC<=base,])) # 7164
# Variants with FC of at least `base`
base=5;(dim(subdf[subdf$FC>=base,])) # 120
base=10;(dim(subdf[subdf$FC>=base,])) # 40
base=15;(dim(subdf[subdf$FC>=base,])) # 28
base=20;(dim(subdf[subdf$FC>=base,])) # 23
base=25;(dim(subdf[subdf$FC>=base,])) # 21
base=30;(dim(subdf[subdf$FC>=base,])) # 15
# Variants with a no-ligand variant frequency (%) of at least `base`
base=0.5;(dim(subdf[subdf$NoLig.VF>=base,])) # 33
base=1;(dim(subdf[subdf$NoLig.VF>=base,])) # 12
base=2;(dim(subdf[subdf$NoLig.VF>=base,])) # 6
base=3;(dim(subdf[subdf$NoLig.VF>=base,])) # 5
base=4;(dim(subdf[subdf$NoLig.VF>=base,])) # 1
base=5;(dim(subdf[subdf$NoLig.VF>=base,])) # 0
# Variants with a library variant frequency (%) of at least `base`
base=0.5;(dim(subdf[subdf$Lib.VF>=base,])) # 192
base=1;(dim(subdf[subdf$Lib.VF>=base,])) # 114
base=2;(dim(subdf[subdf$Lib.VF>=base,])) # 39
base=3;(dim(subdf[subdf$Lib.VF>=base,])) # 13
base=4;(dim(subdf[subdf$Lib.VF>=base,])) # 5
base=5;(dim(subdf[subdf$Lib.VF>=base,])) # 1
base=6;(dim(subdf[subdf$Lib.VF>=base,])) # 1
base=7;(dim(subdf[subdf$Lib.VF>=base,])) # 0
# ggplot(data = subdf,aes(x=FC.reg))+stat_bin(breaks=c(0,1,2.5,5,7.5,10,15,25,50,75,100,150),color='black',fill="grey50")+stat_bin(breaks=c(0,1,2.5,5,7.5,10,15,25,50,75,100,150), geom="text", aes(label=..count..), vjust=-0.5,size=2)+theme.drugs.iscream+ylab("No. of observations")+xlab("FoldChange")+scale_x_continuous(breaks = seq(0,150,by = 25))+ggtitle("Distribution of Fold Change")
ggplot(data = subdf,aes(x=FC.reg))+stat_bin(binwidth=5,color='black',fill="grey50")+stat_bin(binwidth=5, geom="text", aes(label=..count..), vjust=-0.5,size=2)+theme.drugs.iscream+ylab("No. of observations")+xlab("FoldChange")+scale_x_continuous(breaks = seq(0,150,by = 25))+ggtitle("Distribution of Fold Change")
ggsave(filename = "20180410.FC.reg.Hist.bw5.pdf",width = 6,height = 3)
# base=c(1,2,4,6,8)
# y.breaks=c(base*1,base*10,base*100,base*1000,10000)
# ggplot(data = subdf,aes(x=FC))+stat_bin(binwidth=5,color='black',fill="grey50")+stat_bin(binwidth=5, geom="text", aes(label=..count..), vjust=-0.5,size=2)+theme.drugs.iscream+scale_y_log10(breaks=c(y.breaks),labels=c(1,rep("",4),10,rep("",4),100,rep("",4),1000,rep("",4),10000))+ylab("No. of observations (log10 scale)")+xlab("FoldChange")+scale_x_continuous(breaks = seq(-250,150,by = 25))+ggtitle("Distribution of Fold Change")
# # ggsave(filename = "20180401.FC.Log.Hist.pdf",width = 10,height = 6)
#
# ggplot(data = subdf,aes(x=FC))+stat_bin(binwidth=5,color='black',fill="grey50")+stat_bin(binwidth=5, geom="text", aes(label=..count..), vjust=-0.5,size=2)+theme.drugs.iscream+ylab("No. of observations")+ggtitle("Distribution of Fold Change")+scale_y_continuous(breaks = seq(0,8000,by = 500))+scale_x_continuous(breaks = seq(-250,150,by = 25))
# # ggsave(filename = "20180401.FC.Hist.pdf",width = 10,height = 6)
#
# ggplot(data = subdf,aes(x=FC))+stat_bin(binwidth=5,color='black',fill="grey50")+stat_bin(binwidth=5, geom="text", aes(label=..count..), vjust=-0.5,size=2)+theme.drugs.iscream+ylab("No. of observations")+xlab("FoldChange")+scale_x_continuous(breaks = seq(-250,150,by = 25))+ggtitle("Distribution of Fold Change")+scale_y_continuous(breaks = seq(0,8000,by = 500))
# # ggsave(filename = "20180401.FC.Hist.tall.pdf",width = 10,height = 20)
bw=0.1
ggplot(data = subdf,aes(x=NoLig.VF))+geom_histogram(binwidth=bw,color='black',fill="grey50")+stat_bin(binwidth=bw, geom="text", aes(label=..count..),hjust=-0.2,vjust=-0.2,size=2,angle = 45)+theme.drugs.iscream+ylab(paste("No. of observations"))+xlab("Variant Frequency (%)")+scale_x_continuous(breaks = seq(0,7,by = 0.5))+ggtitle("Distribution of Variant Frequency (NoLigand)")+scale_y_continuous(breaks = seq(0,6000,by = 1000),limits = c(0,6000))
ggsave(filename = "20180410.VF.NoLig.pdf",width = 6,height = 3)
# bw=0.1
# p1=ggplot(data = subdf,aes(x=Lib.VF))+geom_histogram(binwidth=bw,color='black',fill="grey50")+stat_bin(binwidth=bw, geom="text", aes(label=..count..),hjust=-0.2,vjust=-0.2,size=2,angle = 45)+theme.drugs.iscream+ylab(paste("No. of observations"))+xlab("Variant Frequency (%)")+scale_x_continuous(breaks = seq(0,7,by = 0.5),limits = c(-0.1,6.2))+ggtitle("Distribution of Variant Frequency (Library)")+scale_y_continuous(breaks = seq(0,6000,by = 1000),limits = c(0,6000));p1
# p2=ggplot(data = subdf,aes(x=NoLig.VF))+geom_histogram(binwidth=bw,color='black',fill="grey50")+stat_bin(binwidth=bw, geom="text", aes(label=..count..),hjust=-0.2,vjust=-0.2,size=2,angle = 45)+theme.drugs.iscream+ylab(paste("No. of observations"))+xlab("Variant Frequency (%)")+scale_x_continuous(breaks = seq(0,7,by = 0.5),limits = c(-0.1,6.2))+ggtitle("Distribution of Variant Frequency (NoLigand)")+scale_y_continuous(breaks = seq(0,6000,by = 1000),limits = c(0,6000));p2
#
# gp=gridExtra::grid.arrange(p1, p2)
# ggsave(plot = gp,filename = "20180401.VF.Hist.pdf",width = 10,height = 6)
# subdf.long=reshape2::melt(data = subdf[,c("allMuts","Lib.VF","NoLig.VF")])
# ggplot(data=subdf.long,aes(x=variable,y=value))+geom_jitter(height = 0,alpha=0.3,size=1,color="grey70")+geom_boxplot(outlier.color = NA,color="black",fill=NA)+theme.drugs.iscream
#------------------------------
# dups=duplicated(Mutation.Table$AAchange)
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Git/Gitlab.DC/SignedCalcs/SignedFoldChange.R")
Mutation.Table$FC=FoldChange(Mutation.Table$Lib.VF,Mutation.Table$NoLig.VF)
subdf=Mutation.Table[,c("FC","AAchange","AAPos","allMuts","Lib.VF","NoLig.VF")]
dups=subdf$AAchange[duplicated(subdf$AAchange)];length(unique(dups))
# match(x = dups,table = subdf$AAchange)
# which(dups %in% subdf$AAchange)
dups.df=subdf[which(subdf$AAchange %in% dups),]
length(unique(dups.df$AAchange))
ggplot(data=dups.df,aes(x=AAPos,y=FC,group=AAchange))+geom_point()+geom_line()+geom_text(data=dups.df[dups.df$FC>15,],aes(label=dups.df[dups.df$FC>15,"AAchange"]),hjust=0.5,vjust=-0.8,size=3)+geom_text(data=dups.df[dups.df$FC<(-15),],aes(label=dups.df[dups.df$FC<(-15),"AAchange"]),hjust=0.5,vjust=1.8,size=3)+theme.drugs.iscream+ggtitle(paste("FC Dist for Degenerate Mutations. Total dots =",length(dups.df$AAchange)))+ylab("Fold Change")+xlab("EGFR AA position")+geom_rug(sides="r",alpha=0.5,size=0.2)
# ggsave(filename = "20180401.DegenrateMuts.FC.pdf",width = 10,height = 7)
#cols=colorRampPalette(c("#003366","firebrick1"))(20)
ggplot(data=dups.df,aes(x=AAPos,y=FC,group=AAchange))+geom_point(alpha=0.5,aes(color=dups.df$NoLig.VF,size=dups.df$NoLig.VF))+geom_point(alpha=0.5,aes(color=dups.df$Lib.VF,size=dups.df$Lib.VF))+geom_line()+geom_text(data=dups.df[dups.df$FC>15,],aes(label=dups.df[dups.df$FC>15,"AAchange"]),hjust=0.5,vjust=-0.8,size=3)+geom_text(data=dups.df[dups.df$FC<(-15),],aes(label=dups.df[dups.df$FC<(-15),"AAchange"]),hjust=0.5,vjust=1.8,size=3)+ggtitle(paste("FC Dist for Degenerate Mutations. Total dots =",length(dups.df$AAchange)))+ylab("Fold Change")+xlab("EGFR AA position")+scale_colour_gradient(low="#003366",high="firebrick1")+theme.drugs.iscream.legend
# ggsave(filename = "20180401.DegenrateMuts.FC.with.VF.pdf",width = 11,height = 7)
#------------------------
#----------------------
setwd("~/BaseSpace/20180112 EGFR Trimmed HiDP/vcf")
rm(list=ls())
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/Personal/str.extra-for-R.R/str.extra.R")
Mutation.Table=readRDS("20180401.MutationTable.RDS")
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Git/GitLab.UTU/Misc Code/Master ggplot2 Theme/theme.DC.plot.R")
#----------------------------------
# Sequencing depth of both samples plotted along the cDNA position.
subdf <- Mutation.Table[, c("allMuts", "Lib.DP", "NoLig.DP", "AAchange", "AAPos", "cDNAchange")]
# Keep the cDNA change plus the two depth columns (Lib.DP, NoLig.DP).
df <- subdf[, c(6, 2, 3)]
# Long format: one row per variant and sample. The id column is named
# explicitly so melt() does not have to guess it (and stays silent).
subdf.long <- reshape2::melt(df, id.vars = "cDNAchange")
# cDNA position = cDNAchange with its first and last character removed.
temp <- gsub("^.", "", x = subdf.long$cDNAchange)
temp <- gsub(".$", "", x = temp)
subdf.long$cDNAPos <- as.numeric(temp)
rm(temp)
# A stray bare `apply` statement that only printed the function was removed
# here. `alpha` is a fixed setting, so it belongs outside aes(); inside aes()
# it was treated as a mapped variable and produced a spurious legend.
# NOTE(review): geom_line() has no colour/group mapping, so it draws a single
# line through the points of both samples - confirm this is intended.
ggplot(data = subdf.long, aes(x = cDNAPos, y = value)) +
  geom_point(aes(color = variable), size = 0.2, alpha = 0.2) +
  geom_line() +
  theme_dc.plot2 +
  scale_x_continuous(breaks = c(0, seq(1, 3500, by = 200), 3633)) +
  scale_y_continuous(breaks = c(seq(0, 300000, by = 50000))) +
  scale_color_manual(values = c("black", "red"))
# base=c(1,2,4,6,8)
# y.breaks=c(base*1,base*10,base*100,base*1000,base*10000,base*100000)
# ggplot(data=subdf.long,aes(color=variable,x=cDNAPos,y=log10(value)))+geom_point(size=0.2)+geom_line()+theme_dc.plot2+scale_x_continuous(breaks = c(0,seq(1,3500,by = 200),3633))+scale_y_continuous(breaks = log10(y.breaks),limits = c(0,5.5),labels =c(1,rep("",4),10,rep("",4),100,rep("",4),1000,rep("",4),10000,rep("",4),100000,rep("",4)))
# annovarMutCodeFind: protein change of one transcript from ANNOVAR output.
#
# Takes an ANNOVAR mutation-info column and, for every element, returns the
# protein change reported for the requested isoform.
#
# Arguments:
#   MutationColumn  Character vector. Each element holds comma-separated
#                   annotation entries whose fields are colon-separated, e.g.
#                   "EGFR:NM_005228:exon21:c.T2573G:p.L858R,EGFR:NM_201282:...".
#   isoform         Pattern (handed to grep) that selects the entry of the
#                   wanted transcript. Look at the data to pick it. Defaults
#                   to "NM_005228", the transcript used in this EGFR analysis.
#
# Value:
#   Character vector with exactly one element per input element: the protein
#   change without its leading "p." and with "X" written as "*", or " " when
#   the element has no protein change for the isoform (this includes NA).
#   Empty input gives character(0). If several protein changes match, the first
#   one (entries are sorted alphabetically) is used so the result always lines
#   up with the input.
annovarMutCodeFind <- function(MutationColumn, isoform = "NM_005228") {
  pick_protein_change <- function(MutInfo) {
    entries <- sort(unique(unlist(strsplit(MutInfo, ","))))
    fields <- unique(unlist(strsplit(entries[grep(isoform, entries)], ":")))
    hits <- fields[grep("^p[.]", fields)]
    if (length(hits) == 0) {
      return(" ")
    }
    # Strip only the leading "p." prefix; an unanchored "p." would also delete
    # a "p" plus the following character later in the string (e.g. in "dupNPH").
    change <- sub("^p[.]", "", hits[1])
    gsub("X", "*", change, fixed = TRUE)
  }
  vapply(as.character(MutationColumn), pick_protein_change, character(1),
         USE.NAMES = FALSE)
}
# MutationColumn=Mutation.Table$AAChange.refgene;i=7591;isoform="NM_005228"
# annovarMutCodeFind(tab.s$Mutation)
# annovar_cDNA_Find: cDNA change of one transcript from ANNOVAR output.
#
# Takes an ANNOVAR mutation-info column and, for every element, returns the
# cDNA change reported for the requested isoform.
#
# Arguments:
#   MutationColumn  Character vector. Each element holds comma-separated
#                   annotation entries whose fields are colon-separated, e.g.
#                   "EGFR:NM_005228:exon21:c.T2573G:p.L858R,EGFR:NM_201282:...".
#   isoform         Pattern (handed to grep) that selects the entry of the
#                   wanted transcript. Look at the data to pick it. Defaults
#                   to "NM_005228", the transcript used in this EGFR analysis.
#
# Value:
#   Character vector with exactly one element per input element: the cDNA
#   change without its leading "c.", or " " when the element has no cDNA change
#   for the isoform (this includes NA). Empty input gives character(0). If
#   several cDNA changes match, the first one (entries are sorted
#   alphabetically) is used so the result always lines up with the input.
annovar_cDNA_Find <- function(MutationColumn, isoform = "NM_005228") {
  pick_cdna_change <- function(MutInfo) {
    entries <- sort(unique(unlist(strsplit(MutInfo, ","))))
    fields <- unique(unlist(strsplit(entries[grep(isoform, entries)], ":")))
    hits <- fields[grep("^c[.]", fields)]
    if (length(hits) == 0) {
      return(" ")
    }
    # Strip only the leading "c." prefix (anchored, literal dot).
    sub("^c[.]", "", hits[1])
  }
  vapply(as.character(MutationColumn), pick_cdna_change, character(1),
         USE.NAMES = FALSE)
}
# MutationColumn=Mutation.Table$AAChange.refgene;i=7591;isoform="NM_005228"
# annovarMutCodeFind(tab.s$Mutation)
library(ggplot2)
rm(list=ls())
setwd("/Users/Deepankar/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Klaus Lab/Manuscripts/EGFR Screen Manuscript/Data/Supplementary figures/COSMIC/")
source("/Users/Deepankar/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Git/Gitlab.DC/MutSiteFind/MutSiteFind.R")
source("/Users/Deepankar/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Git/GitLab.UTU/Misc Code/Master ggplot2 Theme/theme.DC.plot.R")
# COSMIC EGFR mutations: read the export, de-duplicate per sample and protein
# change, and tabulate how often every protein change occurs.
data <- readr::read_csv("V84_38_MUTANT.csv", col_names = TRUE)
data <- data.frame(data)
data <- na.exclude(data)
data$SAMPLE_NAME <- toupper(data$SAMPLE_NAME)
# Strip the literal "p." prefix (anchored, with an escaped dot).
data$MUTATION_AA <- sub("^p\\.", "", data$MUTATION_AA)
data$MutID <- paste0(data$SAMPLE_NAME, "=", data$MUTATION_AA)
data.bak <- data
# Logical masks instead of negative indices: x[-which(cond), ] and
# x[-grep(...), ] return ZERO rows when nothing matches.
data <- data[!duplicated(data$MutID), ]
data <- data[!grepl("\\?", data$MUTATION_AA), ]
#data=data[data$Mutation.Type!="Fusion",]
uniqueMuts <- table(data$MUTATION_AA)
df <- data.frame(
  AAchange = names(uniqueMuts),
  count = as.vector(uniqueMuts),
  stringsAsFactors = FALSE
)
# MutSiteFind() comes from the MutSiteFind.R file sourced earlier in this script.
df$AAPos <- as.numeric(MutSiteFind(df$AAchange))
df$Type <- data$MUTATION_DESCRIPTION[match(df$AAchange, data$MUTATION_AA)]
df <- na.exclude(df)
df <- subset(df, subset = df$Type != "Substitution - coding silent")
# temp=df[(df$AAPos%in%c(746,747)),]
# Collapse every change at positions 746-752 that is neither missense nor a
# frameshift into one "exon19del" entry placed at position 746.
temp <- df[df$AAPos %in% seq(746, 752), ]
temp <- temp[temp$Type != "Substitution - Missense", ]
temp <- temp[!grepl("Frameshift", temp$Type), ]
if (nrow(temp) > 0) {
  df <- df[!(df$AAchange %in% temp$AAchange), ]
  df <- rbind(
    df,
    data.frame(
      AAchange = "exon19del",
      count = sum(temp$count),
      AAPos = 746,
      Type = "Complex - deletion inframe",
      stringsAsFactors = FALSE
    )
  )
}
df$count <- as.numeric(df$count)
df$AAPos <- as.numeric(df$AAPos)
# Colours are assigned positionally to the alphabetically sorted mutation
# types, so this vector must list exactly one colour per type present in df.
rang <- c("brown", "brown", "black", "brown", "black", "brown", "black", "black", "brown", "#198c19", "red")
names(rang) <- sort(unique(df$Type))
# Axis breaks for the log10 count axis; only the powers of ten are labelled.
base <- c(1, 2, 4, 6, 8)
breaks <- c(base * 1, base * 10, base * 100, base * 1000, 10000)
labels <- c(1, rep("", 4), 10, rep("", 4), 100, rep("", 4), 1000, rep("", 4), 10000)
ggplot(df,aes(x = AAPos,y=log10(count)))+geom_segment(aes(yend=-0.4,xend=df$AAPos))+geom_point(size=1.5,alpha=0.65,aes(color=Type))+theme_dc.plot2+xlab("EGFR Amino Acid Position")+ylab("No. of mutations (log10 scale)")+scale_x_continuous(breaks = c(1,seq(100,1200,by = 100),1210))+geom_vline(xintercept = c(1,24,75,300,390,600,646,668,712,979),color="black",linetype="dotted")+scale_y_continuous(breaks = log10(breaks),labels=labels,limits = c(-0.4,4.1))+scale_color_manual(values=rang)+ggtitle(paste("EGFR mutations COSMIC, N=",length(data$MutID)))
# ggsave("log.20180330 EGFR_lollipop COSMIC.pdf",width = 10,height = 2.5)
ggplot(df,aes(x = AAPos,y=count))+geom_segment(aes(yend=-20,xend=df$AAPos))+geom_point(size=1.5,alpha=0.65,aes(color=Type))+theme_dc.plot2+xlab("EGFR Amino Acid Position")+ylab("No. of mutations")+scale_x_continuous(breaks = c(1,seq(100,1200,by = 100),1210))+geom_vline(xintercept = c(1,24,75,300,390,600,646,668,712,979),color="black",linetype="dotted")+scale_color_manual(values=rang)
# ggsave("reg.20180330 EGFR_lollipop COSMIC.pdf",width = 10,height = 2.5)
base=c(1,2,4,6,8)
breaks=c(base*1,base*10,base*100,base*1000,10000)
labels=c(1,rep("",4),10,rep("",4),100,rep("",4),1000,rep("",4),10000)
ggplot(df,aes(x = AAPos,y=log10(count)))+geom_segment(aes(yend=-0.4,xend=df$AAPos))+geom_point(size=1.5,alpha=0.65,aes(color=Type))+theme_dc.plot2+xlab("EGFR Amino Acid Position")+ylab("No. of mutations (log10 scale)")+scale_x_continuous(breaks = c(1,seq(100,1200,by = 100),1210))+geom_vline(xintercept = c(1,24,75,300,390,600,646,668,712,979),color="black",linetype="dotted")+scale_y_continuous(breaks = log10(breaks),labels=labels,limits = c(-0.4,4.1))+scale_color_manual(values=rang)+ggtitle(paste("EGFR mutations COSMIC, N=",length(data$MutID)))+geom_rug(sides="t",alpha=0.5,color="black",size=0.2)
# ggsave("log.20180330 EGFR_lollipop COSMIC.rug.pdf",width = 10,height = 2.5)
#---------------
# only hits from screen
# source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/Personal/str.extra-for-R.R/str.extra.R")
# Screen hits (fold change of at least 25) matched to COSMIC by the exact
# amino-acid change.
Mutation.Table <- readRDS("~/BaseSpace/20180112 EGFR Trimmed HiDP/vcf/20180411.MutationTable.iSCREAM.RDS")
sub <- Mutation.Table[Mutation.Table$FC.reg >= 25, ]
rm(Mutation.Table)
#---------------------
# Screen columns get the prefix "sdf." so they cannot clash with the COSMIC
# columns bound next to them.
subdf <- sub[, c("AAchange", "AAPos", "cDNAchange", "cDNAPos", "FC.reg", "allMuts")]
colnames(subdf) <- paste0("sdf.", colnames(subdf))
idx <- match(subdf$sdf.AAchange, df$AAchange)
subdf <- cbind(subdf, df[idx, ])
# Hits without a COSMIC match: placeholder count 0.1 (log10 gives -1, the lower
# axis limit of the plot that follows), type "absent", position 0, empty change.
subdf$count <- replace(subdf$count, is.na(subdf$count), 0.1)
subdf$Type <- replace(subdf$Type, is.na(subdf$Type), "absent")
subdf$AAPos <- replace(subdf$AAPos, is.na(subdf$AAPos), 0)
subdf$AAchange <- replace(subdf$AAchange, is.na(subdf$AAchange), "")
# One colour per mutation type, assigned in sorted order of the type names.
rang <- c("gray", "#198c19", "red")
names(rang) <- sort(unique(subdf$Type))
# Breaks for the log10 count axis; only the powers of ten carry a label.
base <- c(1, 2, 4, 6, 8)
breaks <- c(base, base * 10, base * 100, base * 1000, 10000)
labels <- c(1, rep("", 4), 10, rep("", 4), 100, rep("", 4), 1000, rep("", 4), 10000)
ggplot(subdf,aes(x = sdf.AAPos,y=log10(count)))+geom_segment(aes(yend=-1,xend=subdf$sdf.AAPos))+geom_point(size=1.5,alpha=0.65,aes(color=Type))+theme_dc.plot2+xlab("EGFR Amino Acid Position")+ylab("No. of mutations (log10 scale)")+scale_x_continuous(breaks = c(1,seq(100,1200,by = 100),1210))+scale_y_continuous(breaks = log10(breaks),labels=labels,limits = c(-1,4.2))+scale_color_manual(values=rang)+ggtitle(paste("EGFR mutations COSMIC, N=",length(data$MutID)))+geom_text(data=subdf,aes(label=subdf$sdf.AAchange),vjust=-0.3,size=2)
# +geom_vline(xintercept = c(1,24,75,300,390,600,646,668,712,979),color="black",linetype="dotted")
ggsave("log.20180411 EGFR_iSCREAM COSMIC same change.pdf",width = 10,height = 2.5)
rm(idx,subdf,rang,base,breaks)
#---------------------
# Summarise the COSMIC table per amino-acid position: total mutation count and
# a "/"-separated list of the changes seen there, most frequent first.
source("/Users/Deepankar/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Git/Gitlab.DC/MutSiteFind/sameSiteChangesFind.R")
uniquePos <- unique(df$AAPos)
# vapply() replaces growing `count`/`change` with c() inside a loop; it also
# yields typed zero-length vectors (instead of NULL) when there are no positions.
count <- vapply(
  uniquePos,
  function(pos) sum(df$count[df$AAPos == pos]),
  numeric(1)
)
change <- vapply(
  uniquePos,
  function(pos) {
    at_pos <- df[df$AAPos == pos, ]
    at_pos <- at_pos[order(at_pos$count, decreasing = TRUE), ]
    # sameSiteChangesFind() is defined in the file sourced above.
    paste(sameSiteChangesFind(at_pos$AAchange), collapse = "/")
  },
  character(1)
)
newdf <- data.frame(uniquePos, change, count, stringsAsFactors = FALSE)
#---------------
# Screen hits matched to COSMIC by amino-acid POSITION (any change at the same
# residue counts), using the per-position summary `newdf`.
# `sub` is the data frame of screen hits assigned earlier in this script (the
# variable name shadows base::sub).
subdf=sub[,c("AAchange","AAPos","cDNAchange","cDNAPos","FC.reg","allMuts")]
# Prefix the screen columns with "sdf." to keep them apart from newdf's columns.
colnames(subdf)=paste("sdf.",colnames(subdf),sep="")
idx=match(x = subdf$sdf.AAPos,table = newdf$uniquePos)
subdf=cbind(subdf,newdf[idx,])
# Printed for inspection: total count over the matched positions.
sum(subdf$count,na.rm = T)
# Hits at a position not present in newdf: placeholder count 0.1 (log10 = -1),
# position 0 and an empty change string.
subdf$count[is.na(subdf$count)]=0.1
subdf$uniquePos[is.na(subdf$uniquePos)]=0
subdf$change[is.na(subdf$change)]=""
# Type: "Missense" when the position has any change, "absent" otherwise,
# "Nonsense" when the change string is exactly "*".
subdf$Type=rep("",length(subdf$sdf.AAchange))
subdf$Type[subdf$change!=""]="Missense"
subdf$Type[subdf$Type!="Missense"]="absent"
subdf$Type[subdf$change=="*"]="Nonsense"
# Label = screen change without its last character, followed by the change string.
subdf$label=paste(substr(subdf$sdf.AAchange,start=0,stop=nchar(subdf$sdf.AAchange)-1),subdf$change,sep="")
# Printed for inspection.
subdf$label
# NOTE(review): colours are attached to the SORTED type names, and the relative
# order of "absent" vs "Missense"/"Nonsense" depends on the locale's collation;
# fewer than three types present would also shift the mapping. Confirm the
# intended colour per type.
rang=c("gray","#198c19","red")
names(rang)=sort(unique(subdf$Type))
# Breaks for the log10 count axis; only the powers of ten carry a label.
base=c(1,2,4,6,8)
breaks=c(base*1,base*10,base*100,base*1000,10000)
labels=c(1,rep("",4),10,rep("",4),100,rep("",4),1000,rep("",4),10000)
ggplot(subdf,aes(x = sdf.AAPos,y=log10(count)))+geom_segment(aes(yend=-1,xend=subdf$sdf.AAPos))+geom_point(size=1.5,alpha=0.65,aes(color=Type))+theme_dc.plot2+xlab("EGFR Amino Acid Position")+ylab("No. of mutations (log10 scale)")+scale_x_continuous(breaks = c(1,seq(100,1200,by = 100),1210))+scale_y_continuous(breaks = log10(breaks),labels=labels,limits = c(-1,4.2))+scale_color_manual(values=rang)+ggtitle(paste("EGFR mutations COSMIC, N=",length(data$MutID)))+geom_text(data=subdf,aes(label=subdf$label),vjust=-0.3,size=2)
ggsave("log.20180411 EGFR_iSCREAM COSMIC same residue.pdf",width = 10,height = 2.5)
sum(subdf$count)
library(ggplot2)
rm(list=ls())
setwd("/Users/Deepankar/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Klaus Lab/Manuscripts/EGFR Screen Manuscript/Data & Figures/Supplementary figures/cBioportal/")
source("/Users/Deepankar/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Git/Gitlab.DC/MutSiteFind/MutSiteFind.R")
source("/Users/Deepankar/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Git/GitLab.UTU/Misc Code/Master ggplot2 Theme/theme.DC.plot.R")
# cBioPortal EGFR mutations: read the export, de-duplicate per sample and
# protein change, drop fusions and tabulate how often each change occurs.
data <- readr::read_tsv("20180321 cBioportal EGFR mutations unique samples.tsv", col_names = TRUE)
data <- data.frame(data)
data$Sample.ID <- toupper(data$Sample.ID)
data$MutID <- paste0(data$Sample.ID, "=", data$Protein.Change)
data.bak <- data
# Logical mask instead of data[-which(duplicated(...)), ]: the negative-index
# form returns ZERO rows when there are no duplicates at all.
data <- data[!duplicated(data$MutID), ]
data <- data[data$Mutation.Type != "Fusion", ]
uniqueMuts <- table(data$Protein.Change)
df <- data.frame(
  AAchange = names(uniqueMuts),
  count = as.vector(uniqueMuts),
  stringsAsFactors = FALSE
)
df$Type <- data$Mutation.Type[match(df$AAchange, data$Protein.Change)]
# MutSiteFind() comes from the MutSiteFind.R file sourced earlier in this script.
df$AAPos <- as.numeric(MutSiteFind(df$AAchange))
df <- na.exclude(df)
# df=subset(df,subset = df$Type!="Fusion")
# Colours are assigned positionally to the mutation types in their order of
# first appearance, so this vector must list one colour per type present in df.
rang <- c("#198c19", "brown", "black", "brown", "#198c19", "black", "black", "brown", "brown", "black", "black")
names(rang) <- unique(df$Type)
ggplot(df,aes(x = AAPos,y=count))+geom_segment(aes(yend=-20,xend=df$AAPos))+geom_point(size=1.5,alpha=0.65,aes(color=Type))+theme_dc.plot2+scale_y_continuous(breaks = c(seq(0,max(df$count),by = 50),max(df$count)),limits = c(-30,max(df$count)))+xlab("EGFR Amino Acid Position")+ylab("No. of mutations")+scale_x_continuous(breaks = c(1,seq(100,1200,by = 100),1210))+geom_vline(xintercept = c(1,24,75,300,390,600,646,668,712,979),color="black",linetype="dotted")+scale_color_manual(values=rang)+ggtitle(paste("EGFR mutations cBioPortal, N=",length(data$MutID)))
ggsave("20180330 EGFR_lollipop cBioportal.pdf",width = 10,height = 2.5)
ggplot(df,aes(x = AAPos,y=count))+geom_segment(aes(yend=-20,xend=df$AAPos))+geom_point(size=1.5,alpha=0.65,aes(color=Type))+theme_dc.plot2+scale_y_continuous(breaks = c(seq(0,max(df$count),by = 50),max(df$count)),limits = c(-30,max(df$count)))+xlab("EGFR Amino Acid Position")+ylab("No. of mutations")+scale_x_continuous(breaks = c(1,seq(100,1200,by = 100),1210))+geom_vline(xintercept = c(1,24,75,300,390,600,646,668,712,979),color="black",linetype="dotted")+scale_color_manual(values=rang)+ggtitle(paste("EGFR mutations cBioPortal, N=",length(data$MutID)))+geom_rug(sides="t",alpha=0.5,color="black",size=0.2)
ggsave("20180330 EGFR_lollipop cBioportal.rug.pdf",width = 10,height = 2.5)
# ggplot(FC.data.cut,aes(x=FoldChange,y=reorder(Mutation,-Lib.AAPos)))+geom_segment(aes(yend=FC.data.cut$Mutation),xend=0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FoldChange>8,],aes(label=FC.data.cut[FC.data.cut$FoldChange>8,"Mutation"],hjust=-0.3),size=0.8)+geom_text(data=FC.data.cut[FC.data.cut$FoldChange<(-8),],aes(label=FC.data.cut[FC.data.cut$FoldChange<(-8),"Mutation"],hjust=1.2),size=0.8)+geom_point(size=0.5,aes(color=Direction))+scale_color_manual(values=c("Blue","Red"))+geom_vline(xintercept = 0)+geom_vline(xintercept = 10,linetype="dashed")+geom_vline(xintercept = -10,linetype="dashed")+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text= element_text(size = rel(0.2),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"))+ggtitle("FC Library vs No Ligand (All Mutations)")+scale_x_continuous(breaks = seq(-50,34,by =2),limits = c(-50,34))+xlab("Fold Change")+ylab("Mutations")
# Ad-hoc checks on the un-deduplicated cBioPortal table.
data <- data.bak
# T790M records, one per sample/mutation ID. A logical mask is used because
# temp[-which(duplicated(...)), ] returns ZERO rows when nothing is duplicated.
temp <- data[data$Protein.Change == "T790M", ]
temp <- temp[!duplicated(temp$MutID), ]
temp2 <- data[, ]
# Printed for inspection: number of distinct sample/mutation IDs.
length(unique(data$MutID))
# Rows whose sample/mutation ID already occurred earlier in the table.
sub <- subset(data, duplicated(data$MutID))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment