Commit 875a4e68 authored by Deepankar Chakroborty's avatar Deepankar Chakroborty
Browse files

Upload to Gitlab UTU

parents
# To parse files generated by Bam-readcount
# Details in field of bam read count
#base:count:avg_mapping_quality:avg_basequality:avg_se_mapping_quality:num_plus_strand:num_minus_strand:avg_pos_as_fraction:avg_num_mismatches_as_fraction:avg_sum_mismatch_qualities:num_q2_containing_reads:avg_distance_to_q2_start_in_q2_reads:avg_clipped_length:avg_distance_to_effective_3p_end
rm(list=ls())
bam_rc.parse <- function(x){
columns = "base:count:avg_mapping_quality:avg_basequality:avg_se_mapping_quality:num_plus_strand:num_minus_strand:avg_pos_as_fraction:avg_num_mismatches_as_fraction:avg_sum_mismatch_qualities:num_q2_containing_reads:avg_distance_to_q2_start_in_q2_reads:avg_clipped_length:avg_distance_to_effective_3p_end"
column.names = strsplit(columns, ":")[[1]];rm(columns)
# get line
Row = strsplit(x, split = "\t")[[1]]
# get base counts
temp <- lapply(Row[-c(1:4)], function(basecnt){
basecnt = strsplit(basecnt, split = ":")[[1]]
}) %>% do.call(rbind, .)
colnames(temp) = column.names
dat = data.frame(chr =Row[1],pos = Row[2],ref = Row[3], dp = Row[4],
temp)
}
strandBias=function(arg1,arg2){
if(length(arg1)!=length(arg2)){
error=paste("Parameters with unequal lengths passed as argument!\narg1 =",length(arg1),"; arg2 =",length(arg2))
message(error)
return(NA)
}
SB=c()
for (i in seq(1,length(arg1))){
if(is.na(arg1[i]) | is.na(arg2[i])){
calc=NA
}
else if(arg1[i]<arg2[i]){
calc=(arg2[i]/arg1[i])*-1
}
else{
calc=(arg1[i]/arg2[i])
}
SB=c(SB,calc)
}
return(SB)
}
library(dplyr)
setwd("~/BaseSpace/20180112 EGFR Trimmed HiDP/Bam-readcount")
files=list.files(".",pattern = "bamcount")
for (file in files){
#file="EGFR.Lib.S1.bam.tsv"
#file="EGFREGFP5.S1.bam.tsv"
#file="EGFREGFP1.S1.bam.tsv"
temp=scan(file, what = "character", sep = "\n")
dat = lapply(temp, bam_rc.parse) %>% bind_rows()
dat$chr=as.character(dat$chr)
dat$pos=as.numeric(dat$pos)
dat$dp=as.numeric(dat$dp)
dat$count=as.numeric(dat$count)
dat[,7:18]=sapply(dat[,7:18],as.numeric)
dat=dat[dat$base!="=",]
dat$strandbias=strandBias(dat$num_plus_strand,dat$num_minus_strand)
dat$AlleleFreq=dat$count/dat$dp
saveRDS(dat,file=paste("Parsed/parsed.",gsub(".tsv","",file),".RDS",sep=""))
write.table(dat,file=paste("Parsed/parsed.",file,sep=""),sep="\t",row.names = F,col.names = T,quote = F,na = "NA")
}
# To parse VCF files generated from samtools mpileup & bcftools
rm(list=ls())
setwd("~/BaseSpace/20180112 EGFR Trimmed HiDP/vcf")
# file="20180112 EGFR_lib.EGFR.locus.HiDP.vcf"
file="20180112 EGFR.No.Lig.EGFR.locus.HiDP.vcf"
temptext=readLines(paste(file,sep=""))
temptext=temptext[-grep("^##",x = temptext)]
require(progress)
pb <- progress_bar$new( format = " Processing [:bar] :percent in :elapsed", total = length(temptext), clear = FALSE, width= 60)
temptext=gsub(";","\t",temptext)
temptext=temptext[-seq(1:49)]
parsing=matrix(NA,length(temptext),19)
colnames(parsing)=c("chr","pos","dot1","Ref","Alt","zero","dot2","DP","I16","QS","VDB","SGB","RPB","MQB","MQSB","BQB","MQ0F","Format","DATA")
for( i in seq(1,length(temptext))){
#for( i in seq(69801,69810)){
line=temptext[i]
words=unlist(strsplit(line,"\t"))
parsing[i,1:10]=words[1:10]
for (j in seq(11,19)){
if(is.na(words[j])|words[j]==""){
break
}
#print(regexpr("VDB",words[j]))
if(regexpr("VDB",words[j])==1){
parsing[i,"VDB"]=words[j]
}
else if(regexpr("SGB",words[j])==1){
parsing[i,"SGB"]=words[j]
}
else if(regexpr("RPB",words[j])==1){
parsing[i,"RPB"]=words[j]
}
else if(regexpr("MQB",words[j])==1){
parsing[i,"MQB"]=words[j]
}
else if(regexpr("MQSB",words[j])==1){
parsing[i,"MQSB"]=words[j]
}
else if(regexpr("BQB",words[j])==1){
parsing[i,"BQB"]=words[j]
}
else if(regexpr("MQ0F",words[j])==1){
parsing[i,"MQ0F"]=words[j]
}
else if(regexpr("PL:AD",words[j])==1){
parsing[i,"Format"]=words[j]
parsing[i,"DATA"]=words[j+1]
}
}
if(i%%1000==0){
pb$tick(1000)
}
}
temp=data.frame(parsing,stringsAsFactors = F)
pb <- progress_bar$new( format = " Processing [:bar] :percent in :elapsed", total = length(temp$DATA), clear = FALSE, width= 60)
PL=c();AD=c()
for(i in seq(1,length(temp$DATA))){
#for(i in seq(1,10)){
#print(temp$DATA[i])
var=unlist(strsplit(temp$DATA[i],":"))
PL=c(PL,var[1])
AD=c(AD,var[2])
if(i%%1000==0){
pb$tick(1000)
}
}
temp=cbind(temp,PL,AD)
temp=temp[,c(-18,-19)]
colnames(temp)[c(19)]="Allelic.Depth"
parsing=temp;rm(temp)
assign(file,parsing);rm(parsing)
file.s=paste("Parsed/P-",file,sep="")
saveRDS(object =get(file),file = paste(file.s,".RDS",sep=""))
write.table(get(file),file=paste(file.s,".tsv",sep=""),sep="\t",row.names = F,col.names = T,quote = F,na = "NA")
bdir="~/BaseSpace/20180112 EGFR Trimmed HiDP/vcf/Parsed/";setwd(bdir)
# rm(list=ls());file="P-20180112 EGFR_lib.EGFR.locus.HiDP.vcf.tsv"
rm(list=ls());file="P-20180112 EGFR.No.Lig.EGFR.locus.HiDP.vcf.tsv"
#setwd(paste(bdir,dir,sep=""))
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/Personal/str.extra-for-R.R/str.extra.R")
tab=read.table(file,sep="\t",header = T,as.is = T,stringsAsFactors = F)
tab=tab[c(-3,-6,-7)] # Removing zero and dot (.) columns
#Change Depth to numeric
tab$DP=as.numeric(gsub("DP=","",tab$DP))
# Subsetting to EGFR locus #chr7:55,084,725-55,277,031
subtab=subset(tab,tab$chr=="chr7" & tab$pos>55084725 & tab$pos < 55277031)
#Subsetting to get Depth > 1000
# subtab=subset(subtab,subtab$DP>1000)
# Make New Only Mutation Matrix
require(progress);pb <- progress_bar$new( format = " Processing [:bar] :current SNVs processed in - :elapsed", total = length(unlist(strsplit(subtab$Alt,","))), clear = FALSE, width= 65)
chr=c();pos=c();ref=c();alt=c();DP=c();AD=c();count=0
for(i in seq(1,length(subtab$DP))){
#for(i in seq(1,100)){
datarow=subtab[i,]
Alts=unlist(strsplit(datarow[,"Alt"],",",fixed = T),use.names = F)
ADs=unlist(strsplit(datarow[,"Allelic.Depth"],",",fixed = T),use.names = T)
for(j in seq(1,length(Alts))){
count=count+1
#chr=unlist(c(chr,datarow[1]))
chr=append(chr,unlist(datarow[1]))
pos=append(pos,unlist(datarow[2]))
ref=append(ref,unlist(datarow[3]))
alt=append(alt,Alts[j])
DP=append(DP,unlist(datarow[5]))
AD=append(AD,ADs[j+1])
if(count%%100==0){
pb$tick(100)
}
}
}
AD=as.numeric(AD)
SNV.matrix=data.frame(chr,pos,ref,alt,DP,AD,stringsAsFactors = F)
rm(chr,pos,ref,alt,DP,AD,Alts,ADs,datarow,i,j)
submat=subset(SNV.matrix,SNV.matrix$alt!="<*>")
fnm=gsub("P-","../Variants/V-",gsub(".vcf.tsv","",file))
saveRDS(object = submat,file = paste(fnm,".RDS",sep="")) # use var=readRDS("FileName")
write.table(submat,file = paste(fnm,".tsv",sep=""),sep="\t",row.names = F,col.names = T,quote = F,na = "NA")
#Subsetting to get Depth > 1000
#subtab=subset(subtab,subtab$DP>1000)
setwd("~/BaseSpace/20180112 EGFR Trimmed HiDP/vcf/Variants/")
rm(list=ls())
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/Personal/str.extra-for-R.R/str.extra.R")
file1="V-20180112 EGFR_lib.EGFR.locus.HiDP.RDS";tab=readRDS(file1)
VF=(tab$AD*100)/tab$DP
Rel.Ab=(tab$AD*100)/sum(tab$AD)
MutID=paste(tab$chr,":",tab$pos,tab$ref,">",tab$alt,sep="")
tab=data.frame(tab,VF,Rel.Ab,MutID,stringsAsFactors = F)
rm(VF,Rel.Ab,MutID)
Library=tab;rm(tab,file1)
colnames(Library)=paste("Lib.",colnames(Library),sep="")
file1="V-20180112 EGFR.No.Lig.EGFR.locus.HiDP.RDS";tab=readRDS(file1)
VF=(tab$AD*100)/tab$DP
Rel.Ab=(tab$AD*100)/sum(tab$AD)
MutID=paste(tab$chr,":",tab$pos,tab$ref,">",tab$alt,sep="")
tab=data.frame(tab,VF,Rel.Ab,MutID,stringsAsFactors = F)
rm(VF,Rel.Ab,MutID)
NoLigand=tab;rm(tab,file1)
colnames(NoLigand)=paste("NoLig.",colnames(NoLigand),sep="")
allMuts=union(NoLigand$NoLig.MutID,Library$Lib.MutID)
idx=match(allMuts,Library$Lib.MutID)
Mutation.Table=Library[idx,]
idx=match(allMuts,NoLigand$NoLig.MutID)
Mutation.Table=data.frame(allMuts,Mutation.Table,NoLigand[idx,],stringsAsFactors = F)
rm(Library,NoLigand)
FoldChange=function(arg1,arg2){
if(length(arg1)!=length(arg2)){
error=paste("Parameters with unequal lengths passed as argument!\narg1 =",length(arg1),"; arg2 =",length(arg2))
message(error)
return(NA)
}
FC=c()
for (i in seq(1,length(arg1))){
if(is.na(arg1[i]) | is.na(arg2[i])){
calc=NA
}
else if(arg1[i]<arg2[i]){
calc=(arg2[i]/arg1[i])
}
else{
calc=(arg1[i]/arg2[i])*-1
}
FC=c(FC,calc)
}
return(FC)
}
Mutation.Table$FC=FoldChange(Mutation.Table$Lib.VF,Mutation.Table$NoLig.VF)
AnnoDF=Mutation.Table[,c(2,3,3,4,5,1)]
# write.table(AnnoDF,file = "../20180131.InputAnnovar.txt",col.names = F,quote = F,row.names = F)
#- run following line in annovar
# annotate_variation.pl -out EGFR.muts --build hg19 20180131.InputAnnovar.txt humandb
#- more annotations
# table_annovar.pl "20180131.InputAnnovar.txt" ~/NGS_Seq_Tools/annovar2/humandb -buildver hg19 -out EGFR.iSCREAM -remove -protocol refgene,gnomad_genome,gnomad_exome,avsnp150,clinvar_20170905 -operation g,f,f,f,f -nastring . -csvout -polish
rm(AnnoDF)
var=read.table("../EGFR.iSCREAM.hg19_multianno.csv",sep=",",stringsAsFactors = F,header = T)
var$MutID=paste(var$Chr,":",var$Start,var$Ref,">",var$Alt,sep="")
idx=match(Mutation.Table$allMuts,var$MutID)
Mutation.Table=data.frame(Mutation.Table,var[idx,c(6:26)])
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/EleniusGroup/DvsP.EGFR.HiDP/AnnovarMutCodeFind.R")
Mutation.Table$AAchange=annovarMutCodeFind(Mutation.Table$AAChange.refgene,isoform = "NM_005228")
temp=gsub("^.","",x = Mutation.Table$AAchange);temp=gsub(".$","",x=temp)
Mutation.Table$AAPos=as.numeric(temp);rm(temp)
Mutation.Table=Mutation.Table[Mutation.Table$ExonicFunc.refgene!="synonymous SNV",]
# Add BamReadCount
rm(var,idx)
Lib.Count=readRDS("../../Bam-readcount/Parsed/parsed.EGFR.Lib.bamcount.RDS")
Lib.Count$MutID=paste(Lib.Count$chr,":",Lib.Count$pos,Lib.Count$ref,">",Lib.Count$base,sep="")
colnames(Lib.Count)=paste("bamRC.Lib.",colnames(Lib.Count),sep="")
idx=match(Mutation.Table$allMuts,Lib.Count$bamRC.Lib.MutID)
test=cbind.data.frame(Mutation.Table,Lib.Count[idx,])
NoLig.Count=readRDS("../../Bam-readcount/Parsed/parsed.EGFR.NoLig.bamcount.RDS")
NoLig.Count$MutID=paste(NoLig.Count$chr,":",NoLig.Count$pos,NoLig.Count$ref,">",NoLig.Count$base,sep="")
colnames(NoLig.Count)=paste("bamRC.NoLig.",colnames(NoLig.Count),sep="")
idx=match(Mutation.Table$allMuts,NoLig.Count$bamRC.NoLig.MutID)
test=cbind.data.frame(test,NoLig.Count[idx,])
Mutation.Table=test;rm(test,Lib.Count,NoLig.Count,idx)
# write.table(Mutation.Table,file = "../20180206.Result.tsv",sep="\t",col.names = T,quote = F,row.names = F)
# saveRDS(Mutation.Table,file = "../20180206.Result.RDS")
#-------------------------
library(ggplot2)
source("~/Seafile/My Library/Git/GitLab.UTU/Misc Code/Master ggplot2 Theme/theme.DC.plot.R")
# All data. Massive plot!!!
FC.data.cut=droplevels.data.frame(na.exclude(Mutation.Table))
# View(FC.data.cut[FC.data.cut$AAchange%in%c("L858R","T790M","A702V","A1118E","L184P"),])
# str.extra(FC.data.cut)
toRemove=c(10,11,12,13,14,19,21,23,26,28,29,30,31,32,33,34,36,38,39,40,seq(44,46),48,seq(50,52),seq(55,61),64,seq(65,67),69,seq(71,73),seq(76,82),85)
colnames(FC.data.cut)[toRemove]
FC.data.cut=FC.data.cut[,-toRemove]
# Removed columns
# [1] "Lib.MutID" "NoLig.chr"
# [3] "NoLig.pos" "NoLig.ref"
# [5] "NoLig.alt" "NoLig.MutID"
# [7] "Func.refgene" "GeneDetail.refgene"
# [9] "Xref.refgene" "gnomAD_genome_AFR"
# [11] "gnomAD_genome_AMR" "gnomAD_genome_ASJ"
# [13] "gnomAD_genome_EAS" "gnomAD_genome_FIN"
# [15] "gnomAD_genome_NFE" "gnomAD_genome_OTH"
# [17] "CLINSIG" "CLNACC"
# [19] "CLNDSDB" "CLNDSDBID"
# [21] "MutID"
FC.data.cut$Direction=FC.data.cut$FC
FC.data.cut$Direction[FC.data.cut$Direction<0]="Decrease"
FC.data.cut$Direction[FC.data.cut$Direction!="Decrease"]="Increase"
FC.data.cut$Direction=as.factor(FC.data.cut$Direction)
ggplot(data = FC.data.cut,aes(x=bamRC.NoLig.strandbias,y=FC.data.cut$bamRC.Lib.strandbias))+geom_point(alpha=0.4,size=2)+theme_dc.plot.xaxis.regular+geom_vline(xintercept = 0)+geom_hline(yintercept = 0)+xlab("StrandBias NoLigand")+ylab("Strand Bias Plasmid Library")+ggtitle("Strand Bias distribution")+ggtitle(paste("Strand Bias distribution N=",length(FC.data.cut$allMuts)))
# removing infinite strand bias AA changes
FC.data.cut=FC.data.cut[!((FC.data.cut$bamRC.NoLig.strandbias==Inf | FC.data.cut$bamRC.NoLig.strandbias==-Inf)|(FC.data.cut$bamRC.Lib.strandbias==Inf | FC.data.cut$bamRC.Lib.strandbias==-Inf)),]
ggplot(data = FC.data.cut,aes(x=bamRC.NoLig.strandbias,y=FC.data.cut$bamRC.Lib.strandbias))+geom_point(alpha=0.4,size=2)+theme_dc.plot.xaxis.regular+geom_vline(xintercept = 0)+geom_hline(yintercept = 0)+xlab("StrandBias NoLigand")+ylab("Strand Bias Plasmid Library")+ggtitle("Strand Bias distribution (Inf removed)")+ggtitle(paste("Strand Bias distribution N=",length(FC.data.cut$allMuts)))
FC.data.cut=FC.data.cut[FC.data.cut$bamRC.NoLig.strandbias<=10&FC.data.cut$bamRC.NoLig.strandbias>=-10|FC.data.cut$bamRC.Lib.strandbias<=10&FC.data.cut$bamRC.Lib.strandbias>=-10,]
ggplot(data = FC.data.cut,aes(x=bamRC.NoLig.strandbias,y=FC.data.cut$bamRC.Lib.strandbias))+geom_point(alpha=0.4,size=2)+theme_dc.plot.xaxis.regular+geom_vline(xintercept = 0)+geom_hline(yintercept = 0)+xlab("StrandBias NoLigand")+ylab("Strand Bias Plasmid Library")+ggtitle(paste("Strand Bias distribution N=",length(FC.data.cut$allMuts)))
# write.table(FC.data.cut,file = "../20180206.Final.Table.tsv",sep="\t",col.names = T,quote = F,row.names = F)
# saveRDS(FC.data.cut,file = "../20180206.Final.table.RDS")
library(RColorBrewer)
cols=colorRampPalette(c("#003366","firebrick1"))(20)
#cols=colorRampPalette(c("#ffeaea","#ff3030"))(20)
# #temp=temp[order(temp$VF.NoLigand,decreasing = F),]
# p=ggplot(FC.data.cut,aes(y=FC,x=AAPos,size=NoLig.VF,color=NoLig.VF))+geom_point(alpha=0.9)+scale_colour_gradient(low="#003366",high="firebrick1")+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text.y= element_text(size = rel(1),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"),axis.text.x = element_text(angle = 90, hjust = 1,size = rel(0.75)))+ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)")+xlab("Amino Acid Position")+ylab("Fold Change")+scale_x_continuous(breaks = c(1,seq(50,1210,by = 50),1210))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC>25,],aes(label=FC.data.cut[FC.data.cut$FC>25,"AAchange"]),hjust=1.3,size=3)+scale_y_continuous(breaks=seq(-240,100,by = 20))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC<(-50),],aes(label=FC.data.cut[FC.data.cut$FC<(-50),"AAchange"]),hjust=1.3,size=3)
# pdf(file = "../FC.plot.pdf",width = 10,height = 7);print(p);dev.off()
#
# p=ggplot(FC.data.cut,aes(y=FC,x=AAPos,size=NoLig.VF,color=NoLig.VF))+geom_point(alpha=0.1)+scale_colour_gradient(low="#003366",high="firebrick1")+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text.y= element_text(size = rel(1),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"),axis.text.x = element_text(angle = 90, hjust = 1,size = rel(0.75)))+ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)")+xlab("Amino Acid Position")+ylab("Fold Change")+scale_x_continuous(breaks = c(1,seq(50,1210,by = 50),1210))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC>25,],aes(label=FC.data.cut[FC.data.cut$FC>25,"AAchange"]),hjust=1.3,size=3)+scale_y_continuous(breaks=seq(-240,100,by = 20))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC<(-50),],aes(label=FC.data.cut[FC.data.cut$FC<(-50),"AAchange"]),hjust=1.3,size=3)
# pdf(file = "../FC.plot.alpha.pdf",width = 10,height = 7);print(p);dev.off()
# ggplotly(p)
# p=ggplot(FC.data.cut,aes(y=FC,x=AAPos,size=NoLig.VF,color=NoLig.VF))+geom_point(alpha=0.9)+scale_colour_gradient(low="#003366",high="firebrick1")+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text.y= element_text(size = rel(1),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"),axis.text.x = element_text(angle = 90, hjust = 1,size = rel(0.75)))+ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)")+xlab("Amino Acid Position")+ylab("Fold Change")+scale_x_continuous(breaks = c(1,seq(50,1210,by = 50),1210))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC>25,],aes(label=FC.data.cut[FC.data.cut$FC>25,"AAchange"]),hjust=1.3,size=3)+scale_y_continuous(breaks=seq(-240,100,by = 20))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC<(-50),],aes(label=FC.data.cut[FC.data.cut$FC<(-50),"AAchange"]),hjust=1.3,size=3)+geom_rug(sides="r",alpha=0.5,size=0.2,color="black");p
# ggsave(plot = p,filename = "../FC.plot.w.rug.pdf",width = 10,height = 7);print(p);dev.off()
# Only FC not my signed FC.
FC.data.cut$FC.reg=(FC.data.cut$NoLig.VF/FC.data.cut$Lib.VF)
p=ggplot(FC.data.cut,aes(y=FC.reg,x=AAPos,size=NoLig.VF,color=NoLig.VF))+geom_point(alpha=0.9)+scale_colour_gradient(low="#003366",high="firebrick1")+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text.y= element_text(size = rel(1),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"),axis.text.x = element_text(angle = 90, hjust = 1,size = rel(0.75)))+ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)")+xlab("Amino Acid Position")+ylab("Fold Change")+scale_x_continuous(breaks = c(1,seq(50,1210,by = 50),1210))+geom_text(data=FC.data.cut[FC.data.cut$FC>25,],aes(label=FC.data.cut[FC.data.cut$FC.reg>25,"AAchange"]),hjust=1.3,size=3)+geom_hline(yintercept = 0,color="black")+geom_hline(yintercept = 1,linetype="dashed",color="red",alpha=0.5);p
pdf(file = "../FCreg.plot.pdf",width = 10,height = 5);print(p);dev.off()
#
# p=ggplot(FC.data.cut,aes(y=FC.reg,x=AAPos,size=NoLig.VF,color=NoLig.VF))+geom_point(alpha=0.1)+scale_colour_gradient(low="#003366",high="firebrick1")+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text.y= element_text(size = rel(1),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"),axis.text.x = element_text(angle = 90, hjust = 1,size = rel(0.75)))+ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)")+xlab("Amino Acid Position")+ylab("Fold Change")+scale_x_continuous(breaks = c(1,seq(50,1210,by = 50),1210))+geom_text(data=FC.data.cut[FC.data.cut$FC>25,],aes(label=FC.data.cut[FC.data.cut$FC.reg>25,"AAchange"]),hjust=1.3,size=3)+geom_hline(yintercept = 0,color="black")+geom_hline(yintercept = 1,linetype="dashed",color="red",alpha=0.5);p
# pdf(file = "../FCreg.alpha.plot.pdf",width = 10,height = 5);print(p);dev.off()
# base=c(1,2,4,6,8)
# y.breaks=sort(c(0.01,0.1,base*1,base*10,100))
#
# p=ggplot(FC.data.cut,aes(y=log2(FC.reg),x=AAPos,size=NoLig.VF,color=NoLig.VF))+geom_point(alpha=0.9)+scale_colour_gradient(low="#003366",high="firebrick1")+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text.y= element_text(size = rel(1),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"),axis.text.x = element_text(angle = 90, hjust = 1,size = rel(0.75)))+ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)")+xlab("Amino Acid Position")+ylab("log2(Fold Change)")+scale_x_continuous(breaks = c(1,seq(50,1210,by = 50),1210))+geom_text(data=FC.data.cut[FC.data.cut$FC>25,],aes(label=FC.data.cut[FC.data.cut$FC.reg>25,"AAchange"]),hjust=1.3,size=3)+scale_y_continuous(breaks = log2(y.breaks),labels=c(0.01,0.1,1,rep("",4),10,rep("",4),100))+geom_hline(yintercept = 0,color="black");p
# pdf(file = "../FCreg.log.plot.pdf",width = 10,height = 7);print(p);dev.off()
# ggplotly(p)
# Browsable plot
mini=FC.data.cut[,c("AAchange","Lib.VF","NoLig.VF","Lib.AD","NoLig.AD","FC.reg","AAPos")]
colnames(mini)[c(3)]=c("VariantFrequency")
library(plotly)
X.A <- list(
title = "EGFR amino acid",
showticklabels = TRUE,
dtick=100,
ticklen = 5,
gridcolor = toRGB("white"),
gridwidth = 0.5,
tickwidth = 1,
range = c(-10, 1220),
tickangle = 0,
zerolinecolor = toRGB("black"),
zerolinewidth = 1.5)
Y.A <- list(
title = "Fold Change",
showticklabels = TRUE,
dtick=25,
ticklen = 5,
gridcolor = toRGB("white"),
gridwidth = 0.5,
tickwidth = 1,
zerolinecolor = toRGB("black"),
zerolinewidth = 1.5)
p=plot_ly(mini,alpha=0.7,x=~AAPos,y=~FC.reg,color=~VariantFrequency,size=~VariantFrequency,mode="text",text=~paste('EGFR ',AAchange,'</br> FC: ',round(FC.reg,digits = 3),'</br>- - - -</br>Variant frequency</br>Library: ',round(Lib.VF,digits = 3),'</br>Surving cells: ',round(VariantFrequency,digits=3),'</br>- - - -</br>Number of reads</br>Library: ',Lib.AD,'</br>Surving cells: ',NoLig.AD),colors = cols,sizes=c(50,300))%>%
add_markers()%>%
layout(title = "EGFR iSREAM",xaxis = X.A,yaxis = Y.A)
print(p)
#
#
# library(htmlwidgets)
# library(htmltools)
#
# p <- htmlwidgets::appendContent(p, htmltools::tags$input(id='inputText', value='L858R', ''), htmltools::tags$button(id='buttonSearch', 'Search'))
# p <- htmlwidgets::appendContent(p, htmltools::tags$script(HTML(
# 'document.getElementById("buttonSearch").addEventListener("click", function()
# {
# var i = 0;
# var j = 0;
# var found = [];
# var myDiv = document.getElementsByClassName("js-plotly-plot")[0]
# var data = JSON.parse(document.querySelectorAll("script[type=\'application/json\']")[0].innerHTML);
# for (i = 0 ;i < data.x.data.length; i += 1) {
# for (j = 0; j < data.x.data[i].text.length; j += 1) {
# if (data.x.data[i].text[j].indexOf(document.getElementById("inputText").value) !== -1) {
# found.push({curveNumber: i, pointNumber: j});
# }
# }
# }
# Plotly.Fx.hover(myDiv, found);
# }
# );')))
#
# htmlwidgets::saveWidget(p, paste('../FoldChangeRegulariSCREAM', ".html", sep=""))
# p
#
# # ,text=~paste('EGFR Mut: ',AAchange,'</br> Reads_Library: ',Lib.AD,'</br> VF_Lib: ',round(Lib.VF,digits = 3),'</br> Reads_NoLig: ',NoLig.AD,'</br> VF_NoLig: ',round(NoLig.VF,digits=3),'</br> FC: ',round(FC.reg,digits = 3))
setwd("~/BaseSpace/20180112 EGFR Trimmed HiDP/vcf/Variants/")
rm(list=ls())
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Git/GitLab.UTU/Personal/str.extra-for-R.R/str.extra.R")
FC.data.cut=readRDS("../20180401.MutationTable.RDS")
FC.data.cut$FC.reg=(FC.data.cut$NoLig.VF/FC.data.cut$Lib.VF)
selected=FC.data.cut[,c("AAchange","AAPos","Lib.VF","NoLig.VF","FC.reg")]
expandFrames=function(FC,VF.Begin,VF.End,frames=30){
# FC is foldchange column
# frames is the number of frames you want for a smooth animation
DF=matrix(data = 0,nrow =length(FC),ncol = frames)
VF=matrix(data = 0,nrow =length(VF.Begin),ncol = frames)
for(i in seq(1,length(FC))){
DF[i,]=seq(from = 0,to = FC[i],length.out = frames)
VF[i,]=seq(from = VF.Begin[i],to = VF.End[i],length.out = frames)
}
DF=as.data.frame(DF)
colnames(DF)=paste("FC.frame.",seq(1,frames,by = 1),sep="")
VF=as.data.frame(VF)
colnames(VF)=paste("VF.frame.",seq(1,frames,by = 1),sep="")
out.DF=cbind(DF,VF)
return(out.DF)
}
DF=expandFrames(FC = selected$FC,VF.Begin = selected$Lib.VF,VF.End = selected$NoLig.VF,frames = 60)
300
# setwd("~/BaseSpace/20180112 EGFR Trimmed HiDP/vcf/Variants/")
#
# rm(list=ls())
# source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto/Git/GitLab.UTU/Personal/str.extra-for-R.R/str.extra.R")
# # saveRDS(Mutation.Table,file = "../20180206.Result.RDS")
#
# FC.data.cut=readRDS("../20180206.Final.table.RDS")
# selected=FC.data.cut[,c("AAchange","AAPos","Lib.VF","NoLig.VF","FC")]
#
# expandFrames=function(FC,VF.Begin,VF.End,frames=30){
# # FC is foldchange column
# # frames is the number of frames you want for a smooth animation
# DF=matrix(data = 0,nrow =length(FC),ncol = frames)
# VF=matrix(data = 0,nrow =length(VF.Begin),ncol = frames)
# for(i in seq(1,length(FC))){
# DF[i,]=seq(from = 0,to = FC[i],length.out = frames)
# VF[i,]=seq(from = VF.Begin[i],to = VF.End[i],length.out = frames)
# }
# DF=as.data.frame(DF)
# colnames(DF)=paste("FC.frame.",seq(1,frames,by = 1),sep="")
# VF=as.data.frame(VF)
# colnames(VF)=paste("VF.frame.",seq(1,frames,by = 1),sep="")
# out.DF=cbind(DF,VF)
# return(out.DF)
# }
#
# DF=expandFrames(FC = selected$FC,VF.Begin = selected$Lib.VF,VF.End = selected$NoLig.VF,frames = 60)
# selected=cbind(selected,DF)
# # FC = selected$FC;VF.Begin = selected$Lib.VF;VF.End = selected$NoLig.VF;frames = 20
#
# FC.data=selected[,1:5];colnames(FC.data)
# # [1] "AAchange" "AAPos" "Lib.VF" "NoLig.VF" "FC"
# FC.data$FC=rep(0,length(FC.data$FC))
# FC.data$VF=rep(0,length(FC.data$Lib.VF))
#
# frames=grep("FC.frame",colnames(selected))
# VF.frames=grep("VF.frame",colnames(selected))
# j=1
# VF.range=seq(0,5,by = 0.01)
# color.range=colorRampPalette(c("#003366","firebrick1","firebrick1","firebrick1","firebrick1","firebrick1","firebrick1"))(length(VF.range))
# for(i in frames){
# print(paste("Frame",i))
# FC.data$FC=selected[,i]
# FC.data$VF=selected[,VF.frames[j]];j=j+1
# p=ggplot(FC.data,aes(y=FC,x=AAPos,size=VF,color=VF))+geom_point(alpha=0.9)+scale_colour_gradientn(colours = color.range,values=VF.range)+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text.y= element_text(size = rel(1),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"),axis.text.x = element_text(angle = 90, hjust = 1,size = rel(0.75)))+ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)")+xlab("Amino Acid Position")+ylab("Fold Change")+scale_x_continuous(breaks = c(1,seq(50,1210,by = 50),1210))+geom_hline(yintercept = 0,color="black")+scale_y_continuous(breaks=seq(-240,100,by = 20),limits = c(-240,110))+geom_hline(yintercept = 0,color="black")
# ggsave(filename = paste("../Animated.FC/FC.plot.frame",stringr::str_pad(i,2,pad = "0"),".png",sep = ""),plot = p,dpi = 200,width = 10,height = 7)
# };j=1
# # Final frame
# FC.data.cut$VF=FC.data.cut$NoLig.VF
# p=ggplot(FC.data.cut,aes(y=FC,x=AAPos,size=VF,color=VF))+geom_point(alpha=0.9)+scale_colour_gradientn(colours = color.range,values=VF.range)+theme(panel.border = element_rect(linetype = 1, colour = "black",fill=NA,size=0.15),panel.background=element_rect(fill = NA, colour = NA),axis.text.y= element_text(size = rel(1),color="black"),legend.key= element_rect(fill=NA,colour = NA), axis.ticks =element_line(colour = "black"),axis.text.x = element_text(angle = 90, hjust = 1,size = rel(0.75)))+ggtitle("Fold Change Map of Mutations (From Library to Ligand Independent)")+xlab("Amino Acid Position")+ylab("Fold Change")+scale_x_continuous(breaks = c(1,seq(50,1210,by = 50),1210))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC>25,],aes(label=FC.data.cut[FC.data.cut$FC>25,"AAchange"]),hjust=1.3,size=3)+scale_y_continuous(breaks=seq(-240,100,by = 20),limits = c(-240,110))+geom_hline(yintercept = 0,color="black")+geom_text(data=FC.data.cut[FC.data.cut$FC<(-50),],aes(label=FC.data.cut[FC.data.cut$FC<(-50),"AAchange"]),hjust=1.3,size=3)
# ggsave(filename = paste("../Animated.FC/FC.plot.frame",paste("../Animated.FC/FC.plot.frame",stringr::str_pad(i+1,2,pad = "0"),".png",sep = ""),".png",sep = ""),plot = p,dpi = 200,width = 10,height = 7)
#
#
# # run this: convert -delay 0.1 -loop 1 *.png animation.gif
#
load("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/EleniusGroup/ERBB.Miscellaneous/Posssible mutants/EGFR/egfr_Variants.bin")
print(paste("Unique EGFR cDNA=",length(unique(Variants$mutants)),"Unique EGFR proteins=",length(unique(Variants$mutantprotein))))
EGFR.WT=Variants$mutantprotein[1]
non.syn=Variants[Variants$mutantprotein!=EGFR.WT,]
length(unique(non.syn$mutants))+1 # +1 for WT
length(unique(non.syn$mutantprotein))+1 # +1 for WT
print(paste("Non-synonymous EGFR muts =",length(non.syn$mutants)))
#Subsetting to get Depth > 1000
#subtab=subset(subtab,subtab$DP>1000)
setwd("~/BaseSpace/20180112 EGFR Trimmed HiDP/vcf/Variants/")
rm(list=ls())
source("~/Documents/OneDrive.UTU/OneDrive - O365 Turun yliopisto//Git/GitLab.UTU/Personal/str.extra-for-R.R/str.extra.R")
file1="V-20180112 EGFR_lib.EGFR.locus.HiDP.RDS";tab=readRDS(file1)
VF=(tab$AD*100)/tab$DP
Rel.Ab=(tab$AD*100)/sum(tab$AD)
MutID=paste(tab$chr,":",tab$pos,tab$ref,">",tab$alt,sep="")
tab=data.frame(tab,VF,Rel.Ab,MutID,stringsAsFactors = F)
rm(VF,Rel.Ab,MutID)
Library=tab;rm(tab,file1)
colnames(Library)=paste("Lib.",colnames(Library),sep="")
file1="V-20180112 EGFR.No.Lig.EGFR.locus.HiDP.RDS";tab=readRDS(file1)
VF=(tab$AD*100)/tab$DP
Rel.Ab=(tab$AD*100)/sum(tab$AD)
MutID=paste(tab$chr,":",tab$pos,tab$ref,">",tab$alt,sep="")
tab=data.frame(tab,VF,Rel.Ab,MutID,stringsAsFactors = F)
rm(VF,Rel.Ab,MutID)
NoLigand=tab;rm(tab,file1)
colnames(NoLigand)=paste("NoLig.",colnames(NoLigand),sep="")
allMuts=union(NoLigand$NoLig.MutID,Library$Lib.MutID)
idx=match(allMuts,Library$Lib.MutID)
Mutation.Table=Library[idx,]
idx=match(allMuts,NoLigand$NoLig.MutID)