####################################################################### ### RDA gene<-read.table("geneNorm177.txt",header=T, row.names=1) tot<-t(gene) fact<-read.table("fact.txt",header=T,row.names=1) library(vegan) #RDA global RDAgene<-rda(tot~Sex+Progeny,fact) anova(RDAgene) RDAgene #plot n=3 mypalette<-rainbow(n) cond<-fact[,2] plot(RDAfull, type="n") text(RDAfull, dis="cn", col="deeppink") points(RDAfull,pch=19, col=mypalette[cond], cex=1.2) legend("bottomleft",legend=c("DAN", "HYB", "FRA"),col=mypalette, pch=19) #RDA partial by progeny RDApop<-rda(tot~Pop+Condition(Sexe),fact) anova(RDApop) plot(RDApop) RDApop #RDA partial by sex RDAsex<-rda(tot~Sexe+Condition(Pop),fact) anova(RDAsex) plot(RDAsex) RDAse # Inertia Proportion #Total 10750 #Pop 463 4.3 #Sexe 10270 95.5 #Pop/Sexe 17 0.2 #genes that scored on RDA axis 2 score_gene<-scores(RDAgene)$species[ , 2] summary(score_gene) quantile(score_gene,probs=c(0.001, 0.999)) #0.1% et 99.9% a<-score_gene[score_genequantile(score_gene,probs=0.999)] gene_rda<-tot[,colnames(tot) %in% names(a)] write.table(gene_rda,"gene_rda.txt",sep="\t") dim(gene_rda) toto<-t(gene_rda) DAN<-apply(toto[,1:58],1,mean) HYB<-apply(toto[,59:118],1,mean) FRA<-apply(toto[,119:177],1,mean) clust<-cbind(DAN,HYB,FRA) rownames(clust)<-gsub(".p.cg.6", "",rownames(clust)) ####heatmap library(geneplotter) library(marray) slf=function(d) hclust(d,method="ward.D2") distcor=function(x){as.dist(1-cor(t(x)))} #distance based on correlation matrix distcor=function(x){as.dist(1-cor(t(x)))} hv<-heatmap(as.matrix(clust), col=maPalette(low="green",high="red",mid="black"), distfun=distcor, hclustfun=slf, keep.dendro=TRUE) ####################################################################### ### ANOVA Danish vs French progeny #create a matrix of danish and ffrench individuals + factor definition fact<-read.table("facteurs.txt",header=T,row.names=1) totfact<-cbind(fact, tot) tablo=totfact noms=rownames(tablo) nb_noms=length(noms) danois=vector("logical", length=nb_noms) francais=vector("logical", length=nb_noms) hybride=vector("logical", length=nb_noms) for(i_nom in 1:nb_noms) { if (substr(noms[i_nom],1,3)=="DAN") {danois[i_nom]=TRUE} if (substr(noms[i_nom],1,3)=="HYB") {hybride[i_nom]=TRUE} if (substr(noms[i_nom],1,3)=="IDR") {francais[i_nom]=TRUE} } danfra=tablo[danois|francais,] danfra_f<-danfra[danfra$Sexe=="F",] danfra_m<-danfra[danfra$Sexe=="M",] #ANOVA function monaov <- function(variable,facteurs) { myX <- variable formule <- as.formula(paste("myX", "~", facteurs,sep="")) modele <- aov(formule, na.action=na.omit) table.anova <-anova(modele) return(table.anova[,5])} pvalue.anova<-apply(danfra[,5:31922],2,monaov,facteurs="danfra$Sexe*danfra$Pop") #Correcting p-values with BH library(multtest) anova.adj <- mt.rawp2adjp(pvalue.anova, "BH") for (i in 1:3) { anova.adj <- mt.rawp2adjp(pvalue.anova[i,], "BH") print(mt.reject(anova.adj$adjp,c (0.05,0.01,0.001,0.0001))$r)} #Retrieve differentially expressed transcripts between progeny index<-1:31918 name<-as.vector(colnames(tot)) name<-gsub(".p.cg.6","",name) colnumber<-cbind(index,name) annot<-read.csv("annot.csv", sep=";",header=T) ncolannot<-merge(colnumber,annot,by.x="name",by.y="Query_Id") names.test<-c("rawp", "BH" ,"index") anova.adj<-mt.rawp2adjp(pvalue.anova[2,],"BH") adj<-cbind(anova.adj$adjp,anova.adj$index) colnames(adj)<-names.test tablo<-merge(adj,ncolannot,by.x="index",by.y="index") triBH<-tablo[order(tablo$BH),] DEpop<-triBH[triBH[,3]<0.0001,] ################################################################################################## ####################################################################### ### ePST #Creating two different matrix by sex femelles<-totfact[totfact$Sexe=="F",] males<-totfact[totfact$Sexe=="M",] femelles<-femelles[,5:31922] males<-males[,5:31922] ##EST with intermediate values in hybrid progeny tablo=femelles # or tablo=males noms=rownames(tablo) nb_noms=length(noms) danois=vector("logical", length=nb_noms) francais=vector("logical", length=nb_noms) hybride=vector("logical", length=nb_noms) for(i_nom in 1:nb_noms) { if (substr(noms[i_nom],1,3)=="DAN") {danois[i_nom]=TRUE} if (substr(noms[i_nom],1,3)=="HYB") {hybride[i_nom]=TRUE} if (substr(noms[i_nom],1,3)=="IDR") {francais[i_nom]=TRUE} } dan=tablo[danois,] fra=tablo[francais,] hyb=tablo[hybride,] moydan=apply(dan,2,mean) moyfra=apply(fra,2,mean) moyhyb=apply(hyb,2,mean) moy<-cbind(moyhyb,moydan,moyfra) IsCentre_fem <- moy[,1]>=moy[,2] & moy[,1]<=moy[,3] | moy[,1]<=moy[,2] & moy[,1]>=moy[,3] sum(IsCentre_fem) IsCentre_mal <- moy[,1]>=moy[,2] & moy[,1]<=moy[,3] | moy[,1]<=moy[,2] & moy[,1]>=moy[,3] sum(IsCentre_mal) centrefem<-femelles[,IsCentre_fem==T] centremal<-males[,IsCentre_mal==T] tablo=centrefem #or tablo=centremal noms=rownames(tablo) nb_noms=length(noms) danois=vector("logical", length=nb_noms) francais=vector("logical", length=nb_noms) hybride=vector("logical", length=nb_noms) for(i_nom in 1:nb_noms) { if (substr(noms[i_nom],1,3)=="DAN") {danois[i_nom]=TRUE} if (substr(noms[i_nom],1,3)=="HYB") {hybride[i_nom]=TRUE} if (substr(noms[i_nom],1,3)=="IDR") {francais[i_nom]=TRUE} } moy<-apply(tablo[danois|francais,],2,mean) etfra<-apply(tablo[francais,],2,sd) etdan<-apply(tablo[danois,],2,sd) etfradan<-cbind(etfra,etdan) et<-apply(etfradan,1,mean) fradan<-cbind(moy,et) n=dim(fradan)[1] norm=matrix(data=NA,nrow=n,ncol=60) for (i in 1:n) { pseudohyb<-rnorm(60,fradan[i,1],fradan[i,2]) norm[i,]=pseudohyb } n=dim(tablo)[2] pvalue=vector("logical", length=i) for (i in 1:n) { test_centre<- t.test(tablo[hybride,i],norm,conf.level=0.999) pvalue[i]<-test_centre$p.value } sum(pvalue>0.01) intermediairesfem<-(tablo[,pvalue>0.01]) intermediairesmal<-(tablo[,pvalue>0.01]) ## ePST estimates tableau=t(intermediairesfem) #or tableau=t(intermediairesmal) noms=colnames(tableau) nb_noms=length(noms) danois=vector("logical", length=nb_noms) francais=vector("logical", length=nb_noms) hybride=vector("logical", length=nb_noms) for(i_nom in 1:nb_noms){ if (substr(noms[i_nom],1,3)=="DAN") {danois[i_nom]=TRUE} if (substr(noms[i_nom],1,3)=="HYB") {hybride[i_nom]=TRUE} if (substr(noms[i_nom],1,3)=="IDR") {francais[i_nom]=TRUE} } groupe_d=tableau[,danois] groupe_f=tableau[,francais] tot=cbind(groupe_d,groupe_f) n_carac=dim(tableau)[1] qst_reel=vector("numeric",length=n_carac) N=5000 matrice_pseudo_qst=matrix(0,nrow=n_carac,ncol=N) ### FUNCTION TO CALCULATE PST OR QST CalculatePst <- function(variance_inter=NA, variance_intra=NA, heritability=NA,c_number=NA, RatioCH = NA){ ###arg1 :variance_inter is the variance estimated between populations ###arg2 :variance_intra is the variance estimated intra populations ###variance could be estimated be either from a anova, linear or mixed model ###arg3 :heritability (the proportion of phenotypic variance ### that is because of additive genetic effects) ###arg4 :The scalar c_number expresses the proportion of the total ### variance that is presumed to be because of additive ### genetic effects across populations. ###arg5 : it is possible to input directly ratioCH rather than ### c_number and heritability ###source of the formula : see Spitze, 1993 ### title: Population structure in Daphnia obtusa : ### Quantitative Genetic and Allozymic variation ### Genetics 135 367-374 (October, 1993) #qst1 <-variance_inter/(variance_inter+2*variance_intra) ###source of the formula : see Brommer, 2011 in J. of Evoluationary Biology ###page 1162, eq 3. #qst<-variance_inter/(variance_inter+2*variance_intra) if ((is.na(RatioCH)) == TRUE ) {RatioCH <- c_number/heritability} # if (variance_inter < 0) {variance_inter = 0 } #a variance should not be negative #print(RatioCH) denominator <- ((RatioCH*variance_inter)+2*variance_intra) #print(denominator) qst2<-(RatioCH*variance_inter)/denominator #print(qst2) ###CHOOSE THE GOOD QST FORMULATION return(qst<-qst2) } # ePST estimate for (i_carac in 1:n_carac){ MS_inter=dim(groupe_d)[2]*(mean(as.numeric(groupe_d[i_carac,]))-mean(as.numeric(tot[i_carac,])))^2+dim(groupe_f)[2]*(mean(as.numeric(groupe_f[i_carac,]))-mean(as.numeric(tot[i_carac,])))^2 SCE_intra= sum((groupe_d[i_carac,]-mean(as.numeric(groupe_d[i_carac,])))^2)+sum((groupe_f[i_carac,]-mean(as.numeric(groupe_f[i_carac,])))^2) variance_intra = SCE_intra/(dim(tot)[2]-2) variance_inter = (MS_inter - variance_intra)/mean(ncol(groupe_d),ncol(groupe_f)) qst=variance_inter/(variance_inter+2*variance_intra) qst<-CalculatePst(variance_inter = variance_inter, variance_intra = variance_intra, RatioCH = 1) qst_reel[i_carac]=qst nb_individu=dim(tot)[2] distribution_originale=as.numeric(tot[i_carac,]) # ePst simulations by permutations for (repet in 1:N){ distribution=sample(distribution_originale) #here the sampling function pseudo_d=vector("logical",length=nb_individu) pseudo_d[1:dim(groupe_d)[2]]=TRUE pseudo_f=!pseudo_d MS_inter=dim(groupe_d)[2]*((mean(distribution[pseudo_d]))-mean(distribution))^2+dim(groupe_f)[2]*((mean(distribution[pseudo_f]))-mean(distribution))^2 SCE_intra= sum((distribution[pseudo_d]-mean(distribution[pseudo_d]))^2)+sum((distribution[pseudo_f]-mean(distribution[pseudo_f]))^2) variance_intra=SCE_intra/(nb_individu-2) variance_inter = (MS_inter - variance_intra)/mean(ncol(groupe_d),ncol(groupe_f)) pseudo_qst<-CalculatePst(variance_inter = variance_inter, variance_intra = variance_intra, RatioCH = 1) matrice_pseudo_qst[i_carac,repet]=pseudo_qst } } ### Results i_carac=dim(tableau)[1] is.signif=vector("logical", length=i_carac) ic=vector("numeric",length=n_carac) ic_1=quantile(matrice_pseudo_qst,probs=0.999) for (i_carac in 1:n_carac){ test_qst= qst_reel[i_carac]>ic_1 ic[i_carac]=ic_1 is.signif[i_carac]=test_qst } sum(is.signif) mean(qst_reel) # Retrieve outliers names_outliers=rownames(tableau[is.signif,]) qst_outliers=qst_reel[is.signif] qst_outliers=round(qst_outliers,2) outlierstot=cbind(names_outliers,qst_outliers)