# # Estimating Mutation rates from star genealogies with singletons # { ExpansionStart <- 1994 Temp <- scan("StrainSampleDates.txt",quiet=TRUE) message("Computation done for ",appendLF=FALSE,length(Temp) / 2) message(" strains and ",appendLF=FALSE) SampleDateStrains <- matrix(ncol = length(Temp) / 2 ,nrow = 2,0) SampleDateStrains[] <- Temp SampleDateStrains <- t(SampleDateStrains ) SampleDateStrains[,2] <- SampleDateStrains[,2]- ExpansionStart TotalTime <- sum(SampleDateStrains[,2]) Temp<- scan("LociMutNbandLength.txt",quiet=TRUE) LociNb <- length(Temp) / 2 message("",appendLF=FALSE,LociNb) message(" loci") MutNbLocusLength <- matrix(ncol = LociNb ,nrow = 2,0) MutNbLocusLength [] <- Temp MutNbLocusLength <- t(MutNbLocusLength ) MinMutRate <- 10^-12 MaxMutRate <- 10^-4 Steps <- 0.1 # in log units MutationRates <- exp( seq( log( MinMutRate ), log( MaxMutRate ), Steps )) PointNb <- length(MutationRates) Likelihoods1 <- matrix(nrow=PointNb,ncol=LociNb,0) MultiLocusLikelihoods1 <- numeric(PointNb) Likelihoods2 <- matrix(nrow=PointNb,ncol=LociNb,0) MultiLocusLikelihoods2 <- numeric(PointNb) for (i in 1:length(MutationRates)) { lambda <- TotalTime * MutationRates[i]*MutNbLocusLength[,2] Likelihoods1[i,] <- dpois(MutNbLocusLength[,1],lambda) MultiLocusLikelihoods1[i] <- log(prod(Likelihoods1[i,])) if(MultiLocusLikelihoods1[i]==-Inf) MultiLocusLikelihoods1[i]<- -800 Likelihoods2[i,] <- dbinom(MutNbLocusLength[,1],TotalTime,MutationRates[i]*MutNbLocusLength[,2]) MultiLocusLikelihoods2[i] <- log(prod(Likelihoods2[i,])) if(MultiLocusLikelihoods2[i]==-Inf) MultiLocusLikelihoods2[i]<- -800 } Fit1 <- spline(log(MutationRates) , MultiLocusLikelihoods1, n = 10*PointNb, method = "fmm", xmin = min(log(MutationRates)), xmax = max(log(MutationRates)), ties = mean) Fit2 <- spline(log(MutationRates) , MultiLocusLikelihoods2, n = 10*PointNb, method = "fmm", xmin = min(log(MutationRates)), xmax = max(log(MutationRates)), ties = mean) ##windows() quartz() plot(MutationRates,MultiLocusLikelihoods1,log="x",main="Likelihood Surface with Poisson apporximations") lines(exp(Fit1$x),Fit1$y) message("With Poisson Approximation") message("Maximum likelihood value of the data is ",max(Fit1$y)) message ("corresponding to a mutation rate of ",exp(Fit1$x[which.max(Fit1$y)])) message("Lower 0.95% CI bound is ", exp(Fit1$x[which(Fit1$y >= (max(Fit1$y) - 2))[1]]) ) message("Upper 0.95% CI bound is ", exp(Fit1$x[which(Fit1$y >= (max(Fit1$y) - 2))[length(which(Fit1$y >= (max(Fit1$y) - 2)))]]) ) abline(v=exp(Fit1$x[which(Fit1$y >= (max(Fit1$y) - 2))[length(which(Fit1$y >= (max(Fit1$y) - 2)))]]),,col=rgb(0,0,1)) abline(v=exp(Fit1$x[which(Fit1$y >= (max(Fit1$y) - 2))[1]]),,col=rgb(0,0,1)) abline(v=exp(Fit1$x[which.max(Fit1$y)]),col=rgb(1,0,0)) message("\n ") ##windows() quartz() plot(MutationRates,MultiLocusLikelihoods2,log="x",main="Likelihood Surface with exact binomial computations") lines(exp(Fit2$x),Fit2$y) message("With exact Binomial computations") message("Maximum likelihood value of the data is ",max(Fit2$y)) message ("corresponding to a mutation rate of ",exp(Fit2$x[which.max(Fit2$y)])) message("Lower 0.95% CI bound is ", exp(Fit2$x[which(Fit2$y >= (max(Fit2$y) - 2))[1]]) ) message("Upper 0.95% CI bound is ", exp(Fit2$x[which(Fit2$y >= (max(Fit2$y) - 2))[length(which(Fit2$y >= (max(Fit2$y) - 2)))]]) ) abline(v=exp(Fit2$x[which(Fit2$y >= (max(Fit2$y) - 2))[length(which(Fit2$y >= (max(Fit2$y) - 2)))]]),,col=rgb(0,0,1)) abline(v=exp(Fit2$x[which(Fit2$y >= (max(Fit2$y) - 2))[1]]),,col=rgb(0,0,1)) abline(v=exp(Fit2$x[which.max(Fit2$y)]),col=rgb(1,0,0)) ##windows() quartz() plot(exp(Fit1$x),Fit1$y,type="l",log="x",main="Fit Comparisons Poisson Vs Binomial") lines(exp(Fit2$x),Fit2$y) }