#-----------------------------------------------------------------------------------# ## GLOBAL VARIABLES ## traits<-read.csv("the name of your species traits file")#see ?gowdis weights<-read.csv("the weighting scheme for you traits")#see ?gowdis asym.bin<-read.csv("list of asymmetric binary trait variable")#see ?gowdis #a species composition matrix with species as columns and sites as rows; #percent frequency for each sites should equal 100 or 1 freq.data<-read.csv("species composition data") #-------------------------------Calculating Tsim ----------------------------------------------------# #Step 1. Calculate the Gower dissimilarity (Gdis) for the species traits library(FD) #Calculate Gower similarity (see ?gowdis for help) gdis<-gowdis(traits, weights, asym.bin) #Step 2. Convert the Gower dissimilarity matrix to a similarity data.frame #print the upper half of the distance matrix as well attr(gdis, "Upper") <- TRUE #Convert gower dissimilarity into similarity and arrange into a matrix v<-as.matrix(1-gdis) diag(v) <- 1 #flatten matrix into 3 columns gsim <- data.frame( sitex=rep(row.names(v),ncol(v)), sitey=rep(colnames(v),each=nrow(v)), gsim=as.vector(v),stringsAsFactors=FALSE) #Step 3. Calculate Tsim tsim<- function (gsim,freq.data) { ds<-freq.data #repeats sitecodes the appropriate number of times q<-rep.int(row.names(ds),ncol(ds)) #repeats each species name nrow times p<-rep(colnames(ds),each = (nrow(ds))) #places sitecode,species name and percent frequency into a data frame ssf<-data.frame(q,p,as.numeric(ds)) names(ssf) <- c("site", "species", "freq") ###Load function for calculatin Tsim variable a tsim.a <- function (x, y, ssf) { # Create an empty matrix 'specfreq' to fill #Selects the species frequency data greater than 0 for the two sites being compared ssfx <- ssf[ssf$site == x & ssf$freq >0,] ssfy <- ssf[ssf$site == y & ssf$freq >0,] #pulls out the species that are present at site x and/or site y using a merge, #this is needed to create the initial empty matrix m<-(merge(ssfx,ssfy,all=TRUE)) species<-unique(m$species) #Creates an empty matrix of the frequency of each species at site x and y specfreq <- matrix(0, length(species), 2, dimnames=list(species,c(x,y))) #Fills in the empty matrix with the appropriate value for(i in 1:nrow(ssf)) {specfreq[rownames(specfreq)==ssf[i,"species"], colnames(specfreq)==ssf[i,"site"]] <- ssf[i,"freq"]} # For species present at site x and y remove the minimum of the two from both #(this is variable a -the exact trait similarity and frequency between sites) #find minimum frequency for each species (row) for site x and y #(i.e. parallel minimum value in 'state' matrix column 'x' and 'y') and assign it to a a <- pmin(specfreq[,x], specfreq[,y]) asum<-sum(a) asum } ###Load function for calculating Tsim variable b tsim.b <- function (x, y, ssf, gsim) { # Create an empty matrix 'specfreq' to fill #Selects the species frequency data greater than 0 for the two sites being compared ssfx <- ssf[ssf$site == x & ssf$freq >0,] ssfy <- ssf[ssf$site == y & ssf$freq >0,] #pulls out the species that are present at site x and/or site y using a merge, #this is needed to create the initial empty matrix m<-(merge(ssfx,ssfy,all=TRUE)) species<-unique(m$species) #Creates an empty matrix of the frequency of each species at site x and y specfreq <- matrix(0, length(species), 2, dimnames=list(species,c(x,y))) # Fill empty matrix with data from ssf #Fills in the empty matrix with the appropriate value for(i in 1:nrow(ssf)) {specfreq[rownames(specfreq)==ssf[i,"species"], colnames(specfreq)==ssf[i,"site"]] <- ssf[i,"freq"]} # For species present at site x and y remove the minimum of the two from both #(this is variable a -the exact trait similarity and frequency between sites) #find minimum frequency for each species (row) for site x and y #(i.e. parallel minimum value in 'state' matrix column 'x' and 'y') and assign it to a a <- pmin(specfreq[,x], specfreq[,y]) #subtract this for the species frequency matrix specfreq <- specfreq - a # Calulate variable B #Set answer to 0 answer <- 0 MAX <- 60 count <- 1 #while state is >0 keep doing the following while(sum(specfreq[,1]) > 1e-10 & sum(specfreq[,2]) > 1e-10 & count < MAX) { #Find species remaining at site x sx <- species[specfreq[,1] > 0] #Find species remaining at site y sy <- species[specfreq[,2] > 0] #Return gower similarity scores for remaining species gsimre <-gsim[gsim$sitex %in% sx & gsim$sitey %in% sy,] #determines the row containing the maximum value for remaining gsim and assigns it to i i <- which.max(gsimre$gsim) #once the max gsim value has been found (i) we go back to the specfreq matrix and filter out #the frequency of the site x species associated with i xf <- specfreq[as.character(gsimre$sitex[i]),x] #and the frequency of the site y species associated with i yf <- specfreq[as.character(gsimre$sitey[i]),y] #Then take the minimum common frequency for the two species associated with i #When xf and yf are 0 the f becomes infinity f <- min(xf,yf) #The frequency of the species at site x associated withthe greatest i is multiplied by i and #added to the answer (in other words the trait similarity between the most similar species at the #two sites is weighted by the minimum common frequency for the species at site x) answer <- answer + f * gsimre$gsim[i] #Subtract the common minimum frequency from both the site x and site y column #when infinity is subtracted the cell becomes numeric(0) not 0 specfreq[gsimre$sitex[i],x] <- specfreq[gsimre$sitex[i],x] - f specfreq[gsimre$sitey[i],y] <- specfreq[gsimre$sitey[i],y] - f count <- count+1 } answer } #create the output data.frame where the final results will be placed r<-t(combn(rownames(freq.data),2)) output<-data.frame(r,0,0,0,0,stringsAsFactors=FALSE) names(output)<-c("site.x","site.y","A","B","C","Tsim") #Calculate Tsim for all site combination from input for(i in 1:nrow(output)){ x <- output$site.x[i] y <- output$site.y[i] output$A[i]<-tsim.a(x,y,ssf) output$B[i]<-tsim.b(x,y,ssf,gsim) output$Tsim[i]<-output$A[i] + output$B[i] print(i/nrow(output)) } print(output) } ##################################### TSIM-ASSOCIATED GRAPHICS ########################################## ################################### EMERGENT-GROUP PLOTS (E-PLOTS) ####################################### #Please note that the colour pallete would need to be adjusted if emergent groups differ from those used for southwest Madagascar ###Style Sheet pretty.chart <- theme_update( axis.text.x = theme_text(colour = "black", size=8, family="Times"), axis.text.y = theme_text(colour = "black", hjust = 1,size=8, family="Times"), axis.title.x = theme_text(colour = "black", face = "bold",size = 8,vjust=0, family="Times"), axis.title.y = theme_text(colour = "black", face = "bold", size = 8, angle = 90,vjust=0.3, family="Times"), legend.background = theme_rect(fill = "white"), legend.text = theme_text(size =8, family="Times"), legend.title = theme_text(face = "bold",size = 8, family="Times"), panel.background = theme_rect(fill = "white"), panel.border = theme_rect(colour="black",size=0.5), panel.grid.major=theme_line(colour = "grey",size = 0.2, linetype = 1), panel.grid.minor=theme_line(colour = "grey",size = 0.1, linetype = 1), plot.background = theme_rect(fill = "white"), plot.title = theme_text(size=8, face="bold",vjust=1, family="Times")) ##Colour palettes cols1<-c( "Acropora.bottlebrush"="#00008B", "Acropora.bushy"="#181E98", "Acropora.corymbose"="#313DA4", "Acropora.digitate"="#4A5CB2", "Acropora.staghorn"="#627BBF", "Acropora.tables.and.plates"="#7B9ACC", "Isopora"="#94B9D9", "Montipora"="#ADD8E6", "Porites"= "#DDCC00", "Porites.massive"="#EEFF77", "Pocilloporidae"="#FFC0CB", "Agariicidae"="#F2AAB4", "Bubble.Coral"="#E5959D", "Cycloseris.Fungia"="#D88087", "Fleshy.Domes"="#CB6A70", "Free.living.colonies"="#BE555A", "Goniopora.Alveopora"="#B14043", "Laminar"="#A42A2D", "Leptastrea"="#971516", "Other"="#BEBEBE") blues<-colorRampPalette(c('dark blue', 'light blue'),bias=10,space="rgb",interpolate="linear") reds<-colorRampPalette(c('pink', 'dark red'),bias=1,space="rgb",interpolate="linear") yellows<-colorRampPalette(c('#DDCC00', '#EEFF77'),bias=1,space="rgb",interpolate="linear") greys<-colorRampPalette(c('grey','light grey'),bias=10,space="rgb",interpolate="linear") ###Basic graph #Function creates the dataframe that the histogram is based on #cut.off=minimum number of species present to be included in plot #x=the number of Tsim/Renkonen scores to show in the table #pc=species composition for sites in terms of point codes #sm=Species composition for sites #tsim.min,tsim.max, renk.min, renk.max =these are used to #define the area of the mechano-functional sace to explore basic.graph<- function(cut.off,x,tsim.min,tsim.max,renk.min,renk.max,pc,sm,tm){ #Filters out sites with cut.off or fewer coral points from species and total composition filter<-rowSums(pc)>=cut.off sm<-sm[filter,] #Replaces species names with emergent group names colnames(sm) = plot.labels$Plot.Group[which(plot.labels$SpeciesName %in% colnames(sm))] nms<-colnames(sm) em<-sm %*% sapply(unique(nms),"==", nms) #Filters out site-pairs from Tsim results that are preset in em filter1<-tm$site.x %in% row.names(em) & tm$site.y %in% row.names(em) tm<-tm[filter1,] #Pulls out site-pairs with a maximum Renk of renk.max and minimum Tsim of tsim.min tsim.filter1<- tm$Tsim >= tsim.min tm<-tm[tsim.filter1,] tsim.filter2<- tm$Tsim <= tsim.max tm<-tm[tsim.filter2,] renk.filter1<- tm$A >= renk.min tm<-tm[renk.filter1,] renk.filter2<- tm$A <=renk.max tm<-tm[renk.filter2,] tm.ord<-tm[with(tm,order(-tm$Tsim,-tm$A)),] top <- as.vector(unique(unlist(tm.ord[1:x,c("site.x","site.y")]))) #Filter out the sites to include in histogram and table #(high tsim but low species sim) top.sites<-em[top,] #create data frame for plotting histogram df <- data.frame(Site=rep(row.names(top.sites), times=ncol(top.sites)),Emergent.groups= rep(colnames(top.sites), each=nrow(top.sites)),Count=as.vector(top.sites)) #order bars (sites) in histogram from tallest to shortest df$Site <- factor(df$Site, levels = levels(df$Site)[order(aggregate (Count ~ Site, data = df, sum)$Count,decreasing = TRUE)]) # reorder the groups df$Emergent.groups <- factor(df$Emergent.groups , levels=levels(df$Emergent.groups)[order(levels(df$Emergent.groups), decreasing = TRUE)]) #filter out emergent groups for top sites that are 0 filter<-df$Count != 0 df<-df[filter,] df } #Function that creates the inset table inset.table<- function(cut.off,x,tsim.min,tsim.max,renk.min,renk.max,pc,sm,tm){ #Filters out sites with 10 or fewer coral points from species and total composition filter<-rowSums(pc)>=cut.off sm<-sm[filter,] #Replaces species names with emergent group names colnames(sm) = plot.labels$Plot.Group[which(plot.labels$SpeciesName %in% colnames(sm))] nms<-colnames(sm) em<-sm %*% sapply(unique(nms),"==", nms) #Filters out site-pairs from Tsim results that are preset in em filter1<-tm$site.x %in% row.names(em) & tm$site.y %in% row.names(em) tm<-tm[filter1,] #Orders Tsim by increasing Renkonen (species similarity) and #decreasing Tsim (traits similarity) #tm.ord<-tm[with(tm,order(-tm$Tsim,tm$A)),] #top <- as.vector(unique(unlist(tm.ord[1:x,c("site.x","site.y")]))) #Orders results by the residual of the linear relationship #between Tsim and Renkonen #m = lm(tm$Tsim ~ tm$A, df) #tm$resid<-residuals(m) #tm.ord<-tm[with(tm,order(-tm$resid)),] #top <- as.vector(unique(unlist(tm.ord[1:x,c("site.x","site.y")]))) tsim.filter1<- tm$Tsim >= tsim.min tm<-tm[tsim.filter1,] tsim.filter2<- tm$Tsim <= tsim.max tm<-tm[tsim.filter2,] renk.filter1<- tm$A >= renk.min tm<-tm[renk.filter1,] renk.filter2<- tm$A <=renk.max tm<-tm[renk.filter2,] tm.ord<-tm[with(tm,order(-tm$Tsim,tm$A)),] top <- as.vector(unique(unlist(tm.ord[1:x,c("site.x","site.y")]))) #Create text box (tb) inset for histogram show site similarities w/o resid data<-data.frame("site.x"=tm.ord$site.x,"site.y"=tm.ord$site.y, "Renk."=round(tm.ord$A,3),"Tsim"=round(tm.ord$Tsim,3)) tb<-tableGrob(data[1:x,],gpar.coretext=gpar(cex= 1.4), gpa.coltext =gpar(cex=1.4),gpa.rowtext =gpar(cex=1.4)) } #Function extracts legend from plot g_legend<-function(a.gplot){ tmp <- ggplot_gtable(ggplot_build(a.gplot)) leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box") legend <- tmp$grobs[[leg]] return(legend)} #Function puts all the pieces together using the functions above #and creates a histogram of emergent groups #cut.off=minimum number of coral points required fr inclusion #sc=species count matrix #sm=species or total composition matrix #tm=Tsim data #y.max=maximum y axis value for histogram #depth.rang=character value to be included in title i.e. "2-5m" #legend height=center position of legend on 0-1 scale #table height=center position of table on 0-1 scale emergent.plot<- function(cut.off,x,tsim.min,tsim.max,renk.min,renk.max,pc, sm,tm,y.max,depth.range,legend.height,table.height){ #Create dataframe for histogram df<-basic.graph(cut.off,x,tsim.min,tsim.max,renk.min,renk.max,pc,sm,tm) #Create histogram qp<-qplot(df$Site,data=df,weight=df$Count,geom="histogram", fill=df$Emergent.groups, ylim = c(0,y.max)) qp<-qp + geom_bar(colour="white",cex=2) qp<-qp + xlab("Sites") qp<-qp + ylab("Frequency") qp<-qp + guides(fill = guide_legend(reverse = TRUE)) qp<-qp + opts(title =paste("Emergent Groups at selected sites",depth.range)) qp<-qp + theme_set(pretty.chart) qp<-qp + scale_fill_manual(name = "Emergent Groups",values=cols1) #create inset table my_table<-inset.table(cut.off,x,tsim.min,tsim.max,renk.min,renk.max,pc,sm,tm) #extract legend legend <- g_legend(qp) #Create the viewports, push them, draw and go up grid.newpage() vp1 <- viewport(width = 0.9, height = 0.50, x = 0.5, y = .7) vpleg <- viewport(width = 0.4, height = 0.2, x = 0.25, y = 0.2) subvp <- viewport(width = 0.4, height = 0.2, x = 0.75, y = 0.2) print(qp + opts(legend.position = "none"), vp = vp1) upViewport(0) pushViewport(vpleg) grid.draw(legend) #Make the new viewport active and draw upViewport(0) pushViewport(subvp) grid.draw(my_table) #Shut down window and pass it to pdf #dev.off() } ############################# ABUNDANCE-WEIGHTED HEAT PLOTS (AWH-PLOTS) ############################# #Creates a colour plot of gsim between species present at two sites and shows their abundance using histograms along the axes #Notation of use #x ;site 1 name (i.e "A28") #y ;site 2 name (i.e. "T10") #freq.data; species composition for sites, each site must total 1 or 100, rows are sites and columns are species #tsim.scores ; data.frame containing the tsim scores between sites #depth ; the depth zone examined (i.e. "2-5 m") #x.remove ; the number of species name labels to remove from the x axis for clarity #y.remove; the number of species name labels to remove from the y axis for clarity #x.mindiff; the minimum distance between labels on x axis (i.e. 10) #x.stepsize; step size is used for calculating the label spreading on x axis (i.e 1/10) #x.min; minimum x-axis value #x.max; maximum x-axis value #y.mindiff; the minimum distance between labels on y axis (i.e. 10) #y.stepsize<-step size is used for calculating the label spreading on y axis (i.e 1/10) #y.min ; minimum x-axis value #y.max; maximum x-axis value match<- function (x,y,x.remove,y.remove,x.mindiff,x.stepsize,x.min,x.max,y.mindiff, y.stepsize,y.min,y.max,freq.data,tsim.scores,depth,gsim) { #Pull out data to work with m<-freq.data[c(x,y),] #filter out species abundance data for site x and y m<-m[,colSums(m)>0]#remove columns for species not present at either site m<-t(m) #transpose matrix overlap<-pmin(m[,1],m[,2]) m1<-m-overlap #m1 is the remainder m2<-m-m1 #m2 is the species overlapping m<-rbind(m1,m2) m<-m*100 m<-m[rowSums(m)>0,] print(m[order(m[,1],m[,2],decreasing=TRUE),]) #split into abundance by site in decreasing abundance x.abun<-m[m[,1]>0,] x.abun<-x.abun[order(x.abun[,1],decreasing=TRUE),] y.abun<-m[m[,2]>0,] y.abun<-y.abun[order(y.abun[,2],decreasing=TRUE),] sx<-rownames(x.abun)#species names site x sy<-rownames(y.abun)#species names site y #Create the co-ordinates for the boxes abun.x<-as.numeric(x.abun[,1]) csx<-cumsum(abun.x)#cummulative sum of species freqencies at site x csx2<-csx[-length(csx)]#removes last entery in vector xleft<-rep(c(0,csx2),length(sy)) #creates x left of rectangles xright<-rep(c(csx),length(sy)) #creates x right of rectangles abun.y<-as.numeric(y.abun[,2]) csy<-cumsum(abun.y)#cummulative sum of species freqencies at site y csy2<-csy[-length(csy)]#removes last entery in vector yb<-c(0,csy2) ybottom<-as.vector(sapply(yb, function(x) rep(x,length(sx)))) ytop<-as.vector(sapply(csy, function(x) rep(x,length(sx)))) #Create list of species names in same order as co-ordinates sitex<-rep(sx,length(sy))#repeat entire sequence sitey<-as.vector(sapply(sy,function (x) rep(x,length(sx))))#repeat each element of sequence #Summarize species, coordinates and tsim in one dataframe df<-data.frame(sitex=sitex, sitey=sitey,xleft=xleft,ybottom=ybottom,xright=xright,ytop=ytop) #Pull out gsim scores my.gsim<-gsim filter<-my.gsim$sitex %in% sitex & my.gsim$sitey %in% sitey my.gsim<-my.gsim[filter,] #merge gsim scores with other data mdf<-merge(df,my.gsim) #specifiy where you want the breaks in the columns "colour" breaks<-c(0.9999999999999,0.9,0.8,0.7,0.6,0.5,0.4) #Create a colour for each section col1<-rev(brewer.pal(4,"Reds")) col2<-rev(brewer.pal(3,"Blues")) cols<-rev(c("black",col1,col2)) #Replace the gsim values with colours using the breaks mdf$colour<-mdf$gsim mdf$colour <- as.character(cut(mdf$colour, c(-Inf, breaks, Inf),labels=cols)) #Order and check that colours are ok mdf<-mdf[order(mdf[,"gsim"],decreasing=TRUE),] #Order from most to least abundant along both axes mdf<-mdf[order(mdf[,"gsim"],decreasing=TRUE),] #PLOTTING# #opt1 quartz("colour.plot") par(mar=c(13.5,13.5,6,6)+0.1)#sets margins of plotting area par(pty="s")#fixes the aspect ratio to 1:1 par(xpd=TRUE)#allows for plotting in the margins #create the data plot plot(c(0,100), c(0,100), axes=FALSE, frame.plot=TRUE,type = "n",xlab="",ylab="",xaxs="i", yaxs="i") #add axes mtext(paste("Species composition Site ",x),side=1,line=-19.5,font=2) mtext(paste("Species composition Site ",y),side=2,line=-19.5,font=2) axis(side=4,las=2) axis(side=3,las=1) #Add the rectangles rect(mdf$xleft,mdf$ybottom,mdf$xright,mdf$ytop,col=mdf$colour) #Add species labels #Find mid-point between xleft and xright x.left.pos<-c(0,csx2) x.right.pos<-csx x.pos<-(x.left.pos + x.right.pos)/2 #remove positions for removed labels x.pos1<-x.pos[1:(length(sx)-x.remove)] #spread out the remaining labels x.pos2<-spread.labs(x.pos1,mindiff=x.mindiff, stepsize=x.stepsize, min=x.min,max=x.max) #Find mid-point between y.bottom and y.top for points to show only y.bottom.pos<-yb y.top.position<-csy y.pos<-(y.bottom.pos + y.top.position)/2 #remove positions for removed labels y.pos1<-y.pos[1:(length(sy)-y.remove)] #spread out the remaining labels y.pos2<-spread.labs(y.pos1,mindiff=y.mindiff, stepsize=y.stepsize, min=y.min,max=y.max) #create labels for points you want to show only x.label<-as.vector(sx) x.label<-x.label[1:(length(sx)-x.remove)] y.label<-as.vector(sy) y.label<-y.label[1:(length(sy)-y.remove)] #Add the species labels text(x=par()$usr[1]-10,y=y.pos2,y.label,xpd=TRUE,adj=1,) text(y=par()$usr[3]-10,x=x.pos2,x.label,xpd=TRUE,adj=1,srt=90) #Add segments segments(x0=x.pos1,y0=rep(0,length(x.pos1)),x1=x.pos2,y1=rep(-10,length(x.pos2)),col='black') segments(x0=rep(0,length(y.pos1)),y0=y.pos1,x1=rep(-10,length(y.pos1)),y1=y.pos2) #Add legend par(xpd=TRUE) legend(-80,-10, legend=c("1=Same species","0.9-0.999","0.8-0.9","0.7-0.8", "0.6-0.7","0.5-0.6","0.4-0.5","<0.4"), fill=c(cols[8],cols[7],cols[6],cols[5], cols[4],cols[3],cols[2],cols[1]),title="Gower similarity") #Add Tsim and Renk score between sites as text labels t1<-tsim.scores t2<-data.frame( site.x=t1$site.y, site.y=t1$site.x, A=t1$A,B=t1$B,C=t1$C,Tsim=t1$Tsim) t<-rbind(t1,t2) t<-t[t[,1] ==x,] t<-t[t[,2] ==y,] Renkonen<-round(t$A,3) Tsim<-round(t$Tsim,3) #Add text lables to graph text(-70,110,labels=paste("Tsim:",Tsim), cex=1) text(-35,110,labels=paste("Renk.:",Renkonen),cex=1) text(-70,120,labels=paste("Site 1:",x), cex=1) text(-35,120,labels=paste("Site 2:",y),cex=1) text(-67,130,labels=paste("Depth:",depth),cex=1) rect(-85,105,-15,135)#put a box around the text labels }