# Author: Erwan Saulnier, PhD candidate (May 2018)
# R version 3.3.3 (32 bit)


#---------------------------------------------------------------------------------------------#
#                 R code for calculating annual production estimates 
#				using the Brey (2012) model.
#---------------------------------------------------------------------------------------------#


# Developed by and used in:
# Erwan Saulnier, Anik Brind’Amour, Adrien Tableau, Marta M. Rufino, Jean-Claude Dauvin, Christophe Luczak, Hervé Le Bris
# Seasonality in coastal macrobenthic biomass and its implications for estimating secondary production using empirical models
# Limnology and Oceanography

### Main goal: Estimating secondary production using the Brey model directly in R ###
# This R code quickly returns an estimate of P:B and P for each species/station/site/year.
# No need to fill in a different Excel spreadsheat for each station/site/year.
# This allows to save time if your study involves several years and/or stations/sites.

### How to cite this code: Please cite both the present study (Saulnier et al. xxxx) and the original paper of Brey (2012) 				
# Feel free to contact the authors for any additional information.				



#---------------------------------------------------------------------------------------------#
#                                 Preliminaries
#---------------------------------------------------------------------------------------------#


# This R code requires to load 3 different data files to work:
# 1) your main data frame (or the example data frame supplied as Supp. Info. online)
# 2) the file called "biological_traits_matrix.csv" (supplied as Supp. Info. online)
# 3) the file called "Brey_5ANN_parameters.rds" (supplied as Supp. Info. online).

# The 3 files must be in the same folder (same working directory)

# 1) Note that your main data frame should include the following variables:
# species, year, site, station
# J.mgAFDM = J/ [mg AFDM] -> ratio converting biomass (mg AFDM) in energy (J). Source = Brey et al. (2010)
# mean_biom = mean annual biomass in g AFDM/m2
# W = mean individual weight  in g AFDM (= mean annual biomass divided by mean annual abundance)
# M = mean individual body mass in J (M = W * J.mgAFDM * 1000)
# D = depth in meters
# T = mean annual temperature in °C

# WARNING: Use the same column names in your data frame and in this R code (e.g. the name of the temperature variable should be "T")
# Or change them both in your data frame and in this R code.

# --> See the data file supplied online or the example below

# Example dataset: Macrobenthic data collected in 1980 & 1981 at 'Pierre Noire' (PN) 
# & at 'Rivière de Morlaix' (RM), Bay of Morlaix, France

#> head(data)
#          species year    site station J.mgAFDM  mean_biom            W         M  D        T
#1       Abra alba 1980 morlaix      PN   19.997 1.54033446 0.0051276114 102.53684 17 12.07917
#2       Abra alba 1980 morlaix      RM   19.997 0.57932556 0.0091956438 183.88529 10 12.07917
#3       Abra alba 1981 morlaix      PN   19.997 0.64467226 0.0066188117 132.35638 17 11.76250
#4       Abra alba 1981 morlaix      RM   19.997 0.05544413 0.0041376218  82.74002 10 11.76250


# 2) The data file "biological_traits_matrix.csv" refers to the 17 categorical variables 
# of the Brey model (taxonomic and lifestyle information). 
# WARNING: This csv file should include all species of your study.
# If not, fill in all categorical variables for each missing species with the binary code "1 or -1"
# The value "1" means "YES" and the value "-1" means "NO"
# Example: Abra alba is a bivalve so that the value of the "Mollusca variable" should be 1, and the values
# of the 4 other taxonomic variables (Annelida, Crustacea, Echinodermata, Insecta) should be -1
# The variable "Exploited" refers to exploited populations, usually set to -1

#3) The file called "Brey_5ANN_parameters.rds" refers to the numerical parameters' values of the Brey model.
 

# Note that the P/B estimates calculated with this R code slightly differ from the P estimates 
# you get using the Brey model as an Excel spreadsheet.
# The differences come from the way to back-transform log(P/B).
# In the Excel spreadsheet, P:B ratios are estimated from 10^(log10(x)) and thus are slightly biased.
# With the compute_PB() function, P/B ratio are estimated from exp(log(10)*mean(x) + 0.5*log(10)*(sd(x))^2))
# and are thus considered more accurate.
# Anyway, the differences between the 2 back-tranformation methods are marginal (~ 1%).



#---------------------------------------------------------------------------------------------#
#                                 Load data
#---------------------------------------------------------------------------------------------#


# Load macrobenthic data
path <- "C:/Users/esaulnie/Documents/Ph.D.E_SAULNIER/Data/Brey_ANNmodel_2012/Brey_model_in_R" # Don't forget to change the working directory
setwd(path)
data <- read.csv('morlaix_data.csv', header=T, sep=';',dec=',') ; data$year <- as.factor(data$year)

# Load the biological traits (inputs of the Brey model)
bio_trait <- read.csv("biological_traits_matrix.csv",header=T, sep=';',dec=',')

# Merge the 2 datasets
data <- merge(data,bio_trait)


#---------------------------------------------------------------------------------------------#
#                           Load the compute_PB() function
#---------------------------------------------------------------------------------------------#


# This function computes the P/B ratios using the Brey (2012) model 
# WARNING - UNITS : M in J, T in °C, D in meters

compute_PB = function (x){
  
  param <- readRDS("Brey_5ANN_parameters.rds")
  logPB_vector <- c()
 
   for (i in 1:5){
    # NB : temperature in Kelvin  
    ANN <- param[i,]
    H1 = tanh(0.5*(ANN$b0 + ANN$b1*log10(x$M) + ANN$b2*(1/(273.15+x$T)) + ANN$b3*log10(x$D) + ANN$b4*x$Mollusca + ANN$b5*x$Annelida + 
                     ANN$b6*x$Crustacea + ANN$b7*x$Echinodermata + ANN$b8*x$Insecta + ANN$b9*x$Infauna + ANN$b10*x$Sessile +
                     ANN$b11*x$Crawler + ANN$b12*x$FacSwim + ANN$b13*x$Herbivor + ANN$b14*x$Omnivor + ANN$b15*x$Carnivor +
                     ANN$b16*x$Lake + ANN$b17*x$River + ANN$b18*x$Marine + ANN$b19*x$Subtidal + ANN$b20*x$Exploited))
    
    H2 = tanh(0.5*(ANN$c0 + ANN$c1*log10(x$M) + ANN$c2*(1/(273.15+x$T)) + ANN$c3*log10(x$D) + ANN$c4*x$Mollusca + ANN$c5*x$Annelida + 
                     ANN$c6*x$Crustacea + ANN$c7*x$Echinodermata + ANN$c8*x$Insecta + ANN$c9*x$Infauna + ANN$c10*x$Sessile +
                     ANN$c11*x$Crawler + ANN$c12*x$FacSwim + ANN$c13*x$Herbivor + ANN$c14*x$Omnivor + ANN$c15*x$Carnivor +
                     ANN$c16*x$Lake + ANN$c17*x$River + ANN$c18*x$Marine + ANN$c19*x$Subtidal + ANN$c20*x$Exploited))
    
    output =ANN$a0 + ANN$a1*H1 + ANN$a2*H2
    logPB_vector <- c(logPB_vector,output)
  }
  
  result <- cbind.data.frame(mean_logPB=mean(logPB_vector),SD_logPB=sd(logPB_vector),
                             PB=exp(log(10)*mean(logPB_vector) + 0.5*log(10)*(sd(logPB_vector))^2))
  return(result)
}


# Outputs are:
# mean_logPB = log(P:B), expressed as the mean of the 5 ANN model estimates. See Brey (2012) for more details or contact us
# SD_logPB = Standard deviation of log(P:B) (Deviation from the mean of the 5 ANN model estimates)
# PB = P:B ratio, corresponding to the back-transformation of log(P:B)



#---------------------------------------------------------------------------------------------#
#                           Apply the compute_PB() function
#---------------------------------------------------------------------------------------------#


PB_data <- do.call(rbind.data.frame,lapply(split(data,data[,c('year','site','station','species')],drop=T),compute_PB))

# Re-create the columns 'species', 'year' & 'month' from the rownames
PB_data$id <- row.names(PB_data) ; PB_data$year <- sapply(as.character(PB_data$id), function(x) strsplit(x, "[.]")[[1]][1])
PB_data$site <- sapply(as.character(PB_data$id), function(x) strsplit(x, "[.]")[[1]][2])
PB_data$station <- sapply(as.character(PB_data$id), function(x) strsplit(x, "[.]")[[1]][3])
PB_data$species <- sapply(as.character(PB_data$id), function(x) strsplit(x, "[.]")[[1]][4])
row.names(PB_data) <- 1:dim(PB_data)[1] ; PB_data <- PB_data[c('year','site','station','species','mean_logPB','SD_logPB','PB')]


# Calculate the production P of each species
data <- merge(data,PB_data[,c("year","site","station","species","PB")]) # add the P/B ratio to the dataset
data <- data[c("species","year","site","station", "J.mgAFDM", "mean_biom", "W", "M", "D", "T", "PB")] # remove the biological traits' columns
data$P <- data$mean_biom * data$J.mgAFDM * data$PB  # P in kJ/m2/y


# Estimate the annual secondary production (P) of the macrobenthic community (final_df)
final_df <- aggregate(P~year+site+station,data=data,sum)






#---------------------------------------------------------------------------------------------#
#                                       References
#---------------------------------------------------------------------------------------------#

# This study (Saulnier et al. xxxx). Limnology & Oceanography.

# Brey, T., 2012. A multi-parameter artificial neural network model to estimate macrobenthic invertebrate productivity and production. 
# Limnol. Oceanogr. Methods 10, 581-589

# Brey, T., Müller-Wiegmann, C., Zittier, Z.M.C., Hagen, W., 2010. Body composition in aquatic organisms - A global data bank of relationships 
# between mass, elemental composition and energy content. J. Sea Res. 64, 334-340