# cloud forest distribution modelling

OUTPUT<-c("/home/karger/Documents/bio/R_Backup/cloudforests/results/V4/")


# LOAD THE REQUIRED LIBRARIES
require(raster)
require(rasterVis)
require(dismo)
require(PresenceAbsence)
require(AUC)
require(mgcv) # the package for GAM; there is another one called "gam"
require(rpart) # the package for CART; there is also a "tree" package.
require(spatial.tools)
require(doParallel)
library(randomForest)
#opar <- par(no.readonly = TRUE)

setwd("/home/karger/Documents/bio/R_Backup/cloudforests/")

# required functions
source("0.Functions/panel.cor.fun")
source("0.Functions/panel.hist.fun")
source("0.Functions/panel.smooth.fun")
source("0.Functions/adj.D2.glm.fun")
source("0.Functions/vartest0.glm.fun")
source('0.Functions/cv.glm.strat.lgit.fun')
source('0.Functions/cv.gam.strat.fun')

# predictors
r1<-raster("/mnt/md0/chelsa/bioclim_v1.2/V1.2.2/bio1.sdat")

r2<-raster("/mnt/md0/data/Cloud_climatology/MODCF_interannualSD.sdat")
r3<-raster("/mnt/md0/data/Cloud_climatology/MODCF_intraannualSD.sdat")
r4<-raster("/mnt/md0/data/Cloud_climatology/MODCF_meanannual.sdat")
r5<-raster("/mnt/md0/chelsa/exCHELSA/treeline_mountain_belts/CHELSA_distance2treeline_1979-2013.tif")
r6<-raster("/mnt/md0/data/mn30_grd/mn30.sdat")
r7<-raster("/mnt/md0/chelsa/bioclim_v1.2/V1.2.2/bio4.sdat")           
r8<-raster("/mnt/md0/chelsa/bioclim_v1.2/V1.2.2/bio1.sdat")           
                                          
# predictant
shp1<-shapefile("/home/karger/Documents/bio/R_Backup/cloudforests/cloud_forest_points_1997.shp")

# stack
rs<-stack(r1,r2,r3,r4,r5,r6,r7,r8)

names(rs)<-c("prec","cloud_inter","cloud_intra","cloud_mean","elev","gmted","tseas","temp")

#absence
#bg <- randomPoints(r2, length(shp1)*100)
bg <- read.table("/home/karger/Documents/bio/R_Backup/cloudforests/randompoints.txt",header=T)
bgv<- rep(0,length(bg[,1]))
bg2<- cbind(bg,as.matrix(bgv))

#presence
aa <- coordinates(shp1)
a1 <- rep(1,length(aa[,1]))
aa2<- cbind(aa,as.matrix(a1))

colnames(bg2)<-c("x","y","pa")
colnames(aa2)<-c("x","y","pa")

pa <-rbind(bg2,aa2)
pa <-as.data.frame(pa)
coordinates(pa)<-~x+y


# extract values
#m<-extract(rs,pa)
m<-read.table("cf_extracted.txt")
pa <-rbind(bg2,aa2)
pa_df<-cbind(m,pa[,3])
colnames(pa_df)<-c("prec","cloud_inter","cloud_intra","cloud_mean","elev","gmted","tseas","temp","pa")
pa_df<-as.data.frame(pa_df)
pa_df<-pa_df[complete.cases(pa_df),]

p_length<-length(pa_df$pa[pa_df$pa==1])           # NUMBER OF PRESENCE
a_length<-length(pa_df$pa[pa_df$pa==0])           # NUMBER OF ABSENCES
pa_df$weights<-pa_df$pa                           # CREATE VECTOR TO BE FILLED WITH WEIGHTS
pa_df$weights[pa_df$pa==1]<-1                     # INVERSE WEIGHTING OF PRESENCES BY ABSENCES
pa_df$weights[pa_df$pa==0]<-p_length/a_length     # INVERSE WEIGHTING OF ABSENCES BY PRESENCES

pa_df$weight=1e-6
pa_df$weight[pa_df$pa==1]<-1

# Predicive power of predictors
var_pred_pow<-vartest0.glm(pa_df$pa, pa_df[, 1:8]) * 100
rownames(var_pred_pow)<-names(pa_df[1:8])
colnames(var_pred_pow)<-c("D2")
outname<-paste(OUTPUT,"table_var_predictive_power.txt",sep="")
write.table(var_pred_pow,file=outname)

glm_bino <- glm(pa ~ cloud_intra + cloud_mean *prec + elev + I(elev^2) + temp + I(temp^2), family = "binomial" (link="logit"), weights=weights, data=pa_df)
glm_pois <- glm(pa/weights ~ cloud_intra + cloud_mean *prec + elev + I(elev^2) + temp + I(temp^2), family = "poisson" (link="log"), data=pa_df)
gam.full <- gam(pa ~ s(cloud_intra, k = 10) + s(cloud_mean, k = 10) + s(prec, k = 10) + s(elev, k = 10) + s(temp, k = 10), family = "binomial", data = pa_df, weights = weights )


                 
sink(file = paste(OUTPUT,"table_model_outputs.txt",sep=""), append = TRUE)
    summary(glm_bino)
    adj.D2.glm(glm_bino)
    summary(glm_pois)
    adj.D2.glm(glm_pois) 
    summary(gam.full)
    adj.D2.glm(gam.full) 
sink()



# cross-validation
#w.names(pa_df) <- 1:nrow(pa_df)

# cross-validation

row.names(pa_df) <- 1:nrow(pa_df)

glm_bino.xval          <- cv.glm.strat.lgit(pa_df, glm_bino, 5, lvo.cv = TRUE)
glm_pois.xval          <- cv.glm.strat.lgit(pa_dfW, glm_pois, 5, lvo.cv = TRUE)
gam.full.xval          <- cv.gam.strat(pa_df, gam.full, 5, lvo.cv = TRUE)
save(glm_bino.xval,file=paste0(OUTPUT,"glm_bino_xval.RDA"))
save(glm_pois.xval,file=paste0(OUTPUT,"glm_pois_xval.RDA"))
save(gam.full.xval,file=paste0(OUTPUT,"gam_xval.RDA"))

#read("gam_xval.RDA")
#read("glm_bino_xval.RDA")
#read("glm_pois_xval.RDA")
range01 <- function(x){(x-min(x))/(max(x)-min(x))}

pdf(paste(OUTPUT,"figure_model_fitted_comp.pdf",sep=""))
    plot(glm_bino$fitted.values, glm_bino.xval$predicted)
    abline(0, 1, lwd = 3, col = "red")
    plot(range01(glm_pois$fitted.values), range01(glm_pois.xval$predicted))
    abline(0, 1, lwd = 3, col = "red")
    plot(gam.full$fitted.values, gam.full.xval$predicted)
    abline(0, 1, lwd = 3, col = "red")
dev.off()

# Threshold optimization
Idw<-seq(1,length(pa_df$pa),1)
idw<-as.data.frame(Idw)
colnames(idw)<-c("id")

glm_bino.xval.test               <- data.frame(cbind(Idw, pa_df$pa, glm_bino.xval$predicted))
gam.full.xval.test               <- data.frame(cbind(Idw, pa_df$pa, gam.full.xval$predicted))
glm_pois.xval.test               <- data.frame(cbind(Idw, pa_df$pa, glm_pois.xval$predicted))


names(gam.full.xval.test)[2:3]                       <- c("observed", "predicted")
names(glm_bino.xval.test)[2:3]                       <- c("observed", "predicted")
names(glm_pois.xval.test)[2:3]                       <- c("observed", "predicted")
glm_pois.xval.test$predicted<-range01(glm_pois.xval.test$predicted)


thres_bino     <-optimal.thresholds(glm_bino.xval.test, threshold = 1001, opt.methods = 1:9)

thres_pois     <-optimal.thresholds(glm_pois.xval.test, threshold = 1001, opt.methods = 1:9)

thres.gam.full <- optimal.thresholds(gam.full.xval.test,opt.methods = 1:9, threshold = 1001)


sink(file = paste(OUTPUT,"table_model_thresholds.txt",sep=""), append = TRUE)
      print("glm_bino_3")
      thres_bino
      print("glm_pois_3")
      thres_pois
      print("gam_3")
      thres.gam.full
sink()

# model performance using kappa satistics
sink(file = paste(OUTPUT,"table_model_kappa_statistics.txt",sep=""), append = TRUE)
      print("glm_bino")
      Kappa(cmx(glm_bino.xval.test, threshold = thres_bino[2,2]))
      print("glm_pois")
      Kappa(cmx(glm_pois.xval.test, threshold = thres_pois[2,2]))
      print("gam")
      Kappa(cmx(gam.full.xval.test, threshold = thres.gam.full[2,2]))
sink()

# roc curves
glm_bino.roc.full          <- roc(glm_bino.xval.test$predicted, as.factor(glm_bino$model$pa))
roc.gam.full               <- roc(gam.full.xval.test$predicted, as.factor(gam.full$model$pa))
glm_pois.roc.full          <- roc(glm_pois.xval.test$predicted, as.factor(glm_pois$model$pa))

pdf(paste(OUTPUT,"figure_model_fitted_comp.pdf",sep=""))
      plot(glm_bino.roc.full, col = "red", lwd = 2, lty = 1)
      plot(glm_pois.roc.full, col = "blue", lwd = 2, lty = 1,add=T)
      plot(roc.gam.full, col = "green", lwd = 2, lty = 1,add=T)
dev.off()

sink(file = paste(OUTPUT,"table_model_AUC.txt",sep=""), append = TRUE)
      print("glm_bino_3")
      auc(glm_bino.roc.full)
      print("glm_pois_3")
      auc(glm_pois.roc.full)
      print("gam_3")
      auc(roc.gam.full)
sink()

sink(file = paste(OUTPUT,"table_model_TSS.txt",sep=""), append = TRUE)
print("glm_bino_3")
cmx                     <- cmx(glm_bino.xval.test, threshold = thres_bino[2,2])                                                 # TRESHOLD MODEL MAX KAPPA
tss                     <- ((PresenceAbsence::sensitivity(cmx)[1])+(PresenceAbsence::specificity(cmx)[1])-1)   # CALCULATE TSS
print(tss)
print("glm_pois_3")
cmx                     <- cmx(gam.full.xval.test, threshold = thres.gam.full[2,2])                                                 # TRESHOLD MODEL MAX KAPPA
tss                     <- ((PresenceAbsence::sensitivity(cmx)[1])+(PresenceAbsence::specificity(cmx)[1])-1)   # CALCULATE TSS
print(tss)
cmx                     <- cmx(glm_pois.xval.test, threshold = thres_pois[2,2])                                                 # TRESHOLD MODEL MAX KAPPA
tss                     <- ((PresenceAbsence::sensitivity(cmx)[1])+(PresenceAbsence::specificity(cmx)[1])-1)   # CALCULATE TSS
print(tss)
sink()
