_biogeog_workshop_GIS_v4.R
##############################################################
# INTRODUCTORY STUFF
#
# This script is by Nick Matzke (and whoever else adds to this PhyloWiki page)
# Copyright 2011-infinity
# matzkeATberkeley.edu
# January 2011
#
# Please link/cite if you use this, email me if you have 
#   thoughts/improvements/corrections.
#
##############################################################
#
# Free to use/redistribute under:
# Attribution-NonCommercial-ShareAlike 3.0 Unported (CC BY-NC-SA 3.0) 
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the above license, linked here:
# 
# http://creativecommons.org/licenses/by-nc-sa/3.0/
# 
# Summary:
#
# You are free:
#
#   * to Share — to copy, distribute and transmit the work
#   * to Remix — to adapt the work
#
# Under the following conditions:
#
#   * Attribution — You must attribute the work in the manner 
#     specified by the author or licensor (but not in any way that 
#     suggests that they endorse you or your use of the work).
#   * Noncommercial — You may not use this work for commercial purposes. 
#
#   * Share Alike — If you alter, transform, or build upon this work,
#     you may distribute the resulting work only under the same or
#     similar license to this one. 
#
# http://creativecommons.org/licenses/by-nc-sa/3.0/
# 
###################################################################

########################################################
# SCRIPT DESCRIPTION
# 
# A demo showing how GIS, GBIF, and phylogenetics can be
# integrated in R.
# 
# These scripts were both meant as "for instance" demos,
# to give users a taste of what you can do with R, and,
# hopefully, the ability to get the basic code running
# themselves. They are not meant to be complete analyses
# by themselves.
#
# Script #2/2 for:
#
# Evolution of Life on Pacific Islands and Reefs: Past, present, and future
# WORKSHOP: Methodological Workshop on Biodiversity Dynamics
# THURSDAY, May 26, 2011: 9am - 1pm
# http://botany.si.edu/events/2011_pacific/program.htm
########################################################

#===================================================
# SETUP HINTS
#
# INSTRUCTIONS:
# 1. Before running this script, run the code in
#    "_biogeog_workshop_GIS_v3_installation.R"
#
#    ...this will install the various packages.
#    
#    Most packages will install easily, but RGDAL
#    requires that your computer have the GDAL
#    libraries installed outside of R.
#
#    This can be difficult.  On a Mac OS X, you should
#    able to type, in the "Terminal" window:
#
#    sudo port install gdal
#
#    On a PC, try the GDAL binaries (a "binary" or
#    "executable" is a precompiled, ready-to-run / 
#    ready-to-execute file)
#
#    They are linked from here:
#    http://trac.osgeo.org/gdal/wiki/DownloadingGdalBinaries
#    sudo port install gdal
#
# 2. Next, you have to "source" my utility scripts,
#    which contain special functions I have written.
#    
#    The "source" command make the scripts accessible
#    by R.
# 
#    You can download the scripts and source them
#    locally if you like.  This means you have to 
#    put the correct directory into "sourcedir"
#    (I use /_njm/ on my computer, but you should 
#     make the directory wherever you downloaded 
#     the scripts to.).
#
#    Alternatively, you can source the versions I 
#    have put on the web -- but these will only 
#    work as long as they are online on this 
#    Berkeley course website, which might not be
#    up forever.
#    
# Note on directories: R is always looking at a 
# working directory, and only sees files
# in that directory.  You have to tell R
# what directory it should look at.
# 
# setwd("directory_pathname_here")
# ...sets the wd (wd=working directory)
#
# getwd()
# ...gets the wd and prints it to screen so you
# can see what working directory you are in
#
# list.files()
# ...lists the files in the working directory.
#
#
# 2a. Mac OS X directories are written like this:
#     /Users/nick/Desktop/_2011-01_IBS_meeting
# 
# 2b. For some reason, in *WINDOWS* R, Windows PC
#     directories are written like this:
#     C:\\Documents\ and\ Settings\\My\ Documents\\_2011-01_IBS_meeting
# 
#     ...which represents:
#     "C:/Documents and Settings/My Documents/_2011-01_IBS_meeting"
#
#     ...basically, Windows R wants "\\" instead of "/", and
#     "| " instead of " ".  (In general, just avoid spaces in
#     filenames and directories.)
# 
# 2c. Directory paths, and filenames, are character strings,
#     which means R won't recognize them unless they are
#     put in "quotes_like_this".
#
# 3. Make sure you have the data files I provided in your
#    working directory ("getwd()").  Check if the files are
#    there with "list.files()".
#
# 4. If you can get #1-3 working, the rest should just be
#    a matter of copying-and-pasting the below code
#    into R.
#===================================================

# Local sourcing
#===================================================
sourcedir = '/_njm/'
source3 = '_R_tree_functions_v1.R'
source(paste(sourcedir, source3, sep=""))

sourcedir = '/_njm/'
source3 = '_genericR_v1.R'
source(paste(sourcedir, source3, sep=""))

sourcedir = '/_njm/'
source3 = '_R_GBIF_functions_v1.R'
source(paste(sourcedir, source3, sep=""))

# For latlong
source5 = '_phylogeostats_v1.R'
source(paste(sourcedir, source5, sep=""))

# may be needed for _biogeog_sim_utils
sourcedir4 = '/_njm/'
source4 = 'pbdbtools_v2.R'
source(paste(sourcedir4, source4, sep=""))

sourcedir8 = '/_njm/'
source8 = '_biogeog_sim_utils_v1.R'
source(paste(sourcedir8, source8, sep=""))

#===================================================

# Remote sourcing
#===================================================
source3 = 'http://ib.berkeley.edu/courses/ib200b/labs/_code/_R_tree_functions_v1.R'
source(source3)

source3 = 'http://ib.berkeley.edu/courses/ib200b/labs/_code/_genericR_v1.R'
source(source3)

source3 = 'http://ib.berkeley.edu/courses/ib200b/labs/_code/_R_GBIF_functions_v1.R'
source(source3)

# For latlong
source5 = 'http://ib.berkeley.edu/courses/ib200b/labs/_code/_phylogeostats_v1.R'
source(source5)

source5 = 'http://ib.berkeley.edu/courses/ib200b/labs/_code/pbdbtools_v2.R'
source(source5)

source5 = 'http://ib.berkeley.edu/courses/ib200b/labs/_code/_biogeog_sim_utils_v1.R'
source(source5)

#===================================================

#########################
# Finished with getting packages etc.

#########################

#===================================================
# Run with these commands
#===================================================
library(ape)
library(phylobase)
library(gdata) # needed for trim (whitespace trimming) function
library(lattice) # for histogram

# for mapping
library(dismo)
library(maptools)
library(sp)
library(rgdal)     # for readOGR

# for loading ASCII grid
library(adehabitat)
library(maptools)

# You'll have to set your own working directory, of course...
#wd = "/_njm/BioGeoR"
wd = "/Users/nick/Desktop/_2011-05-25_Hawaii_biogeog_workshop/_code"
setwd(wd)

###########################################################
###########################################################
# MAIN SCRIPT -- INTEGRATING GIS AND PHYLOGENETICS
###########################################################
###########################################################

###########################################################
# 1. Display biogeog data
# Hawaii:
# DEM elevation
# Mean precip
# Mean temp
###########################################################

###################################
# GIS STUFF
# Load shapefile data
###################################

##################################
# Load DEM data -- as basemap
# source: http://www.prism.oregonstate.edu/pub/prism/pacisl/
##################################

# Coarsening a grid:
# https://stat.ethz.ch/pipermail/r-sig-geo/2011-May/011798.html
# output.dim
# The number of rows and columns to return in the created object 
# using GDAL's method to take care of image decimation / replication;
# presently ordered (y,x) - this may change
# for dem, default:
xdim = 1328
ydim = 811
xdim_new = floor(xdim/4)
ydim_new = floor(ydim/4)

fn1 = "hi_dem_15s.asc"

# full resolution
# dem_orig = readGDAL(fn1)

# subsampling
dem = readGDAL(fn1, output.dim=c(ydim_new, xdim_new))

proj4string(dem)=CRS("+proj=longlat +ellps=GRS80 +datum=NAD83")
attr(dem, "bbox")
basemap = dem

demr = raster(fn1)
image(dem)
box()
title("Hawaii elevation DEM")

#######################################################
# Load raingauge positions
#######################################################
fn = "raingauge_n83.shp/raingauge_n83.shp"
raingauge = readOGR(fn, "raingauge_n83")
# plot(raingauge)

# match the projection to coastlines
raingauge_reproj = match_projection_and_extent(input_layer=raingauge, basemap)

# Extract some DEM elevations from the rain gauge locations

# extracting points requires a raster::raster object, 
# and e.g. a co-projected SpatialPoints object...
# http://www.mail-archive.com/r-sig-geo@stat.math.ethz.ch/msg04662.html
demr_pts = extract(demr, raingauge_reproj)

##################################
# Load precip data
# source: http://www.prism.oregonstate.edu/pub/prism/pacisl/
##################################
ppt_fn = "hi_ppt_1971_2000.14.asc"

# original and new dimensions
xdim = 1328
ydim = 811
xdim_new = floor(xdim/4)
ydim_new = floor(ydim/4)

# full resolution
# ppt_orig = readGDAL(ppt_fn)

# subsampling
ppt = readGDAL(ppt_fn, output.dim=c(ydim_new, xdim_new))

proj4string(ppt)=CRS("+proj=longlat +ellps=GRS80 +datum=NAD83")
attr(ppt, "bbox")
image(ppt)
box()
title("Hawaii annual precip (mm)")

# extract points
pptr = raster(ppt_fn)
pptr_pts = extract(pptr, raingauge_reproj)

##################################
# Load mean temperature data
# source: http://www.prism.oregonstate.edu/pub/prism/pacisl/
##################################
temp_fn = "hi_tdmean_1971_2000.14.asc"

tdmean_orig = readGDAL(temp_fn)

# original and new dimensions
xdim = 1328
ydim = 811
xdim_new = floor(xdim/4)
ydim_new = floor(ydim/4)

tdmean = readGDAL(temp_fn, output.dim=c(ydim_new, xdim_new))

proj4string(tdmean)=CRS("+proj=longlat +ellps=GRS80 +datum=NAD83")
attr(tdmean, "bbox")
image(tdmean)
box()
title("Hawaii mean temperature")

# extract points
tdmeanr = raster(temp_fn)
tdmean_pts = extract(tdmeanr, raingauge_reproj)

#######################################################
# Load coastline data
#######################################################
fn = "coast_n83.shp/coast_n83.shp"
coastline <- readOGR(fn, "coast_n83")
coastline_reproj = match_projection_and_extent(input_layer=coastline, basemap)
plot(coastline_reproj)

# plot coastline data on top of grid
# outlist = make_feature_list_to_plot_polygons(coastline_reproj)
# spplot(basemap, zcol="band1", sp.layout=outlist)

########################################################
# Plot vector layers on top of Raster layer (the DEM)
########################################################
tmpcols = topo.colors(16)
tmpcols[1] = "grey90"
tmpcols[11:15] = tmpcols[12:16]
tmpcols[16] = "white"
spplot(dem, "band1", col.regions=tmpcols, title="Hawaii DEM raster with rain gauges (+) and\ncoastline vectors plotted on top", panel = function(...)
    {
    panel.gridplot(...)
    sp.points(raingauge_reproj, cex=0.3, col="black")#pch=".")
    sp.polygons(coastline_reproj, fill="transparent", col="black", lty="dashed", lwd=1)
    } )

###########################################################
# 2. Get species location data using 
# species distribution modeling package (dismo)
###########################################################

# GBIF with R
library(dismo)
library(ape)

spname = c("hawaiiensis", "mauiensis", "fauriei", "hathewayi", "kaduana", "mauiensis", "kaduana", "greenwelliae", "mariniana", "wawrae", "grandiflora", "hobdyi", "hexandra")

gbif_res = NULL
for (i in 1:length(spname))
    {
    tmp_gbif_res = gbif2(genus='Psychotria', species=spname[i], geo=TRUE, sp=FALSE)
    if (nrow(tmp_gbif_res) > 0)
        {
        gbif_res = rbind(gbif_res, tmp_gbif_res)
        }
    }

# Turn into SpatialPoints 
tmpcoords = adf(cbind(gbif_res$lat, gbif_res$lon))
names(tmpcoords) = c("y", "x")
coordinates(tmpcoords) = ~x+y
df = SpatialPointsDataFrame(coords=tmpcoords, data=gbif_res)
proj4string(df) = CRS("+proj=longlat  +ellps=GRS80 +datum=NAD83")
bbox(df)

df_reproj = match_projection_and_extent(input_layer=df, basemap)
plot(df_reproj)
title("Hawaii Psychotria collections in GBIF")

########################################################
# Plot vector layers on top of Raster layer (the DEM)
########################################################
tmpcols = topo.colors(16)
tmpcols[1] = "grey90"
tmpcols[11:15] = tmpcols[12:16]
tmpcols[16] = "white"
spplot(dem, "band1", col.regions=tmpcols, panel = function(...)
    {
    panel.gridplot(...)
    sp.points(df_reproj, cex=1, col="black", pch=19)
    sp.polygons(coastline_reproj, fill="transparent", col="black", lty="dashed", lwd=1)
    } )

###########################################################
# 3. Bring in phylogenetics
###########################################################
# phylogenetic tree from:
# http://www.reelab.net/lagrange/configurator/index

newick_str = "((((((((P_hawaiiensis_WaikamoiL1:0.010853,P_mauiensis_Eke:0.010853):0.007964,(P_fauriei2:0.013826,P_hathewayi_1:0.013826):0.004991):0.001986,(P_kaduana_PuuKukuiAS:0.020803,P_mauiensis_PepeAS:0.020803):1e-05):0.003762,P_kaduana_HawaiiLoa:0.024565):0.003398,(P_greenwelliae07:0.01271500,P_greenwelliae907:0.01271500):0.01524800):0.018984,((((P_mariniana_MauiNui:0.02241,P_hawaiiensis_Makaopuhi:0.02241):0.008236,P_mariniana_Oahu:0.030646):0.002893,P_mariniana_Kokee2:0.033539):0.005171,P_wawraeDL7428:0.03871):0.008237):0.008255,(P_grandiflora_Kal2:0.027864,P_hobdyi_Kuia:0.027864):0.027338):0.003229,((P_hexandra_K1:0.026568,P_hexandra_M:0.026568):0.005204,P_hexandra_Oahu:0.031771):0.026659);"

tr = read.tree(file="", text=newick_str)
plot(tr)

# simplify tree
tips_to_drop = c("P_mauiensis_PepeAS", "P_hexandra_M", "P_greenwelliae907", "P_mariniana_MauiNui", "P_hexandra_K1", "P_kaduana_HawaiiLoa", "P_mariniana_Oahu", "P_hawaiiensis_Makaopuhi")
tr2 = drop.tip(tr, tips_to_drop)
plot(tr2)

tr2$tip.label = c("Psychotria_hawaiiensis", "Psychotria_mauiensis", "Psychotria_fauriei", "Psychotria_hathewayi", "Psychotria_kaduana", "Psychotria_greenwelliae", "Psychotria_mariniana", "Psychotria_wawrae", "Psychotria_grandiflora", "Psychotria_hobdyi", "Psychotria_hexandra")

plot(tr2)

# Extract some points

dem_pts = extract(demr, df_reproj)
ppt_pts = extract(pptr, df_reproj)
tdmean_pts = extract(tdmeanr, df_reproj)

# redo names
tmpnames = df_reproj$species
for (i in 1:length(tmpnames))
    {
    tmprow = strsplit(tmpnames[i], " ")
    newrow = paste(tmprow[[1]][1], "_", tmprow[[1]][2], sep="")
    tmpnames[i] = newrow
    }

pts = cbind(tmpnames, dem_pts, ppt_pts, tdmean_pts)
pts = adf(pts)

# remove nas
pts = pts[is.na(pts[, 3])==FALSE, ]

unique_names = unique(pts[,1])

env_means = NULL
for (i in 1:len(unique_names))
    {
    tmpname = unique_names[i]
    tmpdf = adf(pts[pts[,1] == tmpname, 2:4])

    tmprow = colMeans(dfnums_to_numeric(tmpdf))
    tmprow2 = (c(tmpname, tmprow))
    env_means = rbind(env_means, tmprow2)
    }
env_means = adf2(env_means)
env_means = dfnums_to_numeric(env_means)

tips_to_drop = tr2$tip.label %in% env_means$V1 == FALSE
tr3 = drop.tip(tr2, tr2$tip.label[tips_to_drop])

###########################################################
# 4. Ancestral character estimation of mean elevation
#    preference
###########################################################

# order table
tmpmatch = match(tr3$tip.label, env_means$V1)

env_means2 = env_means[, 2:4]
row.names(env_means2) = env_means[, 1]

(MLdem = ace(env_means2[,1], tr3, type="continuous", method="ML", CI=TRUE))
(MLppt = ace(env_means2[,2], tr3, type="continuous", method="ML", CI=TRUE))
(MLtdmean = ace(env_means2[,3], tr3, type="continuous", method="ML", CI=TRUE))

plot(tr3)
tiplabels(round(env_means2[,1]), 1:9)
nodelabels(round(MLdem$ace), 10:17)
#################################################
# From here you can do lots of stuff!
#################################################

save.image(file="_biogeog_workshop_GIS_v4.Rdata")
Unless otherwise stated, the content of this page is licensed under Creative Commons Attribution-ShareAlike 3.0 License