_biogeog_workshop_GIS_v4.R
##############################################################
# INTRODUCTORY STUFF
#
# This script is by Nick Matzke (and whoever else adds to this PhyloWiki page)
# Copyright 2011-infinity
# matzkeATberkeley.edu
# January 2011
#
# Please link/cite if you use this, email me if you have
# thoughts/improvements/corrections.
#
##############################################################
#
# Free to use/redistribute under:
# Attribution-NonCommercial-ShareAlike 3.0 Unported (CC BY-NC-SA 3.0)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the above license, linked here:
#
# http://creativecommons.org/licenses/by-nc-sa/3.0/
#
# Summary:
#
# You are free:
#
# * to Share — to copy, distribute and transmit the work
# * to Remix — to adapt the work
#
# Under the following conditions:
#
# * Attribution — You must attribute the work in the manner
# specified by the author or licensor (but not in any way that
# suggests that they endorse you or your use of the work).
# * Noncommercial — You may not use this work for commercial purposes.
#
# * Share Alike — If you alter, transform, or build upon this work,
# you may distribute the resulting work only under the same or
# similar license to this one.
#
# http://creativecommons.org/licenses/by-nc-sa/3.0/
#
###################################################################
########################################################
# SCRIPT DESCRIPTION
#
# A demo showing how GIS, GBIF, and phylogenetics can be
# integrated in R.
#
# These scripts were both meant as "for instance" demos,
# to give users a taste of what you can do with R, and,
# hopefully, the ability to get the basic code running
# themselves. They are not meant to be complete analyses
# by themselves.
#
# Script #2/2 for:
#
# Evolution of Life on Pacific Islands and Reefs: Past, present, and future
# WORKSHOP: Methodological Workshop on Biodiversity Dynamics
# THURSDAY, May 26, 2011: 9am - 1pm
# http://botany.si.edu/events/2011_pacific/program.htm
########################################################
#===================================================
# SETUP HINTS
#
# INSTRUCTIONS:
# 1. Before running this script, run the code in
# "_biogeog_workshop_GIS_v3_installation.R"
#
# ...this will install the various packages.
#
# Most packages will install easily, but RGDAL
# requires that your computer have the GDAL
# libraries installed outside of R.
#
# This can be difficult. On a Mac OS X, you should
# able to type, in the "Terminal" window:
#
# sudo port install gdal
#
# On a PC, try the GDAL binaries (a "binary" or
# "executable" is a precompiled, ready-to-run /
# ready-to-execute file)
#
# They are linked from here:
# http://trac.osgeo.org/gdal/wiki/DownloadingGdalBinaries
# sudo port install gdal
#
# 2. Next, you have to "source" my utility scripts,
# which contain special functions I have written.
#
# The "source" command make the scripts accessible
# by R.
#
# You can download the scripts and source them
# locally if you like. This means you have to
# put the correct directory into "sourcedir"
# (I use /_njm/ on my computer, but you should
# make the directory wherever you downloaded
# the scripts to.).
#
# Alternatively, you can source the versions I
# have put on the web -- but these will only
# work as long as they are online on this
# Berkeley course website, which might not be
# up forever.
#
# Note on directories: R is always looking at a
# working directory, and only sees files
# in that directory. You have to tell R
# what directory it should look at.
#
# setwd("directory_pathname_here")
# ...sets the wd (wd=working directory)
#
# getwd()
# ...gets the wd and prints it to screen so you
# can see what working directory you are in
#
# list.files()
# ...lists the files in the working directory.
#
#
# 2a. Mac OS X directories are written like this:
# /Users/nick/Desktop/_2011-01_IBS_meeting
#
# 2b. For some reason, in *WINDOWS* R, Windows PC
# directories are written like this:
# C:\\Documents\ and\ Settings\\My\ Documents\\_2011-01_IBS_meeting
#
# ...which represents:
# "C:/Documents and Settings/My Documents/_2011-01_IBS_meeting"
#
# ...basically, Windows R wants "\\" instead of "/", and
# "| " instead of " ". (In general, just avoid spaces in
# filenames and directories.)
#
# 2c. Directory paths, and filenames, are character strings,
# which means R won't recognize them unless they are
# put in "quotes_like_this".
#
# 3. Make sure you have the data files I provided in your
# working directory ("getwd()"). Check if the files are
# there with "list.files()".
#
# 4. If you can get #1-3 working, the rest should just be
# a matter of copying-and-pasting the below code
# into R.
#===================================================
# Local sourcing
#===================================================
sourcedir = '/_njm/'
source3 = '_R_tree_functions_v1.R'
source(paste(sourcedir, source3, sep=""))
sourcedir = '/_njm/'
source3 = '_genericR_v1.R'
source(paste(sourcedir, source3, sep=""))
sourcedir = '/_njm/'
source3 = '_R_GBIF_functions_v1.R'
source(paste(sourcedir, source3, sep=""))
# For latlong
source5 = '_phylogeostats_v1.R'
source(paste(sourcedir, source5, sep=""))
# may be needed for _biogeog_sim_utils
sourcedir4 = '/_njm/'
source4 = 'pbdbtools_v2.R'
source(paste(sourcedir4, source4, sep=""))
sourcedir8 = '/_njm/'
source8 = '_biogeog_sim_utils_v1.R'
source(paste(sourcedir8, source8, sep=""))
#===================================================
# Remote sourcing
#===================================================
source3 = 'http://ib.berkeley.edu/courses/ib200b/labs/_code/_R_tree_functions_v1.R'
source(source3)
source3 = 'http://ib.berkeley.edu/courses/ib200b/labs/_code/_genericR_v1.R'
source(source3)
source3 = 'http://ib.berkeley.edu/courses/ib200b/labs/_code/_R_GBIF_functions_v1.R'
source(source3)
# For latlong
source5 = 'http://ib.berkeley.edu/courses/ib200b/labs/_code/_phylogeostats_v1.R'
source(source5)
source5 = 'http://ib.berkeley.edu/courses/ib200b/labs/_code/pbdbtools_v2.R'
source(source5)
source5 = 'http://ib.berkeley.edu/courses/ib200b/labs/_code/_biogeog_sim_utils_v1.R'
source(source5)
#===================================================
#########################
# Finished with getting packages etc.
#########################
#===================================================
# Run with these commands
#===================================================
library(ape)
library(phylobase)
library(gdata) # needed for trim (whitespace trimming) function
library(lattice) # for histogram
# for mapping
library(dismo)
library(maptools)
library(sp)
library(rgdal) # for readOGR
# for loading ASCII grid
library(adehabitat)
library(maptools)
# You'll have to set your own working directory, of course...
#wd = "/_njm/BioGeoR"
wd = "/Users/nick/Desktop/_2011-05-25_Hawaii_biogeog_workshop/_code"
setwd(wd)
###########################################################
###########################################################
# MAIN SCRIPT -- INTEGRATING GIS AND PHYLOGENETICS
###########################################################
###########################################################
###########################################################
# 1. Display biogeog data
# Hawaii:
# DEM elevation
# Mean precip
# Mean temp
###########################################################
###################################
# GIS STUFF
# Load shapefile data
###################################
##################################
# Load DEM data -- as basemap
# source: http://www.prism.oregonstate.edu/pub/prism/pacisl/
##################################
# Coarsening a grid:
# https://stat.ethz.ch/pipermail/r-sig-geo/2011-May/011798.html
# output.dim
# The number of rows and columns to return in the created object
# using GDAL's method to take care of image decimation / replication;
# presently ordered (y,x) - this may change
# for dem, default:
xdim = 1328
ydim = 811
xdim_new = floor(xdim/4)
ydim_new = floor(ydim/4)
fn1 = "hi_dem_15s.asc"
# full resolution
# dem_orig = readGDAL(fn1)
# subsampling
dem = readGDAL(fn1, output.dim=c(ydim_new, xdim_new))
proj4string(dem)=CRS("+proj=longlat +ellps=GRS80 +datum=NAD83")
attr(dem, "bbox")
basemap = dem
demr = raster(fn1)
image(dem)
box()
title("Hawaii elevation DEM")
#######################################################
# Load raingauge positions
#######################################################
fn = "raingauge_n83.shp/raingauge_n83.shp"
raingauge = readOGR(fn, "raingauge_n83")
# plot(raingauge)
# match the projection to coastlines
raingauge_reproj = match_projection_and_extent(input_layer=raingauge, basemap)
# Extract some DEM elevations from the rain gauge locations
# extracting points requires a raster::raster object,
# and e.g. a co-projected SpatialPoints object...
# http://www.mail-archive.com/r-sig-geo@stat.math.ethz.ch/msg04662.html
demr_pts = extract(demr, raingauge_reproj)
##################################
# Load precip data
# source: http://www.prism.oregonstate.edu/pub/prism/pacisl/
##################################
ppt_fn = "hi_ppt_1971_2000.14.asc"
# original and new dimensions
xdim = 1328
ydim = 811
xdim_new = floor(xdim/4)
ydim_new = floor(ydim/4)
# full resolution
# ppt_orig = readGDAL(ppt_fn)
# subsampling
ppt = readGDAL(ppt_fn, output.dim=c(ydim_new, xdim_new))
proj4string(ppt)=CRS("+proj=longlat +ellps=GRS80 +datum=NAD83")
attr(ppt, "bbox")
image(ppt)
box()
title("Hawaii annual precip (mm)")
# extract points
pptr = raster(ppt_fn)
pptr_pts = extract(pptr, raingauge_reproj)
##################################
# Load mean temperature data
# source: http://www.prism.oregonstate.edu/pub/prism/pacisl/
##################################
temp_fn = "hi_tdmean_1971_2000.14.asc"
tdmean_orig = readGDAL(temp_fn)
# original and new dimensions
xdim = 1328
ydim = 811
xdim_new = floor(xdim/4)
ydim_new = floor(ydim/4)
tdmean = readGDAL(temp_fn, output.dim=c(ydim_new, xdim_new))
proj4string(tdmean)=CRS("+proj=longlat +ellps=GRS80 +datum=NAD83")
attr(tdmean, "bbox")
image(tdmean)
box()
title("Hawaii mean temperature")
# extract points
tdmeanr = raster(temp_fn)
tdmean_pts = extract(tdmeanr, raingauge_reproj)
#######################################################
# Load coastline data
#######################################################
fn = "coast_n83.shp/coast_n83.shp"
coastline <- readOGR(fn, "coast_n83")
coastline_reproj = match_projection_and_extent(input_layer=coastline, basemap)
plot(coastline_reproj)
# plot coastline data on top of grid
# outlist = make_feature_list_to_plot_polygons(coastline_reproj)
# spplot(basemap, zcol="band1", sp.layout=outlist)
########################################################
# Plot vector layers on top of Raster layer (the DEM)
########################################################
tmpcols = topo.colors(16)
tmpcols[1] = "grey90"
tmpcols[11:15] = tmpcols[12:16]
tmpcols[16] = "white"
spplot(dem, "band1", col.regions=tmpcols, title="Hawaii DEM raster with rain gauges (+) and\ncoastline vectors plotted on top", panel = function(...)
{
panel.gridplot(...)
sp.points(raingauge_reproj, cex=0.3, col="black")#pch=".")
sp.polygons(coastline_reproj, fill="transparent", col="black", lty="dashed", lwd=1)
} )
###########################################################
# 2. Get species location data using
# species distribution modeling package (dismo)
###########################################################
# GBIF with R
library(dismo)
library(ape)
spname = c("hawaiiensis", "mauiensis", "fauriei", "hathewayi", "kaduana", "mauiensis", "kaduana", "greenwelliae", "mariniana", "wawrae", "grandiflora", "hobdyi", "hexandra")
gbif_res = NULL
for (i in 1:length(spname))
{
tmp_gbif_res = gbif2(genus='Psychotria', species=spname[i], geo=TRUE, sp=FALSE)
if (nrow(tmp_gbif_res) > 0)
{
gbif_res = rbind(gbif_res, tmp_gbif_res)
}
}
# Turn into SpatialPoints
tmpcoords = adf(cbind(gbif_res$lat, gbif_res$lon))
names(tmpcoords) = c("y", "x")
coordinates(tmpcoords) = ~x+y
df = SpatialPointsDataFrame(coords=tmpcoords, data=gbif_res)
proj4string(df) = CRS("+proj=longlat +ellps=GRS80 +datum=NAD83")
bbox(df)
df_reproj = match_projection_and_extent(input_layer=df, basemap)
plot(df_reproj)
title("Hawaii Psychotria collections in GBIF")
########################################################
# Plot vector layers on top of Raster layer (the DEM)
########################################################
tmpcols = topo.colors(16)
tmpcols[1] = "grey90"
tmpcols[11:15] = tmpcols[12:16]
tmpcols[16] = "white"
spplot(dem, "band1", col.regions=tmpcols, panel = function(...)
{
panel.gridplot(...)
sp.points(df_reproj, cex=1, col="black", pch=19)
sp.polygons(coastline_reproj, fill="transparent", col="black", lty="dashed", lwd=1)
} )
###########################################################
# 3. Bring in phylogenetics
###########################################################
# phylogenetic tree from:
# http://www.reelab.net/lagrange/configurator/index
newick_str = "((((((((P_hawaiiensis_WaikamoiL1:0.010853,P_mauiensis_Eke:0.010853):0.007964,(P_fauriei2:0.013826,P_hathewayi_1:0.013826):0.004991):0.001986,(P_kaduana_PuuKukuiAS:0.020803,P_mauiensis_PepeAS:0.020803):1e-05):0.003762,P_kaduana_HawaiiLoa:0.024565):0.003398,(P_greenwelliae07:0.01271500,P_greenwelliae907:0.01271500):0.01524800):0.018984,((((P_mariniana_MauiNui:0.02241,P_hawaiiensis_Makaopuhi:0.02241):0.008236,P_mariniana_Oahu:0.030646):0.002893,P_mariniana_Kokee2:0.033539):0.005171,P_wawraeDL7428:0.03871):0.008237):0.008255,(P_grandiflora_Kal2:0.027864,P_hobdyi_Kuia:0.027864):0.027338):0.003229,((P_hexandra_K1:0.026568,P_hexandra_M:0.026568):0.005204,P_hexandra_Oahu:0.031771):0.026659);"
tr = read.tree(file="", text=newick_str)
plot(tr)
# simplify tree
tips_to_drop = c("P_mauiensis_PepeAS", "P_hexandra_M", "P_greenwelliae907", "P_mariniana_MauiNui", "P_hexandra_K1", "P_kaduana_HawaiiLoa", "P_mariniana_Oahu", "P_hawaiiensis_Makaopuhi")
tr2 = drop.tip(tr, tips_to_drop)
plot(tr2)
tr2$tip.label = c("Psychotria_hawaiiensis", "Psychotria_mauiensis", "Psychotria_fauriei", "Psychotria_hathewayi", "Psychotria_kaduana", "Psychotria_greenwelliae", "Psychotria_mariniana", "Psychotria_wawrae", "Psychotria_grandiflora", "Psychotria_hobdyi", "Psychotria_hexandra")
plot(tr2)
# Extract some points
dem_pts = extract(demr, df_reproj)
ppt_pts = extract(pptr, df_reproj)
tdmean_pts = extract(tdmeanr, df_reproj)
# redo names
tmpnames = df_reproj$species
for (i in 1:length(tmpnames))
{
tmprow = strsplit(tmpnames[i], " ")
newrow = paste(tmprow[[1]][1], "_", tmprow[[1]][2], sep="")
tmpnames[i] = newrow
}
pts = cbind(tmpnames, dem_pts, ppt_pts, tdmean_pts)
pts = adf(pts)
# remove nas
pts = pts[is.na(pts[, 3])==FALSE, ]
unique_names = unique(pts[,1])
env_means = NULL
for (i in 1:len(unique_names))
{
tmpname = unique_names[i]
tmpdf = adf(pts[pts[,1] == tmpname, 2:4])
tmprow = colMeans(dfnums_to_numeric(tmpdf))
tmprow2 = (c(tmpname, tmprow))
env_means = rbind(env_means, tmprow2)
}
env_means = adf2(env_means)
env_means = dfnums_to_numeric(env_means)
tips_to_drop = tr2$tip.label %in% env_means$V1 == FALSE
tr3 = drop.tip(tr2, tr2$tip.label[tips_to_drop])
###########################################################
# 4. Ancestral character estimation of mean elevation
# preference
###########################################################
# order table
tmpmatch = match(tr3$tip.label, env_means$V1)
env_means2 = env_means[, 2:4]
row.names(env_means2) = env_means[, 1]
(MLdem = ace(env_means2[,1], tr3, type="continuous", method="ML", CI=TRUE))
(MLppt = ace(env_means2[,2], tr3, type="continuous", method="ML", CI=TRUE))
(MLtdmean = ace(env_means2[,3], tr3, type="continuous", method="ML", CI=TRUE))
plot(tr3)
tiplabels(round(env_means2[,1]), 1:9)
nodelabels(round(MLdem$ace), 10:17)
#################################################
# From here you can do lots of stuff!
#################################################
save.image(file="_biogeog_workshop_GIS_v4.Rdata")