Uncategorized

Analysis of SimilaRities

Anosim from the package vegan tests statistical 
differences between two or more groups of 
community data similar to an anova.

This example looks at the differences 
between a, b, and c using data from 20 species
require(dplyr)
require(vegan)

setwd('C:/Users/kerry/Documents/Rblog')

env <- read.csv("env.csv")
dat <- read.csv("communitydata.csv")

env <- data.frame(env[, 2])
colnames(env) <- c("variable")
dat <- dat[, 2:20]

data.dist <- vegdist(dat)
meow <- with(env, anosim(data.dist, variable))
summary(meow)
plot(meow)

env

dat

dissimilaity

plot
Advertisements
plots

Rotating ScatteRplots in R!

require(rgl)
require(dplyr)
setwd('C:/Users/kerry/Documents/Rblog')
dat <- read.csv("WaterQualityData.csv")

x <- filter(dat, Parameter=="BOD5W")
y <- filter(dat, Parameter=="DOC")
z <- filter(dat, Parameter=="NH4F")

x <- select(x, MeasureValue)
y <- select(y, MeasureValue)
z <- select(z, MeasureValue)

dat2 <- cbind(x, y, z)
dat3 <- data.frame(na.omit(dat2))

colnames(dat3) <- c("bod", "doc", "nh4")
x <- dat3$bod
y <- dat3$doc
z <- dat3$nh4

#Open rgl plot
rgl.open()
rgl.bg(color = "white")
rgl.spheres(x, y, z, r = 0.2, color = "pink")

#Scale data
x1 <- (x - min(x))/(max(x) - min(x))
y1 <- (y - min(y))/(max(y) - min(y))
z1 <- (z - min(z))/(max(z) - min(z))

# Make a scatter plot
rgl_init()
rgl.spheres(x1, y1, z1, r = 0.02, color = "pink") 
# Add x, y, and z Axes
rgl.lines(c(0, 1), c(0, 0), c(0, 0), color = "purple")
rgl.lines(c(0, 0), c(0,1), c(0, 0), color = "blue")
rgl.lines(c(0, 0), c(0, 0), c(0,1), color = "green")

rgl2

#add cool shapes
rgl_init()
shapelist3d(octahedron3d(), x1, y1, z1, size = 0.02, 
color="pink")
rgl.lines(c(0, 1), c(0, 0), c(0, 0), color = "purple")
rgl.lines(c(0, 0), c(0,1), c(0, 0), color = "blue")
rgl.lines(c(0, 0), c(0, 0), c(0,1), color = "green")

rgl3
#make it a movie
rgl_init()
shapelist3d(octahedron3d(), x1, y1, z1, size = 0.02, 
color="pink")
rgl.lines(c(0, 1), c(0, 0), c(0, 0), color = "purple")
rgl.lines(c(0, 0), c(0,1), c(0, 0), color = "blue")
rgl.lines(c(0, 0), c(0, 0), c(0,1), color = "green")
movie3d(spin3d(axis = c(0, 1, 0)), duration = 30,
 dir = getwd())

movie
summarystatistics

Dendrograms

hierarchical clustering is a method of cluster analysis which uses algorithms to measure similarity or dissimilarity between pairs.  This method uses Euclidean distance. The Euclidean distance or Euclidean metric is the “ordinary” straight line distance between two points in Euclidean space. Euclidean distance can be calculated by the formula below.

\|a-b\|_{2}={\sqrt  {\sum _{i}(a_{i}-b_{i})^{2}}}
When analyzing ecological data, we can look at the similarity or dissimilarity of sites as defined by their species composition.  The example below analyzes 20 sites with observations of 20 species across all sites using packages such as “ggdendro” and “ape”

dendro1

require(ggdendro)
require(ape)

dat <- read.csv("dendroex.csv")

rownames(dat)<- dat[,1]
dat2 <- dat[,2:21]

dd <- dist(scale(dat2), method = "euclidean")
clust <- hclust(dd, method = "ward.D2")

plot(clust)
dendro2
plot(clust, hang = -1, cex = 0.6)

plot(as.phylo(clust), type = "fan")

dendro3

colors = c("pink", "blue", "green", "purple")
color = cutree(clust, 4)
plot(as.phylo(clust), type = "fan", tip.color = colors[color],
label.offset = 1, cex = 0.7)

dendro4

code here
 Continue reading "Dendrograms" 
Uncategorized

Data for Breakfast

I was looking for inspiration so I googled “Fun R packages”and found waffle!

Say I sampled 77 ponds and wanted to show the percent vegetation cover of all ponds.  I can use waffle!

more fun R packages here: R is fun

require(waffle)

wetcover <- c('100% cover' = (5), '80% cover' = 10, '50% cover' = 10, 
'20% cover'= 20, '10% cover'= 30, '0% cover'= 2)
waffle(wetcover, rows=8)

waffle

summarystatistics

Chlorophyll Circular Plots

code from:  Circular banded graphs for ggplot

Create a circular plot to show daily measurements above and below the average chlorophyll a concentration

library(readxl)
library(tidyr)
library(dplyr)
library(plotrix)
library(ggplot2)

setwd("C:/Users/kerry/Documents/Rblog")
chla <- read.csv("chla_data2.csv")

data

chla$logc <- log(chla$MeasureValue)

perc <- (chla$logc)/2.18
lf <- cbind(chla, perc)
lfa <- lf
lfb <- lf
lf <- rbind(lfa,lfb)

lf$baseline <- 1

ggplot(lf) + coord_polar() +
 geom_ribbon(aes(ymin=baseline,
 ymax=perc,
 x=day_of_year), fill="deepskyblue2")
 
r <- range(lf$perc)
summary(r)
circleplotblue-e1522075823392.jpeg
##################
a <- lf
a$perc[a$perc < 1] <- 1
a$ranges <- "+10% to +15%"

b <- lf
b$perc[b$perc < 1] <- 1
b$perc[b$perc > 1.10] <- 1.10
b$ranges <- "+5% to +10%"

c <- lf
c$perc[c$perc < 1] <- 1
c$perc[c$perc > 1.05] <- 1.05
c$ranges <- "0% to +5%"

d <- lf
d$perc[d$perc > 1] <- 1
d$ranges <- "-25% to -20%"

e <- lf
e$perc[e$perc > 1] <- 1
e$perc[e$perc < 0.8] <- 0.8
e$ranges <- "-20% to -15%"

f <- lf
f$perc[f$perc > 1] <- 1
f$perc[f$perc < 0.85] <- 0.85
f$ranges <- "-15% to -10%"

g <- lf
g$perc[g$perc > 1] <- 1
g$perc[g$perc < 0.90] <- 0.90
g$ranges <- "-10% to -5%"

h <- lf
h$perc[h$perc > 1] <- 1
h$perc[h$perc < 0.95] <- 0.95
h$ranges <- "-5% to 0%"

grf <- rbind(a, b, c, d, e, f, g, h)

ggplot() + coord_polar() + 
 geom_blank(data=lf, aes(x=day_of_year,
 y=perc)) +
 geom_ribbon(data=grf,aes(ymin=baseline,
 ymax=perc,
 x=day_of_year,
 group=ranges,
 fill=ranges))

grf$`Difference to mean` <- 
factor(grf$ranges, levels=c("-25% to -20%",
 "-20% to -15%",
 "-15% to -10%",
 "-10% to -5%",
 "-5% to 0%",
 "+10% to +15%",
 "+5% to +10%",
 "0% to +5%"))
ggplot() + coord_polar() + 
 geom_blank(data=long3, aes(x=day_of_year,
 y=perc)) +
 geom_ribbon(data=grf,aes(ymin=baseline,
 ymax=perc,
 x=day_of_year,
 group=`Difference to mean`,
 fill=`Difference to mean`))

circleplotrainbow

ShinyR · Uncategorized

Reactivity in Shiny-R

Lately I’ve been learning how to use Shiny! Shiny-R is an R package that allows you to create interactive web apps.

I started out with a simple app which has 3 components:

  • A user interface object
  • A server function
  • A call to the shiny app function

All your shiny apps will start with this basic syntax:

ui <- fluidePage()
Server <- function (input, output) {}
shinyApp(ui=ui, server=server)

From there you can add in all sorts of R capability…
the possibilities are endless!

#start of with some simple text
ui <- fluidPage(
titlePanel("Hello Iris!")
)

server <- function(input, output) {}

shinyApp(ui = ui, server = server)
fig1
#play around with formatting and color
ui <- fluidPage(
 titlePanel("Iris"),
 sidebarLayout(
 sidebarPanel(),
 mainPanel(
 h1("Iris", align = "center"),
 h2(div("Iris", align = "center", style="color:pink")),
 h3("Iris", align = "center"),
 h4("Iris", align = "center"),
 h5("Iris", align = "center"),
 h6("Iris", align = "center")
 )
 )
)

server <- function(input, output) {}
shinyApp(ui = ui, server = server)
fi2
#now try to add reactivity
#you will need to build the input in the ui 
#function and a call from the server function
#the server function defines how to build 
the object
ui <- fluidPage(
 titlePanel("Iris"),
 sidebarLayout(
 sidebarPanel(),
 mainPanel(selectInput("var", 
 label = "Choose a variable to display",
 choices = c("setosa",
 "virginica", 
 "versica"),
 selected = "setosa"),
 
 textOutput("selected_var")
 
 )))

server <- function(input, output) {
output$selected_var <- renderText({ 
 paste("You have selected", input$var)
 })}

shinyApp(ui = ui, server = server)
fig3
#Once you get a hand of reactivity, try adding a graph
#now you can toggle between species and the graph
#will be reactive!
fig4
Maps

Animated Maps

mapcrimes.gif

This map shows nutrient measurements over the TMDL limit
some of the early data however was collected before TMDLs
were established

code based on: Easy Animation in R

#get staion info and nutrient data
station <- read.csv("station.csv")
result <- read.csv("result.csv")

station2 <- dplyr::select(station, 
OrganizationIdentifier,StateCode, 
CountyCode, MonitoringLocationIdentifier, 
LatitudeMeasure,LongitudeMeasure)

result2 <- dplyr::select(result, 
OrganizationIdentifier,ActivityStartDate,
ActivityMediaName, CharacteristicName,
ResultMeasureValue,ResultMeasure.MeasureUnitCode, 
MonitoringLocationIdentifier)

#join based on monitoring site
dat <- left_join(result2, station2,
by = "MonitoringLocationIdentifier")

#get map from ggmap
li <- get_map(location = 'long island', zoom = 8,
maptype = "roadmap",
color = "bw")
 
datN <- filter(dat, CharacteristicName=="Nitrate")
datN$nutrient <- "nitrate"
datN2 <- filter(dat,CharacteristicName=="Nitrite")
datN2$nutrient <- "nitrite"

datA <- filter(dat, CharacteristicName=="Ammonia")
datA$nutrient <- "Ammonia"

datA2 <- filter(dat, CharacteristicName=="Ammonium")
datA2$nutrient <- "Ammonium"

dat2 <- rbind(datN, datN2, datA, datA2)

dat2$timek <- format(as.Date(dat2$ActivityStartDate), 
"%Y-%m")
#create list of times used to save unique plots
times <- unique(dat2$timek)

#loop through all times, and save ggplots as jpeg

for(i in 1:length(times)){
t <- times[i]
m <- ggmap(li) + 
geom_point(data = dat2[dat2$timek == t,],
mapping = aes(x = LongitudeMeasure, 
y = LatitudeMeasure, color= nutrient),
size=3) +
ggtitle(paste("Nutrient Measurements"), t) +
theme(legend.position="bottom") +
scale_colour_discrete(drop = FALSE)
title <- paste("map",i,".jpg",sep="")
jpeg(title)
print(m)
dev.off()
}