
Cluster Analysis

K-means clustering is a technique for partitioning data into groups of similar observations. The algorithm assigns points to clusters so that within-group variability is minimized and between-group variability is maximized. I wanted to see how 20 lakes compared to one another in terms of their chemistry, so I ran a k-means clustering on some basic water chemistry data (DO, temperature, and conductivity).

#load packages and munge data
require(cluster)
require(factoextra)

setwd('C:/Users/kk/Documents/ponds')
dat <- read.csv("chem.csv")
str(dat)

#cast to wide format, one row per site (reshape() is base R)
dat2 <- reshape(dat, idvar = "site", timevar = "sampledate", direction = "wide")
rownames(dat2) <- dat2$site

#keep only the chemistry columns
dat3 <- dat2[2:13]

#describe the data through basic summary statistics
desc_stats <- data.frame(
 Min = apply(dat3, 2, min), # minimum
 Med = apply(dat3, 2, median), # median
 Mean = apply(dat3, 2, mean), # mean
 SD = apply(dat3, 2, sd), # standard deviation
 Max = apply(dat3, 2, max) # maximum
)

desc_stats
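
Because DO, temperature, and conductivity sit on very different numeric scales, k-means distances can be dominated by whichever variable has the largest units. A common optional step is to standardize first; a minimal sketch (the rest of the post keeps the raw dat3, but dat3s could be swapped in below):

#optional: standardize so no single variable dominates the distance calculation
dat3s <- scale(dat3)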

#generate an elbow plot to see how many clusters you should use
set.seed(123)
fviz_nbclust(dat3, kmeans, method = "wss") +
 geom_vline(xintercept = 5, linetype = 2)

# Compute k-means clustering with desired number of clusters
set.seed(123)
km.res <- kmeans(dat3, 5, nstart = 25)
print(km.res)


From the elbow plot it looks like we have five groups of lakes. Lake 15 may be an outlier, since it is not similar to any of the other lakes. From here we might be able to work out why some lakes are more similar than others.
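
To actually see the groupings, factoextra can project the lakes onto the first two principal components and draw each cluster; a minimal sketch using the km.res object from above (ellipse.type and repel are just cosmetic choices):

#visualize the clusters on the first two principal components
fviz_cluster(km.res, data = dat3,
 ellipse.type = "convex", # hull around each cluster
 repel = TRUE) # keep site labels from overlapping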

 


2-way ANOVA and Tukey HSD

#1 Read in .csv of ammonium data
 require(dplyr)
 setwd('C:/Users/kkuntz/Documents/Personal_Doc/thesis')
 NH4 <- read.csv('ammonium.csv')

#2 Calculate daily flux for summer from the hourly averages in light and dark
# (roughly 10 dark hours and 14 light hours per summer day)
 NH4$dailyflux <- (NH4$ave_H_dark * 10) + (NH4$ave_H_light * 14)

#3 Run a 2-way ANOVA with an interaction between sediment and organism
 ANOVA <- aov(dailyflux ~ Sed * Org, data = NH4)
 summary(ANOVA)
 

 



#4 We have significant differences for the organism treatment, but we
# still want to find out where the differences occur, so we run a
# post hoc Tukey HSD test
 TukeyHSD(ANOVA, "Org", ordered = TRUE)
 plot(TukeyHSD(ANOVA, "Org"))
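
Since the model includes a Sed × Org interaction, it can also help to eyeball an interaction plot before leaning on the main effects. A minimal base-R sketch, assuming Sed and Org are factor columns in NH4:

 #quick visual check of the sediment-by-organism interaction
 with(NH4, interaction.plot(Sed, Org, dailyflux,
  xlab = "Sediment treatment", ylab = "Daily flux",
  trace.label = "Organism"))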




Plotting Soil Composition

#Load packages
library(GSIF)
library(soiltexture)
library(aqp)
library(plyr)

#Draw an empty texture triangle, then one with the USDA classes
TT.plot( class.sys = "none" )
TT.plot( class.sys = "USDA.TT" )

#Extract the class vertices and convert them to x-y plotting coordinates
vert <- TT.vertices.tbl(class.sys = "USDA.TT")
vert$x <- 1 - vert$SAND + (vert$SAND - (1 - vert$SILT)) * 0.5
vert$y <- vert$CLAY * sin(pi/3)

#Build a polygon for each USDA texture class
USDA.TT <- data.frame(TT.classes.tbl(class.sys = "USDA.TT", collapse = ", "))
TT.pnt <- as.list(rep(NA, length(USDA.TT$name)))
poly.lst <- as.list(rep(NA, length(USDA.TT$name)))
for (i in 1:length(USDA.TT$name)) {
 TT.pnt[[i]] <- as.integer(strsplit(unclass(paste(USDA.TT[i, "points"])), ", ")[[1]])
 poly.lst[[i]] <- vert[TT.pnt[[i]], c("x", "y")]
 poly.lst[[i]] <- Polygons(list(Polygon(rbind(poly.lst[[i]], poly.lst[[i]][1, ]))), ID = i)
}

#Convert the texture triangle to a spatial object
poly.sp <- SpatialPolygons(poly.lst, proj4string = CRS(as.character(NA)))
poly.USDA.TT <- SpatialPolygonsDataFrame(poly.sp, data.frame(ID = USDA.TT$name), match.ID = FALSE)
spplot(poly.USDA.TT)
#Load soil composition data and plot it on the HYPRES triangle
soil <- read.csv("soil.csv")
TT.plot(
 class.sys = "HYPRES.TT",
 tri.data = soil,
 main = "Soil texture data"
)
#Bubble plot: symbol size scaled by a fourth variable (organic content)
TT.plot(class.sys = "none",
 tri.data = soil,
 z.name = "Organic Content",
 main = "Soil texture triangle and Organic Content bubble plot"
)
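
Beyond plotting, soiltexture can also classify each sample directly. A minimal sketch, assuming soil has the CLAY, SILT, and SAND columns (summing to 100) that the triangle plots expect:

#assign each sample a USDA texture class
TT.points.in.classes(tri.data = soil,
 class.sys = "USDA.TT",
 PiC.type = "t") # "t" returns the class code as text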



R foR Rasters

require(rgdal)
require(raster)
require(ggplot2)
require(dplyr)
require(rasterVis)

setwd("C:/DATA/Blog/raster/NLCD2011_LC_N39W072")
li <- raster("NLCD2011_LC_N39W072.tif")
plot(li)
li
#Read the county boundary shapefile and subset Suffolk County
shape <- readOGR(dsn = "C:/DATA/Blog/raster/lishape/government_units_NRCSCNTY_ny_3451580_01/government_units", layer = "county_nrcs_a_ny")
suffolk <- subset(shape, COUNTYNAME == "Suffolk")
plot(suffolk)
#Crop the land cover raster to Suffolk's extent, then mask to its boundary
suffolk_c <- crop(li, extent(suffolk))
suffolk_c2 <- mask(suffolk_c, suffolk)

#Tabulate the land cover class frequencies and plot them
dist <- data.frame(freq(suffolk_c2))
g <- ggplot(dist, aes(x = factor(value), y = count, fill = factor(value))) +
 geom_bar(stat = "identity") +
 coord_flip()
g
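
rasterVis is loaded above but never used; for what it's worth, its levelplot tends to render categorical rasters more cleanly than base plot. A minimal sketch:

#draw the masked land cover raster with rasterVis
levelplot(suffolk_c2, margin = FALSE)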


Word Clouds

Word clouds are a cool way to show themes in text.  I did this word cloud with my master's thesis!

1. Load required packages
 library(RXKCD)
 library(tm)
 library(wordcloud)
 library(RColorBrewer)
 library(SnowballC)

2. Read in the text file
 filePath <- "C:/data/blog/wordcloud/kk.txt"
 text <- readLines(filePath)
 docs <- Corpus(VectorSource(text))
 inspect(docs)

3. Get rid of messy characters
 toSpace <- content_transformer(function (x , pattern ) gsub(pattern, " ", x))
 docs <- tm_map(docs, toSpace, "/")
 docs <- tm_map(docs, toSpace, "@")
 docs <- tm_map(docs, toSpace, "\\|")

4. Convert the text to lower case
 docs <- tm_map(docs, content_transformer(tolower))

5. Remove numbers
 docs <- tm_map(docs, removeNumbers)

6. Remove common English stopwords
 docs <- tm_map(docs, removeWords, stopwords("english"))

7. Remove punctuation
 docs <- tm_map(docs, removePunctuation)

8. Eliminate extra whitespace
 docs <- tm_map(docs, stripWhitespace)

9. Convert to a term-document matrix and sort words by frequency
 dtm <- TermDocumentMatrix(docs)
 m <- as.matrix(dtm)
 v <- sort(rowSums(m),decreasing=TRUE)
 d <- data.frame(word = names(v),freq=v)
 head(d, 10)

10. Create the word cloud
 wordcloud(words = d$word, freq = d$freq, min.freq = 1,
 max.words=200, random.order=FALSE, rot.per=0.35,
 colors=brewer.pal(8, "Dark2"))
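
As a quick sanity check on the frequencies feeding the cloud, the top ten words can also be drawn as a simple bar chart; a minimal base-R sketch using the d data frame built in step 9:

 barplot(d[1:10, ]$freq, names.arg = d[1:10, ]$word,
  col = "lightblue", las = 2, main = "Most frequent words")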



Coral Reef Bleaching

1. Load packages and read in some coral reef data
 library(dplyr)
 library(ggplot2)
 library(rgdal)
 library(rgeos)
 library(maptools)
 library(tmap)

 cb <- read.csv("CoralBleaching.csv")

2. Plot the reef records by latitude and longitude
 b <- ggplot(cb, aes(LON, LAT)) + coord_cartesian(ylim=c(-40, 40), xlim=c(-200, 200))
 b + geom_point()
 b + geom_point(aes(color=BLEACHING_SEVERITY)) +
  scale_colour_brewer("Bleaching Severity", palette="PiYG") + coord_equal(ratio=1)


3. Add in a shapefile of coastlines
 coast <- readOGR("ne_50m_coastline.shp", layer="ne_50m_coastline")
 ggplot() + geom_polygon(data=coast, aes(x=long, y=lat, group=group))

4. Add your point data to the map and color according to bleaching severity
 ggplot() +
  geom_polygon(data=coast, aes(x=long, y=lat, group=group), fill="grey40",
   colour="grey90", alpha=1) +
  labs(x="", y="", title="Coral Reef Bleaching") +
  theme(axis.ticks.y = element_blank(), axis.text.y = element_blank(),
   plot.title = element_text(lineheight=.8, face="bold", vjust=1)) +
  #grey points drawn first as an outline, colored points on top
  geom_point(aes(x=LON, y=LAT), data=cb, alpha=1, size=3, color="grey20") +
  geom_point(aes(x=LON, y=LAT, color=BLEACHING_SEVERITY), data=cb, alpha=1, size=2) +
  scale_colour_brewer("Bleaching Severity", palette="PiYG") +
  coord_equal(ratio=1)
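
tmap is loaded in step 1 but never used; as an aside, a minimal sketch of the same coastline basemap drawn with tmap instead of ggplot2:

 #same coastline layer, rendered with tmap
 tm_shape(coast) + tm_lines(col = "grey40")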


Sources:
http://zevross.com/blog/2014/07/16/mapping-in-r-using-the-ggplot2-package/
http://www.sthda.com/english/wiki/ggplot2-colors-how-to-change-colors-automatically-and-manually
http://remi-daigle.github.io/GIS_mapping_in_R/


Google Maps & R Maps

You can grab satellite imagery from Google Maps simply by calling up an area by latitude and longitude. (Note that recent versions of ggmap require registering a Google API key with register_google() before get_map() can fetch Google tiles.)

  1. Load packages
    library(ggmap)
    library(ggplot2)
  2. Set a latitude and longitude range
    lat <- c(18, 23)
    lon <- c(-161, -154)
  3. Get a map
    map <- get_map(location = c(lon = mean(lon), lat = mean(lat)), zoom = 6, maptype = "satellite", source = "google")
    plot(map)
    4. You can also play with the zoom level and map type, e.g. zoom into Oahu
    lat <- c(21.1, 21.8)
    lon <- c(-158.3, -157.567)

    map <- get_map(location = c(lon = mean(lon), lat = mean(lat)), zoom = 10, maptype = "satellite", source = "google")
    plot(map)
    map2 <- get_map(location = c(lon = mean(lon), lat = mean(lat)), zoom = 10, maptype = "terrain", source = "google")
    plot(map2)
    and if you want to make your map really pretty... note the watercolor tiles come from Stamen, not Google
    map3 <- get_map(location = c(lon = mean(lon), lat = mean(lat)), zoom = 10, maptype = "watercolor", source = "stamen")
    plot(map3)
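
    Since get_map() returns a ggmap object, you can also layer data on top with ggmap() plus regular geoms. A minimal sketch, with a made-up point of interest purely for illustration:

    #overlay a (hypothetical) marker on the Oahu basemap
    honolulu <- data.frame(lon = -157.858, lat = 21.315) # illustrative coordinates
    ggmap(map) + geom_point(data = honolulu, aes(x = lon, y = lat), color = "red", size = 3)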
    Source: https://www.r-bloggers.com/google-maps-and-ggmap/