Basic Mapping

Preliminaries: load in libraries

library(rgdal) #manipulating shape files (.shp)
library(ggplot2) #very advanced plotting/graphics library
library(sp) #spatial data utility
library(maptools) #mapping utility
library(rgeos) #additional geographic utility
library(raster) #raster data
library(spatstat) #spatial statistics
#library(ggmap) #interfacing with Google Maps (VPN required)
library(OpenStreetMap)

Reading in Map Data

#chinaMap<-getData('GADM',country="CHN",level=1) #download data from GADM; getData is part of package raster
ChinaRD0<-readRDS("data/CHN_adm0.rds") #Country outline; data folder is local
ChinaRD1<-readRDS("data/CHN_adm1.rds") #provinces
ChinaRD2<-readRDS("data/CHN_adm2.rds") #prefectures
ChinaRD3<-readRDS("data/CHN_adm3.rds") #counties
plot(ChinaRD0,main="Mainland China country outline") #Plot Mainland China 

Look at properties of data frame

ChinaRD1@data[1:5,1:7] #shows first 5 rows 7 cols of ChinaRD1; could also use: slot(ChinaRD1,"data")[1:10,]
names(ChinaRD3) #names of columns in data
 [1] "OBJECTID"  "ID_0"      "ISO"       "NAME_0"    "ID_1"      "NAME_1"    "ID_2"     
 [8] "NAME_2"    "ID_3"      "NAME_3"    "CCN_3"     "CCA_3"     "TYPE_3"    "ENGTYPE_3"
[15] "NL_NAME_3" "VARNAME_3"
shanghaiCities<-ChinaRD3[ChinaRD3$NAME_2=="Shanghai",] #Selects only cities in Shanghai municipality

Plot geographic subset of data with names

plot(shanghaiCities)
plot(shanghaiCities[shanghaiCities$NAME_3=="Shanghai",],border="blue",add=TRUE) #add=T draws on current plot
text(coordinates(shanghaiCities),
     labels=shanghaiCities$NAME_3,col="red",cex=0.7) #Adds names

Merge population data onto map

popData<-read.table("data/china_pop_9293.csv",header=T,sep=",",stringsAsFactors=FALSE) #examine: str(popData)
ChinaData2 <- slot(ChinaRD2, "data")
names(ChinaData2)[c(6,8)]<-c("province_name","city_name")
table(ChinaData2$province_name) #Show frequency table of prefectures by province
prefectureData<-merge(ChinaData2, popData, by=c("province_name", "city_name"),all.x=TRUE) #merging by two columns
prefectureData$city_name[is.na(prefectureData$pop92)] #not a great way to match
names(ChinaRD2)[c(6,8)]<-c("province_name","city_name") #SP package allows merging onto SpatialPolygonsDataFrame
popDataSP<-merge(ChinaRD2, popData, by=c("province_name", "city_name"),all.x=TRUE)
#rm(ChinaRD2) #Removes old object--not really necessary
summary(popDataSP) #shows summary statistics on new object

Plot Population Data with ggplot2 (chloropleth plot)

gg_map_obj<-fortify(ChinaRD2, region= "OBJECTID")
china_map<-ggplot() + geom_map(data = slot(popDataSP, "data"),
    aes(map_id = OBJECTID, fill = pop92), map = gg_map_obj) + 
  expand_limits(x = gg_map_obj$long, y = gg_map_obj$lat) + ggtitle("Population in 1992, Selected Prefectures")
china_map

#ggsave("china_pop92.pdf", china_map, width = 10, height = 6)

Points Data: which prefecture contains province’s centroid?

provinceCentroids<-coordinates(ChinaRD1) #gets coordinates
plot(ChinaRD1)
points(provinceCentroids,col="red") #adds points to current plot

#plot(ChinaRD3,border="blue",add=T) #draws borders of prefectures

Geographic Projections, Spatial Overlays, Write to CSV

spdf_pts <- SpatialPointsDataFrame(coords=provinceCentroids, data=data.frame(ChinaRD1$NAME_1))
proj4string(spdf_pts) <-proj4string(popDataSP) #Make sure projection is the same (should be since comes from same data source)
#proj4string(spdf_pts)<-CRS(" +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0") #could use this code to adjust
pref_pts<-over(spdf_pts,popDataSP)
pref_pts$x<-slot(spdf_pts,"coords")[,1]
pref_pts$y<-slot(spdf_pts,"coords")[,2]
#write.table(pref_pts, file = "centroids_in_cities.CSV", sep = ",", col.names=names(pref_pts),row.names=FALSE,na="") #write CSV

Plot Province Centroids with Prefecture Name

plot(ChinaRD1)
points(provinceCentroids,col="red") #adds points to current plot
text(pref_pts$x,pref_pts$y, labels=pref_pts$city_name,col="blue",cex=0.7) #Add names

Using Open Street Map Tiles

sufe_names="SUFE"
sufe_pts<-SpatialPointsDataFrame(coords=cbind(121.5153883,31.2948375), data=data.frame(sufe_names))
sufe_pts$lon<-coordinates(sufe_pts)[1,1]
sufe_pts$lat<-coordinates(sufe_pts)[1,2]
sufe_map <- invisible(openmap(c(sufe_pts$lat+0.03,sufe_pts$lon-0.03), c(sufe_pts$lat-0.03,sufe_pts$lon+0.03),
               minNumTiles=10,type="osm")) #invisible suppresses warning messages

SUFE Plot from Open Street Map

plot(sufe_map)

Geocoding

  • Geocoding is the process of converting street addresses to latitude and longitude

  • This can be done with a geodatabase or by calling a service (ex: Google, Baidu)

  • In R, there are multiple packages to do this, including geoChina and ggmap

  • You will need an API and the output will often come back as a JSON; Google gives 2500 free requests per day, Baidu seems to allow even more

  • Google will also allow network analysis (distance between two points by travel mode)

  • Note that Chinese maps use a shifted coordinate system (GCJ02) that may require additional work to use

Working with Point Pattern Data

Bring in restaurants point data and plot

We use a projection (NAD1983 Long Island, feet) to do simple distance calcuations

rest_data<-read.table("data/menupages_Wb126.csv", header=T, sep=",",stringsAsFactors =FALSE)
nyc_tracts<-readOGR("data","manhattan_tracts") #Read shapefiles with readOGR  
OGR data source with driver: ESRI Shapefile 
Source: "C:\Users\nschiff\Dropbox\notShared\teaching\graduate_urban\SUFE_2018\lectures\11_spatial_methods\data", layer: "manhattan_tracts"
with 221 features
It has 46 fields
ggplot() + geom_polygon(data = nyc_tracts, aes(x=long, y = lat, group = group), fill = NA, color = "black") + 
   geom_point(data=rest_data,aes(x=x_feet, y=y_feet), size=1, color = "blue") + ggtitle("Manhattan Restaurants")
Regions defined for each Polygons

Subset Plot: Chinese Restaurants

ggplot() + geom_polygon(data = nyc_tracts, aes(x=long, y = lat, group = group), fill = NA, color = "black") + 
   geom_point(data=rest_data[rest_data$cuisName1=="Chinese",],aes(x=x_feet, y=y_feet), size=1, color = "red") + ggtitle("Chinese Restaurants")
Regions defined for each Polygons

Convert to Point Pattern Object for Analysis

min_x<-min(rest_data$x_feet)
max_x<-max(rest_data$x_feet)
min_y<-min(rest_data$y_feet)
max_y<-max(rest_data$y_feet)
W <- owin(xrange=c(min_x,max_x),yrange=c(min_y,max_y)) #a window defined by limits of data
allrests.ppp<-ppp(rest_data$x_feet,rest_data$y_feet,window=W)
italian.ppp<-allrests.ppp[rest_data$cuisName1=="Italian",]
chinese.ppp<-allrests.ppp[rest_data$cuisName1=="Chinese",]
pizza.ppp<-allrests.ppp[rest_data$cuisName1=="Pizza",]

Look at count of Chinese restaurants within given radii

neighDist<-pairdist(chinese.ppp)
radii_set<-seq(200,3600,200)
chinese_mat<-array(,c(length(radii_set),2))
for (i in 1:length(radii_set)) {
  chinese_mat[i,1]<-radii_set[i]
  chinese_mat[i,2]<-mean(rowSums((neighDist<radii_set[i]), na.rm=TRUE))-1 #remove own distance
}
plot(chinese_mat,xlab="radius",ylab="Count",main="Count Chinese Restaurants within Radius")

Compare to pizza restaurants

Now compare Chinese to simulated measure

numSims<-1000
sim_mat<-array(,c(length(chinese_mat[,1]),numSims))
for (i in 1:numSims) {
  sim.ppp<-allrests.ppp[sample(1:length(allrests.ppp$x),length(chinese.ppp$x),replace=F)]
  neighDist_sim<-pairdist(sim.ppp)
  for (j in 1:length(radii_set)) {
    sim_mat[j,i]<-mean(rowSums(neighDist_sim<radii_set[j], na.rm=TRUE))-1
  }
}
conf_mat<-array(,c(length(chinese_mat[,1]),2))
for (j in 1:length(radii_set)) {
  conf_mat[j,1]<-quantile(sim_mat[j,],probs=.025,na.rm=TRUE)
  conf_mat[j,2]<-quantile(sim_mat[j,],probs=.975,na.rm=TRUE)
}
chinese_mat<-cbind(chinese_mat,conf_mat)

Plot with Confidence Bands

Compare to pizza

---
title: "2018 Spatial Methods Lecture"
output: html_notebook
---
#Basic Mapping

## Preliminaries: load in libraries
```{r message=FALSE}
library(rgdal) #manipulating shape files (.shp)
library(ggplot2) #very advanced plotting/graphics library
library(sp) #spatial data utility
library(maptools) #mapping utility
library(rgeos) #additional geographic utility
library(raster) #raster data
library(spatstat) #spatial statistics
#library(ggmap) #interfacing with Google Maps (VPN required)
library(OpenStreetMap)
```

## Reading in Map Data
```{r}
#chinaMap<-getData('GADM',country="CHN",level=1) #download data from GADM; getData is part of package raster
ChinaRD0<-readRDS("data/CHN_adm0.rds") #Country outline; data folder is local
ChinaRD1<-readRDS("data/CHN_adm1.rds") #provinces
ChinaRD2<-readRDS("data/CHN_adm2.rds") #prefectures
ChinaRD3<-readRDS("data/CHN_adm3.rds") #counties
plot(ChinaRD0,main="Mainland China country outline") #Plot Mainland China 
```
  
## Look at properties of data frame

```{r echo=TRUE}
ChinaRD1@data[1:5,1:7] #shows first 5 rows 7 cols of ChinaRD1; could also use: slot(ChinaRD1,"data")[1:10,]
names(ChinaRD3) #names of columns in data
shanghaiCities<-ChinaRD3[ChinaRD3$NAME_2=="Shanghai",] #Selects only cities in Shanghai municipality
```

## Plot geographic subset of data with names

```{r}
plot(shanghaiCities)
plot(shanghaiCities[shanghaiCities$NAME_3=="Shanghai",],border="blue",add=TRUE) #add=T draws on current plot
text(coordinates(shanghaiCities),
     labels=shanghaiCities$NAME_3,col="red",cex=0.7) #Adds names
```

## Merge population data onto map
```{r results='hide'}
popData<-read.table("data/china_pop_9293.csv",header=T,sep=",",stringsAsFactors=FALSE) #examine: str(popData)
ChinaData2 <- slot(ChinaRD2, "data")
names(ChinaData2)[c(6,8)]<-c("province_name","city_name")
table(ChinaData2$province_name) #Show frequency table of prefectures by province
prefectureData<-merge(ChinaData2, popData, by=c("province_name", "city_name"),all.x=TRUE) #merging by two columns
prefectureData$city_name[is.na(prefectureData$pop92)] #not a great way to match
names(ChinaRD2)[c(6,8)]<-c("province_name","city_name") #SP package allows merging onto SpatialPolygonsDataFrame
popDataSP<-merge(ChinaRD2, popData, by=c("province_name", "city_name"),all.x=TRUE)
#rm(ChinaRD2) #Removes old object--not really necessary
summary(popDataSP) #shows summary statistics on new object
```

## Plot Population Data with ggplot2 (chloropleth plot)
```{r}
gg_map_obj<-fortify(ChinaRD2, region= "OBJECTID")
china_map<-ggplot() + geom_map(data = slot(popDataSP, "data"),
    aes(map_id = OBJECTID, fill = pop92), map = gg_map_obj) + 
  expand_limits(x = gg_map_obj$long, y = gg_map_obj$lat) + ggtitle("Population in 1992, Selected Prefectures")
china_map
#ggsave("china_pop92.pdf", china_map, width = 10, height = 6)

```

## Points Data: which prefecture contains province's centroid?
```{r}
provinceCentroids<-coordinates(ChinaRD1) #gets coordinates
plot(ChinaRD1)
points(provinceCentroids,col="red") #adds points to current plot
#plot(ChinaRD3,border="blue",add=T) #draws borders of prefectures
```

## Geographic Projections, Spatial Overlays, Write to CSV
```{r}
spdf_pts <- SpatialPointsDataFrame(coords=provinceCentroids, data=data.frame(ChinaRD1$NAME_1))
proj4string(spdf_pts) <-proj4string(popDataSP) #Make sure projection is the same (should be since comes from same data source)
#proj4string(spdf_pts)<-CRS(" +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0") #could use this code to adjust
pref_pts<-over(spdf_pts,popDataSP)
pref_pts$x<-slot(spdf_pts,"coords")[,1]
pref_pts$y<-slot(spdf_pts,"coords")[,2]
#write.table(pref_pts, file = "centroids_in_cities.CSV", sep = ",", col.names=names(pref_pts),row.names=FALSE,na="") #write CSV
```

## Plot Province Centroids with Prefecture Name
```{r}
plot(ChinaRD1)
points(provinceCentroids,col="red") #adds points to current plot
text(pref_pts$x,pref_pts$y, labels=pref_pts$city_name,col="blue",cex=0.7) #Add names
```

## Using Open Street Map Tiles
```{r message=FALSE, results='hide'}
sufe_names="SUFE"
sufe_pts<-SpatialPointsDataFrame(coords=cbind(121.5153883,31.2948375), data=data.frame(sufe_names))
sufe_pts$lon<-coordinates(sufe_pts)[1,1]
sufe_pts$lat<-coordinates(sufe_pts)[1,2]
sufe_map <- invisible(openmap(c(sufe_pts$lat+0.03,sufe_pts$lon-0.03), c(sufe_pts$lat-0.03,sufe_pts$lon+0.03),
               minNumTiles=10,type="osm")) #invisible suppresses warning messages
```

## SUFE Plot from Open Street Map
```{r}
plot(sufe_map)
```

## Geocoding
- Geocoding is the process of converting street addresses to latitude and longitude

- This can be done with a geodatabase or by calling a service (ex: Google, Baidu)

- In R, there are multiple packages to do this, including geoChina and ggmap

- You will need an API and the output will often come back as a JSON; Google gives 2500 free requests per day, Baidu seems to allow even more

- Google will also allow network analysis (distance between two points by travel mode)

- Note that Chinese maps use a shifted coordinate system (GCJ02) that may require additional work to use

#Working with Point Pattern Data

## Bring in restaurants point data and plot
We use a projection (NAD1983 Long Island, feet) to do simple distance calcuations
```{r}
rest_data<-read.table("data/menupages_Wb126.csv", header=T, sep=",",stringsAsFactors =FALSE)
nyc_tracts<-readOGR("data","manhattan_tracts") #Read shapefiles with readOGR  
ggplot() + geom_polygon(data = nyc_tracts, aes(x=long, y = lat, group = group), fill = NA, color = "black") + 
   geom_point(data=rest_data,aes(x=x_feet, y=y_feet), size=1, color = "blue") + ggtitle("Manhattan Restaurants")
```

## Subset Plot: Chinese Restaurants
```{r}
ggplot() + geom_polygon(data = nyc_tracts, aes(x=long, y = lat, group = group), fill = NA, color = "black") + 
   geom_point(data=rest_data[rest_data$cuisName1=="Chinese",],aes(x=x_feet, y=y_feet), size=1, color = "red") + ggtitle("Chinese Restaurants")
```

## Convert to Point Pattern Object for Analysis
```{r results='hide'}
min_x<-min(rest_data$x_feet)
max_x<-max(rest_data$x_feet)
min_y<-min(rest_data$y_feet)
max_y<-max(rest_data$y_feet)
W <- owin(xrange=c(min_x,max_x),yrange=c(min_y,max_y)) #a window defined by limits of data
allrests.ppp<-ppp(rest_data$x_feet,rest_data$y_feet,window=W)
italian.ppp<-allrests.ppp[rest_data$cuisName1=="Italian",]
chinese.ppp<-allrests.ppp[rest_data$cuisName1=="Chinese",]
pizza.ppp<-allrests.ppp[rest_data$cuisName1=="Pizza",]
```

## Look at count of Chinese restaurants within given radii
```{r}
neighDist<-pairdist(chinese.ppp)
radii_set<-seq(200,3600,200)
chinese_mat<-array(,c(length(radii_set),2))
for (i in 1:length(radii_set)) {
  chinese_mat[i,1]<-radii_set[i]
  chinese_mat[i,2]<-mean(rowSums((neighDist<radii_set[i]), na.rm=TRUE))-1 #remove own distance
}
plot(chinese_mat,xlab="radius",ylab="Count",main="Count Chinese Restaurants within Radius")
```

## Compare to pizza restaurants
```{r echo=FALSE}
neighDist<-pairdist(pizza.ppp)
pizza_mat<-array(,c(length(radii_set),1))
pizza_mat<-cbind(chinese_mat[,1],pizza_mat)
for (i in 1:length(radii_set)) {
  pizza_mat[i,2]<-mean(rowSums((neighDist<radii_set[i]), na.rm=TRUE))-1 #remove own distance
}
plot(chinese_mat,xlab="radius",ylab="Count",main="Chinese (red) and Pizza (green)",
     ylim=range(c(chinese_mat[,2],pizza_mat[,2])),col="red")
par(new=T)
plot(pizza_mat,xlab="",ylab="",axes=FALSE,col="green")
```

## Now compare Chinese to simulated measure
```{r results='hide'}
numSims<-1000
sim_mat<-array(,c(length(chinese_mat[,1]),numSims))
for (i in 1:numSims) {
  sim.ppp<-allrests.ppp[sample(1:length(allrests.ppp$x),length(chinese.ppp$x),replace=F)]
  neighDist_sim<-pairdist(sim.ppp)
  for (j in 1:length(radii_set)) {
    sim_mat[j,i]<-mean(rowSums(neighDist_sim<radii_set[j], na.rm=TRUE))-1
  }
}
conf_mat<-array(,c(length(chinese_mat[,1]),2))
for (j in 1:length(radii_set)) {
  conf_mat[j,1]<-quantile(sim_mat[j,],probs=.025,na.rm=TRUE)
  conf_mat[j,2]<-quantile(sim_mat[j,],probs=.975,na.rm=TRUE)
}
chinese_mat<-cbind(chinese_mat,conf_mat)
```

## Plot with Confidence Bands
```{r echo=FALSE}
plot(chinese_mat[,1],chinese_mat[,2],ylim=range(c(chinese_mat[,2],chinese_mat[,3],chinese_mat[,4])),
     xlab="Radius",ylab="Count",pch=20,col="red")
par(new=T)
plot(chinese_mat[,1],chinese_mat[,3],ylim=range(c(chinese_mat[,2],chinese_mat[,3],chinese_mat[,4])),
     axes=FALSE,xlab="",ylab="",main="Count Chinese Restaurants within Radius",pch=25,cex=0.5)
par(new=T)
plot(chinese_mat[,1],chinese_mat[,4],ylim=range(c(chinese_mat[,2],chinese_mat[,3],chinese_mat[,4])),
     axes=FALSE,xlab="",ylab="",pch=24,cex=0.5)
```

## Compare to pizza
```{r echo=FALSE}
sim_mat<-array(,c(length(pizza_mat[,1]),numSims))
for (i in 1:numSims) {
  sim.ppp<-allrests.ppp[sample(1:length(allrests.ppp$x),length(pizza.ppp$x),replace=F)]
  neighDist_sim<-pairdist(sim.ppp)
  for (j in 1:length(radii_set)) {
    sim_mat[j,i]<-mean(rowSums(neighDist_sim<radii_set[j], na.rm=TRUE))-1
  }
}
conf_mat<-array(,c(length(pizza_mat[,1]),2))
for (j in 1:length(radii_set)) {
  conf_mat[j,1]<-quantile(sim_mat[j,],probs=.025,na.rm=TRUE)
  conf_mat[j,2]<-quantile(sim_mat[j,],probs=.975,na.rm=TRUE)
}
pizza_mat<-cbind(pizza_mat,conf_mat)
plot(pizza_mat[,1],pizza_mat[,2],ylim=range(c(pizza_mat[,2],pizza_mat[,3],pizza_mat[,4])),
     xlab="Radius",ylab="Count",col="red",cex=1,pch=20)
par(new=T)
plot(pizza_mat[,1],pizza_mat[,3],ylim=range(c(pizza_mat[,2],pizza_mat[,3],pizza_mat[,4])),
     axes=FALSE,xlab="",ylab="",main="Count Pizza Restaurants within Radius",pch=25,cex=0.5)
par(new=T)
plot(pizza_mat[,1],pizza_mat[,4],ylim=range(c(pizza_mat[,2],pizza_mat[,3],pizza_mat[,4])),
     axes=FALSE,xlab="",ylab="",pch=24,cex=0.5)
```

