This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Load necessary libraries:
library(rgdal)
library(ggplot2)
library(maptools)
library(rgeos)
library(sp)
library(raster)
library(ggmap)
library(rJava)
Error: package or namespace load failed for <U+6186>Java?
.onLoad failed in loadNamespace() for 'rJava', details:
call: fun(libname, pkgname)
error: No CurrentVersion entry in Software/JavaSoft registry! Try re-installing Java and make sure R and Java have matching architectures.
Load in some map data on China. The getData() function from the raster package can be used to download administrative boundaries directly from GADM.org.
#Get data directly from website (GADM.org)
#chinaMap<-getData('GADM',country="CHN",level=1) #province level; getData is part of package raster
#Load in pre-downloaded files; data folder is local to folder where R notebook is saved
ChinaRD0<-readRDS("data/CHN_adm0.rds") #Country outline
ChinaRD1<-readRDS("data/CHN_adm1.rds") #provinces
ChinaRD2<-readRDS("data/CHN_adm2.rds") #prefectures
ChinaRD3<-readRDS("data/CHN_adm3.rds") #counties
#Plot Mainland China
plot(ChinaRD0,main="Mainland China country outline")
Let’s plot Shanghai and its counties
names(ChinaRD1) #variables in ChinaRD1 provincial dataset
[1] "OBJECTID" "ID_0" "ISO" "NAME_0" "ID_1" "NAME_1" "HASC_1" "CCN_1" "CCA_1" "TYPE_1"
[11] "ENGTYPE_1" "NL_NAME_1" "VARNAME_1"
ChinaRD1@data[1:10,] #this takes data from ChinaRD1 and shows first 10 rows
#slot(ChinaRD1,"data")[1:10,] #another way to do same as ChinaRD1@data[1:10,]
names(ChinaRD3) #names of columns in data
[1] "OBJECTID" "ID_0" "ISO" "NAME_0" "ID_1" "NAME_1" "ID_2" "NAME_2" "ID_3" "NAME_3"
[11] "CCN_3" "CCA_3" "TYPE_3" "ENGTYPE_3" "NL_NAME_3" "VARNAME_3"
shanghaiCities<-ChinaRD3[ChinaRD3$NAME_2=="Shanghai",] #Selects only cities in Shanghai municipality
plot(shanghaiCities)
plot(shanghaiCities[shanghaiCities$NAME_3=="Shanghai",],border="blue",add=TRUE) #Notice add=T; this draws on top of current plot
#Add names
text(coordinates(shanghaiCities),
labels=shanghaiCities$NAME_3,col="red",cex=0.7)
#Get some external data and merge
popData<-read.table("data/china_pop_9293.csv",header=T,sep=",",stringsAsFactors =FALSE)
str(popData)
'data.frame': 289 obs. of 5 variables:
$ GB_code : int 110000 120000 130100 130200 130300 130400 130500 130600 130700 130800 ...
$ province_name: chr "Beijing" "Tianjin" "Hebei" "Hebei" ...
$ city_name : chr "Beijing" "Tianjin" "Shijiazhuang" "Tangshan" ...
$ pop92 : num 1045 883 290 668 252 ...
$ pop93 : num 1051 888 827 672 255 ...
ChinaData2 <- slot(ChinaRD2, "data")
names(ChinaData2)
[1] "OBJECTID" "ID_0" "ISO" "NAME_0" "ID_1" "NAME_1" "ID_2" "NAME_2" "HASC_2" "CCN_2"
[11] "CCA_2" "TYPE_2" "ENGTYPE_2" "NL_NAME_2" "VARNAME_2"
names(ChinaData2)[c(6,8)]<-c("province_name","city_name")
table(ChinaData2$province_name) #Show frequency table of prefectures by province
Anhui Beijing Chongqing Fujian Gansu Guangdong Guangxi Guizhou
17 1 1 9 14 21 14 9
Hainan Hebei Heilongjiang Henan Hubei Hunan Jiangsu Jiangxi
3 11 13 18 17 14 13 11
Jilin Liaoning Nei Mongol Ningxia Hui Qinghai Shaanxi Shandong Shanghai
9 13 12 5 8 10 17 1
Shanxi Sichuan Tianjin Xinjiang Uygur Xizang Yunnan Zhejiang
11 22 1 15 7 16 11
#This is an example of merging a simple data table by two columns
prefectureData<-merge(ChinaData2, popData, by=c("province_name", "city_name"),all.x=TRUE)
prefectureData$city_name[is.na(prefectureData$pop92)] #not a great way to match
[1] "Chizhou" "Lu'an" "Ma'anshan" "Xuancheng"
[5] "Dingxi" "Gannan Tibetan" "Linxia Hui" "Longnan"
[9] "Qingyang" "Yunfu" "Chongzuo" "Fangchenggang"
[13] "Guilin" "Hezhou" "Laibin" "Bijie"
[17] "Qiandongnan Miao and Dong" "Qiannan Buyei and Miao" "Qianxinan Buyei and Miao" "Tongren"
[21] "Haikou" "Hainan" "Sanya" "Daxing'anling"
[25] "Jiyuan shi" "Enshi Tujia and Miao" "Huanggang" "Jingzhou"
[29] "Qianjiang" "Shennongjia" "Suizhou Shi" "Tianmen"
[33] "Xiantao" "Xiangxi Tujia and Miao" "Zhangjiajie" "Huai'an"
[37] "Suqian" "Taizhou" "Fuzhou" "Ji'an"
[41] "Yanbian Korean" "Huludao" "Alxa" "Baotou"
[45] "Baynnur" "Chifeng" "Hohhot" "Hulunbuir"
[49] "Ordos" "Tongliao" "Ulaan Chab" "Wuhai"
[53] "Xilin Gol" "Xing'an" "Guyuan" "Shizuishan"
[57] "Wuzhong" "Yinchuan" "Zhongwei" "Golog Tibetan"
[61] "Gyêgu Tibetan" "Haibei Tibetan" "Haidong" "Hainan Tibetan"
[65] "Haixi Mongol and Tibetan" "Huangnan Tibetan" "Shangluo" "Xi'an"
[69] "Yan'an" "Tai'an" "Jinzhong" "Luliang"
[73] "Bazhong" "Dazhou" "Garzê Tibetan" "Guang'an"
[77] "Liangshan Yi" "Meishan" "Neijiang]]" "Ngawa Tibetan and Qiang"
[81] "Ya'an" "Ziyang" "Aksu" "Altay"
[85] "Bayin'gholin Mongol" "Börtala Mongol" "Changji Hui" "Hami"
[89] "Ili Kazakh" "Karamay" "Kashgar" "Khotan"
[93] "Kizilsu Kirghiz" "Shihezi" "Tacheng" "Turfan"
[97] "Ürümqi" "Chamdo" "Lhasa" "Nagchu"
[101] "Ngari" "Nyingtri" "Shannan" "Shigatse"
[105] "Baoshan" "Chuxiong Yi" "Dali Bai" "Dehong Dai and Jingpo"
[109] "Dêqên Tibetan" "Honghe Hani and Yi" "Kunming" "Lijiang"
[113] "Lincang" "Nujiang Lisu" "Pu'er" "Qujing"
[117] "Wenshan Zhuang and Miao" "Xishuangbanna Dai" "Yuxi" "Zhaotong"
[121] "Taizhou"
#Because we are using the SP package, we can also easily merge directly onto SpatialPolygonsDataFrame
names(ChinaRD2)[c(6,8)]<-c("province_name","city_name")
popDataSP<-merge(ChinaRD2, popData, by=c("province_name", "city_name"),all.x=TRUE)
#rm(ChinaRD2) #Removes old object--not really necessary
summary(popDataSP)
Object of class SpatialPolygonsDataFrame
Coordinates:
min max
x 73.55770 134.77393
y 18.15931 53.56086
Is projected: FALSE
proj4string :
[+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0]
Data attributes:
province_name city_name OBJECTID ID_0 ISO NAME_0 ID_1
Length:344 Length:344 Min. : 1.00 Min. :49 Length:344 Length:344 Min. : 1.00
Class :character Class :character 1st Qu.: 86.75 1st Qu.:49 Class :character Class :character 1st Qu.: 8.75
Mode :character Mode :character Median :172.50 Median :49 Mode :character Mode :character Median :15.00
Mean :172.50 Mean :49 Mean :16.12
3rd Qu.:258.25 3rd Qu.:49 3rd Qu.:23.00
Max. :344.00 Max. :49 Max. :31.00
ID_2 HASC_2 CCN_2 CCA_2 TYPE_2 ENGTYPE_2 NL_NAME_2
Min. : 1.00 Length:344 Min. : NA Length:344 Length:344 Length:344 Length:344
1st Qu.: 86.75 Class :character 1st Qu.: NA Class :character Class :character Class :character Class :character
Median :172.50 Mode :character Median : NA Mode :character Mode :character Mode :character Mode :character
Mean :172.50 Mean :NaN
3rd Qu.:258.25 3rd Qu.: NA
Max. :344.00 Max. : NA
NA's :344
VARNAME_2 GB_code pop92 pop93
Length:344 Min. :110000 Min. : 10.93 Min. : 11.4
Class :character 1st Qu.:320550 1st Qu.: 88.78 1st Qu.: 105.9
Mode :character Median :371100 Median : 223.25 Median : 245.7
Mean :372528 Mean : 281.87 Mean : 305.3
3rd Qu.:441650 3rd Qu.: 416.97 3rd Qu.: 445.8
Max. :630100 Max. :1496.96 Max. :1503.7
NA's :97 NA's :121 NA's :119
#Chloropleth Plots: colors polygons according to some underlying variable
#Chloropleth Plotting Method 1: you can find different color palettes in R Helpfiles
brks <- quantile(popDataSP$pop92,seq(0,1,1/100),na.rm=TRUE) # Use 100 break points
grayCols <- gray(seq(1,.25,length=100)) # This is 0 to 1 inclusive
ints <- findInterval(popDataSP$pop92,brks,all.inside=TRUE) # Inputs: variable, break points, use all.inside=TRUE
polCols <- grayCols[ints]
#pdf("pop92_map.pdf")
plot(popDataSP,col=polCols,main="1992 Population by Prefecture")
plot(popDataSP[is.na(popDataSP$pop92),],border="red",add=TRUE) #draw red border around prefectures with missing values
#polCols[is.na(popDataSP$pop92)]<-"red"
#plot(popDataSP,col=polCols,main="1992 Population by Prefecture")
#dev.off()
Another way to do chloropleth plots using ggplot()
#Chloropleth Plotting Method 2: using ggplot2
city_map<-fortify(ChinaRD2, region= "OBJECTID")
Error: isTRUE(gpclibPermitStatus()) is not TRUE
Next, we work with points data
#Work with points data
provinceCentroids<-coordinates(ChinaRD1)
plot(ChinaRD1)
points(provinceCentroids,col="red") #adds points to current plot
plot(ChinaRD3,border="blue",add=T)
#Which prefecture is province centroid in?
spdf_pts <- SpatialPointsDataFrame(coords=provinceCentroids, data=data.frame(ChinaRD1$NAME_1))
#Make sure projection is the same (should be since comes from same data source)
proj4string(spdf_pts) <-proj4string(popDataSP)
#proj4string(spdf_pts)<-CRS(" +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
plot(popDataSP)
plot(spdf_pts,col="red",add=TRUE)
pref_pts<-over(spdf_pts,popDataSP)
pref_pts$x<-slot(spdf_pts,"coords")[,1]
pref_pts$y<-slot(spdf_pts,"coords")[,2]
plot(ChinaRD1)
points(provinceCentroids,col="red") #adds points to current plot
#Add names
text(pref_pts$x,pref_pts$y,
labels=pref_pts$city_name,col="blue",cex=0.7)
#Now write to CSV
write.table(pref_pts, file = "centroids_in_cities.CSV", sep = ",", col.names=names(pref_pts),row.names=FALSE,na="")
Using OpenStreetMap
#Mapping with OSM
sufe_names="SUFE"
sufe_pts<-SpatialPointsDataFrame(coords=cbind(121.5153883,31.2948375), data=data.frame(sufe_names))
sufe_pts$lon<-coordinates(sufe_pts)[1,1]
sufe_pts$lat<-coordinates(sufe_pts)[1,2]
sufe_map <- openmap(c(sufe_pts$lat+0.03,sufe_pts$lon-0.03),
c(sufe_pts$lat-0.03,sufe_pts$lon+0.03),
minNumTiles=10,type="osm")
Error in openmap(c(sufe_pts$lat + 0.03, sufe_pts$lon - 0.03), c(sufe_pts$lat - :
could not find function "openmap"