已部署到shinyapps.io,详见
0.配置环境和加载包
## Encoding ----
# Read and write text as UTF-8 so the Chinese station names survive.
# Use getOption("encoding") to check whether the setting took effect.
options(encoding = "UTF-8")

# Return the locale name for `language` on the operating system `os`.
#
# os:       operating system name as reported by Sys.info()[["sysname"]].
# language: "english" (default) or "chinese".
# Returns a single locale string to hand to Sys.setlocale().
# Stops with an error for any other `language` instead of returning NULL.
loc <- function(os, language = "english") {
  # `os` is a single string, so a plain if/else is used instead of ifelse().
  is_windows <- isTRUE(os == "Windows")
  switch(language,
    english = if (is_windows) "English_United States.1252" else "en_US.UTF-8",
    chinese = if (is_windows) "Chinese" else "zh_CN.utf-8",
    stop("unsupported language: ", language, call. = FALSE)
  )
}

## Locale ----
Sys.setlocale(category = "LC_ALL", loc(Sys.info()[["sysname"]], "chinese"))

## Working directory and packages ----
# The project path only exists on the original author's machine; switch to it
# when present and otherwise keep the current directory, so the script also
# starts where the relative "data/..." paths are already valid.
project_dir <- "/Users/jeevanyue/Rproject/map/SHMetro"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

library(data.table)
library(bit64)
library(dplyr)
library(tidyr)
library(scales)
library(lubridate) # date/time handling
library(shiny)
library(leaflet)
library(lattice)
library(plotly)
library(chorddiag) # chord diagrams
1. 地铁数据
# Load the preprocessed data sets used by the app.

# Per-station entry and exit counts.
shmetro_in <- fread("data/shmetro_in.csv", encoding = "UTF-8")
shmetro_out <- fread("data/shmetro_out.csv", encoding = "UTF-8")

# Trip counts per (entry line, exit line) pair.
shmetro_line_in_out <- fread("data/shmetro_line_in_out.csv", encoding = "UTF-8")

# Entry-line x exit-line matrix: one row per entry line, one column per exit
# line; pairs with no trips become 0.
in_out <- shmetro_line_in_out %>% spread(line_out, count)
in_out[is.na(in_out)] <- 0

# Station coordinates: keep the first five columns, ordered by line and by
# position along the line.
stations <- fread("data/stations.csv", encoding = "UTF-8")
stations <- stations %>%
  select(c(1:5)) %>%
  arrange(line, line_id)
stations_no <- nrow(stations)

# `lines` lists every line serving the row's station, joined with "/".
# Built with one grouped lookup instead of a row-by-row loop, which also
# keeps it well-defined when the table has zero rows.
lines_by_station <- tapply(stations$line, stations$station, paste, collapse = "/")
stations$lines <- as.character(lines_by_station[as.character(stations$station)])
1.1 地铁站经纬度
# Station coordinates: keep the first five columns, ordered by line and by
# position along the line.
stations <- fread("data/stations.csv", encoding = "UTF-8")
stations <- stations %>%
  select(c(1:5)) %>%
  arrange(line, line_id)
stations_no <- nrow(stations)

# `lines` lists every line serving the row's station, joined with "/".
# Built with one grouped lookup instead of a row-by-row loop, which also
# keeps it well-defined when the table has zero rows.
lines_by_station <- tapply(stations$line, stations$station, paste, collapse = "/")
stations$lines <- as.character(lines_by_station[as.character(stations$station)])
invisible(gc())
1.2 交通卡交易数据
交通卡的交易信息有7个字段,分别是:卡号、交易日期、交易时间、站点名称、行业名称、交易金额、交易性质。
卡号:交通卡卡号
交易日期:日期格式yyyy-mm-dd
交易时间:时间格式为hh:mm:ss
站点名称:内容包括线路和站名,如:"1号线莘庄"
行业名称:都是"地铁"
交易金额:0和大于0的值,0表示进站,大于0的值表示出站
交易性质:"优惠"和"非优惠"
# Read the raw card transactions (macOS variant); the elapsed time is reported.
system.time(
  trade <- read.csv(
    "/Users/jeevanyue/Desktop/SPTCC-20150401.csv",
    header = FALSE,
    sep = ",",
    fileEncoding = "GB2312"
  )
)
# Windows variant:
# system.time(trade <- fread("SPTCC-20150401/SPTCC-20150401.csv", integer64 = "character", stringsAsFactors = FALSE))
# Sample-file variant:
# trade <- read.csv("data/SPTCC-20150401_Sample.txt", header = TRUE, encoding = "UTF-8", stringsAsFactors = FALSE)

# Name the seven columns.
names(trade) <- c("card_id", "date", "time", "station", "vehicle", "money", "property")

# Keep metro transactions only, then release the full table.
trade_metro <- trade %>%
  filter(vehicle == "地铁")
rm(trade)
invisible(gc())

trade_metro <- trade_metro %>%
  # "station" holds line and station name together; split it at "号线".
  separate(station, c("line", "station"), sep = "号线") %>%
  # M5 is the five-minute bucket of the transaction time, rounded up.
  mutate(M5 = ceiling(period_to_seconds(hms(time)) / 300)) %>%
  # Drop the columns that are no longer needed.
  select(-vehicle, -property, -date)
invisible(gc())

# Optional conversion of `time` to a date-time:
# trade_metro$time <- strptime(paste("2015-04-01", trade_metro$time, sep = " "), "%Y-%m-%d %H:%M:%S", tz = "GMT")
1.3 处理异常值
# Align station names in the trade data with the names used in `stations`.
# Replacement is done on the column vector, so a name that does not occur in
# the data is simply skipped; the data-frame form
# `df[cond, ]$station <- value` raises an error when no row matches.
wrong_names <- c("淞浜路", "大木桥路 ", "上海大学站")
right_names <- c("淞滨路", "大木桥路", "上海大学")

# Position of each station in `wrong_names`, NA when it needs no fix.
fix_idx <- match(trade_metro$station, wrong_names)
needs_fix <- !is.na(fix_idx)
trade_metro$station[needs_fix] <- right_names[fix_idx[needs_fix]]
1.4 进/出站数据
# Entries: transactions with a zero amount. Columns get an "_in" suffix.
trade_metro_in <- trade_metro %>%
  filter(money == 0) %>%
  select(
    card_id,
    time_in = time,
    line_in = line,
    station_in = station,
    M5_in = M5
  )

# Exits: transactions with a positive amount. Columns get an "_out" suffix
# and the fare is kept.
trade_metro_out <- trade_metro %>%
  filter(money > 0) %>%
  select(
    card_id,
    time_out = time,
    line_out = line,
    station_out = station,
    money,
    M5_out = M5
  )
1.5 虚拟换乘
上海火车站为虚拟换乘,删除半小时内3/4换1和1换3/4的数据
3/4换1的数据
# Virtual transfer at 上海火车站 from line 3/4 to line 1.
# Author's note: exits from line 3/4 at this station were found to be
# recorded under line 3.
trade_metro_out_34 <- trade_metro_out %>%
  filter(station_out == "上海火车站", line_out == 3 | line_out == 4)
trade_metro_in_1 <- trade_metro_in %>%
  filter(station_in == "上海火车站", line_in == 1)

# Pair each exit with the entries that share its columns, then keep pairs
# where the entry follows the exit by at most 30 minutes.
trade_metro_out34_in1 <- merge(trade_metro_out_34, trade_metro_in_1, all.x = TRUE)
trade_metro_out34_in1 <- trade_metro_out34_in1 %>%
  mutate(
    duration = period_to_seconds(hms(time_in)) - period_to_seconds(hms(time_out))
  ) %>%
  filter(duration > 0, duration <= 60 * 30)
# Author's note: a few cards re-enter line 1 several times within half an hour.
# trade_metro_out34_in1 <- na.omit(trade_metro_out34_in1)

# Within each (card, entry bucket) group keep only the shortest gap.
trade_metro_out34_in1 <- data.table(trade_metro_out34_in1)
trade_metro_out34_in1[, duration_min := min(duration), by = list(card_id, M5_in)]
trade_metro_out34_in1 <- trade_metro_out34_in1 %>%
  filter(duration == duration_min) %>%
  select(-duration_min)
# Author's note: most transfers complete within 10 minutes.
# histogram(ceiling(trade_metro_out34_in1$duration / 60))

# Remove the transfer exits: append them (columns 1-6) to the exit table and
# then drop every row that now occurs more than once.
trade_metro_out <- rbind(trade_metro_out, trade_metro_out34_in1[, c(1:6)])
trade_metro_out <- trade_metro_out[
  !duplicated(trade_metro_out) & !duplicated(trade_metro_out, fromLast = TRUE),
]

# Remove the transfer entries the same way (columns 1 and 7-10).
trade_metro_in <- rbind(trade_metro_in, trade_metro_out34_in1[, c(1, 7:10)])
trade_metro_in <- trade_metro_in[
  !duplicated(trade_metro_in) & !duplicated(trade_metro_in, fromLast = TRUE),
]
1换3/4的数据
# Virtual transfer at 上海火车站 from line 1 to line 3/4.
# Author's note: entries to line 3/4 at this station were found to be
# recorded under line 3.
trade_metro_out_1 <- trade_metro_out %>%
  filter(station_out == "上海火车站", line_out == 1)
trade_metro_in_34 <- trade_metro_in %>%
  filter(station_in == "上海火车站", line_in == 3 | line_in == 4)

# Pair each exit with the entries that share its columns, then keep pairs
# where the entry follows the exit by at most 30 minutes.
trade_metro_out1_in34 <- merge(
  trade_metro_out_1, trade_metro_in_34,
  all.x = TRUE, all.y = FALSE
)
trade_metro_out1_in34 <- trade_metro_out1_in34 %>%
  mutate(
    duration = period_to_seconds(hms(time_in)) - period_to_seconds(hms(time_out))
  ) %>%
  filter(duration > 0, duration <= 60 * 30)
# trade_metro_out1_in34 <- na.omit(trade_metro_out1_in34)

# Within each (card, entry bucket) group keep only the shortest gap.
trade_metro_out1_in34 <- data.table(trade_metro_out1_in34)
trade_metro_out1_in34[, duration_min := min(duration), by = list(card_id, M5_in)]
trade_metro_out1_in34 <- trade_metro_out1_in34 %>%
  filter(duration == duration_min) %>%
  select(-duration_min)
# Author's note: most transfers complete within 10 minutes.
# histogram(ceiling(trade_metro_out1_in34$duration / 60))

# Remove the transfer exits: append them (columns 1-6) to the exit table and
# then drop every row that now occurs more than once.
trade_metro_out <- rbind(trade_metro_out, trade_metro_out1_in34[, c(1:6)])
trade_metro_out <- trade_metro_out[
  !duplicated(trade_metro_out) & !duplicated(trade_metro_out, fromLast = TRUE),
]

# Remove the transfer entries the same way (columns 1 and 7-10).
trade_metro_in <- rbind(trade_metro_in, trade_metro_out1_in34[, c(1, 7:10)])
trade_metro_in <- trade_metro_in[
  !duplicated(trade_metro_in) & !duplicated(trade_metro_in, fromLast = TRUE),
]
1.6 地铁站进站数据
根据消费金额为0,每5分钟统计每站地铁的进站人数
# Number of entries per station and five-minute bucket.
trade_metro_in_station <- trade_metro_in %>%
  group_by(station_in, M5_in) %>%
  summarise(count = n()) %>%
  select(station = station_in, M5 = M5_in, count)
# trade_metro_in_station <- na.omit(trade_metro_in_station)
invisible(gc())

# Attach station coordinates; every count row is kept even when the station
# has no match in `stations`.
# NOTE(review): `stations` appears to hold one row per (station, line), so an
# interchange station would repeat its count once per line - confirm.
shmetro_in <- merge(
  trade_metro_in_station, stations,
  all.x = TRUE, all.y = FALSE
)
# rm(trade_metro_in_station)

# To list stations that found no coordinates:
# l <- shmetro_in[is.na(shmetro_in$gps_lat), ]
# unique(l$station)
# Earlier name fixes that were applied at this stage:
# trade_metro_in_station[trade_metro_in_station$station == "淞浜路", ]$station <- "淞滨路"
# trade_metro_in_station[trade_metro_in_station$station == "大木桥路 ", ]$station <- "大木桥路"
# trade_metro_in_station[trade_metro_in_station$station == "上海大学站", ]$station <- "上海大学"
# stations[grepl("淞滨路", stations$station), ]$station
# trade_metro_in_station[grepl("淞浜路", trade_metro_in_station$station), ]$station <- "淞滨路"
# shmetro_in <- na.omit(shmetro_in)
invisible(gc())
# write.csv(shmetro_in, "shmetro_in.csv", row.names = FALSE, fileEncoding = "UTF-8")
1.7 地铁站出站数据
根据消费金额大于0,每5分钟统计每站地铁的出站人数
# Number of exits per station and five-minute bucket.
trade_metro_out_station <- trade_metro_out %>%
  group_by(station_out, M5_out) %>%
  summarise(count = n()) %>%
  select(station = station_out, M5 = M5_out, count)
# trade_metro_out_station <- na.omit(trade_metro_out_station)
invisible(gc())

# Attach station coordinates; every count row is kept even when the station
# has no match in `stations`.
shmetro_out <- merge(
  trade_metro_out_station, stations,
  all.x = TRUE, all.y = FALSE
)
# rm(trade_metro_out_station)
# shmetro_out <- na.omit(shmetro_out)
invisible(gc())
# write.csv(shmetro_out, "shmetro_out.csv", row.names = FALSE, fileEncoding = "UTF-8")
1.8 地铁线路起始和终点
# Pair entries with exits and compute the ride time in seconds (`duration`)
# and in five-minute buckets (`duration_M5`); only pairs where the exit comes
# after the entry are kept.
trade_metro_in_out <- merge(
  trade_metro_in, trade_metro_out,
  all.x = TRUE, all.y = FALSE
)
trade_metro_in_out <- trade_metro_in_out %>%
  mutate(
    duration = period_to_seconds(hms(time_out)) - period_to_seconds(hms(time_in)),
    duration_M5 = M5_out - M5_in
  ) %>%
  filter(duration > 0)

# Within each (card, entry bucket) group keep the pair with the shortest
# ride, i.e. the nearest following exit.
trade_metro_in_out <- data.table(trade_metro_in_out)
trade_metro_in_out[, duration_min := min(duration), by = list(card_id, M5_in)]
trade_metro_in_out <- trade_metro_in_out %>%
  filter(duration == duration_min) %>%
  select(-duration_min)

# Count trips for every (entry line, exit line) pair.
shmetro_line_in_out <- trade_metro_in_out %>%
  group_by(line_in, line_out) %>%
  summarise(count = n())

# Line labels become numbers so that sorting is numeric.
shmetro_line_in_out$line_in <- as.numeric(shmetro_line_in_out$line_in)
shmetro_line_in_out$line_out <- as.numeric(shmetro_line_in_out$line_out)
shmetro_line_in_out <- shmetro_line_in_out %>%
  arrange(line_in, line_out)

# Turn the exit lines into columns; pairs with no trips become 0.
in_out <- shmetro_line_in_out %>%
  spread(line_out, count)
in_out[is.na(in_out)] <- 0
# write.csv(shmetro_line_in_out, "shmetro_line_in_out.csv", row.names = FALSE, fileEncoding = "UTF-8")
2. 绘图
2.1 相关数据及地图
# Line colors. `color` is kept as character (not factor) on every R version.
lines_color <- data.frame(
  line = c(1:13, 16),
  color = c(
    "#ED3229", "#36B854", "#FFD823", "#320176", "#823094", "#CF047A", "#F3560F",
    "#008CC1", "#91C5DB", "#C7AFD3", "#8C2222", "#007a61", "#ec91cc", "#32D2CA"
  ),
  stringsAsFactors = FALSE
)
pal <- colorFactor(as.character(lines_color$color), domain = stations$line)

# Add one polyline for metro line `l_no` to a leaflet map.
#
# l_no:      line number; selects the color and the rows of `stations`.
# line_s_id: optional row positions within that line's stations to connect;
#            NULL (default) connects all of them in table order.
# base_map:  leaflet map to draw on; defaults to the global `Shanghai`, looked
#            up when the function is called.
# Returns the map with the polyline added.
draw_line_add <- function(l_no, line_s_id = NULL, base_map = Shanghai) {
  line_color <- lines_color[lines_color$line == l_no, ]$color
  line_data <- stations[stations$line == l_no, ]
  if (is.null(line_s_id)) {
    line_s_id <- seq_len(nrow(line_data))
  }
  base_map %>%
    addPolylines(
      lat = line_data$gps_lat[line_s_id],
      lng = line_data$gps_lon[line_s_id],
      color = line_color,
      weight = 2
    )
}

# Base map of Shanghai with the line-color legend.
Shanghai <- leaflet() %>%
  setView(lng = 121.60, lat = 31.20, zoom = 10) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addLegend(position = "bottomleft", pal = pal, values = stations$line)

# Draw every line onto the base map.
for (l in unique(stations$line)) {
  line_length <- nrow(stations[stations$line == l, ])
  if (l == 4) {
    # Line 4 is a loop: draw it, then connect the last station to the first.
    Shanghai <- draw_line_add(l_no = l)
    Shanghai <- draw_line_add(l_no = l, line_s_id = c(1, line_length))
  } else if (l == 10) {
    # Line 10 forks after 龙溪路: draw the trunk without the last 3 stations,
    # then the branch starting from position 24 (presumably 龙溪路 - confirm).
    Shanghai <- draw_line_add(l_no = l, line_s_id = c(1:(line_length - 3)))
    Shanghai <- draw_line_add(l_no = l, line_s_id = c(24, (line_length - 2):line_length))
  } else if (l == 11) {
    # Line 11 forks after 嘉定新城: draw the trunk without the last 7 stations,
    # then the branch starting from position 28 (presumably 嘉定新城 - confirm).
    Shanghai <- draw_line_add(l_no = l, line_s_id = c(1:(line_length - 7)))
    Shanghai <- draw_line_add(l_no = l, line_s_id = c(28, (line_length - 6):line_length))
  } else {
    Shanghai <- draw_line_add(l_no = l)
  }
}
2.2 chord图数据
# Data for the chord diagram.
# Columns 2-15 of `in_out` are the 14 exit-line columns; the first column is
# the entry line and becomes the row labels.
metro_chord <- data.matrix(as.data.frame(in_out)[, c(2:15)])
haircolors <- in_out$line_in
dimnames(metro_chord) <- list(
  have = haircolors,
  prefer = colnames(metro_chord)
)

# One color per line, in the same order as the matrix rows/columns.
groupColors <- c(
  "#ED3229", "#36B854", "#FFD823", "#320176", "#823094", "#CF047A", "#F3560F",
  "#008CC1", "#91C5DB", "#C7AFD3", "#8C2222", "#007a61", "#ec91cc", "#32D2CA"
)
# chorddiag(metro_chord, groupColors = groupColors, margin = 50, showTicks = FALSE, groupnamePadding = 5)
2.3 图形参数
# Legend placement (top-left) with the background color used by the charts.
b <- list(
  x = 0,
  y = 1,
  bgcolor = "#00FFFFFF"
)

# Y axis: no title, zero line, axis line, tick labels or grid.
yax <- list(
  title = "",
  zeroline = FALSE,
  showline = FALSE,
  showticklabels = FALSE,
  showgrid = FALSE
)

# X axis: no title, small title font, slightly rotated black tick labels.
xax <- list(
  title = "",
  titlefont = list(size = 8),
  tickangle = -20,
  color = "black"
)
2.4 UI 和 SERVER
# CSS that pins a tab's content below the navbar and stretches it over the
# rest of the window.
outer_css <- ".outer {position: fixed; top: 41px; left: 0; right: 0; bottom: 0; overflow: hidden; padding: 0}"

# Date-time that lies `seconds` after midnight of 2015-04-01 (GMT).
day_time <- function(seconds) {
  as.POSIXct(seconds, origin = "2015-04-01", tz = "GMT")
}

# Build one flow tab: a full-size map plus a control panel with the time
# slider, line selector, TOP 5 chart and legend toggle.
# The *_id arguments are the input/output ids used by the server.
flow_tab <- function(title, map_id, time_text_id, slider_id, line_id,
                     top5_id, legend_id) {
  tabPanel(
    title,
    div(
      class = "outer",
      tags$style(type = "text/css", outer_css),
      leafletOutput(map_id, width = "100%", height = "100%"),
      absolutePanel(
        top = 10, right = 10,
        h4(textOutput(time_text_id)),
        # The slider runs from 05:00 to 24:00 in five-minute steps; the bounds
        # are fixed rather than derived from the data.
        sliderInput(
          slider_id, "Time:",
          min = day_time(5 * 60 * 60),
          max = day_time(24 * 60 * 60),
          value = day_time(5 * 60 * 60),
          step = 60 * 5, timeFormat = "%T", timezone = "GMT"
        ),
        selectInput(line_id, "Line", c("All", lines_color$line)),
        h4("TOP 5"),
        plotlyOutput(top5_id, height = 200),
        checkboxInput(legend_id, "Show legend", TRUE)
      )
    )
  )
}

ui <- shinyUI(navbarPage(
  "SHMetro",
  flow_tab(
    "进站流量", "map", "output_slider_time", "slider_time",
    "select_line", "in_top5", "legend"
  ),
  flow_tab(
    "出站流量", "map_out", "output_slider_time_out", "slider_time_out",
    "select_line_out", "out_top5", "legend_out"
  ),
  tabPanel(
    "线路关联",
    div(
      class = "outer",
      tags$style(type = "text/css", outer_css),
      chorddiagOutput("line_chord", width = "100%", height = "100%")
    )
  )
))

server <- shinyServer(function(input, output, session) {
  # ---- Helpers shared by the entry and exit tabs ----

  # Slider value formatted as "HH:MM:SS".
  slider_clock <- function(slider_value) {
    format(slider_value, "%H:%M:%S")
  }

  # Rows of `flow` in the five-minute bucket selected on the slider,
  # optionally restricted to one line ("All" keeps every line).
  select_flow <- function(flow, slider_value, line_choice) {
    target_m5 <- ceiling(period_to_seconds(hms(slider_clock(slider_value))) / 300)
    if (line_choice == "All") {
      flow %>% filter(M5 == target_m5)
    } else {
      target_line <- as.numeric(line_choice)
      flow %>% filter(M5 == target_m5, line == target_line)
    }
  }

  # The five stations with the largest summed count, as a plain data frame.
  top5_stations <- function(flow) {
    flow %>%
      group_by(station) %>%
      summarise(count = sum(count), line = min(line)) %>%
      arrange(desc(count)) %>%
      head(5) %>%
      as.data.frame()
  }

  # Bar chart of a top-5 table; NULL (no plot) when the table is empty.
  top5_chart <- function(top5) {
    if (nrow(top5) == 0) {
      return(NULL)
    }
    plot_ly(
      top5,
      x = top5$station, y = top5$count, type = "bar",
      marker = list(color = pal(top5$line)), bgcolor = "#00FFFFFF"
    ) %>%
      layout(
        showlegend = FALSE, yaxis = yax, xaxis = xax,
        plot_bgcolor = "#00FFFFFF", paper_bgcolor = "#00FFFFFF"
      )
  }

  # Base map with one small dot per station.
  station_map <- function() {
    Shanghai %>%
      addCircles(
        stations$gps_lon, stations$gps_lat,
        color = pal(stations$line), radius = 1,
        popup = paste(stations$station, stations$lines),
        fillOpacity = 1, stroke = FALSE
      ) %>%
      clearMarkerClusters() %>%
      clearMarkers()
  }

  # Replace the flow bubbles on map `map_id` with those for `flow`.
  # Counts are summed per (station, bucket) and drawn smallest first.
  draw_bubbles <- function(map_id, flow) {
    bubbles <- data.table(flow)[, count := sum(count), by = list(station, M5)] %>%
      arrange(count)
    leafletProxy(map_id, data = bubbles) %>%
      clearMarkerClusters() %>%
      clearMarkers() %>%
      addCircleMarkers(
        bubbles$gps_lon, bubbles$gps_lat,
        color = pal(bubbles$line), fillOpacity = 0.5, stroke = FALSE,
        popup = paste(bubbles$station, bubbles$line, bubbles$count, sep = ","),
        radius = (bubbles$count)^(1 / 2.5)
      )
  }

  # Remove any existing legend from map `map_id` and add it back only when
  # `show` is TRUE.
  sync_legend <- function(map_id, show) {
    proxy <- leafletProxy(map_id)
    proxy %>% clearControls()
    if (show) {
      proxy %>% addLegend(position = "bottomleft", pal = pal, values = stations$line)
    }
  }

  # ---- Entry flow ----
  filteredData <- reactive({
    select_flow(shmetro_in, input$slider_time, input$select_line)
  })
  stations_in_top5 <- reactive({
    top5_stations(filteredData())
  })
  output$output_slider_time <- renderText({
    paste0("Time: ", slider_clock(input$slider_time))
  })
  output$map <- renderLeaflet({
    station_map()
  })
  observe({
    draw_bubbles("map", filteredData())
  })
  output$in_top5 <- renderPlotly({
    top5_chart(stations_in_top5())
  })
  observe({
    sync_legend("map", input$legend)
  })

  # ---- Exit flow ----
  # Both branches read the exit table; the single-line branch previously
  # filtered the entry table, so the exit tab showed entry counts.
  filteredData_out <- reactive({
    select_flow(shmetro_out, input$slider_time_out, input$select_line_out)
  })
  stations_out_top5 <- reactive({
    top5_stations(filteredData_out())
  })
  output$output_slider_time_out <- renderText({
    paste0("Time: ", slider_clock(input$slider_time_out))
  })
  output$map_out <- renderLeaflet({
    station_map()
  })
  observe({
    draw_bubbles("map_out", filteredData_out())
  })
  output$out_top5 <- renderPlotly({
    top5_chart(stations_out_top5())
  })
  observe({
    sync_legend("map_out", input$legend_out)
  })

  # ---- Line-to-line chord diagram ----
  output$line_chord <- renderChorddiag({
    chorddiag(metro_chord, groupColors = groupColors, showTicks = FALSE, groupnamePadding = 5)
  })
})
2.5 运行shinyApp
# Launch the app from the UI definition and the server function.
shinyApp(
  ui = ui,
  server = server
)
进站流量
出站流量
进出地铁