library(tidyverse)
library(magrittr)
library(sf)
Make sure you have the following sub-folder in your folder: https://www.dropbox.com/scl/fo/xqonwazhrwml6wp3n28md/h?dl=0&rlkey=qd0a56dfzfpqppcan1xi23597
Please note that the sub-folder is not included in the github repo due to storage constraints
Zones <- st_read(
"Mexico OD Survey/DistritosEODHogaresZMVM2017/DistritosEODHogaresZMVM2017.shp")
Reading layer `DistritosEODHogaresZMVM2017' from data source
`C:\Users\Orlan\Dropbox\Teaching\SpatialAnalysis\Tutorials\04_MexicoCity_HTS\Mexico OD Survey\DistritosEODHogaresZMVM2017\DistritosEODHogaresZMVM2017.shp'
using driver `ESRI Shapefile'
Simple feature collection with 194 features and 2 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 431878.5 ymin: 2093686 xmax: 542232.1 ymax: 2219035
Projected CRS: WGS 84 / UTM zone 14N
Trips <- read_delim(
"Mexico OD Survey/eod_2017_csv/tviaje_eod2017/conjunto_de_datos/tviaje.csv",
delim = ",")
Rows: 531594 Columns: 82── Column specification ──────────────────────────────────────────────────────────────
Delimiter: ","
chr (22): n_via, p5_6, p5_7_6, p5_7_7, dto_origen, p5_9_1, p5_9_2, p5_10_1, p5_10_...
dbl (58): id_via, id_soc, p5_3, p5_14_01, p5_15_01, p5_14_02, p5_15_02, p5_14_03, ...
lgl (2): p5_15_15, p5_15_19
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#p5_11: HomeTrip or not
#p5_13: Trip Purpose
#p5_9_1: Time the trip started
TripsStages <- read_delim(
"Mexico OD Survey/eod_2017_csv/ttransporte_eod2017/conjunto_de_datos/ttransporte.csv",
delim = ",")
Rows: 890748 Columns: 18── Column specification ──────────────────────────────────────────────────────────────
Delimiter: ","
chr (11): n_via, p5_14, p5_16, p5_16_1_1, p5_16_1_2, p5_16_2, p5_17_1c, p5_17_2c, ...
dbl (7): id_tra, id_via, p5_3, estrato, factor, tloc, sexo
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# id_via is the unique identifier. It is consistent with Trips
# p5_14 is the transport mode
# p5_16 is the order of the stages.
People <- read_delim(
"Mexico OD Survey/eod_2017_csv/tsdem_eod2017/conjunto_de_datos/tsdem.csv",
delim = ",")
Rows: 200117 Columns: 22── Column specification ──────────────────────────────────────────────────────────────
Delimiter: ","
chr (10): n_ren, edad, niv, p5_4, p6_4, upm_dis, est_dis, distrito, ent, mun
dbl (12): id_soc, id_hog, parentesco, sexo, gra, p3_7, p3_8, p4_2, p4_3, estrato, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#id_soc is the unique identifier por people
Home <- read_delim(
"Mexico OD Survey/eod_2017_csv/thogar_eod2017/conjunto_de_datos/thogar.csv",
delim = ",")
Rows: 56685 Columns: 20── Column specification ──────────────────────────────────────────────────────────────
Delimiter: ","
chr (7): hogar, n_inf, upm_dis, est_dis, distrito, ent, mun
dbl (13): id_hog, id_viv, p2_1_1, p2_1_2, p2_1_3, p2_2, p2_2b_1, p2_2b_2, p2_2b_3,...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
plot(st_geometry(Zones))
Zones %>% glimpse()
Rows: 194
Columns: 3
$ Distrito <chr> "001", "002", "003", "004", "005", "006", "007", "008", "009", "0…
$ Descripcio <chr> "Centro Histórico", "Buenavista-Reforma", "Tlatelolco", "Morelos"…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((485707.7 21..., MULTIPOLYGON (((4850…
dim(Trips)
[1] 531594 82
dim(TripsStages)
[1] 890748 18
names(Trips)
[1] "id_via" "id_soc" "p5_3" "n_via" "p5_6" "p5_7_6"
[7] "p5_7_7" "dto_origen" "p5_9_1" "p5_9_2" "p5_10_1" "p5_10_2"
[13] "p5_11a" "p5_12_6" "p5_12_7" "dto_dest" "p5_13" "p5_14_01"
[19] "p5_15_01" "p5_14_02" "p5_15_02" "p5_14_03" "p5_15_03" "p5_14_04"
[25] "p5_15_04" "p5_14_05" "p5_15_05" "p5_14_06" "p5_15_06" "p5_14_07"
[31] "p5_15_07" "p5_14_08" "p5_15_08" "p5_14_09" "p5_15_09" "p5_14_10"
[37] "p5_15_10" "p5_14_11" "p5_15_11" "p5_14_12" "p5_15_12" "p5_14_13"
[43] "p5_15_13" "p5_14_14" "p5_15_14" "p5_14_15" "p5_15_15" "p5_14_16"
[49] "p5_15_16" "p5_14_17" "p5_15_17" "p5_14_18" "p5_15_18" "p5_14_19"
[55] "p5_15_19" "p5_14_20" "p5_15_20" "p5_18" "p5_19" "p5_20"
[61] "p5_21_1" "p5_21_2" "p5_22" "p5_23" "p5_24" "p5_25"
[67] "p5_26" "p5_27_1" "p5_27_2" "p5_27_3" "p5_27_4" "p5_27_5"
[73] "p5_27_6" "p5_27_7" "p5_27_8" "estrato" "factor" "upm_dis"
[79] "est_dis" "tloc" "sexo" "edad"
names(TripsStages)
[1] "id_tra" "id_via" "p5_3" "n_via" "p5_14" "p5_16"
[7] "p5_16_1_1" "p5_16_1_2" "p5_16_2" "p5_17_1c" "p5_17_2c" "estrato"
[13] "factor" "upm_dis" "est_dis" "tloc" "sexo" "edad"
Keep in mind that for the “Trips” object:
And keep in mind that for the “Trips”Stages” object: - id_via is the unique identifier. It is consistent with the “Trips” - p5_14 is the transport mode - p5_16 is the order of the stages.
dim(People)
[1] 200117 22
dim(Home)
[1] 56685 20
names(People)
[1] "id_soc" "id_hog" "n_ren" "parentesco" "sexo" "edad"
[7] "niv" "gra" "p3_7" "p3_8" "p4_2" "p4_3"
[13] "p5_4" "p6_4" "estrato" "factor" "upm_dis" "est_dis"
[19] "tloc" "distrito" "ent" "mun"
names(Home)
[1] "id_hog" "id_viv" "hogar" "n_inf" "p2_1_1" "p2_1_2" "p2_1_3"
[8] "p2_2" "p2_2b_1" "p2_2b_2" "p2_2b_3" "p2_2b_4" "estrato" "factor"
[15] "upm_dis" "est_dis" "tloc" "distrito" "ent" "mun"
Keep in mind that id_soc is the unique identifier for every individual
Trips %>% group_by(p5_13) %>% summarise(TotalTrips = n()) %>% arrange(desc(TotalTrips))
class(Trips$p5_13)
[1] "character"
PurposeNames <- data.frame(p5_13 = c(1:10,99),
Purpose = c("Hogar","Trabajar", "Estudiar",
"Otro", "Recreacion","Otro", "Otro",
"Salud", rep("Otro",3)))
Trips %<>%
mutate(p5_13 = as.numeric(p5_13)) %>%
left_join(PurposeNames) %>%
select(id_via, Purpose, dto_origen, dto_dest)
Joining, by = "p5_13"
Trips
TripsStages %>% group_by(p5_14) %>%
summarise(TotalPerModel = n()) %>%
arrange(desc(TotalPerModel))
class(TripsStages$p5_14)
[1] "character"
ModesNames <- data.frame(
p5_14 = seq(1:20),
Modes = c("Automovil", "Colectivo_Micro", "App",
"Taxi", "Metro", "Autobus_RTP_M1",
"Bicicleta", "Autobus", "Moto", "Trolebus",
"Metrobus_Mexibus", "TrenLigero","TrenSuburbano",
"Caminar", "Mexicable", "Bicitaxi", "Mototaxi",
"TransporteEscolar", "TransporteDePersonal", "Otro"))
TripsStages %<>%
mutate(p5_14 = as.numeric(p5_14)) %>%
left_join(ModesNames) %>%
select(id_via, Modes,
TT_Hours = p5_16_1_1, TT_Minutes = p5_16_1_2) %>%
mutate(TT_Hours = as.numeric(TT_Hours),
TT_Minutes = as.numeric(TT_Minutes)) %>%
filter(!TT_Hours == 99) %>% filter(!TT_Minutes == 99) %>%
filter(TT_Hours < 4) %>% # Remove trips lasting more than 4 hours.
mutate(TravelTime = TT_Hours*60 + TT_Minutes)
Joining, by = "p5_14"
Trips_Final <- TripsStages %>% left_join(Trips)
Joining, by = "id_via"
Trips_Final
Why is this not accurate?
Agg_TotalTrips <- Trips_Final %>% group_by(dto_origen) %>%
summarise(TotalTrips = n())
Agg_TripsPerMode <- Trips_Final %>%
group_by(Modes, dto_origen) %>%
summarise(
TripsPerMode = n(),
TravelTime = mean(TravelTime)
)
`summarise()` has grouped output by 'Modes'. You can override using the `.groups` argument.
How do we fix this?
Agg_TripsPerMode %>%
pivot_wider(id_cols = dto_origen,
names_from = Modes, names_prefix = "Trips_",
values_from = TripsPerMode, values_fill = 0)
Agg_Trips_Trips <- Agg_TripsPerMode %>%
pivot_wider(id_cols = dto_origen,
names_from = Modes, names_prefix = "Trips_",
values_from = TripsPerMode, values_fill = 0)
Agg_Trips_TravelTime <- Agg_TripsPerMode %>%
pivot_wider(id_cols = dto_origen,
names_from = Modes, names_prefix = "TravelTime_",
values_from = TravelTime, values_fill = 0)
summary(People$edad)
Length Class Mode
200117 character character
dim(People)
[1] 200117 22
People %>% filter(edad == "00") %>% dim()
[1] 1779 22
People %>% filter(edad == "97") %>% dim()
[1] 65 22
People %>% filter(edad == "99") %>% dim()
[1] 144 22
People %<>% filter(!edad %in% c("00", "97", "99"))
dim(People)
[1] 198129 22
People$edad = as.numeric(People$edad)
People_TotalPeople_Age <- People %>% group_by(distrito) %>%
summarise(TotalPeople = n(),
Age = mean(edad, na.rm = TRUE))
People_TotalPeople_Age
People %>% group_by(niv) %>% summarise(Total = n())
People %<>%
mutate(EducationLevel = recode(niv,
"00" = "Low",
"01" = "Low",
"02" = "Low",
"03" = "Low",
"99" = "Low", # Is this correct?
"04" = "Medium",
"05" = "Medium",
"06" = "Medium",
"07" = "Medium",
"08" = "High",
"09" = "High"))
People_EducationLevel <- People %>% group_by(distrito, EducationLevel) %>%
summarise(Total = n()) %>% drop_na() %>%
pivot_wider(id_cols = distrito,
names_from = EducationLevel, names_prefix = "TotalEducation_",
values_from = Total, values_fill = 0)
`summarise()` has grouped output by 'distrito'. You can override using the `.groups` argument.
People_EducationLevel
sexo
summary(People$sexo)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000 1.000 2.000 1.517 2.000 2.000
summary(as.factor(People$sexo))
1 2
95779 102350
People %<>% mutate(Gender = if_else(sexo == 1, "Male", "Female"))
People_Gender <- People %>% group_by(distrito, Gender) %>%
summarise(Total = n()) %>%
pivot_wider(id_cols = distrito,
names_from = Gender, names_prefix = "Total_",
values_from = Total, values_fill = 0)
`summarise()` has grouped output by 'distrito'. You can override using the `.groups` argument.
People_Gender
p2_1_1: ¿Cuántos autos o camionetas tienen para transportarse cotidianamente? p2_1_2: ¿Cuántas motocicletas o motonetas tienen para transportarse cotidianamente? p2_1_3: ¿Cuántas bicicletas tienen para transportarse cotidianamente?
Home %<>%
select(distrito,
Cars = p2_1_1,
Motos = p2_1_2,
Bicycles = p2_1_3) %>%
mutate(
Has_Car = if_else(Cars > 0, 1, 0),
Has_Motos = if_else(Motos > 0, 1, 0),
Has_Bicycles = if_else(Bicycles > 0, 1, 0))
Home
Homes_TotalHomes <- Home %>%
group_by(distrito) %>%
summarise(Total_Homes = n())
Homes_CarOwnership <- Home %>%
filter(Has_Car == 1) %>%
group_by(distrito) %>%
summarise(Homes_With_Cars = n())
Homes_MotorbikeOwnership <- Home %>%
filter(Has_Motos == 1) %>%
group_by(distrito) %>%
summarise(Homes_With_Motorbikes = n())
Homes_BicycleOwnership <- Home %>%
filter(Has_Bicycles == 1) %>%
group_by(distrito) %>%
summarise(Homes_With_Bicycles = n())
Homes_Mean_Veahicles <- Home %>%
group_by(distrito) %>%
summarise(
Cars_mean = mean(Cars),
Motorbikes_mean = mean(Bicycles),
Bicycles_mean = mean(Bicycles)
)
Data_Agg <- Agg_TotalTrips %>%
left_join(Agg_Trips_Trips) %>%
left_join(Agg_Trips_TravelTime) %>%
left_join(People_TotalPeople_Age, by = c("dto_origen" = "distrito")) %>%
left_join(People_EducationLevel, by = c("dto_origen" = "distrito")) %>%
left_join(People_Gender, by = c("dto_origen" = "distrito")) %>%
left_join(Homes_TotalHomes, by = c("dto_origen" = "distrito")) %>%
left_join(Homes_CarOwnership, by = c("dto_origen" = "distrito")) %>%
left_join(Homes_MotorbikeOwnership, by = c("dto_origen" = "distrito")) %>%
left_join(Homes_BicycleOwnership, by = c("dto_origen" = "distrito")) %>%
left_join(Homes_Mean_Veahicles, by = c("dto_origen" = "distrito"))
Joining, by = "dto_origen"Joining, by = "dto_origen"
Check the districts 888 and 999
MexicoCity_HTS <- Zones %>%
left_join(Data_Agg, by = c("Distrito" = "dto_origen"))
dim(MexicoCity_HTS)
[1] 194 58
names(MexicoCity_HTS)
[1] "Distrito" "Descripcio"
[3] "TotalTrips" "Trips_App"
[5] "Trips_Autobus" "Trips_Autobus_RTP_M1"
[7] "Trips_Automovil" "Trips_Bicicleta"
[9] "Trips_Bicitaxi" "Trips_Caminar"
[11] "Trips_Colectivo_Micro" "Trips_Metro"
[13] "Trips_Metrobus_Mexibus" "Trips_Mexicable"
[15] "Trips_Moto" "Trips_Mototaxi"
[17] "Trips_Otro" "Trips_Taxi"
[19] "Trips_TransporteDePersonal" "Trips_TransporteEscolar"
[21] "Trips_TrenLigero" "Trips_TrenSuburbano"
[23] "Trips_Trolebus" "TravelTime_App"
[25] "TravelTime_Autobus" "TravelTime_Autobus_RTP_M1"
[27] "TravelTime_Automovil" "TravelTime_Bicicleta"
[29] "TravelTime_Bicitaxi" "TravelTime_Caminar"
[31] "TravelTime_Colectivo_Micro" "TravelTime_Metro"
[33] "TravelTime_Metrobus_Mexibus" "TravelTime_Mexicable"
[35] "TravelTime_Moto" "TravelTime_Mototaxi"
[37] "TravelTime_Otro" "TravelTime_Taxi"
[39] "TravelTime_TransporteDePersonal" "TravelTime_TransporteEscolar"
[41] "TravelTime_TrenLigero" "TravelTime_TrenSuburbano"
[43] "TravelTime_Trolebus" "TotalPeople"
[45] "Age" "TotalEducation_High"
[47] "TotalEducation_Low" "TotalEducation_Medium"
[49] "Total_Female" "Total_Male"
[51] "Total_Homes" "Homes_With_Cars"
[53] "Homes_With_Motorbikes" "Homes_With_Bicycles"
[55] "Cars_mean" "Motorbikes_mean"
[57] "Bicycles_mean" "geometry"
st_crs(MexicoCity_HTS)
Coordinate Reference System:
User input: WGS 84 / UTM zone 14N
wkt:
PROJCRS["WGS 84 / UTM zone 14N",
BASEGEOGCRS["WGS 84",
DATUM["World Geodetic System 1984",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4326]],
CONVERSION["UTM zone 14N",
METHOD["Transverse Mercator",
ID["EPSG",9807]],
PARAMETER["Latitude of natural origin",0,
ANGLEUNIT["Degree",0.0174532925199433],
ID["EPSG",8801]],
PARAMETER["Longitude of natural origin",-99,
ANGLEUNIT["Degree",0.0174532925199433],
ID["EPSG",8802]],
PARAMETER["Scale factor at natural origin",0.9996,
SCALEUNIT["unity",1],
ID["EPSG",8805]],
PARAMETER["False easting",500000,
LENGTHUNIT["metre",1],
ID["EPSG",8806]],
PARAMETER["False northing",0,
LENGTHUNIT["metre",1],
ID["EPSG",8807]]],
CS[Cartesian,2],
AXIS["(E)",east,
ORDER[1],
LENGTHUNIT["metre",1]],
AXIS["(N)",north,
ORDER[2],
LENGTHUNIT["metre",1]],
ID["EPSG",32614]]
MexicoCity_HTS <- st_transform(MexicoCity_HTS, 4326)
ls()
[1] "Agg_TotalTrips" "Agg_Trips_TravelTime" "Agg_Trips_Trips"
[4] "Agg_TripsPerMode" "Data_Agg" "Home"
[7] "Homes_BicycleOwnership" "Homes_CarOwnership" "Homes_Mean_Veahicles"
[10] "Homes_MotorbikeOwnership" "Homes_TotalHomes" "MexicoCity_HTS"
[13] "ModesNames" "People" "People_EducationLevel"
[16] "People_Gender" "People_TotalPeople_Age" "PurposeNames"
[19] "Trips" "Trips_Final" "TripsStages"
[22] "Zones"
rm(
Agg_TotalTrips, Agg_Trips_TravelTime, Agg_Trips_Trips,
Agg_TripsPerMode, Data, Data_Agg, Home, Homes_BicycleOwnership, Homes_CarOwnership,
Homes_Mean_Veahicles, Homes_MotorbikeOwnership, Homes_TotalHomes, ModesNames, People, People_EducationLevel, People_Gender, People_TotalPeople_Age,
PurposeNames,Trips, Trips_Final, TripsStages, Zones )
Warning: object 'Data' not found
ls()
[1] "MexicoCity_HTS"
```r
save.image(file = \Mexico_HTS.RData\)
st_write(MexicoCity_HTS, \MexicoCity_HTS.shp\)
<!-- rnb-source-end -->
<!-- rnb-output-begin eyJkYXRhIjoiV2FybmluZzogRmllbGQgbmFtZXMgYWJicmV2aWF0ZWQgZm9yIEVTUkkgU2hhcGVmaWxlIGRyaXZlclxuV3JpdGluZyBsYXllciBgTWV4aWNvQ2l0eV9IVFMnIHRvIGRhdGEgc291cmNlIFxuICBgTWV4aWNvQ2l0eV9IVFMuc2hwJyB1c2luZyBkcml2ZXIgYEVTUkkgU2hhcGVmaWxlJ1xuV3JpdGluZyAxOTQgZmVhdHVyZXMgd2l0aCA1NyBmaWVsZHMgYW5kIGdlb21ldHJ5IHR5cGUgTXVsdGkgUG9seWdvbi5cbiJ9 -->
Warning: Field names abbreviated for ESRI Shapefile driver Writing
layer MexicoCity_HTS' to data source
MexicoCity_HTS.shp’
using driver `ESRI Shapefile’ Writing 194 features with 57 fields and
geometry type Multi Polygon.
<!-- rnb-output-end -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuTWV4aWNvQ2l0eV9IVFNfRGF0YWJhc2UgPC0gTWV4aWNvQ2l0eV9IVFMgJT4lIHN0X3NldF9nZW9tZXRyeShOVUxMKVxuTWV4aWNvQ2l0eV9EaXN0cmljdHMgPC0gTWV4aWNvQ2l0eV9IVFMgJT4lIHNlbGVjdChEaXN0cml0bylcblxuXG53cml0ZV9kZWxpbShNZXhpY29DaXR5X0hUU19EYXRhYmFzZSwgXFxNZXhpY29DaXR5X0hUU19EYXRhYmFzZS5jc3ZcXCwgZGVsaW0gPSBcXDtcXClcbnN0X3dyaXRlKE1leGljb0NpdHlfRGlzdHJpY3RzLCBcXE1leGljb0NpdHlfRGlzdHJpY3RzLnNocFxcKVxuYGBgXG5gYGAifQ== -->
```r
```r
MexicoCity_HTS_Database <- MexicoCity_HTS %>% st_set_geometry(NULL)
MexicoCity_Districts <- MexicoCity_HTS %>% select(Distrito)
write_delim(MexicoCity_HTS_Database, \MexicoCity_HTS_Database.csv\, delim = \;\)
st_write(MexicoCity_Districts, \MexicoCity_Districts.shp\)
<!-- rnb-source-end -->
<!-- rnb-output-begin eyJkYXRhIjoiV3JpdGluZyBsYXllciBgTWV4aWNvQ2l0eV9EaXN0cmljdHMnIHRvIGRhdGEgc291cmNlIFxuICBgTWV4aWNvQ2l0eV9EaXN0cmljdHMuc2hwJyB1c2luZyBkcml2ZXIgYEVTUkkgU2hhcGVmaWxlJ1xuV3JpdGluZyAxOTQgZmVhdHVyZXMgd2l0aCAxIGZpZWxkcyBhbmQgZ2VvbWV0cnkgdHlwZSBNdWx0aSBQb2x5Z29uLlxuIn0= -->
Writing layer
MexicoCity_Districts' to data source
MexicoCity_Districts.shp’
using driver `ESRI Shapefile’ Writing 194 features with 1 fields and
geometry type Multi Polygon. ```