Water and citizenship

Author

Sendas, UV, Inecol

Statistical methodology report

The data obtained from the Google Forms online form, was cleaned to reduce high cardinality in open-end questions; the database was revised with Ydata Profile Report (Ydata, 2024) for quality assessment and correction. Likert-type questions were ordered and refactored before including them in statistical analyses. Variables with location information were linked to their corresponding geospatial layers in R (R Core Team, 2024).

We performed several statistical analyses, departing from an exploratory data analysis of the most important and answered variables. Pearson correlations were performed on continuous variables, and linear models were built for variables such as water usage or service quality as response variables of demographics. Student’s t test was used for comparing group means, ANOVAs were used for assessing relationship between response and predictor variables; we also used chi-square tests for categorical variables and log likelihood tests for binary variables. All analyses were carried in R (R Core Team, 2024)

Code
library(leaflet)
library(lemon)
library(sensemakr)
library(dplyr)
library(ggplot2)
library(readxl)
library(forcats)
library(stringr)
library(ggsci)
library(sf)
library(tidyr)
library(magrittr)
library(stargazer)
library(knitr)

base = read_excel("../El Agua y la Ciudadanía.xlsx") |> 
  dplyr::select(starts_with("limpio"))  |>
  dplyr::filter(limpio_gasto_agua_num < 1000) 

munis = read_sf("~/geoespacial/muni_2018gw.shp",  
                query = "select * from \"muni_2018gw\" where cve_ent = '30' ")

names(base) = gsub("limpio_", "", names(base))

munisb = names(table(base$municipio)[ table(base$municipio) >= 5])
munis = munis |> filter(NOM_MUN %in% munisb)
base = filter(base, municipio %in% munisb)
encuadre = st_bbox(munis) |> st_as_sfc() |> 
  st_transform("+proj=lcc +lat_0=12 +lon_0=-102 +lat_1=17.5 +lat_2=29.5 +x_0=2500000 +y_0=0 +datum=WGS84 +units=m +no_defs") 
cps = read_sf("~/geoespacial/CP_ver.gpkg") |> st_filter(encuadre) 

# refactorización --------------------------------

niveles = c("Pésimo", "Malo", "Regular", "Bueno", "Excelente")
base = within(base, {
  nivel_estudios = factor(nivel_estudios, 
              levels = c("Sin estudios", "Primaria", "Secundaria", "Preparatoria", "Universidad", "Posgrado"),
              labels = c("Did not study", "Elementary", "Junior high", "High school", "University", "Grad school"))
  almacenamiento = ifelse(is.na(almacenamiento), "No almacenan", almacenamiento)
  dias_sin_serv = factor(dias_sin_serv,
              levels = c("No falta", "1 a 3", "4 a 6", "7 a 9", "10 a 12", "13 a 15", "Más de 15"),
              labels = c("0", "1 to 3", "4 to 6", "7 to 9", "10 to 12", "13 to 15", "More than 15"))
  percep_calidad_servicio = factor(percep_calidad_servicio,
              levels = niveles,
              labels = c("Unacceptable", "Bad", "Regular", "Good", "Excelent"))
  percep_costo_saneamiento = factor(percep_costo_saneamiento,
              levels = c("Inaceptable", "Poco", "Regular", "Bueno", "Excelente"),
              labels = c("Unacceptable", "Low", "Regular", "Good", "Excelent"))
  visita_cuerpo = factor(visita_cuerpo, ordered = TRUE)
  sexo = factor(genero, ordered = TRUE, labels = c("Female", "Male"))
  disponibilidad_agua = factor(disponibilidad_agua, labels = c("No", "Yes"))
  razon_NA_checar = factor(razon_NA_checar,
            labels = c("Turbid water", "Water shut-off", "Water Shut-Off/Repair", 
                      "Water shortage", "Shortage/For Repair", "Unknown", "Destruction of pipeline", 
                      "Lack of electric power", "Lack of pressure", "Not applicable", 
                      "Due to misuse", "Maintenance", "By repairment", "Due to repair/maintenance", 
                      "By supply region shifting", "By supply region shifting/maintenance", "By Tandeos/Repairs", 
                      "Rainy Season"))
  obtencion_agua = factor(obtencion_agua,
            labels = c("Storage", "Storage, carboy", "Storage, Rain", "Storage, Spring/River", 
                      "Storage, Pipe", "Cistern", "Bucket", "From nowhere", "Municipal source", 
                      "Carboy", "Rain", "Rain, Pipe", "Source/River", "Masonry tank", 
                      "Pipe", "Pipe, Rain", "Well", "Plastic drum", "Water tank", "Neighbors", 
                      "Neighbors, Carboy"))
  almacenamiento = factor(almacenamiento,
            labels = c("Cistern", "Cistern, buckets", "Cistern, masonry tank", "Cistern, plastic barrel", 
                       "Cistern, plastic drum, buckets", "Cistern, water tank", "Buckets", "Buckets, cistern",
                       "Buckets, masonry tank", "Buckets, plastic drum", "Buckets, plastic drum, water tank",
                       "None", "They do not store water", "Masonry tank", "Masonry tank, buckets", 
                       "Masonry tank, plastic tank", "Masonry tank, water tank", "Plastic drum", 
                       "Plastic drum, cistern", "Plastic drum, buckets", "Plastic drum, water tank",
                       "Plastic drum, water tank, buckets",
                       "Water tank", "Water tank, cistern", "Water tank, buckets", "Water tank, masonry tank",
                       "Water tank, plastic barrel", "Water tank, plastic barrel, buckets"))
  fuente_abasto = factor(fuente_abasto, 
             labels = c("I don't know", "Municipal source", "Carboy", "Rain", "Creek", "Spring",
                        "Well", "Dam", "River", "River/rain", "River/creek") )
  nombre_fuente = gsub("Desconozco", "I don't know", nombre_fuente)
  instancia = factor(instancia,
             labels = c("CAEV", "CMAP", "CMAS", "Local committee", "Local committee", 
                        "I don't know", "Self-management", "INECOL A.C.", "Rain", "None",
                        "Neighbor organization", "SAPAM", "SAPASE"))
  contexto = factor(contexto, 
              labels = c("Rural", "Suburban", "Urban"))
  actividad_economica = factor(actividad_economica,
              labels = c("No, unemployed", "No, full-time student",
                         "No, retiree", "No, full-time house chores",
                         "Yes, salaried", "Yes, own formal business",
                         "Yes, own informal business", "Yes, commission income"))
  tipo_casa = factor(tipo_casa,
              labels = c("Duplex house", "Social interest house", "House that shares plot with another", 
                        "Rented house", "Unique house on the plot", "Condominium", "Apartment", 
                        "Multifamily building", "Shelter", "Housing unit", "Collective housing", 
                        "Housing in neighborhood or quartier"))
  negativa_visita = factor(negativa_visita,
               labels = c("I do not know which one it is", "Difficult to access", 
                "Difficult to access/Secure area", "It is a public network", 
                "It is almost dry", "It is polluted", "It is polluted/Difficult to access", 
                "It is contaminated/It is on private property", "It is contaminated/It is on Private Property/Unsafe area", 
                "Is on Private Property", "Is contaminated/Is inaccessible", 
                "Lack of time", "Not nearby", "Not interested", "Not available", 
                "I don't frequent it", "I hadn't thought about it", "I'm not in the habit", 
                "Lack of time", "It is Black Water", "Unsafe area"))
 afimartiva_visita = factor(afimartiva_visita,
                labels = c("Sometimes", "Recreational activity", "Recreational activity, Swimming", 
                  "Recreational activity/Every two months", "Recreational activity/Every third day", 
                  "Recreational activity/Every three months", "Recreational activity/Daily", 
                  "Recreational activity/Twice a year", "Recreational activity/Twice a month", 
                  "Recreational activity/Twice a year", "Recreational activity/Weekends", 
                  "Recreational activity/Monthly", "Recreational activity/Weekly", 
                  "Recreational activity/Semi-annually", "Recreational activity/Three times a year", 
                  "Recreational activity/Quarterly", "Recreational activity/Once a week", 
                  "Drinking", "Walking", "Walking/Weekly", "Contemplation", "Daily", 
                  "Twice a year", "Holiday/Annually", "Weekends", "Frequent", "Well cleaning/Every two months", 
                  "Maintenance", "Maintenance/Annually", "Monthly", "Monitor condition", 
                  "Monitor condition, Maintenance", "Monitor condition/Quarterly", 
                  "Monitor condition/Daily", "Monitor your condition/Twice a week", 
                  "Monitor status/Monthly", "Monitor your condition/Weekly", "Monitor your condition/Once a month", 
                  "Swimming", "Bird watching", "For pleasure", "For need of drinking water", 
                  "Relaxation", "Relaxation, Contemplation/Weekends", "Ritual", 
                  "Weekly", "Daily transfer", "Daily transfer/daily", "Daily transfer/Twice a week", 
                  "Once a month", "Once a week."))
 visita_cuerpo = factor(visita_cuerpo, labels = c("No", "Yes"))
 conocimiento_art_1_y_4 = factor(conocimiento_art_1_y_4, labels = c("No", "Yes"))
 conocimiento_iniciativa = factor(conocimiento_iniciativa, labels = c("No", "Yes"))
 freq_pago = factor(freq_pago, labels = c("Annual", "Bimonthly", "Cooperation to committee", "Unknown", 
"Monthly", "No payment"))
 destino_agua_servida = factor(destino_agua_servida, labels = c("To the street", "Biodigester", "Anaerobic biodigester", "Biodigester/Sorption pit/Grey water trap", 
"Unknown", "Drainage", "Septic tank", "Absorption pit", "River, stream",  "Domestic root system", "Land", "Grease trap, garden"))
 separacion_negra_gris = factor(separacion_negra_gris, labels = c("No", "Don't know", "Yes"))
 destino_drenaje = factor(destino_drenaje, labels = c("Unknown", "No", "Not applicable", "Yes", 
                                                      "To the rivers", "To the ravine", "To the street", 
                                                      "To a parcel", "To the rivers", "To the land", "To sewage", "To the sewer", "To the stream",  "To the creek, To the river", "Spillway", "Drainage", "To the Sea", 
 "Underground", "Biodigester", "Home septic tank", "Lakes", 
"Treatment plant", "Maybe/Rivers", "Maybe/Sea", "Maybe/Treatment plant"
))
 sabe_uso_dinero_tarifa_saneamiento = factor(sabe_uso_dinero_tarifa_saneamiento, 
                                             labels = c("No", "Not applicable", "No/Maintenance", "Yes",
                                                        "Conservation", "Operating costs", "Diversion of money",
                                                        "Infrastructure", 
                                                        "Maintenance", "Maintenance and water treatment", 
                                                        "To extend the supply network", "Wages", 
                                                        "Environmental services", "Water treatment", 
                                                        "Water treatment and operating costs", "I have a well."))
 percepcion_pago = factor(percepcion_pago, 
                          levels = c("Nada", "Poco", "Lo justo", "Mucho", "No puedo pagarlo"),
                          labels = c("Nothing", "Little", "Fair", "too much", "I can't pay it"))
 
  })

Demographic profile of respondents

Code
poblacion = base |> 
  dplyr::filter(edad < 100 & edad > 10 ) |>
  count(edad, sexo, name = "pop") |>
  mutate(edad_cut = cut(edad, breaks = seq(0,100, by = 10), 
                        labels = paste(seq(0,90, by = 10), " to ", 
                                       seq(0,90, by = 10) +10 ))) 
poblacion |>  
  group_by(edad_cut, sexo) |> 
  summarise(pop = sum(pop)) |>
  mutate(pobmf = ifelse(test = sexo == "Male", yes = -pop, no = pop)) |>
  ggplot( aes(x = pobmf, 
              y = edad_cut, fill = sexo))  +
  geom_col() +
  scale_x_symmetric(labels = abs) +
  labs(x = "Population", y = "Age") +
  scale_fill_d3(name = "Sex")

Code
# plots vars demográficas --------------------------

demog = c("contexto", "actividad_economica", "nivel_estudios", "municipio", "tipo_casa")
demog_en = c("context", "Economic activity", "Study level", "Municipality", "House type")
names(demog_en) = demog
preguntas_demog = c("¿En qué contexto vive?", "¿Es actualmente empleado/a de un trabajo remunerado?", "¿Último nivel de estudios?",
  "¿En qué municipio vive?", "¿En qué tipo de casa habitación vive? ")
names(preguntas_demog) = demog
preguntas_demog_en = c("In which context do you live?", "Are you currently employed in a payed job?", 
                       "Which is your highest level of education?", "In which municipality do you live?", 
                       "In which type of house do you live?")
names(preguntas_demog_en) = demog 

p_demog = lapply(demog, function(x){  base |>
  ggplot(aes(fct_rev(fct_infreq(fct_lump_n(get(x), n = 10)))  ) ) + 
    geom_bar(fill = "dodgerblue") + coord_flip() + geom_text(stat='count',aes(label=..count..),vjust=-.5) +
  labs(x = "", y = "Frequency"#, 
       #title = gsub("_", " ", demog_en[[x]]) |> stringr::str_to_sentence() 
       ) })
names(p_demog) = demog

src = vector(mode = "character", length = length(demog))

for(i in seq_along(demog)){
textotemp = paste0("p_demog[['",  demog[[i]], "']]")
#    ES MUY IMPORTANTE PONER "output: asis" en el chunk    #
src[[i]] = knitr::knit_expand(
    text = c( 
  paste("### ", preguntas_demog_en[[ demog[[i]] ]]),
  "\n",
  "\n",
  paste0('Total answers: `r .QuartoInlineRender(table(is.na(base[["', demog[[i]], '"]]))[[1]])`'),
  "\n",
  "```{r}",
  "#| echo: false",
  "#| message: false",
  "#| warning: false",
  "\n",
  textotemp,
  "\n",
  "```"
    ))
}

res <- knitr::knit_child(text = src, quiet = TRUE)
cat(res, sep = '\n')

In which context do you live?

Total answers: 364

Are you currently employed in a payed job?

Total answers: 364

Which is your highest level of education?

Total answers: 364

In which municipality do you live?

Total answers: 364

In which type of house do you live?

Total answers: 364

Water availability and supply

Code
agua = c("disponibilidad_agua", "razon_NA_checar", "obtencion_agua",
         "almacenamiento", "fuente_abasto", "nombre_fuente", "instancia", "visita_cuerpo", "negativa_visita", "afimartiva_visita")

agua_en = c("water_availability", "reason", "water_procurement",
            "storage", "water_source", "name_of_source", "office/instance", "do_you_visit_water_body", "does_visit", "does_not_visit")
names(agua_en) = agua
preguntas_agua_en = c("Is water always available in your home?", 
                      "Why didn't you have access to enough water when you needed it?",
                      "When you lack enough water, where do you obtain it from?",
                      "If you store water at home, where do you keep it?",
                      "Which is the main source of water for your home?",
                      "What is the name of the source of water that supplies it? E.g. Río Huitzilapan",
                      "On which agency does your household's water supply depend? (e.g. local committee, municipal water commission, Veracruz state commission, none).", "Do you usually visit the body of water closest to your home?", "If you answered you do not visit the closest water body, specify why",
                      "If you answered that you visit the closest water body, specify why and the frequency of your visits")
names(preguntas_agua_en) = agua

p_agua = lapply(agua, function(x){  base |>
    filter(!is.na(get(x))) |>
    ggplot(
      aes(fct_relevel(
            fct_rev(
                fct_infreq(fct_lump_n(get(x), n = 10))
                ), 
            "Other", after = 0L)  ) ) + 
    geom_bar(fill = "dodgerblue") + coord_flip() + geom_text(stat='count',aes(label=..count..),vjust=-.1) +
    labs(x = "", y = "Frequency"
         #title = gsub("_|NA_checar", " ", agua_en[[x]]) |> stringr::str_to_sentence() 
         ) })
names(p_agua) = agua

src = vector(mode = "character", length = length(agua))
for(i in seq_along(agua)){
textotemp = paste0("p_agua[['",  agua[[i]], "']]")
#    ES MUY IMPORTANTE PONER "output: asis" en el chunk    #
src[[i]] = knitr::knit_expand(
    text = c( 
  paste("### ", preguntas_agua_en[[ agua[[i]] ]]),
  "\n",
  "\n",
  paste0('Total answers: `r .QuartoInlineRender(table(is.na(base[["', agua[[i]], '"]]))[[1]])`'),
  "\n",
  "```{r}",
  "#| echo: false",
  "#| message: false",
  "#| warning: false",
  "\n",
  textotemp,
  "\n",
  "```"
    ))
}

#    ES MUY IMPORTANTE PONER "output: asis" en el chunk    #
res <- knitr::knit_child(text = src, quiet = TRUE)
cat(res, sep = '\n')

Is water always available in your home?

Total answers: 364

Why didn’t you have access to enough water when you needed it?

Total answers: 327

When you lack enough water, where do you obtain it from?

Total answers: 311

If you store water at home, where do you keep it?

Total answers: 364

Which is the main source of water for your home?

Total answers: 363

What is the name of the source of water that supplies it? E.g. Río Huitzilapan

Total answers: 274

On which agency does your household’s water supply depend? (e.g. local committee, municipal water commission, Veracruz state commission, none).

Total answers: 232

Do you usually visit the body of water closest to your home?

Total answers: 364

If you answered you do not visit the closest water body, specify why

Total answers: 218

If you answered that you visit the closest water body, specify why and the frequency of your visits

Total answers: 129

Water hauling at home

Total answers: 235 (respondents who did provide an answer to “If you have to travel to get water, which members of your household are responsible for getting it? specify gender (male or female) and age (number of years).”)

Code
por_mun = base |> 
  count(municipio) 

respondientes = left_join(munis, por_mun, 
          by = c("NOM_MUN" = "municipio" )) |>
  filter(!is.na(n)) |>
  mutate(nom_mun = gsub("Tlalnelhuayocan", "Tlalnel.", NOM_MUN)) |>
  st_transform(4326)

por_cp = base |> count(cp) |> mutate(cp = as.character(cp))

# acarreo ----------------------------------------------

acarreo = tidyr::separate(base, acarreo, sep= "/", into = c("acarrea1", "acarrea2", "acarrea3", "acarrea4")) |>
  dplyr::select(starts_with("acarrea"), municipio) |>
  mutate(across(everything(), ~ trimws(.x))) |>
  mutate(across(starts_with("acarrea"), 
                ~ str_match(.x, "\\d+") |> as.numeric(), .names = "edad_{.col}" ) ) |>
  mutate(across(starts_with("acarrea"), 
                ~ str_match(.x, "Femenino|Masculino") |> as.character(), .names = "sexo_{.col}" ) ) |>
  select(-1, -2, -3, -4) |>
  mutate(idn = row_number()) 

acarreo = lapply(0:3, function(x) acarreo[,c(1,2+x,6+x)] |> set_names(c("municipio", "edad", "sexo")) )

acarreo = do.call(rbind, acarreo)

acarreo$sexo = factor(acarreo$sexo, labels = c("Female", "Male"))

acarreo |>
  ggplot(aes(edad, fill = sexo)) + geom_histogram() +
  facet_wrap(~municipio) +
  scale_fill_d3(name = "Sex") +
  labs(title = "Water hauling in homes", subtitle = "By sex of respondent",
       x = "Age", y = "Total persons")

Code
base |>
  mutate(edad_cut = edad - (edad %% 10)) |>
  mutate(acarrea_si_no = ifelse(grepl("Ma|Fem", acarreo), "Yes", "No")) |>
  count(edad_cut, acarrea_si_no, municipio) |>
  ggplot(aes(edad_cut, n, fill = acarrea_si_no)) + geom_col(position = "fill") +
  scale_fill_d3(name = "Water is hauled \nin your dwelling") +
  facet_wrap(~municipio) +
  labs(title = "Water hauling in homes", subtitle = "By age of respondent",
       x = "Age", y = "Share of homes")

Code
# por cp -------------

respondientes_cp = left_join(cps, por_cp, by = c("d_cp" = "cp") ) |> 
  filter(!is.na(n))  |> st_transform(4326)

Geospatial visualizations

Total answers: 364

Questions: 1. In which municipality do you live? 1. Which is your zip code? 1. If you have to travel to get water, which members of your household are responsible for getting it? specify gender (male or female) and age (number of years). 1. What is the name of the water source that supplies you? example: Huitzilapan river

Empty answers were considered as not hauling water or not knowing the name of the water source

Code
acarreo_cp = base |>
  mutate(acarrea_si_no = ifelse(grepl("Ma|Fem", acarreo), "Yes", "No")) |>
  count(acarrea_si_no, cp) |>
  mutate(cp = as.character(cp)) |>
  group_by(cp) |> 
  mutate(porciento = round(n/sum(n)*100,2 ) ) |> 
  # esto lo hacemos para no perder cps donde un único respondiente dice no acarrear
  mutate(porciento = ifelse(acarrea_si_no == "No" & porciento == 100, 0, porciento)) |>
  mutate(acarrea_si_no = ifelse(acarrea_si_no == "No" & porciento == 0, "Yes",
                                acarrea_si_no)) |>
  filter(acarrea_si_no == "Yes") |> ungroup() |>
  right_join(cps, by = c("cp" = "d_cp")) |> st_as_sf() |>
  filter(!is.na(n)) |>
  st_transform(4326)

know_source = base |>
  mutate(knows_source = ifelse(!grepl("I don", nombre_fuente), "Yes", "No")) |>
  mutate(knows_source = ifelse(is.na(nombre_fuente), "No", knows_source)) |>
  count(knows_source, cp) |>
  mutate(cp = as.character(cp)) |>
  group_by(cp) |> 
  mutate(porciento = round(n/sum(n)*100,2 ) ) |> 
  # esto lo hacemos para no perder cps donde un único respondiente dice no saber la fuente
  mutate(porciento = ifelse(knows_source == "No" & porciento == 100, 0,
                                porciento)) |>
  mutate(knows_source = ifelse(knows_source == "No" & porciento == 100, 0,
                                "Yes")) |>
  filter(knows_source == "Yes") |> ungroup() |>
  right_join(cps, by = c("cp" = "d_cp")) |> st_as_sf() |>
  filter(!is.na(n)) |>
  st_transform(4326)

pal_resp = colorNumeric(palette = "viridis", domain = respondientes$n)
pal_resp_cp = colorNumeric(palette = "inferno", domain = respondientes_cp$n, reverse = TRUE)
pal_ac_cp = colorNumeric(palette = "plasma", domain = acarreo_cp$porciento, reverse = TRUE)
pal_knows = colorNumeric(palette = "magma", domain = know_source$porciento, reverse = TRUE)
Code
leaflet() |>    
  addProviderTiles(provider = providers$CartoDB.Positron) |> #
  #addTiles() |> 
  setView(lng = -96.94, lat = 19.48, zoom = 11) |>
  addPolygons(data = respondientes, color = pal_resp(respondientes$n), weight = 1.5,
              group = "Number of respondents by municipality",
              fillOpacity = .6,
              popup =  ~paste("Mun:", NOM_MUN, "<hr>Resp.:",  n)) |>
  addPolygons(data = respondientes_cp,
              color = pal_resp_cp(respondientes_cp$n), weight = 1.5,
              group = "Number of respondents by zip code",
              fillOpacity = .6,
              popup =  ~paste("Zip code:", d_cp, "<hr>Resp:",  n)) |>
  addPolygons(data = acarreo_cp, color = pal_ac_cp(acarreo_cp$porciento), weight = 1.5,
              group = "% of houses hauling water by zip code",
              fillOpacity = .6,
              popup =  ~paste("Zip code:", cp, "<hr>Percentage:",  porciento, "%")) |>
  addPolygons(data = know_source, color = pal_knows(know_source$porciento), weight = 1.5,
              group = "% of respondents that report knowing the water source",
              fillOpacity = .6,
              popup =  ~paste("Zip code:", cp, "<hr>Percentage:",  porciento, "%")) |>
  addLegend(pal = pal_resp, values = respondientes$n, title = "N of resp. by municipality",
            group = "Number of respondents by municipality", position = "bottomleft") |>
  addLegend(pal = pal_resp_cp, values = respondientes_cp$n, title = "N of resp. by zip code",
            group = "Number of respondents by zip code", position = "bottomleft") |>
  addLegend(pal = pal_ac_cp, values = acarreo_cp$porciento, title = "% houses that haul water by zip code", 
            group = "% of houses hauling water by zip code", position = "bottomleft") |>
  addLegend(pal = pal_knows, values = know_source$porciento, title = "% resp. know source",
            group = "% of respondents that report knowing the water source", position = "bottomright") |>
  addLayersControl(overlayGroups = c("Number of respondents by municipality",
                                     "Number of respondents by zip code",
                                     "% of houses hauling water by zip code",
                                     "% of respondents that report knowing the water source") ,
                   options = layersControlOptions(collapsed = FALSE)) |>
  hideGroup(c("Number of respondents by zip code", "% of houses hauling water by zip code",
              "% of respondents that report knowing the water source"))

Scarcity throughout the year

Total answers: 364 (to question “In which month has water service been insufficient in your household?”, which included “None” as an option)

Code
meses = base[,c("municipio", "contexto", "tipo_casa", "nivel_estudios",
              "mes_sin_servicio")] 
names(meses) = c("municipality", "context", "housing_type", "study_level", "month_without_service")

meses = meses |>  separate_wider_delim("month_without_service", 
                       delim = ", ", names = c("m1", "m2", "m3", "m4"),
                       too_few = "align_start", too_many = "drop") 

meses |> 
  mutate(falta_servicio = ifelse(m1 == "Ningún mes", "Has not been interrupted", "Has been interrupted") ) |>
  count(municipality, falta_servicio) |>
  knitr::kable()
municipality falta_servicio n
Banderilla Has been interrupted 32
Coatepec Has been interrupted 19
Coatepec Has not been interrupted 38
Emiliano Zapata Has been interrupted 14
Emiliano Zapata Has not been interrupted 2
Jilotepec Has been interrupted 9
Jilotepec Has not been interrupted 1
Teocelo Has been interrupted 5
Tlalnelhuayocan Has been interrupted 7
Tlalnelhuayocan Has not been interrupted 6
Xalapa Has been interrupted 158
Xalapa Has not been interrupted 58
Xico Has been interrupted 6
Xico Has not been interrupted 9
Code
meses |>  
  pivot_longer(cols = matches("\\d")) |>
  filter(!is.na(value)) |>
  mutate(value = factor(value, 
                        levels = c("Enero", "Febrero", "Marzo", "Abril", "Mayo", 
                                          "Junio", "Julio", "Agosto", "Septiembre", "Octubre", 
                                          "Noviembre", "Diciembre", "Ningún mes"),
                        labels = c(month.abb, "None")) ) |> 
  ggplot(aes(value, fill = study_level)) +
  geom_bar() +  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_d3(name = "Study level") +
  labs(x = "", y = "Total respondents") +
  facet_wrap(~municipality) 

Code
# días sin  agua -------------

base |>
  dplyr::filter(!is.na(dias_sin_serv)) |>
  ggplot(aes(dias_sin_serv, fill = contexto)) + 
  geom_bar() +  theme(axis.text.x = element_text(angle = 90)) +
  labs(x  = "Days without service in the last month", 
       title = "Days without service", y = "Total respondants") +
  facet_wrap(~municipio, scales = "free_y") + scale_fill_d3(name = "Context")

Code
# consumo de agua --------------

cor(base$gasto_agua_num, base$habitantes, use = "complete.obs")
[1] 0.1505061
Code
base |>
  ggplot(aes(habitantes, gasto_agua_num)) +
  geom_point() +
  scale_fill_d3() +
  facet_wrap(~municipio, scales = "free_y") +
  labs(x = "Inhabitants per house", y = "Daily water usage, liters")

Code
base_en = base[,c("gasto_agua_num", "habitantes", "nivel_estudios", 
        "sexo", "edad", "municipio",
        "percep_calidad_servicio", "dias_sin_serv", "percep_costo_saneamiento",
        "destino_drenaje", "visita_cuerpo", "actividad_economica")]

names(base_en) = c("water_usage_liters", "inhabitants", "study_level",
                   "sex", "age", "municipality",
                   "service_quality_perception", "days_without_service", "water_sanitation_cost_perception",
                   "sewage_destiny", "visits_water_body", "economic_activity")

Linear models, anovas, chi squared and log likelihood analyses

Code
# RECORDAR QUE LOS FACTORES QUE NO PONE SON EL INTERCEPTO  


model.gasto = lm(water_usage_liters ~ inhabitants + study_level + sex + age + municipality, data = base_en)
stargazer(model.gasto, type = "html")
Dependent variable:
water_usage_liters
inhabitants 13.385***
(3.878)
study_levelJunior high -67.155
(95.696)
study_levelHigh school -10.581
(78.636)
study_levelUniversity 14.900
(78.339)
study_levelGrad school 8.759
(78.936)
sex.L 17.240**
(8.472)
age 1.462***
(0.456)
municipalityCoatepec -34.676
(24.575)
municipalityEmiliano Zapata -7.745
(33.783)
municipalityJilotepec 158.161***
(39.877)
municipalityTeocelo 26.564
(52.572)
municipalityTlalnelhuayocan -16.779
(36.589)
municipalityXalapa -38.253*
(21.456)
municipalityXico -44.411
(35.090)
Constant 25.361
(82.436)
Observations 362
R2 0.172
Adjusted R2 0.138
Residual Std. Error 108.548 (df = 347)
F Statistic 5.137*** (df = 14; 347)
Note: p<0.1; p<0.05; p<0.01
Code
model.costo = lm(water_sanitation_cost_perception ~ service_quality_perception + study_level + municipality +
                   days_without_service + sex + study_level + age,
                 data = base_en)
Warning in model.response(mf, "numeric"): using type = "numeric" with a factor
response will be ignored
Warning in Ops.factor(y, z$residuals): '-' no es significativo para factores
Code
stargazer(model.costo, type = "html") 
Dependent variable:
water_sanitation_cost_perception
service_quality_perceptionBad 0.643
service_quality_perceptionRegular 1.259
service_quality_perceptionGood 1.729
service_quality_perceptionExcelent 2.658
study_levelJunior high -1.001
study_levelHigh school 0.023
study_levelUniversity 0.170
study_levelGrad school 0.157
municipalityCoatepec -0.245
municipalityEmiliano Zapata 0.056
municipalityJilotepec -0.200
municipalityTeocelo -1.126
municipalityTlalnelhuayocan -0.090
municipalityXalapa -0.061
municipalityXico -0.162
days_without_service1 to 3 0.149
days_without_service4 to 6 0.044
days_without_service7 to 9 0.049
days_without_service10 to 12 -0.068
days_without_service13 to 15 0.563
days_without_serviceMore than 15 0.024
sex.L 0.055
age -0.006
Constant 1.898
Observations 279
Note: p<0.1; p<0.05; p<0.01
Code
# t student -----------------------

#with(base_en,
#     t.test(water_usage_liters[sex == "Masculine"], water_usage_liters[sex == "Femenine"] ))

# with(base_en,
#      t.test(water_usage_liters[grepl("Yes", actividad_economica)], 
#             water_usage_liters[grepl("No", actividad_economica)] ))
# 
# with(base_en,
#      t.test(water_usage_liters[contexto == "Urbano"], 
#             water_usage_liters[contexto == "Rural"] ))

# anovas ---------------

aov(water_usage_liters ~ study_level, data = base_en) |> summary()
             Df  Sum Sq Mean Sq F value Pr(>F)
study_level   4   80966   20241   1.494  0.203
Residuals   359 4863012   13546               
Code
boxplot(water_usage_liters ~ study_level, data = base_en, 
        varwidth = TRUE, outline = FALSE, las = 2, xlab = "", 
        ylab = "Daily water usage, liters", cex.axis = 0.7)

Code
aov(water_usage_liters ~ economic_activity, data = base_en) |>  summary()
                   Df  Sum Sq Mean Sq F value Pr(>F)  
economic_activity   7  227186   32455    2.45 0.0183 *
Residuals         356 4716792   13249                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
ggplot(base_en, aes(economic_activity, water_usage_liters)) + 
  geom_boxplot() + coord_flip() + labs(x = "", y = "Daily water usage, liters")

Code
aov(water_usage_liters ~ municipality, data = base_en) |> summary()
              Df  Sum Sq Mean Sq F value   Pr(>F)    
municipality   7  471478   67354   5.361 7.48e-06 ***
Residuals    356 4472500   12563                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
aov(water_usage_liters ~ municipality, data = base_en) |> TukeyHSD()
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = water_usage_liters ~ municipality, data = base_en)

$municipality
                                       diff         lwr        upr     p adj
Coatepec-Banderilla              -45.503289 -120.994768  29.988189 0.5946196
Emiliano Zapata-Banderilla       -33.312500 -137.953283  71.328283 0.9782460
Jilotepec-Banderilla             131.812500    7.999856 255.625144 0.0277593
Teocelo-Banderilla                28.812500 -135.532393 193.157393 0.9994670
Tlalnelhuayocan-Banderilla       -43.110577 -155.512809  69.291655 0.9400744
Xalapa-Banderilla                -60.133796 -124.868821   4.601228 0.0903887
Xico-Banderilla                  -76.720833 -183.661688  30.220022 0.3618368
Emiliano Zapata-Coatepec          12.190789  -84.498719 108.880298 0.9999413
Jilotepec-Coatepec               177.315789   60.146090 294.485489 0.0001481
Teocelo-Coatepec                  74.315789  -85.084372 233.715951 0.8469152
Tlalnelhuayocan-Coatepec           2.392713 -102.647419 107.432844 1.0000000
Xalapa-Coatepec                  -14.630507  -65.520496  36.259483 0.9879684
Xico-Coatepec                    -31.217544 -130.391704  67.956617 0.9796039
Jilotepec-Emiliano Zapata        165.125000   27.359008 302.890992 0.0071080
Teocelo-Emiliano Zapata           62.125000 -112.972520 237.222520 0.9602900
Tlalnelhuayocan-Emiliano Zapata   -9.798077 -137.407479 117.811325 0.9999980
Xalapa-Emiliano Zapata           -26.821296 -115.368014  61.725422 0.9836570
Xico-Emiliano Zapata             -43.408333 -166.234407  79.417740 0.9611237
Teocelo-Jilotepec               -103.000000 -290.187123  84.187123 0.7017138
Tlalnelhuayocan-Jilotepec       -174.923077 -318.672988 -31.173166 0.0058206
Xalapa-Jilotepec                -191.946296 -302.492208 -81.400385 0.0000058
Xico-Jilotepec                  -208.533333 -348.054377 -69.012289 0.0001909
Tlalnelhuayocan-Teocelo          -71.923077 -251.766647 107.920494 0.9258577
Xalapa-Teocelo                   -88.946296 -243.542776  65.650184 0.6514307
Xico-Teocelo                    -105.533333 -282.015045  70.948379 0.6046062
Xalapa-Tlalnelhuayocan           -17.023219 -114.619784  80.573345 0.9994846
Xico-Tlalnelhuayocan             -33.610256 -163.112428  95.891915 0.9934995
Xico-Xalapa                      -16.587037 -107.840389  74.666315 0.9993242
Code
ggplot(base_en, aes(municipality, water_usage_liters)) + 
  geom_boxplot(varwidth = TRUE) + coord_flip() + labs(x = "", y = "Daily water usage, liters")

Code
# normalidad

qqnorm(base_en$water_usage_liters)
qqline(base_en$water_usage_liters)

Code
shapiro.test(base_en$water_usage_liters)

    Shapiro-Wilk normality test

data:  base_en$water_usage_liters
W = 0.73243, p-value < 2.2e-16
Code
#  no son normales 

# chi squared ----------
table(base_en$service_quality_perception, base_en$municipality) |>
  chisq.test()
Warning in chisq.test(table(base_en$service_quality_perception,
base_en$municipality)): Chi-squared approximation may be incorrect

    Pearson's Chi-squared test

data:  table(base_en$service_quality_perception, base_en$municipality)
X-squared = 26.789, df = 28, p-value = 0.5298
Code
table(base_en$service_quality_perception, base_en$visits_water_body) |>
  chisq.test() 
Warning in chisq.test(table(base_en$service_quality_perception,
base_en$visits_water_body)): Chi-squared approximation may be incorrect

    Pearson's Chi-squared test

data:  table(base_en$service_quality_perception, base_en$visits_water_body)
X-squared = 12.819, df = 4, p-value = 0.01219
Code
table(base_en$sewage_destiny, base_en$sex) |>
  chisq.test()
Warning in chisq.test(table(base_en$sewage_destiny, base_en$sex)): Chi-squared
approximation may be incorrect

    Pearson's Chi-squared test

data:  table(base_en$sewage_destiny, base_en$sex)
X-squared = 40.327, df = 23, p-value = 0.01412
Code
# logistic regression
modlog = glm(visits_water_body ~ age, data = base_en, family = binomial) 

#summary(modlog) |> kable()
logLik(modlog)
'log Lik.' -240.0286 (df=2)

Level of education and water knowledge

Total answers: 364 (to question “Do you know articles 1st and 4th of the Mexican constitution?”) and 273 (to question “Do you know of any initiative, project, action that is being carried out in favor of sanitation?”)

We correlate an ordinal variable (highest level of education) with binary variables

  • Do you know articles 1st and 4th of the Mexican constitution?:
  • 0.1025241
  • Do you know of any initiative, project, action that is being carried out in favor of sanitation?
  • 0.0692181

Answers in the open-ended final question

Question: Write if you have a question or commentary:

The following list presents an outline of the general issues discussed at the open-ended final question.

  1. Infrastructure and Water Services Issues:
  • Lack of hydraulic network in localities near aquifers.
  • Issues with easily damaged hoses leading to water wastage.
  1. Water Pollution and Conservation:
  • Concerns about pollution of rivers and bodies of water.
  • Need for sustainability and sanitation projects.
  1. Environmental Education and Awareness:
  • Lack of appreciation and education about water.
  • Need to disseminate information and raise awareness.
  1. Management and Transparency:
  • Criticism of government management and lack of transparency.
  • Uncertainty about the use and destination of water payments.
  1. Community Participation and Action:
  • Interest in actively participating in water-related projects.
  • Need to involve the community in water-related decisions.
  1. Sustainability and Public Policies:
  • Need to implement sustainable actions.
  • Criticism of ineffective governmental actions.
  1. Water System and Sanitation:
  • Issues with sanitation services and water management.
  • Criticism of water tariffing and minimum services.
  1. Information and Results:
  • Interest in knowing the results of water-related studies and surveys.
  • Need to disseminate information about projects and results.

General Comments:

Thanks and support for water-related initiatives. Personal and family concerns about water quality and availability.

Water access, sewage, water initiatives and the human right to water

Code
agua = c( "freq_pago", "destino_agua_servida", "separacion_negra_gris", 
         "destino_drenaje", "sabe_uso_dinero_tarifa_saneamiento", 
         "iniciativas_conocidas",
         "conocimiento_iniciativas_saneamiento",
         "conocimiento_art_1_y_4")

preguntas_agua_en = c( 
  "How often is your water billed?",
  "Where does the water you use in your home go?",
  "Do you know if gray water is separated in your colony?",
  "Where does the drainage in your neighborhood go to?", 
  "Do you know what the money collected in the sanitation fee is used for?",
  "What initiatives, projects and actions in favor of sanitation do you know about (e.g. from residents of neighborhoods, barrios and communities, civil society organizations, academia, government entities...)?",
  "Do you know of any initiative, project, action that is being carried out in favor of sanitation? ",
  "Do you know what the 1st and 4th articles of the constitution say about water?")

#"percepcion_pago",
#"percep_calidad_servicio",
# "percep_costo_saneamiento",

#"Your opinion on the amount of money you pay for water is:"
#  "Your opinion on the water service quality",
# "Your opinion on the cost of your water treatment fee",
names(preguntas_agua_en) = agua

p_agua = lapply(agua, function(x){  base |>
    filter(!is.na(get(x))) |>
    ggplot(
      aes(fct_relevel(
            fct_rev(
                fct_infreq(fct_lump_n(get(x), n = 10))
                ), 
            "Other", after = 0L)  ) ) + 
    geom_bar(fill = "dodgerblue") + coord_flip() + geom_text(stat='count',aes(label=..count..),vjust=-.1) +
    labs(x = "", y = "Frequency"
         #title = gsub("_|NA_checar", " ", agua_en[[x]]) |> stringr::str_to_sentence() 
         ) })
names(p_agua) = agua

src = vector(mode = "character", length = length(agua))
for(i in seq_along(agua)){
textotemp = paste0("p_agua[['",  agua[[i]], "']]")
#    ES MUY IMPORTANTE PONER "output: asis" en el chunk    #
src[[i]] = knitr::knit_expand(
    text = c( 
  paste("### ", preguntas_agua_en[[ agua[[i]] ]]),
  "\n",
  "\n",
  paste0('Total answers: `r .QuartoInlineRender(table(is.na(base[["', agua[[i]], '"]]))[[1]])`'),
  "\n",
  "```{r}",
  "#| echo: false",
  "#| message: false",
  "#| warning: false",
  "\n",
  textotemp,
  "\n",
  "```"
    ))
}

#    ES MUY IMPORTANTE PONER "output: asis" en el chunk    #
res <- knitr::knit_child(text = src, quiet = TRUE)
cat(res, sep = '\n')

How often is your water billed?

Total answers: 364

Where does the water you use in your home go?

Total answers: 364

Do you know if gray water is separated in your colony?

Total answers: 364

Where does the drainage in your neighborhood go to?

Total answers: 288

Do you know what the money collected in the sanitation fee is used for?

Total answers: 306

What initiatives, projects and actions in favor of sanitation do you know about (e.g. from residents of neighborhoods, barrios and communities, civil society organizations, academia, government entities…)?

Total answers: 248

Do you know of any initiative, project, action that is being carried out in favor of sanitation?

Total answers: 202

Do you know what the 1st and 4th articles of the constitution say about water?

Total answers: 364

Percepciones de calidad y costos

Code
agua = c("percepcion_pago", "percep_calidad_servicio", "percep_costo_saneamiento")

preguntas_agua_en = c("Your opinion on the amount of money you pay for water is:",
                      "Your opinion on the water service quality",
                      "Your opinion on the cost of your water treatment fee")

names(preguntas_agua_en) = agua

p_agua = lapply(agua, function(x){  base |>
    filter(!is.na(get(x))) |>
    ggplot(
      aes(fct_lump_n(get(x), n = 10)) ) + 
    geom_bar() + scale_fill_d3() +
    coord_flip() + 
    geom_text(stat='count', aes(label=..count..), vjust=-.1, 
                             position = "stack" ) +
    labs(x = "", y = "Frequency"
         #title = gsub("_|NA_checar", " ", agua_en[[x]]) |> stringr::str_to_sentence() 
         ) })
names(p_agua) = agua

src = vector(mode = "character", length = length(agua))
for(i in seq_along(agua)){
textotemp = paste0("p_agua[['",  agua[[i]], "']]")
#    ES MUY IMPORTANTE PONER "output: asis" en el chunk    #
src[[i]] = knitr::knit_expand(
    text = c( 
  paste("### ", preguntas_agua_en[[ agua[[i]] ]]),
  "\n",
  "\n",
  paste0('Total answers: `r .QuartoInlineRender(table(is.na(base[["', agua[[i]], '"]]))[[1]])`'),
  "\n",
  "```{r}",
  "#| echo: false",
  "#| message: false",
  "#| warning: false",
  "\n",
  textotemp,
  "\n",
  "```"
    ))
}

#    ES MUY IMPORTANTE PONER "output: asis" en el chunk    #
res <- knitr::knit_child(text = src, quiet = TRUE)
cat(res, sep = '\n')

Your opinion on the amount of money you pay for water is:

Total answers: 364

Your opinion on the water service quality

Total answers: 319

Your opinion on the cost of your water treatment fee

Total answers: 308

References

R Core Team (2024). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria. https://www.R-project.org/.

Ydata (2024). Ydata profiling. https://github.com/ydataai/ydata-profiling