25  GIS Data to ODK itemlist

Author

Chrissy h Roberts

25.1 Background

Using administrative data from [DIVA-GIS](http://www.diva-gis.org), this script converts the geographical administrative data (levels 0-3) for one or more countries and outputs an itemsets.csv file that works with ODK and preserves accented characters (tested only on roman characters). The primary purpose here is to create a list that is compatible with the cascading select system that ODK uses to filter down to more granular responses in questions where you’d want to capture district > area > village or similar.

This also removes accents from internal variables like the XLSForm ‘name’ column.

Accents will appear on screen, but won’t be preserved in data frames. This is desirable because working with mixed data that may or may not include accents is a pain.

25.2 Data

You will need to get administrative data from [DIVA GIS](http://www.diva-gis.org/datadown#) for each of the countries you want to include.

Unzip this data to a folder in the same directory as this script.

This example uses Uganda and Democratic Republic of Congo.

25.3 Libraries

library(knitr)

25.3.1 Create a folder to house data

# Make sure an empty working folder exists for the DIVA-GIS downloads.
# recursive = TRUE also creates the parent "data" folder when it is missing.
if (!dir.exists("data/divadownload")) {
  dir.create("data/divadownload", recursive = TRUE)
}
# Clear anything left over from a previous run. unlink() is used instead of a
# shell "rm -rf" so the step does not depend on a Unix shell being available.
unlink(list.files("data/divadownload", full.names = TRUE), recursive = TRUE)

25.3.2 Define target countries

Specify ISO 3166-1 alpha-3 codes (three letters) to tell R which data sets to include. A full list of ISO codes is available [here](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3).

# ISO alpha-3 codes of the countries to include (Uganda, DR Congo)
countries <- c("UGA", "COD")

25.4 Download and bind data

For each country, download and unzip the data set, load the level 3 table, then bind the tables for all countries together.

# Allow slow downloads up to two minutes before timing out
options(timeout = 120)

# One level-3 table per country; collected in a list and bound once at the end
# instead of growing a data frame inside the loop.
country_tables <- vector("list", length(countries))

for (i in seq_along(countries)) {
  iso <- countries[i]
  zip_path <- file.path("data/divadownload", paste0(iso, "_adm.zip"))
  unzip_dir <- file.path("data/divadownload", iso)

  # download the data set for country i (binary mode keeps the zip intact)
  download.file(
    url = paste0("http://biogeo.ucdavis.edu/data/diva/adm/", iso, "_adm.zip"),
    destfile = zip_path,
    mode = "wb"
  )

  # unzip the data for country i (utils::unzip needs no external program)
  unzip(zip_path, exdir = unzip_dir)

  # locate the level 3 table for country i; search only this country's folder
  # and anchor the pattern so that exactly one file can match
  csv_path <- list.files(
    path = unzip_dir,
    pattern = paste0("^", iso, "_adm3\\.csv$"),
    full.names = TRUE,
    recursive = TRUE
  )
  if (length(csv_path) != 1) {
    stop(
      "Expected exactly one ", iso, "_adm3.csv in ", unzip_dir,
      " but found ", length(csv_path),
      call. = FALSE
    )
  }

  # read the level 3 data and select only the relevant columns
  level3data <- read.csv(csv_path)
  country_tables[[i]] <-
    level3data[, c("NAME_0", "NAME_1", "NAME_2", "NAME_3", "ISO")]
}

# bind the per-country tables into a single data frame
full.data.output <- do.call(rbind, country_tables)

25.5 Define function to remove accents

# Replace accented roman characters in a character vector with unaccented
# equivalents. `x` is the text to clean; the return value is `x` with every
# character listed in `accented` swapped for the entry at the same position in
# `plain` (some map to two letters, e.g. 'Æ' becomes 'AE').
removeAccents <- function(x) {
  accented <- c('À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç', 'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï', 'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', 'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'ß', 'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç', 'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', 'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'ÿ', 'Ā', 'ā', 'Ă', 'ă', 'Ą', 'ą', 'Ć', 'ć', 'Ĉ', 'ĉ', 'Ċ', 'ċ', 'Č', 'č', 'Ď', 'ď', 'Đ', 'đ', 'Ē', 'ē', 'Ĕ', 'ĕ', 'Ė', 'ė', 'Ę', 'ę', 'Ě', 'ě', 'Ĝ', 'ĝ', 'Ğ', 'ğ', 'Ġ', 'ġ', 'Ģ', 'ģ', 'Ĥ', 'ĥ', 'Ħ', 'ħ', 'Ĩ', 'ĩ', 'Ī', 'ī', 'Ĭ', 'ĭ', 'Į', 'į', 'İ', 'ı', 'IJ', 'ij', 'Ĵ', 'ĵ', 'Ķ', 'ķ', 'Ĺ', 'ĺ', 'Ļ', 'ļ', 'Ľ', 'ľ', 'Ŀ', 'ŀ', 'Ł', 'ł', 'Ń', 'ń', 'Ņ', 'ņ', 'Ň', 'ň', 'ʼn', 'Ō', 'ō', 'Ŏ', 'ŏ', 'Ő', 'ő', 'Œ', 'œ', 'Ŕ', 'ŕ', 'Ŗ', 'ŗ', 'Ř', 'ř', 'Ś', 'ś', 'Ŝ', 'ŝ', 'Ş', 'ş', 'Š', 'š', 'Ţ', 'ţ', 'Ť', 'ť', 'Ŧ', 'ŧ', 'Ũ', 'ũ', 'Ū', 'ū', 'Ŭ', 'ŭ', 'Ů', 'ů', 'Ű', 'ű', 'Ų', 'ų', 'Ŵ', 'ŵ', 'Ŷ', 'ŷ', 'Ÿ', 'Ź', 'ź', 'Ż', 'ż', 'Ž', 'ž', 'ſ', 'ƒ', 'Ơ', 'ơ', 'Ư', 'ư', 'Ǎ', 'ǎ', 'Ǐ', 'ǐ', 'Ǒ', 'ǒ', 'Ǔ', 'ǔ', 'Ǖ', 'ǖ', 'Ǘ', 'ǘ', 'Ǚ', 'ǚ', 'Ǜ', 'ǜ', 'Ǻ', 'ǻ', 'Ǽ', 'ǽ', 'Ǿ', 'ǿ')
  plain <- c('A', 'A', 'A', 'A', 'A', 'A', 'AE', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I', 'D', 'N', 'O', 'O', 'O', 'O', 'O', 'O', 'U', 'U', 'U', 'U', 'Y', 's', 'a', 'a', 'a', 'a', 'a', 'a', 'ae', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 'n', 'o', 'o', 'o', 'o', 'o', 'o', 'u', 'u', 'u', 'u', 'y', 'y', 'A', 'a', 'A', 'a', 'A', 'a', 'C', 'c', 'C', 'c', 'C', 'c', 'C', 'c', 'D', 'd', 'D', 'd', 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e', 'G', 'g', 'G', 'g', 'G', 'g', 'G', 'g', 'H', 'h', 'H', 'h', 'I', 'i', 'I', 'i', 'I', 'i', 'I', 'i', 'I', 'i', 'IJ', 'ij', 'J', 'j', 'K', 'k', 'L', 'l', 'L', 'l', 'L', 'l', 'L', 'l', 'l', 'l', 'N', 'n', 'N', 'n', 'N', 'n', 'n', 'O', 'o', 'O', 'o', 'O', 'o', 'OE', 'oe', 'R', 'r', 'R', 'r', 'R', 'r', 'S', 's', 'S', 's', 'S', 's', 'S', 's', 'T', 't', 'T', 't', 'T', 't', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'W', 'w', 'Y', 'y', 'Y', 'Z', 'z', 'Z', 'z', 'Z', 'z', 's', 'f', 'O', 'o', 'U', 'u', 'A', 'a', 'I', 'i', 'O', 'o', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'A', 'a', 'AE', 'ae', 'O', 'o')

  # apply the substitutions one pair at a time, in table order
  for (k in seq_along(accented)) {
    x <- gsub(pattern = accented[k], replacement = plain[k], x = x)
  }
  x
}

25.6 Tidy data

# The descriptive country name (NAME_0 in the raw data) becomes the label
names(full.data.output)[names(full.data.output) == "NAME_0"] <- "label"
# The ISO code takes over as NAME_0 (level 0 country data)
names(full.data.output)[names(full.data.output) == "ISO"] <- "NAME_0"

# Put the columns in a fixed order: ISO code, levels 1-3, then the label
full.data.output <-
  full.data.output[, c("NAME_0", "NAME_1", "NAME_2", "NAME_3", "label")]

# Replace slashes, spaces, apostrophes and dots in the level 1-3 names with
# underscores [might need to add more]. A character class is used so that the
# dot is matched literally; the earlier pattern ended in "//." which never
# matched a dot.
name_cols <- c("NAME_1", "NAME_2", "NAME_3")
full.data.output[name_cols] <- lapply(
  full.data.output[name_cols],
  function(values) gsub(pattern = "[/ '.]", replacement = "_", x = values)
)
rm(name_cols)

# Level 0 (countries): blank out the level 1-3 names, keep one row per
# country and tag the rows with their list name
level0 <- full.data.output
level0[, c("NAME_1", "NAME_2", "NAME_3")] <- ""
level0 <- level0[!duplicated(level0), , drop = FALSE]
level0$list_name <- "NAME_0"


# Level 1: put NAME_1 first, blank out levels 2 and 3, keep distinct rows
level1 <- full.data.output[, c("NAME_1", "NAME_0", "NAME_2", "NAME_3", "label")]
level1[, c("NAME_2", "NAME_3")] <- ""
level1 <- level1[!duplicated(level1), , drop = FALSE]
level1$list_name <- "NAME_1"
# the on-screen label of a level 1 item is its own name
level1$label <- level1$NAME_1

# Add one "Other" choice per distinct parent: blank the name and label,
# de-duplicate, then fill in "Other"
other_rows <- level1
other_rows[["NAME_1"]] <- ""
other_rows[["label"]] <- ""
other_rows <- other_rows[!duplicated(other_rows), , drop = FALSE]
other_rows[["NAME_1"]] <- "Other"
other_rows[["label"]] <- "Other"
level1 <- rbind(level1, other_rows)
rm(other_rows)


# Level 2: put NAME_2 first, blank out level 3, keep distinct rows
level2 <- full.data.output[, c("NAME_2", "NAME_0", "NAME_1", "NAME_3", "label")]
level2[, "NAME_3"] <- ""
level2 <- level2[!duplicated(level2), , drop = FALSE]
level2$list_name <- "NAME_2"
# the on-screen label of a level 2 item is its own name
level2$label <- level2$NAME_2

# Add one "Other" choice per distinct parent combination: blank the name and
# label, de-duplicate, then fill in "Other"
other_rows <- level2
other_rows[["NAME_2"]] <- ""
other_rows[["label"]] <- ""
other_rows <- other_rows[!duplicated(other_rows), , drop = FALSE]
other_rows[["NAME_2"]] <- "Other"
other_rows[["label"]] <- "Other"
level2 <- rbind(level2, other_rows)
rm(other_rows)

# Level 3: put NAME_3 first and keep distinct rows
level3 <- full.data.output[, c("NAME_3", "NAME_0", "NAME_1", "NAME_2", "label")]
level3 <- level3[!duplicated(level3), , drop = FALSE]
level3$list_name <- "NAME_3"
# the on-screen label of a level 3 item is its own name
level3$label <- level3$NAME_3

# Add one "Other" choice per distinct parent combination: blank the name and
# label, de-duplicate, then fill in "Other"
other_rows <- level3
other_rows[["NAME_3"]] <- ""
other_rows[["label"]] <- ""
other_rows <- other_rows[!duplicated(other_rows), , drop = FALSE]
other_rows[["NAME_3"]] <- "Other"
other_rows[["label"]] <- "Other"
level3 <- rbind(level3, other_rows)
rm(other_rows)


# Stack the four levels into one table. Reduce() binds them left to right
# (level0, then level1, then level2, then level3), one rbind at a time.
output <- Reduce(rbind, list(level0, level1, level2, level3))


# Arrange the columns, then derive an accent-free `name` from `label` and
# strip accents from levels 0-3, so that accents remain only in `label`.
output <- output[, c("list_name", "NAME_0", "NAME_1", "NAME_2", "NAME_3", "label")]
output$name <- removeAccents(output$label)
for (level_col in c("NAME_0", "NAME_1", "NAME_2", "NAME_3")) {
  output[[level_col]] <- removeAccents(output[[level_col]])
}
rm(level_col)

25.7 Show output

# Preview the first 100 rows of the item list as a formatted table
kable(head(output, n = 100))
list_name NAME_0 NAME_1 NAME_2 NAME_3 label name
1 NAME_0 UGA Uganda Uganda
968 NAME_0 COD Democratic Republic of the Congo Democratic Republic of the Congo
12 NAME_1 UGA Adjumani Adjumani Adjumani
7 NAME_1 UGA Apac Apac Apac
29 NAME_1 UGA Arua Arua Arua
65 NAME_1 UGA Bugiri Bugiri Bugiri
82 NAME_1 UGA Bundibugyo Bundibugyo Bundibugyo
92 NAME_1 UGA Bushenyi Bushenyi Bushenyi
122 NAME_1 UGA Busia Busia Busia
132 NAME_1 UGA Gulu Gulu Gulu
155 NAME_1 UGA Hoima Hoima Hoima
169 NAME_1 UGA Iganga Iganga Iganga
194 NAME_1 UGA Jinja Jinja Jinja
205 NAME_1 UGA Kabale Kabale Kabale
224 NAME_1 UGA Kabarole Kabarole Kabarole
239 NAME_1 UGA Kaberamaido Kaberamaido Kaberamaido
248 NAME_1 UGA Kalangala Kalangala Kalangala
255 NAME_1 UGA Kampala Kampala Kampala
260 NAME_1 UGA Kamuli Kamuli Kamuli
283 NAME_1 UGA Kamwenge Kamwenge Kamwenge
292 NAME_1 UGA Kanungu Kanungu Kanungu
302 NAME_1 UGA Kapchorwa Kapchorwa Kapchorwa
318 NAME_1 UGA Kasese Kasese Kasese
340 NAME_1 UGA Katakwi Katakwi Katakwi
358 NAME_1 UGA Kayunga Kayunga Kayunga
367 NAME_1 UGA Kibale Kibale Kibale
386 NAME_1 UGA Kiboga Kiboga Kiboga
400 NAME_1 UGA Kisoro Kisoro Kisoro
414 NAME_1 UGA Kitgum Kitgum Kitgum
433 NAME_1 UGA Kotido Kotido Kotido
453 NAME_1 UGA Kumi Kumi Kumi
469 NAME_1 UGA Kyenjojo Kyenjojo Kyenjojo
483 NAME_1 UGA Lake_Albert Lake_Albert Lake_Albert
484 NAME_1 UGA Lake_Victoria Lake_Victoria Lake_Victoria
485 NAME_1 UGA Lira Lira Lira
513 NAME_1 UGA Luwero Luwero Luwero
533 NAME_1 UGA Masaka Masaka Masaka
556 NAME_1 UGA Masindi Masindi Masindi
570 NAME_1 UGA Mayuge Mayuge Mayuge
577 NAME_1 UGA Mbale Mbale Mbale
608 NAME_1 UGA Mbarara Mbarara Mbarara
654 NAME_1 UGA Moroto Moroto Moroto
665 NAME_1 UGA Moyo Moyo Moyo
673 NAME_1 UGA Mpigi Mpigi Mpigi
690 NAME_1 UGA Mubende Mubende Mubende
710 NAME_1 UGA Mukono Mukono Mukono
738 NAME_1 UGA Nakapiripirit Nakapiripirit Nakapiripirit
748 NAME_1 UGA Nakasongola Nakasongola Nakasongola
757 NAME_1 UGA Nebbi Nebbi Nebbi
776 NAME_1 UGA Ntungamo Ntungamo Ntungamo
791 NAME_1 UGA Pader Pader Pader
809 NAME_1 UGA Pallisa Pallisa Pallisa
837 NAME_1 UGA Rakai Rakai Rakai
864 NAME_1 UGA Rukungiri Rukungiri Rukungiri
875 NAME_1 UGA Sembabule Sembabule Sembabule
882 NAME_1 UGA Sironko Sironko Sironko
902 NAME_1 UGA Soroti Soroti Soroti
919 NAME_1 UGA Tororo Tororo Tororo
943 NAME_1 UGA Wakiso Wakiso Wakiso
960 NAME_1 UGA Yumbe Yumbe Yumbe
9682 NAME_1 COD Equateur Équateur Equateur
993 NAME_1 COD Bandundu Bandundu Bandundu
1010 NAME_1 COD Bas-Congo Bas-Congo Bas-Congo
1022 NAME_1 COD Kasai-Occidental Kasaï-Occidental Kasai-Occidental
1033 NAME_1 COD Kasai-Oriental Kasaï-Oriental Kasai-Oriental
1046 NAME_1 COD Katanga Katanga Katanga
1069 NAME_1 COD Kinshasa_City Kinshasa_City Kinshasa_City
1071 NAME_1 COD Kivu Kivu Kivu
1093 NAME_1 COD Orientale Orientale Orientale
11 NAME_1 UGA Other Other Other
9681 NAME_1 COD Other Other Other
13 NAME_2 UGA Adjumani East_Moyo East_Moyo East_Moyo
72 NAME_2 UGA Apac Kole Kole Kole
121 NAME_2 UGA Apac Kwania Kwania Kwania
17 NAME_2 UGA Apac Maruzi Maruzi Maruzi
22 NAME_2 UGA Apac Oyam Oyam Oyam
293 NAME_2 UGA Arua Arua_Municipality Arua_Municipality Arua_Municipality
31 NAME_2 UGA Arua Ayivu Ayivu Ayivu
37 NAME_2 UGA Arua Koboko Koboko Koboko
42 NAME_2 UGA Arua Madi-Okollo Madi-Okollo Madi-Okollo
48 NAME_2 UGA Arua Maracha Maracha Maracha
55 NAME_2 UGA Arua Terego Terego Terego
61 NAME_2 UGA Arua Vurra Vurra Vurra
652 NAME_2 UGA Bugiri Bukooli Bukooli Bukooli
823 NAME_2 UGA Bundibugyo Bwamba Bwamba Bwamba
89 NAME_2 UGA Bundibugyo Ntoroko Ntoroko Ntoroko
922 NAME_2 UGA Bushenyi Buhweju Buhweju Buhweju
96 NAME_2 UGA Bushenyi Bunyaruguru Bunyaruguru Bunyaruguru
101 NAME_2 UGA Bushenyi Igara Igara Igara
108 NAME_2 UGA Bushenyi Ruhinda Ruhinda Ruhinda
115 NAME_2 UGA Bushenyi Sheema Sheema Sheema
1222 NAME_2 UGA Busia Samia-Bugwe Samia-Bugwe Samia-Bugwe
1322 NAME_2 UGA Gulu Aswa Aswa Aswa
137 NAME_2 UGA Gulu Gulu Gulu Gulu
141 NAME_2 UGA Gulu Kilak Kilak Kilak
145 NAME_2 UGA Gulu Nwoya Nwoya Nwoya
149 NAME_2 UGA Gulu Omoro Omoro Omoro
1552 NAME_2 UGA Hoima Bugahya Bugahya Bugahya
164 NAME_2 UGA Hoima Buhaguzi Buhaguzi Buhaguzi
1692 NAME_2 UGA Iganga Bugweri Bugweri Bugweri

25.8 Write output to itemsets.csv

# Write the item list for ODK. The output folder is created first because
# write.csv() cannot create missing folders.
# NOTE: values are written unquoted (quote = FALSE), so a label containing a
# comma would shift the columns of that row.
if (!dir.exists("output")) {
  dir.create("output", recursive = TRUE)
}
write.csv(
  x = output,
  file = "output/diva_itemsets.csv",
  quote = FALSE,
  row.names = FALSE
)

25.9 Delete the raw data

# Delete the downloaded raw data, including the folder itself. unlink() is
# used instead of a shell "rm -rf" so the step does not depend on a Unix shell.
unlink("data/divadownload", recursive = TRUE)