########################################################################################################################
##
## Human Development Report Office (HDRO), United Nations Development Programme
## Multidimensional Poverty Index (MPI) 2020 release
##
## This code calculates the MPI and its components using the 2014/2015 MICS data for the Republic of Congo.
## Users should first download the MICS data available at http://mics.unicef.org/surveys
## Users should also download the macro package from WHO that calculates anthropometric z-scores for children under 5 years (https://www.who.int/childgrowth/software/en/).
## Please follow WHO instructions.
## The WHO macro for R produces z-scores that differ slightly from the z-scores calculated in Stata.
## WHO will update the R macro; once this is done, the estimates in R and Stata will be identical.
## 
## For now, MPI programs in R are available for 4 selected countries (Benin, Republic of Congo, India and Iraq). 
## This is still an experimental phase and HDRO plans to expand the availability of such programs.  
## However, users can adapt any of the MPI codes in R and produce programs for other countries. The modifications will depend on the information collected in the data for the other countries.
## We welcome feedback from the users.
########################################################################################################################

  
### Set-up ###
# Session-wide settings for the whole script.
rm(list = ls())           # Clean up the environment (removes every object in the calling environment)
options(scipen = 6)       # Display digits, not the scientific version
par(mfrow = c(1, 1))      # Reset plot placement to normal 1 by 1
# Warnings are printed as they occur instead of being silenced: with warn = -1 problems such as a
# misspelled column name (which yields NULL plus a warning on a tibble) pass unnoticed.
options(warn = 1)

### Working Folder Path ###
# path_in, path_out, path_logs and path_pc all point to the same project folder;
# edit the first line to relocate the project.
path_in <- "C:/Users/cecilia.calderon/Documents/HDRO_MCC/MPI/MPI 2.0/Kathrin consultancy R/Congo/"
path_out <- path_in
path_logs <- path_in
path_pc <- path_in


### Log file ### 
#sink(file.path(path_logs,"cog_mics15_dataprep.txt"), split = TRUE)

### WHO2007 R macro package ###
# https://www.who.int/growthref/tools/readme_r.pdf?ua=1
# Read the three WHO 2007 reference tables (whitespace-separated text files with a header row)
# from path_in, then source the macro script stored in the same folder.
wfawho2007 <- read.table(file.path(path_in, "wfawho2007.txt"), header = TRUE, sep = "", skip = 0)
hfawho2007 <- read.table(file.path(path_in, "hfawho2007.txt"), header = TRUE, sep = "", skip = 0)
bfawho2007 <- read.table(file.path(path_in, "bfawho2007.txt"), header = TRUE, sep = "", skip = 0)
source(file.path(path_in, "who2007.r"))

### Packages ###
# install.packages(c("haven", "Hmisc", "plyr", "memisc", "expss", "questionr", "anthro", "survey"))
library(haven)      # ready in dta file
library(Hmisc)      # to label variables
library(plyr)       # used for desc
library(memisc)     # command as codebook
library(expss)      # table with label
library(questionr)  # lookfor command
library(anthro)     # https://www.who.int/childgrowth/software/en/
library(survey)     # takes survey design into account  

########################################################################################################################
### Congo MICS 2014-15                                                                                               ###
########################################################################################################################
  
  
########################################################################################################################
### Step 1: Data preparation 
### Selecting main variables from CH, WM, HH & MN recode & merging with HL recode 
########################################################################################################################
#  Congo MICS 2014-15: P4 indicates that anthropometric data was collected for all children under 5 years.


########################################################################################################################
### Step 1.1  CH - CHILDREN's RECODE (under 5)
########################################################################################################################
# Read the children's recode and lower-case all column names
DataCH <- read_spss(file.path(path_in, "ch.sav"))
names(DataCH) <- tolower(names(DataCH))

### Individual unique key used for merging, built from:
###   hh1 = cluster number
###   hh2 = household number
###   ln  = child's line number in household
DataCH$ind_id <- DataCH$hh1 * 100000 + DataCH$hh2 * 100 + DataCH$ln
label(DataCH$ind_id) <- "Individual ID"
str(DataCH$ind_id)

# Returns 0 when the key is unique
anyDuplicated(DataCH$ind_id)

# Marker for observations that come from the CH recode
DataCH$child_CH <- 1


### Inputs required by WHO for the z-scores:
### sex, age, weight, height, measurement, oedema & child sampling weight

### Variable: SEX ###
table(DataCH$hl4, useNA = "always")
# "1" for male; "2" for female
DataCH$gender <- DataCH$hl4
str(DataCH$gender)
table(DataCH$gender, useNA = "always")


### Variable: AGE ###
table(DataCH$caged, useNA = "always")
codebook(DataCH$caged)
# caged is taken as the age in days (it is stored in age_days and later passed to the
# z-score routine with is_age_in_month = FALSE)
DataCH$age_days <- DataCH$caged
describe(DataCH$age_days)
summary(DataCH$age_days)
# The code 9999 and negative values are not valid ages
DataCH$age_days[DataCH$caged == 9999 | DataCH$caged < 0] <- NA
# Where caged is coded 9999, fall back on cage converted with 365/12 days per unit
DataCH$trunc_temp <- trunc(DataCH$cage * (365 / 12))
DataCH$age_days <- ifelse(DataCH$caged == 9999, DataCH$trunc_temp, DataCH$age_days)
summary(DataCH$age_days)
DataCH$ageunit <- "days"
label(DataCH$ageunit) <- "Days"


### Variable: BODY WEIGHT (KILOGRAMS) ###
describe(DataCH$an3)
table(DataCH$an3, useNA = "always")
DataCH$weight <- DataCH$an3
# Values of 99 and above are treated as missing
DataCH$weight[DataCH$an3 >= 99] <- NA
# Cross-check the missing weights against an2 (result of the measurement)
table(DataCH$an2[DataCH$an3 >= 99 | is.na(DataCH$an3)],
      DataCH$an3[DataCH$an3 >= 99 | is.na(DataCH$an3)],
      useNA = "always")
summary(DataCH$weight)


### Variable: HEIGHT (CENTIMETERS) ###
describe(DataCH$an4)
table(DataCH$an4, useNA = "always")
DataCH$height <- DataCH$an4
# All missing or out-of-range values (999 and above) are replaced as "NA"
DataCH$height[DataCH$an4 >= 999] <- NA
# Cross-check the missing heights against an2 (result of the measurement)
table(DataCH$an2[DataCH$an4 >= 999 | is.na(DataCH$an4)],
      DataCH$an4[DataCH$an4 >= 999 | is.na(DataCH$an4)],
      useNA = "always")
summary(DataCH$height)


### Variable: MEASURED STANDING/LYING DOWN ###
describe(DataCH$an4a)
DataCH$measure[DataCH$an4a == 1] <- "l"
    # Child measured lying down
DataCH$measure[DataCH$an4a == 2] <- "h"
    # Child measured standing up
# The condition must test an4a throughout: a misspelled column name evaluates to NULL,
# which makes the whole condition zero-length and turns this recode into a no-op.
DataCH$measure[DataCH$an4a == 9 | DataCH$an4a == 0 | is.na(DataCH$an4a)] <- NA
    # Replace with "NA" if unknown
table(DataCH$measure, useNA = "always")


### Variable: OEDEMA ###
# Search the recode for an oedema variable; the value below is set for every child regardless
lookfor(DataCH, "oedema")
# Assumption: no child has oedema
DataCH$oedema <- "n"
describe(DataCH$oedema)
table(DataCH$oedema, useNA = "always")


### Variable: INDIVIDUAL CHILD SAMPLING WEIGHT ###
# sw is a copy of the child weight variable chweight
DataCH$sw <- DataCH$chweight
describe(DataCH$sw)
summary(DataCH$sw)


# We now run the command to calculate the z-scores with the R-Command #
# Age is supplied in days (is_age_in_month = FALSE). The measurement position prepared above
# ("l" = lying down, "h" = standing, NA = unknown) is passed through `measure`; without it the
# routine has no information on how length/height was taken.
children_nutri_cog_z_rc <- with(
  DataCH,
  anthro_zscores(
    sex = gender,
    age = age_days,
    is_age_in_month = FALSE,
    weight = weight,
    lenhei = height,
    measure = measure,
    oedema = oedema
  )
)

### Standard MPI indicator ###
    # Takes value 1 if the child is under 2 stdev below the median & 0 otherwise.
    # The indicator is NA when the z-score is missing or flagged (flag == 1) as implausible.
    # ifelse() already returns NA for a missing z-score, so no intermediate recode to 0 is needed.
children_nutri_cog_z_rc$underweight <- ifelse(children_nutri_cog_z_rc$zwei < -2.0, 1, 0)
children_nutri_cog_z_rc$underweight[is.na(children_nutri_cog_z_rc$zwei) | children_nutri_cog_z_rc$fwei == 1] <- NA
label(children_nutri_cog_z_rc$underweight) <- "Child is undernourished (weight-for-age) 2sd - WHO"
table(children_nutri_cog_z_rc$underweight, useNA = "always")

children_nutri_cog_z_rc$stunting <- ifelse(children_nutri_cog_z_rc$zlen < -2.0, 1, 0)
children_nutri_cog_z_rc$stunting[is.na(children_nutri_cog_z_rc$zlen) | children_nutri_cog_z_rc$flen == 1] <- NA
label(children_nutri_cog_z_rc$stunting) <- "Child is stunted (length/height-for-age) 2sd - WHO"
table(children_nutri_cog_z_rc$stunting, useNA = "always")

children_nutri_cog_z_rc$wasting <- ifelse(children_nutri_cog_z_rc$zwfl < -2.0, 1, 0)
children_nutri_cog_z_rc$wasting[is.na(children_nutri_cog_z_rc$zwfl) | children_nutri_cog_z_rc$fwfl == 1] <- NA
label(children_nutri_cog_z_rc$wasting) <- "Child is wasted (weight-for-length/height) 2sd - WHO"
table(children_nutri_cog_z_rc$wasting, useNA = "always")

# Count children flagged on weight-for-age or length/height-for-age.
# na.rm = TRUE keeps the count defined when a flag is missing for some children.
sum(children_nutri_cog_z_rc$fwei == 1 | children_nutri_cog_z_rc$flen == 1, na.rm = TRUE)
# Note: In Congo MICS 2014-15, 159 children were replaced as missing because they have extreme z-scores which are 
# biologically implausible. 


# Retain relevant variables:
# Bind the z-score output to the child records column-wise, then keep only the merge key,
# the CH marker, the line number and the three nutrition indicators.
cog15_CH <- cbind(children_nutri_cog_z_rc, DataCH)
cog15_CH <- cog15_CH[c("ind_id", "child_CH", "ln", "underweight", "stunting", "wasting")]
rm("children_nutri_cog_z_rc")

# Sort by the individual key; the sorted result has to be assigned back, otherwise it is only printed
cog15_CH <- cog15_CH[order(cog15_CH$ind_id), ]
anyDuplicated(cog15_CH$ind_id)


########################################################################################################################
### Step 1.2  BR - BIRTH RECODE
### (All females 15-49 years who ever gave birth) 
########################################################################################################################
# Step 1.2 identifies children of any age who died in the last 5 years prior to the survey date.
DataBH <- read_spss(file.path(path_in, "bh.sav"))
names(DataBH) <- tolower(names(DataBH))

### Individual unique key used for merging, built from:
###   hh1 = cluster number
###   hh2 = household number
###   ln  = women's line number

str(DataBH$ln)
str(DataBH$bhln)
# Two non-identical candidates for the women's line number exist (ln and bhln); ln is used
# because it aligns with the HL file.
DataBH$ind_id <- DataBH$hh1 * 100000 + DataBH$hh2 * 100 + DataBH$ln
label(DataBH$ind_id) <- "Individual ID"
str(DataBH$ind_id)

describe(DataBH$bh4c)
describe(DataBH$bh9c)
# Date of death = date of birth (bh4c) + age at death (bh9c)
DataBH$date_death <- DataBH$bh4c + DataBH$bh9c
# Months dead from survey = date of interview (wdoi) - date of death
DataBH$mdead_survey <- DataBH$wdoi - DataBH$date_death
# Children who are alive are set to NA to distinguish them from children who died at 0 months
DataBH$mdead_survey[(DataBH$bh9c == 0 | is.na(DataBH$bh9c)) & DataBH$bh5 == 1] <- NA
# Years dead from survey; negative values are set to 0
DataBH$ydead_survey <- DataBH$mdead_survey / 12
DataBH$ydead_survey[DataBH$ydead_survey < 0] <- 0

describe(DataBH$bh5)
describe(DataBH$bh5)
table(DataBH$bh5, useNA = "always")
# bh5 - Child still alive: 1=Yes; 2=No
# Recode to child_died (1=child dead; 0=child alive; NA when bh5 is missing)
DataBH$child_died[DataBH$bh5 == 1] <- 0
DataBH$child_died[DataBH$bh5 == 2] <- 1
DataBH$child_died[is.na(DataBH$bh5)] <- NA
# Records with bh9c of 216 or more are coded 0
DataBH$child_died[DataBH$bh9c >= 216 & !is.na(DataBH$bh9c)] <- 0
table(DataBH$child_died, useNA = "always")

DataBH$tot_child_died <- ave(DataBH$child_died, DataBH$ind_id, FUN = function(x) sum(x, na.rm = TRUE))
    # For each woman, sum the number of children who died

# temp flags births where the child died within 5 years of the interview (1) and is 0 otherwise
DataBH$temp[DataBH$ydead_survey <= 5 & DataBH$child_died == 1] <- 1
DataBH$temp[DataBH$ydead_survey <= 5 & DataBH$child_died == 0] <- 0
DataBH$temp[is.na(DataBH$temp)] <- 0
DataBH$tot_child_died_5y <- ave(DataBH$temp, DataBH$ind_id, FUN = function(x) sum(x, na.rm = TRUE))
    # For each woman, sum the number of children who died in the past 5 years prior to the interview date 
DataBH$tot_child_died_5y[is.na(DataBH$tot_child_died_5y) & DataBH$tot_child_died >= 0 & !is.na(DataBH$tot_child_died)] <- 0
DataBH$tot_child_died_5y[DataBH$ydead_survey > 5 | is.na(DataBH$ydead_survey)] <- 0
    # All children who are alive and died longer than 5 years from the interview date are replaced as '0'
DataBH$tot_child_died_5y[DataBH$child_died == 1 & is.na(DataBH$ydead_survey)] <- NA
    # Replace as NA if there is no information on when the child died. This must come after the
    # zeroing step above: that step also matches rows with a missing ydead_survey and would
    # otherwise overwrite the NA.
table(DataBH$tot_child_died, useNA = "always")
table(DataBH$tot_child_died_5y, useNA = "always")

DataBH$child_died_per_wom <- ave(DataBH$tot_child_died, DataBH$ind_id, FUN = function(x) max(x, na.rm = TRUE))
label(DataBH$child_died_per_wom) <- "Total child death for each women (birth recode)"
table(DataBH$child_died_per_wom, useNA = "always")

DataBH$child_died_per_wom_5y <- ave(DataBH$tot_child_died_5y, DataBH$ind_id, FUN = function(x) max(x, na.rm = TRUE))
# max() over a woman whose rows are all NA returns -Inf; such women are set to NA
DataBH$child_died_per_wom_5y[DataBH$child_died_per_wom_5y < 0] <- NA
label(DataBH$child_died_per_wom_5y) <- "Total child death for each women in the last 5 years (birth recode)"
table(DataBH$child_died_per_wom_5y, useNA = "always")
DataBH$temp <- NULL

# Keep one observation per woman
# The sorted data has to be assigned back (otherwise the sort has no effect); the first row of
# each ind_id is then retained.
DataBH <- DataBH[order(DataBH$ind_id), ]
DataBH <- DataBH[!duplicated(DataBH$ind_id), ]

DataBH$women_BH <- 1
    # Identification variable for observations in BR recode


# Retain relevant variables
cog15_BH <- DataBH[c("ind_id", "hh1", "hh2", "women_BH", "child_died_per_wom", "child_died_per_wom_5y")]

	
########################################################################################################################
### Step 1.3   WM - WOMEN's RECODE  
### (All eligible females 15-49 years in the household)
######################################################################################################################## 
# Read the women's recode and lower-case all column names
DataWM <- read_spss(file.path(path_in, "wm.sav"))
names(DataWM) <- tolower(names(DataWM))

### Individual unique key used for merging, built from:
###   hh1 = cluster number
###   hh2 = household number
###   ln  = respondent's line number
DataWM$ind_id <- DataWM$hh1 * 100000 + DataWM$hh2 * 100 + DataWM$ln
label(DataWM$ind_id) <- "Individual ID"
str(DataWM$ind_id)

# Returns 0 when the key is unique
anyDuplicated(DataWM$ind_id)

# Marker for observations that come from the WM recode
DataWM$women_WM <- 1

table(DataWM$wb2, useNA = "always")

# Cross-tabulate cm1 against cm8.
# Women who have never given birth will not have information on child mortality.
# Note: In Congo MICS 2014-15, 18 women reported as never having given birth but had children who died.
table(DataWM$cm1, DataWM$cm8, useNA = "always")


# Build a five-category marital status from mstatus and ma6; combinations not listed stay NA
lookfor(DataWM, "marital")
table(DataWM$mstatus, DataWM$ma6, useNA = "always")
DataWM$marital[DataWM$mstatus == 3 & is.na(DataWM$ma6)] <- 1
    # 1: Never married
DataWM$marital[DataWM$mstatus == 1 & is.na(DataWM$ma6)] <- 2
    # 2: Currently married
DataWM$marital[DataWM$mstatus == 2 & DataWM$ma6 == 1] <- 3
    # 3: Widowed
DataWM$marital[DataWM$mstatus == 2 & DataWM$ma6 == 2] <- 4
    # 4: Divorced
DataWM$marital[DataWM$mstatus == 2 & DataWM$ma6 == 3] <- 5
    # 5: Separated/not living together
label(DataWM$marital) <- "Marital status of household member"
table(DataWM$marital, useNA = "always")
table(DataWM$ma6, DataWM$marital, useNA = "always")
table(DataWM$mstatus, DataWM$marital, useNA = "always")

# Sort by the individual key; the result has to be assigned back, otherwise it is only printed
DataWM <- DataWM[order(DataWM$ind_id), ]
cog15_WM <- DataWM[c("wm7", "cm1", "cm8", "cm9a", "cm9b", "ind_id", "women_WM", "marital")]
    # Save a temp file for merging with HL

 
########################################################################################################################
### Step 1.4  MN - MEN'S RECODE 
### (All eligible man: 15-49 years in the household) 
########################################################################################################################  
# Read the men's recode and lower-case all column names
DataMN <- read_spss(file.path(path_in, "mn.sav"))
names(DataMN) <- tolower(names(DataMN))

### Individual unique key used for merging, built from:
###   hh1 = cluster number
###   hh2 = household number
###   ln  = respondent's line number
DataMN$ind_id <- DataMN$hh1 * 100000 + DataMN$hh2 * 100 + DataMN$ln
label(DataMN$ind_id) <- "Individual ID"
str(DataMN$ind_id)

# Returns 0 when the key is unique
anyDuplicated(DataMN$ind_id)

# Marker for observations that come from the MN recode
DataMN$men_MN <- 1

# Build a five-category marital status from mmstatus and mma6; combinations not listed stay NA
lookfor(DataMN, "marital")
table(DataMN$mmstatus, DataMN$mma6, useNA = "always")
DataMN$marital[DataMN$mmstatus == 3 & is.na(DataMN$mma6)] <- 1
    # 1: Never married
DataMN$marital[DataMN$mmstatus == 1 & is.na(DataMN$mma6)] <- 2
    # 2: Currently married
DataMN$marital[DataMN$mmstatus == 2 & DataMN$mma6 == 1] <- 3
    # 3: Widowed
DataMN$marital[DataMN$mmstatus == 2 & DataMN$mma6 == 2] <- 4
    # 4: Divorced
DataMN$marital[DataMN$mmstatus == 2 & DataMN$mma6 == 3] <- 5
    # 5: Separated/not living together
label(DataMN$marital) <- "Marital status of household member"
table(DataMN$marital, useNA = "always")
table(DataMN$mma6, DataMN$marital, useNA = "always")
table(DataMN$mmstatus, DataMN$marital, useNA = "always")

# Sort by the individual key; the result has to be assigned back, otherwise it is only printed
DataMN <- DataMN[order(DataMN$ind_id), ]
cog15_MN <- DataMN[c("mcm1", "mcm8", "mcm9a", "mcm9b", "ind_id", "men_MN", "marital")]
    # Save a temp file for merging with HL


########################################################################################################################  
### Step 1.5  HH - HOUSEHOLD RECODE 
### (All households interviewed)
########################################################################################################################  
# Note: In the case of Benin 2017-18, anthropometric data was not collected for men.
# NOTE(review): the note above mentions Benin 2017-18 although this program processes
# Congo MICS 2014-15 - confirm whether it applies here.
DataHH <- read_spss(file.path(path_in, "hh.sav"))
names(DataHH) <- tolower(names(DataHH))

### Generate household unique key variable required for data merging
### hh1=cluster number;  
### hh2=household number; 

DataHH$hh_id <- DataHH$hh1 * 100 + DataHH$hh2
label(DataHH$hh_id) <- "Household ID"
str(DataHH$hh_id)

anyDuplicated(DataHH$hh_id)

# Sort by the household key; the result has to be assigned back, otherwise it is only printed
DataHH <- DataHH[order(DataHH$hh_id), ]
cog15_HH <- DataHH
    


########################################################################################################################
### Step 1.6  PHL - HOUSEHOLD MEMBER  
########################################################################################################################
# Read the household member recode and lower-case all column names
DataHL <- read_spss(file.path(path_in, "hl.sav"))
names(DataHL) <- tolower(names(DataHL))

# Survey identifiers, constant for every row
DataHL$cty <- "Congo"
DataHL$ccty <- "COG"
DataHL$year <- "2014-15"
DataHL$survey <- "MICS"
DataHL$ccnum <- 178
DataHL$country <- "Congo"       # column name was misspelled as "ountry"
DataHL$countrycode <- "COG"

### Generate a household unique key variable at the household level using: 
### hh1=cluster number 
### hh2=household number
DataHL$hh_id <- DataHL$hh1 * 100 + DataHL$hh2
label(DataHL$hh_id) <- "Household ID"
describe(DataHL$hh_id)


### Generate individual unique key variable required for data merging using:
### hh1=cluster number; 
### hh2=household number; 
### hl1=respondent's line number.
DataHL$ind_id <- DataHL$hh1 * 100000 + DataHL$hh2 * 100 + DataHL$hl1
label(DataHL$ind_id) <- "Individual ID"
describe(DataHL$ind_id)

anyDuplicated(DataHL$ind_id)

# Sort by the individual key; the result has to be assigned back, otherwise it is only printed
DataHL <- DataHL[order(DataHL$ind_id), ]



########################################################################################################################
### 1.7 DATA MERGING
########################################################################################################################

### Merging BH Recode 
#########################################
# Full outer join on ind_id: all=TRUE keeps unmatched rows from both inputs.
# Columns other than the key that exist in both inputs are disambiguated by merge() with
# .x/.y suffixes; later steps refer to suffixed names such as hhweight.x, hh6.x and hh7.x,
# so the order of the merges below must not be changed.
data_merge_1 <- merge(DataHL, cog15_BH ,by="ind_id", all=TRUE)
rm("cog15_BH")


### Merging WM Recode 
#########################################
data_merge_2 <- merge(data_merge_1, cog15_WM, by="ind_id", all=TRUE)
rm("cog15_WM")

# temp = 1 when hl7 is positive or missing, 0 otherwise; it is cross-tabulated against the
# WM marker to find members flagged through hl7 that have no women's record
table(data_merge_2$hl7, useNA = "always") 
data_merge_2$temp <- ifelse(data_merge_2$hl7>0,1,0)
data_merge_2$temp[is.na(data_merge_2$hl7)] <- 1
table(data_merge_2$women_WM, data_merge_2$temp, useNA = "always")
table(data_merge_2$wm7[data_merge_2$temp==1 & is.na(data_merge_2$women_WM)], useNA = "always") 
    # Total of eligible women not interviewed 


### Merging HH Recode 
#########################################
# Household-level data is attached to every member by hh_id; all.x=TRUE keeps every row of
# the member-level data even when no household record matches
data_merge_3 <- merge(data_merge_2, cog15_HH, by="hh_id", all.x=TRUE)
rm("cog15_HH")


### Merging MN Recode 
#########################################
data_merge_4 <- merge(data_merge_3, cog15_MN, by="ind_id", all=TRUE)
rm("cog15_MN")


### Merging CH Recode 
#########################################
data_merge_5 <- merge(data_merge_4, cog15_CH, by="ind_id", all=TRUE)
rm("cog15_CH")


# The fully merged data becomes DataFinal; intermediate and source objects are removed
DataFinal <- data_merge_5
rm("data_merge_1", "data_merge_2","data_merge_3","data_merge_4")
rm("DataBH", "DataCH","DataHH","DataHL","DataMN","DataWM")


########################################################################################################################
### Step 1.8 CONTROL VARIABLES                     
########################################################################################################################
# Households are identified as having 'no eligible' members if there are no applicable population, that is, children 0-5 
# years, adult women 15-49 years or men 15-64 years. These households will not have information on relevant indicators of 
# health. As such, these households are considered as non-deprived in those relevant indicators.


### No Eligible Women 15-49 years
#########################################
# Member-level flag from hl7, summed within the household
DataFinal$fem_eligible <- ifelse(DataFinal$hl7 > 0, 1, 0)
DataFinal$hh_n_fem_eligible <- ave(
  DataFinal$fem_eligible,
  DataFinal$hh_id,
  FUN = function(x) sum(x, na.rm = TRUE)
)
# 1 if the household had no eligible females for an interview
DataFinal$no_fem_eligible <- ifelse(DataFinal$hh_n_fem_eligible == 0, 1, 0)
label(DataFinal$no_fem_eligible) <- "Household has no eligible women"
table(DataFinal$no_fem_eligible, useNA = "always")


### No Eligible Men 15-64 years
#########################################
# Member-level flag from hl7a, summed within the household
DataFinal$male_eligible <- ifelse(DataFinal$hl7a > 0, 1, 0)
DataFinal$hh_n_male_eligible <- ave(
  DataFinal$male_eligible,
  DataFinal$hh_id,
  FUN = function(x) sum(x, na.rm = TRUE)
)
# 1 if the household had no eligible males for an interview
DataFinal$no_male_eligible <- ifelse(DataFinal$hh_n_male_eligible == 0, 1, 0)
label(DataFinal$no_male_eligible) <- "Household has no eligible man"
table(DataFinal$no_male_eligible, useNA = "always")


### No Eligible Children 0-5 years
#########################################
# A member counts as an eligible child when hl7b is positive or a CH record exists
DataFinal$child_eligible <- ifelse(DataFinal$hl7b > 0 | DataFinal$child_CH == 1, 1, 0)
DataFinal$hh_n_children_eligible <- ave(
  DataFinal$child_eligible,
  DataFinal$hh_id,
  FUN = function(x) sum(x, na.rm = TRUE)
)
# 1 if there were no eligible children for anthropometrics
DataFinal$no_child_eligible <- ifelse(DataFinal$hh_n_children_eligible == 0, 1, 0)
label(DataFinal$no_child_eligible) <- "Household has no children eligible"
table(DataFinal$no_child_eligible, useNA = "always")


### No Eligible Women and Men 
#########################################
# NOTE: In the DHS datasets, this variable is the control variable for the child mortality
# indicator when mortality data was collected from women and men. If child mortality was only
# collected from women, 'no_fem_eligible' is used as the eligibility criterion instead.
# 1 if the household had no eligible men & women for an interview
DataFinal$no_adults_eligible <- with(
  DataFinal,
  ifelse(no_fem_eligible == 1 & no_male_eligible == 1, 1, 0)
)
label(DataFinal$no_adults_eligible) <- "Household has no eligible women or men"
table(DataFinal$no_adults_eligible, useNA = "always")


### No Eligible Children and Women  
#########################################
# NOTE: In the DHS datasets, this variable is the control variable for the nutrition indicator
# when nutrition data is present for children and women. In MICS it is NOT used as a control
# variable because nutrition data is only collected from children; it is still generated here
# for consistency.
DataFinal$no_child_fem_eligible <- with(
  DataFinal,
  ifelse(no_child_eligible == 1 & no_fem_eligible == 1, 1, 0)
)
label(DataFinal$no_child_fem_eligible) <- "Household has no children or women eligible"
table(DataFinal$no_child_fem_eligible, useNA = "always")


### No Eligible Women, Men or Children 
#########################################
# NOTE: In the DHS datasets, this variable is the control variable for the nutrition indicator
# when nutrition data is present for children, women and men. In MICS it is NOT used as a
# control variable because nutrition data is only collected from children; it is still
# generated here for consistency.
DataFinal$no_eligibles <- with(
  DataFinal,
  ifelse(no_fem_eligible == 1 & no_male_eligible == 1 & no_child_eligible == 1, 1, 0)
)
label(DataFinal$no_eligibles) <- "Household has no eligible women, men, or children"
table(DataFinal$no_eligibles, useNA = "always")


### No Eligible Subsample 
#########################################
# The MICS surveys do not collect hemoglobin data, so the member-level flag is all NA.
# The variables are still generated here for consistency.
DataFinal$hem_eligible <- NA
DataFinal$hh_n_hem_eligible <- ave(
  DataFinal$hem_eligible,
  DataFinal$hh_id,
  FUN = function(x) sum(x, na.rm = TRUE)
)
# 1 if the household total of the hemoglobin flag is 0
DataFinal$no_hem_eligible <- ifelse(DataFinal$hh_n_hem_eligible == 0, 1, 0)
label(DataFinal$no_hem_eligible) <- "Household has no eligible individuals for hemoglobin measurements"
table(DataFinal$no_hem_eligible, useNA = "always")

# Drop the intermediate member-level flags and household counts
DataFinal <- DataFinal[!names(DataFinal) %in% c(
  "fem_eligible", "hh_n_fem_eligible",
  "male_eligible", "hh_n_male_eligible",
  "child_eligible", "hh_n_children_eligible",
  "hem_eligible", "hh_n_hem_eligible"
)]


########################################################################################################################
### 1.9 RENAMING DEMOGRAPHIC VARIABLES 
########################################################################################################################
# Sample weight: copy of hhweight.x (the .x suffix comes from the earlier merges)
summary(DataFinal$hhweight.x)
DataFinal$weight <- DataFinal$hhweight.x
label(DataFinal$weight) <- "Sample weight"
summary(DataFinal$weight, useNA = "always")


# Area: urban or rural
describe(DataFinal$hh6.x)
str(DataFinal$hh6.x)
table(DataFinal$hh6.x, useNA = "always")
# hh6.x == 1 is recoded to 1 ("urban") and hh6.x == 2 to 0 ("rural")
DataFinal$area[DataFinal$hh6.x == 1] <- 1
DataFinal$area[DataFinal$hh6.x == 2] <- 0
DataFinal$area <- factor(
  DataFinal$area,
  levels = c(0, 1),
  labels = c("rural", "urban")
)
label(DataFinal$area) <- "Area: urban-rural"
table(DataFinal$area, useNA = "always")


# Relationship to the head of household
# Inspect the source variable hl3 (the previous DataFinal$DataFinal$hl3 evaluated to NULL,
# so nothing was actually inspected)
describe(DataFinal$hl3)
table(DataFinal$hl3, useNA = "always")
# Collapse the hl3 codes into six categories; code 98 and any code not listed become NA
DataFinal$relationship[DataFinal$hl3 == 1] <- 1
DataFinal$relationship[DataFinal$hl3 == 2] <- 2
DataFinal$relationship[DataFinal$hl3 == 3] <- 3
DataFinal$relationship[DataFinal$hl3 == 13] <- 3
DataFinal$relationship[DataFinal$hl3 >= 4 & DataFinal$hl3 <= 12] <- 4
DataFinal$relationship[DataFinal$hl3 == 14] <- 6
DataFinal$relationship[DataFinal$hl3 == 96] <- 5
DataFinal$relationship[DataFinal$hl3 == 98] <- NA
DataFinal$relationship <- factor(DataFinal$relationship,
                                 levels = c(1, 2, 3, 4, 5, 6),
                                 labels = c("head", "spouse", "child", "extended family", "not related", "maid"))
label(DataFinal$relationship) <- "Relationship to the head of household"
table(DataFinal$relationship, useNA = "always")


# Sex of household member: copy of hl4
describe(DataFinal$hl4)
table(DataFinal$hl4, useNA = "always")
DataFinal$sex <- DataFinal$hl4
label(DataFinal$sex) <- "Sex of household member"
table(DataFinal$sex, useNA = "always")


# Age of household member: copy of hl6, with values of 98 and above set to NA
describe(DataFinal$hl6)
table(DataFinal$hl6, useNA = "always")
DataFinal$age <- DataFinal$hl6
DataFinal$age[DataFinal$age >= 98] <- NA
label(DataFinal$age) <- "Age of household member"
summary(DataFinal$age)


# Age group: six bands (0-4, 5-9, 10-14, 15-17, 18-59, 60+)
DataFinal$agec7[DataFinal$age >= 0 & DataFinal$age <= 4] <- 1
DataFinal$agec7[DataFinal$age >= 5 & DataFinal$age <= 9] <- 2
DataFinal$agec7[DataFinal$age >= 10 & DataFinal$age <= 14] <- 3
DataFinal$agec7[DataFinal$age >= 15 & DataFinal$age <= 17] <- 4
DataFinal$agec7[DataFinal$age >= 18 & DataFinal$age <= 59] <- 5
DataFinal$agec7[DataFinal$age >= 60] <- 6
DataFinal$agec7 <- factor(
  DataFinal$agec7,
  levels = c(1, 2, 3, 4, 5, 6),
  labels = c("0-4", "5-9", "10-14", "15-17", "18-59", "60+")
)
label(DataFinal$agec7) <- "age groups (7 groups)"
table(DataFinal$agec7, useNA = "always")

# Age group: four bands (0-9, 10-17, 18-59, 60+)
DataFinal$agec4[DataFinal$age >= 0 & DataFinal$age <= 9] <- 1
DataFinal$agec4[DataFinal$age >= 10 & DataFinal$age <= 17] <- 2
DataFinal$agec4[DataFinal$age >= 18 & DataFinal$age <= 59] <- 3
DataFinal$agec4[DataFinal$age >= 60] <- 4
DataFinal$agec4 <- factor(
  DataFinal$agec4,
  levels = c(1, 2, 3, 4),
  labels = c("0-9", "10-17", "18-59", "60+")
)
label(DataFinal$agec4) <- "age groups (4 groups)"
table(DataFinal$agec4, useNA = "always")


# Total number of de jure hh members in the household
# Every row counts as one member; the household size is the row count per hh_id
DataFinal$member <- 1
DataFinal$hhsize <- ave(
  DataFinal$member,
  DataFinal$hh_id,
  FUN = function(x) sum(x, na.rm = TRUE)
)
label(DataFinal$hhsize) <- "Household size"
table(DataFinal$hhsize, useNA = "always")
DataFinal$member <- NULL


# Subnational region: copy of hh7.x
lookfor(DataFinal, "region")
describe(DataFinal$hh7.x)
table(DataFinal$hh7.x, useNA = "always")
DataFinal$region <- DataFinal$hh7.x
label(DataFinal$region) <- "Region for subnational decomposition"
table(DataFinal$region, useNA = "always")


########################################################################################################################
###  Step 2 Data preparation  
###  Standardization of the 10 Global MPI indicators 
###  Identification of non-deprived & deprived individuals  
########################################################################################################################

########################################################################################################################
### Step 2.1 Years of Schooling 
########################################################################################################################
# Basic education in Congo consists of 9 years in total: 6 years of compulsory primary and 3 years of compulsory junior 
# secondary. An additional 3 years of senior secondary education is not compulsory. The admission age to compulsory education 
# is 6 years. Preschool education takes place from age 3-5 years. Primary education takes place from age 6-11+ (grades 1-3, 
# lower basic education; grades 4-6, middle basic education). Junior secondary 
# education takes place from age 12-14 (grades 7-9). Senior secondary education takes place from age 15-18 (grades 10-12). 
# Reference:  http://www.ibe.unesco.org/fileadmin/user_upload/Publications/WDE/2010/pdf-versions/Congo.pdf 
# NOTE(review): the recoding below uses offsets of 6, 9 and 13 years for levels 2, 3 and 4 and accepts up to 4 grades
# at level 2, which does not obviously match the 6+3+3 structure described above -- confirm against the questionnaire.

table(DataFinal$ed4a, useNA = "always")
table(DataFinal$ed4b, useNA = "always")
table(DataFinal$ed3, useNA = "always")
table(DataFinal$age[DataFinal$ed5==1], DataFinal$ed6a[DataFinal$ed5==1], useNA = "always")
    # Check: For those currently in school, check their level of schooling
    # In the case of Congo MICS 2014-15, there is inconsistency such as individuals showing too much schooling given their age.
    # This issue will be addressed in the subsequent set of commands, that is, cleaning the inconsistencies

# Highest year completed = grade within the level (ed4b) plus the years accumulated in the lower levels (by ed4a).
# Grades above the accepted maximum of a level (e.g. don't know / missing codes) are left as NA.
DataFinal$eduhighyear <- NULL
DataFinal$eduhighyear <- ifelse(DataFinal$ed4a==1 & DataFinal$ed4b<=6, DataFinal$ed4b, NA)
DataFinal$eduhighyear <- ifelse(DataFinal$ed4a==2 & DataFinal$ed4b<=4, DataFinal$ed4b+6, DataFinal$eduhighyear)
DataFinal$eduhighyear <- ifelse(DataFinal$ed4a==3 & DataFinal$ed4b<=3, DataFinal$ed4b+9, DataFinal$eduhighyear)
DataFinal$eduhighyear <- ifelse(DataFinal$ed4a==4 & DataFinal$ed4b<=8, DataFinal$ed4b+13, DataFinal$eduhighyear)
# Zero years for level 0 or for those who never attended school (ed3==2).
# This is a conditional replacement rather than ifelse(): with ifelse() a missing ed3 combined with a valid,
# non-zero ed4a makes the test NA and would overwrite an already computed value with NA.
DataFinal$eduhighyear[DataFinal$ed4a==0 | DataFinal$ed3==2] <- 0
label(DataFinal$eduhighyear) <- "Highest year of education completed"
table(DataFinal$eduhighyear, useNA = "always")

### Cleaning inconsistencies 
DataFinal$eduhighyear[DataFinal$age<10] <- 0  
    # The variable "eduhighyear" was replaced with a '0' given that the criteria for this indicator is household member aged 10 years or older 
  
### Now we create the years of schooling
DataFinal$eduyears <- DataFinal$eduhighyear

### Checking for further inconsistencies 
DataFinal$eduyears[DataFinal$age<=DataFinal$eduyears & DataFinal$age>0] <- NA 
    # There are cases in which the years of schooling are greater than (or equal to) the age of the individual. 
    # This is clearly a mistake in the data, so the value is set to missing. 
    # Please check whether this is the case and correct when necessary 
DataFinal$eduyears[DataFinal$age<10] <- 0  
    # The variable "eduyears" was replaced with a '0' given that the criteria for this indicator is household member aged 10 years or older 
label(DataFinal$eduyears) <- "Total number of years of education accomplished"
table(DataFinal$eduyears, useNA = "always")



# A control variable is created on whether there is information on years of education for at least 2/3 of the household members aged 10 years 
# and older

# Both helper columns are initialised explicitly. Filling a non-existent column through
# DataFinal$temp[cond] <- 1 first READS DataFinal$temp, and `$` on a data.frame partial-matches names,
# so a leftover column such as temp2 could silently be picked up and its stale values counted.
DataFinal <- DataFinal[!names(DataFinal) %in% c("temp", "temp2")]
DataFinal$temp <- NA
DataFinal$temp[!is.na(DataFinal$eduyears) & DataFinal$age>=10 & !is.na(DataFinal$age)] <- 1
DataFinal$no_missing_edu <- ave(DataFinal$temp, DataFinal$hh_id, FUN = function(x) sum(x, na.rm = TRUE))
    # Total household members who are 10 years and older with no missing years of education 
DataFinal$temp2 <- NA
DataFinal$temp2[DataFinal$age>=10 & !is.na(DataFinal$age)] <- 1
DataFinal$hhs <- ave(DataFinal$temp2, DataFinal$hh_id, FUN = function(x) sum(x, na.rm = TRUE))
    # Total number of household members who are 10 years and older 
DataFinal$no_missing_edu <- DataFinal$no_missing_edu/DataFinal$hhs
    # Share of members aged 10+ with known years of education (0/0 = NaN when the household has no member aged 10+)
DataFinal$no_missing_edu <- ifelse(DataFinal$no_missing_edu>=2/3,1,0)
    # Identify whether there is information on years of education for at least 2/3 of the household members aged 10 years and older 
table(DataFinal$no_missing_edu, useNA = "always")
label(DataFinal$no_missing_edu) <- "No missing edu for at least 2/3 of the HH members aged 10 years & older"		
DataFinal <- DataFinal[!names(DataFinal) %in% c("temp", "temp2", "hhs")]

# The entire household is considered deprived if no household member aged 10 years or older has completed SIX years of schooling.

# Individual flag: 1 = six or more years of schooling, 0 = fewer, NA = years of schooling unknown
DataFinal$years_edu6 <- ifelse(DataFinal$eduyears >= 6, 1, 0)

# Household maximum of the individual flag. When every member of a household is missing,
# max(..., na.rm = TRUE) returns -Inf, which is mapped to NA in the next step.
DataFinal$hh_years_edu6_1 <- ave(
  DataFinal$years_edu6,
  DataFinal$hh_id,
  FUN = function(flag) max(flag, na.rm = TRUE)
)
DataFinal$hh_years_edu6 <- ifelse(
  DataFinal$hh_years_edu6_1 < 0,
  NA,
  ifelse(DataFinal$hh_years_edu6_1 == 1, 1, 0)
)

# A household that looks deprived but lacks education data for more than 1/3 of its members
# aged 10+ (no_missing_edu == 0) is set to missing rather than deprived.
DataFinal$hh_years_edu6[which(DataFinal$hh_years_edu6 == 0 & DataFinal$no_missing_edu == 0)] <- NA
label(DataFinal$hh_years_edu6) <- "Household has at least one member with 6 years of edu"
table(DataFinal$hh_years_edu6, useNA = "always")


########################################################################################################################
### Step 2.2 Child School Attendance 
########################################################################################################################
describe(DataFinal$ed5)
table(DataFinal$ed5, useNA = "always")

# Start from missing and fill in the cases that can be classified.
DataFinal$attendance <- NA
DataFinal$attendance[which(DataFinal$ed5 == 1)] <- 1    # currently attending school
DataFinal$attendance[which(DataFinal$ed5 == 2)] <- 0    # currently not attending school
DataFinal$attendance[which(DataFinal$ed3 == 2)] <- 0    # never attended school
# replace attendance = 0  if ed3==1 & ed4a==4 
    # Replace attendance with '0' if attended school but informal schooling informal education is not a category in Congo

# Check individuals who are not of school age
table(DataFinal$age, DataFinal$ed5, useNA = "always")

# Individuals younger than 5 or older than 24 are coded as not attending
DataFinal$attendance[which(DataFinal$age < 5 | DataFinal$age > 24)] <- 0

label(DataFinal$attendance) <- "Attended school during current school year"
table(DataFinal$attendance, useNA = "always")



# The entire household is considered deprived if any school-aged child is not attending school up to class 8. 
DataFinal$child_schoolage <- ifelse(DataFinal$schage >= 6 & DataFinal$schage <= 14, 1, 0)
    # Note: In Congo, the official school entrance age is 6 years. So, age range is 6-14 (=6+8).  
    # Source: "http://data.uis.unesco.org/?ReportId=163"
    # Go to Education>Education>System>Official entrance age to primary education. Look at the starting age and add 8. 

# A control variable is created on whether there is no information on school attendance for at least 2/3 of the school age children
sum(DataFinal$child_schoolage == 1 & is.na(DataFinal$attendance), na.rm = TRUE)
    # Number of school-aged children whose attendance information is missing

# Denominator: number of school-aged children in the household
DataFinal$temp2 <- ifelse(DataFinal$child_schoolage == 1, 1, 0)
DataFinal$hhs <- ave(DataFinal$temp2, DataFinal$hh_id,
                     FUN = function(flag) sum(flag, na.rm = TRUE))

# Numerator: school-aged children with non-missing attendance information
DataFinal$temp <- ifelse(DataFinal$child_schoolage == 1 & !is.na(DataFinal$attendance), 1, 0)
DataFinal$no_missing_atten <- ave(DataFinal$temp, DataFinal$hh_id,
                                  FUN = function(flag) sum(flag, na.rm = TRUE)) / DataFinal$hhs

# 1 when attendance is known for at least 2/3 of the school-aged children. Households without
# school-aged children (ratio 0/0, i.e. missing) are also coded 1.
DataFinal$no_missing_atten <- ifelse(
  is.na(DataFinal$no_missing_atten) | DataFinal$no_missing_atten >= 2/3,
  1,
  0
)
table(DataFinal$no_missing_atten, useNA = "always")
label(DataFinal$no_missing_atten) <- "No missing school attendance for at least 2/3 of the school aged children"		
DataFinal <- DataFinal[!names(DataFinal) %in% c("temp", "temp2", "hhs")]

# Control variable: it takes value 1 if the household has at least one child of school age
DataFinal$hh_children_schoolage <- ave(DataFinal$child_schoolage, DataFinal$hh_id, FUN = function(x) sum(x, na.rm = TRUE))
DataFinal$hh_children_schoolage <- ifelse(DataFinal$hh_children_schoolage>0,1,0) 
label(DataFinal$hh_children_schoolage) <- "Household has children in school age"
table(DataFinal$hh_children_schoolage, useNA = "always")

# Individual flag: 1 = school-aged child not attending, 0 = otherwise, NA = school-aged child with unknown attendance
DataFinal$child_not_atten <- ifelse(DataFinal$attendance==0 & DataFinal$child_schoolage==1,1,0) 
DataFinal$child_not_atten[is.na(DataFinal$attendance) & DataFinal$child_schoolage==1] <- NA
DataFinal$any_child_not_atten <- ave(DataFinal$child_not_atten, DataFinal$hh_id, FUN = function(x) max(x, na.rm = TRUE))
# max(..., na.rm = TRUE) returns -Inf when every member of the household is missing. Without this step
# such a household would be coded 0 (deprived) below instead of missing, because is.na(-Inf) is FALSE.
# The same "< 0" convention is used for the other household-level maxima in this file.
DataFinal$any_child_not_atten[DataFinal$any_child_not_atten<0] <- NA
DataFinal$hh_child_atten <- ifelse(DataFinal$any_child_not_atten==0,1,0)
DataFinal$hh_child_atten[is.na(DataFinal$any_child_not_atten)] <- NA
# Households without school-aged children are non-deprived by definition
DataFinal$hh_child_atten[DataFinal$hh_children_schoolage==0] <- 1
DataFinal$hh_child_atten[DataFinal$hh_child_atten==1 & DataFinal$no_missing_atten==0] <- NA 
# If the household has been initially identified as non-deprived, but school attendance is missing for more than 1/3
# of the school aged children (no_missing_atten==0), then we replace this household with a missing value because 
# there is insufficient information to conclude that the household is not deprived
label(DataFinal$hh_child_atten) <- "Household has all school age children up to class 8 in school"
table(DataFinal$hh_child_atten, useNA = "always")

# Note: The indicator takes value 1 if ALL children in school age are attending school and 0 if there is at least one 
# child not attending. Households with no children receive a value of 1 as non-deprived. The indicator has a missing value 
# only when there are all missing values on children attendance in households that have children in school age. 
  
  
########################################################################################################################
### Step 2.3 Nutrition 
########################################################################################################################

########################################################################################################################
### Step 2.3a Child Nutrition 
########################################################################################################################

  
### Child Underweight Indicator 
########################################################################################################################
# Household maximum of the individual underweight flag; -Inf (returned when all values in the
# household are missing) is turned into NA.
DataFinal$temp <- ave(
  DataFinal$underweight,
  DataFinal$hh_id,
  FUN = function(flag) max(flag, na.rm = TRUE)
)
DataFinal$temp[which(DataFinal$temp < 0)] <- NA

# 1 = no child in the household is underweight, 0 = at least one is, NA = no information
DataFinal$hh_no_underweight <- ifelse(DataFinal$temp == 0, 1, 0)

# Households with no eligible children receive a value of 1
DataFinal$hh_no_underweight[which(DataFinal$no_child_eligible == 1)] <- 1
label(DataFinal$hh_no_underweight) <- "Household has no child underweight - 2 stdev"
table(DataFinal$hh_no_underweight, useNA = "always")
DataFinal$temp <- NULL 


### Child Stunting Indicator 
########################################################################################################################
# Household maximum of the individual stunting flag; -Inf (returned when all values in the
# household are missing) is turned into NA.
DataFinal$temp <- ave(
  DataFinal$stunting,
  DataFinal$hh_id,
  FUN = function(flag) max(flag, na.rm = TRUE)
)
DataFinal$temp[which(DataFinal$temp < 0)] <- NA

# 1 = no child in the household is stunted, 0 = at least one is, NA = no information
DataFinal$hh_no_stunting <- ifelse(DataFinal$temp == 0, 1, 0)

# Households with no eligible children receive a value of 1
DataFinal$hh_no_stunting[which(DataFinal$no_child_eligible == 1)] <- 1
label(DataFinal$hh_no_stunting) <- "Household has no child stunted - 2 stdev"
table(DataFinal$hh_no_stunting, useNA = "always")
DataFinal$temp <- NULL 


### Child Either Stunted or Underweight Indicator 
########################################################################################################################
# Individual flag: 1 = stunted or underweight, 0 = neither, NA = both measures missing
DataFinal$uw_st[DataFinal$stunting %in% 1 | DataFinal$underweight %in% 1] <- 1
DataFinal$uw_st[DataFinal$stunting %in% 0 & DataFinal$underweight %in% 0] <- 0
DataFinal$uw_st[is.na(DataFinal$stunting) & is.na(DataFinal$underweight)] <- NA

# Household maximum of the flag; -Inf (all values missing in the household) becomes NA
DataFinal$temp <- ave(
  DataFinal$uw_st,
  DataFinal$hh_id,
  FUN = function(flag) max(flag, na.rm = TRUE)
)
DataFinal$temp[which(DataFinal$temp < 0)] <- NA

# 1 = no child in the household is underweight or stunted
DataFinal$hh_no_uw_st <- ifelse(DataFinal$temp == 0, 1, 0)

# Households with no eligible children receive a value of 1
DataFinal$hh_no_uw_st[which(DataFinal$no_child_eligible == 1)] <- 1
label(DataFinal$hh_no_uw_st) <- "Household has no child underweight or stunted"
table(DataFinal$hh_no_uw_st, useNA = "always")
DataFinal$temp <- NULL 



########################################################################################################################
### Step 2.3b Household Nutrition Indicator 
########################################################################################################################
# The indicator takes value 1 if there are no children under 5 who are underweight or stunted. It also takes value 1
# for households that have no eligible children. The indicator is missing only if all eligible children have missing
# information in their respective nutrition variable.

DataFinal$hh_nutrition_uw_st <- DataFinal$hh_no_uw_st
label(DataFinal$hh_nutrition_uw_st) <- "Household has no child underweight or stunted"
table(DataFinal$hh_nutrition_uw_st, useNA = "always")


########################################################################################################################
### Step 2.4 Child Mortality 
########################################################################################################################
describe(DataFinal$cm9a)
describe(DataFinal$cm9b)
describe(DataFinal$mcm9a)
describe(DataFinal$mcm9b)
    # cm9a or mcm9a: number of sons who have died 
    # cm9b or mcm9b: number of daughters who have died

# Total child mortality reported by eligible women
# Sons + daughters who died; the sum is NA as soon as one of the two counts is NA.
DataFinal$temp_f <- rowSums(DataFinal[c("cm9a", "cm9b")])

# Assign a value of "0" for:
# - all eligible women who never ever gave birth (cm1==2)
# - all eligible women who have ever given birth but reported no child death (cm1==1 & cm8==2)
DataFinal$temp_f[which(DataFinal$cm1 == 2 | (DataFinal$cm1 == 1 & DataFinal$cm8 == 2))] <- 0

# Assign a value of "0" for:
# - individuals living in households that have non-eligible women
DataFinal$temp_f[which(DataFinal$no_fem_eligible == 1)] <- 0

# Household total of reported deaths (missing values are ignored in the sum)
DataFinal$child_mortality_f <- ave(
  DataFinal$temp_f,
  DataFinal$hh_id,
  FUN = function(deaths) sum(deaths, na.rm = TRUE)
)

# temp_miss_f: 1 when the individual value is known, 0 when it is missing;
# child_mortality_temp_miss_f: household maximum, i.e. 0 only when every member is missing.
DataFinal$temp_miss_f <- as.numeric(!is.na(DataFinal$temp_f))
DataFinal$child_mortality_temp_miss_f <- ave(
  DataFinal$temp_miss_f,
  DataFinal$hh_id,
  FUN = function(known) max(known, na.rm = TRUE)
)

# A household total of 0 that stems only from missing values is set back to missing
DataFinal$child_mortality_f[which(
  DataFinal$child_mortality_f == 0 &
    DataFinal$child_mortality_temp_miss_f == 0 &
    is.na(DataFinal$temp_f) &
    is.na(DataFinal$cm9a) &
    is.na(DataFinal$cm9b)
)] <- NA
label(DataFinal$child_mortality_f) <- "Occurrence of child mortality reported by women"
table(DataFinal$child_mortality_f, useNA = "always")
DataFinal$temp_f <- NULL


# Total child mortality reported by eligible men	
# Sons + daughters who died; the sum is NA as soon as one of the two counts is NA.
DataFinal$temp_m <- rowSums(DataFinal[c("mcm9a", "mcm9b")])

# Zero deaths for men who never fathered a child (mcm1==2) or who did but reported no death
# (mcm1==1 & mcm8==2), and for individuals flagged with no_male_eligible==1.
DataFinal$temp_m[which(DataFinal$mcm1 == 2 | (DataFinal$mcm1 == 1 & DataFinal$mcm8 == 2))] <- 0
DataFinal$temp_m[which(DataFinal$no_male_eligible == 1)] <- 0

# Household total of reported deaths (missing values are ignored in the sum)
DataFinal$child_mortality_m <- ave(
  DataFinal$temp_m,
  DataFinal$hh_id,
  FUN = function(deaths) sum(deaths, na.rm = TRUE)
)

# temp_miss_m: 1 when the individual value is known, 0 when it is missing;
# child_mortality_temp_miss_m: household maximum, i.e. 0 only when every member is missing.
DataFinal$temp_miss_m <- as.numeric(!is.na(DataFinal$temp_m))
DataFinal$child_mortality_temp_miss_m <- ave(
  DataFinal$temp_miss_m,
  DataFinal$hh_id,
  FUN = function(known) max(known, na.rm = TRUE)
)

# A household total of 0 that stems only from missing values is set back to missing
DataFinal$child_mortality_m[which(
  DataFinal$child_mortality_m == 0 &
    DataFinal$child_mortality_temp_miss_m == 0 &
    is.na(DataFinal$temp_m) &
    is.na(DataFinal$mcm9a) &
    is.na(DataFinal$mcm9b)
)] <- NA
#DataFinal$child_mortality_m[is.na(DataFinal$child_mortality_m) & DataFinal$child_mortality_temp_miss_m==0] <- 0
label(DataFinal$child_mortality_m) <- "Occurrence of child mortality reported by men"
table(DataFinal$child_mortality_m, useNA = "always")

# Remove the helper columns
DataFinal$temp_m <- NULL
DataFinal$temp_miss_m <- NULL
DataFinal$child_mortality_temp_miss_m <- NULL

# Household child mortality: the larger of the totals reported by women and by men.
# When both are missing, max(..., na.rm = TRUE) returns -Inf, which is turned into NA.
DataFinal$child_mortality <- apply(
  DataFinal[c("child_mortality_f", "child_mortality_m")],
  1,
  max,
  na.rm = TRUE
)
DataFinal$child_mortality[which(DataFinal$child_mortality < 0)] <- NA
label(DataFinal$child_mortality) <- "Total child mortality within household reported by women & men"
table(DataFinal$child_mortality, useNA = "always")	


# Deprived if any children died in the household 
##############################################
# 1 = no incidence of child mortality in the household, 0 = at least one death,
# NA = child mortality unknown (ifelse propagates the missing value).
DataFinal$hh_mortality <- ifelse(DataFinal$child_mortality == 0, 1, 0)
    # Change eligibility to "no_fem_eligible==1" if child mortality indicator is constructed solely using information 
    # from women 
label(DataFinal$hh_mortality) <- "Household had no child mortality"
table(DataFinal$hh_mortality, useNA = "always")


# Deprived if any children died in the household in the last 5 years from the survey year 
##############################################
table(DataFinal$child_died_per_wom_5y, useNA = "always")
DataFinal$child_died_per_wom_5y_b <- DataFinal$child_died_per_wom_5y
    # The 'child_died_per_wom_5y' variable was constructed in Step 1.2 using information from individual women who ever 
    # gave birth in the BR file. The missing values represent eligible woman who have never ever given birth and so are
    # not present in the BR file. But these 'missing women' may be living in households where there are other women with
    # child mortality information from the BR file. So at this stage, it is important that we aggregate the information 
    # that was obtained from the BR file at the household level. This ensures that women who were not present in the BR 
    # file are assigned a value, following the information provided by other women in the household
DataFinal$child_died_per_wom_5y[DataFinal$cm1==2] <- 0 
    # Assign a value of "0" for:
    # - all eligible women who never ever gave birth 
DataFinal$child_died_per_wom_5y[DataFinal$no_fem_eligible==1] <- 0
    # Assign a value of "0" for:
    # - individuals living in households that have non-eligible women 
#DataFinal$child_died_per_wom_5y[DataFinal$child_died_per_wom_5y==0] <- 1
DataFinal$child_mortality_5y <- ave(DataFinal$child_died_per_wom_5y, DataFinal$hh_id, FUN = function(x) sum(x, na.rm = TRUE)) 
    # Household total; missing values are ignored, so a household with only missing values gets 0 at this point
DataFinal$child_mortality_5y[DataFinal$child_mortality_5y<0] <- NA

# temp_child_mortality_5y_miss: 1 when the individual value is known, 0 when it is missing;
# child_mortality_5y_miss: household maximum, i.e. 0 only when every member of the household is missing.
DataFinal$temp_child_mortality_5y_miss <- 1
DataFinal$temp_child_mortality_5y_miss[is.na(DataFinal$child_died_per_wom_5y)] <- 0
DataFinal$child_mortality_5y_miss <- ave(DataFinal$temp_child_mortality_5y_miss, DataFinal$hh_id, FUN = function(x) max(x, na.rm = TRUE))

# temp_child_mortality_5y starts as a copy of the individual "known" flag. The copy is made explicitly:
# the column was previously never created, and the code only worked because `$` on a data.frame
# partial-matches 'temp_child_mortality_5y' to 'temp_child_mortality_5y_miss'.
DataFinal$temp_child_mortality_5y <- DataFinal$temp_child_mortality_5y_miss
DataFinal$temp_child_mortality_5y[DataFinal$temp_child_mortality_5y==0 & DataFinal$child_mortality_5y_miss==0] <- NA
    # Missing when nobody in the household has information for the last 5 years ...
DataFinal$temp_child_mortality_5y[is.na(DataFinal$temp_child_mortality_5y) & DataFinal$child_mortality==0] <- 0
    # ... unless the household reported no child mortality at all (child_mortality==0)

DataFinal$child_mortality_5y[is.na(DataFinal$temp_child_mortality_5y)] <- NA
label(DataFinal$child_mortality_5y) <- "Total child mortality within household past 5 years reported by women"
table(DataFinal$child_mortality_5y, useNA = "always")

# The new standard MPI indicator takes a value of "1" if eligible women within the household reported no child mortality
# or if any child died longer than 5 years from the survey year. The indicator takes a value of "0" if women in the 
# household reported any child mortality in the last 5 years from the survey year. Households were replaced with a value
# of "1" if eligible men within the household reported no child mortality in the absence of information from women. The 
# indicator takes a missing value if there was missing information on reported death from eligible individuals.

DataFinal$hh_mortality_5y <- ifelse(DataFinal$child_mortality_5y==0,1,0)
DataFinal$hh_mortality_5y[is.na(DataFinal$child_mortality_5y)] <- NA
table(DataFinal$hh_mortality_5y, useNA = "always")	
label(DataFinal$hh_mortality_5y) <- "Household had no child mortality in the last 5 years"


########################################################################################################################
### Step 2.5 Electricity 
########################################################################################################################
# Members of the household are considered deprived if the household has no electricity 
# hc8a is recoded so that 2 becomes 0 and 9 becomes missing; all other values are kept.
DataFinal$electricity <- DataFinal$hc8a
describe(DataFinal$electricity)
DataFinal$electricity[which(DataFinal$electricity == 2)] <- 0
DataFinal$electricity[which(DataFinal$electricity == 9)] <- NA
table(DataFinal$electricity, useNA = "always")
label(DataFinal$electricity) <- "Household has electricity"


########################################################################################################################
### Step 2.6 Sanitation 
########################################################################################################################
# Members of the household are considered deprived if the household's sanitation facility is not improved, according to 
# MDG guidelines, or it is improved but shared with other household. In cases of mismatch between the MDG guideline and 
# country report, we followed the country report. 
DataFinal$toilet <- DataFinal$ws8
describe(DataFinal$toilet)
table(DataFinal$toilet, useNA = "always")
describe(DataFinal$ws9)
table(DataFinal$ws9, useNA = "always")

# Shared toilet: 0=no; 1=yes; NA=missing
# NOTE(review): code 3 is treated as missing here while other variables in this file use 9 -- confirm against ws9.
DataFinal$shared_toilet <- DataFinal$ws9
DataFinal$shared_toilet[which(DataFinal$shared_toilet == 2)] <- 0
DataFinal$shared_toilet[which(DataFinal$shared_toilet == 3)] <- NA
table(DataFinal$shared_toilet, useNA = "always")

# Facility codes classified as not improved (0) and improved (1); any other code stays missing
DataFinal$toilet_mdg[DataFinal$toilet %in% c(14, 23, 41, 51, 95, 96)] <- 0
DataFinal$toilet_mdg[DataFinal$toilet %in% c(11, 12, 13, 15, 21, 22, 31)] <- 1

# Household is assigned a value of '0' if it shares the toilet with other households,
# whatever the type of facility
DataFinal$toilet_mdg[which(DataFinal$shared_toilet == 1)] <- 0
label(DataFinal$toilet_mdg) <- "Household has improved sanitation with MDG Standards"
table(DataFinal$toilet_mdg, useNA = "always")


########################################################################################################################
### Step 2.7 Drinking Water  
########################################################################################################################
# Members of the household are considered deprived if the household does not have access to safe drinking water according
# to MDG guidelines, or safe drinking water is more than a 30-minute walk from home roundtrip. In cases of mismatch 
# between the MDG guideline and country report, we followed the country report.
DataFinal$water <- DataFinal$ws1
DataFinal$timetowater <- DataFinal$ws4
describe(DataFinal$water)
table(DataFinal$water, useNA = "always")

# Non-drinking water
DataFinal$ndwater <- DataFinal$ws2
table(DataFinal$ws2[DataFinal$water==91])
    # Because the quality of bottled water is not known, households using bottled water for drinking are classified as
    # using an improved or unimproved source according to their water source for non-drinking activities such as
    # cooking and hand washing. However, it is important to note that households using bottled water for drinking are
    # classified as unimproved source if this is explicitly mentioned in the country report.

# Improved: the listed ws1 codes, or code 91 combined with one of the listed ws2 codes
DataFinal$water_mdg[
  DataFinal$ws1 %in% c(11, 12, 13, 14, 21, 31, 41, 51) |
    (DataFinal$ws1 %in% 91 & DataFinal$ws2 %in% c(11, 12, 13, 14, 21, 31, 41, 51, 99))
] <- 1

# Not improved: the listed ws1 codes, or code 91 combined with one of the listed ws2 codes
DataFinal$water_mdg[
  DataFinal$ws1 %in% c(32, 42, 61, 71, 81, 96) |
    (DataFinal$ws1 %in% 91 & DataFinal$ws2 %in% c(32, 42, 61, 71, 81, 96))
] <- 0

# Deprived if the recorded time to water is 30 or more (codes 998 and 999 excluded)
DataFinal$water_mdg[
  DataFinal$timetowater >= 30 &
    !is.na(DataFinal$timetowater) &
    !(DataFinal$timetowater %in% c(998, 999))
] <- 0

# Missing when the drinking water source is missing or coded 99
DataFinal$water_mdg[is.na(DataFinal$water) | DataFinal$water %in% 99] <- NA
label(DataFinal$water_mdg) <- "Household has drinking water with MDG standards (considering distance)"
table(DataFinal$water_mdg, useNA = "always")


########################################################################################################################
### Step 2.8 Housing 
########################################################################################################################
# Members of the household are considered deprived if the household has a dirt, sand or dung floor
# Codes up to 12 and code 96 are classified as not improved; code 99 and NA become missing.
DataFinal$floor <- DataFinal$hc3
describe(DataFinal$floor)
table(DataFinal$floor, useNA = "always")
DataFinal$floor_imp <- ifelse(
  is.na(DataFinal$floor) | DataFinal$floor == 99,
  NA,
  ifelse(DataFinal$floor <= 12 | DataFinal$floor == 96, 0, 1)
)
label(DataFinal$floor_imp) <- "Household has floor that it is not earth/sand/dung"
table(DataFinal$floor_imp, useNA = "always")


# Members of the household are considered deprived if the household has wall made of natural or rudimentary materials 
# Codes up to 28 and code 96 are classified as not improved; code 99 and NA become missing.
DataFinal$wall <- DataFinal$hc5
describe(DataFinal$wall)
table(DataFinal$wall, useNA = "always")
DataFinal$wall_imp <- ifelse(
  is.na(DataFinal$wall) | DataFinal$wall == 99,
  NA,
  ifelse(DataFinal$wall <= 28 | DataFinal$wall == 96, 0, 1)
)
label(DataFinal$wall_imp) <- "Household has wall that it is not of low quality materials"
table(DataFinal$wall_imp, useNA = "always")


# Members of the household are considered deprived if the household has roof made of natural or rudimentary materials 
# Codes up to 25 and code 96 are classified as not improved; code 99 and NA become missing.
DataFinal$roof <- DataFinal$hc4
describe(DataFinal$roof)
table(DataFinal$roof, useNA = "always")
DataFinal$roof_imp <- ifelse(
  is.na(DataFinal$roof) | DataFinal$roof == 99,
  NA,
  ifelse(DataFinal$roof <= 25 | DataFinal$roof == 96, 0, 1)
)
label(DataFinal$roof_imp) <- "Household has roof that it is not of low quality materials"
table(DataFinal$roof_imp, useNA = "always")


# Household is deprived in housing if the roof, floor OR walls use low quality materials.
# The indicator is missing only when all three components are missing.
DataFinal$housing_1 <- 1
DataFinal$housing_1[which(
  DataFinal$floor_imp == 0 | DataFinal$wall_imp == 0 | DataFinal$roof_imp == 0
)] <- 0
DataFinal$housing_1[
  is.na(DataFinal$floor_imp) & is.na(DataFinal$wall_imp) & is.na(DataFinal$roof_imp)
] <- NA
label(DataFinal$housing_1) <- "Household has roof, floor & walls that it is not low quality material"
table(DataFinal$housing_1, useNA = "always")


########################################################################################################################
### Step 2.9 Cooking Fuel 
########################################################################################################################
# Members of the household are considered deprived if the household cooks with solid fuels: wood, charcoal, crop 
# residues or dung. "Indicators for Monitoring the Millennium Development Goals", p. 63 
DataFinal$cookingfuel <- DataFinal$hc6
describe(DataFinal$cookingfuel)
table(DataFinal$cookingfuel, useNA = "always")

# Fuel codes strictly between 5 and 95 are classified as deprived (0); code 99 and NA become missing;
# every other code is non-deprived (1).
DataFinal$cooking_mdg <- ifelse(
  is.na(DataFinal$cookingfuel) | DataFinal$cookingfuel == 99,
  NA,
  ifelse(DataFinal$cookingfuel > 5 & DataFinal$cookingfuel < 95, 0, 1)
)
label(DataFinal$cooking_mdg) <- "Househod has cooking fuel according to MDG standards"
table(DataFinal$cooking_mdg, useNA = "always")


########################################################################################################################
### Step 2.10 Assets ownership 
########################################################################################################################
# Members of the household are considered deprived if the household does not own more than one of: radio, TV, telephone,
# bike, motorbike or refrigerator and does not own a car or truck. 

# Inspect the coding of the source variables before recoding them
describe(DataFinal$hc8c)
describe(DataFinal$hc8b)
describe(DataFinal$hc8d)
describe(DataFinal$hc9b)
describe(DataFinal$hc8e)
describe(DataFinal$hc9f)
describe(DataFinal$hc9c)
describe(DataFinal$hc9e)

# Copy the source variables into harmonised asset names.
# bw_television and computer are set to missing for this survey.
DataFinal$television <- DataFinal$hc8c
DataFinal$bw_television <- NA
DataFinal$radio <- DataFinal$hc8b
DataFinal$telephone <- DataFinal$hc8d
DataFinal$mobiletelephone <- DataFinal$hc9b
DataFinal$refrigerator <- DataFinal$hc8e
DataFinal$car <- DataFinal$hc9f
DataFinal$bicycle <- DataFinal$hc9c
DataFinal$motorbike <- DataFinal$hc9d
DataFinal$computer <- NA
DataFinal$animal_cart <- DataFinal$hc9e

# Recode every asset: 9 becomes missing and 2 becomes 0
for (asset_col in c("television", "radio", "telephone", "mobiletelephone", "refrigerator",
                    "car", "bicycle", "motorbike", "animal_cart")) {
  asset_val <- DataFinal[[asset_col]]
  asset_val[which(asset_val == 9)] <- NA
  asset_val[which(asset_val == 2)] <- 0
  DataFinal[[asset_col]] <- asset_val
}
rm(asset_col, asset_val)

# Group telephone and mobiletelephone as a single variable:
# a mobile phone counts as a telephone when the landline is 0 or missing
DataFinal$telephone[which(
  (DataFinal$telephone == 0 | is.na(DataFinal$telephone)) & DataFinal$mobiletelephone == 1
)] <- 1

table(DataFinal$television, useNA = "always")
table(DataFinal$radio, useNA = "always")
table(DataFinal$telephone, useNA = "always")
table(DataFinal$refrigerator, useNA = "always")
table(DataFinal$bicycle, useNA = "always")
table(DataFinal$motorbike, useNA = "always")
table(DataFinal$computer, useNA = "always")
table(DataFinal$animal_cart, useNA = "always")


# Members of the household are considered deprived in assets if the household does not own more than one of: radio, 
# TV, telephone, bike, motorbike, refrigerator, computer or animal_cart and does not own a car or truck.
# Missing assets are ignored in the count (na.rm = TRUE), so the count itself is never missing.
DataFinal$n_small_assets2 <- rowSums(
  DataFinal[c("television", "radio", "telephone", "refrigerator", "bicycle",
              "motorbike", "computer", "animal_cart")],
  na.rm = TRUE
)
#DataFinal$n_small_assets2[is.na(DataFinal$television)] <- NA
label(DataFinal$n_small_assets2) <- "Household Number of Small Assets Owned" 
table(DataFinal$n_small_assets2, useNA = "always")

# 1 = household owns a car or more than one small asset. The result is missing when car is missing
# and the household has at most one small asset; such cases are set to 0 when television is 0.
DataFinal$hh_assets2 <- ifelse(DataFinal$car == 1 | DataFinal$n_small_assets2 > 1, 1, 0)
DataFinal$hh_assets2[which(DataFinal$television == 0 & is.na(DataFinal$hh_assets2))] <- 0
label(DataFinal$hh_assets2) <- "Household Asset Ownership: HH has car or more than 1 small assets incl computer & animal cart"
table(DataFinal$hh_assets2, useNA = "always")


########################################################################################################################
### Step 2.11 Rename and keep variables for MPI calculation 
########################################################################################################################
# Retain data on sampling design:
str(DataFinal$psu)
str(DataFinal$stratum)
DataFinal$strata <- DataFinal$stratum

# Retain year, month & date of interview:
# (these three copies are not in the list of columns kept further below)
str(DataFinal$hh5y)
str(DataFinal$hh5m)
str(DataFinal$hh5d)
DataFinal$year_interview <- DataFinal$hh5y
DataFinal$month_interview <- DataFinal$hh5m
DataFinal$date_interview <- DataFinal$hh5d

# Generate presence of subsample (set to missing for every row)
DataFinal$subsample <- NA


### Rename key global MPI indicators for estimation
# Each d_* flag is the inverse of its source indicator: 1 where the source equals 0, 0 otherwise,
# and NA where the source is missing.
DataFinal$d_cm <- ifelse(DataFinal$hh_mortality_5y == 0, 1, 0)
DataFinal$d_nutr <- ifelse(DataFinal$hh_nutrition_uw_st == 0, 1, 0)
DataFinal$d_satt <- ifelse(DataFinal$hh_child_atten == 0, 1, 0)
DataFinal$d_educ <- ifelse(DataFinal$hh_years_edu6 == 0, 1, 0)
DataFinal$d_elct <- ifelse(DataFinal$electricity == 0, 1, 0)
DataFinal$d_wtr <- ifelse(DataFinal$water_mdg == 0, 1, 0)
DataFinal$d_sani <- ifelse(DataFinal$toilet_mdg == 0, 1, 0)
DataFinal$d_hsg <- ifelse(DataFinal$housing_1 == 0, 1, 0)
DataFinal$d_ckfl <- ifelse(DataFinal$cooking_mdg == 0, 1, 0)
DataFinal$d_asst <- ifelse(DataFinal$hh_assets2 == 0, 1, 0)

DataFinal$marital <- DataFinal$marital.x

# Keep only the identifiers, design variables, demographics and MPI indicators needed for estimation.
DataFinal <- DataFinal[c("hh_id", "ind_id", "ccty", "countrycode", "cty", "survey", "year", "subsample",
                         "strata", "psu", "weight", "area", "relationship", "sex", "age", "agec7", "agec4", "marital", "hhsize",
                         "region",
                         "d_cm", "d_nutr", "d_satt", "d_educ", "d_elct", "d_wtr", "d_sani", "d_hsg", "d_ckfl", "d_asst",
                         "hh_mortality_5y", "hh_nutrition_uw_st", "hh_child_atten", "hh_years_edu6", "electricity", "water_mdg",
                         "toilet_mdg", "housing_1", "cooking_mdg", "hh_assets2")]


### Sort, compress and save data for estimation
# The sorted frame must be assigned back; indexing alone leaves DataFinal unsorted.
DataFinal <- DataFinal[order(DataFinal$ind_id), ]
write_dta(DataFinal, (file.path(path_out, "cog_mics15_pov.dta")))

  
    
########################################################################################################################
### MPI Calculation (TTD file)
########################################################################################################################
# Select the country poverty file (run this part in a loop to process more countries)
DataTTD <- read_stata(file.path(path_out, "cog_mics15_pov.dta"))


########################################################################################################################
### Define Sample Weight and total population
########################################################################################################################
DataTTD$sample_weight <- DataTTD$weight
    # change to weight if MICS

########################################################################################################################
### List of the 10 indicators included in the MPI
########################################################################################################################
# Alias -> source column. The aliases are the short names used by the rest of the calculation.
# cm_1: change countries with no child mortality 5 year to child mortality ever
indicator_sources <- c(edu_1 = "hh_years_edu6",
                       atten_1 = "hh_child_atten",
                       cm_1 = "hh_mortality_5y",
                       nutri_1 = "hh_nutrition_uw_st",
                       elec_1 = "electricity",
                       toilet_1 = "toilet_mdg",
                       water_1 = "water_mdg",
                       house_1 = "housing_1",
                       fuel_1 = "cooking_mdg",
                       asset_1 = "hh_assets2")
for (alias in names(indicator_sources)) {
  DataTTD[[alias]] <- DataTTD[[indicator_sources[[alias]]]]
}

 
########################################################################################################################
### List of sample without missing values ***
########################################################################################################################
# sample_1 is 1 when all ten indicators are non-missing for the row, 0 otherwise.
DataTTD$sample_1 <- ifelse(!is.na(DataTTD$edu_1) & !is.na(DataTTD$atten_1) & !is.na(DataTTD$cm_1) &
                             !is.na(DataTTD$nutri_1) & !is.na(DataTTD$elec_1) & !is.na(DataTTD$toilet_1) &
                             !is.na(DataTTD$water_1) & !is.na(DataTTD$house_1) & !is.na(DataTTD$fuel_1) &
                             !is.na(DataTTD$asset_1), 1, 0)

# Rows outside the subsample (subsample == 0) are excluded by setting sample_1 to missing.
DataTTD$sample_1[DataTTD$subsample == 0] <- NA
       # Note: If the anthropometric data was collected from a subsample of the total population that was sampled,
       # then the final analysis only includes the subsample population.
       # Percentage sample after dropping missing values

# Survey structure
DataTTD_weight <- svydesign(id = ~ psu,
                            strata = ~strata,
                            weights = ~sample_weight,
                            nest = TRUE,
                            data = DataTTD)
# na.rm = TRUE: sample_1 is NA for rows excluded through the subsample rule above; without it a single
# such row would turn both shares into NA. When no row has subsample == 0 the results are unchanged.
DataTTD$per_sample_weighted_1 <- svymean(~sample_1, DataTTD_weight, na.rm = TRUE)
DataTTD$per_sample_1 <- mean(DataTTD$sample_1, na.rm = TRUE)
table(DataTTD$per_sample_weighted_1, useNA = "always")
table(DataTTD$per_sample_1, useNA = "always")


########################################################################################################################
### Define deprivation matrix 'g0' which takes values 1 if individual is deprived in the particular indicator according 
### to deprivation cutoff z as defined during step 2 ***
########################################################################################################################
# Indicator aliases, in the order their derived columns are created.
mpi_indicators <- c("edu_1", "atten_1", "cm_1", "nutri_1", "elec_1",
                    "toilet_1", "water_1", "house_1", "fuel_1", "asset_1")

# g01_<indicator>: 0 where the indicator equals 1, 1 otherwise (NA stays NA).
for (ind in mpi_indicators) {
  DataTTD[[paste0("g01_", ind)]] <- ifelse(DataTTD[[ind]] == 1, 0, 1)
}

# Renew survey structure so the design contains the new g01_* columns
DataTTD_weight <- svydesign(id = ~psu,
                            strata = ~strata,
                            weights = ~sample_weight,
                            nest = TRUE,
                            data = DataTTD)
DataTTD_weight_subset <- subset(DataTTD_weight, sample_1 == 1)

### Raw Headcount Ratios
# Survey-weighted mean of each g01_* column over the sample_1 == 1 subset, expressed in percent.
for (ind in mpi_indicators) {
  raw_col <- paste0("raw1_", ind)
  DataTTD[[raw_col]] <- svymean(reformulate(paste0("g01_", ind)), DataTTD_weight_subset) * 100
  label(DataTTD[[raw_col]]) <- paste0("Raw Headcount: Percentage of people who are deprived in ", ind)
}

        
########################################################################################################################
### Define vector 'w' of dimensional and indicator weight
########################################################################################################################
# If the survey lacks one or more indicators, the weights need to be adjusted within each dimension such that each
# dimension weighs 1/3 and the indicator weights add up to one (100%). CHECK COUNTRY FILE
indicator_weights <- c(
  ## DIMENSION EDUCATION
  edu_1 = 1/6,
  atten_1 = 1/6,
  ## DIMENSION HEALTH
  cm_1 = 1/6,
  nutri_1 = 1/6,
  ## DIMENSION LIVING STANDARD
  elec_1 = 1/18,
  toilet_1 = 1/18,
  water_1 = 1/18,
  house_1 = 1/18,
  fuel_1 = 1/18,
  asset_1 = 1/18
)
# Store each weight as a constant column w1_<indicator>.
for (ind in names(indicator_weights)) {
  DataTTD[[paste0("w1_", ind)]] <- indicator_weights[[ind]]
}


########################################################################################################################
### Generate the weighted deprivation matrix 'w' * 'g0'
########################################################################################################################
# w1_g0_<indicator> = weight * deprivation flag, kept only for rows with sample_1 == 1 (NA otherwise).
for (ind in names(indicator_weights)) {
  DataTTD[[paste0("w1_g0_", ind)]] <- ifelse(DataTTD$sample_1 == 1,
                                             DataTTD[[paste0("w1_", ind)]] * DataTTD[[paste0("g01_", ind)]],
                                             NA)
}
    # The estimation is based only on observations that have non-missing values for all variables in varlist_pov


########################################################################################################################
### Generate the vector of individual weighted deprivation count 'c'
########################################################################################################################
# Row-wise sum of the ten weighted deprivations, again restricted to sample_1 == 1.
weighted_cols <- paste0("w1_g0_", names(indicator_weights))
DataTTD$c_vector_1 <- ifelse(DataTTD$sample_1 == 1, rowSums(DataTTD[weighted_cols]), NA)


########################################################################################################################
### Identification step according to poverty cutoff k (20 33 50) 
########################################################################################################################
# Poverty cutoffs k, in percent of weighted deprivations.
poverty_cutoffs <- c(20, 33, 50)

# multidimensionally_poor_1_<k>: 1 when the deprivation score reaches k/100, 0 otherwise;
# missing when the score is missing or the row is outside the analysis sample.
for (k in poverty_cutoffs) {
  poor_col <- paste0("multidimensionally_poor_1_", k)
  DataTTD[[poor_col]] <- ifelse(DataTTD$c_vector_1 >= k / 100, 1, 0)
  DataTTD[[poor_col]][is.na(DataTTD$c_vector_1) | DataTTD$sample_1 != 1] <- NA
}


########################################################################################################################
### Generate the censored vector of individual weighted deprivation count 'c(k)'
########################################################################################################################
for (k in poverty_cutoffs) {
  poor_flag <- DataTTD[[paste0("multidimensionally_poor_1_", k)]]
  DataTTD[[paste0("c_censured_vector_1_", k)]] <- ifelse(poor_flag == 0, 0, DataTTD$c_vector_1)
}
      # Provide a score of zero if a person is not poor


########################################################################################################################
### Define censored deprivation matrix 'g0(k)'  with multidimensionally_poor_1_33
########################################################################################################################
# g01_33_<indicator>: 0 for the non-poor at k=33, NA outside the analysis sample,
# otherwise the uncensored deprivation flag g01_<indicator>.
censored_indicators <- c("edu_1", "atten_1", "cm_1", "nutri_1", "elec_1",
                         "toilet_1", "water_1", "house_1", "fuel_1", "asset_1")
for (ind in censored_indicators) {
  DataTTD[[paste0("g01_33_", ind)]] <-
    ifelse(DataTTD$multidimensionally_poor_1_33 == 0, 0,
           ifelse(DataTTD$multidimensionally_poor_1_33 != 0 & DataTTD$sample_1 != 1,
                  NA,
                  DataTTD[[paste0("g01_", ind)]]))
}


########################################################################################################################
### Generates Multidimensional Poverty Index (MPI), Headcount (H) and Intensity of Poverty (A) 
########################################################################################################################
# Builds the survey design (PSU, strata, sample weight, nested PSUs) from the current state of the data.
# The design must be rebuilt whenever new columns are added that a later svymean() needs.
make_ttd_design <- function(data) {
  svydesign(id = ~psu,
            strata = ~strata,
            weights = ~sample_weight,
            nest = TRUE,
            data = data)
}

# Renew survey structure
DataTTD_weight <- make_ttd_design(DataTTD)
DataTTD_weight_subset <- subset(DataTTD_weight, sample_1 == 1)
DataTTD_weight_subset2 <- subset(DataTTD_weight, sample_1 == 1 & multidimensionally_poor_1_33 == 1)

### Multidimensional Poverty Index (MPI)
# Weighted mean of the censored deprivation score for each cutoff k.
for (k in c(20, 33, 50)) {
  mpi_col <- paste0("MPI_1_", k)
  DataTTD[[mpi_col]] <- svymean(reformulate(paste0("c_censured_vector_1_", k)), DataTTD_weight_subset)
  label(DataTTD[[mpi_col]]) <- paste0("MPI with k=", k)
}

# The headline MPI uses the k=33 cutoff.
DataTTD$MPI_1 <- svymean(~c_censured_vector_1_33, DataTTD_weight_subset)
label(DataTTD$MPI_1) <- "1 Multidimensional Poverty Index (MPI = H*A): Range 0 to 1"

### Headcount (H)
DataTTD$H_1 <- svymean(~multidimensionally_poor_1_33, DataTTD_weight_subset) * 100
label(DataTTD$H_1) <- "1 Headcount ratio: % Population in multidimensional poverty (H)"

### Intensity of Poverty (A)
# Same censored score, averaged only over the people identified as poor at k=33.
DataTTD$A_1 <- svymean(~c_censured_vector_1_33, DataTTD_weight_subset2) * 100
label(DataTTD$A_1) <- "1 Intensity of deprivation among the poor (A): Average % of weighted deprivations"

### Population vulnerable to poverty (who experience 20-32.9% intensity of deprivations)
deprivation_score <- DataTTD$c_vector_1
DataTTD$temp <- ifelse(deprivation_score >= 0.2 & deprivation_score < 0.3332, 1,
                       ifelse((deprivation_score < 0.2 | deprivation_score >= 0.3332) & DataTTD$sample_1 != 1,
                              NA, 0))
DataTTD_weight <- make_ttd_design(DataTTD)
DataTTD_weight_subset <- subset(DataTTD_weight, sample_1 == 1)
DataTTD$vulnerable_1 <- svymean(~temp, DataTTD_weight_subset) * 100

### Population in severe poverty (with intensity 50% or higher)
DataTTD$temp2 <- ifelse(deprivation_score > 0.49, 1,
                        ifelse(deprivation_score <= 0.49 & DataTTD$sample_1 != 1, NA, 0))
DataTTD_weight <- make_ttd_design(DataTTD)
DataTTD_weight_subset <- subset(DataTTD_weight, sample_1 == 1)
DataTTD$severe_1 <- svymean(~temp2, DataTTD_weight_subset) * 100
label(DataTTD$severe_1) <- "1 % Population in severe poverty (with intensity 50% or higher)"

# Indicator aliases, in the order the result columns are created.
result_indicators <- c("edu_1", "atten_1", "cm_1", "nutri_1", "elec_1",
                       "toilet_1", "water_1", "house_1", "fuel_1", "asset_1")

### Censored Headcount
# Weighted share (in percent) of people who are poor at k=33 and deprived in the indicator.
for (ind in result_indicators) {
  cen_col <- paste0("cen1_", ind)
  DataTTD[[cen_col]] <- svymean(reformulate(paste0("g01_33_", ind)), DataTTD_weight_subset) * 100
  label(DataTTD[[cen_col]]) <- paste0("Censored Headcount: Percentage of people who are poor and deprived in ",
                                      ind, ")")
}

### Dimensional Contribution
# weight * censored headcount / MPI, filled in only for rows with sample_1 == 1.
for (ind in result_indicators) {
  DataTTD[[paste0("cont1_", ind)]] <- ifelse(DataTTD$sample_1 == 1,
                                             DataTTD[[paste0("w1_", ind)]] * DataTTD[[paste0("cen1_", ind)]] /
                                               DataTTD$MPI_1,
                                             NA)
}

### Prepare results to export
# Drop the large intermediate objects that are no longer needed.
rm("DataFinal", "DataTTD_weight", "DataTTD_weight_subset", "DataTTD_weight_subset2")

# Headline statistics, indicator contributions and sample shares to be exported.
export_columns <- c("MPI_1", "H_1", "A_1", "vulnerable_1", "severe_1",
                    "cont1_nutri_1", "cont1_cm_1", "cont1_edu_1", "cont1_atten_1", "cont1_fuel_1", "cont1_toilet_1",
                    "cont1_water_1", "cont1_elec_1", "cont1_house_1", "cont1_asset_1",
                    "per_sample_1", "per_sample_weighted_1")
DataOutput <- DataTTD[export_columns]

# Keep rows where the nutrition contribution is defined, then drop duplicate rows.
DataOutput <- subset(DataOutput, !is.na(cont1_nutri_1))
DataOutput <- DataOutput[!duplicated(DataOutput), ]

# NOTE(review): the CSV is written under path_in, while the .dta above goes to path_out - confirm this is intended.
write.csv(DataOutput, file.path(path_in, "DataOutput_Congo_without_correction.csv"), row.names = TRUE)