getwd() # the name of my Working Directory ; 

setwd("/Users/jameshanley/Documents/Courses/634/HIV") # set it

##########################################################

# Kenya Trial

##########################################################

ds=read.table("KenyaTrialData.txt",header=T); ds

# re-arrange so both data for both arms are in same row.

data=cbind(ds[seq(1,11,2),c(1,2,4,5)],ds[seq(2,12,2),4:5])
names(data)=c("t.0","t.end","n.C","n.HIV.C", "n.NotC","n.HIV.NotC")
data

# calculations...  

#  C and NotC : shorthand for Circumcised and Not Circumcised)
#  p (conditional, window-specific) probability of seroconversion
#  S (uncontional) probability of remaining seronegative
#  CI cumulative incidence
#  V variance
#  SE Greenwood Standard Error for S-hat
#  .lower and .upper:  limits of confidence interval
#  PY person years
#  n.cases  number of new cases

data$p.C    = *************************************** ; #condn'l pr(Staying HIV-)
data$p.NotC = *************************************** ; #condn'l pr(Staying HIV-)

data$S.C    = round(cumprod(data$p.C),    4); # un-condn'l prob(Staying HIV-)
data$S.NotC = round(cumprod(data$p.NotC), 4); # un-condn'l prob(Staying HIV-)

data$CI.C    = *************************************** ; # Cum.Incidence (%) of HIV+
data$CI.NotC = *************************************** ; # Cum.Incidence (%) of HIV+

data$V.log.p.C   = *************************************** ; # for Greenwood SE for S or CI=1-S
data$V.log.p.NotC= (1/data$n.NotC)*(1-data$p.NotC)/data$p.NotC  # ditto, NotC

data$SE.S.C    =round(100 * data$S.C    * sqrt(cumsum(data$V.log.p.C)),2) # Greenwood SE(%S)
data$SE.S.NotC =round(100 * data$S.NotC * sqrt(cumsum(data$V.log.p.NotC)),2) # SE(%S)=SE(%CI)

data$CI.lower.C    = *************************************** ;   # limit of 95% Conf.Int for CI
data$CI.upper.C    = *************************************** ;     # limit of 95% Conf.Int for CI

data$CI.lower.NotC = *************************************** ;  # limit of 95% Conf.Int for CI
data$CI.upper.NotC = *************************************** ;  # limit of 95% Conf.Int for CI

data

# for overall incidence rates

PY.C    = round( (1/12) * sum( data$n.C    *(data$t.end - data$t.0) ) , 1) ; PY.C
PY.NotC = **************************************************************** ; PY.NotC

n.cases.HIV.C    = *************************************** ; n.cases.HIV.C
n.cases.HIV.NotC = *************************************** ; n.cases.HIV.NotC

incidence.HIV.C    = *************************************** ;   # per 100 PY
incidence.HIV.NotC = *************************************** ;   # per 100 PY


c(incidence.HIV.C , incidence.HIV.NotC )

# window-specific (w.s) incidence rates and rate ratios 

w.s.PY.C    = (1/12) * ( data$n.C    * (data$t.end - data$t.0) ) ; PY.C
w.s.PY.NotC = (1/12) * ( data$n.NotC * (data$t.end - data$t.0) ) ; PY.NotC

rr= ( **** ) / ( ****  ) ; rr

margin.of.error = exp(1.96*sqrt(1/data$n.HIV.C + 1/data$n.HIV.NotC)) ; margin.of.error

rr/margin.of.error
rr*margin.of.error


##########################################################

# Uganda Trial

##########################################################

# remove ds and data if they exist

rm(ds) ; rm(data) ; ds ; data

setwd("/Users/jameshanley/Documents/Courses/634/HIV")

ds=read.table("UgandaTrialData.txt",header=T); ds

# re-arrange so both data for both arms are in same row.

data=cbind(ds[seq(1,5,2),c(1,2,4,5,6)],ds[seq(2,6,2),4:6])
names(data)=c("t.0","t.end", "n.C","n.HIV.C","py.C",  "n.NotC","n.HIV.NotC","py.NotC") ; data

data$p.C    = ****  #condn'l pr(Staying HIV-)
data$p.NotC = ****  #condn'l pr(Staying HIV-)

data$S.C    = ****  ; # un-condn'l prob(Staying HIV-)
data$S.NotC = ****  ; # un-condn'l prob(Staying HIV-)

data$CI.C    = ****  ; # Cum.Incidence (%) of HIV+
data$CI.NotC = ****  ; # Cum.Incidence (%) of HIV+

data$V.log.p.C   = (1/data$n.C)*(1-data$p.C)/data$p.C # for Greenwood SE for S or CI=1-S
data$V.log.p.NotC= (1/data$n.NotC)*(1-data$p.NotC)/data$p.C # ditto, NotC

data$SE.S.C    = ****  # Greenwood SE(%S)
data$SE.S.NotC = ****  # SE(%S)=SE(%CI)

data$CI.lower.C    = ****   # limit of 95% Conf.Int for CI
data$CI.upper.C    = ****   # limit of 95% Conf.Int for CI
data$CI.lower.NotC = ****   # limit of 95% Conf.Int for CI
data$CI.upper.NotC = ****   # limit of 95% Conf.Int for CI

data$n             = data$n.C + data$n.NotC ;  # for log-rank test 

# E = Expected number under the "holy-water" (or 2-placebos) hypothesis

data$E.n.HIV.C     = (????)  * ( ???? ) ;

data$V.n.HIV.C     =   ( data$n.HIV.C + data$n.HIV.NotC)             * 
                     ( ( data$n - (data$n.HIV.C + data$n.HIV.NotC) )  /  data$n ) *
                     (  data$n.C                                     /  data$n ) *
                     ( data$n.NotC                                  / (data$n -1) ) ;
data

log.rank.Xsq= ( sum(data$n.HIV.C - data$E.n.HIV.C) ^2 ) /
                sum(data$V.n.HIV.C); 
log.rank.Xsq
2*(1-pnorm(sqrt(log.rank.Xsq)))
1-pchisq(log.rank.Xsq,1)
                
# overall rates and rate ratio

PY.C    = round( sum( data$n.C    *(data$t.end - data$t.0) ) , 1) ; PY.C
PY.NotC = round( sum( data$n.NotC*(data$t.end - data$t.0) ) , 1)  ; PY.NotC

n.cases.HIV.C    = sum( data$n.HIV.C )     ; n.cases.HIV.C
n.cases.HIV.NotC = sum( data$n.HIV.NotC )  ; n.cases.HIV.NotC

incidence.HIV.C    = round( 100* n.cases.HIV.C    / PY.C,   2)   # per 100 PY
incidence.HIV.NotC = round( 100* n.cases.HIV.NotC / PY.NotC,2)   # per 100 PY

c(incidence.HIV.C , incidence.HIV.NotC, incidence.HIV.C / incidence.HIV.NotC )

# window specific (w.s.) rates and rate ratios

w.s.PY.C    = (1/12) * ( data$n.C    *(data$t.end - data$t.0) ) ; PY.C
w.s.PY.NotC = (1/12) * ( data$n.NotC *(data$t.end - data$t.0) ) ; PY.NotC


w.s.rr=(data$n.HIV.C/PY.C)/(data$n.HIV.NotC/PY.NotC) ; w.s.rr
margin.of.error = exp(1.96*sqrt(1/data$n.HIV.C + 1/data$n.HIV.NotC)) ; margin.of.error

w.s.rr/margin.of.error
w.s.rr*margin.of.error