x=scan()
2 8 6 3 9 8 3 4 4 1 3 1 1 5 2 1 5 4 1 6 5 2 2 2 2 8 5 2 5 7 1 5 2 2 3 4 2 6 7 2 3 3 2 4 9
2 5 3 2 5 5 3 4 5 3 4 4 3 4 3 1 2 6 1 4 4 1 4 4 1 7 6 1 3 3 1 3 4 1 4 2 1 3 2 2 4 5 2 6 6
2 7 7 2 3 2 2 8 5 2 6 6 2 4 6 3 3 4 3 5 3 3 5 6 3 3 3 1 2 3 1 2 3 1 10 2 2 5 3 1 3 1 1 5 5
1 3 2 1 2 1 1 2 2 3 2 4 3 1 2 1 9 8 2 7 8 2 2 5 2 5 3 1 5 5 3 6 10 1 4 3 1 8 9 2 5 6 2 6 4
3 7 9 3 7 8 3 4 5 3 9 6 3 3 2 4 2 3 4 7 6 4 1 1 4 4 5 4 6 3 4 5 7 4 6 5 4 6 3 4 6 2 4 9 8
4 5 5 4 4 4 4 6 8 4 8 2 4 9 6 4 6 2 4 5 2 3 2 2 4 4 4 4 4 4 4 4 3 3 2 4 3 4 3 2 5 5 2 7 8
2 6 5 2 6 6 1 7 6 1 4 3 1 5 2 4 4 0 4 6 5 4 13 7 4 7 6 3 6 10 3 5 6 3 3 6 3 6 3 4 5 4 4 3 3
4 3 6 3 5 3 4 4 2 3 6 6 4 5 4 3 3 3

c = x[seq(1,333,3)]        # c = (experimental) "condition"
math1 = x[seq(2,333,3)]
math2 = x[seq(3,333,3)]

G  = 1*(c==1)              # 4 INDICATOR Variables
E  = 1*(c==2)
ND = 1*(c==3)
S  = 1*(c==4)

misled = 1*(c==1 | c==4)   # 1 INDICATOR Variable

math1c = math1 - mean(math1)   # 'c' = centered, so overall mean is zero

by(cbind(math1,math2), c, summary)

## out of curiosity, what is the probability that,
## by randomization alone, the 4 means would vary as
## much as they did (or more)?

summary( aov(math1 ~ as.factor(c)) )

## by simulation, what is the probability that,
## by randomization alone, the 4 means would have as
## large a range as they did (or more)?

sims = 1000
who  = rep(1:4, table(c))
SPREAD = rep(NA, sims)
for(sim in 1:sims){
  range = range(by(sample(math1), who, mean))
  SPREAD[sim] = range[2] - range[1]
}
hist(SPREAD)

observed = by(math1, c, mean)
( MAX = max(observed) - min(observed) )
points(MAX, 0, pch=19, cex=2)   # observed diff. b/w max(mean) & min(mean)

### the (analysis of covariance) analysis the authors carried out

# This aov just gives the OVERALL F test (3 degrees of freedom)
# and doesn't indicate where the difference(s) is (are)

summary( aov(math2 ~ as.factor(c) + math1c) )

# This yields 3 (slightly overlapping) contrasts, all against ND

summary(lm(math2 ~ G+E+S+math1c))

# as a check, look at the correlation matrix of the fitted effects

round( cov2cor( summary(lm(math2 ~ G+E+S+math1c))$cov.unscaled ), 2 )

# the G, E and S effect estimates are mutually correlated at about .5
# so, if the ND mean is artificially low/high, then all 3 are 'off'

# This yields the 1 primary contrast

summary(lm(math2 ~ misled + math1c))

## here are the 2 remaining orthogonal contrasts

who = (misled==0)
summary(lm(math2[who] ~ c[who] + math1c[who]))

who = (misled==1)
summary(lm(math2[who] ~ c[who] + math1c[who]))

## Using the math2/math1 Ratio

post = by(math2, misled, sum) ; pre = by(math1, misled, sum)
ratio = post/pre

## Bootstrap to get the estimated sampling distribution

B = 1000   # number of bootstrap samples
( n = c( sum(misled), sum(1-misled) ) )

ds  = data.frame(misled, math1, math2)
ds0 = ds[ds$misled==0,] ; n0 = length(ds0$math1)
ds1 = ds[ds$misled==1,] ; n1 = length(ds1$math1)

ratios = matrix(NA, B, 2)

for (b in 1:B){
  indices = sample(1:n0, n0, replace=TRUE)
  ratio0  = sum( ds0[indices,"math2"] ) / sum( ds0[indices,"math1"] )
  indices = sample(1:n1, n1, replace=TRUE)
  ratio1  = sum( ds1[indices,"math2"] ) / sum( ds1[indices,"math1"] )
  ratios[b,] = c(ratio0, ratio1)
}

plot(ratios,
     ylim=c(min(ratios),max(ratios)),
     xlim=c(min(ratios),max(ratios)),
     xlab="Post/Pre Ratio, Not misled",
     ylab="Post/Pre Ratio, Misled",
     pch=19, cex=0.25, col="red",
     main=paste(toString(B), "bootstrap samples"))
abline(a=0, b=1)
abline(v=1)
abline(h=1)

# jh 2020.02.25
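
## ---- Added sketch (not part of the original script) -----------------
## The randomization simulation above displays SPREAD and the observed MAX
## but never computes the probability the comment asks about; one way to
## answer it is the empirical proportion below. Assumes SPREAD and MAX
## from the simulation are still in memory.

mean(SPREAD >= MAX)   # simulated p-value: how often the permuted range is as large as observed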
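
## ---- Added sketch (not part of the original script) -----------------
## One possible numerical summary of the bootstrap scatterplot: percentile
## intervals for each group's Post/Pre ratio and for their difference.
## Assumes the B x 2 matrix `ratios` (col 1 = not misled, col 2 = misled)
## from the bootstrap loop is still in memory; the names below are
## illustrative, not part of the original analysis.

apply(ratios, 2, quantile, probs=c(0.025, 0.975))   # 95% percentile CI, each group

ratio.diff = ratios[,2] - ratios[,1]                # misled minus not misled
quantile(ratio.diff, probs=c(0.025, 0.975))         # 95% percentile CI for the difference
mean(ratio.diff > 0)                                # fraction of samples where the misled ratio is larger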