/* --- see bootom of file for Stata section ---  */

/*  --- for SAS ---  */

PROC FORMAT;
VALUE onefirst 0="z_0 NO" 1="a_1 YES";
run;

DATA a;
INPUT tx months dead;
log_fu_T = log(months);
LINES;
 1 1 0
 1 5 0
 1 6 1
 1 6 1
 1 9 0
1 10 1
1 10 1
1 10 0
1 12 1
1 12 1
1 12 1
1 12 1
1 12 0
1 13 0
1 15 0
1 16 0
1 20 0
1 24 1
1 24 0
1 27 0
1 32 1
1 34 0
1 36 0
1 36 0
1 44 0
0  3 0
0  6 1
0  6 1
0  6 1
0  6 1
0  8 1
0  8 1
0 12 1
0 12 1
0 12 0
0 15 0
0 16 0
0 18 0
0 18 0
0 20 1
0 22 0
0 24 1
0 28 0
0 28 0
0 28 0
0 30 1
0 30 0
0 33 0
0 42 1
;
run;

options LINESIZE=78 PAGESIZE=60 NODATE; RUN;

TITLE OVERALL number (proportion) of deaths & amount of follow-up;

PROC MEANS DATA=a SUM MEAN;
  VAR months dead ;
  RUN;

TITLE (by Tx:) Numbers (Proportions) of deaths & amount of follow-up;

PROC MEANS DATA=a SUM MEAN;
  VAR months dead ;
  CLASS tx;
  OUTPUT OUT=temp sum = Pt_mo no_Dead mean= mean_fu pr_Dead ;
  RUN;

TITLE Proportions and Logits;
DATA summary;
  SET temp;
  logit = log( pr_Dead / (1 - pr_Dead) );
  RUN;
PROC PRINT DATA=summary; RUN;

TITLE Numbers of deaths, by Tx ;

PROC PLOT data=a ;
  PLOT dead * tx / HPOS=20 VPOS=10
                   HPOS=20 VPOS=21 ;
RUN;

TITLE Proportions of deaths vs Tx ;
PROC PLOT data=summary FORMCHAR = '|          ';
  PLOT pr_Dead * tx = "*" / HPOS=20 VPOS=21
                            VAXIS = BY .2
                        VREF= 0  .2  .4  .6  .8  1
                        VREFCHAR = "-"  ;
RUN;

TITLE Logit of proportion dead vs Tx ;
PROC PLOT data=summary FORMCHAR = '|          ';
  PLOT logit * tx = "*" / HPOS=20 VPOS=21
                            VAXIS = BY .5
                        VREF= -0.5  0 0.5
                        VREFCHAR = "-"  ;
RUN;

TITLE  Difference in proportions dead (or alive) by Binomial Regression;
TITLE2 Bernoulli is a Binomial with n=1  i.e., result of a single trial;
PROC GENMOD data=a ;
  MODEL dead = tx /
   DIST=binomial LINK = identity ;
RUN;

TITLE Ratio of proportions dead (Risk Ratio), as well as Odds Ratio ;
PROC FREQ data=a ORDER=FORMATTED;
  TABLES tx*dead / CMH NOPERCENT;
  FORMAT tx dead onefirst. ;
RUN;

TITLE1 Risk RATIO, obtained by Bernoulli (Binomial) Regression ;
TITLE3 Recall: log of Risk Ratio = difference of log Risks                 .;
TITLE5 B1(ie coeff. of tx) = difference in log Risks between  Tx=1 &  Tx=0 .;
TITLE7 exp[B1] = antilog[coefficient for Tx] = Risk Ratio Tx=1 vs Tx=0     .;
PROC GENMOD data=a ;
  MODEL dead = tx /
   DIST = Binomial LINK = log ;
RUN;

 TITLE1 ODDS RATIO, obtained by Bernoulli (Binomial) Regression ;
 TITLE2 .   Odds {of death} if TX=1  =    [Odds if TX = 0] x     Odds_Ratio.;
 TITLE3 log[Odds {of death} if TX=1] = log[Odds if TX = 0] + log[Odds_Ratio];
 TITLE5 log[Odds if TX = ? ] =  log[Odds if TX = 0] + log[Odds_Ratio] x  ? .;
 TITLE6 log[Odds if TX = tx] =  log[Odds if TX = 0] + log[Odds_Ratio] x tx .;
 TITLE7 logit[odds of death] =           B0         +        B1       x tx .;
 TITLE9 .   B1  = log[OR] = diff. of logOdds [i.e., of logits] Tx=1 &  Tx=0.;
TITLE10 exp[B1] = antilog[coefficient for Tx] = Odds Ratio for Tx=1 vs Tx=0.;

PROC GENMOD data=a ;
  MODEL dead = tx /
   DIST = Binomial LINK = logit ;
RUN;

**** Using a PATIENT-TIME Denominator **** ;

 TITLE1 .OVERALL death rate (incidence type, with PATIENT-TIME denominator );
 TITLE2 .******* ...........................................................;
 TITLE4 E[#deaths] = RATE x months [Epi 101: #cases = rate x PT & vice versa]     .;
 TITLE6 .          =  B1  x months [(after null model) simplest of all regressions];
 TITLE8 . NOTICE absence of a B0 [or B0=0: NO deaths if NO person-months! Epi 102].;
TITLE10 Regrn. coefficient B1 associated with MONTHS = (estimated) RATE <-OUR FOCUS;

PROC GENMOD data=a ;
  MODEL dead = months / NOINT
   DIST = poisson LINK = identity ;
RUN;

 TITLE1 .difference in death rates (incidence type rates):         Rate DIFF.;
 TITLE2 .E[#deaths] = rate0 x months  or rate1 x months ...   depends on tx !;
 TITLE3 .E[#deaths] = rate  x months ..                         [ generic ] .;
 TITLE4 . tx=0: rate = rate0 + RateDifference x 0  = rate0                  .;
 TITLE5 . tx=1: rate = rate0 + RateDifference x 1  = rate1                  .;
 TITLE6 .       rate = rate0 + RateDifference x tx              [ generic ] .;
 TITLE7 . E[#deaths] = (rate0 + RateDifference x tx ) x months              .;
 TITLE8 . E[#deaths] = rate0 x months + RateDifference x tx x months        .;
 TITLE9 . E[#deaths] = rate0 x months + RateDifference x tx x months        .;
TITLE10 . E[#deaths] =   B1  x months +       B2       x tx x months        .;

PROC GENMOD data=a ;
  MODEL dead = months tx*months / NOINT
   DIST = poisson LINK = identity ;
   /* notice again the absence of the traditional intercept */
RUN;

 TITLE1 .  (difference by tx) in death rates (incidence type rates) RATE RATIO.;
 TITLE2 .  E[#deaths] = rate0 x months  or rate1 x months, if tx=0 or 1       .;
 TITLE3 .  E[#deaths] = rate  x months ..                        [ generic ]  .;
 TITLE4 .    where rate = rate0 x exp[log(RateRatio) x tx]       [ generic ]  .;
 TITLE5 . tx=0: rate = rate0 x exp[log(RateRatio) x 0] = rate0 x exp[0] = rate0;
 TITLE6 . tx=1: rate = rate0 x exp[log(RateRatio) x 1] = rate0 x RateRatio    .;
 TITLE7 log[E[#deaths]] = log[ rate x months ] = log [rate ] +     log [months];
 TITLE8 log[E[#deaths]] = log[ rate0 ] + log[RateRatio] * tx +     log [months];

TITLE10 log[E[#deaths]] =     B0     +         B1     * tx   +      1*log [months];

PROC GENMOD data=a ;
  MODEL dead = tx /
   DIST=poisson LINK = log  OFFSET = log_fu_T;

   /* You need to create the offset variable ahead of time ..see data step.
      Think of the offset as a variable whose regression  coefficient is
      forced to be 1 .. so the variable is effectively in the equation */

RUN;

 TITLE1 Kaplan Meier survival curves, by Tx;
 TITLE2 " ";
 TITLE3 Note the TIME format: time_variable*status_variable(value);
 TITLE4 " ";
 TITLE5 value = which value of status_variable denotes a censored duration;
 TITLE7 need to use <<STRATA>> in PROC LIFETEST to get contarsted curves on same plot;
 TITLE8 <<Test of Equality over strata>> is programmer-ese for ;
 TITLE9 <<Test of Equality over strata>> is programmer-ese for ;

PROC LIFETEST DATA = a graphics
 method = KM plots=(survival,hazard) ; /* see HELP for other options */
 TIME months*dead(0);  /* Note the TIME format:  */
                       /* time_variable*status_variable(value)   */
                       /* value = which value of status_variable */
                       /*         denotes a censored duration    */
 STRATA tx; /* using STRATA in LIFETEST puts contrasted curves on same plot */
            /* <<BY Tx>> statement produces separate tables/plots..cannot compare tx */

            /* STRATA in LIFETEST can also be use as real <STRATA> */
            /* i.e., can test 2 tx's across strata formed by another variable */
            /* e.g., can test 2 tx's while aggregating within-sex tx comparisons */
            /* see small made-up example below */
 TEST tx;   /* log-rank and wilcoxon tests comparing tx's */
run;

TITLE1 Lifetable (3 month intervals) ;
PROC LIFETEST DATA=a graphics
  METHOD = LT WIDTH=3 plots=(survival,hazard) ;
 TIME months*dead(0);
 STRATA tx;
 TEST tx;
run;


******* example of stratified test ;
DATA a;
INPUT sex $ tx months dead;
LINES;
m 1 1 1
f 0 5 0
f 1 6 1
m 0 6 1
f 0 9 1
m 1 10 1
f 1 10 1
f 0 10 1
f 1 10 0
;
TITLE Stratified log rank test ;
PROC LIFETEST DATA=a
 METHOD = KM  ;
 TIME months*dead(0);
 STRATA sex;
 TEST tx;
run;


/* --- for Stata --- */

input tx months dead
 1 1 0
 1 5 0
 1 6 1
 1 6 1
 1 9 0 
1 10 1 
1 10 1 
1 10 0 
1 12 1 
1 12 1 
1 12 1 
1 12 1 
1 12 0 
1 13 0 
1 15 0 
1 16 0 
1 20 0 
1 24 1
1 24 0 
1 27 0
1 32 1 
1 34 0 
1 36 0 
1 36 0 
1 44 0

0  3 0
0  6 1
0  6 1
0  6 1
0  6 1
0  8 1 
0  8 1 
0 12 1 
0 12 1 
0 12 0 
0 15 0 
0 16 0
0 18 0
0 18 0
0 20 1
0 22 0
0 24 1
0 28 0
0 28 0
0 28 0
0 30 1
0 30 0
0 33 0
0 42 1
end
stset months, failure(dead)
sts graph, l1title("proportion") title("months since randomization") by(tx)
sts test tx