* Read the lecture 05 CSV file into the data set school1;
* The first row of the file holds the variable names and the data start on row 2;
* REPLACE overwrites school1 if it already exists;
proc import
    datafile="E:\mlm13icpsr_lecture05.csv"
    out=school1
    dbms=csv
    replace;
  datarow=2;
  getnames=yes;
run;
* Scatter plots of achievement against student SES using all of the data;
* Reset the graphics options and ask for a border around the graph;
goptions reset=all border;
proc gplot data=school1;
  * Plot 1 - star symbols only with no regression line;
  symbol1 value=star cv=red ci=black co=blue width=4;
  plot achieve*studentses;
  run;
  * Plot 2 - same symbols plus one overall linear regression line (interpol=rl);
  symbol1 value=star interpol=rl cv=red ci=black co=blue width=4;
  plot achieve*studentses;
  run;
quit;
* Preliminary analysis - default PROC MEANS descriptive statistics for achievement and student SES;
proc means data=school1;
  var achieve studentses;
run;
* Analysis #1 - empty regression model with just the intercept - using proc reg;
* No regressors are listed on the MODEL statement so only the intercept is estimated;
* PROC REG is an interactive procedure - QUIT ends it here rather than leaving it;
* running until the next step starts (the GPLOT steps in this program do the same);
proc reg data=school1;
  model achieve = ;
run;
quit;
* Analysis #2 - regression model predicting student achievement from student SES - in proc mixed;
* SOLUTION prints the fixed-effects parameter estimates;
proc mixed data=school1;
  model achieve = studentses / solution;
run;
* Analysis #3 - ANOVA model for school differences in achievement;
* school enters as a categorical (CLASS) fixed effect;
proc mixed data=school1;
  class school;
  model achieve = school / solution;
  * Least-squares mean of achievement for each school;
  lsmeans school;
run;
* Plot achievement by student SES - grouped by school;
* Reset the graphics options (this also clears earlier SYMBOL definitions) and ask for a border;
goptions reset=all border;
proc gplot data=school1;
  * Plot 1 - points only with one plot group per school and no regression line;
  plot achieve*studentses=school;
  run;
  * Plot 2 - add linear regression lines - the plot is grouped by school so a line is fit for each school;
  * repeat=1000 lets this single symbol definition be reused across the school groups;
  symbol1 value=star interpol=rl width=1 repeat=1000;
  plot achieve*studentses=school;
  run;
quit;
* Get the school mean SES - needed for group mean centering (more on this later);
* first - sort school1 in place by school because BY-group processing requires sorted data;
proc sort data=school1 out=school1;
  by school;
run;
* second - compute the mean of studentses within each school;
* The OUTPUT statement writes one row per school to the data set school_means;
* with the mean stored in the variable schoolmean;
proc means data=school1;
  var studentses;
  by school;
  output out=school_means mean(studentses)=schoolmean;
run;
* third - reduce school_means to the merge key (school) and the school mean;
data school_means;
  set school_means (keep=school schoolmean);
run;
* fourth - merge the school_means data set with the whole data set by school;
* and subtract the school mean from the SES of each student to create the;
* cluster mean centered variable studentsesM;
* (statement-style comments must not contain unmatched quotation marks so no apostrophes are used here);
data school2 (keep=school student achieve studentses schoolmean studentsesM);
  * Both inputs must be in school order - school1 was sorted by school before this step;
  merge school1 school_means;
  by school;
  * Within-school deviation of each student from the school mean SES;
  studentsesM = studentses - schoolmean;
run;
* Analysis #4 - adding the school level - random intercept for each school;
* Fixed effects are the school mean SES and the raw student SES;
* NOTE(review) - the centered variable studentsesM exists in school2 but is not used here - confirm that raw studentses is intended;
proc mixed data=school2 itdetails covtest;
  class school;
  model achieve = schoolmean studentses / solution ddfm=bw;
  * Random intercept varying across schools - G prints the estimated G matrix;
  random intercept / subject=school g;
run;