model=Y~XBoth Y and X can be matrices, in which case as many multiple regressions will be done as the number of variables in Y.
Consider the following (artificial) data set of ability tests, motivation tests, and performance measures. The Ability tests are (simulated) GRE Verbal, Quantitative, and Advanced, the motivation tests measure Need for Achievement and Anxiety, the Performance measures are graduate GPA, rated performance on the Prelims, and quality of the Masters Thesis.
Somehow we need to read the data and find the correlations.
datafilename="http://personality-project.org/R/datasets/psychometrics.prob2.txt" dataset =read.table(datafilename,header=TRUE) #read the data file names(dataset) #what are the variables? dataset=dataset[,-1] #get rid of the ID names(dataset) #check the names again round(cor(dataset),2) #find the correlation matrix dataset=scale(dataset) #convert to standardized scores dataset=data.frame(dataset) #gives this output > datafilename="http://personality-project.org/R/datasets/psychometrics.prob2.txt" > dataset =read.table(datafilename,header=TRUE) #read the data file > names(dataset) #what are the variables? [1] "ID" "GREV" "GREQ" "GREA" "Ach" "Anx" "Prelim" "GPA" "MA" > dataset=dataset[,-1] #get rid of the ID > names(dataset) #check the names again [1] "GREV" "GREQ" "GREA" "Ach" "Anx" "Prelim" "GPA" "MA" > round(cor(dataset),2) #find the correlation matrix GREV GREQ GREA Ach Anx Prelim GPA MA GREV 1.00 0.73 0.64 0.01 0.01 0.43 0.42 0.32 GREQ 0.73 1.00 0.60 0.01 0.01 0.38 0.37 0.29 GREA 0.64 0.60 1.00 0.45 -0.39 0.57 0.52 0.45 Ach 0.01 0.01 0.45 1.00 -0.56 0.30 0.28 0.26 Anx 0.01 0.01 -0.39 -0.56 1.00 -0.23 -0.22 -0.22 Prelim 0.43 0.38 0.57 0.30 -0.23 1.00 0.42 0.36 GPA 0.42 0.37 0.52 0.28 -0.22 0.42 1.00 0.31 MA 0.32 0.29 0.45 0.26 -0.22 0.36 0.31 1.00 > dataset=scale(dataset) #convert to standardized scores > dataset=data.frame(dataset)
attach(dataset) ability=cbind(GREV,GREQ,GREA) #form three different composites motivation=cbind(Ach,Anx) perform=cbind(Prelim,GPA,MA) model.regress=lm(perform~ability+motivation) model.regress #show the resulting regression print(model.regress,digits=3) #another way of showing the result summary(model.regress) #yet another way of showing the output #produces this output > attach(dataset) ability=cbind(GREV,GREQ,GREA) #form three different composites > motivation=cbind(Ach,Anx) > perform=cbind(Prelim,GPA,MA) > model=lm(perform~ability+motivation) > model #show the resulting regression Call: lm(formula = perform ~ ability + motivation) Coefficients: Prelim GPA MA (Intercept) -5.627e-16 1.786e-15 3.019e-15 abilityGREV 1.399e-01 1.987e-01 1.041e-01 abilityGREQ 3.944e-02 5.098e-02 2.555e-02 abilityGREA 4.041e-01 2.861e-01 3.111e-01 motivationAch 1.143e-01 1.190e-01 9.621e-02 motivationAnx -8.500e-03 -4.703e-02 -4.569e-02 > print(model,digits=3) #another way of showing the result Call: lm(formula = perform ~ ability + motivation) Coefficients: Prelim GPA MA (Intercept) -5.63e-16 1.79e-15 3.02e-15 abilityGREV 1.40e-01 1.99e-01 1.04e-01 abilityGREQ 3.94e-02 5.10e-02 2.56e-02 abilityGREA 4.04e-01 2.86e-01 3.11e-01 motivationAch 1.14e-01 1.19e-01 9.62e-02 motivationAnx -8.50e-03 -4.70e-02 -4.57e-02 > summary(model) #yet another way of showing the output Response Prelim : Call: lm(formula = Prelim ~ ability + motivation) Residuals: Min 1Q Median 3Q Max -2.859621 -0.553213 -0.004029 0.503496 2.718487 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -5.627e-16 2.570e-02 -2.19e-14 1.000000 abilityGREV 1.399e-01 4.259e-02 3.283 0.001061 ** abilityGREQ 3.944e-02 3.954e-02 0.998 0.318722 abilityGREA 4.041e-01 4.539e-02 8.903 < 2e-16 *** motivationAch 1.143e-01 3.441e-02 3.323 0.000925 *** motivationAnx -8.500e-03 3.239e-02 -0.262 0.793050 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.8126 on 994 degrees of freedom Multiple R-Squared: 0.3429, Adjusted R-squared: 0.3396 F-statistic: 103.8 on 5 and 994 DF, p-value: < 2.2e-16 Response GPA : Call: lm(formula = GPA ~ ability + motivation) Residuals: Min 1Q Median 3Q Max -2.60408 -0.61369 0.02232 0.60416 2.53718 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 1.786e-15 2.665e-02 6.7e-14 1.000000 abilityGREV 1.987e-01 4.418e-02 4.497 7.72e-06 *** abilityGREQ 5.098e-02 4.101e-02 1.243 0.214094 abilityGREA 2.861e-01 4.708e-02 6.078 1.73e-09 *** motivationAch 1.190e-01 3.569e-02 3.334 0.000886 *** motivationAnx -4.703e-02 3.360e-02 -1.400 0.161883 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.8429 on 994 degrees of freedom Multiple R-Squared: 0.2931, Adjusted R-squared: 0.2895 F-statistic: 82.43 on 5 and 994 DF, p-value: < 2.2e-16 Response MA : Call: lm(formula = MA ~ ability + motivation) Residuals: Min 1Q Median 3Q Max -2.66414 -0.61142 0.02422 0.61993 2.87736 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 3.019e-15 2.804e-02 1.08e-13 1.0000 abilityGREV 1.041e-01 4.648e-02 2.240 0.0253 * abilityGREQ 2.555e-02 4.314e-02 0.592 0.5537 abilityGREA 3.111e-01 4.952e-02 6.282 4.98e-10 *** motivationAch 9.621e-02 3.754e-02 2.563 0.0105 * motivationAnx -4.569e-02 3.534e-02 -1.293 0.1964 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.8867 on 994 degrees of freedom Multiple R-Squared: 0.2177, Adjusted R-squared: 0.2138 F-statistic: 55.32 on 5 and 994 DF, p-value: < 2.2e-16
These regressions were done with the raw data. This is very useful if we want to examine diagnostics for the regression, examine the residuals, look for badly fitted subjects, etc. But sometimes we want to do the regressions from the correlation matrix or covariance matrix without reverting to the raw data.
Using the same data set as before, we find the correlation matrix and then do some basic matrix algebra to do the multiple correlations.
r.matrix=cor(dataset) #find the correlations round(r.matrix,2) #show themm to 2 decimal placeds a.matrix=r.matrix[1:5,1:5] #the a.matrix are the predictors b.matrix=r.matrix[1:5,6:8] #the b.matrix has the criteria a.matrix #show the a.matrix b.matrix #show the b.matrix model.mat=solve(a.matrix,b.matrix) #solve the equation bY~aX round(model.mat,2) #show the answer to 2 decimals print(model.regression,digits=2) #compare this to the multiple regression using the raw data #produces this output > r.matrix=cor(dataset) #find the correlations > round(r.matrix,2) #show themm to 2 decimal placeds GREV GREQ GREA Ach Anx Prelim GPA MA GREV 1.00 0.73 0.64 0.01 0.01 0.43 0.42 0.32 GREQ 0.73 1.00 0.60 0.01 0.01 0.38 0.37 0.29 GREA 0.64 0.60 1.00 0.45 -0.39 0.57 0.52 0.45 Ach 0.01 0.01 0.45 1.00 -0.56 0.30 0.28 0.26 Anx 0.01 0.01 -0.39 -0.56 1.00 -0.23 -0.22 -0.22 Prelim 0.43 0.38 0.57 0.30 -0.23 1.00 0.42 0.36 GPA 0.42 0.37 0.52 0.28 -0.22 0.42 1.00 0.31 MA 0.32 0.29 0.45 0.26 -0.22 0.36 0.31 1.00 > a.matrix=r.matrix[1:5,1:5] #the a.matrix are the predictors > b.matrix=r.matrix[1:5,6:8] #the b.matrix has the criteria > a.matrix #show the a.matrix GREV GREQ GREA Ach Anx GREV 1.000000000 0.728847119 0.6411517 0.005612846 0.010193145 GREQ 0.728847119 1.000000000 0.5963450 0.006846819 0.005469727 GREA 0.641151694 0.596345026 1.0000000 0.453464501 -0.389629680 Ach 0.005612846 0.006846819 0.4534645 1.000000000 -0.556186667 Anx 0.010193145 0.005469727 -0.3896297 -0.556186667 1.000000000 > b.matrix #show the b.matrix Prelim GPA MA GREV 0.4282415 0.4194686 0.3222975 GREQ 0.3830881 0.3669720 0.2873879 GREA 0.5724319 0.5162068 0.4545495 Ach 0.3033434 0.2763810 0.2634665 Anx -0.2278877 -0.2224047 -0.2192203 > model.mat=solve(a.matrix,b.matrix) #solve the equation bY~aX > round(model.mat,2) #show the answer to 2 decimals Prelim GPA MA GREV 0.14 0.20 0.10 GREQ 0.04 0.05 0.03 GREA 0.40 0.29 0.31 Ach 0.11 0.12 0.10 Anx -0.01 -0.05 -0.05 > print(model.regression,digits=2) #compare this to the multiple regression using the raw data Call: lm(formula = perform ~ ability + motivation) Coefficients: Prelim GPA MA (Intercept) -5.6e-16 1.8e-15 3.0e-15 abilityGREV 1.4e-01 2.0e-01 1.0e-01 abilityGREQ 3.9e-02 5.1e-02 2.6e-02 abilityGREA 4.0e-01 2.9e-01 3.1e-01 motivationAch 1.1e-01 1.2e-01 9.6e-02 motivationAnx -8.5e-03 -4.7e-02 -4.6e-02
Compare the results based upon the correlation matrix to that based upon the raw data. To find R^2, we need to multiply the beta weights times the zero order correlations