Computing R squared with missing data

multiRegModel <- mxModel("Multiple Regression, All Variables",
type="RAM",
manifestVars=c("x", "y", "z"),
# variance paths
mxPath(
from=c("x", "y", "z"),
arrows=2,
free=TRUE,
values = c(.5, .5, .5),
labels=c("varx", "residual", "varz")
),
# covariance of x and z
mxPath(
from="x",
to="z",
arrows=2,
free=TRUE,
values=0.2,
labels="covxz"
),
# regression weights
mxPath(
from=c("x","z"),
to="y",
arrows=1,
free=TRUE,
values=.5,
labels=c("betax","betaz")
),
# means and intercepts
mxPath(
from="one",
to=c("x", "y", "z"),
arrows=1,
free=TRUE,
values=c(.5, .5),
labels=c("meanx", "beta0", "meanz")
)
) # close model
I know "residual" is the residual variance of Y, but I would think I'd need an estimate of the total variance of Y to compute it from that. Any help would be appreciated. Thanks!
Your output has an expected
Alternatively, you could figure it out by hand if that's easier. Total variance of Y in this model would be:
Var(Y) = residual + betax^2 varx + betaz^2 varz + betax betaz covxz
There's probably an easy function to write, at least for RAM models.
Log in or register to post comments
not quite
#### simulate data
n = 10000
pop.mat = matrix(c(1, .48, .36, .48, 1, .35, .36, .35, 1), nrow=3)
d = data.frame(mvrnorm(n, mu=rep(0, times=3), Sigma=pop.mat))
names(d) = c(letters[c(26, 25, 24)])
### build openMX model
multiRegModel <- mxModel("MR",
type="RAM",
manifestVars=c("x", "y", "z"),
# variance paths
mxPath(
from=c("x", "y", "z"),
arrows=2,
free=TRUE,
values = c(.5, .5, .5),
labels=c("varx", "residual", "varz")
),
# covariance of x and z
mxPath(
from="x",
to="z",
arrows=2,
free=TRUE,
values=0.2,
labels="covxz"
),
# regression weights
mxPath(
from=c("x","z"),
to="y",
arrows=1,
free=TRUE,
values=.5,
labels=c("betax","betaz")
),
# means and intercepts
mxPath(
from="one",
to=c("x", "y", "z"),
arrows=1,
free=TRUE,
values=c(.5, .5),
labels=c("meanx", "beta0", "meanz")
)
) # close model
### estimate the model
m = mxModel(multiRegModel, mxData(observed=d, type="raw"))
mod = mxRun(model=m, silent=TRUE, suppressWarnings=TRUE)
### compute R squared
par = summary(mod)$parameters[,c("name","Estimate")]
res = par[par$name=="residual",2]
betax = par[par$name=="betax",2]
varx = par[par$name=="varx",2]
betaz = par[par$name=="betaz",2]
varz = par[par$name=="varz",2]
covxz = par[par$name=="covxz",2]
SST = (res + betax^2*varx + betaz^2*varz + betax*betaz*covxz)*(n-1)
SSR = res*(n-3)
Rsq = 1-(SSR/SST)
regMod = lm(y~x + z, data=d)
summary(regMod)$r.squared
Rsq
SST = (res + betax^2*varx + betaz^2*varz + betax*betaz*covxz)*(n-1)
SSR = res*(n-3)
Rsq.1 = 1-(SSR/SST)
par = summary(mod2)$parameters[,c("name","Estimate")]
res = par[par$name=="residual",2]
betaz = par[par$name=="betaz",2]
varz = par[par$name=="varz",2]
SST2 = (res + betaz^2*varz)*(n-1)
SSR2 = res*(n-2)
Rsq.2 = 1-(SSR2/SST2)
partial = sqrt((Rsq.1-Rsq.2)/(1-Rsq.2^2))
list(partial = partial, rsq1 = Rsq.1, rsq2=Rsq.2, SST=SST, SSR=SSR)
Log in or register to post comments
In reply to not quite by fife
A different method
resid.mod <- omxGetParameters(mod)["residual"]
var.y <- var(d$y) #total var of y
(modR2 <- (var.y - resid.mod)/var.y)
# 0.2579765
# total estimated var of y from model
var.ymod <- attr(mod@output$algebras$MR.fitfunction, "expCov")[2,2]
(allmodR2 <- (var.ymod - resid.mod)/var.ymod)
# 0.2579022
summary(regMod)$r.squared
# 0.2579023
What do you think?
Log in or register to post comments