lavaan multi-level SEM in OpenMX
Lavaan's log-likelihood is -23309.87 but with the following OpenMx code I get only -26495.56. I have also tried to use the estimated parameters from lavaan as fixed parameters in the OpenMx model - the log-likelihood gets even worse then. I assume that the model structure in OpenMx is not the same as the structure model in lavaan.
A few notes on the code: lavaan doesn't allow a connection from an observed variable to a latent variable (I introduced 'pseudo-latent' variables (I prefixed them with '_'): they have have one loading (with factor 1) to the observed variable, have its mean, its (co)variance, and the observed variable has no residual - lavaan does the same internally, see inspect(lavaan_fit, what='est')); lavaan seems to use the biased (co)variance (I resale the covariance matrix with scale/cluster_scale).
library('lavaan')
library('OpenMx')
data <- Demo.twolevel
data$cluster <- sapply(data$cluster, as.factor)
y <- c('y1', 'y2', 'y3')
x <- c('x1', 'x2', 'x3')
x_ <- c('_x1', '_x2', '_x3')
w <- c('w1', 'w2')
w_ <- c('_w1', '_w2')
manifest_l1 <- c(y, x)
latent_l1 <- c('fw', x_)
manifest_l2 <- c(y, w)
latent_l2 <- c('fb', w_)
level2 <- data[!duplicated(data$cluster),]
scale <- (dim(data)[1] - 1.0) / dim(data)[1]
cluster_scale <- (dim(level2)[1] - 1.0) / dim(level2)[1]
between <- mxModel(
"between",
type="RAM",
manifestVars=manifest_l2,
latentVars=latent_l2,
mxData(observed=level2, type="raw", primaryKey="cluster"),
mxPath(from=y, arrows=2, free=TRUE, values=diag(var(data[y], na.rm=TRUE)) / 2),
mxPath(from=w, arrows=2, free=FALSE, values=0),
mxPath(from='fb', arrows=2, free=FALSE, values=1),
mxPath(from=w_, to=w, arrows=1, free=FALSE, values=1),
mxPath(from=w_, arrows=2, free=FALSE, values=diag(var(level2[w], na.rm=TRUE)) * cluster_scale),
mxPath(from='_w1', to='_w2', arrows=2, free=FALSE, values=var(level2[w], na.rm=TRUE)['w1', 'w2'] * cluster_scale),
mxPath(from='fb', to=y, arrows=1, free=TRUE, values=1, labels=c('fb_y1', 'fb_y2', 'fb_y3')),
mxPath(from=w_, to='fb', arrows=1, free=TRUE, values=1, labels=c('w1_fb', 'w2_fb')),
mxPath(from='one', to=y, free=TRUE, values=colMeans(data[y], na.rm=TRUE)),
mxPath(from='one', to=w, free=FALSE, values=0),
mxPath(from='one', to=w_, free=FALSE, values=colMeans(level2[w], na.rm=TRUE)))
within <- mxModel(
"Demo_twolevel",
type="RAM",
between,
manifestVars=manifest_l1,
latentVars=latent_l1,
mxData(observed=data, type="raw"),
mxPath(from=y, arrows=2, free=TRUE, values=diag(var(data[y], na.rm=TRUE)) / 2),
mxPath(from=x, arrows=2, free=FALSE, values=0),
mxPath(from='fw', arrows=2, free=FALSE, values=1),
mxPath(from=x_, to=x, arrows=1, free=FALSE, values=1),
mxPath(from=x_, arrows=2, free=FALSE, values=diag(var(data[x], na.rm=TRUE)) * scale),
mxPath(from='_x1', to='_x2', arrows=2, free=FALSE, values=var(data[x], na.rm=TRUE)['x1', 'x2'] * scale),
mxPath(from='_x1', to='_x3', arrows=2, free=FALSE, values=var(data[x], na.rm=TRUE)['x1', 'x3'] * scale),
mxPath(from='_x2', to='_x3', arrows=2, free=FALSE, values=var(data[x], na.rm=TRUE)['x2', 'x3'] * scale),
mxPath(from='fw', to=y, arrows=1, free=TRUE, values=1, labels=c('fw_y1', 'fw_y2', 'fw_y3')),
mxPath(from=x_, to='fw', arrows=1, free=TRUE, values=1, labels=c('x1_fw', 'x2_fw', 'x3_fw')),
mxPath('between.y1', 'y1', values=1, free=FALSE, joinKey="cluster"),
mxPath('between.y2', 'y2', values=1, free=FALSE, joinKey="cluster"),
mxPath('between.y3', 'y3', values=1, free=FALSE, joinKey="cluster"),
mxPath(from='one', to=manifest_l1, free=FALSE, values=0),
mxPath(from='one', to=x_, free=FALSE, values=colMeans(data[x], na.rm=TRUE)))
fit <- mxRun(within)
summary(fit)
mxVersion() ?
Error in runHelper(model, frontendStart, intervals, silent, suppressWarnings, :
between.data: primary key must be an integer or factor column in raw observed data
The variable named "cluster" in the dataset is of type "numeric", but it needs to be of type "integer" or "factor". My `mxVersion()` :
OpenMx version: 2.15.5.2 [GIT v2.15.5-2-g471224d3-dirty]
R version: R version 3.6.1 (2019-07-05)
Platform: x86_64-pc-linux-gnu
Default optimizer: CSOLNP
NPSOL-enabled?: Yes
OpenMP-enabled?: Yes
I'm running a build of OpenMx that's a few commits newer than the most recent release.
Log in or register to post comments
Thank you for quick reply!
I'm sorry, I forgot the line to convert the cluster column in the above example. I get the same error message without it.
data <- Demo.twolevel
data$cluster <- sapply(data$cluster, as.factor)
I updated my original post.
mxVersion()
OpenMx version: 2.15.4 [GIT v2.15.4]
R version: R version 3.5.2 (2018-12-20)
Platform: x86_64-pc-linux-gnu
Default optimizer: CSOLNP
NPSOL-enabled?: No
OpenMP-enabled?: Yes
Log in or register to post comments
reproduced
Log in or register to post comments
confused
data[!duplicated(data$cluster),]
to get the level2 data, but is this what lavaan is doing? I don't think so. I think the between level is entirely latent. If make the between level latent and link the y1,y2,y3 indicators across levels then I get the same parameter estimates.library('lavaan')
library('OpenMx')
data <- Demo.twolevel
model <- '
level: 1
fw =~ y1 + y2 + y3
fw ~ x1 + x2 + x3
level: 2
fb =~ y1 + y2 + y3
fb ~ w1 + w2
'
lfit <- sem(model = model, data = Demo.twolevel, cluster = "cluster")
data$cluster <- sapply(data$cluster, as.factor)
y <- c('y1', 'y2', 'y3')
x <- c('x1', 'x2', 'x3')
w <- c('w1', 'w2')
latent_l1 <- c('fw', x)
manifest_l2 <- c()
latent_l2 <- c('fb', y, w)
level2 <- data[!duplicated(data$cluster),]
between <- mxModel(
"between",
type="RAM",
manifestVars=manifest_l2,
latentVars=latent_l2,
mxData(observed=level2, type="raw", primaryKey="cluster"),
mxPath(from=y, arrows=2, free=TRUE, values=diag(var(data[y], na.rm=TRUE)),
labels=paste0("y",1:3,"_var")),
mxPath(from='fb', arrows=2, free=TRUE, values=1, labels="fb_var"),
mxPath(from=y, arrows=2, free=TRUE, values=1,
labels=paste0("y",1:3,"_var_b")),
mxPath(from='fb', to=y, arrows=1, free=c(FALSE, rep(TRUE,2)),
values=1, labels=c('fb_y1', 'fb_y2', 'fb_y3')),
mxPath(from=w, to='fb', free=TRUE, labels=paste0('w',1:2,"_fb")),
mxPath(from='one', to=w, free=FALSE, labels=paste0("data.w",1:2)),
mxPath('one', y))
within <- mxModel(
"Demo_twolevel",
type="RAM",
between,
manifestVars=manifest_l1,
latentVars=latent_l1,
mxData(observed=data, type="raw"),
mxPath(from=y, arrows=2, free=TRUE, values=diag(var(data[y], na.rm=TRUE)),
labels=paste0("y",1:3,"_var_w")),
mxPath(from='fw', arrows=2, free=TRUE, values=1, labels="fw_var"),
mxPath(from='fw', to=y, arrows=1, free=c(FALSE,rep(TRUE,2)),
values=1, labels=c('fw_y1', 'fw_y2', 'fw_y3')),
mxPath(from=x, to='fw', arrows=1, free=TRUE, values=1,
labels=c('x1_fw', 'x2_fw', 'x3_fw')),
mxPath(paste0('between.y',1:3), paste0('y',1:3), values=1, free=FALSE, joinKey="cluster"),
mxPath(from='one', to=manifest_l1, free=FALSE),
mxPath(from='one', to=x, free=FALSE, labels=paste0("data.x",1:3)))
fit <- mxRun(within)
summary(fit)
The logLik doesn't match at all. It looks like a lavaan bug or maybe there is some other way to compute the likelihood? I'm not sure.
Log in or register to post comments
In reply to confused by jpritikin
normalized?
Log in or register to post comments
In reply to confused by jpritikin
Might differ by a constant?
Log in or register to post comments
In reply to confused by jpritikin
Thank you!
I'm sorry, I should have posted how I use lavaan (yes, it doesn't distinguish between data for level1 and level2 in its interface; and I set the variance of the latent variables to 1 instead of one fixed factor loading).
library('lavaan')
model <- '
level: 1
fw =~ y1 + y2 + y3
fw ~ x1 + x2 + x3
level: 2
fb =~ y1 + y2 + y3
fb ~ w1 + w2
'
fit <- sem(model = model, data = Demo.twolevel, cluster = "cluster", auto.fix.first = FALSE, std.lv = TRUE)
Yes, the estimated parameters match but the logL is different (OpenMx -12076.57, lavaan -23309.87). The constant -0.5*n*log(2*pi) is only -2297.35.
Log in or register to post comments
In reply to Thank you! by twoertwein
constant
model <- '
level: 1
fw =~ y1 + 0*y2 + y3
fw ~ x1 + x2 + x3
level: 2
fb =~ y1 + y2 + y3
fb ~ w1 + w2
'
lfit <- sem(model = model, data = Demo.twolevel, cluster = "cluster")
, I get an `lfit` with a loglikelihood about 350 units lower than the full model,
> logLik(lfit) - -23309.87
'log Lik.' -350.4593 (df=19)
. If I likewise fix that loading in the MxModel,
foo <- omxSetParameters(model=within,labels="fw_y2",free=F,values=0)
foo <- mxRun(foo)
, I get a `foo` with a loglikelihood again about 350 units lower than the full model,
> logLik(foo) - -12076.57
'log Lik.' -350.455 (df=19)
. In other words, the log of the likelihood ratio is the same out to five significant figures in both cases, indicating that the two packages use proportional likelihood functions.
Log in or register to post comments
Different model
Could it be that lavaan is fitting the joint distribution of the endogenous and exogenous variables?
if i change the OpenMx script i get similar results.
library('lavaan')
library('OpenMx')
data <- Demo.twolevel
model <- '
level: 1
fw =~ y1 + y2 + y3
level: 2
fb =~ y1 + y2 + y3
fb ~ w1 + w2
'
model <- '
level: 1
fw =~ y1 + y2 + y3
fw ~ x1 + x2 + x3
level: 2
fb =~ y1 + y2 + y3
fb ~ w1 + w2
'
lfit <- sem(model = model, data = Demo.twolevel, cluster = "cluster", fixed.x = T)
data$cluster <- sapply(data$cluster, as.factor)
y <- c('y1', 'y2', 'y3')
x <- c('x1', 'x2', 'x3')
w <- c('w1', 'w2')
manifest_l1 <- c(y, x)
latent_l1 <- c('fw')
manifest_l2 <- c(w)
latent_l2 <- c('fb', y)
level2 <- data[!duplicated(data$cluster),]
vc_w = cov(level2[,w])
vc_x = cov(data[,x])
between <- mxModel(
"between",
type="RAM",
manifestVars=manifest_l2,
latentVars=latent_l2,
mxData(observed=level2, type="raw", primaryKey="cluster"),
mxPath(from=y, arrows=2, free=TRUE, values=diag(var(data[y], na.rm=TRUE)),
labels=paste0("y",1:3,"_var")),
mxPath(from='fb', arrows=2, free=TRUE, values=1, labels="fb_var"),
mxPath(from=y, arrows=2, free=TRUE, values=1,
labels=paste0("y",1:3,"_var_b")),
mxPath(from="w1", to = "w1", arrows=2, free=F, values = vc_w[1,1],
labels="w11"),
mxPath(from="w1", to = "w2", arrows=2, free=F, values = vc_w[2,1],
labels="w12"),
mxPath(from="w2", to = "w2", arrows=2, free=F, values = vc_w[2,2],
labels="w22"),
mxPath(from='fb', to=y, arrows=1, free=c(FALSE, rep(TRUE,2)),
values=1, labels=c('fb_y1', 'fb_y2', 'fb_y3')),
mxPath(from=w, to='fb', free=TRUE, values = 1, labels=paste0('w',1:2,"_fb")),
mxPath(from='one', to=w, free=FALSE, values = colMeans(level2[,w])),
mxPath('one', y))
within <- mxModel(
"Demo_twolevel",
type="RAM",
between,
manifestVars=manifest_l1,
latentVars=latent_l1,
mxData(observed=data, type="raw"),
mxPath(from=y, arrows=2, free=TRUE, values=diag(var(data[y], na.rm=TRUE)),
labels=paste0("y",1:3,"_var_w")),
mxPath(from='fw', arrows=2, free=TRUE, values=1, labels="fw_var"),
mxPath(from='fw', to=y, arrows=1, free=c(FALSE,rep(TRUE,2)),
values=1, labels=c('fw_y1', 'fw_y2', 'fw_y3')),
mxPath(from="x1", to = "x1", arrows=2, free=F, values = vc_x[1,1],
labels="x11"),
mxPath(from="x1", to = "x2", arrows=2, free=F, values = vc_x[1,2],
labels="x12"),
mxPath(from="x1", to = "x3", arrows=2, free=F, values = vc_x[1,3],
labels="x13"),
mxPath(from="x2", to = "x2", arrows=2, free=F, values = vc_x[2,2],
labels="x22"),
mxPath(from="x2", to = "x3", arrows=2, free=F, values = vc_x[2,3],
labels="x23"),
mxPath(from="x3", to = "x3", arrows=2, free=F, values = vc_x[3,3],
labels="x33"),
mxPath(from=x, to='fw', arrows=1, free=TRUE, values=1,
labels=c('x1_fw', 'x2_fw', 'x3_fw')),
mxPath(paste0('between.y',1:3), paste0('y',1:3), values=1, free=FALSE, joinKey="cluster"),
mxPath(from='one', to=manifest_l1, values = colMeans(data[,x]), free=FALSE)
)
fit <- mxTryHard(within)
logLik(fit)
logLik(lfit)
Log in or register to post comments
In reply to Different model by esei
I think you're right.
Log in or register to post comments
Unbiased variance in lavaan
To use the unbiased covariance estimator, add
likelihood = "wishart"
to your lavaan call. http://lavaan.ugent.be/tutorial/est.htmlLog in or register to post comments