#9th May,2013 #How to cite 'hp.fit.r': #A.J. Sáez-Castillo, A. Conde-Sánchez (2013). A hyper-Poisson regression model for overdispersed and underdispersed count data. Computational Statistics & Data Analysis, vol. 61, issue C, pages 148-157. #A BibTeX entry: #@ARTICLE{saez_conde_2013, # title = {A hyper-Poisson regression model for overdispersed and underdispersed count data}, # author = {Sáez-Castillo, A.J. and Conde-Sánchez, A.}, # year = {2013}, # journal = {Computational Statistics & Data Analysis}, # volume = {61}, # number = {C}, # pages = {148-157}, # } #MLE of the hP model: #Usage #hp.fit(formula.mu, formula.gamma, f = NULL, p0delta = NULL, p0beta = NULL, iters = 10000, data, method = 2, hessian = TRUE, link = 1) #Arguments #formula.mu, an object of class "formula": a symbolic description of the mean of the model to be fitted #formula.gamma, an object of class "formula": a symbolic description of the dispersion parameter of the model to be fitted #f, an optional vector of 'prior weights' to be used in the fitting process. Should be NULL or a numeric vector #p0delta, starting values for the 'delta' parameters in the linear predictor of the dispersion parameter, given by 'gamma = exp(x'*delta)' #p0beta, starting values for the 'beta' parameters in the linear predictor of the mean, given by 'mu=exp(x'*beta) #iters, the maximum number of iterations #data, a data frame containing the variables in the model. #method, the algorithm used in the optimization of the log-likelihood: #1: Newton-type algorithm by 'nlm' #2: Nelder-Mead by 'optim' #3: BFGS by 'optim' #4: CG by 'optim' #5: L-BFGS-B by 'optim' #6. SANN by 'optim' #hessian, if TRUE, the hessian of f at the minimum is returned #link, the procedure to find the hP lambda parameter from the gamma parameter and the mean: 1 uses 'optimize' function and 2 uses 'uniroot' #Value, a list containing the following components: # model.mu, the formula for the mean of the model # model.gamma, the formula for the dispersion parameter of the model # offset, the name of the offset variable, if any # covars.mu, the names of the covariates in model.mu # covars.gamma, the names of the covariates in model.gamma, # optimum, the value of the estimated minimum of the log-likelihood # aic, AIC statistic value # bic, BIC statistic value # df, degrees of freedom # coefficients, 'betas' and 'deltas' parameter estimates in mu = exp(x'*betas) and gamma = exp(x'*deltas) # gammas, the value of the estimated gamma hP-parameter in each case # lambdas, the value of the estimated lambda hP-parameter in each case # betas, 'betas' parameter estimates in mu = exp(x'*betas) # deltas, 'deltas' parameter estimates in gamma = exp(x'*deltas) # fitted.means, the value of the estimated mean in each case, that is, mu = exp(x'*betas) # hessian, the hessian at the estimated minimum of the log-likelihood (if requested) # cov, covariance matrix of the estimates # se.betas, standard error estimates of 'betas' # se.deltas, standard error estimates of 'deltas' # corr, correlation matrix of the estimates # code, an integer indicating why the optimization process terminated (see help on 'nlm' and 'optim') # method, the algorithm used in the optimization of the log-likelihood hp.fit <- function(formula.mu, formula.gamma, f = NULL, p0delta = NULL, p0beta = NULL, iters = 10000, data, method = 2, hessian = TRUE, link = 1){ library(hypergeo) #Design matrix a.mu <- model.frame(formula.mu, data=data) offset <- model.extract(a.mu, "offset") a.gamma <- model.frame(formula.gamma , data=data) y<-model.extract(a.mu, "response") if (is.null(offset) == TRUE) { offset <- rep(0, length(y)) covoffset <- FALSE } else covoffset <- TRUE attach(data, warn.conflicts = FALSE) if (is.null(f)==TRUE){ f<-rep(1,length(y)) data$f<-f } if (terms(formula.mu)[[3]]==1){ matrizmu<-matrix(1,c(length(y),1)) namescovars.mu<-c("(Intercept)") } else { matrizmu<-model.matrix(terms(formula.mu),model.frame(terms(formula.mu),data=data,na.action=NULL)) namescovars.mu<-dimnames(matrizmu[0,])[[2]] } ncovars.mu<-ncol(matrizmu) if (terms(formula.gamma)[[3]]==1){ matrizgamma<-matrix(1,c(length(y),1)) namescovars.gamma<-c("(Intercept)") } else { matrizgamma<-model.matrix(terms(formula.gamma),model.frame(terms(formula.gamma),data=data,na.action=NULL)) namescovars.gamma<-dimnames(matrizgamma[0,])[[2]] } ncovars.gamma<-ncol(matrizgamma) datos<-data.frame(data,f=data$f) if (is.null(p0beta)==TRUE) p0beta<-glm(formula.mu,weights=f,data=datos,family="poisson")$coefficients detach(data) if (is.null(p0delta)==TRUE) p0delta<-rep(0,ncovars.gamma) #Link function between mean and parameters if (link == 1){ ec2<-function(gamma,mu){ obj<-function(lambda) (mu+gamma-1-lambda+(1-gamma)/genhypergeo(1,gamma,lambda))^2 bounds.lower<-min(mu,max(mu+gamma-1,mu*gamma)) bounds.upper<-max(mu,min(mu*gamma,mu+gamma-1)) if (bounds.upper2.02e+19){ # print("Warning: too high bounds") # print(c(bounds.lower,bounds.upper)) # } if (bounds.upper==bounds.lower) lam<-bounds.upper else{ output<-optimize(f = obj, interval = c(bounds.lower,bounds.upper), tol = .Machine$double.eps) lam<-output$minimum } return(lam) } } if (link==2){ ec2<-function(gamma,mu){ obj<-function(lambda) (mu+gamma-1-lambda+(1-gamma)/genhypergeo(1,gamma,lambda)) bounds.lower<-min(mu,max(mu+gamma-1,mu*gamma)) bounds.upper<-max(mu,min(mu*gamma,mu+gamma-1)) if (bounds.upper2.02e+19){ print("Warning: too high bounds") print(c(bounds.lower,bounds.upper)) } if (bounds.upper==bounds.lower) lam<-bounds.upper else{ output<-uniroot(f = obj, interval = c(bounds.lower,bounds.upper), tol = .Machine$double.eps) lam<-output$root } return(lam) } } #Log-likelihood logL<-function(p){ beta.mu<-p[1:(ncovars.mu)] mu<-exp(offset+matrizmu%*%beta.mu) beta.gamma<-p[(ncovars.mu+1):(ncovars.mu+ncovars.gamma)] gamma<-exp(matrizgamma%*%beta.gamma) lam<-mapply(gamma=gamma,ec2,mu=mu) obj<--sum(f*(-lgamma(gamma+y)+lgamma(gamma)+y*log(lam)-log(mapply(z=lam, genhypergeo, U=1,L=gamma)))) return(obj) } p0<-c(p0beta,p0delta) #Optimizing log-likelihood if (method==1){ fit<-nlm(logL,p=p0,hessian=hessian ,iterlim=iters) fit$value<-fit$minimum fit$par<-fit$estimate fit$convergence<-fit$code method="nlm" } if (method==2){ fit<-optim(p0,logL,control=list(maxit=iters),hessian=hessian) method<-"optim (Nelder-Mead)" } if (method==3){ fit<-optim(p0,logL,method="BFGS",hessian=hessian,control=list(maxit=iters)) method<-"optim (BFGS)" } if (method==4){ fit<-optim(p0,logL,method="CG",hessian=hessian,control=list(maxit=iters)) method<-"optim (CG)" } if (method==5){ fit<-optim(p0,logL,method="L-BFGS-B",hessian=hessian,control=list(maxit=iters)) method<-"optim (L-BFGS-B)" } if (method==6){ fit<-optim(p0,logL,method="SANN",hessian=hessian,control=list(maxit=iters)) method<-"optim (SANN)" } if (hessian==TRUE){ hessian<-fit$hessian cov<-solve(fit$hessian) se.mu<-sqrt(diag(solve(fit$hessian)))[1:ncovars.mu] se.gamma<-sqrt(diag(solve(fit$hessian)))[(ncovars.mu+1):(ncovars.mu+ncovars.gamma)] corr<-solve(fit$hessian)/(sqrt(diag(solve(fit$hessian)))%o%sqrt(diag(solve(fit$hessian)))) } else { hessian<-NA cov<-NA se.mu<-NA se.gamma<-NA corr<-NA } #Results lambda<-mapply(mu=exp(offset+matrizmu%*%fit$par[1:ncovars.mu]), ec2, gamma=exp(matrizgamma%*%fit$par[(ncovars.mu+1):(ncovars.mu+ncovars.gamma)])) results<-list( model.mu=formula.mu, model.gamma=formula.gamma, offset=covoffset, covars.mu=namescovars.mu, covars.gamma=namescovars.gamma, optimum=fit$value, aic=2*(fit$value)+(length(p0beta)+length(p0delta))*2, bic=2*(fit$value)+(length(p0beta)+length(p0delta))*log(sum(f)), df=sum(f)-(length(p0beta)+length(p0delta)), coefficients=fit$par, gammas=exp(matrizgamma%*%fit$par[(ncovars.mu+1):(ncovars.mu+ncovars.gamma)]), lambdas=lambda, betas=fit$par[1:ncovars.mu], deltas=fit$par[(ncovars.mu+1):(ncovars.mu+ncovars.gamma)], fitted.means=exp(offset+matrizmu%*%fit$par[1:ncovars.mu]), hessian=hessian, cov=cov, se.betas=se.mu, se.deltas=se.gamma, corr=corr, code=fit$convergence, method=method ) return(results) }