#How to cite 'hp.fit.r': #A.J. Sáez-Castillo, A. Conde-Sánchez (2013). A hyper-Poisson regression model for overdispersed and underdispersed count data. Computational Statistics & Data Analysis, vol. 61, issue C, pages 148-157. #A BibTeX entry: #@ARTICLE{saez_conde_2013, # title = {A hyper-Poisson regression model for overdispersed and underdispersed count data}, # author = {Sáez-Castillo, A.J. and Conde-Sánchez, A.}, # year = {2013}, # journal = {Computational Statistics & Data Analysis}, # volume = {61}, # number = {C}, # pages = {148-157}, # } source('hp.fit.r') #MLE of the hP model: #Usage #hp.fit(formula.mu, formula.gamma, f = NULL, p0delta = NULL, p0beta = NULL, iters = 10000, data, method = 2, hessian = TRUE, link = 1) #Arguments #formula.mu, an object of class "formula": a symbolic description of the mean of the model to be fitted #formula.gamma, an object of class "formula": a symbolic description of the dispersion parameter of the model to be fitted #f, an optional vector of 'prior weights' to be used in the fitting process. Should be NULL or a numeric vector #p0delta, starting values for the 'delta' parameters in the linear predictor of the dispersion parameter, given by 'gamma = exp(x'*delta)' #p0beta, starting values for the 'beta' parameters in the linear predictor of the mean, given by 'mu=exp(x'*beta) #iters, the maximum number of iterations #data, a data frame containing the variables in the model. #method, the algorithm used in the optimization of the log-likelihood: #1: Newton-type algorithm by 'nlm' #2: Nelder-Mead by 'optim' #3: BFGS by 'optim' #4: CG by 'optim' #5: L-BFGS-B by 'optim' #6. SANN by 'optim' #hessian, if TRUE, the hessian of f at the minimum is returned #link, the procedure to find the hP lambda parameter from the gamma parameter and the mean: 1 uses 'optimize' function and 2 uses 'uniroot' #Value, a list containing the following components: # model.mu, the formula for the mean of the model # model.gamma, the formula for the dispersion parameter of the model # offset, the name of the offset variable, if any # covars.mu, the names of the covariates in model.mu # covars.gamma, the names of the covariates in model.gamma, # optimum, the value of the estimated minimum of the log-likelihood # aic, AIC statistic value # bic, BIC statistic value # df, degrees of freedom # coefficients, 'betas' and 'deltas' parameter estimates in mu = exp(x'*betas) and gamma = exp(x'*deltas) # gammas, the value of the estimated gamma hP-parameter in each case # lambdas, the value of the estimated lambda hP-parameter in each case # betas, 'betas' parameter estimates in mu = exp(x'*betas) # deltas, 'deltas' parameter estimates in gamma = exp(x'*deltas) # fitted.means, the value of the estimated mean in each case, that is, mu = exp(x'*betas) # hessian, the hessian at the estimated minimum of the log-likelihood (if requested) # cov, covariance matrix of the estimates # se.betas, standard error estimates of 'betas' # se.deltas, standard error estimates of 'deltas' # corr, correlation matrix of the estimates # code, an integer indicating why the optimization process terminated (see help on 'nlm' and 'optim') # method, the algorithm used in the optimization of the log-likelihood #Simulating and fitting data from a Poisson model risk <- sample(1:3, 150, replace=TRUE)# An offset covar <- rbinom(150, 1, 0.25)# A covariate y <- rpois(150, risk*exp(0.5*covar)) #Poisson data with mu = offset * exp(beta0 + beta1 * covar), #being beta0 = 0 and beta1 = 0.5 data <- data.frame(y, covar, risk) cells <- unique(data) freqs <- rep(0,nrow(cells)) for (i in 1:nrow(cells)){ for (j in 1:nrow(data)){ if (sum(cells[i,]==data[j,])==3) freqs[i]<-freqs[i]+1 } } data <- data.frame(cells, freqs) #Poisson fit fit.pois<-glm(y ~ covar + offset(log(risk)), data=data, family="poisson", weight=freqs) coef(fit.pois) AIC(fit.pois) library(MASS) fit.nb<-glm.nb(y ~ covar + offset(log(risk)), data=data, weight=freqs) coef(fit.nb) AIC(fit.nb) #hP fit with common dispersion parameter (gamma) fit1 <- hp.fit(y ~ covar + offset(log(risk)), y ~ 1, data = data, f = freqs) fit1$betas fit1$aic fit1$gammas #hP fit with not common dispersion parameter (gamma) fit2 <- hp.fit(y ~ covar + offset(log(risk)), y ~ covar, p0beta = fit1$betascoefs.mu, p0delta = rep(0,2), data=data, f = freqs) fit2$betas fit2$aic fit2$gammas