Publications of David B. Dunson

%% Books   
@book{fds338546,
   Author = {Gelman, A and Carlin, JB and Stern, HS and Dunson, DB and Vehtari, A and Rubin, DB},
   Title = {Bayesian Data Analysis, Third Edition},
   Pages = {1-646},
   Year = {2013},
   Month = {January},
   ISBN = {9781439840955},
   Abstract = {Broadening its scope to nonstatisticians, Bayesian Data
              Analysis, Third Edition provides an accessible
             introduction to the foundations and applications of Bayesian
             analysis. Along with a complete reorganization of the
             material, this edition concentrates more on hierarchical
             Bayesian modeling as implemented via Markov chain Monte
             Carlo (MCMC) methods and related data analytic techniques.
             New to the Third Edition • New data examples,
             corresponding R and WinBUGS code, and homework problems •
             Explicit descriptions and illustrations of hierarchical
              modeling, now commonplace in Bayesian data analysis • A new
             chapter on Bayesian design that emphasizes Bayesian clinical
             trials • A completely revised and expanded section on
             ranking and histogram estimation • A new case study on
             infectious disease modeling and the 1918 flu epidemic • A
             solutions manual for qualifying instructors that contains
             solutions, computer code, and associated output for every
              homework problem, available both electronically and in print
             Ideal for Anyone Performing Statistical Analyses Focusing on
             applications from biostatistics, epidemiology, and medicine,
             this text builds on the popularity of its predecessors by
             making it suitable for even more practitioners and
             students.},
   Key = {fds338546}
}


%% Papers Published   
@article{fds257916,
   Author = {Dunson, DB and Chen, Z and Harry, J},
   Title = {A Bayesian approach for joint modeling of cluster size and
             subunit-specific outcomes.},
   Journal = {Biometrics},
   Volume = {59},
   Number = {3},
   Pages = {521-530},
   Year = {2003},
   Month = {September},
   url = {http://dx.doi.org/10.1111/1541-0420.00062},
   Abstract = {In applications that involve clustered data, such as
             longitudinal studies and developmental toxicity experiments,
             the number of subunits within a cluster is often correlated
             with outcomes measured on the individual subunits. Analyses
             that ignore this dependency can produce biased inferences.
             This article proposes a Bayesian framework for jointly
             modeling cluster size and multiple categorical and
             continuous outcomes measured on each subunit. We use a
             continuation ratio probit model for the cluster size and
             underlying normal regression models for each of the
             subunit-specific outcomes. Dependency between cluster size
             and the different outcomes is accommodated through a latent
             variable structure. The form of the model facilitates
             posterior computation via a simple and computationally
             efficient Gibbs sampler. The approach is illustrated with an
             application to developmental toxicity data, and other
             applications, to joint modeling of longitudinal and event
             time data, are discussed.},
   Doi = {10.1111/1541-0420.00062},
   Key = {fds257916}
}
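
The Gibbs sampler referenced above exploits the model's underlying-normal
structure. A minimal sketch in Python of the data-augmentation idea for a
plain probit regression (not the paper's full joint model of cluster size
and subunit-specific outcomes; the data, prior, and iteration count below
are illustrative):

    # Data-augmentation Gibbs for probit regression (Albert-Chib style).
    import numpy as np
    from scipy.stats import truncnorm

    rng = np.random.default_rng(0)
    n, p = 200, 3
    X = rng.normal(size=(n, p))
    y = (X @ np.array([1.0, -0.5, 0.25]) + rng.normal(size=n) > 0).astype(int)

    beta = np.zeros(p)
    V = np.linalg.inv(X.T @ X + np.eye(p))  # posterior cov under beta ~ N(0, I)
    for it in range(1000):
        # 1. Sample latent normals z_i truncated to agree with each y_i.
        mu = X @ beta
        lo = np.where(y == 1, -mu, -np.inf)   # z > 0 when y = 1
        hi = np.where(y == 1, np.inf, -mu)    # z <= 0 when y = 0
        z = mu + truncnorm.rvs(lo, hi, size=n, random_state=rng)
        # 2. Sample beta from its conjugate normal full conditional.
        beta = rng.multivariate_normal(V @ (X.T @ z), V)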

@article{fds257886,
   Author = {Dunson, DB and Zhou, H},
   Title = {A Bayesian Model for Fecundability and Sterility},
   Journal = {Journal of the American Statistical Association},
   Volume = {95},
   Number = {452},
   Pages = {1054-1062},
   Publisher = {Informa UK Limited},
   Year = {2000},
   Month = {December},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2000.10474302},
   Abstract = {There is increasing evidence that exposure to environmental
             toxins during key stages of development can disrupt the
             human reproductive system. Such effects have proven
             difficult to study due to the many behavioral and biological
             factors involved in human reproduction. We analyze data from
             a North Carolina fertility study to assess the effect of
             prenatal, childhood, and current cigarette smoking exposure
             on fecundability and sterility. We use a mixture model that
             adjusts for timing and frequency of intercourse and allows
             both fecundability and sterility to depend on multiple
             covariates. We account for dependency among menstrual cycles
             within individual couples using a mixture density for a
             latent cycle viability variable. The mixture consists of a
             normal distribution describing heterogeneity among fecund
             couples with a point mass at 0 for sterile couples. The
             resulting distribution is more biologically plausible than
             the standard beta density. A Markov chain Monte Carlo scheme
             is used for Bayesian estimation of the model. There is some
             evidence that spontaneous intrauterine mortality results in
             decreased fecundability in subsequent cycles. Both current
             cigarette smoking and prenatal exposure of the woman to her
             mother's cigarette smoking are shown to be associated with a
             decrease in the probability of menstrual cycle viability. ©
             2000 Taylor & Francis Group, LLC.},
   Doi = {10.1080/01621459.2000.10474302},
   Key = {fds257886}
}
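
As a toy illustration of the latent cycle-viability mixture described
above (a point mass at zero for sterile couples plus a normal component
for fecund couples), with made-up parameter values and the covariate
effects and MCMC estimation omitted:

    import numpy as np

    rng = np.random.default_rng(1)
    p_sterile, mu, sigma = 0.08, 0.35, 0.15   # illustrative values only

    def sample_viability(n):
        # Point mass at 0 (sterile) mixed with normal heterogeneity (fecund).
        sterile = rng.random(n) < p_sterile
        v = np.where(sterile, 0.0, rng.normal(mu, sigma, size=n))
        return np.clip(v, 0.0, 1.0)  # crude truncation, for illustration

    v = sample_viability(10_000)
    print("mass at zero:", np.mean(v == 0.0))  # ~ p_sterile plus clipped draws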

@article{fds258029,
   Author = {Du, L and Ren, L and Dunson, DB and Carin, L},
   Title = {A Bayesian Model for Simultaneous Image Clustering,
             Annotation and Object Segmentation.},
   Journal = {Advances in neural information processing
             systems},
   Volume = {2009},
   Pages = {486-494},
   Year = {2009},
   Month = {January},
   Abstract = {A non-parametric Bayesian model is proposed for processing
             multiple images. The analysis employs image features and,
             when present, the words associated with accompanying
             annotations. The model clusters the images into classes, and
             each image is segmented into a set of objects, also allowing
             the opportunity to assign a word to each object (localized
             labeling). Each object is assumed to be represented as a
             heterogeneous mix of components, with this realized via
             mixture models linking image features to object types. The
             number of image classes, number of object types, and the
             characteristics of the object-feature mixture models are
             inferred nonparametrically. To constitute spatially
             contiguous objects, a new logistic stick-breaking process is
             developed. Inference is performed efficiently via
             variational Bayesian analysis, with example results
             presented on two image databases.},
   Key = {fds258029}
}
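
A minimal sketch of the stick-breaking construction behind the logistic
stick-breaking process: each stick is a logistic function of location, so
nearby sites share mixture weights and segments come out spatially
contiguous. The stick functions and constants below are illustrative
choices, not the paper's specification:

    import numpy as np

    def sigmoid(u):
        return 1.0 / (1.0 + np.exp(-u))

    def stick_breaking_weights(sticks):
        # sticks: (K, n) values in (0, 1); returns (K, n) mixture weights.
        K, n = sticks.shape
        w = np.empty((K, n))
        remaining = np.ones(n)
        for k in range(K):
            w[k] = sticks[k] * remaining
            remaining *= 1.0 - sticks[k]
        return w

    x = np.linspace(0, 1, 101)                 # 1-d "pixel" locations
    centers = np.array([0.2, 0.5, 0.8])        # hypothetical segment centers
    sticks = sigmoid(10.0 * (0.15 - np.abs(x - centers[:, None])))
    w = stick_breaking_weights(sticks)         # columns sum to < 1; leftover
    # mass would go to further sticks in the full process.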

@article{fds343735,
   Author = {Norberg, A and Abrego, N and Blanchet, FG and Adler, FR and Anderson,
             BJ and Anttila, J and Araújo, MB and Dallas, T and Dunson, D and Elith, J and Foster, SD and Fox, R and Franklin, J and Godsoe, W and Guisan, A and O'Hara, B and Hill, NA and Holt, RD and Hui, FKC and Husby, M and Kålås,
             JA and Lehikoinen, A and Luoto, M and Mod, HK and Newell, G and Renner, I and Roslin, T and Soininen, J and Thuiller, W and Vanhatalo, J and Warton,
             D and White, M and Zimmermann, NE and Gravel, D and Ovaskainen,
             O},
   Title = {A comprehensive evaluation of predictive performance of 33
             species distribution models at species and community
             levels},
   Journal = {Ecological Monographs},
   Volume = {89},
   Number = {3},
   Year = {2019},
   Month = {August},
   url = {http://dx.doi.org/10.1002/ecm.1370},
   Abstract = {A large array of species distribution model (SDM) approaches
             has been developed for explaining and predicting the
             occurrences of individual species or species assemblages.
             Given the wealth of existing models, it is unclear which
             models perform best for interpolation or extrapolation of
             existing data sets, particularly when one is concerned with
             species assemblages. We compared the predictive performance
             of 33 variants of 15 widely applied and recently emerged
             SDMs in the context of multispecies data, including both
             joint SDMs that model multiple species together, and stacked
             SDMs that model each species individually combining the
             predictions afterward. We offer a comprehensive evaluation
             of these SDM approaches by examining their performance in
             predicting withheld empirical validation data of different
             sizes representing five different taxonomic groups, and for
             prediction tasks related to both interpolation and
             extrapolation. We measure predictive performance by 12
             measures of accuracy, discrimination power, calibration, and
             precision of predictions, for the biological levels of
             species occurrence, species richness, and community
             composition. Our results show large variation among the
             models in their predictive performance, especially for
             communities comprising many species that are rare. The
             results do not reveal any major trade-offs among measures of
             model performance; the same models performed generally well
             in terms of accuracy, discrimination, and calibration, and
             for the biological levels of individual species, species
             richness, and community composition. In contrast, the models
             that gave the most precise predictions were not well
             calibrated, suggesting that poorly performing models can
             make overconfident predictions. However, none of the models
             performed well for all prediction tasks. As a general
             strategy, we therefore propose that researchers fit a small
             set of models showing complementary performance, and then
             apply a cross-validation procedure involving separate data
             to establish which of these models performs best for the
             goal of the study.},
   Doi = {10.1002/ecm.1370},
   Key = {fds343735}
}
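
The closing recommendation (fit a small set of complementary models, then
cross-validate on separate data to pick one) can be sketched generically;
the models and simulated data below are stand-ins, not the paper's 33 SDM
variants:

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_val_score

    X, y = make_classification(n_samples=500, n_features=10, random_state=0)
    candidates = {
        "logistic": LogisticRegression(max_iter=1000),
        "random_forest": RandomForestClassifier(random_state=0),
        "boosting": GradientBoostingClassifier(random_state=0),
    }
    for name, model in candidates.items():
        auc = cross_val_score(model, X, y, cv=5, scoring="roc_auc")
        print(f"{name}: mean AUC = {auc.mean():.3f}")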

@article{fds257896,
   Author = {Dunson, DB and Baird, DD},
   Title = {A flexible parametric model for combining current status and
             age at first diagnosis data.},
   Journal = {Biometrics},
   Volume = {57},
   Number = {2},
   Pages = {396-403},
   Year = {2001},
   Month = {June},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2001.00396.x},
   Abstract = {In some cross-sectional studies of chronic disease, data
             consist of the age at examination, whether the disease was
             present at the exam, and recall of the age at first
             diagnosis. This article describes a flexible parametric
             approach for combining current status and age at first
             diagnosis data. We assume that the log odds of onset by a
             given age and of detection by a given age conditional on
             onset by that age are nondecreasing functions of time plus
             linear combinations of covariates. Piecewise linear models
             are used to characterize changes across time in the baseline
             odds. Methods are described for accommodating informatively
             missing current status data and inferences based on the
             age-specific incidence of disease prior to a landmark event
             (e.g., puberty, menopause). Our formulation enables
             straightforward maximum likelihood estimation without
             requiring restrictive parametric or Markov assumptions. The
             methods are applied to data from a study of uterine
             fibroids.},
   Doi = {10.1111/j.0006-341x.2001.00396.x},
   Key = {fds257896}
}

@article{fds328949,
   Author = {Chabout, J and Sarkar, A and Patel, SR and Radden, T and Dunson, DB and Fisher, SE and Jarvis, ED},
   Title = {A Foxp2 Mutation Implicated in Human Speech Deficits Alters
             Sequencing of Ultrasonic Vocalizations in Adult Male
             Mice.},
   Journal = {Front Behav Neurosci},
   Volume = {10},
   Pages = {197},
   Year = {2016},
   url = {http://dx.doi.org/10.3389/fnbeh.2016.00197},
   Abstract = {Development of proficient spoken language skills is
             disrupted by mutations of the FOXP2 transcription factor. A
             heterozygous missense mutation in the KE family causes
             speech apraxia, involving difficulty producing words with
             complex learned sequences of syllables. Manipulations in
             songbirds have helped to elucidate the role of this gene in
             vocal learning, but findings in non-human mammals have been
             limited or inconclusive. Here, we performed a systematic
             study of ultrasonic vocalizations (USVs) of adult male mice
             carrying the KE family mutation. Using novel statistical
             tools, we found that Foxp2 heterozygous mice did not have
             detectable changes in USV syllable acoustic structure, but
             produced shorter sequences and did not shift to more complex
             syntax in social contexts where wildtype animals did.
             Heterozygous mice also displayed a shift in the position of
             their rudimentary laryngeal motor cortex (LMC) layer-5
             neurons. Our findings indicate that although mouse USVs are
             mostly innate, the underlying contributions of FoxP2 to
             sequencing of vocalizations are conserved with
             humans.},
   Doi = {10.3389/fnbeh.2016.00197},
   Key = {fds328949}
}

@article{fds372678,
   Author = {Rigon, T and Herring, AH and Dunson, DB},
   Title = {A generalized Bayes framework for probabilistic
             clustering},
   Journal = {Biometrika},
   Volume = {110},
   Number = {3},
   Pages = {559-578},
   Year = {2023},
   Month = {September},
   url = {http://dx.doi.org/10.1093/biomet/asad004},
   Abstract = {Loss-based clustering methods, such as k-means clustering
             and its variants, are standard tools for finding groups in
             data. However, the lack of quantification of uncertainty in
             the estimated clusters is a disadvantage. Model-based
             clustering based on mixture models provides an alternative
             approach, but such methods face computational problems and
             are highly sensitive to the choice of kernel. In this
             article we propose a generalized Bayes framework that
             bridges between these paradigms through the use of Gibbs
             posteriors. In conducting Bayesian updating, the
             loglikelihood is replaced by a loss function for clustering,
             leading to a rich family of clustering methods. The Gibbs
             posterior represents a coherent updating of Bayesian beliefs
             without needing to specify a likelihood for the data, and
             can be used for characterizing uncertainty in clustering. We
             consider losses based on Bregman divergence and pairwise
             similarities, and develop efficient deterministic algorithms
             for point estimation along with sampling algorithms for
             uncertainty quantification. Several existing clustering
             algorithms, including k-means, can be interpreted as
             generalized Bayes estimators in our framework, and thus we
             provide a method of uncertainty quantification for these
             approaches, allowing, for example, calculation of the
             probability that a data point is well clustered.},
   Doi = {10.1093/biomet/asad004},
   Key = {fds372678}
}
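
A minimal sketch of the Gibbs-posterior idea: Bayesian updating with the
k-means loss substituted for a log-likelihood. The single-site Metropolis
moves and the loss scale lam below are illustrative choices, not the
paper's algorithms; co-clustering probabilities summarize uncertainty:

    import numpy as np

    rng = np.random.default_rng(0)
    X = np.vstack([rng.normal(0, 0.3, (30, 2)), rng.normal(2, 0.3, (30, 2))])
    n, K, lam = len(X), 2, 5.0

    def kmeans_loss(labels):
        # Sum of squared distances to the assigned cluster means.
        return sum(((X[labels == k] - X[labels == k].mean(axis=0)) ** 2).sum()
                   for k in range(K) if np.any(labels == k))

    labels = rng.integers(K, size=n)
    cur, samples = kmeans_loss(labels), []
    for it in range(5000):
        prop = labels.copy()
        prop[rng.integers(n)] = rng.integers(K)
        new = kmeans_loss(prop)
        if np.log(rng.random()) < -lam * (new - cur):  # Metropolis accept
            labels, cur = prop, new
        samples.append(labels.copy())
    S = np.array(samples[2500:])
    coclust = (S[:, :, None] == S[:, None, :]).mean(axis=0)  # pairwise probs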

@article{fds322547,
   Author = {Yazdani, A and Dunson, DB},
   Title = {A hybrid bayesian approach for genome-wide association
             studies on related individuals.},
   Journal = {Bioinformatics (Oxford, England)},
   Volume = {31},
   Number = {24},
   Pages = {3890-3896},
   Year = {2015},
   Month = {December},
   url = {http://dx.doi.org/10.1093/bioinformatics/btv496},
   Abstract = {Motivation: Both single marker and simultaneous analysis
              face challenges in GWAS due to the large number of markers
              genotyped for a small number of subjects. This large p
              small n problem is particularly challenging when the trait
              under investigation has low heritability. Method: In this
              article, we propose a two-stage approach that is a hybrid
              method of single and simultaneous analysis designed to
              improve genomic prediction of complex traits. In the first
              stage, we use a Bayesian independent screening method to
              select the most promising SNPs. In the second stage, we
              rely on a hierarchical model to analyze the joint impact
              of the selected markers. The model is designed to take
              into account familial dependence in the different
              subjects, while using local-global shrinkage priors on the
              marker effects. Results: We evaluate the performance in
              simulation studies, and consider an application to animal
              breeding data. The illustrative data analysis reveals an
              encouraging result in terms of prediction performance and
              computational cost.},
   Doi = {10.1093/bioinformatics/btv496},
   Key = {fds322547}
}
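
A minimal sketch of the two-stage idea under simplifying assumptions:
marginal screening in stage one, then a joint shrinkage fit on the
survivors. Ridge regression stands in for the paper's hierarchical model
with local-global shrinkage priors, and familial dependence is ignored:

    import numpy as np
    from sklearn.linear_model import Ridge

    rng = np.random.default_rng(0)
    n, p, keep = 200, 5000, 50
    X = rng.normal(size=(n, p))
    beta = np.zeros(p); beta[:5] = 1.0          # 5 true signals
    y = X @ beta + rng.normal(size=n)

    # Stage 1: rank markers by absolute marginal association with the trait.
    score = np.abs((X - X.mean(0)).T @ (y - y.mean())) / n
    top = np.argsort(score)[-keep:]

    # Stage 2: joint shrinkage model on the selected markers only.
    fit = Ridge(alpha=1.0).fit(X[:, top], y)
    print("true signals surviving the screen:",
          np.intersect1d(top, np.arange(5)).size)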

@article{fds257904,
   Author = {Dunson, DB and Baird, DD},
   Title = {A proportional hazards model for incidence and induced
             remission of disease.},
   Journal = {Biometrics},
   Volume = {58},
   Number = {1},
   Pages = {71-78},
   Year = {2002},
   Month = {March},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2002.00071.x},
   Abstract = {To assess the protective effects of a time-varying
             covariate, we develop a stochastic model based on tumor
             biology. The model assumes that individuals have a
             Poisson-distributed pool of initiated clones, which progress
             through predetectable, detectable mortal and detectable
             immortal stages. Time-independent covariates are
             incorporated through a log-linear model for the expected
             number of clones, resulting in a proportional hazards model
             for disease onset. By allowing time-dependent covariates to
             induce clone death, with rate dependent on a clone's state,
             the model is flexible enough to accommodate delayed disease
             onset and remission or cure of preexisting disease.
             Inference uses Bayesian methods via Markov chain Monte
             Carlo. Theoretical properties are derived, and the approach
             is illustrated through analysis of the effects of childbirth
             on uterine leiomyoma (fibroids).},
   Doi = {10.1111/j.0006-341x.2002.00071.x},
   Key = {fds257904}
}

@article{fds257939,
   Author = {Gunn, LH and Dunson, DB},
   Title = {A transformation approach for incorporating monotone or
             unimodal constraints.},
   Journal = {Biostatistics (Oxford, England)},
   Volume = {6},
   Number = {3},
   Pages = {434-449},
   Year = {2005},
   Month = {July},
   ISSN = {1465-4644},
   url = {http://dx.doi.org/10.1093/biostatistics/kxi020},
   Abstract = {Samples of curves are collected in many applications,
             including studies of reproductive hormone levels in the
             menstrual cycle. Many approaches have been proposed for
             correlated functional data of this type, including smoothing
             spline methods and other flexible parametric modeling
             strategies. In many cases, the underlying biological
             processes involved restrict the curve to follow a particular
             shape. For example, progesterone levels in healthy women
              increase during the menstrual cycle to a peak achieved at
              a random location, with decreases thereafter. Reproductive
             epidemiologists are interested in studying the distribution
             of the peak and the trajectory for women in different
             groups. Motivated by this application, we propose a simple
             approach for restricting each woman's mean trajectory to
             follow an umbrella shape. An unconstrained hierarchical
             Bayesian model is used to characterize the data, and draws
             from the posterior distribution obtained using a Gibbs
             sampler are then mapped to the constrained space. Inferences
             are based on the resulting quasi-posterior distribution for
             the peak and individual woman trajectories. The methods are
             applied to a study comparing progesterone trajectories for
             conception and nonconception cycles.},
   Doi = {10.1093/biostatistics/kxi020},
   Key = {fds257939}
}
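
A minimal sketch of the transformation approach: draw an unconstrained
curve, then map it to a nearby umbrella-shaped (increase-then-decrease)
curve. The projection below, which scans candidate peak locations and
splices increasing and decreasing isotonic fits, is one simple way to
implement such a mapping, not necessarily the paper's:

    import numpy as np
    from sklearn.isotonic import IsotonicRegression

    def umbrella_projection(y):
        t = np.arange(len(y))
        best_sse, best_fit = np.inf, None
        for peak in range(len(y)):   # candidate peak location
            up = IsotonicRegression(increasing=True).fit_transform(
                t[: peak + 1], y[: peak + 1])
            down = IsotonicRegression(increasing=False).fit_transform(
                t[peak:], y[peak:])
            fit = np.concatenate([up[:-1], [max(up[-1], down[0])], down[1:]])
            sse = ((fit - y) ** 2).sum()
            if sse < best_sse:
                best_sse, best_fit = sse, fit
        return best_fit

    rng = np.random.default_rng(0)
    draw = np.sin(np.linspace(0, np.pi, 28)) + rng.normal(0, 0.2, 28)
    constrained = umbrella_projection(draw)   # umbrella-shaped version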

@article{fds365276,
   Author = {Badea, A and Li, D and Niculescu, AR and Anderson, RJ and Stout, JA and Williams, CL and Colton, CA and Maeda, N and Dunson,
             DB},
   Title = {Absolute Winding Number Differentiates Mouse Spatial
             Navigation Strategies With Genetic Risk for Alzheimer's
             Disease.},
   Journal = {Front Neurosci},
   Volume = {16},
   Pages = {848654},
   Year = {2022},
   url = {http://dx.doi.org/10.3389/fnins.2022.848654},
   Abstract = {Spatial navigation and orientation are emerging as promising
             markers for altered cognition in prodromal Alzheimer's
             disease, and even in cognitively normal individuals at risk
             for Alzheimer's disease. The different APOE gene alleles
             confer various degrees of risk. The APOE2 allele is
             considered protective, APOE3 is seen as control, while APOE4
             carriage is the major known genetic risk for Alzheimer's
             disease. We have used mouse models carrying the three
             humanized APOE alleles and tested them in a spatial memory
             task in the Morris water maze. We introduce a new metric,
             the absolute winding number, to characterize the spatial
             search strategy, through the shape of the swim path. We show
             that this metric is robust to noise, and works for small
             group samples. Moreover, the absolute winding number better
             differentiated APOE3 carriers, through their straighter swim
             paths relative to both APOE2 and APOE4 genotypes. Finally,
             this novel metric supported increased vulnerability in APOE4
             females. We hypothesized differences in spatial memory and
             navigation strategies are linked to differences in brain
             networks, and showed that different genotypes have different
             reliance on the hippocampal and caudate putamen circuits,
             pointing to a role for white matter connections. Moreover,
             differences were most pronounced in females. This departure
              from a hippocampus-centric to a brain-network approach may
             open avenues for identifying regions linked to increased
             risk for Alzheimer's disease, before overt disease
             manifestation. Further exploration of novel biomarkers based
             on spatial navigation strategies may enlarge the windows of
             opportunity for interventions. The proposed framework will
             be significant in dissecting vulnerable circuits associated
             with cognitive changes in prodromal Alzheimer's
             disease.},
   Doi = {10.3389/fnins.2022.848654},
   Key = {fds365276}
}
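
The absolute winding number can be sketched as the total unsigned turning
of the swim path, measured in revolutions; this normalization is an
assumption, and the paper's exact definition may differ:

    import numpy as np

    def absolute_winding_number(xy):
        # xy: (n, 2) array of path coordinates.
        steps = np.diff(xy, axis=0)
        headings = np.arctan2(steps[:, 1], steps[:, 0])
        turns = np.diff(headings)
        turns = (turns + np.pi) % (2 * np.pi) - np.pi  # wrap to (-pi, pi]
        return np.abs(turns).sum() / (2 * np.pi)

    t = np.linspace(0.1, 4 * np.pi, 400)
    spiral = np.column_stack([t * np.cos(t), t * np.sin(t)])
    print(absolute_winding_number(spiral))  # roughly 2 full turns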

@article{fds257885,
   Author = {Dunson, DB and Weinberg, CR},
   Title = {Accounting for unreported and missing intercourse in human
             fertility studies},
   Journal = {Statistics in Medicine},
   Volume = {19},
   Number = {5},
   Pages = {665-679},
   Year = {2000},
   ISSN = {0277-6715},
   url = {http://dx.doi.org/10.1002/(SICI)1097-0258(20000315)19:5<665::AID-SIM391>3.0.CO},
   Abstract = {In prospective studies of human fertility that attempt to
             identify days of ovulation, couples record each day whether
             they had intercourse. Depending on the design of the study,
             couples either (I) mark the dates of intercourse on a chart
             or (II) mark 'yes' or 'no' for each day of the menstrual
             cycle. If protocol I is used, intercourse dates that couples
             fail to record are indistinguishable from dates of no
             intercourse. Consequently, estimates of day-specific
             fecundability are biased upwards. If protocol II is used,
             data from menstrual cycles with missing intercourse
             information must be discarded in order to fit current
             fertility models. We propose methods to account for
             unreported and missing intercourse under the assumption that
             the missingness mechanism is independent of time conditional
             on the unobservable true intercourse status. We use probit
             mixture models to allow for heterogeneity among couples,
             both in fecundability and in the missingness and
             non-reporting mechanisms. Markov chain Monte Carlo (MCMC)
             techniques are used for Bayesian estimation. The methods are
             generally applicable to the analysis of aggregated Bernoulli
             outcomes when there is uncertainty in whether a given trial,
             out of a series of trials, was completed. We illustrate the
             methods by application to two prospective fertility
             studies.},
   Doi = {10.1002/(SICI)1097-0258(20000315)19:5<665::AID-SIM391>3.0.CO},
   Key = {fds257885}
}

@article{fds335796,
   Author = {Bertrán, MA and Martínez, NL and Wang, Y and Dunson, D and Sapiro, G and Ringach, D},
   Title = {Active learning of cortical connectivity from two-photon
             imaging data.},
   Journal = {PloS one},
   Volume = {13},
   Number = {5},
   Pages = {e0196527},
   Year = {2018},
   Month = {January},
   url = {http://dx.doi.org/10.1371/journal.pone.0196527},
   Abstract = {Understanding how groups of neurons interact within a
             network is a fundamental question in system neuroscience.
             Instead of passively observing the ongoing activity of a
             network, we can typically perturb its activity, either by
             external sensory stimulation or directly via techniques such
             as two-photon optogenetics. A natural question is how to use
             such perturbations to identify the connectivity of the
             network efficiently. Here we introduce a method to infer
             sparse connectivity graphs from in-vivo, two-photon imaging
             of population activity in response to external stimuli. A
             novel aspect of the work is the introduction of a
             recommended distribution, incrementally learned from the
             data, to optimally refine the inferred network. Unlike
             existing system identification techniques, this "active
             learning" method automatically focuses its attention on key
             undiscovered areas of the network, instead of targeting
             global uncertainty indicators like parameter variance. We
              show how active learning leads to faster inference while,
              at the same time, providing confidence intervals for the
              network parameters. We present simulations on artificial
              small-world networks to validate the methods, and apply
              the approach to real data. Analysis of the frequency of
              recovered motifs shows that cortical networks are
              consistent with a small-world topology model.},
   Doi = {10.1371/journal.pone.0196527},
   Key = {fds335796}
}

@article{fds257865,
   Author = {Yang, H and Liu, F and Ji, C and Dunson, D},
   Title = {Adaptive sampling for Bayesian geospatial
             models},
   Journal = {Statistics and Computing},
   Volume = {24},
   Number = {6},
   Pages = {1101-1110},
   Publisher = {Springer Nature},
   Year = {2014},
   Month = {November},
   ISSN = {0960-3174},
   url = {http://dx.doi.org/10.1007/s11222-013-9422-4},
   Abstract = {Bayesian hierarchical modeling with Gaussian process random
             effects provides a popular approach for analyzing
             point-referenced spatial data. For large spatial data sets,
             however, generic posterior sampling is infeasible due to the
             extremely high computational burden in decomposing the
             spatial correlation matrix. In this paper, we propose an
             efficient algorithm—the adaptive griddy Gibbs (AGG)
             algorithm—to address the computational issues with large
             spatial data sets. The proposed algorithm dramatically
             reduces the computational complexity. We show theoretically
             that the proposed method can approximate the real posterior
             distribution accurately. The sufficient number of grid
             points for a required accuracy has also been derived. We
             compare the performance of AGG with that of the
             state-of-the-art methods in simulation studies. Finally, we
             apply AGG to spatially indexed data concerning building
             energy consumption.},
   Doi = {10.1007/s11222-013-9422-4},
   Key = {fds257865}
}
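
A minimal sketch of a single griddy Gibbs update: evaluate an
unnormalized full conditional on a grid, then sample by inverse CDF. The
adaptive grid refinement that defines AGG is omitted, and the conditional
below is a stand-in density, not a spatial model:

    import numpy as np

    rng = np.random.default_rng(0)

    def griddy_gibbs_step(log_cond, grid):
        # Discretize the full conditional on the grid and draw from it.
        logp = log_cond(grid)
        p = np.exp(logp - logp.max())
        return rng.choice(grid, p=p / p.sum())

    log_cond = lambda phi: -0.5 * (phi - 1.2) ** 2 / 0.1  # stand-in conditional
    grid = np.linspace(0.01, 3.0, 200)
    phi_draw = griddy_gibbs_step(log_cond, grid)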

@article{fds258004,
   Author = {Zhu, B and Dunson, DB and Ashley-Koch, AE},
   Title = {Adverse subpopulation regression for multivariate outcomes
             with high-dimensional predictors.},
   Journal = {Stat Med},
   Volume = {31},
   Number = {29},
   Pages = {4102-4113},
   Year = {2012},
   Month = {December},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/22825854},
   Abstract = {Biomedical studies have a common interest in assessing
             relationships between multiple related health outcomes and
             high-dimensional predictors. For example, in reproductive
             epidemiology, one may collect pregnancy outcomes such as
             length of gestation and birth weight and predictors such as
             single nucleotide polymorphisms in multiple candidate genes
             and environmental exposures. In such settings, there is a
             need for simple yet flexible methods for selecting true
             predictors of adverse health responses from a
             high-dimensional set of candidate predictors. To address
             this problem, one may either consider linear regression
             models for the continuous outcomes or convert these outcomes
             into binary indicators of adverse responses using predefined
             cutoffs. The former strategy has the disadvantage of often
             leading to a poorly fitting model that does not predict risk
             well, whereas the latter approach can be very sensitive to
             the cutoff choice. As a simple yet flexible alternative, we
             propose a method for adverse subpopulation regression, which
             relies on a two-component latent class model, with the
             dominant component corresponding to (presumed) healthy
             individuals and the risk of falling in the minority
             component characterized via a logistic regression. The
             logistic regression model is designed to accommodate
             high-dimensional predictors, as occur in studies with a
             large number of gene by environment interactions, through
             the use of a flexible nonparametric multiple shrinkage
             approach. The Gibbs sampler is developed for posterior
             computation. We evaluate the methods with the use of
             simulation studies and apply these to a genetic epidemiology
             study of pregnancy outcomes.},
   Doi = {10.1002/sim.5520},
   Key = {fds258004}
}

@article{fds257868,
   Author = {Salazar, E and Dunson, DB and Carin, L},
   Title = {Analysis of space-time relational data with application to
             legislative voting},
   Journal = {Computational Statistics and Data Analysis},
   Volume = {68},
   Pages = {141-154},
   Publisher = {Elsevier BV},
   Year = {2013},
   Month = {July},
   ISSN = {0167-9473},
   url = {http://dx.doi.org/10.1016/j.csda.2013.06.018},
   Abstract = {We consider modeling spatio-temporally indexed relational
             data, motivated by analysis of voting data for the United
             States House of Representatives over two decades. The data
             are characterized by incomplete binary matrices,
             representing votes of legislators on legislation over time.
             The spatial covariates correspond to the location of a
             legislator's district, and time corresponds to the year of a
             vote. We seek to infer latent features associated with
             legislators and legislation, incorporating spatio-temporal
             structure. A model of such data must impose a flexible
             representation of the space-time structure, since the
             apportionment of House seats and the total number of
             legislators change over time. There are 435 congressional
             districts, with one legislator at a time for each district;
             however, the total number of legislators typically changes
             from year to year, for example due to deaths. A matrix
             kernel stick-breaking process (MKSBP) is proposed, with the
             model employed within a probit-regression construction.
             Theoretical properties of the model are discussed and
             posterior inference is developed using Markov chain Monte
             Carlo methods. Advantages over benchmark models are shown in
             terms of vote prediction and treatment of missing data.
             Marked improvements in results are observed based on
             leveraging spatial (geographical) information. © 2013
             Elsevier B.V. All rights reserved.},
   Doi = {10.1016/j.csda.2013.06.018},
   Key = {fds257868}
}

@article{fds322557,
   Author = {Bhattacharya, A and Pati, D and Dunson, D},
   Title = {Anisotropic function estimation using multi-bandwidth
             Gaussian processes},
   Journal = {Annals of Statistics},
   Volume = {42},
   Number = {1},
   Pages = {352-381},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2014},
   Month = {January},
   url = {http://dx.doi.org/10.1214/13-AOS1192},
   Abstract = {In nonparametric regression problems involving multiple
             predictors, there is typically interest in estimating an
             anisotropic multivariate regression surface in the important
             predictors while discarding the unimportant ones. Our focus
             is on defining a Bayesian procedure that leads to the
             minimax optimal rate of posterior contraction (up to a log
             factor) adapting to the unknown dimension and anisotropic
             smoothness of the true surface. We propose such an approach
             based on a Gaussian process prior with dimension-specific
             scalings, which are assigned carefully-chosen hyperpriors.
              We additionally show that using a homogeneous Gaussian
             process with a single bandwidth leads to a sub-optimal rate
             in anisotropic cases.},
   Doi = {10.1214/13-AOS1192},
   Key = {fds322557}
}
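
The multi-bandwidth construction rests on a squared-exponential kernel
with one bandwidth per predictor, so that a very flat dimension is
effectively discarded. A minimal sketch, with the carefully chosen
hyperpriors on the scalings omitted:

    import numpy as np

    def ard_kernel(X1, X2, lengthscales, variance=1.0):
        # X1: (n, d), X2: (m, d); lengthscales: (d,), one per predictor.
        diff = (X1[:, None, :] - X2[None, :, :]) / lengthscales
        return variance * np.exp(-0.5 * (diff ** 2).sum(axis=-1))

    rng = np.random.default_rng(0)
    X = rng.uniform(size=(50, 3))
    # A huge lengthscale in dimension 3 makes that predictor irrelevant.
    K = ard_kernel(X, X, lengthscales=np.array([0.1, 0.2, 1e6]))
    f = rng.multivariate_normal(np.zeros(50), K + 1e-8 * np.eye(50))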

@article{fds257898,
   Author = {Robbins, WA and Witt, KL and Haseman, JK and Dunson, DB and Troiani, L and Cohen, MS and Hamilton, CD and Perreault, SD and Libbus, B and Beyler,
             SA and Raburn, DJ and Tedder, ST and Shelby, MD and Bishop,
             JB},
   Title = {Antiretroviral therapy effects on genetic and morphologic
             end points in lymphocytes and sperm of men with human
             immunodeficiency virus infection.},
   Journal = {J Infect Dis},
   Volume = {184},
   Number = {2},
   Pages = {127-135},
   Year = {2001},
   Month = {July},
   ISSN = {0022-1899},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/11424008},
   Abstract = {Many human immunodeficiency virus (HIV)-infected persons
             receive prolonged treatment with DNA-reactive antiretroviral
             drugs. A prospective study was conducted of 26 HIV-infected
             men who provided samples before treatment and at multiple
             times after beginning treatment, to investigate effects of
             antiretrovirals on lymphocyte and sperm chromosomes and
             semen quality. Several antiretroviral regimens, all
             including a nucleoside component, were used. Lymphocyte
             metaphase analysis and sperm fluorescence in situ
             hybridization were used for cytogenetic studies. Semen
             analyses included conventional parameters (volume,
             concentration, viability, motility, and morphology). No
             significant effects on cytogenetic parameters, semen volume,
             or sperm concentration were detected. However, there were
             significant improvements in sperm motility for men with
              study entry CD4 cell counts >200 cells/mm³, sperm
              morphology for men with entry CD4 cell counts ≤200
              cells/mm³, and the percentage of viable sperm in both
             groups. These findings suggest that nucleoside-containing
             antiretrovirals administered via recommended protocols do
             not induce chromosomal changes in lymphocytes or sperm but
             may produce improvements in semen quality.},
   Doi = {10.1086/322002},
   Key = {fds257898}
}

@article{fds257938,
   Author = {Dunson, DB and Taylor, JA},
   Title = {Approximate Bayesian inference for quantiles},
   Journal = {Journal of Nonparametric Statistics},
   Volume = {17},
   Number = {3},
   Pages = {385-400},
   Publisher = {Informa UK Limited},
   Year = {2005},
   Month = {April},
   url = {http://dx.doi.org/10.1080/10485250500039049},
   Abstract = {Suppose data consist of a random sample from a distribution
              function F_Y, which is unknown, and that interest focuses
              on inferences on θ, a vector of quantiles of F_Y. When the
             likelihood function is not fully specified, a posterior
             density cannot be calculated and Bayesian inference is
             difficult. This article considers an approach which relies
             on a substitution likelihood characterized by a vector of
             quantiles. Properties of the substitution likelihood are
             investigated, strategies for prior elicitation are
             presented, and a general framework is proposed for quantile
             regression modeling. Posterior computation proceeds via a
             Metropolis algorithm that utilizes a normal approximation to
             the posterior. Results from a simulation study are
             presented, and the methods are illustrated through
             application to data from a genotoxicity experiment. © 2005
             Taylor & Francis Ltd.},
   Doi = {10.1080/10485250500039049},
   Key = {fds257938}
}
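
A minimal sketch of the substitution likelihood for quantiles: a
multinomial likelihood in the counts of observations falling between the
hypothesized quantile values. The prior and the Metropolis sampler are
omitted:

    import numpy as np
    from scipy.special import gammaln

    def substitution_loglik(theta, probs, y):
        # theta: hypothesized quantile values; probs: their levels.
        if np.any(np.diff(theta) <= 0):
            return -np.inf
        edges = np.concatenate([[-np.inf], theta, [np.inf]])
        counts = np.histogram(y, bins=edges)[0]
        cell = np.diff(np.concatenate([[0.0], probs, [1.0]]))
        return (gammaln(len(y) + 1) - gammaln(counts + 1).sum()
                + (counts * np.log(cell)).sum())

    rng = np.random.default_rng(0)
    y = rng.normal(size=500)
    probs = np.array([0.25, 0.5, 0.75])
    print(substitution_loglik(np.quantile(y, probs), probs, y))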

@article{fds257961,
   Author = {Hannah, LA and Dunson, DB},
   Title = {Approximate dynamic programming for storage
             problems},
   Journal = {Proceedings of the 28th International Conference on Machine
             Learning, ICML 2011},
   Pages = {337-344},
   Year = {2011},
   Month = {October},
   Abstract = {Storage problems are an important subclass of stochastic
              control problems. This paper presents a new method,
              approximate dynamic programming for storage (ADPS), to
              solve storage problems with continuous, convex decision
              sets.
             Unlike other solution procedures, ADPS allows math
             programming to be used to make decisions each time period,
             even in the presence of large state variables. We test ADPS
             on the day ahead wind commitment problem with storage.
             Copyright 2011 by the author(s)/owner(s).},
   Key = {fds257961}
}

@article{fds365277,
   Author = {van den Boom, W and Reeves, G and Dunson, DB},
   Title = {Approximating posteriors with high-dimensional nuisance
             parameters via integrated rotated Gaussian
             approximation.},
   Journal = {Biometrika},
   Volume = {108},
   Number = {2},
   Pages = {269-282},
   Year = {2021},
   Month = {June},
   url = {http://dx.doi.org/10.1093/biomet/asaa068},
   Abstract = {Posterior computation for high-dimensional data with many
             parameters can be challenging. This article focuses on a new
             method for approximating posterior distributions of a low-
             to moderate-dimensional parameter in the presence of a
             high-dimensional or otherwise computationally challenging
             nuisance parameter. The focus is on regression models and
             the key idea is to separate the likelihood into two
             components through a rotation. One component involves only
             the nuisance parameters, which can then be integrated out
             using a novel type of Gaussian approximation. We provide
             theory on approximation accuracy that holds for a broad
             class of forms of the nuisance component and priors.
             Applying our method to simulated and real data sets shows
             that it can outperform state-of-the-art posterior
             approximation approaches.},
   Doi = {10.1093/biomet/asaa068},
   Key = {fds365277}
}

@article{fds257958,
   Author = {Stanford, JB and Mikolajczyk, RT and Dunson, DB},
   Title = {Are Chinese people really more fertile?},
   Journal = {Fertility and sterility},
   Volume = {94},
   Number = {3},
   Pages = {e58},
   Year = {2010},
   Month = {August},
   ISSN = {0015-0282},
   url = {http://dx.doi.org/10.1016/j.fertnstert.2010.05.004},
   Doi = {10.1016/j.fertnstert.2010.05.004},
   Key = {fds257958}
}

@article{fds257894,
   Author = {Dunson, DB and Weinberg, CR and Baird, DD and Kesner, JS and Wilcox,
             AJ},
   Title = {Assessing human fertility using several markers of
             ovulation.},
   Journal = {Statistics in medicine},
   Volume = {20},
   Number = {6},
   Pages = {965-978},
   Year = {2001},
   Month = {March},
   ISSN = {0277-6715},
   url = {http://dx.doi.org/10.1002/sim.716},
   Abstract = {In modelling human fertility one ideally accounts for timing
             of intercourse relative to ovulation. Measurement error in
             identifying the day of ovulation can bias estimates of
             fecundability parameters and attenuate estimates of
             covariate effects. In the absence of a single perfect marker
             of ovulation, several error prone markers are sometimes
             obtained. In this paper we propose a semi-parametric mixture
             model that uses multiple independent markers of ovulation to
             account for measurement error. The model assigns each method
             of assessing ovulation a distinct non-parametric error
             distribution, and corrects bias in estimates of day-specific
             fecundability. We use a Monte Carlo EM algorithm for joint
             estimation of (i) the error distribution for the markers,
             (ii) the error-corrected fertility parameters, and (iii) the
             couple-specific random effects. We apply the methods to data
             from a North Carolina fertility study to assess the
             magnitude of error in measures of ovulation based on urinary
             luteinizing hormone and metabolites of ovarian hormones, and
             estimate the corrected day-specific probabilities of
             clinical pregnancy. Published in 2001 by John Wiley & Sons,
             Ltd.},
   Doi = {10.1002/sim.716},
   Key = {fds257894}
}

@article{fds258015,
   Author = {Dunson, DB},
   Title = {Assessing overall risk in reproductive experiments.},
   Journal = {Risk analysis : an official publication of the Society for
             Risk Analysis},
   Volume = {20},
   Number = {4},
   Pages = {429-437},
   Year = {2000},
   Month = {August},
   url = {http://dx.doi.org/10.1111/0272-4332.204042},
   Abstract = {Toxicologists are often interested in assessing the joint
             effect of an exposure on multiple reproductive endpoints,
             including early loss, fetal death, and malformation.
             Exposures that occur prior to mating or extremely early in
             development can adversely affect the number of implantation
             sites or fetuses that form within each dam and may even
             prevent pregnancy. A simple approach for assessing overall
             adverse effects in such studies is to consider fetuses or
             implants that fail to develop due to exposure as missing
             data. The missing data can be imputed, and standard methods
             for the analysis of quantal response data can then be used
             for quantitative risk assessment or testing. In this
             article, a new bias-corrected imputation procedure is
             proposed and evaluated. The procedure is straightforward to
             implement in standard statistical packages and has excellent
             operating characteristics when used in combination with a
             marginal model fit with generalized estimating equations.
             The methods are applied to data from a reproductive toxicity
             study of Nitrofurazone conducted by the National Toxicology
             Program.},
   Doi = {10.1111/0272-4332.204042},
   Key = {fds258015}
}

@article{fds257944,
   Author = {Baird, DD and Dunson, DB and Hill, MC and Cousins, D and Schectman,
             JM},
   Title = {Association of physical activity with development of uterine
             leiomyoma.},
   Journal = {American journal of epidemiology},
   Volume = {165},
   Number = {2},
   Pages = {157-163},
   Year = {2007},
   Month = {January},
   ISSN = {0002-9262},
   url = {http://dx.doi.org/10.1093/aje/kwj363},
   Abstract = {The relation between physical activity and uterine
             leiomyomata (fibroids) has received little study, but
             exercise is protective for breast cancer, another hormonally
             mediated tumor. Participants in this study were randomly
             selected members of a health plan based in Washington, DC,
             aged 35-49 years (734 African Americans, 455 Whites)
             enrolled between 1996 and 1999. Fibroid status was based on
             ultrasound screening. Physical activity was based on
             detailed interview questions. Logistic regression with
             adjustment for body mass index and other risk factors showed
             that women in the highest category of physical activity were
             significantly less likely to have fibroids (odds ratio =
             0.6, 95% confidence interval = 0.4, 0.9 for the highest vs.
              the lowest category (equivalent to approximately ≥7
              hours/week vs. <2 hours/week)). There was a dose-response
             pattern; a significant trend was seen for both
             African-American and White women. A multistate Bayesian
             analysis indicated that exercise was associated with tumor
             onset more strongly than with tumor growth. When data for
             women who reported major fibroid-related symptoms were
             excluded, results remained essentially unchanged, suggesting
             that the observed association could not be attributed to
             reverse causation (fibroids preventing exercise). The
             authors concluded that regular exercise might help women
             prevent fibroids.},
   Doi = {10.1093/aje/kwj363},
   Key = {fds257944}
}

@article{fds257846,
   Author = {Kundu, S and Dunson, DB},
   Title = {Bayes variable selection in semiparametric linear
             models.},
   Journal = {Journal of the American Statistical Association},
   Volume = {109},
   Number = {505},
   Pages = {437-447},
   Year = {2014},
   Month = {March},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2014.881153},
   Abstract = {There is a rich literature on Bayesian variable selection
              for parametric models. Our focus is on generalizing methods
              and asymptotic theory established for mixtures of g-priors
              to semiparametric linear regression models having unknown
              residual densities. Using a Dirichlet process location
              mixture for the residual density, we propose a
              semiparametric g-prior which incorporates an unknown
              matrix of cluster allocation indicators. For this class of
              priors, posterior computation can proceed via a
              straightforward stochastic search variable selection
              algorithm. In addition, Bayes factor and variable selection
              consistency is shown to result under a class of proper
              priors on g even when the number of candidate predictors p
              is allowed to increase much faster than sample size n,
              while making sparsity assumptions on the true model
              size.},
   Doi = {10.1080/01621459.2014.881153},
   Key = {fds257846}
}

@article{fds258072,
   Author = {Bigelow, JL and Dunson, DB},
   Title = {Bayesian adaptive regression splines for hierarchical
             data.},
   Journal = {Biometrics},
   Volume = {63},
   Number = {3},
   Pages = {724-732},
   Year = {2007},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/17403106},
   Abstract = {This article considers methodology for hierarchical
             functional data analysis, motivated by studies of
             reproductive hormone profiles in the menstrual cycle.
             Current methods standardize the cycle lengths and ignore the
             timing of ovulation within the cycle, both of which are
             biologically informative. Methods are needed that avoid
             standardization, while flexibly incorporating information on
             covariates and the timing of reference events, such as
             ovulation and onset of menses. In addition, it is necessary
             to account for within-woman dependency when data are
             collected for multiple cycles. We propose an approach based
             on a hierarchical generalization of Bayesian multivariate
             adaptive regression splines. Our formulation allows for an
             unknown set of basis functions characterizing the
             population-averaged and woman-specific trajectories in
             relation to covariates. A reversible jump Markov chain Monte
             Carlo algorithm is developed for posterior computation.
             Applying the methods to data from the North Carolina Early
             Pregnancy Study, we investigate differences in urinary
             progesterone profiles between conception and nonconception
             cycles.},
   Doi = {10.1111/j.1541-0420.2007.00761.x},
   Key = {fds258072}
}

@article{fds258012,
   Author = {Dunson, DB and Tindall, KR},
   Title = {Bayesian analysis of mutational spectra.},
   Journal = {Genetics},
   Volume = {156},
   Number = {3},
   Pages = {1411-1418},
   Year = {2000},
   Month = {November},
   url = {http://dx.doi.org/10.1093/genetics/156.3.1411},
   Abstract = {Studies that examine both the frequency of gene mutation and
             the pattern or spectrum of mutational changes can be used to
             identify chemical mutagens and to explore the molecular
             mechanisms of mutagenesis. In this article, we propose a
             Bayesian hierarchical modeling approach for the analysis of
             mutational spectra. We assume that the total number of
             independent mutations and the numbers of mutations falling
             into different response categories, defined by location
             within a gene and/or type of alteration, follow binomial and
             multinomial sampling distributions, respectively. We use
             prior distributions to summarize past information about the
             overall mutation frequency and the probabilities
             corresponding to the different mutational categories. These
             priors can be chosen on the basis of data from previous
             studies using an approach that accounts for heterogeneity
             among studies. Inferences about the overall mutation
             frequency, the proportions of mutations in each response
             category, and the category-specific mutation frequencies can
             be based on posterior distributions, which incorporate past
             and current data on the mutant frequency and on DNA sequence
             alterations. Methods are described for comparing groups and
             for assessing dose-related trends. We illustrate our
             approach using data from the literature.},
   Doi = {10.1093/genetics/156.3.1411},
   Key = {fds258012}
}
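
The binomial/multinomial structure described above admits simple
conjugate updates, sketched here with illustrative prior parameters
rather than values elicited from previous studies:

    import numpy as np

    # Overall mutation frequency: y mutants among n screened, Beta(a, b) prior.
    a, b, n, y = 1.0, 99.0, 10_000, 37
    post_mean_freq = (a + y) / (a + b + n)

    # Spectrum: counts over mutation categories, Dirichlet(alpha) prior.
    alpha = np.ones(4)                       # four hypothetical categories
    counts = np.array([12, 9, 4, 12])
    post_mean_spectrum = (alpha + counts) / (alpha + counts).sum()

    # Category-specific mutation frequencies combine the two.
    print(post_mean_freq * post_mean_spectrum)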

@article{fds257933,
   Author = {Dunson, DB},
   Title = {Bayesian Biostatistics},
   Journal = {Handbook of Statistics},
   Volume = {25},
   Pages = {743-761},
   Publisher = {Elsevier},
   Year = {2005},
   Month = {December},
   ISSN = {0169-7161},
   url = {http://dx.doi.org/10.1016/S0169-7161(05)25025-3},
   Abstract = {With the rapid increase in biomedical technology and the
             accompanying generation of complex and high-dimensional data
             sets, Bayesian statistical methods have become much more
             widely used. One reason is that the Bayesian probability
             modeling machinery provides a natural framework for
             integration of data and information from multiple sources,
             while accounting for uncertainty in model specifications.
             This chapter briefly reviews some of the recent areas in
             which Bayesian biostatistical research has had the greatest
             impact. Particular areas of focus include correlated and
             longitudinal data analysis, event time data, nonlinear
             modeling, model averaging, and bioinformatics. The reader is
             referred elsewhere for recent Bayesian developments in other
             important areas, such as clinical trials and analysis of
             spatially correlated data. Certainly the many practical and
             conceptual advantages of the Bayesian paradigm will lead to
             an increasing impact in future biomedical research,
             particularly in areas such as genomics. © 2005 Elsevier
             B.V. All rights reserved.},
   Doi = {10.1016/S0169-7161(05)25025-3},
   Key = {fds257933}
}

@article{fds353002,
   Author = {Binette, O and Pati, D and Dunson, DB},
   Title = {Bayesian closed surface fitting through tensor
             products},
   Journal = {Journal of Machine Learning Research},
   Volume = {21},
   Pages = {1-26},
   Year = {2020},
   Month = {July},
   Abstract = {Closed surfaces provide a useful model for 3-d shapes, with
              the data typically consisting of a cloud of points in R^3.
             The existing literature on closed surface modeling focuses
             on frequentist point estimation methods that join surface
             patches along the edges, with surface patches created via
              Bézier surfaces or tensor products of B-splines. However,
             the resulting surfaces are not smooth along the edges and
             the geometric constraints required to join the surface
             patches lead to computational drawbacks. In this article, we
             develop a Bayesian model for closed surfaces based on tensor
             products of a cyclic basis resulting in infinitely smooth
             surface realizations. We impose sparsity on the control
             points through a double-shrinkage prior. Theoretical
             properties of the support of our proposed prior are studied
             and it is shown that the posterior achieves the optimal rate
             of convergence under reasonable assumptions on the prior.
             The proposed approach is illustrated with some
             examples.},
   Key = {fds353002}
}
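
A small Python sketch of the tensor-product construction the
abstract above refers to: a surface evaluated as a double sum of
control points times periodic basis functions in two angular
coordinates. The Fourier-type basis and random control points are
illustrative stand-ins for the paper's cyclic basis and
double-shrinkage prior.

    import numpy as np

    def cyclic_basis(t, n_basis):
        """Simple periodic basis on [0, 2*pi): 1, cos(kt), sin(kt)."""
        cols = [np.ones_like(t)]
        for k in range(1, (n_basis - 1) // 2 + 1):
            cols += [np.cos(k * t), np.sin(k * t)]
        return np.column_stack(cols[:n_basis])

    def surface(u, v, C):
        """S(u, v) = sum_ij C[i, j] * phi_i(u) * phi_j(v); one 3-d
        point per (u, v) pair, with C of shape (n, n, 3)."""
        Bu = cyclic_basis(np.atleast_1d(u), C.shape[0])
        Bv = cyclic_basis(np.atleast_1d(v), C.shape[1])
        return np.einsum('mi,mj,ijd->md', Bu, Bv, C)

    # Hypothetical control points; a shrinkage prior would act here.
    rng = np.random.default_rng(0)
    C = rng.normal(scale=0.1, size=(5, 5, 3))
    u = v = np.linspace(0, 2 * np.pi, 100)
    print(surface(u, v, C).shape)        # (100, 3), periodic in u, v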

@article{fds322550,
   Author = {Guhaniyogi, R and Dunson, DB},
   Title = {Bayesian Compressed Regression},
   Journal = {Journal of the American Statistical Association},
   Volume = {110},
   Number = {512},
   Pages = {1500-1514},
   Publisher = {Informa UK Limited},
   Year = {2015},
   Month = {October},
   url = {http://dx.doi.org/10.1080/01621459.2014.969425},
   Abstract = {As an alternative to variable selection or shrinkage in
             high-dimensional regression, we propose to randomly compress
             the predictors prior to analysis. This dramatically reduces
             storage and computational bottlenecks, performing well when
             the predictors can be projected to a low-dimensional linear
             subspace with minimal loss of information about the
             response. As opposed to existing Bayesian dimensionality
             reduction approaches, the exact posterior distribution
             conditional on the compressed data is available
             analytically, speeding up computation by many orders of
             magnitude while also bypassing robustness issues due to
             convergence and mixing problems with MCMC. Model averaging
             is used to reduce sensitivity to the random projection
             matrix, while accommodating uncertainty in the subspace
             dimension. Strong theoretical support is provided for the
             approach by showing near-parametric convergence rates for
             the predictive density in the large p, small n asymptotic
             paradigm. Practical performance relative to competitors is
             illustrated in simulations and real data
             applications.},
   Doi = {10.1080/01621459.2014.969425},
   Key = {fds322550}
}
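
The core computation is simple enough to sketch in a few lines of
Python: compress the predictors with a random matrix, then run
conjugate Bayesian regression on the compressed features, for
which the posterior is available in closed form. The Gaussian
projection and fixed-variance ridge prior below are simplifying
assumptions; the paper additionally averages over many random
projections.

    import numpy as np

    rng = np.random.default_rng(1)
    n, p, m = 100, 5000, 20              # compress p predictors to m

    # Hypothetical data with a sparse low-dimensional signal.
    X = rng.normal(size=(n, p))
    beta = np.zeros(p); beta[:3] = [2.0, -1.0, 0.5]
    y = X @ beta + rng.normal(scale=0.5, size=n)

    # Random compression applied to the predictors before analysis.
    Phi = rng.normal(size=(p, m)) / np.sqrt(m)
    Z = X @ Phi

    # Conjugate ridge posterior, exact and MCMC-free: prior
    # N(0, tau2 * I) on the compressed coefficients, sigma2 fixed.
    tau2, sigma2 = 1.0, 0.25
    V = np.linalg.inv(Z.T @ Z / sigma2 + np.eye(m) / tau2)
    coef = V @ Z.T @ y / sigma2

    # Posterior predictive mean at new points.
    print(rng.normal(size=(5, p)) @ Phi @ coef)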

@article{fds339305,
   Author = {Guhaniyogi, R and Qamar, S and Dunson, DB},
   Title = {Bayesian Conditional Density Filtering},
   Journal = {Journal of Computational and Graphical Statistics},
   Volume = {27},
   Number = {3},
   Pages = {657-672},
   Publisher = {Informa UK Limited},
   Year = {2018},
   Month = {July},
   url = {http://dx.doi.org/10.1080/10618600.2017.1422431},
   Abstract = {We propose a conditional density filtering (C-DF) algorithm
             for efficient online Bayesian inference. C-DF adapts MCMC
             sampling to the online setting, sampling from approximations
             to conditional posterior distributions obtained by
             propagating surrogate conditional sufficient statistics (a
             function of data and parameter estimates) as new data
             arrive. These quantities eliminate the need to store or
             process the entire dataset simultaneously and offer a number
             of desirable features. Often, these include a reduction in
             memory requirements and runtime and improved mixing, along
             with state-of-the-art parameter inference and prediction.
             These improvements are demonstrated through several
             illustrative examples, including an application to high-
             dimensional compressed regression. In cases where the
             dimension of the model parameter does not grow with time, we
             also establish sufficient conditions under which C-DF
             samples converge to the target posterior distribution
             asymptotically as sampling proceeds and more data arrive.
             Supplementary materials of C-DF are available
             online.},
   Doi = {10.1080/10618600.2017.1422431},
   Key = {fds339305}
}
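
A toy Python sketch of the statistic-propagation idea, using a
conjugate normal-mean model in place of the paper's surrogate
conditional sufficient statistics; all settings are invented. The
running (sum, count) pair is updated as batches arrive, so the raw
data never need to be stored.

    import numpy as np

    rng = np.random.default_rng(2)
    sigma2, mu0, tau2 = 1.0, 0.0, 10.0   # assumed model and prior
    s, n = 0.0, 0                        # propagated statistics

    for _ in range(1000):                # batches arrive online
        batch = rng.normal(1.5, np.sqrt(sigma2), size=10)
        s += batch.sum()                 # update statistics,
        n += batch.size                  # then discard the batch

        # Conditional posterior of the mean given the statistics;
        # C-DF would draw from such conditionals in a Gibbs sweep.
        prec = 1.0 / tau2 + n / sigma2
        mean = (mu0 / tau2 + s / sigma2) / prec

    print(mean, 1.0 / prec)              # posterior mean, variance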

@article{fds329117,
   Author = {Yang, Y and Dunson, DB},
   Title = {Bayesian Conditional Tensor Factorizations for
             High-Dimensional Classification.},
   Journal = {Journal of the American Statistical Association},
   Volume = {111},
   Number = {514},
   Pages = {656-669},
   Publisher = {Informa UK Limited},
   Year = {2016},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2015.1029129},
   Abstract = {In many application areas, data are collected on a
             categorical response and high-dimensional categorical
             predictors, with the goals being to build a parsimonious
             model for classification while doing inferences on the
             important predictors. In settings such as genomics, there
             can be complex interactions among the predictors. By using a
             carefully-structured Tucker factorization, we define a model
             that can characterize any conditional probability, while
             facilitating variable selection and modeling of higher-order
             interactions. Following a Bayesian approach, we propose a
             Markov chain Monte Carlo algorithm for posterior computation
             accommodating uncertainty in the predictors to be included.
             Under near low rank assumptions, the posterior distribution
             for the conditional probability is shown to achieve close to
             the parametric rate of contraction even in ultra
             high-dimensional settings. The methods are illustrated using
             simulation examples and biomedical applications.},
   Doi = {10.1080/01621459.2015.1029129},
   Key = {fds329117}
}

@article{fds257869,
   Author = {Lock, EF and Dunson, DB},
   Title = {Bayesian consensus clustering.},
   Journal = {Bioinformatics (Oxford, England)},
   Volume = {29},
   Number = {20},
   Pages = {2610-2616},
   Year = {2013},
   Month = {October},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23990412},
   Abstract = {Motivation: In biomedical research a growing number
             of platforms and technologies are used to measure diverse
             but related information, and the task of clustering a set of
             objects based on multiple sources of data arises in several
             applications. Most current approaches to multisource
             clustering either independently determine a separate
             clustering for each data source or determine a single
             'joint' clustering for all data sources. There is a need for
             more flexible approaches that simultaneously model the
             dependence and the heterogeneity of the data
             sources. Results: We propose an integrative
             statistical model that permits a separate clustering of the
             objects for each data source. These separate clusterings
             adhere loosely to an overall consensus clustering, and hence
             they are not independent. We describe a computationally
             scalable Bayesian framework for simultaneous estimation of
             both the consensus clustering and the source-specific
             clusterings. We demonstrate that this flexible approach is
             more robust than joint clustering of all data sources, and
             is more powerful than clustering each data source
             independently. We present an application to subtype
             identification of breast cancer tumor samples using publicly
             available data from The Cancer Genome Atlas. Availability: R
             code with instructions and examples is available at
             http://people.duke.edu/%7Eel113/software.html.},
   Doi = {10.1093/bioinformatics/btt425},
   Key = {fds257869}
}
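
A hypothetical Python sketch of the dependence structure described
in the abstract: each source-specific cluster label equals the
consensus label with some adherence probability and is otherwise
redrawn, so the source clusterings are related but not identical.
The adherence parameter and sizes are illustrative only.

    import numpy as np

    rng = np.random.default_rng(3)
    n, K, n_sources, alpha = 200, 3, 4, 0.8

    # Overall consensus clustering of the objects.
    consensus = rng.integers(K, size=n)

    # Source-specific clusterings adhere loosely to the consensus.
    labels = np.empty((n_sources, n), dtype=int)
    for s in range(n_sources):
        keep = rng.random(n) < alpha
        labels[s] = np.where(keep, consensus, rng.integers(K, size=n))

    # Empirical agreement of each source with the consensus.
    print((labels == consensus).mean(axis=1))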

@article{fds348919,
   Author = {Duan, LL and Young, AL and Nishimura, A and Dunson,
             DB},
   Title = {Bayesian constraint relaxation.},
   Journal = {Biometrika},
   Volume = {107},
   Number = {1},
   Pages = {191-204},
   Year = {2020},
   Month = {March},
   url = {http://dx.doi.org/10.1093/biomet/asz069},
   Abstract = {Prior information often takes the form of parameter
             constraints. Bayesian methods include such information
             through prior distributions having constrained support. By
             using posterior sampling algorithms, one can quantify
             uncertainty without relying on asymptotic approximations.
             However, sharply constrained priors are not necessary in
             some settings and tend to limit modelling scope to a narrow
             set of distributions that are tractable computationally. We
             propose to replace the sharp indicator function of the
             constraint with an exponential kernel, thereby creating a
             close-to-constrained neighbourhood within the Euclidean
             space in which the constrained subspace is embedded. This
             kernel decays with distance from the constrained space at a
             rate depending on a relaxation hyperparameter. By avoiding
             the sharp constraint, we enable use of off-the-shelf
             posterior sampling algorithms, such as Hamiltonian Monte
             Carlo, facilitating automatic computation in a broad range
             of models. We study the constrained and relaxed
             distributions under multiple settings and theoretically
             quantify their differences. Application of the method is
             illustrated through several novel modelling
             examples.},
   Doi = {10.1093/biomet/asz069},
   Key = {fds348919}
}
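
The relaxation itself is easy to illustrate in Python: subtract the
distance to the constrained set, divided by the relaxation
hyperparameter, from the unconstrained log-density. The unit-circle
constraint and random-walk sampler below are illustrative choices;
the point of the method is that gradient-based samplers such as
Hamiltonian Monte Carlo apply equally well.

    import numpy as np

    rng = np.random.default_rng(4)
    lam = 0.05                           # relaxation hyperparameter

    def log_relaxed(theta):
        """N(0, I) base density times exp(-dist / lam), with dist
        the Euclidean distance to the circle {||theta|| = 1}."""
        dist = abs(np.linalg.norm(theta) - 1.0)
        return -0.5 * theta @ theta - dist / lam

    theta, norms = np.array([1.0, 0.0]), []
    for _ in range(20000):               # random-walk Metropolis
        prop = theta + 0.2 * rng.normal(size=2)
        if np.log(rng.random()) < log_relaxed(prop) - log_relaxed(theta):
            theta = prop
        norms.append(np.linalg.norm(theta))

    # Draws concentrate near the constraint as lam shrinks.
    print(np.mean(norms), np.std(norms))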

@article{fds258070,
   Author = {Cai, B and Dunson, DB},
   Title = {Bayesian covariance selection in generalized linear mixed
             models.},
   Journal = {Biometrics},
   Volume = {62},
   Number = {2},
   Pages = {446-457},
   Year = {2006},
   Month = {June},
   ISSN = {0006-341X},
   url = {http://ftp.stat.duke.edu/WorkingPapers/05-01.html},
   Abstract = {The generalized linear mixed model (GLMM), which extends the
             generalized linear model (GLM) to incorporate random effects
             characterizing heterogeneity among subjects, is widely used
             in analyzing correlated and longitudinal data. Although
             there is often interest in identifying the subset of
             predictors that have random effects, random effects
             selection can be challenging, particularly when outcome
             distributions are nonnormal. This article proposes a fully
             Bayesian approach to the problem of simultaneous selection
             of fixed and random effects in GLMMs. Integrating out the
             random effects induces a covariance structure on the
             multivariate outcome data, and an important problem that we
             also consider is that of covariance selection. Our approach
             relies on variable selection-type mixture priors for the
             components in a special Cholesky decomposition of the random
             effects covariance. A stochastic search MCMC algorithm is
             developed, which relies on Gibbs sampling, with Taylor
             series expansions used to approximate intractable integrals.
             Simulated data examples are presented for different
             exponential family distributions, and the approach is
             applied to discrete survival data from a time-to-pregnancy
             study.},
   Doi = {10.1111/j.1541-0420.2005.00499.x},
   Key = {fds258070}
}

@article{fds304006,
   Author = {Cornelis, B and Yang, Y and Vogelstein, JT and Dooms, A and Daubechies,
             I and Dunson, D},
   Title = {Bayesian crack detection in ultra high resolution multimodal
             images of paintings},
   Journal = {2013 18th International Conference on Digital Signal
             Processing, DSP 2013},
   Year = {2013},
   Month = {December},
   url = {http://arxiv.org/abs/1304.5894v2},
   Abstract = {The preservation of our cultural heritage is of paramount
             importance. Thanks to recent developments in digital
             acquisition techniques, powerful image analysis algorithms
             are developed which can be useful non-invasive tools to
             assist in the restoration and preservation of art. In this
             paper we propose a semi-supervised crack detection method
             that can be used for high-dimensional acquisitions of
             paintings coming from different modalities. Our dataset
             consists of a recently acquired collection of images of the
             Ghent Altarpiece (1432), one of Northern Europe's most
             important art masterpieces. Our goal is to build a
             classifier that is able to discern crack pixels from the
             background consisting of non-crack pixels, making optimal
             use of the information that is provided by each modality. To
             accomplish this we employ a recently developed
             non-parametric Bayesian classifier that uses tensor
             factorizations to characterize any conditional probability.
             A prior is placed on the parameters of the factorization
             such that every possible interaction between predictors is
             allowed while still identifying a sparse subset among these
             predictors. The proposed Bayesian classifier, which we will
             refer to as conditional Bayesian tensor factorization or
             CBTF, is assessed by visually comparing classification
             results with the Random Forest (RF) algorithm. © 2013
             IEEE.},
   Doi = {10.1109/ICDSP.2013.6622710},
   Key = {fds304006}
}

@article{fds353001,
   Author = {Legramanti, S and Durante, D and Dunson, DB},
   Title = {Bayesian cumulative shrinkage for infinite
             factorizations.},
   Journal = {Biometrika},
   Volume = {107},
   Number = {3},
   Pages = {745-752},
   Year = {2020},
   Month = {September},
   url = {http://dx.doi.org/10.1093/biomet/asaa008},
   Abstract = {The dimension of the parameter space is typically unknown in
             a variety of models that rely on factorizations. For
             example, in factor analysis the number of latent factors is
             not known and has to be inferred from the data. Although
             classical shrinkage priors are useful in such contexts,
             increasing shrinkage priors can provide a more effective
             approach that progressively penalizes expansions with
             growing complexity. In this article we propose a novel
             increasing shrinkage prior, called the cumulative shrinkage
             process, for the parameters that control the dimension in
             overcomplete formulations. Our construction has broad
             applicability and is based on an interpretable sequence of
             spike-and-slab distributions which assign increasing mass to
             the spike as the model complexity grows. Using factor
             analysis as an illustrative example, we show that this
             formulation has theoretical and practical advantages
             relative to current competitors, including an improved
             ability to recover the model dimension. An adaptive Markov
             chain Monte Carlo algorithm is proposed, and the performance
             gains are outlined in simulations and in an application to
             personality data.},
   Doi = {10.1093/biomet/asaa008},
   Key = {fds353001}
}
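
A short prior simulation in Python shows the mechanics: stick-
breaking weights are accumulated so that the spike probability
grows with the column index, and columns assigned to the spike are
effectively deleted. The Beta(1, a) sticks, inverse-gamma slab, and
spike value are assumptions chosen for illustration.

    import numpy as np

    rng = np.random.default_rng(5)
    H, a, theta_inf = 15, 2.0, 1e-4      # truncation, stick, spike

    # pi[h] = cumulative stick-breaking mass, increasing in h.
    v = rng.beta(1.0, a, size=H)
    w = v * np.concatenate(([1.0], np.cumprod(1.0 - v)[:-1]))
    pi = np.cumsum(w)

    # Column variances: spike with probability pi[h], slab otherwise.
    spike = rng.random(H) < pi
    theta = np.where(spike, theta_inf, 1.0 / rng.gamma(2.0, 1.0, H))

    print(np.round(pi, 2))               # increasing shrinkage
    print((~spike).sum(), "active factor columns")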

@article{fds258068,
   Author = {Dunson, DB and Pillai, N and Park, JH},
   Title = {Bayesian density regression},
   Journal = {Journal of the Royal Statistical Society. Series B:
             Statistical Methodology},
   Volume = {69},
   Number = {2},
   Pages = {163-183},
   Publisher = {WILEY},
   Year = {2007},
   Month = {April},
   ISSN = {1369-7412},
   url = {http://dx.doi.org/10.1111/j.1467-9868.2007.00582.x},
   Abstract = {The paper considers Bayesian methods for density regression,
             allowing a random probability distribution to change
             flexibly with multiple predictors. The conditional response
             distribution is expressed as a non-parametric mixture of
             regression models, with the mixture distribution changing
             with predictors. A class of weighted mixture of Dirichlet
             process priors is proposed for the uncountable collection of
             mixture distributions. It is shown that this specification
             results in a generalized Pólya urn scheme, which
             incorporates weights that are dependent on the distance
             between subjects' predictor values. To allow local
             dependence in the mixture distributions, we propose a
             kernel-based weighting scheme. A Gibbs sampling algorithm is
             developed for posterior computation. The methods are
             illustrated by using simulated data examples and an
             epidemiologic application. © Royal Statistical
             Society.},
   Doi = {10.1111/j.1467-9868.2007.00582.x},
   Key = {fds258068}
}
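
A simplified Python illustration of the kernel-based weighting: the
conditional density of the response is a mixture of regression
models whose weights vary with the predictor through a Gaussian
kernel. The components, kernel centres, and bandwidth are invented;
the paper treats the mixture nonparametrically rather than fixing
four components.

    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(6)
    K = 4
    icpt, slope = rng.normal(size=K), rng.normal(size=K)
    locs = np.linspace(-2, 2, K)         # kernel centres
    gamma = rng.dirichlet(np.ones(K))    # baseline weights

    def f_y_given_x(y, x, h=0.7, sigma=0.3):
        """Mixture of regressions with predictor-dependent weights."""
        w = gamma * np.exp(-0.5 * ((x - locs) / h) ** 2)
        w /= w.sum()
        return float(w @ stats.norm.pdf(y, icpt + slope * x, sigma))

    # The response density changes smoothly with the predictor.
    for x in (-1.5, 0.0, 1.5):
        print(x, f_y_given_x(0.0, x))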

@article{fds362587,
   Author = {Duan, LL and Dunson, DB},
   Title = {Bayesian Distance Clustering.},
   Journal = {Journal of machine learning research : JMLR},
   Volume = {22},
   Pages = {224},
   Year = {2021},
   Month = {January},
   Abstract = {Model-based clustering is widely used in a variety of
             application areas. However, fundamental concerns remain
             about robustness. In particular, results can be sensitive to
             the choice of kernel representing the within-cluster data
             density. Leveraging properties of pairwise differences
             between data points, we propose a class of Bayesian distance
             clustering methods, which rely on modeling the likelihood of
             the pairwise distances in place of the original data.
             Although some information in the data is discarded, we gain
             substantial robustness to modeling assumptions. The proposed
             approach represents an appealing middle ground between
             distance- and model-based clustering, drawing advantages
             from each of these canonical approaches. We illustrate
             dramatic gains in the ability to infer clusters that are not
             well represented by the usual choices of kernel. A
             simulation study is included to assess performance relative
             to competitors, and we apply the approach to clustering of
             brain genome expression data.},
   Key = {fds362587}
}
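
A toy Python version of the idea, with an exponential working model
for within-cluster pairwise distances standing in for the paper's
distance likelihood; the data, scale, and the two candidate
partitions are all invented.

    import numpy as np
    from scipy.spatial.distance import pdist, squareform

    rng = np.random.default_rng(7)
    X = np.vstack([rng.normal(0, 0.3, (20, 2)),
                   rng.normal(2, 0.3, (20, 2))])
    D = squareform(pdist(X))

    def log_lik(labels, scale=0.5):
        """Model within-cluster distances as Exponential(scale),
        in place of a kernel on the original data."""
        total = 0.0
        for k in np.unique(labels):
            idx = np.flatnonzero(labels == k)
            for a, i in enumerate(idx):
                for j in idx[a + 1:]:
                    total += -np.log(scale) - D[i, j] / scale
        return total

    truth = np.repeat([0, 1], 20)
    print(log_lik(truth), log_lik(rng.permutation(truth)))
    # The true partition scores far higher than a shuffled one.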

@article{fds257841,
   Author = {Durante, D and Dunson, DB},
   Title = {Bayesian dynamic financial networks with time-varying
             predictors},
   Journal = {Statistics and Probability Letters},
   Volume = {93},
   Pages = {19-26},
   Publisher = {Elsevier BV},
   Year = {2014},
   Month = {January},
   ISSN = {0167-7152},
   url = {http://dx.doi.org/10.1016/j.spl.2014.06.015},
   Abstract = {We propose a targeted and robust modeling of dependence in
             multivariate time series via dynamic networks, with
             time-varying predictors included to improve interpretation
             and prediction. The model is applied to financial markets,
             estimating the effects of verbal and material cooperation. ©
             2014 Elsevier B.V.},
   Doi = {10.1016/j.spl.2014.06.015},
   Key = {fds257841}
}

@article{fds258073,
   Author = {Dunson, DB},
   Title = {Bayesian dynamic modeling of latent trait
             distributions.},
   Journal = {Biostatistics (Oxford, England)},
   Volume = {7},
   Number = {4},
   Pages = {551-568},
   Year = {2006},
   Month = {October},
   ISSN = {1465-4644},
   url = {http://dx.doi.org/10.1093/biostatistics/kxj025},
   Abstract = {Studies of latent traits often collect data for multiple
             items measuring different aspects of the trait. For such
             data, it is common to consider models in which the different
             items are manifestations of a normal latent variable, which
             depends on covariates through a linear regression model.
             This article proposes a flexible Bayesian alternative in
             which the unknown latent variable density can change
             dynamically in location and shape across levels of a
             predictor. Scale mixtures of underlying normals are used in
             order to model flexibly the measurement errors and allow
             mixed categorical and continuous scales. A dynamic mixture
             of Dirichlet processes is used to characterize the latent
             response distributions. Posterior computation proceeds via a
             Markov chain Monte Carlo algorithm, with predictive
             densities used as a basis for inferences and evaluation of
             model fit. The methods are illustrated using data from a
             study of DNA damage in response to oxidative
             stress.},
   Doi = {10.1093/biostatistics/kxj025},
   Key = {fds258073}
}

@article{fds257926,
   Author = {Chen, Z and Dunson, DB},
   Title = {Bayesian estimation of survival functions under stochastic
             precedence.},
   Journal = {Lifetime data analysis},
   Volume = {10},
   Number = {2},
   Pages = {159-173},
   Year = {2004},
   Month = {June},
   url = {http://dx.doi.org/10.1023/b:lida.0000030201.12943.13},
   Abstract = {When estimating the distributions of two random variables, X
             and Y, investigators often have prior information that Y
             tends to be bigger than X. To formalize this prior belief,
             one could potentially assume stochastic ordering between X
             and Y, which implies Pr(X ≤ z) ≥ Pr(Y ≤ z)
             for all z in the domain of X and Y. Stochastic ordering is
             quite restrictive, though, and this article focuses instead
             on Bayesian estimation of the distribution functions of X
             and Y under the weaker stochastic precedence constraint,
             Pr(X ≤ Y) ≥ 0.5. We consider the case where both X
             and Y are categorical variables with common support and
             develop a Gibbs sampling algorithm for posterior
             computation. The method is then generalized to the case
             where X and Y are survival times. The proposed approach is
             illustrated using data on survival after tumor removal for
             patients with malignant melanoma.},
   Doi = {10.1023/b:lida.0000030201.12943.13},
   Key = {fds257926}
}
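
The two constraints are easy to compare numerically. The Python
snippet below uses two invented categorical distributions on a
common support for which stochastic precedence holds even though
stochastic ordering fails, which is exactly the extra flexibility
the weaker constraint buys.

    import numpy as np

    px = np.array([0.1, 0.5, 0.1, 0.2, 0.1])
    py = np.array([0.3, 0.0, 0.1, 0.2, 0.4])

    # For independent X, Y: Pr(X <= Y) = sum_j Pr(Y = j) Pr(X <= j).
    Fx, Fy = np.cumsum(px), np.cumsum(py)
    print(float(py @ Fx) >= 0.5)     # True: stochastic precedence
    print(bool(np.all(Fx >= Fy)))    # False: stochastic ordering fails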

@article{fds349530,
   Author = {Ferrari, F and Dunson, DB},
   Title = {Bayesian Factor Analysis for Inference on
             Interactions.},
   Journal = {Journal of the American Statistical Association},
   Volume = {116},
   Number = {535},
   Pages = {1521-1532},
   Year = {2021},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2020.1745813},
   Abstract = {This article is motivated by the problem of inference on
             interactions among chemical exposures impacting human health
             outcomes. Chemicals often co-occur in the environment or in
             synthetic mixtures and as a result exposure levels can be
             highly correlated. We propose a latent factor joint model,
             which includes shared factors in both the predictor and
             response components while assuming conditional independence.
             By including a quadratic regression in the latent variables
             in the response component, we induce flexible dimension
             reduction in characterizing main effects and interactions.
             We propose a Bayesian approach to inference under this
             Factor analysis for INteractions (FIN) framework. Through
             appropriate modifications of the factor modeling structure,
             FIN can accommodate higher order interactions. We evaluate
             the performance using a simulation study and data from the
             National Health and Nutrition Examination Survey (NHANES).
             Code is available on GitHub.},
   Doi = {10.1080/01621459.2020.1745813},
   Key = {fds349530}
}

@article{fds322551,
   Author = {Zhou, J and Bhattacharya, A and Herring, A and Dunson,
             D},
   Title = {Bayesian factorizations of big sparse tensors.},
   Journal = {Journal of the American Statistical Association},
   Volume = {110},
   Number = {512},
   Pages = {1562-1576},
   Publisher = {Informa UK Limited},
   Year = {2015},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2014.983233},
   Abstract = {It has become routine to collect data that are structured as
             multiway arrays (tensors). There is an enormous literature
             on low rank and sparse matrix factorizations, but limited
             consideration of extensions to the tensor case in
             statistics. The most common low rank tensor factorization
             relies on parallel factor analysis (PARAFAC), which
             expresses a rank k tensor as a sum of rank one tensors. When
             observations are only available for a tiny subset of the
             cells of a big tensor, the low rank assumption is not
             sufficient and PARAFAC has poor performance. We induce an
             additional layer of dimension reduction by allowing the
             effective rank to vary across dimensions of the table. For
             concreteness, we focus on a contingency table application.
             Taking a Bayesian approach, we place priors on terms in the
             factorization and develop an efficient Gibbs sampler for
             posterior computation. Theory is provided showing posterior
             concentration rates in high-dimensional settings, and the
             methods are shown to have excellent performance in
             simulations and several real data applications.},
   Doi = {10.1080/01621459.2014.983233},
   Key = {fds322551}
}
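
The factorization identity that PARAFAC relies on is compact enough
to verify directly in Python; the dimensions and rank below are
arbitrary, and none of the paper's priors or rank adaptation appear
here.

    import numpy as np

    rng = np.random.default_rng(8)
    dims, rank = (10, 8, 6), 3

    # A rank-3 tensor as a sum of outer products of factor columns.
    A, B, C = (rng.normal(size=(d, rank)) for d in dims)
    T = np.einsum('ir,jr,kr->ijk', A, B, C)

    # The same tensor built term by term from rank-one pieces.
    T2 = sum(np.multiply.outer(np.multiply.outer(A[:, r], B[:, r]),
                               C[:, r]) for r in range(rank))
    print(np.allclose(T, T2))            # True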

@article{fds327028,
   Author = {Zhu, B and Dunson, DB},
   Title = {Bayesian functional data modeling for heterogeneous
             volatility},
   Journal = {Bayesian Analysis},
   Volume = {12},
   Number = {2},
   Pages = {335-350},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2017},
   Month = {June},
   url = {http://dx.doi.org/10.1214/16-BA1004},
   Abstract = {Although there are many methods for functional data
             analysis, less emphasis is put on characterizing variability
             among volatilities of individual functions. In particular,
             certain individuals exhibit erratic swings in their
             trajectory while other individuals have more stable
             trajectories. There is evidence of such volatility
             heterogeneity in blood pressure trajectories during
             pregnancy, for example, and reason to suspect that
             volatility is a biologically important feature. Most
             functional data analysis models implicitly assume similar or
             identical smoothness of the individual functions, and hence
             can lead to misleading inferences on volatility and an
             inadequate representation of the functions. We propose a
             novel class of functional data analysis models characterized
             using hierarchical stochastic differential equations. We
             model the derivatives of a mean function and deviation
             functions using Gaussian processes, while also allowing
             covariate dependence including on the volatilities of the
             deviation functions. Following a Bayesian approach to
             inference, a Markov chain Monte Carlo algorithm is used for
             posterior computation. The methods are tested on simulated
             data and applied to blood pressure trajectories during
             pregnancy.},
   Doi = {10.1214/16-BA1004},
   Key = {fds327028}
}

@article{fds304007,
   Author = {Murray, JS and Dunson, DB and Carin, L and Lucas,
             JE},
   Title = {Bayesian Gaussian Copula Factor Models for Mixed
             Data.},
   Journal = {Journal of the American Statistical Association},
   Volume = {108},
   Number = {502},
   Pages = {656-665},
   Year = {2013},
   Month = {June},
   url = {http://arxiv.org/abs/1111.0317v2},
   Abstract = {Gaussian factor models have proven widely useful for
             parsimoniously characterizing dependence in multivariate
             data. There is a rich literature on their extension to mixed
             categorical and continuous variables, using latent Gaussian
             variables or through generalized latent trait models
             accommodating measurements in the exponential family.
             However, when generalizing to non-Gaussian measured
             variables the latent variables typically influence both the
             dependence structure and the form of the marginal
             distributions, complicating interpretation and introducing
             artifacts. To address this problem we propose a novel class
             of Bayesian Gaussian copula factor models which decouple the
             latent factors from the marginal distributions. A
             semiparametric specification for the marginals based on the
             extended rank likelihood yields straightforward
             implementation and substantial computational gains. We
             provide new theoretical and empirical justifications for
             using this likelihood in Bayesian inference. We propose new
             default priors for the factor loadings and develop efficient
             parameter-expanded Gibbs sampling for posterior computation.
             The methods are evaluated through simulations and applied to
             a dataset in political science. The models in this paper are
             implemented in the R package bfa.},
   Doi = {10.1080/01621459.2012.762328},
   Key = {fds304007}
}

@article{fds258043,
   Author = {Park, JH and Dunson, DB},
   Title = {Bayesian generalized product partition model},
   Journal = {Statistica Sinica},
   Volume = {20},
   Number = {3},
   Pages = {1203-1226},
   Year = {2010},
   Month = {July},
   ISSN = {1017-0405},
   url = {http://hdl.handle.net/10161/4623},
   Abstract = {Starting with a carefully formulated Dirichlet process (DP)
             mixture model, we derive a generalized product partition
             model (GPPM) in which the partition process is
             predictor-dependent. The GPPM generalizes DP clustering to
             relax the exchangeability assumption through the
             incorporation of predictors, resulting in a generalized
             Pólya urn scheme. In addition, the GPPM can be used for
             formulating flexible semiparametric Bayes models for
             conditional distribution estimation, bypassing the need for
             expensive computation of large numbers of unknowns
             characterizing priors for dependent collections of random
             probability measures. A variety of special cases are
             considered, and an efficient Gibbs sampling algorithm is
             developed for posterior computation. The methods are
             illustrated using simulation examples and an epidemiologic
             application.},
   Key = {fds258043}
}

@article{fds323700,
   Author = {Lock, EF and Dunson, DB},
   Title = {Bayesian genome- and epigenome-wide association studies with
             gene level dependence.},
   Journal = {Biometrics},
   Volume = {73},
   Number = {3},
   Pages = {1018-1028},
   Year = {2017},
   Month = {September},
   url = {http://dx.doi.org/10.1111/biom.12649},
   Abstract = {High-throughput genetic and epigenetic data are often
             screened for associations with an observed phenotype. For
             example, one may wish to test hundreds of thousands of
             genetic variants, or DNA methylation sites, for an
             association with disease status. These genomic variables can
             naturally be grouped by the gene they encode, among other
             criteria. However, standard practice in such applications is
             independent screening with a universal correction for
             multiplicity. We propose a Bayesian approach in which the
             prior probability of an association for a given genomic
             variable depends on its gene, and the gene-specific
             probabilities are modeled nonparametrically. This
             hierarchical model allows for appropriate gene and
             genome-wide multiplicity adjustments, and can be
             incorporated into a variety of Bayesian association
             screening methodologies with negligible increase in
             computational complexity. We describe an application to
             screening for differences in DNA methylation between lower
             grade glioma and glioblastoma multiforme tumor samples from
             The Cancer Genome Atlas. Software is available via the
             package BayesianScreening for R: github.com/lockEF/BayesianScreening.},
   Doi = {10.1111/biom.12649},
   Key = {fds323700}
}

@article{fds257964,
   Author = {Pati, D and Reich, BJ and Dunson, DB},
   Title = {Bayesian geostatistical modelling with informative sampling
             locations.},
   Journal = {Biometrika},
   Volume = {98},
   Number = {1},
   Pages = {35-48},
   Year = {2011},
   Month = {March},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/asq067},
   Abstract = {We consider geostatistical models that allow the locations
             at which data are collected to be informative about the
             outcomes. A Bayesian approach is proposed, which models the
             locations using a log Gaussian Cox process, while modelling
             the outcomes conditionally on the locations as Gaussian with
             a Gaussian process spatial random effect and adjustment for
             the location intensity process. We prove posterior propriety
             under an improper prior on the parameter controlling the
             degree of informative sampling, demonstrating that the data
             are informative. In addition, we show that the density of
             the locations and mean function of the outcome process can
             be estimated consistently under mild assumptions. The
             methods show significant evidence of informative sampling
             when applied to ozone data over Eastern U.S.A.},
   Doi = {10.1093/biomet/asq067},
   Key = {fds257964}
}

@article{fds329114,
   Author = {Zhu, H and Strawn, N and Dunson, DB},
   Title = {Bayesian graphical models for multivariate functional
             data},
   Journal = {Journal of Machine Learning Research},
   Volume = {17},
   Pages = {1-27},
   Year = {2016},
   Month = {October},
   Abstract = {Graphical models express conditional independence
             relationships among variables. Although methods for
             vector-valued data are well established, functional data
             graphical models remain underdeveloped. By functional data,
             we refer to data that are realizations of random functions
             varying over a continuum (e.g., images, signals). We
             introduce a notion of conditional independence between
             random functions, and construct a framework for Bayesian
             inference of undirected, decomposable graphs in the
             multivariate functional data context. This framework is
             based on extending Markov distributions and hyper Markov
             laws from random variables to random processes, providing a
             principled alternative to naive application of multivariate
             methods to discretized functional data. Markov properties
             facilitate the composition of likelihoods and priors
             according to the decomposition of a graph. Our focus is on
             Gaussian process graphical models using orthogonal basis
             expansions. We propose a hyper-inverse-Wishart-process prior
             for the covariance kernels of the infinite coefficient
             sequences of the basis expansion, and establish its
             existence and uniqueness. We also prove the strong hyper
             Markov property and the conjugacy of this prior under a
             finite rank condition of the prior kernel parameter.
             Stochastic search Markov chain Monte Carlo algorithms are
             developed for posterior inference, assessed through
             simulations, and applied to a study of brain activity and
             alcoholism.},
   Key = {fds329114}
}

@article{fds355488,
   Author = {Moran, KR and Turner, EL and Dunson, D and Herring,
             AH},
   Title = {Bayesian hierarchical factor regression models to infer
             cause of death from verbal autopsy data.},
   Journal = {J R Stat Soc Ser C Appl Stat},
   Volume = {70},
   Number = {3},
   Pages = {532-557},
   Year = {2021},
   Month = {June},
   url = {http://dx.doi.org/10.1111/rssc.12468},
   Abstract = {In low-resource settings where vital registration of death
             is not routine, it is often of critical interest to determine
             and study the cause of death (COD) for individuals and the
             cause-specific mortality fraction (CSMF) for populations.
             Post-mortem autopsies, considered the gold standard for COD
             assignment, are often difficult or impossible to implement
             due to deaths occurring outside the hospital, expense,
             and/or cultural norms. For this reason, Verbal Autopsies
             (VAs) are commonly conducted, consisting of a questionnaire
             administered to next of kin recording demographic
             information, known medical conditions, symptoms, and other
             factors for the decedent. This article proposes a novel
             class of hierarchical factor regression models that avoid
             restrictive assumptions of standard methods, allow both the
             mean and covariance to vary with COD category, and can
             include covariate information on the decedent, region, or
             events surrounding death. Taking a Bayesian approach to
             inference, this work develops an MCMC algorithm and
             validates the FActor Regression for Verbal Autopsy (FARVA)
             model in simulation experiments. An application of FARVA to
             real VA data shows improved goodness-of-fit and better
             predictive performance in inferring COD and CSMF over
             competing methods. Code and a user manual are made available
             at https://github.com/kelrenmor/farva.},
   Doi = {10.1111/rssc.12468},
   Key = {fds355488}
}

@article{fds258049,
   Author = {Scarpa, B and Dunson, DB},
   Title = {Bayesian hierarchical functional data analysis via
             contaminated informative priors.},
   Journal = {Biometrics},
   Volume = {65},
   Number = {3},
   Pages = {772-780},
   Year = {2009},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2008.01163.x},
   Abstract = {A variety of flexible approaches have been proposed for
             functional data analysis, allowing both the mean curve and
             the distribution about the mean to be unknown. Such methods
             are most useful when there is limited prior information.
             Motivated by applications to modeling of temperature curves
             in the menstrual cycle, this article proposes a flexible
             approach for incorporating prior information in
             semiparametric Bayesian analyses of hierarchical functional
             data. The proposed approach is based on specifying the
             distribution of functions as a mixture of a parametric
             hierarchical model and a nonparametric contamination. The
             parametric component is chosen based on prior knowledge,
             while the contamination is characterized as a functional
             Dirichlet process. In the motivating application, the
             contamination component allows unanticipated curve shapes in
             unhealthy menstrual cycles. Methods are developed for
             posterior computation, and the approach is applied to data
             from a European fecundability study.},
   Doi = {10.1111/j.1541-0420.2008.01163.x},
   Key = {fds258049}
}

@article{fds258047,
   Author = {Rodriguez, A and Dunson, DB and Taylor, J},
   Title = {Bayesian hierarchically weighted finite mixture models for
             samples of distributions.},
   Journal = {Biostatistics (Oxford, England)},
   Volume = {10},
   Number = {1},
   Pages = {155-171},
   Year = {2009},
   Month = {January},
   ISSN = {1465-4644},
   url = {http://dx.doi.org/10.1093/biostatistics/kxn024},
   Abstract = {Finite mixtures of Gaussian distributions are known to
             provide an accurate approximation to any unknown density.
             Motivated by DNA repair studies in which data are collected
             for samples of cells from different individuals, we propose
             a class of hierarchically weighted finite mixture models.
             The modeling framework incorporates a collection of k
             Gaussian basis distributions, with the individual-specific
             response densities expressed as mixtures of these bases. To
             allow heterogeneity among individuals and predictor effects,
             we model the mixture weights, while treating the basis
             distributions as unknown but common to all distributions.
             This results in a flexible hierarchical model for samples of
             distributions. We consider analysis of variance-type
             structures and a parsimonious latent factor representation,
             which leads to simplified inferences on non-Gaussian
             covariance structures. Methods for posterior computation are
             developed, and the model is used to select genetic
             predictors of baseline DNA damage, susceptibility to induced
             damage, and rate of repair.},
   Doi = {10.1093/biostatistics/kxn024},
   Key = {fds258047}
}
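
The basic structure, shared Gaussian bases with individual-specific
weights, can be sketched in a few lines of Python; the bases,
weights, and number of subjects are invented, and the hierarchical
regression on the weights is omitted.

    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(9)
    mus = np.array([-2.0, 0.0, 1.0, 3.0])    # common basis means
    sds = np.array([0.5, 0.7, 0.5, 0.8])     # common basis scales

    # Individual-specific weights over the shared bases; these are
    # where the paper's hierarchical/predictor structure enters.
    W = rng.dirichlet(np.ones(4), size=3)

    def f_i(y, i):
        """f_i(y) = sum_k W[i, k] * N(y; mus[k], sds[k]^2)."""
        return float(W[i] @ stats.norm.pdf(y, mus, sds))

    for i in range(3):
        print(i, f_i(0.0, i))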

@article{fds257892,
   Author = {Dunson, DB and Dinse, GE},
   Title = {Bayesian incidence analysis of animal tumorigenicity
             data},
   Journal = {Journal of the Royal Statistical Society. Series C: Applied
             Statistics},
   Volume = {50},
   Number = {2},
   Pages = {125-141},
   Publisher = {WILEY},
   Year = {2001},
   Month = {January},
   url = {http://dx.doi.org/10.1111/1467-9876.00224},
   Abstract = {Statistical inference about tumorigenesis should focus on
             the tumour incidence rate. Unfortunately, in most animal
             carcinogenicity experiments, tumours are not observable in
             live animals and censoring of the tumour onset times is
             informative. In this paper, we propose a Bayesian method for
             analysing data from such studies. Our approach focuses on
             the incidence of tumours and accommodates occult tumours and
             censored onset times without restricting tumour lethality,
             relying on cause-of-death data, or requiring interim
             sacrifices. We represent the underlying state of nature by a
             multistate stochastic process and assume general probit
             models for the time-specific transition rates. These models
             allow the incorporation of covariates, historical control
             data and subjective prior information. The inherent
             flexibility of this approach facilitates the interpretation
             of results, particularly when the sample size is small or
             the data are sparse. We use a Gibbs sampler to estimate the
             relevant posterior distributions. The methods proposed are
             applied to data from a US National Toxicology Program
             carcinogenicity study.},
   Doi = {10.1111/1467-9876.00224},
   Key = {fds257892}
}

@article{fds340385,
   Author = {Durante, D and Dunson, DB},
   Title = {Bayesian inference and testing of group differences in brain
             networks},
   Journal = {Bayesian Analysis},
   Volume = {13},
   Number = {1},
   Pages = {29-58},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2018},
   Month = {January},
   url = {http://dx.doi.org/10.1214/16-BA1030},
   Abstract = {Network data are increasingly collected along with other
             variables of interest. Our motivation is drawn from
             neurophysiology studies measuring brain connectivity
             networks for a sample of individuals along with their
             membership to a low or high creative reasoning group. It is
             of paramount importance to develop statistical methods for
             testing of global and local changes in the structural
             interconnections among brain regions across groups. We
             develop a general Bayesian procedure for inference and
             testing of group differences in the network structure, which
             relies on a nonparametric representation for the conditional
             probability mass function associated with a network-valued
             random variable. By leveraging a mixture of low-rank
             factorizations, we allow simple global and local hypothesis
             testing adjusting for multiplicity. An efficient Gibbs
             sampler is defined for posterior computation. We provide
             theoretical results on the flexibility of the model and
             assess testing performance in simulations. The approach is
             applied to provide novel insights on the relationships
             between human brain networks and creativity.},
   Doi = {10.1214/16-BA1030},
   Key = {fds340385}
}

@article{fds257968,
   Author = {Xing, C and Dunson, DB},
   Title = {Bayesian inference for genomic data integration reduces
             misclassification rate in predicting protein-protein
             interactions.},
   Journal = {PLoS computational biology},
   Volume = {7},
   Number = {7},
   Pages = {e1002110},
   Year = {2011},
   Month = {July},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/21829334},
   Abstract = {Protein-protein interactions (PPIs) are essential to most
             fundamental cellular processes. There has been increasing
             interest in reconstructing PPI networks. However, several
             critical difficulties exist in obtaining reliable
             predictions. Noticeably, false positive rates can be as high
             as >80%. Error correction from each generating source can be
             both time-consuming and inefficient due to the difficulty of
             covering the errors from multiple levels of data processing
             procedures within a single test. We propose a novel Bayesian
             integration method, deemed nonparametric Bayes ensemble
             learning (NBEL), to lower the misclassification rate (both
             false positives and negatives) through automatically
             up-weighting data sources that are most informative, while
             down-weighting less informative and biased sources.
             Extensive studies indicate that NBEL is significantly more
             robust than the classic naïve Bayes to unreliable,
             error-prone and contaminated data. On a large human data set
             our NBEL approach predicts many more PPIs than naïve Bayes.
             This suggests that previous studies may have large numbers
             of not only false positives but also false negatives. The
             validation on two high-quality human PPI datasets
             supports our observations. Our experiments demonstrate that
             it is feasible to predict high-throughput PPIs
             computationally with substantially reduced false positives
             and false negatives. The ability to predict large numbers
             of PPIs both reliably and automatically may encourage the
             use of computational approaches to correct data errors in
             general, and may speed up high-quality PPI prediction. Such
             reliable prediction may provide a solid platform for other
             studies, such as protein function prediction and the roles
             of PPIs in disease susceptibility.},
   Doi = {10.1371/journal.pcbi.1002110},
   Key = {fds257968}
}

@article{fds343492,
   Author = {Rao, V and Adams, RP and Dunson, DB},
   Title = {Bayesian inference for Matérn repulsive
             processes},
   Journal = {Journal of the Royal Statistical Society. Series B:
             Statistical Methodology},
   Volume = {79},
   Number = {3},
   Pages = {877-897},
   Year = {2017},
   Month = {June},
   url = {http://dx.doi.org/10.1111/rssb.12198},
   Abstract = {In many applications involving point pattern data, the
             Poisson process assumption is unrealistic, with the data
             exhibiting a more regular spread. Such repulsion between
             events is exhibited by trees for example, because of
             competition for light and nutrients. Other examples include
             the locations of biological cells and cities, and the times
             of neuronal spikes. Given the many applications of repulsive
             point processes, there is a surprisingly limited literature
             developing flexible, realistic and interpretable models, as
             well as efficient inferential methods. We address this gap
             by developing a modelling framework around the Matérn type
             III repulsive process. We consider some extensions of the
             original Matérn type III process for both the homogeneous
             and the inhomogeneous cases. We also derive the probability
             density of this generalized Matérn process, allowing us to
             characterize the conditional distribution of the various
             latent variables, and leading to a novel and efficient
             Markov chain Monte Carlo algorithm. We apply our ideas to
             data sets of spatial locations of trees, nerve fibre cells
             and Greyhound bus stations.},
   Doi = {10.1111/rssb.12198},
   Key = {fds343492}
}
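
The sequential thinning that defines a Matérn type III process is
simple to simulate; the rate, inhibition radius, and unit-square
window below are illustrative, and the paper's inferential
machinery is of course absent.

    import numpy as np

    rng = np.random.default_rng(10)
    r, lam = 0.08, 300.0                 # inhibition radius, rate

    # Primary Poisson process with independent uniform birth times.
    n = rng.poisson(lam)
    pts, births = rng.random((n, 2)), rng.random(n)

    # Visit points in birth order; keep a point only if no earlier
    # *retained* point lies within distance r (type III thinning).
    kept = []
    for i in np.argsort(births):
        if all(np.linalg.norm(pts[i] - pts[j]) >= r for j in kept):
            kept.append(i)

    print(n, "primary points ->", len(kept), "retained")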

@article{fds257990,
   Author = {Chen, B and Chen, M and Paisley, J and Zaas, A and Woods, C and Ginsburg,
             GS and Hero, A and Lucas, J and Dunson, D and Carin,
             L},
   Title = {Bayesian inference of the number of factors in
             gene-expression analysis: application to human virus
             challenge studies.},
   Journal = {BMC Bioinformatics},
   Volume = {11},
   Pages = {552},
   Year = {2010},
   Month = {November},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/21062443},
   Abstract = {BACKGROUND: Nonparametric Bayesian techniques have been
             developed recently to extend the sophistication of factor
             models, allowing one to infer the number of appropriate
             factors from the observed data. We consider such techniques
             for sparse factor analysis, with application to
             gene-expression data from three virus challenge studies.
             Particular attention is placed on employing the Beta Process
             (BP), the Indian Buffet Process (IBP), and related
             sparseness-promoting techniques to infer a proper number of
             factors. The posterior density function on the model
             parameters is computed using Gibbs sampling and variational
             Bayesian (VB) analysis. RESULTS: Time-evolving
             gene-expression data are considered for respiratory
             syncytial virus (RSV), Rhino virus, and influenza, using
             blood samples from healthy human subjects. These data were
             acquired in three challenge studies, each executed after
             receiving institutional review board (IRB) approval from
             Duke University. Comparisons are made between several
             alternative means of performing nonparametric factor
             analysis on these data, with comparisons as well to
             sparse-PCA and Penalized Matrix Decomposition (PMD), closely
             related non-Bayesian approaches. CONCLUSIONS: Applying the
             Beta Process to the factor scores, or to the singular values
             of a pseudo-SVD construction, the proposed algorithms infer
             the number of factors in gene-expression data. For real data
             the "true" number of factors is unknown; in our simulations
             we consider a range of noise variances, and the proposed
             Bayesian models inferred the number of factors accurately
             relative to other methods in the literature, such as
             sparse-PCA and PMD. We have also identified a "pan-viral"
             factor of importance for each of the three viruses
             considered in this study. We have identified a set of genes
             associated with this pan-viral factor, of interest for early
             detection of such viruses based upon the host response, as
             quantified via gene-expression data.},
   Doi = {10.1186/1471-2105-11-552},
   Key = {fds257990}
}
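
Of the sparseness-promoting devices mentioned, the Indian buffet
process is the easiest to sketch: a draw is a binary feature-usage
matrix with a random number of columns, which is what lets the
number of factors be inferred rather than fixed. The concentration
parameter and sample size below are arbitrary.

    import numpy as np

    rng = np.random.default_rng(11)
    alpha, n_rows = 2.0, 50

    rows, counts = [], []                # counts[k]: users of dish k
    for i in range(1, n_rows + 1):
        row = [int(rng.random() < m / i) for m in counts]
        counts = [m + z for m, z in zip(counts, row)]
        new = rng.poisson(alpha / i)     # new dishes for customer i
        row += [1] * new
        counts += [1] * new
        rows.append(row)

    Z = np.zeros((n_rows, len(counts)), dtype=int)
    for i, row in enumerate(rows):
        Z[i, :len(row)] = row
    print("instantiated factors:", Z.shape[1])
    print("factor usage counts:", Z.sum(axis=0))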

@article{fds258053,
   Author = {Dunson, DB and Herring, A and Siega-Riz, AM},
   Title = {Bayesian Inference on Changes in Response Densities over
             Predictor Clusters.},
   Journal = {Journal of the American Statistical Association},
   Volume = {103},
   Number = {484},
   Pages = {1508-1517},
   Publisher = {Informa UK Limited},
   Year = {2008},
   Month = {January},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214508000001039},
   Abstract = {In epidemiology, it is often of interest to assess how
             individuals with different trajectories over time in an
             environmental exposure or biomarker differ with respect to a
             continuous response. For ease in interpretation and
             presentation of results, epidemiologists typically
             categorize predictors prior to analysis. To extend this
             approach to time-varying predictors, one can cluster
             individuals by their predictor trajectory, with the cluster
             index included as a predictor in a regression model for the
             response. This article develops a semiparametric Bayes
             approach, which avoids assuming a pre-specified number of
             clusters and allows the response to vary nonparametrically
             over predictor clusters. This methodology is motivated by
             interest in relating trajectories in weight gain during
             pregnancy to the distribution of birth weight adjusted for
             gestational age at delivery. In this setting, the proposed
             approach allows the tails of the birth weight density to
             vary flexibly over weight gain clusters.},
   Doi = {10.1198/016214508000001039},
   Key = {fds258053}
}

@article{fds374277,
   Author = {Chakraborty, A and Ou, R and Dunson, DB},
   Title = {Bayesian Inference on High-Dimensional Multivariate Binary
             Responses},
   Journal = {Journal of the American Statistical Association},
   Year = {2023},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2023.2260053},
   Abstract = {It has become increasingly common to collect
             high-dimensional binary response data; for example, with the
             emergence of new sampling techniques in ecology. In smaller
             dimensions, multivariate probit (MVP) models are routinely
             used for inferences. However, algorithms for fitting such
             models face issues in scaling up to high dimensions due to
             the intractability of the likelihood, involving an integral
             over a multivariate normal distribution having no analytic
             form. Although a variety of algorithms have been proposed to
             approximate this intractable integral, these approaches are
             difficult to implement and/or inaccurate in high dimensions.
             Our main focus is on accommodating high-dimensional binary
             response data with a small-to-moderate number of covariates.
             We propose a two-stage approach for inference on model
             parameters while taking care of uncertainty propagation
             between the stages. We use the special structure of latent
             Gaussian models to avoid the highly expensive computation
             involved in joint parameter estimation by focusing inference
             on marginal distributions of model parameters. This essentially
             makes the method embarrassingly parallel for both stages. We
             illustrate performance in simulations and applications to
             joint species distribution modeling in ecology.
             Supplementary materials for this article are available
             online.},
   Doi = {10.1080/01621459.2023.2260053},
   Key = {fds374277}
}

@article{fds257918,
   Author = {Dunson, DB and Neelon, B},
   Title = {Bayesian inference on order-constrained parameters in
             generalized linear models.},
   Journal = {Biometrics},
   Volume = {59},
   Number = {2},
   Pages = {286-295},
   Year = {2003},
   Month = {June},
   url = {http://dx.doi.org/10.1111/1541-0420.00035},
   Abstract = {In biomedical studies, there is often interest in assessing
             the association between one or more ordered categorical
             predictors and an outcome variable, adjusting for
             covariates. For a k-level predictor, one typically uses
             either a k-1 degree of freedom (df) test or a single df
             trend test, which requires scores for the different levels
             of the predictor. In the absence of knowledge of a
             parametric form for the response function, one can
             incorporate monotonicity constraints to improve the
             efficiency of tests of association. This article proposes a
             general Bayesian approach for inference on order-constrained
             parameters in generalized linear models. Instead of choosing
             a prior distribution with support on the constrained space,
             which can result in major computational difficulties, we
             propose to map draws from an unconstrained posterior density
             using an isotonic regression transformation. This approach
             allows flat regions over which increases in the level of a
             predictor have no effect. Bayes factors for assessing
             ordered trends can be computed based on the output from a
             Gibbs sampling algorithm. Results from a simulation study
             are presented and the approach is applied to data from a
             time-to-pregnancy study.},
   Doi = {10.1111/1541-0420.00035},
   Key = {fds257918}
}
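
As an illustration of the projection step described in the abstract above, the
following minimal Python sketch maps unconstrained posterior draws to
order-constrained draws via the pool-adjacent-violators algorithm. This is a
generic illustration, not the authors' code; the toy draws and function names
are hypothetical.

    import numpy as np

    def pava(y):
        # Pool-adjacent-violators: least-squares projection of y onto the
        # cone of nondecreasing sequences.
        blocks = []  # each block holds [value, weight]
        for yi in np.asarray(y, dtype=float):
            blocks.append([yi, 1.0])
            # Merge backwards while the monotone constraint is violated.
            while len(blocks) > 1 and blocks[-2][0] > blocks[-1][0]:
                v1, w1 = blocks.pop()
                v0, w0 = blocks.pop()
                blocks.append([(w0 * v0 + w1 * v1) / (w0 + w1), w0 + w1])
        return np.concatenate([[v] * int(w) for v, w in blocks])

    # Map unconstrained posterior draws (rows) to order-constrained draws.
    rng = np.random.default_rng(0)
    draws = rng.normal(loc=[0.0, 0.5, 0.3, 1.0], scale=0.2, size=(1000, 4))
    monotone_draws = np.apply_along_axis(pava, 1, draws)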

@article{fds327030,
   Author = {Datta, J and Dunson, DB},
   Title = {Bayesian inference on quasi-sparse count
             data.},
   Journal = {Biometrika},
   Volume = {103},
   Number = {4},
   Pages = {971-983},
   Year = {2016},
   Month = {December},
   url = {http://dx.doi.org/10.1093/biomet/asw053},
   Abstract = {There is growing interest in analysing high-dimensional
             count data, which often exhibit quasi-sparsity corresponding
             to an overabundance of zeros and small nonzero counts.
             Existing methods for analysing multivariate count data via
             Poisson or negative binomial log-linear hierarchical models
             with zero-inflation cannot flexibly adapt to quasi-sparse
             settings. We develop a new class of continuous local-global
             shrinkage priors tailored to quasi-sparse counts.
             Theoretical properties are assessed, including flexible
             posterior concentration and stronger control of false
             discoveries in multiple testing. Simulation studies
             demonstrate excellent small-sample properties relative to
             competing methods. We use the method to detect rare
             mutational hotspots in exome sequencing data and to identify
             North American cities most impacted by terrorism.},
   Doi = {10.1093/biomet/asw053},
   Key = {fds327030}
}
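
The quasi-sparse regime described above, an overabundance of zeros and small
nonzero counts, is easy to reproduce by simulation. The sketch below is a
generic stand-in for such data and does not implement the shrinkage prior
proposed in the paper; all values are hypothetical.

    import numpy as np

    rng = np.random.default_rng(0)
    n = 1000
    # Most Poisson means are tiny but nonzero ("quasi-sparse"), with a small
    # fraction of genuinely large signals.
    theta = np.where(rng.random(n) < 0.95, 0.05, rng.gamma(2.0, 5.0, size=n))
    y = rng.poisson(theta)
    print((y == 0).mean(), y.max())  # mostly zeros, plus a few large counts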

@article{fds257920,
   Author = {Dunson, DB and Herring, AH},
   Title = {Bayesian inferences in the Cox model for order-restricted
             hypotheses.},
   Journal = {Biometrics},
   Volume = {59},
   Number = {4},
   Pages = {916-923},
   Year = {2003},
   Month = {December},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2003.00106.x},
   Abstract = {In studying the relationship between an ordered categorical
             predictor and an event time, it is standard practice to
             include dichotomous indicators of the different levels of
             the predictor in a Cox model. One can then use a multiple
             degree-of-freedom score or partial likelihood ratio test for
             hypothesis testing. Often, interest focuses on comparing the
             null hypothesis of no difference to an order-restricted
             alternative, such as a monotone increase across levels of a
             predictor. This article proposes a Bayesian approach for
             addressing hypotheses of this type. We reparameterize the
             Cox model in terms of a cumulative product of parameters
             having conjugate prior densities, consisting of mixtures of
             point masses at one and truncated gamma densities. Due to
             the structure of the model, posterior computation can
             proceed via a simple and efficient Gibbs sampling algorithm.
             Posterior probabilities for the global null hypothesis and
             subhypotheses, comparing the hazards for specific groups,
             can be calculated directly from the output of a single Gibbs
             chain. The approach allows for level sets across which a
             predictor has no effect. Generalizations to multiple
             predictors are described, and the method is applied to a
             study of emergency medical treatment for
             stroke.},
   Doi = {10.1111/j.0006-341x.2003.00106.x},
   Key = {fds257920}
}

@article{fds257936,
   Author = {Dunson, DB and Stanford, JB},
   Title = {Bayesian inferences on predictors of conception
             probabilities.},
   Journal = {Biometrics},
   Volume = {61},
   Number = {1},
   Pages = {126-133},
   Year = {2005},
   Month = {March},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2005.031231.x},
   Abstract = {Reproductive scientists and couples attempting pregnancy are
             interested in identifying predictors of the day-specific
             probabilities of conception in relation to the timing of a
             single intercourse act. Because most menstrual cycles have
             multiple days of intercourse, the occurrence of conception
             represents the aggregation across Bernoulli trials for each
             intercourse day. Because of this data structure and
             dependency among the multiple cycles from a woman,
             implementing analyses has proven challenging. This article
             proposes a Bayesian approach based on a generalization of
             the Barrett and Marshall model to incorporate a
             woman-specific frailty and day-specific covariates. The
             model results in a simple closed form expression for the
             marginal probability of conception, and has an auxiliary
             variables formulation that facilitates efficient posterior
             computation. Although motivated by fecundability studies,
             the approach can be used for efficient variable selection
             and model averaging in general applications with categorical
             or discrete event time data.},
   Doi = {10.1111/j.0006-341x.2005.031231.x},
   Key = {fds257936}
}
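
The Barrett and Marshall form generalized in this paper gives a closed-form
cycle-level conception probability, which the short Python sketch below makes
concrete. The day-specific probabilities are hypothetical, and the paper's
woman-specific frailty and covariate terms are omitted.

    import numpy as np

    def conception_prob(p_day, intercourse):
        # Barrett-Marshall closed form: given intercourse indicators x_k and
        # day-specific probabilities p_k, the cycle-level probability is
        #   1 - prod_k (1 - p_k)^{x_k}.
        p = np.asarray(p_day, dtype=float)
        x = np.asarray(intercourse, dtype=int)
        return 1.0 - np.prod((1.0 - p) ** x)

    # Hypothetical six-day fertile window with intercourse on three days.
    print(conception_prob([0.05, 0.10, 0.15, 0.25, 0.20, 0.10],
                          [0, 1, 0, 1, 1, 0]))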

@article{fds257942,
   Author = {Hans, C and Dunson, DB},
   Title = {Bayesian inferences on umbrella orderings.},
   Journal = {Biometrics},
   Volume = {61},
   Number = {4},
   Pages = {1018-1026},
   Year = {2005},
   Month = {December},
   ISSN = {0006-341X},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/16401275},
   Abstract = {In regression applications with categorical predictors,
             interest often focuses on comparing the null hypothesis of
             homogeneity to an ordered alternative. This article proposes
             a Bayesian approach for addressing this problem in the
             setting of normal linear and probit regression models. The
             regression coefficients are assigned a conditionally
             conjugate prior density consisting of mixtures of point
             masses at 0 and truncated normal densities, with a (possibly
             unknown) changepoint parameter included to accommodate
             umbrella ordering. Two strategies of prior elicitation are
             considered: (1) a Bayesian Bonferroni approach in which the
             probability of the global null hypothesis is specified and
             local hypotheses are considered independent; and (2) an
             approach which treats these probabilities as random. A
             single Gibbs sampling chain can be used to obtain posterior
             probabilities for the different hypotheses and to estimate
             regression coefficients and predictive quantities either by
             model averaging or under the preferred hypothesis. The
             methods are applied to data from a carcinogenesis
             study.},
   Doi = {10.1111/j.1541-0420.2005.00373.x},
   Key = {fds257942}
}

@article{fds372788,
   Author = {Barrientos, AF and Sen, D and Page, GL and Dunson,
             DB},
   Title = {Bayesian Inferences on Uncertain Ranks and Orderings:
             Application to Ranking Players and Lineups},
   Journal = {Bayesian Analysis},
   Volume = {18},
   Number = {3},
   Pages = {777-806},
   Year = {2023},
   Month = {January},
   url = {http://dx.doi.org/10.1214/22-BA1324},
   Abstract = {It is common to be interested in rankings or order
             relationships among entities. In complex settings where one
             does not directly measure a univariate statistic upon which
             to base ranks, such inferences typically rely on statistical
             models having entity-specific parameters. These can be
             treated as random effects in hierarchical models
             characterizing variation among the entities. In this paper,
             we are particularly interested in the problem of ranking
             basketball players in terms of their contribution to team
             performance. Using data from the National Basketball
             Association (NBA) in the United States, we find that many
             players have similar latent ability levels, making any
             single estimated ranking highly misleading. The current
             literature fails to provide summaries of order relationships
             that adequately account for uncertainty. Motivated by this,
             we propose a Bayesian strategy for characterizing
             uncertainty in inferences on order relationships among
             players and lineups. Our approach adapts to scenarios in
             which uncertainty in ordering is high by producing more
             conservative results that improve interpretability. This is
             achieved through a reward function within a decision
             theoretic framework. We apply our approach to data from the
             2009–2010 NBA season.},
   Doi = {10.1214/22-BA1324},
   Key = {fds372788}
}

@article{fds257972,
   Author = {Wang, L and Dunson, DB},
   Title = {Bayesian isotonic density regression.},
   Journal = {Biometrika},
   Volume = {98},
   Number = {3},
   Pages = {537-551},
   Year = {2011},
   Month = {September},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/asr025},
   Abstract = {Density regression models allow the conditional distribution
             of the response given predictors to change flexibly over the
             predictor space. Such models are much more flexible than
             nonparametric mean regression models with nonparametric
             residual distributions, and are well supported in many
             applications. A rich variety of Bayesian methods have been
             proposed for density regression, but it is not clear whether
             such priors have full support so that any true
             data-generating model can be accurately approximated. This
             article develops a new class of density regression models
             that incorporate stochastic-ordering constraints which are
             natural when a response tends to increase or decrease
             monotonically with a predictor. Theory is developed showing
             large support. Methods are developed for hypothesis testing,
             with posterior computation relying on a simple Gibbs
             sampler. Frequentist properties are illustrated in a
             simulation study, and an epidemiology application is
             considered.},
   Doi = {10.1093/biomet/asr025},
   Key = {fds257972}
}

@article{fds257925,
   Author = {Neelon, B and Dunson, DB},
   Title = {Bayesian isotonic regression and trend analysis.},
   Journal = {Biometrics},
   Volume = {60},
   Number = {2},
   Pages = {398-406},
   Year = {2004},
   Month = {June},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2004.00184.x},
   Abstract = {In many applications, the mean of a response variable can be
             assumed to be a nondecreasing function of a continuous
             predictor, controlling for covariates. In such cases,
             interest often focuses on estimating the regression
             function, while also assessing evidence of an association.
             This article proposes a new framework for Bayesian isotonic
             regression and order-restricted inference. Approximating the
             regression function with a high-dimensional piecewise linear
             model, the nondecreasing constraint is incorporated through
             a prior distribution for the slopes consisting of a product
             mixture of point masses (accounting for flat regions) and
             truncated normal densities. To borrow information across the
             intervals and smooth the curve, the prior is formulated as a
             latent autoregressive normal process. This structure
             facilitates efficient posterior computation, since the full
             conditional distributions of the parameters have simple
             conjugate forms. Point and interval estimates of the
             regression function and posterior probabilities of an
             association for different regions of the predictor can be
             estimated from a single MCMC run. Generalizations to
             categorical outcomes and multiple predictors are described,
             and the approach is illustrated through an epidemiology
             application.},
   Doi = {10.1111/j.0006-341x.2004.00184.x},
   Key = {fds257925}
}

@article{fds360021,
   Author = {Moran, KR and Dunson, D and Wheeler, MW and Herring,
             AH},
   Title = {Bayesian joint modeling of chemical structure and dose
             response curves.},
   Journal = {The Annals of Applied Statistics},
   Volume = {15},
   Number = {3},
   Pages = {1405-1430},
   Year = {2021},
   Month = {September},
   url = {http://dx.doi.org/10.1214/21-aoas1461},
   Abstract = {Today there are approximately 85,000 chemicals regulated
             under the Toxic Substances Control Act, with around 2,000
             new chemicals introduced each year. It is impossible to
             screen all of these chemicals for potential toxic effects,
             either via full organism in vivo studies or in vitro
             high-throughput screening (HTS) programs.
             Toxicologists face the challenge of choosing which chemicals
             to screen, and predicting the toxicity of as yet unscreened
             chemicals. Our goal is to describe how variation in chemical
             structure relates to variation in toxicological response to
             enable in silico toxicity characterization designed
             to meet both of these challenges. With our Bayesian
             partially Supervised Sparse and Smooth Factor Analysis
             (BS3FA) model, we learn a distance between
             chemicals targeted to toxicity, rather than one based on
             molecular structure alone. Our model also enables the
             prediction of chemical dose-response profiles based on
             chemical structure (i.e., without in vivo or in vitro
             testing) by taking advantage of a large database
             of chemicals that have already been tested for toxicity in
             HTS programs. We show superior simulation performance in
             distance learning and modest to large gains in predictive
             ability compared to existing methods. Results from the
             high-throughput screening data application elucidate the
             relationship between chemical structure and a
             toxicity-relevant high-throughput assay. An R package for
             BS3FA is available online at https://github.com/kelrenmor/bs3fa.},
   Doi = {10.1214/21-aoas1461},
   Key = {fds360021}
}

@article{fds257971,
   Author = {Canale, A and Dunson, DB},
   Title = {Bayesian Kernel Mixtures for Counts.},
   Journal = {Journal of the American Statistical Association},
   Volume = {106},
   Number = {496},
   Pages = {1528-1539},
   Year = {2011},
   Month = {December},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/jasa.2011.tm10552},
   Abstract = {Although Bayesian nonparametric mixture models for
             continuous data are well developed, there is a limited
             literature on related approaches for count data. A common
             strategy is to use a mixture of Poissons, which
             unfortunately is quite restrictive in not accounting for
             distributions having variance less than the mean. Other
             approaches include mixing multinomials, which requires
             finite support, and using a Dirichlet process prior with a
             Poisson base measure, which does not allow smooth deviations
             from the Poisson. As a broad class of alternative models, we
             propose to use nonparametric mixtures of rounded continuous
             kernels. An efficient Gibbs sampler is developed for
             posterior computation, and a simulation study is performed
             to assess performance. Focusing on the rounded Gaussian
             case, we generalize the modeling framework to account for
             multivariate count data, joint modeling with continuous and
             categorical variables, and other complications. The methods
             are illustrated through applications to a developmental
             toxicity study and marketing data. This article has
             supplementary material online.},
   Doi = {10.1198/jasa.2011.tm10552},
   Key = {fds257971}
}
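
The rounded-kernel idea in the abstract above maps a continuous density to a
count distribution through interval masses. The sketch below computes the pmf
of a mixture of rounded Gaussian kernels under one common thresholding choice;
the kernels, weights, and thresholds are hypothetical, not the paper's
settings.

    import numpy as np
    from scipy.stats import norm

    def rounded_gaussian_pmf(counts, mus, sigmas, weights):
        # Pr(y = j) is the mixture mass of the underlying normals on
        # [a_j, a_{j+1}), with thresholds a_0 = -inf and a_j = j for j >= 1.
        counts = np.asarray(counts, dtype=float)
        lo = np.where(counts == 0, -np.inf, counts)
        hi = counts + 1.0
        pmf = np.zeros_like(counts)
        for w, m, s in zip(weights, mus, sigmas):
            pmf += w * (norm.cdf(hi, m, s) - norm.cdf(lo, m, s))
        return pmf

    js = np.arange(15)
    p = rounded_gaussian_pmf(js, mus=[1.5, 8.0], sigmas=[0.4, 2.0],
                             weights=[0.6, 0.4])
    print(p.round(3))  # a bimodal count pmf, unattainable with one Poisson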

@article{fds257976,
   Author = {Montagna, S and Tokdar, ST and Neelon, B and Dunson,
             DB},
   Title = {Bayesian latent factor regression for functional and
             longitudinal data.},
   Journal = {Biometrics},
   Volume = {68},
   Number = {4},
   Pages = {1064-1073},
   Year = {2012},
   Month = {December},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23005895},
   Abstract = {In studies involving functional data, it is commonly of
             interest to model the impact of predictors on the
             distribution of the curves, allowing flexible effects on not
             only the mean curve but also the distribution about the
             mean. Characterizing the curve for each subject as a linear
             combination of a high-dimensional set of potential basis
             functions, we place a sparse latent factor regression model
             on the basis coefficients. We induce basis selection by
             choosing a shrinkage prior that allows many of the loadings
             to be close to zero. The number of latent factors is treated
             as unknown, with inference via a highly efficient adaptive
             blocked Gibbs sampler. Predictors are included at the
             latent variable level, while allowing different predictors to
             impact different latent factors. This model induces a
             framework for functional response regression in which the
             distribution of the curves is allowed to change flexibly
             with predictors. The performance is assessed through
             simulation studies and the methods are applied to data on
             blood pressure trajectories during pregnancy.},
   Doi = {10.1111/j.1541-0420.2012.01788.x},
   Key = {fds257976}
}

@article{fds258014,
   Author = {Dunson, DB},
   Title = {Bayesian latent variable models for clustered mixed
             outcomes},
   Journal = {Journal of the Royal Statistical Society. Series B:
             Statistical Methodology},
   Volume = {62},
   Number = {2},
   Pages = {355-366},
   Publisher = {WILEY},
   Year = {2000},
   Month = {January},
   url = {http://dx.doi.org/10.1111/1467-9868.00236},
   Abstract = {A general framework is proposed for modelling clustered
             mixed outcomes. A mixture of generalized linear models is
             used to describe the joint distribution of a set of
             underlying variables, and an arbitrary function relates the
             underlying variables to the observed outcomes. The model
             accommodates multilevel data structures, general covariate
             effects and distinct link functions and error distributions
             for each underlying variable. Within the framework proposed,
             novel models are developed for clustered multiple binary,
             unordered categorical and joint discrete and continuous
             outcomes. A Markov chain Monte Carlo sampling algorithm is
             described for estimating the posterior distributions of the
             parameters and latent variables. Because of the flexibility
             of the modelling framework and estimation procedure,
             extensions to ordered categorical outcomes and more complex
             data structures are straightforward. The methods are
             illustrated by using data from a reproductive toxicity
             study.},
   Doi = {10.1111/1467-9868.00236},
   Key = {fds258014}
}

@article{fds257917,
   Author = {Dunson, DB and Watson, M and Taylor, JA},
   Title = {Bayesian latent variable models for median regression on
             multiple outcomes.},
   Journal = {Biometrics},
   Volume = {59},
   Number = {2},
   Pages = {296-304},
   Year = {2003},
   Month = {June},
   url = {http://dx.doi.org/10.1111/1541-0420.00036},
   Abstract = {Often a response of interest cannot be measured directly and
             it is necessary to rely on multiple surrogates, which can be
             assumed to be conditionally independent given the latent
             response and observed covariates. Latent response models
             typically assume that residual densities are Gaussian. This
             article proposes a Bayesian median regression modeling
             approach, which avoids parametric assumptions about residual
             densities by relying on an approximation based on quantiles.
             To accommodate within-subject dependency, the quantile
             response categories of the surrogate outcomes are related to
             underlying normal variables, which depend on a latent normal
             response. This underlying Gaussian covariance structure
             simplifies interpretation and model fitting, without
             restricting the marginal densities of the surrogate
             outcomes. A Markov chain Monte Carlo algorithm is proposed
             for posterior computation, and the methods are applied to
             single-cell electrophoresis (comet assay) data from a
             genetic toxicology study.},
   Doi = {10.1111/1541-0420.00036},
   Key = {fds257917}
}

@article{fds257934,
   Author = {Dunson, DB and Herring, AH},
   Title = {Bayesian latent variable models for mixed discrete
             outcomes.},
   Journal = {Biostatistics (Oxford, England)},
   Volume = {6},
   Number = {1},
   Pages = {11-25},
   Year = {2005},
   Month = {January},
   url = {http://dx.doi.org/10.1093/biostatistics/kxh025},
   Abstract = {In studies of complex health conditions, mixtures of
             discrete outcomes (event time, count, binary, ordered
             categorical) are commonly collected. For example, studies of
             skin tumorigenesis record latency time prior to the first
             tumor, increases in the number of tumors at each week, and
             the occurrence of internal tumors at the time of death.
             Motivated by this application, we propose a general
             underlying Poisson variable framework for mixed discrete
             outcomes, accommodating dependency through an additive gamma
             frailty model for the Poisson means. The model has
             log-linear, complementary log-log, and proportional hazards
             forms for count, binary and discrete event time outcomes,
             respectively. Simple closed form expressions can be derived
             for the marginal expectations, variances, and correlations.
             Following a Bayesian approach to inference,
             conditionally-conjugate prior distributions are chosen that
             facilitate posterior computation via an MCMC algorithm. The
             methods are illustrated using data from a Tg.AC mouse
             bioassay study.},
   Doi = {10.1093/biostatistics/kxh025},
   Key = {fds257934}
}

@article{fds322561,
   Author = {Banerjee, A and Murray, J and Dunson, DB},
   Title = {Bayesian learning of joint distributions of
             objects},
   Journal = {Journal of Machine Learning Research},
   Volume = {31},
   Pages = {1-9},
   Year = {2013},
   Month = {January},
   Abstract = {There is increasing interest in broad application areas in
             defining flexible joint models for data having a variety of
             measurement scales, while also allowing data of complex
             types, such as functions, images and documents. We consider
             a general framework for nonparametric Bayes joint modeling
             through mixture models that incorporate dependence across
             data types through a joint mixing measure. The mixing
             measure is assigned a novel infinite tensor factorization
             (ITF) prior that allows flexible dependence in cluster
             allocation across data types. The ITF prior is formulated as
             a tensor product of stick-breaking processes. Focusing on a
             convenient special case corresponding to a Parafac
             factorization, we provide basic theory justifying the
             flexibility of the proposed prior and resulting asymptotic
             properties. Focusing on ITF mixtures of product kernels, we
             develop a new Gibbs sampling algorithm for routine
             implementation relying on slice sampling. The methods are
             compared with alternative joint mixture models based on
             Dirichlet processes and related approaches through
             simulations and real data applications.},
   Key = {fds322561}
}
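
A truncated stick-breaking construction, the building block of the ITF prior
described above, can be sketched in a few lines. The outer-product step below
is only loosely illustrative of a tensor product of stick-breaking processes;
the truncation level and concentration parameters are hypothetical.

    import numpy as np

    def stick_breaking(alpha, k, rng):
        # Truncated stick-breaking: V_h ~ Beta(1, alpha) and
        # pi_h = V_h * prod_{l < h} (1 - V_l).
        v = rng.beta(1.0, alpha, size=k)
        return v * np.concatenate([[1.0], np.cumprod(1.0 - v)[:-1]])

    rng = np.random.default_rng(1)
    pi_x = stick_breaking(1.0, 25, rng)   # weights for one data type
    pi_y = stick_breaking(1.0, 25, rng)   # weights for a second data type
    # Outer product: a simple product-form cluster-allocation probability
    # across the two data types (loosely illustrative of the tensor
    # structure; the ITF prior allows richer dependence).
    joint = np.outer(pi_x, pi_y)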

@article{fds257967,
   Author = {Page, GL and Dunson, DB},
   Title = {Bayesian Local Contamination Models for Multivariate
             Outliers.},
   Journal = {Technometrics : a journal of statistics for the physical,
             chemical, and engineering sciences},
   Volume = {53},
   Number = {2},
   Pages = {152-162},
   Year = {2011},
   Month = {May},
   ISSN = {0040-1706},
   url = {http://dx.doi.org/10.1198/tech.2011.10041},
   Abstract = {In studies where data are generated from multiple locations
             or sources it is common for there to exist observations that
             are quite unlike the majority. Motivated by the application
             of establishing a reference value in an inter-laboratory
             setting when outlying labs are present, we propose a local
             contamination model that is able to accommodate unusual
             multivariate realizations in a flexible way. The proposed
             method models the process level of a hierarchical model
             using a mixture with a parametric component and a possibly
             nonparametric contamination. Much of the flexibility in the
             methodology is achieved by allowing varying random subsets
             of the elements in the lab-specific mean vectors to be
             allocated to the contamination component. Computational
             methods are developed and the methodology is compared to
             three other possible approaches using a simulation study. We
             apply the proposed method to a NIST/NOAA sponsored
             inter-laboratory study which motivated the methodological
             development.},
   Doi = {10.1198/tech.2011.10041},
   Key = {fds257967}
}

@article{fds332363,
   Author = {Wheeler, MW and Dunson, DB and Herring, AH},
   Title = {Bayesian local extremum splines},
   Journal = {Biometrika},
   Volume = {104},
   Number = {4},
   Pages = {939-952},
   Publisher = {Oxford University Press (OUP)},
   Year = {2017},
   Month = {December},
   url = {http://dx.doi.org/10.1093/biomet/asx039},
   Abstract = {We consider shape-restricted nonparametric regression on a
             closed set $$\mathcal{X} \subset \mathbb{R}$$, where it is
             reasonable to assume that the function has no more than
             $$H$$ local extrema interior to $$\mathcal{X}$$. Following a
             Bayesian approach we develop a nonparametric prior over a
             novel class of local extremum splines. This approach is
             shown to be consistent when modelling any continuously
             differentiable function within the class considered, and we
             use it to develop methods for testing hypotheses on the shape
             of the curve. Sampling algorithms are developed, and the
             method is applied in simulation studies and data examples
             where the shape of the curve is of interest.},
   Doi = {10.1093/biomet/asx039},
   Key = {fds332363}
}

@article{fds322558,
   Author = {Durante, D and Dunson, DB},
   Title = {Bayesian logistic Gaussian process models for dynamic
             networks},
   Journal = {Journal of Machine Learning Research},
   Volume = {33},
   Pages = {194-201},
   Year = {2014},
   Month = {January},
   Abstract = {Time-varying adjacency matrices encoding the presence or
             absence of a relation among entities are available in many
             research fields. Motivated by an application to studying
             dynamic networks among sports teams, we propose a Bayesian
             nonparametric model. The proposed approach uses a logistic
             mapping from the probability matrix, encoding link
             probabilities between each pair of teams, to an embedded latent
             relational space. Within this latent space, we incorporate a
             dictionary of Gaussian process (GP) latent trajectories
             characterizing changes over time in each team, while
             allowing learning of the number of latent dimensions through
             a specially tailored prior for the GP covariance. The model
             is provably flexible and borrows strength across the network
             and over time. We provide simulation experiments and an
             application to the Italian soccer Championship.},
   Key = {fds322558}
}
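
The logistic latent-space construction described above can be sketched
generically: latent trajectories for each entity evolve over time, and link
probabilities arise from a logistic transform of their inner products. This
is not the authors' GP-based construction; the random-walk trajectories and
intercept below are hypothetical simplifications.

    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    rng = np.random.default_rng(2)
    T, n, d = 10, 6, 2
    # Smooth-ish latent trajectories via a Gaussian random walk (a crude
    # stand-in for GP paths over time).
    X = np.cumsum(rng.normal(scale=0.3, size=(T, n, d)), axis=0)
    # Time-varying link probabilities from a logistic map of inner products.
    P = sigmoid(-1.0 + np.einsum('tid,tjd->tij', X, X))  # T x n x n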

@article{fds322540,
   Author = {Yang, Y and Dunson, DB},
   Title = {Bayesian manifold regression},
   Journal = {Annals of Statistics},
   Volume = {44},
   Number = {2},
   Pages = {876-905},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2016},
   Month = {April},
   url = {http://dx.doi.org/10.1214/15-AOS1390},
   Abstract = {There is increasing interest in the problem of nonparametric
             regression with high-dimensional predictors. When the number
             of predictors D is large, one encounters a daunting problem
             in attempting to estimate a D-dimensional surface based on
             limited data. Fortunately, in many applications, the support
             of the data is concentrated on a d-dimensional subspace with
             d ≤ D. Manifold learning attempts to estimate this
             subspace. Our focus is on developing computationally
             tractable and theoretically supported Bayesian nonparametric
             regression methods in this context. When the subspace
             corresponds to a locally-Euclidean compact Riemannian
             manifold, we show that a Gaussian process regression
             approach can be applied that leads to the minimax optimal
             adaptive rate in estimating the regression function under
             some conditions. The proposed model bypasses the need to
             estimate the manifold, and can be implemented using standard
             algorithms for posterior computation in Gaussian processes.
             Finite sample performance is illustrated in a data analysis
             example.},
   Doi = {10.1214/15-AOS1390},
   Key = {fds322540}
}
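
The abstract's point that no manifold-estimation step is required can be
illustrated with standard Gaussian process software applied directly to
ambient coordinates of data supported near a one-dimensional manifold (a
circle in D = 2). A minimal sketch using scikit-learn, with hypothetical data
and kernel settings:

    import numpy as np
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import RBF, WhiteKernel

    # Predictors concentrated near a 1-d manifold (a circle) in D = 2.
    rng = np.random.default_rng(3)
    theta = rng.uniform(0.0, 2.0 * np.pi, size=200)
    X = np.column_stack([np.cos(theta), np.sin(theta)])
    y = np.sin(3.0 * theta) + rng.normal(scale=0.1, size=200)

    # Ordinary GP regression on the ambient coordinates; no manifold
    # estimation step is needed.
    gp = GaussianProcessRegressor(kernel=RBF(1.0) + WhiteKernel(0.01))
    gp.fit(X, y)
    mean, sd = gp.predict(X[:5], return_std=True)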

@article{fds375282,
   Author = {Jin, B and Dunson, DB and Rager, JE and Reif, DM and Engel, SM and Herring,
             AH},
   Title = {Bayesian matrix completion for hypothesis
             testing.},
   Journal = {Journal of the Royal Statistical Society. Series C, Applied
             statistics},
   Volume = {72},
   Number = {2},
   Pages = {254-270},
   Year = {2023},
   Month = {May},
   url = {http://dx.doi.org/10.1093/jrsssc/qlac005},
   Abstract = {We aim to infer bioactivity of each chemical by assay
             endpoint combination, addressing sparsity of toxicology
             data. We propose a Bayesian hierarchical framework which
             borrows information across different chemicals and assay
             endpoints, facilitates out-of-sample prediction of activity
             for chemicals not yet assayed, quantifies uncertainty of
             predicted activity, and adjusts for multiplicity in
             hypothesis testing. Furthermore, this paper makes a novel
             attempt in toxicology to simultaneously model
             heteroscedastic errors and a nonparametric mean function,
             leading to a broader definition of activity whose need has
             been suggested by toxicologists. A real-data application
             identifies the chemicals most likely to be active for neurodevelopmental
             disorders and obesity.},
   Doi = {10.1093/jrsssc/qlac005},
   Key = {fds375282}
}

@article{fds257945,
   Author = {MacLehose, RF and Dunson, DB and Herring, AH and Hoppin,
             JA},
   Title = {Bayesian methods for highly correlated exposure
             data.},
   Journal = {Epidemiology (Cambridge, Mass.)},
   Volume = {18},
   Number = {2},
   Pages = {199-207},
   Year = {2007},
   Month = {March},
   ISSN = {1044-3983},
   url = {http://dx.doi.org/10.1097/01.ede.0000256320.30737.c0},
   Abstract = {Studies that include individuals with multiple highly
             correlated exposures are common in epidemiology. Because
             standard maximum likelihood techniques often fail to
             converge in such instances, hierarchical regression methods
             have seen increasing use. Bayesian hierarchical regression
             places prior distributions on exposure-specific regression
             coefficients to stabilize estimation and incorporate prior
             knowledge, if available. A common parametric approach in
             epidemiology is to treat the prior mean and variance as
             fixed constants. An alternative parametric approach is to
             place distributions on the prior mean and variance to allow
             the data to help inform their values. As a more flexible
             semiparametric option, one can place an unknown distribution
             on the coefficients that simultaneously clusters exposures
             into groups using a Dirichlet process prior. We also present
             a semiparametric model with a variable-selection prior to
             allow clustering of coefficients at 0. We compare these four
             hierarchical regression methods and demonstrate their
             application in an example estimating the association of
             herbicides with retinal degeneration among wives of
             pesticide applicators.},
   Doi = {10.1097/01.ede.0000256320.30737.c0},
   Key = {fds257945}
}

@article{fds257948,
   Author = {Dunson, DB},
   Title = {Bayesian methods for latent trait modelling of longitudinal
             data.},
   Journal = {Statistical methods in medical research},
   Volume = {16},
   Number = {5},
   Pages = {399-415},
   Year = {2007},
   Month = {October},
   ISSN = {0962-2802},
   url = {http://dx.doi.org/10.1177/0962280206075309},
   Abstract = {Latent trait models have long been used in the social
             science literature for studying variables that can only be
             measured indirectly through multiple items. However, such
             models are also very useful in accounting for correlation in
             multivariate and longitudinal data, particularly when
             outcomes have mixed measurement scales. Bayesian methods
             implemented with Markov chain Monte Carlo provide a flexible
             framework for routine fitting of a broad class of latent
             variable (LV) models, including very general structural
             equation models. However, in considering LV models, a number
             of challenging issues arise, including identifiability,
             confounding between the mean and variance, uncertainty in
             different aspects of the model, and difficulty in
             computation. Motivated by the problem of modelling
             multidimensional longitudinal data, this article reviews the
             recent literature, provides some recommendations and
             highlights areas in need of additional research, focusing on
             methods for model uncertainty.},
   Doi = {10.1177/0962280206075309},
   Key = {fds257948}
}

@article{fds257946,
   Author = {Scarpa, B and Dunson, DB},
   Title = {Bayesian methods for searching for optimal rules for timing
             intercourse to achieve pregnancy.},
   Journal = {Statistics in medicine},
   Volume = {26},
   Number = {9},
   Pages = {1920-1936},
   Year = {2007},
   Month = {April},
   ISSN = {0277-6715},
   url = {http://dx.doi.org/10.1002/sim.2846},
   Abstract = {With societal trends towards increasing age at starting a
             pregnancy attempt, many women are concerned about achieving
             conception before the onset of infertility, which precedes
             menopause. Couples failing to conceive a pregnancy within 12
             months are classified as clinically infertile, and may be
             recommended for assisted reproductive therapy (ART). Because
             many ART procedures are expensive and may convey an
             increased risk of adverse outcomes for the offspring, it is
             advantageous to decrease time to pregnancy by natural
             methods. One possibility is to intentionally time
             intercourse during the days of the menstrual cycle having
             the highest conception probabilities. This article proposes
             a Bayesian decision theoretic approach for searching for
             optimal rules for timing intercourse based on cycle day,
             secretions and other information. Good rules result in high
             conception probabilities while requiring minimal targeted
             intercourse. A biologically based statistical model is used
             to relate cycle day and biomarkers to the conception
             probability. A stochastic search procedure is then developed
             to search for rules with high expected utility, and the
             methods are applied to data from a recent Italian
             study.},
   Doi = {10.1002/sim.2846},
   Key = {fds257946}
}

@article{fds257937,
   Author = {Dunson, DB and Herring, AH},
   Title = {Bayesian model selection and averaging in additive and
             proportional hazards models.},
   Journal = {Lifetime data analysis},
   Volume = {11},
   Number = {2},
   Pages = {213-232},
   Year = {2005},
   Month = {June},
   url = {http://dx.doi.org/10.1007/s10985-004-0384-x},
   Abstract = {Although Cox proportional hazards regression is the default
             analysis for time to event data, there is typically
             uncertainty about whether the effects of a predictor are
             more appropriately characterized by a multiplicative or
             additive model. To accommodate this uncertainty, we place a
             model selection prior on the coefficients in an
             additive-multiplicative hazards model. This prior assigns
             positive probability, not only to the model that has both
             additive and multiplicative effects for each predictor, but
             also to sub-models corresponding to no association, to only
             additive effects, and to only proportional effects. The
             additive component of the model is constrained to ensure
             non-negative hazards, a condition often violated by current
             methods. After augmenting the data with Poisson latent
             variables, the prior is conditionally conjugate, and
             posterior computation can proceed via an efficient Gibbs
             sampling algorithm. Simulation study results are presented,
             and the methodology is illustrated using data from the
             Framingham heart study.},
   Doi = {10.1007/s10985-004-0384-x},
   Key = {fds257937}
}

@article{fds257908,
   Author = {Dunson, DB and Baird, DD},
   Title = {Bayesian modeling of incidence and progression of disease
             from cross-sectional data.},
   Journal = {Biometrics},
   Volume = {58},
   Number = {4},
   Pages = {813-822},
   Year = {2002},
   Month = {December},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2002.00813.x},
   Abstract = {In the absence of longitudinal data, the current presence
             and severity of disease can be measured for a sample of
             individuals to investigate factors related to disease
             incidence and progression. In this article, Bayesian
             discrete-time stochastic models are developed for inference
             from cross-sectional data consisting of the age at first
             diagnosis, the current presence of disease, and one or more
             surrogates of disease severity. Semiparametric models are
             used for the age-specific hazards of onset and diagnosis,
             and a normal underlying variable approach is proposed for
             modeling of changes with latency time in disease severity.
             The model accommodates multiple surrogates of disease
             severity having different measurement scales and
             heterogeneity among individuals in disease progression. A
             Markov chain Monte Carlo algorithm is described for
             posterior computation, and the methods are applied to data
             from a study of uterine leiomyoma.},
   Doi = {10.1111/j.0006-341x.2002.00813.x},
   Key = {fds257908}
}

@article{fds257910,
   Author = {Dunson, DB and Colombo, B},
   Title = {Bayesian modeling of markers of day-specific
             fertility},
   Journal = {Journal of the American Statistical Association},
   Volume = {98},
   Number = {461},
   Pages = {28-37},
   Publisher = {Informa UK Limited},
   Year = {2003},
   Month = {March},
   url = {http://dx.doi.org/10.1198/016214503388619067},
   Abstract = {Cervical mucus hydration increases during the fertile
             interval before ovulation. Because sperm can only penetrate
             mucus having a high water content, cervical secretions
             provide a reliable marker of the fertile days of the
             menstrual cycle. This article develops a Bayesian approach
             for modeling of daily observations of cervical mucus and
             applies the approach to assess heterogeneity among women and
             cycles from a given woman with respect to the increase in
             mucus hydration during the fertile interval. The proposed
             model relates the mucus observations to an underlying normal
             mucus hydration score, which varies relative to a peak
             hydration day. Uncertainty in the timing of the peak is
             accounted for, and a novel weighted mixture model is used to
             characterize heterogeneity in distinct features of the
             underlying mean function. Prior information on the mucus
             hydration trajectory is incorporated, and a Markov chain
             Monte Carlo approach is developed. Based on data from a
             study of daily fecundability, there appears to be
             substantial heterogeneity among women in detected
             preovulatory increases in mucus hydration, but only minimal
             differences among cycles from a given woman.},
   Doi = {10.1198/016214503388619067},
   Key = {fds257910}
}

@article{fds257930,
   Author = {Dunson, DB and Holloman, C and Calder, C and Gunn,
             LH},
   Title = {Bayesian modeling of multiple lesion onset and growth from
             interval-censored data.},
   Journal = {Biometrics},
   Volume = {60},
   Number = {3},
   Pages = {676-683},
   Year = {2004},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2004.00217.x},
   Abstract = {In studying rates of occurrence and progression of lesions
             (or tumors), it is typically not possible to obtain exact
             onset times for each lesion. Instead, data consist of the
             number of lesions that reach a detectable size between
             screening examinations, along with measures of the
             size/severity of individual lesions at each exam time. This
             interval-censored data structure makes it difficult to
             properly adjust for the onset time distribution in assessing
             covariate effects on rates of lesion progression. This
             article proposes a joint model for the multiple lesion onset
             and progression process, motivated by cross-sectional data
             from a study of uterine leiomyoma tumors. By using a joint
             model, one can potentially obtain more precise inferences on
             rates of onset, while also performing onset time-adjusted
             inferences on lesion severity. Following a Bayesian
             approach, we propose a data augmentation Markov chain Monte
             Carlo algorithm for posterior computation.},
   Doi = {10.1111/j.0006-341x.2004.00217.x},
   Key = {fds257930}
}

@article{fds365095,
   Author = {Zito, A and Rigon, T and Ovaskainen, O and Dunson,
             DB},
   Title = {Bayesian Modeling of Sequential Discoveries.},
   Journal = {Journal of the American Statistical Association},
   Volume = {118},
   Number = {544},
   Pages = {2521-2532},
   Year = {2023},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2022.2060835},
   Abstract = {We aim at modeling the appearance of distinct tags in a
             sequence of labeled objects. Common examples of this type of
             data include words in a corpus or distinct species in a
             sample. These sequential discoveries are often summarized
             via accumulation curves, which count the number of distinct
             entities observed in an increasingly large set of objects.
             We propose a novel Bayesian method for species sampling
             modeling by directly specifying the probability of a new
             discovery, thereby allowing for flexible specifications.
             The asymptotic behavior and finite sample properties of such
             an approach are extensively studied. Interestingly, our
             enlarged class of sequential processes includes highly
             tractable special cases. We present a subclass of models
             characterized by appealing theoretical and computational
             properties, including one that shares the same discovery
             probability with the Dirichlet process. Moreover, due to
             strong connections with logistic regression models, the
             latter subclass can naturally account for covariates. We
             finally test our proposal on both synthetic and real data,
             with special emphasis on a large fungal biodiversity study
             in Finland. Supplementary materials for this article are
             available online.},
   Doi = {10.1080/01621459.2022.2060835},
   Key = {fds365095}
}
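
Accumulation curves, the summary at the center of this abstract, are
straightforward to compute, and the Dirichlet process special case mentioned
above has a well-known discovery probability. A short generic sketch, with
hypothetical labels and concentration parameter:

    import numpy as np

    def accumulation_curve(labels):
        # Number of distinct tags seen among the first n objects, n = 1..N.
        seen, curve = set(), []
        for lab in labels:
            seen.add(lab)
            curve.append(len(seen))
        return np.asarray(curve)

    print(accumulation_curve(["a", "b", "a", "c", "b", "d"]))  # [1 2 2 3 3 4]

    # Under a Dirichlet process with concentration alpha, the probability
    # that observation n + 1 is a new discovery is alpha / (alpha + n).
    alpha = 5.0
    n = np.arange(10)
    print(alpha / (alpha + n))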

@article{fds257856,
   Author = {Kunihama, T and Dunson, DB},
   Title = {Bayesian modeling of temporal dependence in large sparse
             contingency tables.},
   Journal = {Journal of the American Statistical Association},
   Volume = {108},
   Number = {504},
   Pages = {1324-1338},
   Year = {2013},
   Month = {January},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2013.823866},
   Abstract = {In many applications, it is of interest to study trends over
             time in relationships among categorical variables, such as
             age group, ethnicity, religious affiliation, political party
             and preference for particular policies. At each time point,
             a sample of individuals provide responses to a set of
             questions, with different individuals sampled at each time.
             In such settings, there tends to be abundant missing data
             and the variables being measured may change over time. At
             each time point, one obtains a large sparse contingency
             table, with the number of cells often much larger than the
             number of individuals being surveyed. To borrow information
             across time in modeling large sparse contingency tables, we
             propose a Bayesian autoregressive tensor factorization
             approach. The proposed model relies on a probabilistic
             Parafac factorization of the joint pmf characterizing the
             categorical data distribution at each time point, with
             autocorrelation included across times. Efficient
             computational methods are developed relying on MCMC. The
             methods are evaluated through simulation examples and
             applied to social survey data.},
   Doi = {10.1080/01621459.2013.823866},
   Key = {fds257856}
}
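
A probabilistic Parafac factorization of a joint pmf is equivalent to a
latent-class model: conditionally on a latent class, the categorical variables
are independent, so the joint pmf is a weighted sum of rank-one tensors. The
sketch below simulates from such a model at a single time point; the
autoregressive dependence across time in the paper is omitted, and all
dimensions are hypothetical.

    import numpy as np

    rng = np.random.default_rng(4)
    H, p, levels, n = 3, 4, 5, 1000   # classes, variables, levels, samples
    nu = rng.dirichlet(np.ones(H))                      # class weights
    lam = rng.dirichlet(np.ones(levels), size=(p, H))   # lam[j, h]: pmf

    # Given latent class z, the p categorical variables are independent,
    # so the joint pmf is sum_h nu_h * prod_j lam[j, h] (a rank-H tensor).
    z = rng.choice(H, size=n, p=nu)
    data = np.stack([np.array([rng.choice(levels, p=lam[j, zi]) for zi in z])
                     for j in range(p)], axis=1)   # n x p categorical matrix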

@article{fds257844,
   Author = {Xing, Z and Nicholson, B and Jimenez, M and Veldman, T and Hudson, L and Lucas, J and Dunson, D and Zaas, AK and Woods, CW and Ginsburg, GS and Carin, L},
   Title = {Bayesian modeling of temporal properties of infectious
             disease in a college student population},
   Journal = {Journal of Applied Statistics},
   Volume = {41},
   Number = {6},
   Pages = {1358-1382},
   Year = {2014},
   Month = {January},
   ISSN = {0266-4763},
   url = {http://dx.doi.org/10.1080/02664763.2013.870138},
   Abstract = {A Bayesian statistical model is developed for analysis of
             the time-evolving properties of infectious disease, with a
             particular focus on viruses. The model employs a latent
             semi-Markovian state process, and the state-transition
             statistics are driven by three terms: (i) a general
             time-evolving trend of the overall population, (ii) a
             semi-periodic term that accounts for effects caused by the
             days of the week, and (iii) a regression term that relates
             the probability of infection to covariates (here,
             specifically, to the Google Flu Trends data). Computations
             are performed using Markov chain Monte Carlo sampling.
             Results are presented using a novel data set: daily
             self-reported symptom scores from hundreds of Duke
             University undergraduate students, collected over three
             academic years. The illnesses associated with these students
             are (imperfectly) labeled using real-time (RT) polymerase
             chain reaction (PCR) testing for several viruses, and
             gene-expression data were also analyzed. The statistical
             analysis is performed on the daily, self-reported symptom
             scores, and the RT PCR and gene-expression data are employed
             for analysis and interpretation of the model results.},
   Doi = {10.1080/02664763.2013.870138},
   Key = {fds257844}
}

@article{fds257889,
   Author = {Dunson, DB},
   Title = {Bayesian modeling of the level and duration of fertility in
             the menstrual cycle.},
   Journal = {Biometrics},
   Volume = {57},
   Number = {4},
   Pages = {1067-1073},
   Year = {2001},
   Month = {December},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2001.01067.x},
   Abstract = {Time to pregnancy studies that identify ovulation days and
             collect daily intercourse data can be used to estimate the
             day-specific probabilities of conception given intercourse
             on a single day relative to ovulation. In this article, a
             Bayesian semiparametric model is described for flexibly
             characterizing covariate effects and heterogeneity among
             couples in daily fecundability. The proposed model is
             characterized by the timing of the most fertile day of the
             cycle relative to ovulation, by the probability of
             conception due to intercourse on the most fertile day, and
             by the ratios of the daily conception probabilities for
             other days of the cycle relative to this peak probability.
             The ratios are assumed to be increasing in time to the peak
             and decreasing thereafter. Generalized linear mixed models
             are used to incorporate covariate and couple-specific
             effects on the peak probability and on the day-specific
             ratios. A Markov chain Monte Carlo algorithm is described
             for posterior estimation, and the methods are illustrated
             through application to caffeine data from a North Carolina
             pregnancy study.},
   Doi = {10.1111/j.0006-341x.2001.01067.x},
   Key = {fds257889}
}

@article{fds257912,
   Author = {Dunson, DB and Chulada, P and Arbes, SJ},
   Title = {Bayesian modeling of time-varying and waning exposure
             effects.},
   Journal = {Biometrics},
   Volume = {59},
   Number = {1},
   Pages = {83-91},
   Year = {2003},
   Month = {March},
   url = {http://dx.doi.org/10.1111/1541-0420.00010},
   Abstract = {In epidemiologic studies, there is often interest in
             assessing the association between exposure history and
             disease incidence. For many diseases, incidence may depend
             not only on cumulative exposure, but also on the ages at
             which exposure occurred. This article proposes a flexible
             Bayesian approach for modeling age-varying and waning
             exposure effects. The Cox model is generalized to allow the
             hazard of disease to depend on an integral, across the
             exposed ages, of a piecewise polynomial function of age,
             multiplied by an exponential decay term. Linearity
             properties of the model facilitate posterior computation via
             a Gibbs sampler, which generalizes previous algorithms for
             Cox regression with time-dependent covariates. The approach
             is illustrated by an application to the study of protective
             effects of breastfeeding on incidence of childhood
             asthma.},
   Doi = {10.1111/1541-0420.00010},
   Key = {fds257912}
}
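
The generalized hazard described above weights exposure at each age by a
polynomial-in-age term and an exponential decay in time since exposure. A
rough numerical sketch of that integral, with a piecewise-constant weight
standing in for the paper's piecewise polynomial and all parameter values
invented:

import numpy as np

def waning_effect(t, exposed, decay=0.3):
    """Riemann-sum approximation to the integral, over exposed ages a, of a
    weight g(a) times exp(-decay * (t - a)); forms and values are invented."""
    ages = np.linspace(0.0, t, 500)
    g = np.where(ages < 1.0, 1.0, 0.5)   # piecewise-constant stand-in for g(a)
    on = exposed(ages)                    # exposure indicator across ages
    integrand = on * g * np.exp(-decay * (t - ages))
    return integrand.sum() * (ages[1] - ages[0])

# Example: exposure (e.g., breastfeeding) during ages 0-1; effect at age 5.
print(round(waning_effect(5.0, lambda a: (a <= 1.0).astype(float)), 4))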

@article{fds329111,
   Author = {Durante, D and Paganin, S and Scarpa, B and Dunson,
             DB},
   Title = {Bayesian modelling of networks in complex business
             intelligence problems},
   Journal = {Journal of the Royal Statistical Society. Series C: Applied
             Statistics},
   Volume = {66},
   Number = {3},
   Pages = {555-580},
   Publisher = {WILEY},
   Year = {2017},
   Month = {April},
   url = {http://dx.doi.org/10.1111/rssc.12168},
   Abstract = {Complex network data problems are increasingly common in
             many fields of application. Our motivation is drawn from
             strategic marketing studies monitoring customer choices of
             specific products, along with co-subscription networks
             encoding multiple-purchasing behaviour. Data are available
             for several agencies within the same insurance company, and
             our goal is to exploit co-subscription networks efficiently
             to inform targeted advertising of cross-sell strategies to
             currently monoproduct customers. We address this goal by
             developing a Bayesian hierarchical model, which clusters
             agencies according to common monoproduct customer choices
             and co-subscription networks. Within each cluster, we
             efficiently model customer behaviour via a cluster-dependent
             mixture of latent eigenmodels. This formulation provides key
             information on monoproduct customer choices and
             multiple-purchasing behaviour within each cluster, informing
             targeted cross-sell strategies. We develop simple algorithms
             for tractable inference and assess performance in
             simulations and an application to business
             intelligence.},
   Doi = {10.1111/rssc.12168},
   Key = {fds329111}
}

@article{fds257902,
   Author = {Dunson, DB and Dinse, GE},
   Title = {Bayesian models for multivariate current status data with
             informative censoring.},
   Journal = {Biometrics},
   Volume = {58},
   Number = {1},
   Pages = {79-88},
   Year = {2002},
   Month = {March},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2002.00079.x},
   Abstract = {Multivariate current status data consist of indicators of
             whether each of several events occurs by the time of a single
             examination. Our interest focuses on inferences about the
             joint distribution of the event times. Conventional methods
             for analysis of multiple event-time data cannot be used
             because all of the event times are censored and censoring
             may be informative. Within a given subject, we account for
             correlated event times through a subject-specific latent
             variable, conditional upon which the various events are
             assumed to occur independently. We also assume that each
             event contributes independently to the hazard of censoring.
             Nonparametric step functions are used to characterize the
             baseline distributions of the different event times and of
             the examination times. Covariate and subject-specific
             effects are incorporated through generalized linear models.
             A Markov chain Monte Carlo algorithm is described for
             estimation of the posterior distributions of the unknowns.
             The methods are illustrated through application to multiple
             tumor site data from an animal carcinogenicity
             study.},
   Doi = {10.1111/j.0006-341x.2002.00079.x},
   Key = {fds257902}
}

@article{fds257843,
   Author = {Lin, L and Dunson, DB},
   Title = {Bayesian monotone regression using Gaussian process
             projection},
   Journal = {Biometrika},
   Volume = {101},
   Number = {2},
   Pages = {303-317},
   Publisher = {Oxford University Press (OUP)},
   Year = {2014},
   Month = {January},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/ast063},
   Abstract = {Shape-constrained regression analysis has applications in
             dose-response modelling, environmental risk assessment,
             disease screening and many other areas. Incorporating the
             shape constraints can improve estimation efficiency and
             avoid implausible results. We propose a novel method,
             focusing on monotone curve and surface estimation, which
             uses Gaussian process projections. Our inference is based on
             projecting posterior samples from the Gaussian process. We
             develop theory on continuity of the projection and rates of
             contraction. Our approach leads to simple computation with
             good performance in finite samples. The proposed projection
             method can also be applied to other constrained-function
             estimation problems, including those in multivariate
             settings. © 2014 Biometrika Trust.},
   Doi = {10.1093/biomet/ast063},
   Key = {fds257843}
}
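
The projection idea is easy to emulate: draw sample paths from a Gaussian
process (prior draws below stand in for posterior samples) and project each
onto the monotone cone; in the least-squares sense this projection is
isotonic regression, computable by pool-adjacent-violators. A sketch under
those assumptions, not the authors' code:

import numpy as np

def pava(y):
    """L2 projection of a sequence onto the nondecreasing cone
    (pool-adjacent-violators)."""
    vals, wts, runs = [], [], []
    for v in y:
        vals.append(float(v)); wts.append(1.0); runs.append(1)
        while len(vals) > 1 and vals[-2] > vals[-1]:
            w = wts[-2] + wts[-1]
            vals[-2] = (vals[-2] * wts[-2] + vals[-1] * wts[-1]) / w
            wts[-2] = w; runs[-2] += runs[-1]
            vals.pop(); wts.pop(); runs.pop()
    return np.repeat(vals, runs)

rng = np.random.default_rng(1)
x = np.linspace(0, 1, 50)
# Squared-exponential covariance; draws stand in for posterior samples.
K = np.exp(-0.5 * (x[:, None] - x[None, :]) ** 2 / 0.1 ** 2) + 1e-6 * np.eye(50)
paths = (np.linalg.cholesky(K) @ rng.standard_normal((50, 100))).T
monotone = np.array([pava(f) for f in paths])   # projected sample paths
estimate = monotone.mean(axis=0)                # monotone point estimate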

@article{fds335794,
   Author = {Shterev, ID and Dunson, DB and Chan, C and Sempowski,
             GD},
   Title = {Bayesian Multi-Plate High-Throughput Screening of
             Compounds.},
   Journal = {Sci Rep},
   Volume = {8},
   Number = {1},
   Pages = {9551},
   Year = {2018},
   Month = {June},
   url = {http://dx.doi.org/10.1038/s41598-018-27531-w},
   Abstract = {High-throughput screening of compounds (chemicals) is an
             essential part of drug discovery, involving thousands to
             millions of compounds, with the purpose of identifying
             candidate hits. Most statistical tools, including the
             industry standard B-score method, work on individual
             compound plates and do not exploit cross-plate correlation
             or statistical strength among plates. We present a new
             statistical framework for high-throughput screening of
             compounds based on Bayesian nonparametric modeling. The
             proposed approach is able to identify candidate hits from
             multiple plates simultaneously, sharing statistical strength
             among plates and providing more robust estimates of compound
             activity. It can flexibly accommodate arbitrary
             distributions of compound activities and is applicable to
             any plate geometry. The algorithm provides a principled
             statistical approach for hit identification and false
             discovery rate control. Experiments demonstrate significant
             improvements in hit identification sensitivity and
             specificity over the B-score and R-score methods, which are
             highly sensitive to threshold choice. These improvements are
             maintained at low hit rates. The framework is implemented as
             an efficient R extension package BHTSpack and is suitable
             for large scale data sets.},
   Doi = {10.1038/s41598-018-27531-w},
   Key = {fds335794}
}
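
For context on the B-score baseline named in this abstract: it is computed
plate by plate as the residuals of a two-way median polish (removing row and
column positional effects), scaled by their median absolute deviation. A
rough single-plate sketch on simulated data, unrelated to the BHTSpack
implementation:

import numpy as np

def b_score(plate, n_iter=10):
    """B-score for one plate: median-polish residuals divided by a
    normal-consistent MAD."""
    r = plate.astype(float).copy()
    for _ in range(n_iter):                          # alternating median sweeps
        r -= np.median(r, axis=1, keepdims=True)     # remove row effects
        r -= np.median(r, axis=0, keepdims=True)     # remove column effects
    mad = np.median(np.abs(r - np.median(r)))
    return r / (1.4826 * mad)

rng = np.random.default_rng(0)
plate = rng.normal(size=(16, 24))     # 384-well plate geometry
plate[3, 7] += 6.0                    # spike one strong "hit"
z = b_score(plate)
print(np.unravel_index(np.argmax(np.abs(z)), z.shape))  # should recover (3, 7)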

@article{fds257833,
   Author = {Gu, K and Pati, D and Dunson, DB},
   Title = {Bayesian Multiscale Modeling of Closed Curves in Point
             Clouds.},
   Journal = {Journal of the American Statistical Association},
   Volume = {109},
   Number = {508},
   Pages = {1481-1494},
   Year = {2014},
   Month = {October},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2014.934825},
   Abstract = {Modeling object boundaries based on image or point cloud
             data is frequently necessary in medical and scientific
             applications ranging from detecting tumor contours for
             targeted radiation therapy, to the classification of
             organisms based on their structural information. In
             low-contrast images or sparse and noisy point clouds, there
             is often insufficient data to recover local segments of the
             boundary in isolation. Thus, it becomes critical to model
             the entire boundary in the form of a closed curve. To
             achieve this, we develop a Bayesian hierarchical model that
             expresses highly diverse 2D objects in the form of closed
             curves. The model is based on a novel multiscale deformation
             process. By relating multiple objects through a hierarchical
             formulation, we can successfully recover missing boundaries
             by borrowing structural information from similar objects at
             the appropriate scale. Furthermore, the model's latent
             parameters help interpret the population, indicating
             dimensions of significant structural variability and also
             specifying a 'central curve' that summarizes the collection.
             Theoretical properties of our prior are studied in specific
             cases and efficient Markov chain Monte Carlo methods are
             developed, evaluated through simulation examples and applied
             to panorex teeth images for modeling teeth contours and also
             to a brain tumor contour detection problem.},
   Doi = {10.1080/01621459.2014.934825},
   Key = {fds257833}
}

@article{fds258062,
   Author = {Cai, B and Dunson, DB},
   Title = {Bayesian multivariate isotonic regression splines:
             Applications to carcinogenicity studies},
   Journal = {Journal of the American Statistical Association},
   Volume = {102},
   Number = {480},
   Pages = {1158-1171},
   Publisher = {Informa UK Limited},
   Year = {2007},
   Month = {December},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214506000000942},
   Abstract = {In many applications, interest focuses on assessing the
             relationship between a predictor and a multivariate outcome
             variable, and there may be prior knowledge about the shape
             of the regression curves. For example, regression functions
             that relate dose of a possible risk factor to different
             adverse outcomes can often be assumed to be nondecreasing.
             In such cases, interest focuses on (1) assessing evidence of
             an overall adverse effect, (2) determining which outcomes
             are most affected, and (3) estimating outcome-specific
             regression curves. This article proposes a Bayesian approach
             for addressing this problem, motivated by multisite tumor
             data from carcinogenicity experiments. A multivariate
             smoothing spline model is specified that accommodates
             dependency in the multiple curves through a hierarchical
             Markov random field prior for the basis coefficients, while
             also allowing for residual correlation. A Gibbs sampler is
             proposed for posterior computation, and the approach is
             applied to data on body weight and tumor
             occurrence.},
   Doi = {10.1198/016214506000000942},
   Key = {fds258062}
}

@article{fds257931,
   Author = {O'Brien, SM and Dunson, DB},
   Title = {Bayesian multivariate logistic regression.},
   Journal = {Biometrics},
   Volume = {60},
   Number = {3},
   Pages = {739-746},
   Year = {2004},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/15339297},
   Abstract = {Bayesian analyses of multivariate binary or categorical
             outcomes typically rely on probit or mixed effects logistic
             regression models that do not have a marginal logistic
             structure for the individual outcomes. In addition,
             difficulties arise when simple noninformative priors are
             chosen for the covariance parameters. Motivated by these
             problems, we propose a new type of multivariate logistic
             distribution that can be used to construct a likelihood for
             multivariate logistic regression analysis of binary and
             categorical data. The model for individual outcomes has a
             marginal logistic structure, simplifying interpretation. We
             follow a Bayesian approach to estimation and inference,
             developing an efficient data augmentation algorithm for
             posterior computation. The method is illustrated with
             application to a neurotoxicology study.},
   Doi = {10.1111/j.0006-341X.2004.00224.x},
   Key = {fds257931}
}

@article{fds257828,
   Author = {Canale, A and Dunson, DB},
   Title = {Bayesian multivariate mixed-scale density
             estimation},
   Journal = {Statistics and its Interface},
   Volume = {8},
   Number = {2},
   Pages = {195-201},
   Publisher = {International Press of Boston},
   Year = {2015},
   Month = {January},
   ISSN = {1938-7989},
   url = {http://dx.doi.org/10.4310/SII.2015.v8.n2.a7},
   Abstract = {Although continuous density estimation has received abundant
             attention in the Bayesian nonparametrics literature, there
             is limited theory on multivariate mixed scale density
             estimation. In this note, we consider a general framework to
             jointly model continuous, count and categorical variables
             under a nonparametric prior, which is induced through
             rounding latent variables having an unknown density with
             respect to Lebesgue measure. For the proposed class of
             priors, we provide sufficient conditions for large support,
             strong consistency and rates of posterior contraction. These
             conditions allow one to convert sufficient conditions
             obtained in the setting of multivariate continuous density
             estimation to the mixed scale case. To illustrate the
             procedure, a rounded multivariate nonparametric mixture of
             Gaussians is introduced and applied to a crime and
             communities dataset.},
   Doi = {10.4310/SII.2015.v8.n2.a7},
   Key = {fds257828}
}
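
The rounding construction is simple to simulate: draw latent variables from a
continuous joint density, then map each coordinate to its observed scale by
rounding or thresholding. A toy sketch in which the latent density,
thresholds, and dimensions are all invented for illustration:

import numpy as np

rng = np.random.default_rng(0)
n = 1000

# Latent draws from a two-component trivariate Gaussian mixture (standing in
# for the unknown latent density).
comp = rng.integers(0, 2, size=n)
means = np.array([[0.0, -1.0, -0.5], [2.0, 1.5, 0.5]])
z = means[comp] + rng.standard_normal((n, 3))

# Rounding/thresholding maps each latent coordinate to its observed scale:
y_cont = z[:, 0]                                           # continuous
y_count = np.clip(np.floor(z[:, 1]), 0, None).astype(int)  # count, floored at 0
y_cat = (z[:, 2] > 0).astype(int)                          # binary category

# A single latent joint density thus induces a coherent mixed-scale model.
print(y_cont[:3].round(2), y_count[:3], y_cat[:3])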

@article{fds327029,
   Author = {Wang, L and Durante, D and Jung, RE and Dunson, DB},
   Title = {Bayesian network-response regression.},
   Journal = {Bioinformatics (Oxford, England)},
   Volume = {33},
   Number = {12},
   Pages = {1859-1866},
   Year = {2017},
   Month = {June},
   url = {http://dx.doi.org/10.1093/bioinformatics/btx050},
   Abstract = {Motivation: There is increasing interest in learning
             how human brain networks vary as a function of a continuous
             trait, but flexible and efficient procedures to accomplish
             this goal are limited. We develop a Bayesian semiparametric
             model, which combines low-rank factorizations and flexible
             Gaussian process priors to learn changes in the conditional
             expectation of a network-valued random variable across the
             values of a continuous predictor, while including
             subject-specific random effects. Results: The formulation
             leads to a general framework for inference on changes in
             brain network structures across human traits, facilitating
             borrowing of information and coherently characterizing
             uncertainty. We provide an efficient Gibbs sampler for
             posterior computation along with simple procedures for
             inference, prediction and goodness-of-fit assessments. The
             model is applied to learn how human brain networks vary
             across individuals with different intelligence scores.
             Results provide interesting insights on the association
             between intelligence and brain connectivity, while
             demonstrating good predictive performance. Availability and
             implementation: Source code implemented in R and data are
             available at https://github.com/wangronglu/BNRR. Contact:
             rl.wang@duke.edu. Supplementary information: Supplementary
             data are available at Bioinformatics online.},
   Doi = {10.1093/bioinformatics/btx050},
   Key = {fds327029}
}

@article{fds322546,
   Author = {Fox, EB and Dunson, DB and Airoldi, EM},
   Title = {Bayesian nonparametric covariance regression},
   Journal = {Journal of Machine Learning Research},
   Volume = {16},
   Pages = {2501-2542},
   Year = {2015},
   Month = {December},
   Abstract = {Capturing predictor-dependent correlations amongst the
             elements of a multivariate response vector is fundamental to
             numerous applied domains, including neuroscience,
             epidemiology, and finance. Although there is a rich
             literature on methods for allowing the variance in a
             univariate regression model to vary with predictors,
             relatively little has been done in the multivariate case. As
             a motivating example, we consider the Google Flu Trends data
             set, which provides indirect measurements of influenza
             incidence at a large set of locations over time (our
             predictor). To accurately characterize temporally evolving
             influenza incidence across regions, it is important to
             develop statistical methods for a time-varying covariance
             matrix. Importantly, the locations provide a redundant set
             of measurements and yield neither a sparse nor a static
             spatial dependence structure. We propose to reduce dimensionality
             and induce a flexible Bayesian nonparametric covariance
             regression model by relating these location-specific
             trajectories to a lower-dimensional subspace through a
             latent factor model with predictor-dependent factor
             loadings. These loadings are in terms of a collection of
             basis functions that vary nonparametrically over the
             predictor space. Such low-rank approximations are in
             contrast to sparse precision assumptions, and are
             appropriate in a wide range of applications. Our formulation
             aims to address three challenges: scaling to large p
             domains, coping with missing values, and allowing an
             irregular grid of observations. The model is shown to be
             highly flexible, while leading to a computationally feasible
             implementation via Gibbs sampling. The ability to scale to
             large p domains and cope with missing values is fundamental
             in analyzing the Google Flu Trends data.},
   Key = {fds322546}
}
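
The model above can be summarized as Sigma(x) = Lambda(x) Lambda(x)' +
Sigma_0, with the factor loadings Lambda(x) expanded in basis functions that
vary over the predictor. A small sketch of assembling such a
predictor-dependent covariance on a grid (basis, dimensions, and values are
hypothetical):

import numpy as np

rng = np.random.default_rng(0)
p, k, m = 10, 3, 5                      # response dim, factors, basis functions

centers = np.linspace(0, 1, m)
def basis(x):
    """Gaussian-bump dictionary evaluated at predictor value x."""
    return np.exp(-0.5 * ((x - centers) / 0.2) ** 2)

theta = rng.standard_normal((m, p, k))  # basis coefficients for the loadings
sigma0 = 0.1 * np.eye(p)                # diagonal residual covariance

def Sigma(x):
    """Predictor-dependent covariance via low-rank, x-varying loadings."""
    Lam = np.tensordot(basis(x), theta, axes=1)   # p x k loadings at x
    return Lam @ Lam.T + sigma0

covs = np.stack([Sigma(x) for x in np.linspace(0, 1, 20)])
print(covs.shape)                       # (20, 10, 10): smoothly varying in x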

@article{fds257953,
   Author = {Rodríguez, A and Dunson, DB and Gelfand, AE},
   Title = {Bayesian Nonparametric Functional Data Analysis Through
             Density Estimation.},
   Journal = {Biometrika},
   Volume = {96},
   Number = {1},
   Pages = {149-162},
   Year = {2009},
   Month = {January},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/asn054},
   Abstract = {In many modern experimental settings, observations are
             obtained in the form of functions, and interest focuses on
             inferences on a collection of such functions. We propose a
             hierarchical model that allows us to simultaneously estimate
             multiple curves nonparametrically by using dependent
             Dirichlet Process mixtures of Gaussians to characterize the
             joint distribution of predictors and outcomes. Function
             estimates are then induced through the conditional
             distribution of the outcome given the predictors. The
             resulting approach allows for flexible estimation and
             clustering, while borrowing information across curves. We
             also show that the function estimates we obtain are
             consistent on the space of integrable functions. As an
             illustration, we consider an application to the analysis of
             Conductivity and Temperature at Depth data in the north
             Atlantic.},
   Doi = {10.1093/biomet/asn054},
   Key = {fds257953}
}

@article{fds257954,
   Author = {Dunson, DB},
   Title = {Bayesian nonparametric hierarchical modeling.},
   Journal = {Biometrical Journal. Biometrische Zeitschrift},
   Volume = {51},
   Number = {2},
   Pages = {273-284},
   Year = {2009},
   Month = {April},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/19358217},
   Abstract = {In biomedical research, hierarchical models are very widely
             used to accommodate dependence in multivariate and
             longitudinal data and for borrowing of information across
             data from different sources. A primary concern in
             hierarchical modeling is sensitivity to parametric
             assumptions, such as linearity and normality of the random
             effects. Parametric assumptions on latent variable
             distributions can be challenging to check and are typically
             unwarranted, given available prior knowledge. This article
             reviews some recent developments in Bayesian nonparametric
             methods motivated by complex, multivariate and functional
             data collected in biomedical studies. The author provides a
             brief review of flexible parametric approaches relying on
             finite mixtures and latent class modeling. Dirichlet process
             mixture models are motivated by the need to generalize these
             approaches to avoid assuming a fixed finite number of
             classes. Focusing on an epidemiology application, the author
             illustrates the practical utility and potential of
             nonparametric Bayes methods.},
   Doi = {10.1002/bimj.200800183},
   Key = {fds257954}
}

@article{fds258052,
   Author = {Dunson, DB and Peddada, SD},
   Title = {Bayesian nonparametric inference on stochastic
             ordering.},
   Journal = {Biometrika},
   Volume = {95},
   Number = {4},
   Pages = {859-874},
   Publisher = {Oxford University Press (OUP)},
   Year = {2008},
   Month = {December},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/asn043},
   Abstract = {This article considers Bayesian inference about collections
             of unknown distributions subject to a partial stochastic
             ordering. To address problems in testing of equalities
             between groups and estimation of group-specific
             distributions, we propose classes of restricted dependent
             Dirichlet process priors. These priors have full support in
             the space of stochastically ordered distributions, and can
             be used for collections of unknown mixture distributions to
             obtain a flexible class of mixture models. Theoretical
             properties are discussed, efficient methods are developed
             for posterior computation using Markov chain Monte Carlo,
             and the methods are illustrated using data from a study of
             DNA damage and repair.},
   Doi = {10.1093/biomet/asn043},
   Key = {fds258052}
}

@article{fds325977,
   Author = {Lin, L and Rao, V and Dunson, D},
   Title = {Bayesian nonparametric inference on the Stiefel
             manifold},
   Journal = {Statistica Sinica},
   Volume = {27},
   Number = {2},
   Pages = {535-553},
   Publisher = {Institute of Statistical Science},
   Year = {2017},
   Month = {April},
   url = {http://dx.doi.org/10.5705/ss.202016.0017},
   Abstract = {The Stiefel manifold V_{p,d} is the space of all d × p
             orthonormal matrices, with the (d-1)-dimensional hypersphere
             and the space of all orthogonal matrices constituting
             special cases. In modeling data lying on the Stiefel
             manifold, parametric distributions such as the matrix
             Langevin distribution are often used; however, model
             misspecification is a concern and it is desirable to have
             nonparametric alternatives. Current nonparametric methods
             are mainly Fréchet-mean based. We
             take a fully generative nonparametric approach, which relies
             on mixing parametric kernels such as the matrix Langevin.
             The proposed kernel mixtures can approximate a large class
             of distributions on the Stiefel manifold, and we develop
             theory showing posterior consistency. While there exists
             work developing general posterior consistency results,
             extending these results to this particular manifold requires
             substantial new theory. Posterior inference is illustrated
             on a dataset of near-Earth objects.},
   Doi = {10.5705/ss.202016.0017},
   Key = {fds325977}
}

@article{fds329115,
   Author = {Sarkar, A and Dunson, DB},
   Title = {Bayesian Nonparametric Modeling of Higher Order Markov
             Chains},
   Journal = {Journal of the American Statistical Association},
   Volume = {111},
   Number = {516},
   Pages = {1791-1803},
   Publisher = {Informa UK Limited},
   Year = {2016},
   Month = {October},
   url = {http://dx.doi.org/10.1080/01621459.2015.1115763},
   Abstract = {We consider the problem of flexible modeling of higher order
             Markov chains when an upper bound on the order of the chain
             is known but the true order and nature of the serial
             dependence are unknown. We propose Bayesian nonparametric
             methodology based on conditional tensor factorizations,
             which can characterize any transition probability with a
             specified maximal order. The methodology selects the
             important lags and captures higher order interactions among
             the lags, while also facilitating calculation of Bayes
             factors for a variety of hypotheses of interest. We design
             efficient Markov chain Monte Carlo algorithms for posterior
             computation, allowing for uncertainty in the set of
             important lags to be included and in the nature and order of
             the serial dependence. The methods are illustrated using
             simulation experiments and real world applications.
             Supplementary materials for this article are available
             online.},
   Doi = {10.1080/01621459.2015.1115763},
   Key = {fds329115}
}
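
A conditional tensor factorization writes P(x_t | x_{t-1}, ..., x_{t-q}) as a
core probability array mixed over soft clusterings of each lag; giving a lag
a single cluster effectively drops it from the model. A toy simulation under
this structure, with all dimensions and hyperparameters invented:

import numpy as np

rng = np.random.default_rng(0)
C = 3                              # number of states
k = (2, 1)                         # per-lag cluster sizes; k[1] = 1 drops lag 2

# pi_j[c, h] = P(h_j = h | x_{t-j} = c): soft clustering of each lag's value.
pis = [rng.dirichlet(np.ones(kj), size=C) for kj in k]
# lam[h1, h2] = transition distribution over states per cluster combination.
lam = rng.dirichlet(np.ones(C), size=k)

def trans_prob(lag1, lag2):
    """P(x_t = . | x_{t-1} = lag1, x_{t-2} = lag2) under the factorization."""
    prob = np.zeros(C)
    for h1 in range(k[0]):
        for h2 in range(k[1]):
            prob += pis[0][lag1, h1] * pis[1][lag2, h2] * lam[h1, h2]
    return prob

x = [0, 1]                         # initial lags
for _ in range(30):
    x.append(int(rng.choice(C, p=trans_prob(x[-1], x[-2]))))
print(x)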

@article{fds257874,
   Author = {Pati, D and Dunson, DB},
   Title = {Bayesian nonparametric regression with varying residual
             density.},
   Journal = {Annals of the Institute of Statistical Mathematics},
   Volume = {66},
   Number = {1},
   Pages = {1-31},
   Year = {2014},
   Month = {February},
   ISSN = {0020-3157},
   url = {http://dx.doi.org/10.1007/s10463-013-0415-z},
   Abstract = {We consider the problem of robust Bayesian inference on the
             mean regression function allowing the residual density to
             change flexibly with predictors. The proposed class of
             models is based on a Gaussian process prior for the mean
             regression function and mixtures of Gaussians for the
             collection of residual densities indexed by predictors.
             Initially considering the homoscedastic case, we propose
             priors for the residual density based on probit
             stick-breaking (PSB) scale mixtures and symmetrized PSB
             (sPSB) location-scale mixtures. Both priors restrict the
             residual density to be symmetric about zero, with the sPSB
             prior more flexible in allowing multimodal densities. We
             provide sufficient conditions to ensure strong posterior
             consistency in estimating the regression function under the
             sPSB prior, generalizing existing theory focused on
             parametric residual distributions. The PSB and sPSB priors
             are generalized to allow residual densities to change
             nonparametrically with predictors through incorporating
             Gaussian processes in the stick-breaking components. This
             leads to a robust Bayesian regression procedure that
             automatically down-weights outliers and influential
             observations in a locally-adaptive manner. Posterior
             computation relies on an efficient data augmentation exact
             block Gibbs sampler. The methods are illustrated using
             simulated and real data applications.},
   Doi = {10.1007/s10463-013-0415-z},
   Key = {fds257874}
}
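
In a probit stick-breaking prior, the beta sticks of the standard Dirichlet
process construction are replaced by probit-transformed Gaussians, which is
what later lets predictors enter through the Gaussian means. A minimal sketch
of the weights and a symmetric scale-mixture residual density (truncation
level and hyperparameters are illustrative):

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(0)
K = 20                                    # truncation level

alpha = rng.normal(0.0, 1.0, size=K)      # Gaussian stick variables
v = norm.cdf(alpha)                       # probit-transformed sticks in (0, 1)
w = v * np.concatenate(([1.0], np.cumprod(1.0 - v)[:-1]))
w[-1] = 1.0 - w[:-1].sum()                # close off the truncated weights

# Zero-mean Gaussian scale mixture: a residual density symmetric about zero.
scales = 1.0 / np.sqrt(rng.gamma(2.0, 1.0, size=K))
def residual_density(e):
    return float(np.sum(w * norm.pdf(e, loc=0.0, scale=scales)))
print(residual_density(0.0))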

@article{fds371510,
   Author = {Gu, Y and Dunson, DB},
   Title = {Bayesian Pyramids: identifiable multilayer discrete latent
             structure models for discrete data},
   Journal = {Journal of the Royal Statistical Society. Series B:
             Statistical Methodology},
   Volume = {85},
   Number = {2},
   Pages = {399-426},
   Year = {2023},
   Month = {April},
   url = {http://dx.doi.org/10.1093/jrsssb/qkad010},
   Abstract = {High-dimensional categorical data are routinely collected in
             biomedical and social sciences. It is of great importance to
             build interpretable parsimonious models that perform
             dimension reduction and uncover meaningful latent structures
             from such discrete data. Identifiability is a fundamental
             requirement for valid modeling and inference in such
             scenarios, yet is challenging to address when there are
             complex latent structures. In this article, we propose a
             class of identifiable multilayer (potentially deep) discrete
             latent structure models for discrete data, termed Bayesian
             Pyramids. We establish the identifiability of Bayesian
             Pyramids by developing novel transparent conditions on the
             pyramid-shaped deep latent directed graph. The proposed
             identifiability conditions can ensure Bayesian posterior
             consistency under suitable priors. As an illustration, we
             consider the two-latent-layer model and propose a Bayesian
             shrinkage estimation approach. Simulation results for this
             model corroborate the identifiability and estimatability of
             model parameters. Applications of the methodology to DNA
             nucleotide sequence data uncover useful discrete latent
             features that are highly predictive of sequence types. The
             proposed framework provides a recipe for interpretable
             unsupervised learning of discrete data and can be a useful
             alternative to popular machine learning methods.},
   Doi = {10.1093/jrsssb/qkad010},
   Key = {fds371510}
}

@article{fds258059,
   Author = {Dunson, DB and Herring, AH and Engel, SM},
   Title = {Bayesian selection and clustering of polymorphisms in
             functionally related genes},
   Journal = {Journal of the American Statistical Association},
   Volume = {103},
   Number = {482},
   Pages = {534-546},
   Publisher = {Informa UK Limited},
   Year = {2008},
   Month = {June},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214507000000554},
   Abstract = {In epidemiologic studies, there is often interest in
             assessing the relationship between polymorphisms in
             functionally related genes and a health outcome. For each
             candidate gene, single nucleotide polymorphism (SNP) data
             are collected at a number of locations, resulting in a large
             number of possible genotypes. Because analyses that include
             all the SNPs can be unstable, dimensionality
             is typically reduced by conducting single SNP analyses or
             attempting to identify haplotypes. This article proposes an
             alternative Bayesian approach for reducing dimensionality. A
             multilevel Dirichlet process prior is used for the
             distribution of the SNP-specific regression coefficients
             within genes, incorporating a variable selection-type
             mixture structure to allow SNPs with no effect. This
             structure allows simultaneous selection of important SNPs
             and soft clustering of SNPs having similar impact on the
             health outcome. The methods are illustrated using data from
             a study of pro- and anti-inflammatory cytokine polymorphisms
             and spontaneous preterm birth.},
   Doi = {10.1198/016214507000000554},
   Key = {fds258059}
}

@article{fds258061,
   Author = {Scarpa, B and Dunson, DB and Giacchi, E},
   Title = {Bayesian selection of optimal rules for timing intercourse
             to conceive by using calendar and mucus.},
   Journal = {Fertility and Sterility},
   Volume = {88},
   Number = {4},
   Pages = {915-924},
   Year = {2007},
   Month = {October},
   ISSN = {0015-0282},
   url = {http://dx.doi.org/10.1016/j.fertnstert.2006.12.017},
   Abstract = {Objective: To find optimal clinical rules that
             maximize the probability of conception while limiting the
             number of intercourse days required. Design: Multicenter
             prospective study. Women were followed prospectively while
             they kept daily records of menstrual bleeding, intercourse,
             and mucus symptom characteristics. In some cycles, women
             sought to conceive, whereas in other cycles, they sought to
             avoid pregnancy. Setting: Four centers providing services on
             fertility awareness. Patient(s): One hundred ninety-one
             healthy women using the Billings Ovulation Method. Women
             were invited to enroll by their instructors if they
             satisfied the entry criteria. We excluded cycles in which
             mucus was not recorded on a day with intercourse.
             Intervention(s): None. Main outcome measure(s): Clinically
             identified pregnancies. There were 161 clinically identified
             pregnancies in 2,536 menstrual cycles from 191 women.
             Result(s): Our approach relies on a statistical model that
             relates daily predictors, such as type of mucus symptom, to
             the day-specific probabilities of conception. By using
             Bayesian methods to search over a large set of possible
             clinical rules, focusing on rules based on calendar and
             mucus, we found that simple rules that are based on days
             within the midcycle calendar interval that also have the
             most fertile-type mucus symptom present have high utility.
             Conclusion(s): Couples can shorten their time to pregnancy
             efficiently by timing intercourse on days that the most
             fertile-type mucus symptom is observed at the
             vulva.},
   Doi = {10.1016/j.fertnstert.2006.12.017},
   Key = {fds258061}
}

@article{fds258023,
   Author = {Scarpa, B and Dunson, DB},
   Title = {Bayesian selection of predictors of conception probabilities
             across the menstrual cycle.},
   Journal = {Paediatric and Perinatal Epidemiology},
   Volume = {20 Suppl 1},
   Number = {SUPPL. 1},
   Pages = {30-37},
   Year = {2006},
   Month = {November},
   ISSN = {0269-5022},
   url = {http://dx.doi.org/10.1111/j.1365-3016.2006.00768.x},
   Abstract = {There is increasing interest in identifying predictors of
             human fertility, including environmental exposures,
             behavioural factors, and biomarkers, such as mucus or
             reproductive hormones. Epidemiological studies typically
             measure fecundability, the per menstrual cycle probability
             of conception, using time to pregnancy data. A critical
             predictor, which is often ignored in the design or analysis,
             is the timing of non-contracepting intercourse in the
             menstrual cycle. In order to limit confounding by
             behavioural differences between exposure groups, it may be
             preferable to base inferences on day-specific conception
             probabilities in relation to intercourse timing. This
             article proposes Bayesian methods for selection of
             predictors of day-specific conception probabilities. A
             particular focus is the case in which data on ovulation
             timing are not available. We focus on the selection of
             fertile days in the cycle during which conception
             probabilities are non-negligible and predictors may play a
             role. Data from recent European and Italian prospective
             studies of daily fecundability are presented, and the
             proposed approach is used to estimate cervical mucus effects
             within a mid-cycle potentially fertile window using data
             from the Italian study.},
   Doi = {10.1111/j.1365-3016.2006.00768.x},
   Key = {fds258023}
}

@article{fds258069,
   Author = {Pennell, ML and Dunson, DB},
   Title = {Bayesian semiparametric dynamic frailty models for multiple
             event time data.},
   Journal = {Biometrics},
   Volume = {62},
   Number = {4},
   Pages = {1044-1052},
   Year = {2006},
   Month = {December},
   ISSN = {0006-341X},
   url = {http://ftp.stat.duke.edu/WorkingPapers/04-27.html},
   Abstract = {Many biomedical studies collect data on times of occurrence
             for a health event that can occur repeatedly, such as
             infection, hospitalization, recurrence of disease, or tumor
             onset. To analyze such data, it is necessary to account for
             within-subject dependency in the multiple event times.
             Motivated by data from studies of palpable tumors, this
             article proposes a dynamic frailty model and Bayesian
             semiparametric approach to inference. The widely used shared
             frailty proportional hazards model is generalized to allow
             subject-specific frailties to change dynamically with age
             while also accommodating nonproportional hazards. Parametric
             assumptions on the frailty distribution are avoided by using
             Dirichlet process priors for a shared frailty and for
             multiplicative innovations on this frailty. By centering the
             semiparametric model on a conditionally conjugate dynamic
             gamma model, we facilitate posterior computation and
             lack-of-fit assessments of the parametric model. Our
             proposed method is demonstrated using data from a cancer
             chemoprevention study.},
   Doi = {10.1111/j.1541-0420.2006.00571.x},
   Key = {fds258069}
}

@article{fds258075,
   Author = {Dunson, DB},
   Title = {Bayesian semiparametric isotonic regression for count
             data},
   Journal = {Journal of the American Statistical Association},
   Volume = {100},
   Number = {470},
   Pages = {618-627},
   Publisher = {Informa UK Limited},
   Year = {2005},
   Month = {June},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214504000001457},
   Abstract = {This article proposes a semiparametric Bayesian approach for
             inference on an unknown isotonic regression function, f(x),
             characterizing the relationship between a continuous
             predictor, X, and a count response variable, Y, adjusting
             for covariates, Z. A Dirichlet process mixture of Poisson
             distributions is used to avoid parametric assumptions on the
             conditional distribution of Y given X and Z. Then, to also
             avoid parametric assumptions on f(x), a novel prior
             formulation is proposed that enforces the nondecreasing
             constraint and assigns positive prior probability to the
             null hypothesis of no association. Through the use of
             carefully tailored hyperprior distributions, we allow for
             borrowing of information across different regions of X in
             estimating f(x) and in assessing hypotheses about local
             increases in the function. Due to conjugacy properties,
             posterior computation is straightforward using a Markov
             chain Monte Carlo algorithm. The methods are illustrated
             using data from an epidemiologic study of sleep problems and
             obesity.},
   Doi = {10.1198/016214504000001457},
   Key = {fds258075}
}

@article{fds258051,
   Author = {Bigelow, JL and Dunson, DB},
   Title = {Bayesian semiparametric joint models for functional
             predictors.},
   Journal = {Journal of the American Statistical Association},
   Volume = {104},
   Number = {485},
   Pages = {26-36},
   Publisher = {Informa UK Limited},
   Year = {2009},
   Month = {January},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/jasa.2009.0001},
   Abstract = {Motivated by the need to understand and predict early
             pregnancy loss using hormonal indicators of pregnancy
             health, this paper proposes a semiparametric Bayes approach
             for assessing the relationship between functional predictors
             and a response. A multivariate adaptive spline model is used
             to describe the functional predictors, and a generalized
             linear model with a random intercept describes the response.
             Through specifying the random intercept to follow a
             Dirichlet process jointly with the random spline
             coefficients, we obtain a procedure that clusters
             trajectories according to shape and according to the
             parameters of the response model for each cluster. This very
             flexible method allows for the incorporation of covariates
             in the models for both the response and the trajectory. We
             apply the method to post-ovulatory progesterone data from
             the Early Pregnancy Study and find that the model
             successfully predicts early pregnancy loss.},
   Doi = {10.1198/jasa.2009.0001},
   Key = {fds258051}
}

@article{fds365239,
   Author = {Chakraborty, A and Ovaskainen, O and Dunson, DB},
   Title = {Bayesian semiparametric long memory models for discretized
             event data.},
   Journal = {The Annals of Applied Statistics},
   Volume = {16},
   Number = {3},
   Pages = {1380-1399},
   Year = {2022},
   Month = {September},
   url = {http://dx.doi.org/10.1214/21-aoas1546},
   Abstract = {We introduce a new class of semiparametric latent variable
             models for long memory discretized event data. The proposed
             methodology is motivated by a study of bird vocalizations in
             the Amazon rain forest; the timings of vocalizations exhibit
             self-similarity and long range dependence. This rules out
             Poisson process based models where the rate function itself
             is not long range dependent. The proposed class of
             FRActional Probit (FRAP) models is based on thresholding a
             latent process. This latent process is modeled as the
             additive combination of a smooth Gaussian process and a
             fractional Brownian motion. We develop a Bayesian
             approach to inference using Markov chain Monte Carlo and
             show good performance in simulation studies. Applying the
             methods to the Amazon bird vocalization data, we find
             substantial evidence for self-similarity and
             non-Markovian/Poisson dynamics. To accommodate the bird
             vocalization data in which there are many different species
             of birds exhibiting their own vocalization dynamics, a
             hierarchical expansion of FRAP is provided in the
             Supplementary Material.},
   Doi = {10.1214/21-aoas1546},
   Key = {fds365239}
}
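
The FRAP construction can be imitated by thresholding the sum of a smooth
Gaussian process path and a fractional Brownian motion path, the latter
supplying the long-range dependence. A toy simulation, not the paper's
sampler (Hurst index, kernel, threshold, and grid are invented):

import numpy as np

rng = np.random.default_rng(0)
n, H = 500, 0.8                  # time bins, Hurst index (H > 1/2: long memory)
t = np.arange(1, n + 1) / n

# Fractional Brownian motion via Cholesky of its covariance,
# Cov(B_s, B_t) = (s^{2H} + t^{2H} - |s - t|^{2H}) / 2.
cov_fbm = 0.5 * (t[:, None] ** (2 * H) + t[None, :] ** (2 * H)
                 - np.abs(t[:, None] - t[None, :]) ** (2 * H))
fbm = np.linalg.cholesky(cov_fbm + 1e-8 * np.eye(n)) @ rng.standard_normal(n)

# Smooth Gaussian process component (squared-exponential kernel).
cov_gp = np.exp(-0.5 * (t[:, None] - t[None, :]) ** 2 / 0.1 ** 2)
gp = np.linalg.cholesky(cov_gp + 1e-6 * np.eye(n)) @ rng.standard_normal(n)

events = (gp + fbm > 0.5).astype(int)   # threshold the additive latent path
print(events.sum(), "event bins out of", n)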

@article{fds335793,
   Author = {Sarkar, A and Chabout, J and Macopson, JJ and Jarvis, ED and Dunson,
             DB},
   Title = {Bayesian Semiparametric Mixed Effects Markov Models With
             Application to Vocalization Syntax},
   Journal = {Journal of the American Statistical Association},
   Volume = {113},
   Number = {524},
   Pages = {1515-1527},
   Publisher = {Informa UK Limited},
   Year = {2018},
   Month = {October},
   url = {http://dx.doi.org/10.1080/01621459.2018.1423986},
   Abstract = {Studying the neurological, genetic, and evolutionary basis
             of human vocal communication mechanisms using animal
             vocalization models is an important field of neuroscience.
             The datasets typically comprise structured sequences of
             syllables or “songs” produced by animals from different
             genotypes under different social contexts. It has been
             difficult to come up with sophisticated statistical methods
             that appropriately model animal vocal communication syntax.
             We address this need by developing a novel Bayesian
             semiparametric framework for inference in such datasets. Our
             approach is built on a novel class of mixed effects Markov
             transition models for the songs that accommodate exogenous
             influences of genotype and context as well as
             animal-specific heterogeneity. Crucial advantages of the
             proposed approach include its ability to provide insights
             into key scientific queries related to global and local
             influences of the exogenous predictors on the transition
             dynamics via automated tests of hypotheses. The methodology
             is illustrated using simulation experiments and the
             aforementioned motivating application in neuroscience.
             Supplementary materials for this article, including a
             standardized description of the materials available for
             reproducing the work, are available as an online
             supplement.},
   Doi = {10.1080/01621459.2018.1423986},
   Key = {fds335793}
}

@article{fds258041,
   Author = {Maclehose, RF and Dunson, DB},
   Title = {Bayesian semiparametric multiple shrinkage.},
   Journal = {Biometrics},
   Volume = {66},
   Number = {2},
   Pages = {455-462},
   Year = {2010},
   Month = {June},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2009.01275.x},
   Abstract = {High-dimensional and highly correlated data leading to non-
             or weakly identified effects are commonplace. Maximum
             likelihood will typically fail in such situations and a
             variety of shrinkage methods have been proposed. Standard
             techniques, such as ridge regression or the lasso, shrink
             estimates toward zero, with some approaches allowing
             coefficients to be selected out of the model by achieving a
             value of zero. When substantive information is available,
             estimates can be shrunk to nonnull values; however, such
             information may not be available. We propose a Bayesian
             semiparametric approach that allows shrinkage to multiple
             locations. Coefficients are given a mixture of heavy-tailed
             double exponential priors, with location and scale
             parameters assigned Dirichlet process hyperpriors to allow
             groups of coefficients to be shrunk toward the same,
             possibly nonzero, mean. Our approach favors sparse, but
             flexible, structure by shrinking toward a small number of
             random locations. The methods are illustrated using a study
             of genetic polymorphisms and Parkinson's
             disease.},
   Doi = {10.1111/j.1541-0420.2009.01275.x},
   Key = {fds258041}
}
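
One draw from a multiple-shrinkage prior of the kind proposed here can be
sketched with a truncated stick-breaking Dirichlet process over
location/scale pairs of double-exponential (Laplace) distributions, so that
groups of coefficients share a common, possibly nonzero, shrinkage target.
All settings below are illustrative:

import numpy as np

rng = np.random.default_rng(0)
p, K = 30, 10                          # coefficients, DP truncation level

# Stick-breaking weights (concentration parameter 1).
v = rng.beta(1.0, 1.0, size=K)
w = v * np.concatenate(([1.0], np.cumprod(1.0 - v)[:-1]))
w /= w.sum()

# Atom-specific Laplace locations and scales from a base measure.
locs = rng.normal(0.0, 2.0, size=K)
scales = rng.gamma(2.0, 0.5, size=K)

# Each coefficient picks an atom, then draws from the Laplace centered there,
# so clusters of coefficients are shrunk toward shared nonnull means.
atom = rng.choice(K, size=p, p=w)
beta = rng.laplace(loc=locs[atom], scale=scales[atom])
print(np.round(beta, 2))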

@article{fds258060,
   Author = {Yang, M and Dunson, DB},
   Title = {Bayesian semiparametric structural equation models with
             latent variables},
   Journal = {Psychometrika},
   Volume = {75},
   Number = {4},
   Pages = {675-693},
   Publisher = {Springer Nature},
   Year = {2010},
   ISSN = {0033-3123},
   url = {http://dx.doi.org/10.1007/s11336-010-9174-4},
   Abstract = {Structural equation models (SEMs) with latent variables are
             widely useful for sparse covariance structure modeling and
             for inferring relationships among latent variables. Bayesian
             SEMs are appealing in allowing for the incorporation of
             prior information and in providing exact posterior
             distributions of unknowns, including the latent variables.
             In this article, we propose a broad class of semiparametric
             Bayesian SEMs, which allow mixed categorical and continuous
             manifest variables while also allowing the latent variables
             to have unknown distributions. In order to include typical
             identifiability restrictions on the latent variable
             distributions, we rely on centered Dirichlet process (CDP)
             and CDP mixture (CDPM) models. The CDP will induce a latent
             class model with an unknown number of classes, while the
             CDPM will induce a latent trait model with unknown densities
             for the latent traits. A simple and efficient Markov chain
             Monte Carlo algorithm is developed for posterior
             computation, and the methods are illustrated using simulated
             examples and several applications. © 2010 The Psychometric
             Society.},
   Doi = {10.1007/s11336-010-9174-4},
   Key = {fds258060}
}

@article{fds346411,
   Author = {Chae, M and Lin, L and Dunson, DB},
   Title = {Bayesian sparse linear regression with unknown symmetric
             error},
   Journal = {Information and Inference},
   Volume = {8},
   Number = {3},
   Pages = {621-653},
   Year = {2019},
   Month = {September},
   url = {http://dx.doi.org/10.1093/imaiai/iay022},
   Abstract = {We study Bayesian procedures for sparse linear regression
             when the unknown error distribution is endowed with a
             non-parametric prior. Specifically, we put a symmetrized
             Dirichlet process mixture of Gaussian prior on the error
             density, where the mixing distributions are compactly
             supported. For the prior on regression coefficients, a
             mixture of point masses at zero and continuous distributions
             is considered. Under the assumption that the model is well
             specified, we study behavior of the posterior with diverging
             number of predictors. The compatibility and restricted
             eigenvalue conditions yield the minimax convergence rate of
             the regression coefficients in 1- and 2-norms, respectively.
             In addition, strong model selection consistency and a
             semi-parametric Bernstein-von Mises theorem are proven under
             slightly stronger conditions.},
   Doi = {10.1093/imaiai/iay022},
   Key = {fds346411}
}

@article{fds258038,
   Author = {Reich, BJ and Fuentes, M and Dunson, DB},
   Title = {Bayesian Spatial Quantile Regression.},
   Journal = {Journal of the American Statistical Association},
   Volume = {106},
   Number = {493},
   Pages = {6-20},
   Year = {2011},
   Month = {March},
   ISSN = {0162-1459},
   url = {http://hdl.handle.net/10161/2981},
   Abstract = {Tropospheric ozone is one of the six criteria pollutants
             regulated by the United States Environmental Protection
             Agency under the Clean Air Act and has been linked with
             several adverse health effects, including mortality. Due to
             the strong dependence on weather conditions, ozone may be
             sensitive to climate change and there is great interest in
             studying the potential effect of climate change on ozone,
             and how this change may affect public health. In this paper
             we develop a Bayesian spatial model to predict ozone under
             different meteorological conditions, and use this model to
             study spatial and temporal trends and to forecast ozone
             concentrations under different climate scenarios. We develop
             a spatial quantile regression model that does not assume
             normality and allows the covariates to affect the entire
             conditional distribution, rather than just the mean. The
             conditional distribution is allowed to vary from
             site-to-site and is smoothed with a spatial prior. For
             extremely large datasets our model is computationally
             infeasible, and we develop an approximate method. We apply
             the approximate version of our model to summer ozone from
             1997-2005 in the Eastern U.S., and use deterministic climate
             models to project ozone under future climate conditions. Our
             analysis suggests that holding all other factors fixed, an
             increase in daily average temperature will lead to the
             largest increase in ozone in the Industrial Midwest and
             Northeast.},
   Doi = {10.1198/jasa.2010.ap09237},
   Key = {fds258038}
}

@article{fds257866,
   Author = {Palomo, J and Dunson, DB and Bollen, K},
   Title = {Bayesian Structural Equation Modeling},
   Pages = {163-188},
   Publisher = {Elsevier},
   Year = {2007},
   Month = {December},
   url = {http://dx.doi.org/10.1016/B978-044452044-9/50011-2},
   Abstract = {This chapter focuses on Bayesian structural equation
             modeling. Structural equation models (SEMs) with latent
             variables are routinely used in social science research, and
             are of increasing importance in biomedical applications.
             Standard practice in implementing SEMs relies on frequentist
             methods. The chapter provides a simple and concise
             description of an alternative Bayesian approach. A
             description of the Bayesian specification of SEMs, and an
             outline of a Gibbs sampling strategy for model fitting is
             also presented. Bayesian inferences are illustrated through
             an industrialization and democratization case study. The
             Bayesian approach has some distinct advantages, due to the
             availability of samples from the joint posterior
             distribution of the model parameters and latent variables,
             which are highlighted in the chapter. These posterior
             samples provide important information not contained in the
             measurement and structural parameters. © 2007 Elsevier B.V.
             All rights reserved.},
   Doi = {10.1016/B978-044452044-9/50011-2},
   Key = {fds257866}
}

@article{fds329353,
   Author = {Guhaniyogi, R and Qamar, S and Dunson, DB},
   Title = {Bayesian tensor regression},
   Journal = {Journal of Machine Learning Research},
   Volume = {18},
   Pages = {1-31},
   Year = {2017},
   Month = {August},
   Abstract = {We propose a Bayesian approach to regression with a scalar
             response on vector and tensor covariates. Vectorization of
             the tensor prior to analysis fails to exploit the structure,
             often leading to poor estimation and predictive performance.
             We introduce a novel class of multiway shrinkage priors for
             tensor coefficients in the regression setting and present
             posterior consistency results under mild conditions. A
             computationally efficient Markov chain Monte Carlo algorithm
             is developed for posterior computation. Simulation studies
             illustrate substantial gains over existing tensor regression
             methods in terms of estimation and parameter inference. Our
             approach is further illustrated in a neuroimaging
             application.},
   Key = {fds329353}
}
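
% A minimal sketch (not from the paper): the rank-R CP (PARAFAC) low-rank
% structure that multiway shrinkage priors act on, fit here by naive
% alternating least squares on a synthetic order-2 tensor. Dimensions are
% illustrative; this is not the paper's shrinkage prior or MCMC algorithm.

import numpy as np

rng = np.random.default_rng(1)
p1, p2, R, n = 8, 8, 2, 200

# true low-rank coefficient: B = sum_r outer(b1_r, b2_r)
B_true = sum(np.outer(rng.normal(size=p1), rng.normal(size=p2)) for _ in range(R))
X = rng.normal(size=(n, p1, p2))
y = np.einsum("ijk,jk->i", X, B_true) + 0.1 * rng.normal(size=n)

# naive alternating least squares on the CP factors (no shrinkage)
B1 = rng.normal(size=(p1, R)); B2 = rng.normal(size=(p2, R))
for _ in range(50):
    F1 = np.einsum("ijk,kr->ijr", X, B2).reshape(n, p1 * R)
    B1 = np.linalg.lstsq(F1, y, rcond=None)[0].reshape(p1, R)
    F2 = np.einsum("ijk,jr->ikr", X, B1).reshape(n, p2 * R)
    B2 = np.linalg.lstsq(F2, y, rcond=None)[0].reshape(p2, R)

B_hat = B1 @ B2.T
print("relative error:", np.linalg.norm(B_hat - B_true) / np.linalg.norm(B_true))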

@article{fds362556,
   Author = {Roy, A and Borg, JS and Dunson, DB},
   Title = {Bayesian time-aligned factor analysis of paired multivariate
             time series.},
   Journal = {Journal of Machine Learning Research},
   Volume = {22},
   Pages = {250},
   Year = {2021},
   Month = {January},
   Abstract = {Many modern data sets require inference methods that can
             estimate the shared and individual-specific components of
             variability in collections of matrices that change over
             time. Promising methods have been developed to analyze these
             types of data in static cases, but only a few approaches are
             available for dynamic settings. To address this gap, we
             consider novel models and inference methods for pairs of
             matrices in which the columns correspond to multivariate
             observations at different time points. In order to
             characterize common and individual features, we propose a
             Bayesian dynamic factor modeling framework called Time
             Aligned Common and Individual Factor Analysis (TACIFA) that
             includes uncertainty in time alignment through an unknown
             warping function. We provide theoretical support for the
             proposed model, showing identifiability and posterior
             concentration. The structure enables efficient computation
             through a Hamiltonian Monte Carlo (HMC) algorithm. We show
             excellent performance in simulations, and illustrate the
             method through application to a social mimicry
             experiment.},
   Key = {fds362556}
}

@article{fds257872,
   Author = {Yu, K and Chen, CWS and Reed, C and Dunson, DB},
   Title = {Bayesian variable selection in quantile regression},
   Journal = {Statistics and its Interface},
   Volume = {6},
   Number = {2},
   Pages = {261-274},
   Publisher = {International Press of Boston},
   Year = {2013},
   Month = {January},
   ISSN = {1938-7989},
   url = {http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcApp=PARTNER_APP&SrcAuth=LinksAMR&KeyUT=WOS:000319964700009&DestLinkType=FullRecord&DestApp=ALL_WOS&UsrCustomerID=47d3190e77e5a3a53558812f597b0b92},
   Abstract = {In many applications, interest focuses on assessing
             relationships between predictors and the quantiles of the
             distribution of a continuous response. For example, in
             epidemiology studies, cutoffs to define premature delivery
             have been based on the 10th percentile of the distribution
             for gestational age at delivery. Using quantile regression,
             one can assess how this percentile varies with predictors
             instead of using a pre-defined cutoff. However, there is
             typically uncertainty in which of the many candidate
             predictors should be included. In order to identify
             important predictors and to build accurate predictive
             models, Bayesian methods for variable selection and model
             averaging are very useful. However, such methods are
             currently not available for quantile regression. This
             article develops Bayesian methods for variable selection,
             with a simple and efficient stochastic search variable
             selection (SSVS) algorithm proposed for posterior
             computation. This approach can be used for moderately
             high-dimensional variable selection and can accommodate
             uncertainty in basis function selection in non-linear and
             additive quantile regression models. The methods are
             illustrated using simulated data and an application to the
             Boston Housing data.},
   Doi = {10.4310/sii.2013.v6.n2.a9},
   Key = {fds257872}
}
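
% A minimal sketch (not from the paper): a textbook stochastic search
% variable selection (SSVS) Gibbs sampler for Gaussian mean regression with
% a continuous spike-and-slab prior. The paper develops the quantile
% analogue; here sigma^2 is fixed at 1 and the spike/slab scales are
% arbitrary choices.

import numpy as np

rng = np.random.default_rng(2)
n, p = 200, 10
X = rng.normal(size=(n, p))
beta_true = np.array([2.0, -1.5, 1.0] + [0.0] * (p - 3))
y = X @ beta_true + rng.normal(size=n)

tau0, tau1 = 0.01, 10.0        # spike and slab standard deviations
gamma = np.ones(p, dtype=int)  # inclusion indicators
keep = np.zeros(p)

def logN(b, s):                # log density of N(0, s^2) at b, up to a constant
    return -0.5 * (b / s) ** 2 - np.log(s)

for it in range(2000):
    # beta | gamma, y: conjugate multivariate normal
    D_inv = np.diag(1.0 / np.where(gamma == 1, tau1, tau0) ** 2)
    cov = np.linalg.inv(X.T @ X + D_inv)
    beta = rng.multivariate_normal(cov @ X.T @ y, cov)
    # gamma_j | beta_j: independent Bernoulli updates
    p1 = 1.0 / (1.0 + np.exp(logN(beta, tau0) - logN(beta, tau1)))
    gamma = rng.binomial(1, p1)
    if it >= 500:
        keep += gamma

print("posterior inclusion probabilities:", np.round(keep / 1500, 2))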

@article{fds258030,
   Author = {Shi, M and Dunson, DB},
   Title = {Bayesian Variable Selection via Particle Stochastic
             Search.},
   Journal = {Statistics & probability letters},
   Volume = {81},
   Number = {2},
   Pages = {283-291},
   Year = {2011},
   Month = {February},
   ISSN = {0167-7152},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/21278860},
   Abstract = {We focus on Bayesian variable selection in regression
             models. One challenge is to search the huge model space
             adequately, while identifying high posterior probability
             regions. In the past decades, the main focus has been on the
             use of Markov chain Monte Carlo (MCMC) algorithms for these
             purposes. In this article, we propose a new computational
             approach based on sequential Monte Carlo (SMC), which we
             refer to as particle stochastic search (PSS). We illustrate
             PSS through applications to linear regression and probit
             models.},
   Doi = {10.1016/j.spl.2010.10.011},
   Key = {fds258030}
}

@article{fds258026,
   Author = {Shterev, ID and Dunson, DB},
   Title = {Bayesian watermark attacks},
   Journal = {Proceedings of the 29th International Conference on Machine
             Learning, ICML 2012},
   Volume = {1},
   Pages = {695-702},
   Year = {2012},
   Month = {October},
   Abstract = {This paper presents an application of statistical machine
             learning to the field of watermarking. We propose a new
             attack model on additive spread-spectrum watermarking
             systems. The proposed attack is based on Bayesian
             statistics. We consider the scenario in which a watermark
             signal is repeatedly embedded in specific segments (signals)
             of the host data, possibly chosen based on a secret message
             bitstream. The host signal can represent a patch of
             pixels from an image or a video frame. We propose a
             probabilistic model that infers the embedded message
             bitstream and watermark signal, directly from the
             watermarked data, without access to the decoder. We develop
             an efficient Markov chain Monte Carlo sampler for updating
             the model parameters from their conjugate full conditional
             posteriors. We also provide a variational Bayesian solution,
             which further increases the convergence speed of the
             algorithm. Experiments with synthetic and real image signals
             demonstrate that the attack model is able to correctly infer
             a large part of the message bitstream and obtain a very
             accurate estimate of the watermark signal. Copyright 2012 by
             the author(s)/owner(s).},
   Key = {fds258026}
}
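
% A minimal sketch (not from the paper): the additive spread-spectrum
% observation model that such an attack inverts, z_t = x_t + a * b_t * w,
% plus a naive SVD-based watermark estimate. The notation (a, b_t, w) and
% all settings are illustrative; the paper's attack is Bayesian (MCMC and
% variational), not the SVD shortcut shown here.

import numpy as np

rng = np.random.default_rng(3)
T, N, a = 200, 64, 1.0          # segments, segment length, embedding strength
w = rng.normal(size=N)
w /= np.linalg.norm(w)          # secret watermark direction
bits = rng.choice([-1, 1], size=T)
host = rng.normal(scale=0.5, size=(T, N))
z = host + a * bits[:, None] * w   # watermarked segments

# naive attacker: the leading right singular vector of the watermarked
# segments recovers the watermark direction up to sign
_, _, vt = np.linalg.svd(z - z.mean(axis=0), full_matrices=False)
print("|corr(w, w_hat)| =", round(abs(w @ vt[0]), 3))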

@article{fds257830,
   Author = {Li, D and Wilcox, AJ and Dunson, DB},
   Title = {Benchmark pregnancy rates and the assessment of post-coital
             contraceptives: an update.},
   Journal = {Contraception},
   Volume = {91},
   Number = {4},
   Pages = {344-349},
   Year = {2015},
   Month = {April},
   ISSN = {0010-7824},
   url = {http://dx.doi.org/10.1016/j.contraception.2015.01.002},
   Abstract = {Objective: In 2001, we provided benchmark estimates
             of probability of pregnancy given a single act of
             intercourse. Those calculations assumed that intercourse and
             ovulation are independent. Subsequent research has shown
             that this assumption is not valid. We provide here an update
             of previous benchmark estimates. Study design: We
             reanalyze earlier data from two North Carolina studies that
             collected daily urine samples and recorded daily intercourse
             for multiple menstrual cycles. One study comprised 68
             sexually active women with either an intrauterine device or
             tubal ligation. The second was of 221 women who planned to
             become pregnant and had discontinued use of any birth
             control at enrollment. Participants had no known fertility
             problems. New statistical analyses were based on Monte Carlo
             simulations and Bayesian methods. Results: The
             probability that a single act of intercourse occurs within a
             woman's fertile window is 25%, compared with 20% in previous
             calculations. The probability of pregnancy with intercourse
             on a given menstrual cycle day is correspondingly higher
             than previously estimated, with the largest increases
             occurring on menstrual days 12-22. These increases are,
             however, fairly small (for example, the peak chance of
             conception on menstrual day 13 increased from 8.6% to
             9.7%). Conclusions: Previous benchmark rates of
             pregnancy with one act of intercourse were moderately
             underestimated due to a mistaken assumption about the
             independence of intercourse and ovulation. Implications
             statement: The chance of pregnancy with a single act of
             unprotected intercourse is greater than previously
             estimated. Previous benchmarks may underestimate the
             efficacy of post-coital contraception.},
   Doi = {10.1016/j.contraception.2015.01.002},
   Key = {fds257830}
}

@article{fds323268,
   Author = {Zhou, M and Hannah, LA and Dunson, DB and Carin, L},
   Title = {Beta-negative binomial process and poisson factor
             analysis},
   Journal = {Journal of Machine Learning Research},
   Volume = {22},
   Pages = {1462-1471},
   Year = {2012},
   Month = {January},
   Abstract = {A beta-negative binomial (BNB) process is proposed, leading
             to a beta-gamma-Poisson process, which may be viewed as a
             "multiscoop" generalization of the beta-Bernoulli process.
             The BNB process is augmented into a beta-gamma-gamma-Poisson
             hierarchical structure, and applied as a nonparametric
             Bayesian prior for an infinite Poisson factor analysis
             model. A finite approximation for the beta process Lévy
             random measure is constructed for convenient implementation.
             Efficient MCMC computations are performed with data
             augmentation and marginalization techniques. Encouraging
             results are shown on document count matrix
             factorization.},
   Key = {fds323268}
}
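
% A minimal sketch (not from the paper): a Gibbs sampler for finite
% gamma-Poisson factor analysis using the standard multinomial augmentation
% y_ij = sum_k y_ijk, the finite-K building block of Poisson factor
% analysis. Hyperparameters and sizes are arbitrary; the beta-negative
% binomial process construction is not implemented here.

import numpy as np

rng = np.random.default_rng(4)
I, J, K = 30, 40, 3
theta_true = rng.gamma(0.5, 1.0, size=(I, K))
phi_true = rng.gamma(0.5, 1.0, size=(K, J))
Y = rng.poisson(theta_true @ phi_true)

a = 0.5
theta = rng.gamma(1.0, 1.0, size=(I, K))
phi = rng.gamma(1.0, 1.0, size=(K, J))

for sweep in range(200):
    # allocate each count to factors (multinomial augmentation)
    L_theta = np.zeros((I, K)); L_phi = np.zeros((K, J))
    for i in range(I):
        for j in range(J):
            if Y[i, j] == 0:
                continue
            prob = theta[i] * phi[:, j]
            prob /= prob.sum()
            counts = rng.multinomial(Y[i, j], prob)
            L_theta[i] += counts
            L_phi[:, j] += counts
    # conjugate gamma updates
    theta = rng.gamma(a + L_theta, 1.0 / (1.0 + phi.sum(axis=1)))
    phi = rng.gamma(a + L_phi, 1.0 / (1.0 + theta.sum(axis=0))[:, None])

print("mean absolute rate error (single draw):",
      np.abs(theta @ phi - theta_true @ phi_true).mean())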

@article{fds257911,
   Author = {Chulada, PC and Arbes, SJ and Dunson, D and Zeldin,
             DC},
   Title = {Breast-feeding and the prevalence of asthma and wheeze in
             children: analyses from the Third National Health and
             Nutrition Examination Survey, 1988-1994.},
   Journal = {The Journal of allergy and clinical immunology},
   Volume = {111},
   Number = {2},
   Pages = {328-336},
   Year = {2003},
   Month = {February},
   url = {http://dx.doi.org/10.1067/mai.2003.127},
   Abstract = {Background: Asthma prevalence has increased
             dramatically in recent years, especially among children.
             Breast-feeding might protect children against asthma and
             related conditions (recurrent wheeze), and this protective
             effect might depend on the duration and exclusivity of the
             breast-feeding regimen. Objective: We sought to
             determine whether there is an association between
             breast-feeding and asthma, recurrent wheeze, or both in
             children up to 72 months of age and whether the duration and
             exclusivity of breast-feeding affect this
             association. Methods: Data were from the third
             National Health and Nutrition Examination Survey, a
             nationally representative cross-sectional survey conducted
             from 1988 to 1994. We tested for significant associations
             between breast-feeding and physician-diagnosed asthma and
             recurrent wheeze (> or =3 episodes in the past 12 months)
             before and after adjusting for potential
             confounders. Results: Crude analyses showed that
             breast-feeding was associated with significantly reduced
             risks for asthma and recurrent wheeze in children 2 to 71
             months of age, but after adjusting for potential
             confounders, these overall protective associations
             attenuated and were no longer statistically significant.
             However, 2 new and important associations were revealed
             after adjusting for confounders: (1) compared with never
             breast-fed children, ever breast-fed children had
             significantly reduced odds of being diagnosed with asthma
             and of having recurrent wheeze before 24 months of age, and
             (2) among children 2 to 71 months of age who had been
             exposed to environmental tobacco smoke, those who had ever
             been breast-fed had significantly reduced risks of asthma
             and wheeze compared with those who had never been
             breast-fed. Conclusions: Breast-feeding might delay
             the onset of or actively protect children less than 24
             months of age against asthma and recurrent wheeze.
             Breast-feeding might reduce the prevalence of asthma and
             recurrent wheeze in children exposed to environmental
             tobacco smoke.},
   Doi = {10.1067/mai.2003.127},
   Key = {fds257911}
}

@article{fds356939,
   Author = {Paganin, S and Herring, AH and Olshan, AF and Dunson, DB and National
             Birth Defects Prevention Study},
   Title = {Centered Partition Processes: Informative Priors for
             Clustering (with Discussion).},
   Journal = {Bayesian analysis},
   Volume = {16},
   Number = {1},
   Pages = {301-370},
   Year = {2021},
   Month = {March},
   url = {http://dx.doi.org/10.1214/20-ba1197},
   Abstract = {There is a very rich literature proposing Bayesian
             approaches for clustering starting with a prior probability
             distribution on partitions. Most approaches assume
             exchangeability, leading to simple representations in terms
             of Exchangeable Partition Probability Functions (EPPF).
             Gibbs-type priors encompass a broad class of such cases,
             including Dirichlet and Pitman-Yor processes. Even though
             there have been some proposals to relax the exchangeability
             assumption, allowing covariate-dependence and partial
             exchangeability, limited consideration has been given on how
             to include concrete prior knowledge on the partition. For
             example, we are motivated by an epidemiological application,
             in which we wish to cluster birth defects into groups and we
             have prior knowledge of an initial clustering provided by
             experts. As a general approach for including such prior
             knowledge, we propose a Centered Partition (CP) process that
             modifies the EPPF to favor partitions close to an initial
             one. Some properties of the CP prior are described, a
             general algorithm for posterior computation is developed,
             and we illustrate the methodology through simulation
             examples and an application to the motivating epidemiology
             study of birth defects.},
   Doi = {10.1214/20-ba1197},
   Key = {fds356939}
}
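
% A minimal sketch (not from the paper): the centering idea in miniature.
% Tilt an exchangeable partition prior toward a base partition c0 via
% p(c) proportional to p0(c) * exp(-psi * d(c, c0)), enumerating all
% partitions of a small set. The Dirichlet-process EPPF for p0 and the
% variation-of-information distance d are assumed concrete choices.

import numpy as np
from math import lgamma, log

def partitions(items):
    if not items:
        yield []
        return
    first, rest = items[0], items[1:]
    for smaller in partitions(rest):
        for k in range(len(smaller)):
            yield smaller[:k] + [smaller[k] + [first]] + smaller[k + 1:]
        yield [[first]] + smaller

def dp_log_eppf(part, alpha=1.0):
    # Dirichlet-process EPPF: alpha^K * prod_k (n_k - 1)!
    return len(part) * log(alpha) + sum(lgamma(len(b)) for b in part)

def vi_distance(p, q, n):
    def H(p_):  # entropy of block proportions
        return -sum(len(b) / n * log(len(b) / n) for b in p_)
    mutual = 0.0
    for b in p:
        for c in q:
            m = len(set(b) & set(c))
            if m:
                mutual += m / n * log(m * n / (len(b) * len(c)))
    return H(p) + H(q) - 2 * mutual

n, psi = 6, 2.0
c0 = [[0, 1, 2], [3, 4, 5]]                       # expert base partition
parts = list(partitions(list(range(n))))
logw = np.array([dp_log_eppf(p) - psi * vi_distance(p, c0, n) for p in parts])
w = np.exp(logw - logw.max()); w /= w.sum()
for i in np.argsort(-w)[:3]:
    print(round(w[i], 3), parts[i])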

@article{fds258017,
   Author = {Scarpa, B and Dunson, DB and Colombo, B},
   Title = {Cervical mucus secretions on the day of intercourse: an
             accurate marker of highly fertile days.},
   Journal = {European journal of obstetrics, gynecology, and reproductive
             biology},
   Volume = {125},
   Number = {1},
   Pages = {72-78},
   Year = {2006},
   Month = {March},
   url = {http://dx.doi.org/10.1016/j.ejogrb.2005.07.024},
   Abstract = {Objective: To provide estimates of the probabilities
             of conception according to vulvar mucus observations
             classified by the woman on the day of intercourse. Study
             design: Prospective cohort study of 193 outwardly healthy
             Italian women using the Billings Ovulation Method. Outcome
             measures include 161 conception cycles and 2594
             non-conception cycles with daily records of the type of
             mucus and the occurrences of sexual intercourse. Results: The
             probability of conception ranged from 0.003 for days with no
             noticeable secretions to 0.29 for days with most
             fertile-type mucus detected by the woman. The probability of
             most fertile type mucus by day of the menstrual cycle
             increased from values <20% outside of days 10-17 to a peak
             of 59% on day 13. Conclusion: Regardless of the timing
             of intercourse in the menstrual cycle, the probability of
             conception is essentially 0 on days with no secretions. This
             probability increases dramatically to near 30% on days with
             most fertile-type mucus, an association that accurately
             predicts both the timing of the fertile interval and the
             day-specific conception probabilities across the menstrual
             cycle.},
   Doi = {10.1016/j.ejogrb.2005.07.024},
   Key = {fds258017}
}

@article{fds257903,
   Author = {Dunson, DB and Colombo, B and Baird, DD},
   Title = {Changes with age in the level and duration of fertility in
             the menstrual cycle.},
   Journal = {Human reproduction (Oxford, England)},
   Volume = {17},
   Number = {5},
   Pages = {1399-1403},
   Year = {2002},
   Month = {May},
   ISSN = {0268-1161},
   url = {http://dx.doi.org/10.1093/humrep/17.5.1399},
   Abstract = {Background: Most analyses of age-related changes in
             fertility cannot separate effects due to reduced frequency
             of sexual intercourse from effects directly related to
             ageing. Information on intercourse collected daily through
             each menstrual cycle provides the data for estimating
             day-specific probabilities of pregnancy for specific days
             relative to ovulation, and these estimates allow
             unconfounded analysis of ageing effects. Methods: A
             total of 782 healthy couples using natural family planning
             methods contributed prospective data on 5860 menstrual
             cycles. Day of ovulation was based on basal body temperature
             measurements. Estimates of day-specific probabilities of
             pregnancy and the length of the fertile window were compared
             across age groups. Results: Nearly all pregnancies
             occurred within a 6 day fertile window. There was no
             evidence for a shorter fertile window in older men or women.
             On average, the day-specific probabilities of pregnancy
             declined with age for women from the late 20s onward, with
             probabilities of pregnancy twice as high for women aged
             19-26 years compared with women aged 35-39 years.
             Controlling for age of the woman, fertility was
             significantly reduced for men aged >35 years. Conclusions: Women's
             fertility begins to decline in the late 20s with substantial
             decreases by the late 30s. Fertility for men is less
             affected by age, but shows significant decline by the late
             30s.},
   Doi = {10.1093/humrep/17.5.1399},
   Key = {fds257903}
}

@article{fds362760,
   Author = {Zhu, Y and Li, C and Dunson, DB},
   Title = {Classification Trees for Imbalanced Data: Surface-to-Volume
             Regularization},
   Journal = {Journal of the American Statistical Association},
   Year = {2021},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2021.2005609},
   Abstract = {Classification algorithms face difficulties when one or more
             classes have limited training data. We are particularly
             interested in classification trees, due to their
             interpretability and flexibility. When data are limited in
             one or more of the classes, the estimated decision
             boundaries are often irregularly shaped due to the limited
             sample size, leading to poor generalization error. We
             propose a novel approach that penalizes the
             Surface-to-Volume Ratio (SVR) of the decision set, obtaining
             a new class of SVR-Tree algorithms. We develop a simple and
             computationally efficient implementation while proving
             estimation consistency for SVR-Tree and rate of convergence
             for an idealized empirical risk minimizer of SVR-Tree.
             SVR-Tree is compared with multiple algorithms that are
             designed to deal with imbalance through real data
             applications. Supplementary materials for this article are
             available online.},
   Doi = {10.1080/01621459.2021.2005609},
   Key = {fds362760}
}

@article{fds257873,
   Author = {Page, G and Bhattacharya, A and Dunson, D},
   Title = {Classification via bayesian nonparametric learning of affine
             subspaces},
   Journal = {Journal of the American Statistical Association},
   Volume = {108},
   Number = {501},
   Pages = {187-201},
   Publisher = {Informa UK Limited},
   Year = {2013},
   Month = {May},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2013.763566},
   Abstract = {It has become common for datasets to contain large numbers
             of variables in studies conducted in areas such as genetics,
             machine vision, image analysis, and many others. When
             analyzing such data, parametric models are often too
             inflexible while nonparametric procedures tend to be
             nonrobust because of insufficient data on these
             high-dimensional spaces. This is particularly true when
             interest lies in building efficient classifiers in the
             presence of many predictor variables. When dealing with
             these types of data, it is often the case that most of the
             variability tends to lie along a few directions, or more
             generally along a much smaller dimensional submanifold of
             the data space. In this article, we propose a class of
             models that flexibly learn about this submanifold while
             simultaneously performing dimension reduction in
             classification. This methodology allows the cell
             probabilities to vary nonparametrically based on a few
             coordinates expressed as linear combinations of the
             predictors. Also, as opposed to many black-box methods for
             dimensionality reduction, the proposed model is appealing in
             having clearly interpretable and identifiable parameters
             that provide insight into which predictors are important in
             determining accurate classification boundaries. Gibbs
             sampling methods are developed for posterior computation,
             and the methods are illustrated using simulated and real
             data applications. © 2013 American Statistical
             Association.},
   Doi = {10.1080/01621459.2013.763566},
   Key = {fds257873}
}

@article{fds355000,
   Author = {Li, D and Dunson, D},
   Title = {Classification via local manifold approximation},
   Journal = {Biometrika},
   Volume = {107},
   Number = {4},
   Pages = {1013-1020},
   Year = {2020},
   Month = {December},
   url = {http://dx.doi.org/10.1093/biomet/asaa033},
   Abstract = {Classifiers label data as belonging to one of a set of
             groups based on input features. It is challenging to achieve
             accurate classification when the feature distributions in
             the different classes are complex, with nonlinear,
             overlapping and intersecting supports. This is particularly
             true when training data are limited. To address this
             problem, we propose a new type of classifier based on
             obtaining a local approximation to the support of the data
             within each class in a neighbourhood of the feature to be
             classified, and assigning the feature to the class having
             the closest support. This general algorithm is referred to
             as local manifold approximation classification. As a simple
             and theoretically supported special case, which is shown to
             have excellent performance across a broad variety of
             examples, we use spheres for local approximation, obtaining
             a spherical approximation classifier.},
   Doi = {10.1093/biomet/asaa033},
   Key = {fds355000}
}
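
% A minimal sketch (not from the paper): the spherical special case in
% spirit. Classify a query by fitting a sphere (algebraic least squares) to
% its k nearest neighbours within each class and choosing the class whose
% fitted sphere is closest. The choice k=8, the two-circles data, and the
% fitting details are illustrative simplifications.

import numpy as np

def fit_sphere(P):
    # solve ||p||^2 = 2 c.p + b for (c, b); then r^2 = b + ||c||^2
    A = np.column_stack([2 * P, np.ones(len(P))])
    sol, *_ = np.linalg.lstsq(A, (P ** 2).sum(axis=1), rcond=None)
    c, b = sol[:-1], sol[-1]
    return c, np.sqrt(max(b + c @ c, 0.0))

def lma_classify(x, Xs, ys, k=8):
    dists = []
    for label in np.unique(ys):
        P = Xs[ys == label]
        P = P[np.argsort(((P - x) ** 2).sum(axis=1))[:k]]
        c, r = fit_sphere(P)
        dists.append((abs(np.linalg.norm(x - c) - r), label))
    return min(dists)[1]

rng = np.random.default_rng(5)
t = rng.uniform(0, 2 * np.pi, 200)
X0 = np.column_stack([np.cos(t), np.sin(t)]) + 0.05 * rng.normal(size=(200, 2))
X1 = 0.4 * np.column_stack([np.cos(t), np.sin(t)]) + 0.05 * rng.normal(size=(200, 2))
Xs = np.vstack([X0, X1]); ys = np.repeat([0, 1], 200)
print("predicted class:", lma_classify(np.array([0.9, 0.1]), Xs, ys))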

@article{fds257991,
   Author = {Wang, C and Liao, X and Carin, L and Dunson, DB},
   Title = {Classification with Incomplete Data Using Dirichlet Process
             Priors.},
   Journal = {Journal of Machine Learning Research},
   Volume = {11},
   Pages = {3269-3311},
   Year = {2010},
   Month = {March},
   ISSN = {1532-4435},
   Abstract = {A non-parametric hierarchical Bayesian framework is
             developed for designing a classifier, based on a mixture of
             simple (linear) classifiers. Each simple classifier is
             termed a local "expert", and the number of experts and their
             construction are manifested via a Dirichlet process
             formulation. The simple form of the "experts" allows
             analytical handling of incomplete data. The model is
             extended to allow simultaneous design of classifiers on
             multiple data sets, termed multi-task learning, with this
             also performed non-parametrically via the Dirichlet process.
             Fast inference is performed using variational Bayesian (VB)
             analysis, and example results are presented for several data
             sets. We also perform inference via Gibbs sampling, to which
             we compare the VB results.},
   Key = {fds257991}
}

@article{fds362729,
   Author = {Lum, K and Dunson, DB and Johndrow, J},
   Title = {Closer than they appear: A Bayesian perspective on
             individual-level heterogeneity in risk assessment},
   Journal = {Journal of the Royal Statistical Society. Series A:
             Statistics in Society},
   Volume = {185},
   Number = {2},
   Pages = {588-614},
   Year = {2022},
   Month = {April},
   url = {http://dx.doi.org/10.1111/rssa.12792},
   Abstract = {Risk assessment instruments are used across the criminal
             justice system to estimate the probability of some future
             event, such as failure to appear for a court appointment or
             re-arrest. The estimated probabilities are then used in
             making decisions at the individual level. In the past, there
             has been controversy about whether the probabilities derived
             from group-level calculations can meaningfully be applied to
             individuals. Using Bayesian hierarchical models applied to a
             large longitudinal dataset from the court system in the
             state of Kentucky, we analyse variation in individual-level
             probabilities of failing to appear for court and the extent
             to which it is captured by covariates. We find that
             individuals within the same risk group vary widely in their
             probability of the outcome. In practice, this means that
             allocating individuals to risk groups based on standard
             approaches to risk assessment, in large part, results in
             creating distinctions among individuals who are not
             meaningfully different in terms of their likelihood of the
             outcome. This is because uncertainty about the probability
             that any particular individual will fail to appear is large
             relative to the difference in average probabilities among
             any reasonable set of risk groups.},
   Doi = {10.1111/rssa.12792},
   Key = {fds362729}
}
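
% A minimal sketch (not from the paper): individual-level heterogeneity
% under a toy beta-binomial hierarchy. People in one "risk group" share a
% Beta prior, yet with few observations per person their posterior risk
% estimates remain widely dispersed. Entirely synthetic; not the Kentucky
% court data or the paper's hierarchical model.

import numpy as np
from scipy.stats import beta as beta_dist

rng = np.random.default_rng(6)
m = 1000                              # people assigned to one "risk group"
a0, b0 = 2.0, 6.0                     # group-level Beta prior (mean 0.25)
p_true = rng.beta(a0, b0, size=m)     # true individual probabilities
trials = rng.integers(1, 6, size=m)   # few observed court dates per person
events = rng.binomial(trials, p_true)

# conjugate Beta posterior for each individual's probability
post_mean = (a0 + events) / (a0 + b0 + trials)
lo = beta_dist.ppf(0.05, a0 + events, b0 + trials - events)
hi = beta_dist.ppf(0.95, a0 + events, b0 + trials - events)

print("spread of individual posterior means:",
      np.round(np.quantile(post_mean, [0.05, 0.5, 0.95]), 3))
print("median width of 90% credible intervals:", round(np.median(hi - lo), 3))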

@article{fds257835,
   Author = {Dunson, DB},
   Title = {Comment},
   Journal = {Journal of the American Statistical Association},
   Volume = {109},
   Number = {507},
   Pages = {890-891},
   Publisher = {Informa UK Limited},
   Year = {2014},
   Month = {July},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2014.955988},
   Doi = {10.1080/01621459.2014.955988},
   Key = {fds257835}
}

@article{fds257949,
   Author = {Dunson, DB},
   Title = {Comment},
   Journal = {Journal of the American Statistical Association},
   Volume = {103},
   Number = {481},
   Pages = {40-41},
   Publisher = {Informa UK Limited},
   Year = {2008},
   Month = {March},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214507000001436},
   Doi = {10.1198/016214507000001436},
   Key = {fds257949}
}

@article{fds258024,
   Author = {Dunson, DB},
   Title = {Comment on article by Craigmile et al.},
   Journal = {Bayesian Analysis},
   Volume = {4},
   Number = {1},
   Pages = {41-44},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2009},
   Month = {December},
   ISSN = {1936-0975},
   url = {http://dx.doi.org/10.1214/09-BA401B},
   Doi = {10.1214/09-BA401B},
   Key = {fds258024}
}

@article{fds257897,
   Author = {Dunson, DB},
   Title = {Commentary: practical advantages of Bayesian analysis of
             epidemiologic data.},
   Journal = {American journal of epidemiology},
   Volume = {153},
   Number = {12},
   Pages = {1222-1226},
   Year = {2001},
   Month = {June},
   url = {http://dx.doi.org/10.1093/aje/153.12.1222},
   Abstract = {In the past decade, there have been enormous advances in the
             use of Bayesian methodology for analysis of epidemiologic
             data, and there are now many practical advantages to the
             Bayesian approach. Bayesian models can easily accommodate
             unobserved variables such as an individual's true disease
             status in the presence of diagnostic error. The use of prior
             probability distributions represents a powerful mechanism
             for incorporating information from previous studies and for
             controlling confounding. Posterior probabilities can be used
             as easily interpretable alternatives to p values. Recent
             developments in Markov chain Monte Carlo methodology
             facilitate the implementation of Bayesian analyses of
             complex data sets containing missing observations and
             multidimensional outcomes. Tools are now available that
             allow epidemiologists to take advantage of this powerful
             approach to assessment of exposure-disease
             relations.},
   Doi = {10.1093/aje/153.12.1222},
   Key = {fds257897}
}

@article{fds258016,
   Author = {Gueorguieva, RV},
   Title = {Comments about Joint Modeling of Cluster Size and Binary and
             Continuous Subunit-Specific Outcomes.},
   Journal = {Biometrics},
   Volume = {61},
   Number = {3},
   Pages = {862-866},
   Year = {2005},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-020x.2005.00409_1.x},
   Abstract = {In longitudinal studies and in clustered situations often
             binary and continuous response variables are observed and
             need to be modeled together. In a recent publication Dunson,
             Chen, and Harry (2003, Biometrics 59, 521-530) (DCH) propose
             a Bayesian approach for joint modeling of cluster size and
             binary and continuous subunit-specific outcomes and
             illustrate this approach with a developmental toxicity data
             example. In this note we demonstrate how standard software
             (PROC NLMIXED in SAS) can be used to obtain maximum
             likelihood estimates in an alternative parameterization of
             the model with a single cluster-level factor considered by
             DCH for that example. We also suggest that a more general
             model with additional cluster-level random effects provides
             a better fit to the data set. An apparent discrepancy
             between the estimates obtained by DCH and the estimates
             obtained earlier by Catalano and Ryan (1992, Journal of the
             American Statistical Association 87, 651-658) is also
             resolved. The issue of bias in inferences concerning the
             dose effect when cluster size is ignored is discussed. The
             maximum-likelihood approach considered herein is applicable
             to general situations with multiple clustered or
             longitudinally measured outcomes of different type and does
             not require prior specification and extensive
             programming.},
   Doi = {10.1111/j.1541-020x.2005.00409_1.x},
   Key = {fds258016}
}

@article{fds342828,
   Author = {Wang, L and Zhang, Z and Dunson, DB},
   Title = {Common and individual structure of multiple
             networks},
   Journal = {Annals of Applied Statistics},
   Volume = {13},
   Number = {1},
   Pages = {85-112},
   Year = {2019},
   Month = {January},
   url = {http://dx.doi.org/10.1214/18-AOAS1193},
   Abstract = {This article focuses on the problem of studying shared- and
             individual-specific structure in replicated networks or
             graph-valued data. In particular, the observed data consist
             of n graphs, G_i, i = 1, ..., n, with each graph
             consisting of a collection of edges between V nodes. In
             brain connectomics, the graph for an individual corresponds
             to a set of interconnections among brain regions. Such data
             can be organized as a V × V binary adjacency matrix A_i for
             each i, with ones indicating an edge between a pair of nodes
             and zeros indicating no edge. When nodes have a shared
             meaning across replicates i = 1, ..., n, it becomes of
             substantial interest to study similarities and differences
             in the adjacency matrices. To address this problem, we
             propose a method to estimate a common structure and
             low-dimensional individual-specific deviations from
             replicated networks. The proposed Multiple GRAph
             Factorization (M-GRAF) model relies on a logistic regression
             mapping combined with a hierarchical eigenvalue
             decomposition. We develop an efficient algorithm for
             estimation and study basic properties of our approach.
             Simulation studies show excellent operating characteristics
             and we apply the method to human brain connectomics
             data.},
   Doi = {10.1214/18-AOAS1193},
   Key = {fds342828}
}
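
% A minimal sketch (not from the paper): a crude moment-based split of
% replicated binary networks into a common part (the empirical logit of
% across-subject edge frequencies) and a low-rank individual deviation
% (top-K eigencomponents of one subject's residual). Plain numpy; this is
% not the M-GRAF estimation algorithm, and all sizes are illustrative.

import numpy as np

rng = np.random.default_rng(7)
n, V, K = 40, 30, 2
Z = rng.normal(scale=1.2, size=(V, V)); Z = (Z + Z.T) / 2     # shared logits
A = np.empty((n, V, V))
for i in range(n):
    U = rng.normal(size=(V, K))
    P = 1 / (1 + np.exp(-(Z + U @ U.T / 4)))                  # individual bump
    upper = np.triu(rng.random((V, V)) < P, 1)
    A[i] = upper + upper.T

freq = A.mean(axis=0).clip(1e-3, 1 - 1e-3)
Z_hat = np.log(freq / (1 - freq))                             # common structure
P_hat = 1 / (1 + np.exp(-Z_hat))

R = A[0] - P_hat                                              # subject 0 residual
vals, vecs = np.linalg.eigh(R)
top = np.argsort(-np.abs(vals))[:K]
D0 = (vecs[:, top] * vals[top]) @ vecs[:, top].T              # low-rank deviation
print("residual norm before/after deviation:",
      round(np.linalg.norm(R), 2), round(np.linalg.norm(R - D0), 2))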

@article{fds344775,
   Author = {Li, M and Dunson, DB},
   Title = {Comparing and weighting imperfect models using
             D-probabilities.},
   Journal = {Journal of the American Statistical Association},
   Volume = {115},
   Number = {531},
   Pages = {1349-1360},
   Year = {2020},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2019.1611140},
   Abstract = {We propose a new approach for assigning weights to models
             using a divergence-based method (<i>D-probabilities</i>),
             relying on evaluating parametric models relative to a
             nonparametric Bayesian reference using Kullback-Leibler
             divergence. D-probabilities are useful in goodness-of-fit
             assessments, in comparing imperfect models, and in providing
             model weights to be used in model aggregation.
             D-probabilities avoid some of the disadvantages of Bayesian
             model probabilities, such as large sensitivity to prior
             choice, and tend to place higher weight on a greater
             diversity of models. In an application to linear model
             selection against a Gaussian process reference, we provide
             simple analytic forms for routine implementation and show
             that D-probabilities automatically penalize model
             complexity. Some asymptotic properties are described, and we
             provide interesting probabilistic interpretations of the
             proposed model weights. The framework is illustrated through
             simulation examples and an ozone data application.},
   Doi = {10.1080/01621459.2019.1611140},
   Key = {fds344775}
}
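
% A minimal sketch (not from the paper): divergence-based model weights in
% caricature. Parametric models are scored by estimated KL divergence from
% a flexible reference density, with weights proportional to exp(-n * KL).
% A kernel density estimate stands in for the paper's nonparametric Bayes
% reference, and the exponential weighting rule is an assumed simplification.

import numpy as np
from scipy import stats

rng = np.random.default_rng(8)
y = rng.gamma(2.0, 2.0, size=300)            # skewed data

ref = stats.gaussian_kde(y)                   # flexible reference density
models = {
    "normal": stats.norm(y.mean(), y.std()),
    "gamma": stats.gamma(*stats.gamma.fit(y, floc=0)),
}

draws = rng.choice(y, size=2000)              # crude Monte Carlo under the reference
kl = {name: np.mean(np.log(ref(draws) / m.pdf(draws)))
      for name, m in models.items()}
logw = -len(y) * np.array([kl[k] for k in models])
w = np.exp(logw - logw.max()); w /= w.sum()
for name, wi in zip(models, w):
    print(f"{name}: KL_hat={kl[name]:.3f}, weight={wi:.3f}")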

@article{fds365274,
   Author = {Aliverti, E and Dunson, DB},
   Title = {Composite mixture of log-linear models with application
             to psychiatric studies.},
   Journal = {The Annals of Applied Statistics},
   Volume = {16},
   Number = {2},
   Pages = {765-790},
   Year = {2022},
   Month = {June},
   url = {http://dx.doi.org/10.1214/21-aoas1515},
   Abstract = {Psychiatric studies of suicide provide fundamental insights
             on the evolution of severe psychopathologies, and contribute
             to the development of early treatment interventions. Our
             focus is on modelling different traits of psychosis and
             their interconnections, focusing on a case study on suicide
             attempt survivors. Such aspects are recorded via
             multivariate categorical data, involving a large number of
             items for multiple subjects. Current methods for
             multivariate categorical data-such as penalized log-linear
             models and latent structure analysis-are either limited to
             low-dimensional settings or include parameters with
             difficult interpretation. Motivated by this application,
             this article proposes a new class of approaches, which we
             refer to as Mixture of Log-Linear models (MILLS). Combining
             latent class analysis and log-linear models, MILLS defines a
             novel Bayesian approach to model complex multivariate
             categorical data with flexibility and interpretability,
             providing interesting insights on the relationship between
             psychotic diseases and psychological aspects in suicide
             attempt survivors.},
   Doi = {10.1214/21-aoas1515},
   Key = {fds365274}
}

@article{fds322538,
   Author = {Guhaniyogi, R and Dunson, DB},
   Title = {Compressed Gaussian process for manifold
             regression},
   Journal = {Journal of Machine Learning Research},
   Volume = {17},
   Year = {2016},
   Month = {May},
   Abstract = {Nonparametric regression for large numbers of features (p)
             is an increasingly important problem. If the sample size n
             is massive, a common strategy is to partition the feature
             space, and then separately apply simple models to each
             partition set. This is not ideal when n is modest relative
             to p, and we propose an alternative approach relying on
             random compression of the feature vector combined with
             Gaussian process regression. The proposed approach is
             particularly motivated by the setting in which the response
             is conditionally independent of the features given the
             projection to a low dimensional manifold. Conditionally on
             the random compression matrix and a smoothness parameter,
             the posterior distribution for the regression surface and
             posterior predictive distributions are available
             analytically. Running the analysis in parallel for many
             random compression matrices and smoothness parameters, model
             averaging is used to combine the results. The algorithm can
             be implemented rapidly even in very large p and moderately
             large n nonparametric regression, has strong theoretical
             justification, and is found to yield state of the art
             predictive performance.},
   Key = {fds322538}
}
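
% A minimal sketch (not from the paper): the mechanics of compressed GP
% regression. Project features with random Gaussian matrices, run ordinary
% GP regression on the compressed inputs, and average predictions over
% projections. Equal-weight averaging and a fixed length-scale replace the
% paper's analytic model averaging over compressions and smoothness.

import numpy as np

rng = np.random.default_rng(9)
n, p, m = 150, 100, 5
X = rng.normal(size=(n, p))
f_true = lambda A: np.sin(A[:, 0]) + 0.5 * A[:, 1]   # depends on few coords
y = f_true(X) + 0.1 * rng.normal(size=n)
x_new = rng.normal(size=(1, p))

def gp_predict(Xtr, ytr, Xte, ls=3.0, noise=0.01):
    # squared-exponential GP posterior mean
    def k(A, B):
        d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
        return np.exp(-0.5 * d2 / ls ** 2)
    K = k(Xtr, Xtr) + noise * np.eye(len(Xtr))
    return k(Xte, Xtr) @ np.linalg.solve(K, ytr)

preds = []
for _ in range(20):                  # average over random compressions
    Phi = rng.normal(size=(p, m)) / np.sqrt(p)
    preds.append(gp_predict(X @ Phi, y, x_new @ Phi))
print("averaged prediction:", round(np.mean(preds), 3))
print("true value:", round(f_true(x_new)[0], 3))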

@article{fds257989,
   Author = {Chen, M and Silva, J and Paisley, J and Wang, C and Dunson, D and Carin,
             L},
   Title = {Compressive Sensing on Manifolds Using a Nonparametric
             Mixture of Factor Analyzers: Algorithm and Performance
             Bounds.},
   Journal = {IEEE Transactions on Signal Processing},
   Volume = {58},
   Number = {12},
   Pages = {6140-6155},
   Year = {2010},
   Month = {December},
   ISSN = {1053-587X},
   url = {http://dx.doi.org/10.1109/tsp.2010.2070796},
   Abstract = {Nonparametric Bayesian methods are employed to constitute a
             mixture of low-rank Gaussians, for data x ∈ ℝ^N that are of
             high dimension N but are constrained to reside in a
             low-dimensional subregion of ℝ^N. The number of mixture
             components and their rank are inferred automatically from
             the data. The resulting algorithm can be used for learning
             manifolds and for reconstructing signals from manifolds,
             based on compressive sensing (CS) projection measurements.
             The statistical CS inversion is performed analytically. We
             derive the required number of CS random measurements needed
             for successful reconstruction, based on easily-computed
             quantities, drawing on block-sparsity properties. The
             proposed methodology is validated on several synthetic and
             real datasets.},
   Doi = {10.1109/tsp.2010.2070796},
   Key = {fds257989}
}

@article{fds348077,
   Author = {Tikhonov, G and Duan, L and Abrego, N and Newell, G and White, M and Dunson, D and Ovaskainen, O},
   Title = {Computationally efficient joint species distribution
             modeling of big spatial data.},
   Journal = {Ecology},
   Volume = {101},
   Number = {2},
   Pages = {e02929},
   Year = {2020},
   Month = {February},
   url = {http://dx.doi.org/10.1002/ecy.2929},
   Abstract = {The ongoing global change and the increased interest in
             macroecological processes call for the analysis of spatially
             extensive data on species communities to understand and
             forecast distributional changes of biodiversity. Recently
             developed joint species distribution models can deal with
             numerous species efficiently, while explicitly accounting
             for spatial structure in the data. However, their
             applicability is generally limited to relatively small
             spatial data sets because of their severe computational
             scaling as the number of spatial locations increases. In
             this work, we propose a practical alleviation of this
             scalability constraint for joint species modeling by
             exploiting two spatial-statistics techniques that facilitate
             the analysis of large spatial data sets: Gaussian predictive
             process and nearest-neighbor Gaussian process. We devised an
             efficient Gibbs posterior sampling algorithm for Bayesian
             model fitting that allows us to analyze community data sets
             consisting of hundreds of species sampled from up to
             hundreds of thousands of spatial units. The performance of
             these methods is demonstrated using an extensive plant data
             set of 30,955 spatial units as a case study. We provide an
             implementation of the presented methods as an extension to
             the hierarchical modeling of species communities
             framework.},
   Doi = {10.1002/ecy.2929},
   Key = {fds348077}
}

@article{fds341344,
   Author = {Canale, A and Durante, D and Dunson, DB},
   Title = {Convex mixture regression for quantitative risk
             assessment.},
   Journal = {Biometrics},
   Volume = {74},
   Number = {4},
   Pages = {1331-1340},
   Year = {2018},
   Month = {December},
   url = {http://dx.doi.org/10.1111/biom.12917},
   Abstract = {There is wide interest in studying how the distribution of a
             continuous response changes with a predictor. We are
             motivated by environmental applications in which the
             predictor is the dose of an exposure and the response is a
             health outcome. A main focus in these studies is inference
             on dose levels associated with a given increase in risk
             relative to a baseline. In addressing this goal, popular
             methods either dichotomize the continuous response or focus
             on modeling changes with the dose in the expectation of the
             outcome. Such choices may lead to information loss and
             provide inaccurate inference on dose-response relationships.
             We instead propose a Bayesian convex mixture regression
             model that allows the entire distribution of the health
             outcome to be unknown and changing with the dose. To balance
             flexibility and parsimony, we rely on a mixture model for
             the density at the extreme doses, and express the
             conditional density at each intermediate dose via a convex
             combination of these extremal densities. This representation
             generalizes classical dose-response models for quantitative
             outcomes, and provides a more parsimonious, but still
             powerful, formulation compared to nonparametric methods,
             thereby improving interpretability and efficiency in
             inference on risk functions. A Markov chain Monte Carlo
             algorithm for posterior inference is developed, and the
             benefits of our methods are outlined in simulations, along
             with a study on the impact of dde exposure on gestational
             age.},
   Doi = {10.1111/biom.12917},
   Key = {fds341344}
}
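
% A minimal sketch (not from the paper): the convex-mixture representation
% itself. The response density at dose d interpolates the extreme-dose
% densities, f(y|d) = (1 - w(d)) f0(y) + w(d) f1(y) with w monotone, which
% yields excess risk as a function of dose and a benchmark dose. Gaussian
% extremal densities and a logistic w are assumed stand-ins; the paper
% models these components flexibly.

import numpy as np
from scipy import stats

f0 = stats.norm(39.0, 1.5)      # outcome density at zero exposure
f1 = stats.norm(35.0, 2.5)      # outcome density at the maximal dose
w = lambda d: 1 / (1 + np.exp(-3 * (d - 0.5)))   # monotone mixing weight

def risk(d, a=37.0):            # P(Y < a | dose = d)
    return (1 - w(d)) * f0.cdf(a) + w(d) * f1.cdf(a)

doses = np.linspace(0, 1, 401)
extra = risk(doses) - risk(0.0)
bmd = doses[np.searchsorted(extra, 0.1)]   # dose adding 10% excess risk
print("baseline risk:", round(risk(0.0), 3))
print("benchmark dose (10% extra risk):", round(bmd, 3))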

@article{fds368076,
   Author = {Badea, A and Li, D and Niculescu, AR and Anderson, RJ and Stout, JA and Williams, CL and Colton, CA and Maeda, N and Dunson,
             DB},
   Title = {Corrigendum: Absolute winding number differentiates mouse
             spatial navigation strategies with genetic risk for
             Alzheimer's disease.},
   Journal = {Front Neurosci},
   Volume = {16},
   Pages = {1070425},
   Year = {2022},
   url = {http://dx.doi.org/10.3389/fnins.2022.1070425},
   Abstract = {[This corrects the article DOI: 10.3389/fnins.2022.848654.].},
   Doi = {10.3389/fnins.2022.1070425},
   Key = {fds368076}
}

@article{fds257993,
   Author = {Zhou, M and Yang, H and Sapiro, G and Dunson, D and Carin,
             L},
   Title = {Covariate-dependent dictionary learning and sparse
             coding},
   Journal = {ICASSP, IEEE International Conference on Acoustics, Speech
             and Signal Processing - Proceedings},
   Pages = {5824-5827},
   Publisher = {IEEE},
   Year = {2011},
   Month = {August},
   ISSN = {1520-6149},
   url = {http://dx.doi.org/10.1109/ICASSP.2011.5947685},
   Abstract = {A dependent hierarchical beta process (dHBP) is developed as
             a prior for data that may be represented in terms of a
             sparse set of latent features (dictionary elements), with
             covariate-dependent feature usage. The dHBP is applicable to
             general covariates and data models, imposing that signals
             with similar covariates are likely to be manifested in terms
             of similar features. As an application, we consider the
             simultaneous sparse modeling of multiple images, with the
             covariate of a given image linked to its similarity to all
             other images (as applied in manifold learning). Efficient
             inference is performed using hybrid Gibbs,
             Metropolis-Hastings and slice sampling. © 2011
             IEEE.},
   Doi = {10.1109/ICASSP.2011.5947685},
   Key = {fds257993}
}

@article{fds371511,
   Author = {Papadogeorgou, G and Bello, C and Ovaskainen, O and Dunson,
             DB},
   Title = {Covariate-Informed Latent Interaction Models: Addressing
             Geographic & Taxonomic Bias in Predicting Bird–Plant
             Interactions},
   Journal = {Journal of the American Statistical Association},
   Volume = {118},
   Number = {544},
   Pages = {2250-2261},
   Year = {2023},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2023.2208390},
   Abstract = {Reductions in natural habitats urge that we better
             understand species’ interconnection and how biological
             communities respond to environmental changes. However,
             ecological studies of species’ interactions are limited by
             their geographic and taxonomic focus which can distort our
             understanding of interaction dynamics. We focus on
             bird–plant interactions that refer to situations of
             potential fruit consumption and seed dispersal. We develop
             an approach for predicting species’ interactions that
             accounts for errors in the recorded interaction networks,
             addresses the geographic and taxonomic biases of existing
             studies, is based on latent factors to increase flexibility
             and borrow information across species, incorporates
             covariates in a flexible manner to inform the latent
             factors, and uses a meta-analysis dataset from 85 individual
             studies. We focus on interactions among 232 birds and 511
             plants in the Atlantic Forest, and identify 5% of pairs of
             species with no recorded interaction but posterior
             probability above 80% that the interaction is possible.
             Finally, we develop a permutation-based variable importance
             procedure for latent factor network models and identify that
             a bird’s body mass and a plant’s fruit diameter are
             important in driving the presence of species interactions,
             with a multiplicative relationship that exhibits both a
             thresholding and a matching behavior. Supplementary
             materials for this article are available
             online.},
   Doi = {10.1080/01621459.2023.2208390},
   Key = {fds371511}
}

@article{fds322537,
   Author = {Rao, V and Lin, L and Dunson, DB},
   Title = {Data augmentation for models based on rejection
             sampling.},
   Journal = {Biometrika},
   Volume = {103},
   Number = {2},
   Pages = {319-335},
   Year = {2016},
   Month = {June},
   url = {http://dx.doi.org/10.1093/biomet/asw005},
   Abstract = {We present a data augmentation scheme to perform Markov
             chain Monte Carlo inference for models where data generation
             involves a rejection sampling algorithm. Our idea is a
             simple scheme to instantiate the rejected proposals
             preceding each data point. The resulting joint probability
             over observed and rejected variables can be much simpler
             than the marginal distribution over the observed variables,
             which often involves intractable integrals. We consider
             three problems: modelling flow-cytometry measurements
             subject to truncation; the Bayesian analysis of the matrix
             Langevin distribution on the Stiefel manifold; and Bayesian
             inference for a nonparametric Gaussian process density
             model. The latter two are instances of doubly-intractable
             Markov chain Monte Carlo problems, where evaluating the
             likelihood is intractable. Our experiments demonstrate
             superior performance over state-of-the-art sampling
             algorithms for such problems.},
   Doi = {10.1093/biomet/asw005},
   Key = {fds322537}
}
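
% A minimal sketch (not from the paper): the augmentation device in
% miniature. Run a generative rejection sampler and keep the rejected
% proposals preceding each accepted draw, so the joint over (accepted,
% rejected) variables has a simple product form. The target and proposal
% below are arbitrary illustrations, not the paper's applications.

import numpy as np

rng = np.random.default_rng(10)

def accept_prob(x):
    # accept probability shaping a N(0,1) proposal into a bimodal target
    return 0.9 * np.abs(np.tanh(2 * x))

def rejection_with_augmentation(n):
    draws, rejected = [], []
    while len(draws) < n:
        trail = []                       # rejected proposals for this draw
        while True:
            x = rng.normal()
            if rng.random() < accept_prob(x):
                draws.append(x)
                rejected.append(trail)
                break
            trail.append(x)
    return np.array(draws), rejected

samples, rej = rejection_with_augmentation(2000)
print("mean |x| of accepted draws:", round(np.abs(samples).mean(), 2))
print("average rejections per accepted draw:",
      round(np.mean([len(t) for t in rej]), 2))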

@article{fds258007,
   Author = {Dunson, DB and Baird, DD and Wilcox, AJ and Weinberg,
             CR},
   Title = {Day-specific probabilities of clinical pregnancy based on
             two studies with imperfect measures of ovulation.},
   Journal = {Human reproduction (Oxford, England)},
   Volume = {14},
   Number = {7},
   Pages = {1835-1839},
   Year = {1999},
   Month = {July},
   ISSN = {0268-1161},
   url = {http://dx.doi.org/10.1093/humrep/14.7.1835},
   Abstract = {Two studies have related the timing of sexual intercourse
             (relative to ovulation) to day-specific fecundability. The
             first was a study of Catholic couples practising natural
             family planning in London in the 1950s and 1960s and the
             second was of North Carolina couples attempting to become
             pregnant in the early 1980s. The former identified ovulation
             based on the ovulatory shift in the basal body temperature,
             while the latter used urinary assays of hormones. We use a
             statistical model to correct for error in identifying
             ovulation and to re-estimate the length of the fertile
             window and day-specific fecundabilities. We estimate the
             same 6-day fertile interval in both studies after
             controlling for error. After adjusting for error both data
             sets showed the highest estimate of the probability of
             pregnancy on the day prior to ovulation and both fell close
             to zero after ovulation. Given that the fertile interval is
             before ovulation, methods that anticipate ovulation by
             several days (such as the assessment of cervical mucus)
             would be particularly useful for couples who want to time
             their intercourse either to avoid or facilitate
             conception.},
   Doi = {10.1093/humrep/14.7.1835},
   Key = {fds258007}
}

@article{fds327031,
   Author = {Wang, X and Dunson, D and Leng, C},
   Title = {DECOrrelated feature space partitioning for distributed
             sparse regression},
   Journal = {Advances in Neural Information Processing
             Systems},
   Pages = {802-810},
   Year = {2016},
   Month = {January},
   Abstract = {Fitting statistical models is computationally challenging
             when the sample size or the dimension of the dataset is
             huge. An attractive approach for down-scaling the problem
             size is to first partition the dataset into subsets and then
             fit using distributed algorithms. The dataset can be
             partitioned either horizontally (in the sample space) or
             vertically (in the feature space). While the majority of the
             literature focuses on sample space partitioning, feature
             space partitioning is more effective when p > n. Existing
             methods for partitioning features, however, are either
             vulnerable to high correlations or inefficient in reducing
             the model dimension. In this paper, we solve these problems
             through a new embarrassingly parallel framework named DECO
             for distributed variable selection and parameter estimation.
             In DECO, variables are first partitioned and allocated to m
             distributed workers. The decorrelated subset data within
             each worker are then fitted via any algorithm designed for
             high-dimensional problems. We show that by incorporating the
             decorrelation step, DECO can achieve consistent variable
             selection and parameter estimation on each subset with
             (almost) no assumptions. In addition, the convergence rate
             is nearly minimax optimal for both sparse and weakly sparse
             models and does NOT depend on the partition number m.
             Extensive numerical experiments are provided to illustrate
             the performance of the new framework.},
   Key = {fds327031}
}
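
% A minimal sketch (not from the paper): feature-space partitioning with a
% decorrelation step in the spirit of DECO. Form X_tilde = sqrt(n) *
% (X X^T)^(-1/2) X, split the columns across "workers", and fit a sparse
% regression per block. A serial loop stands in for distributed workers,
% and sklearn's Lasso (with an arbitrary alpha) stands in for any
% high-dimensional fitting routine.

import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.default_rng(11)
n, p, m = 100, 500, 5
X = rng.normal(size=(n, p)) + 0.5 * rng.normal(size=(n, 1))  # correlated cols
beta = np.zeros(p); beta[[3, 77, 310]] = [3.0, -2.0, 2.5]
y = X @ beta + 0.5 * rng.normal(size=n)

# decorrelation: rows of X_tilde have (near) identity sample covariance
vals, vecs = np.linalg.eigh(X @ X.T)
root_inv = vecs @ np.diag(1 / np.sqrt(vals.clip(1e-10))) @ vecs.T
X_t = np.sqrt(n) * root_inv @ X
y_t = np.sqrt(n) * root_inv @ y

selected = []
for block in np.array_split(np.arange(p), m):   # one block per "worker"
    fit = Lasso(alpha=0.1).fit(X_t[:, block], y_t)
    selected += [int(block[j]) for j in np.flatnonzero(fit.coef_)]
print("selected features:", sorted(selected))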

@article{fds257870,
   Author = {Chen, B and Polatkan, G and Sapiro, G and Blei, D and Dunson, D and Carin,
             L},
   Title = {Deep Learning with Hierarchical Convolutional Factor
             Analysis.},
   Journal = {IEEE transactions on pattern analysis and machine
             intelligence},
   Year = {2013},
   Month = {January},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23319498},
   Abstract = {Unsupervised multi-layered ("deep") models are considered
             for general data, with a particular focus on imagery. The
             model is represented using a hierarchical convolutional
             factor-analysis construction, with sparse factor loadings
             and scores. The computation of layer-dependent model
             parameters is implemented within a Bayesian setting,
             employing a Gibbs sampler and variational Bayesian (VB)
             analysis, that explicitly exploit the convolutional nature
             of the expansion. In order to address large-scale and
             streaming data, an online version of VB is also developed.
             The number of basis functions or dictionary elements at each
             layer is inferred from the data, based on a beta-Bernoulli
             implementation of the Indian buffet process. Example results
             are presented for several image-processing applications,
             with comparisons to related models in the
             literature.},
   Key = {fds257870}
}

@article{fds258048,
   Author = {Ghosh, J and Dunson, DB},
   Title = {Default Prior Distributions and Efficient Posterior
             Computation in Bayesian Factor Analysis.},
   Journal = {Journal of computational and graphical statistics : a joint
             publication of American Statistical Association, Institute
             of Mathematical Statistics, Interface Foundation of North
             America},
   Volume = {18},
   Number = {2},
   Pages = {306-320},
   Year = {2009},
   Month = {June},
   ISSN = {1061-8600},
   url = {http://dx.doi.org/10.1198/jcgs.2009.07145},
   Abstract = {Factor analytic models are widely used in social sciences.
             These models have also proven useful for sparse modeling of
             the covariance structure in multidimensional data. Normal
             prior distributions for factor loadings and inverse gamma
             prior distributions for residual variances are a popular
             choice because of their conditionally conjugate form.
             However, such prior distributions require elicitation of
             many hyperparameters and tend to result in poorly behaved
             Gibbs samplers. In addition, one must choose an informative
             specification, as high variance prior distributions face
             problems due to impropriety of the posterior distribution.
             This article proposes a default, heavy-tailed prior
             distribution specification, which is induced through
             parameter expansion while facilitating efficient posterior
             computation. We also develop an approach to allow
             uncertainty in the number of factors. The methods are
             illustrated through simulated examples and epidemiology and
             toxicology applications. Data sets and computer code used in
             this article are available online.},
   Doi = {10.1198/jcgs.2009.07145},
   Key = {fds258048}
}

@article{fds257906,
   Author = {Tiano, HF and Loftin, CD and Akunda, J and Lee, CA and Spalding, J and Sessoms, A and Dunson, DB and Rogan, EG and Morham, SG and Smart, RC and Langenbach, R},
   Title = {Deficiency of either cyclooxygenase (COX)-1 or COX-2 alters
             epidermal differentiation and reduces mouse skin
             tumorigenesis.},
   Journal = {Cancer research},
   Volume = {62},
   Number = {12},
   Pages = {3395-3401},
   Year = {2002},
   Month = {June},
   Abstract = {Nonsteroidal anti-inflammatory drugs are widely reported to
             inhibit carcinogenesis in humans and in rodents. These drugs
             are believed to act by inhibiting one or both of the known
             isoforms of cyclooxygenase (COX). However, COX-2, and not
             COX-1, is the isoform most frequently reported to have a key
             role in tumor development. Here we report that homozygous
             deficiency of either COX-1 or COX-2 reduces skin
             tumorigenesis by 75% in a multistage mouse skin model.
             Reduced tumorigenesis was observed even though the levels of
             stable 7,12-dimethylbenz(a)anthracene-DNA adducts were
             increased about 2-fold in the COX-deficient mice compared
             with wild-type mice. The premature onset of keratinocyte
             terminal differentiation appeared to be the cellular event
             leading to the reduced tumorigenesis because keratin 1 and
             keratin 10, two keratins that indicate the commitment of
             keratinocytes to differentiate, were expressed 8-13-fold and
             10-20-fold more frequently in epidermal basal cells of the
             COX-1-deficient and COX-2-deficient mice, respectively, than
             in wild-type mice. Papillomas on the COX-deficient mice also
             displayed the premature onset of keratinocyte terminal
             differentiation. However, loricrin, a late marker of
             epidermal differentiation, was not significantly altered,
             suggesting that it was the early stages of keratinocyte
             differentiation that were primarily affected by COX
             deficiency. Because keratin 5, a keratin associated with
             basal cells, was detected differently in papillomas of
             COX-1-deficient as compared with COX-2-deficient mice, it
             appears that the isoforms do not have identical roles in
             papilloma development. Interestingly, apoptosis, a cellular
             process associated with nonsteroidal anti-inflammatory
             drug-induced inhibition of tumorigenesis, was not
             significantly altered in the epidermis or in papillomas of
             the COX-deficient mice. Thus, both COX-1 and COX-2 have
             roles in keratinocyte differentiation, and we propose that
             the absence of either isoform causes premature terminal
             differentiation of initiated keratinocytes and reduced tumor
             formation.},
   Key = {fds257906}
}

@article{fds257999,
   Author = {Zhou, M and Carin, L and Yang, H and Dunson, D and Sapiro,
             G},
   Title = {Dependent hierarchical beta process for image interpolation
             and denoising},
   Journal = {Journal of Machine Learning Research},
   Volume = {15},
   Pages = {883-891},
   Year = {2011},
   Month = {December},
   ISSN = {1532-4435},
   Abstract = {A dependent hierarchical beta process (dHBP) is developed as
             a prior for data that may be represented in terms of a
             sparse set of latent features, with covariate-dependent
             feature usage. The dHBP is applicable to general covariates
             and data models, imposing that signals with similar
             covariates are likely to be manifested in terms of similar
             features. Coupling the dHBP with the Bernoulli process, and
             upon marginalizing out the dHBP, the model may be
             interpreted as a covariate-dependent hierarchical Indian
             buffet process. As applications, we consider interpolation
             and denoising of an image, with covariates defined by the
             location of image patches within an image. Two types of
             noise models are considered: (i) typical white Gaussian
             noise; and (ii) spiky noise of arbitrary amplitude,
             distributed uniformly at random. In these examples, the
             features correspond to the atoms of a dictionary, learned
             based upon the data under test (without a priori training
             data). State-of-the-art performance is demonstrated, with
             efficient inference using hybrid Gibbs, Metropolis-Hastings
             and slice sampling. Copyright 2011 by the
             authors.},
   Key = {fds257999}
}

@article{fds376258,
   Author = {Huang, J and Morsomme, R and Dunson, D and Xu, J},
   Title = {Detecting changes in the transmission rate of a stochastic
             epidemic model.},
   Journal = {Statistics in medicine},
   Year = {2024},
   Month = {February},
   url = {http://dx.doi.org/10.1002/sim.10050},
   Abstract = {Throughout the course of an epidemic, the rate at which
             disease spreads varies with behavioral changes, the
             emergence of new disease variants, and the introduction of
             mitigation policies. Estimating such changes in transmission
             rates can help us better model and predict the dynamics of
             an epidemic, and provide insight into the efficacy of
             control and intervention strategies. We present a method for
             likelihood-based estimation of parameters in the stochastic
             susceptible-infected-removed model under a
             time-inhomogeneous transmission rate comprised of piecewise
             constant components. In doing so, our method simultaneously
             learns change points in the transmission rate via a Markov
             chain Monte Carlo algorithm. The method targets the exact
             model posterior in a difficult missing data setting given
             only partially observed case counts over time. We validate
             performance on simulated data before applying our approach
             to data from an Ebola outbreak in Western Africa and
             COVID-19 outbreak on a university campus.},
   Doi = {10.1002/sim.10050},
   Key = {fds376258}
}
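
% Editor's note: a Gillespie-style simulation of the stochastic SIR
% model with a piecewise-constant transmission rate, i.e., the kind of
% data-generating process the sampler above targets. A sketch assuming
% numpy only; rate changes between events are ignored for simplicity.

    import numpy as np

    rng = np.random.default_rng(1)
    N, S, I, R = 1000, 990, 10, 0
    t, t_end, gamma = 0.0, 100.0, 0.1
    change_points, betas = [0.0, 30.0], [0.3, 0.1]  # change point at t=30

    def beta_at(t):
        return betas[np.searchsorted(change_points, t, side="right") - 1]

    events = 0
    while t < t_end and I > 0:
        rate_inf = beta_at(t) * S * I / N
        rate_rec = gamma * I
        t += rng.exponential(1.0 / (rate_inf + rate_rec))
        if rng.random() < rate_inf / (rate_inf + rate_rec):
            S, I = S - 1, I + 1        # infection event
        else:
            I, R = I - 1, R + 1        # removal event
        events += 1

    print("final size:", R, "events:", events)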

@article{fds322560,
   Author = {Johndrow, JE and Lum, K and Dunson, DB},
   Title = {Diagonal orthant multinomial probit models},
   Journal = {Journal of Machine Learning Research},
   Volume = {31},
   Pages = {29-38},
   Year = {2013},
   Month = {January},
   Abstract = {Bayesian classification commonly relies on probit models,
             with data augmentation algorithms used for posterior
             computation. By imputing latent Gaussian variables, one can
             often trivially adapt computational approaches used in
             Gaussian models. However, MCMC for multinomial probit (MNP)
             models can be inefficient in practice due to high posterior
             dependence between latent variables and parameters, and to
             difficulties in efficiently sampling latent variables when
             there are more than two categories. To address these
             problems, we propose a new class of diagonal orthant (DO)
             multinomial models. The key characteristics of these models
             include conditional independence of the latent variables
             given model parameters, avoidance of arbitrary
             identifiability restrictions, and simple expressions for
             category probabilities. We show substantially improved
             computational efficiency and comparable predictive
             performance to MNP.},
   Key = {fds322560}
}
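
% Editor's note: the latent-variable update that makes DO models easy to
% sample. Given the observed category c, the latent coordinates are
% independent univariate truncated normals: positive for c, negative for
% the rest. A sketch assuming numpy and scipy; values are illustrative.

    import numpy as np
    from scipy.stats import truncnorm

    rng = np.random.default_rng(2)
    k, c = 4, 2                      # categories; observed category
    mu = rng.standard_normal(k)      # linear predictors, one observation

    z = np.empty(k)
    for j in range(k):
        if j == c:                   # z_c truncated to (0, inf)
            a, b = -mu[j], np.inf
        else:                        # z_j truncated to (-inf, 0)
            a, b = -np.inf, -mu[j]
        z[j] = truncnorm.rvs(a, b, loc=mu[j], scale=1.0, random_state=rng)

    print(z, z[c] > 0, (np.delete(z, c) < 0).all())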

@article{fds257823,
   Author = {Yin, R and Dunson, D and Cornelis, B and Brown, B and Ocon, N and Daubechies, I},
   Title = {Digital cradle removal in X-ray images of art
             paintings},
   Journal = {2014 IEEE International Conference on Image Processing, ICIP
             2014},
   Pages = {4299-4303},
   Publisher = {IEEE},
   Year = {2014},
   Month = {January},
   ISBN = {9781479957514},
   url = {http://dx.doi.org/10.1109/ICIP.2014.7025873},
   Abstract = {We introduce an algorithm that removes the deleterious
             effect of cradling on X-ray images of paintings on wooden
             panels. The algorithm consists of a three stage procedure.
             Firstly, the cradled regions are located automatically. The
             second step consists of separating the X-ray image into a
             textural and image component. In the last step the algorithm
             learns to distinguish between the texture caused by the
             wooden cradle and the texture belonging to the original
             painted wooden panel. The results obtained with our method
             are compared with those obtained manually by best current
             practice.},
   Doi = {10.1109/ICIP.2014.7025873},
   Key = {fds257823}
}

@article{fds322549,
   Author = {Bhattacharya, A and Pati, D and Pillai, NS and Dunson,
             DB},
   Title = {Dirichlet-Laplace priors for optimal shrinkage.},
   Journal = {Journal of the American Statistical Association},
   Volume = {110},
   Number = {512},
   Pages = {1479-1490},
   Year = {2015},
   Month = {December},
   url = {http://dx.doi.org/10.1080/01621459.2014.960967},
   Abstract = {Penalized regression methods, such as L_1
             regularization, are routinely used in high-dimensional
             applications, and there is a rich literature on optimality
             properties under sparsity assumptions. In the Bayesian
             paradigm, sparsity is routinely induced through
             two-component mixture priors having a probability mass at
             zero, but such priors encounter daunting computational
             problems in high dimensions. This has motivated continuous
             shrinkage priors, which can be expressed as global-local
             scale mixtures of Gaussians, facilitating computation. In
             contrast to the frequentist literature, little is known
             about the properties of such priors and the convergence and
             concentration of the corresponding posterior distribution.
             In this article, we propose a new class of Dirichlet-Laplace
             priors, which possess optimal posterior concentration and
             lead to efficient posterior computation. Finite sample
             performance of Dirichlet-Laplace priors relative to
             alternatives is assessed in simulated and real data
             examples.},
   Doi = {10.1080/01621459.2014.960967},
   Key = {fds322549}
}
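
% Editor's note: drawing once from the Dirichlet-Laplace prior DL_a as
% described above: phi ~ Dirichlet(a,...,a), tau ~ Gamma(p*a, rate 1/2),
% and theta_j ~ Laplace with scale phi_j * tau. numpy assumed; the
% concentration a is an illustrative choice.

    import numpy as np

    rng = np.random.default_rng(3)
    p, a = 1000, 0.5
    phi = rng.dirichlet(np.full(p, a))
    tau = rng.gamma(shape=p * a, scale=2.0)   # scale 2 <=> rate 1/2
    theta = rng.laplace(loc=0.0, scale=phi * tau)

    # Near-sparsity: a few coordinates carry most of the l1 mass.
    mags = np.sort(np.abs(theta))
    print("top-10 share of l1 norm:", mags[-10:].sum() / mags.sum())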

@article{fds350537,
   Author = {Nishimura, A and Dunson, DB and Lu, J},
   Title = {Discontinuous Hamiltonian Monte Carlo for discrete
             parameters and discontinuous likelihoods},
   Journal = {Biometrika},
   Volume = {107},
   Number = {2},
   Pages = {365-380},
   Year = {2020},
   Month = {June},
   url = {http://dx.doi.org/10.1093/biomet/asz083},
   Abstract = {Hamiltonian Monte Carlo has emerged as a standard tool for
             posterior computation. In this article we present an
             extension that can efficiently explore target distributions
             with discontinuous densities. Our extension in particular
             enables efficient sampling from ordinal parameters through
             the embedding of probability mass functions into continuous
             spaces. We motivate our approach through a theory of
             discontinuous Hamiltonian dynamics and develop a
             corresponding numerical solver. The proposed solver is the
             first of its kind, with a remarkable ability to exactly
             preserve the Hamiltonian. We apply our algorithm to
             challenging posterior inference problems to demonstrate its
             wide applicability and competitive performance.},
   Doi = {10.1093/biomet/asz083},
   Key = {fds350537}
}
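
% Editor's note: a sketch of the single-coordinate move behind the
% paper's integrator: with Laplace momentum, a coordinate crosses a
% potential jump only if |p_i| exceeds the jump, paying dU out of its
% momentum so the Hamiltonian is preserved exactly; otherwise it
% bounces. numpy assumed; the embedded target is a toy example.

    import numpy as np

    def coord_step(x, p, i, eps, U):
        x_new = x.copy()
        x_new[i] += eps * np.sign(p[i])
        dU = U(x_new) - U(x)
        p = p.copy()
        if abs(p[i]) > dU:                  # enough momentum to cross
            p[i] -= np.sign(p[i]) * dU
            return x_new, p
        p[i] = -p[i]                        # bounce off the jump
        return x, p

    # Discrete parameter embedded in the reals: U(x) = -log pi(floor x).
    probs = np.array([0.1, 0.2, 0.4, 0.3])
    U = lambda x: -np.log(probs[int(np.clip(np.floor(x[0]), 0, 3))])

    rng = np.random.default_rng(4)
    x, p = np.array([1.3]), rng.laplace(size=1)
    print(coord_step(x, p, 0, eps=1.0, U=U))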

@article{fds349531,
   Author = {Dunson, D and Papamarkou, T},
   Title = {Discussions},
   Journal = {International Statistical Review},
   Volume = {88},
   Number = {2},
   Pages = {321-324},
   Year = {2020},
   Month = {August},
   url = {http://dx.doi.org/10.1111/insr.12375},
   Doi = {10.1111/insr.12375},
   Key = {fds349531}
}

@article{fds258011,
   Author = {Dunson, DB and Dinse, GE},
   Title = {Distinguishing effects on tumor multiplicity and growth rate
             in chemoprevention experiments.},
   Journal = {Biometrics},
   Volume = {56},
   Number = {4},
   Pages = {1068-1075},
   Year = {2000},
   Month = {December},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2000.01068.x},
   Abstract = {In some types of cancer chemoprevention experiments and
             short-term carcinogenicity bioassays, the data consist of
             the number of observed tumors per animal and the times at
             which these tumors were first detected. In such studies,
             there is interest in distinguishing between treatment
             effects on the number of tumors induced by a known
             carcinogen and treatment effects on the tumor growth rate.
             Since animals may die before all induced tumors reach a
             detectable size, separation of these effects can be
             difficult. This paper describes a flexible parametric model
             for data of this type. Under our model, the tumor detection
             times are realizations of a delayed Poisson process that is
             characterized by the age-specific tumor induction rate and a
             random latency interval between tumor induction and
             detection. The model accommodates distinct treatment and
             animal-specific effects on the number of induced tumors
             (multiplicity) and the time to tumor detection (growth
             rate). A Gibbs sampler is developed for estimation of the
             posterior distributions of the parameters. The methods are
             illustrated through application to data from a breast cancer
             chemoprevention experiment.},
   Doi = {10.1111/j.0006-341x.2000.01068.x},
   Key = {fds258011}
}

@article{fds258006,
   Author = {Dunson, DB},
   Title = {Dose-dependent number of implants and implications in
             developmental toxicity.},
   Journal = {Biometrics},
   Volume = {54},
   Number = {2},
   Pages = {558-569},
   Year = {1998},
   Month = {June},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.2307/3109763},
   Abstract = {This paper proposes a method for assessing risk in
             developmental toxicity studies with exposure prior to
             implantation. The method proposed in this paper was
             developed to account for a dose-dependent trend in the
             number of implantation sites per dam, which is a common
             problem in studies with exposure prior to implantation.
             Toxins may have the effect of interfering with the early
             reproductive process, which can prevent implantation in the
             uterine wall. An imputation procedure is presented for
             estimating the number of potential fetuses by sampling from
             the empirical distribution of the number of implants per
             litter in the control group. The marginal death outcomes and
             the joint malformation and survival outcomes for each
             potential fetus can be estimated using multiple imputation
             or the chained data augmentation algorithm. Logit models can
             then be fit and used to estimate the effect of dose on
             reducing the probability of a normal birth. These models
             accommodate multiple covariate effects and can be applied to
             low-dose extrapolation. A simulation study is done to
             evaluate the properties of model-based estimators of the
             mean response and the virtually safe dose level (VSD). It
             was found that both estimates were good approximations of
             the underlying dose effect. A dominant lethal assay data set
             (Luning et al., 1966, Mutation Research 3, 444-451) is
             analyzed, and the results are compared with those of Rai and
             Van Ryzin.},
   Doi = {10.2307/3109763},
   Key = {fds258006}
}

@article{fds257919,
   Author = {Dunson, DB},
   Title = {Dynamic Latent Trait Models for Multidimensional
             Longitudinal Data},
   Journal = {Journal of the American Statistical Association},
   Volume = {98},
   Number = {463},
   Pages = {555-563},
   Publisher = {Informa UK Limited},
   Year = {2003},
   Month = {September},
   url = {http://dx.doi.org/10.1198/016214503000000387},
   Abstract = {This article presents a new approach for analysis of
             multidimensional longitudinal data, motivated by studies
             using an item response battery to measure traits of an
             individual repeatedly over time. A general modeling
             framework is proposed that allows mixtures of count,
             categorical, and continuous response variables. Each
             response is related to age-specific latent traits through a
             generalized linear model that accommodates item-specific
             measurement errors. A transition model allows the latent
             traits at a given age to depend on observed predictors and
             on previous latent traits for that individual. Following a
             Bayesian approach to inference, a Markov chain Monte Carlo
             algorithm is proposed for posterior computation. The methods
             are applied to data from a neurotoxicity study of the
             pesticide methoxychlor, and evidence of a dose-dependent
             increase in motor activity is presented.},
   Doi = {10.1198/016214503000000387},
   Key = {fds257919}
}

@article{fds258034,
   Author = {Cai, B and Dunson, DB and Stanford, JB},
   Title = {Dynamic model for multivariate markers of
             fecundability.},
   Journal = {Biometrics},
   Volume = {66},
   Number = {3},
   Pages = {905-913},
   Year = {2010},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2009.01327.x},
   Abstract = {Dynamic latent class models provide a flexible framework for
             studying biologic processes that evolve over time. Motivated
             by studies of markers of the fertile days of the menstrual
             cycle, we propose a discrete-time dynamic latent class
             framework, allowing change points to depend on time, fixed
             predictors, and random effects. Observed data consist of
             multivariate categorical indicators, which change
             dynamically in a flexible manner according to latent class
             status. Given the flexibility of the framework, which
             incorporates semi-parametric components using mixtures of
             betas, identifiability constraints are needed to define the
             latent classes. Such constraints are most appropriately
             based on the known biology of the process. The Bayesian
             method is developed particularly for analyzing mucus symptom
             data from a study of women using natural family
             planning.},
   Doi = {10.1111/j.1541-0420.2009.01327.x},
   Key = {fds258034}
}

@article{fds258042,
   Author = {Ren, L and Dunson, D and Lindroth, S and Carin, L},
   Title = {Dynamic nonparametric Bayesian models for analysis of
             music},
   Journal = {Journal of the American Statistical Association},
   Volume = {105},
   Number = {490},
   Pages = {458-472},
   Publisher = {Informa UK Limited},
   Year = {2010},
   Month = {June},
   ISSN = {0162-1459},
   url = {http://hdl.handle.net/10161/4397},
   Abstract = {The dynamic hierarchical Dirichlet process (dHDP) is
             developed to model complex sequential data, with a focus on
             audio signals from music. The music is represented in terms
             of a sequence of discrete observations, and the sequence is
             modeled using a hidden Markov model (HMM) with time-evolving
             parameters. The dHDP imposes the belief that observations
             that are temporally proximate are more likely to be drawn
             from HMMs with similar parameters, while also allowing for
             "innovation" associated with abrupt changes in the music
             texture. The sharing mechanisms of the time-evolving model
             are derived, and for inference a relatively simple Markov
             chain Monte Carlo sampler is developed. Segmentation of a
             given musical piece follows directly from the model
             inference.
             Detailed examples are presented on several pieces, with
             comparisons to other models. The dHDP results are also
             compared with a conventional music-theoretic analysis. All
             the supplemental materials used by this paper are available
             online. © 2010 American Statistical Association.},
   Doi = {10.1198/jasa.2009.ap08497},
   Key = {fds258042}
}

@article{fds329993,
   Author = {Hultman, R and Mague, SD and Li, Q and Katz, BM and Michel, N and Lin, L and Wang, J and David, LK and Blount, C and Chandy, R and Carlson, D and Ulrich, K and Carin, L and Dunson, D and Kumar, S and Deisseroth, K and Moore, SD and Dzirasa, K},
   Title = {Dysregulation of Prefrontal Cortex-Mediated Slow-Evolving
             Limbic Dynamics Drives Stress-Induced Emotional
             Pathology.},
   Journal = {Neuron},
   Volume = {91},
   Number = {2},
   Pages = {439-452},
   Year = {2016},
   Month = {July},
   url = {http://dx.doi.org/10.1016/j.neuron.2016.05.038},
   Abstract = {Circuits distributed across cortico-limbic brain regions
             compose the networks that mediate emotional behavior. The
             prefrontal cortex (PFC) regulates ultraslow (<1 Hz)
             dynamics across these networks, and PFC dysfunction is
             implicated in stress-related illnesses including major
             depressive disorder (MDD). To uncover the mechanism whereby
             stress-induced changes in PFC circuitry alter emotional
             networks to yield pathology, we used a multi-disciplinary
             approach including in vivo recordings in mice and chronic
             social defeat stress. Our network model, inferred using
             machine learning, linked stress-induced behavioral pathology
             to the capacity of PFC to synchronize amygdala and VTA
             activity. Direct stimulation of PFC-amygdala circuitry with
             DREADDs normalized PFC-dependent limbic synchrony in
             stress-susceptible animals and restored normal behavior. In
             addition to providing insights into MDD mechanisms, our
             findings demonstrate an interdisciplinary approach that can
             be used to identify the large-scale network changes that
             underlie complex emotional pathologies and the specific
             network nodes that can be used to develop targeted
             interventions.},
   Doi = {10.1016/j.neuron.2016.05.038},
   Key = {fds329993}
}

@article{fds332810,
   Author = {van den Boom, W and Schroeder, RA and Manning, MW and Setji, TL and Fiestan, G-O and Dunson, DB},
   Title = {Effect of A1C and Glucose on Postoperative Mortality in
             Noncardiac and Cardiac Surgeries.},
   Journal = {Diabetes Care},
   Volume = {41},
   Number = {4},
   Pages = {782-788},
   Year = {2018},
   Month = {April},
   url = {http://dx.doi.org/10.2337/dc17-2232},
   Abstract = {OBJECTIVE: Hemoglobin A1c (A1C) is used in assessment of
             patients for elective surgeries because hyperglycemia
             increases risk of adverse events. However, the interplay of
             A1C, glucose, and surgical outcomes remains unclarified,
             with often only two of these three factors considered
             simultaneously. We assessed the association of preoperative
             A1C with perioperative glucose control and their
             relationship with 30-day mortality. RESEARCH DESIGN AND
             METHODS: Retrospective analysis on 431,480 surgeries within
             the Duke University Health System determined the association
             of preoperative A1C with perioperative glucose (averaged
             over the first 3 postoperative days) and 30-day mortality
             among 6,684 noncardiac and 6,393 cardiac surgeries with A1C
             and glucose measurements. A generalized additive model was
             used, enabling nonlinear relationships. RESULTS: A1C and
             glucose were strongly associated. Glucose and mortality were
             positively associated for noncardiac cases: 1.0% mortality
             at mean glucose of 100 mg/dL and 1.6% at mean glucose of 200
             mg/dL. For cardiac procedures, there was a striking U-shaped
             relationship between glucose and mortality, falling from
             4.5% at 100 mg/dL to a nadir of 1.5% at 140 mg/dL and rising
             again to 6.9% at 200 mg/dL. A1C and 30-day mortality were
             not associated when controlling for glucose in noncardiac or
             cardiac procedures. CONCLUSIONS: Although A1C is positively
             associated with perioperative glucose, it is not associated
             with increased 30-day mortality after controlling for
             glucose. Perioperative glucose predicts 30-day mortality,
             linearly in noncardiac and nonlinearly in cardiac
             procedures. This confirms that perioperative glucose control
             is related to surgical outcomes but that A1C, reflecting
             antecedent glycemia, is a less useful predictor.},
   Doi = {10.2337/dc17-2232},
   Key = {fds332810}
}
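
% Editor's note: the paper fits a generalized additive model; as a
% stand-in, this sketch expands glucose in a quadratic basis and fits a
% logistic regression, so the fitted mortality curve can be U-shaped
% rather than monotone. Data are simulated (loosely echoing the cardiac
% pattern), not the study data; numpy and scikit-learn assumed.

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    rng = np.random.default_rng(5)
    glucose = rng.uniform(80, 220, size=5000)

    def basis(g):                     # splines would be used in practice
        z = (g - 140.0) / 50.0
        return np.column_stack([z, z ** 2])

    true_logit = 2.0 * ((glucose - 140.0) / 50.0) ** 2 - 4.0
    death = rng.random(5000) < 1.0 / (1.0 + np.exp(-true_logit))

    fit = LogisticRegression().fit(basis(glucose), death)
    grid = np.linspace(90, 210, 7)
    risk = fit.predict_proba(basis(grid))[:, 1]
    print(np.round(np.c_[grid, risk], 3))  # risk dips near 140 mg/dL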

@article{fds257822,
   Author = {Trouba, K and Nyska, A and Styblo, M and Dunson, D and Lomnitski, L and Grossman, S and Moser, G and Suttie, A and Patterson, R and Walton, F and Germolec, D},
   Title = {Effect of antioxidants on the papilloma response and liver
             glutathione modulation mediated by arsenic in tg.ac
             transgenic mice},
   Journal = {Arsenic Exposure and Health Effects V},
   Pages = {283-293},
   Publisher = {Elsevier},
   Year = {2003},
   Month = {December},
   url = {http://dx.doi.org/10.1016/B978-044451441-7/50022-1},
   Abstract = {Epidemiological studies indicate that inorganic arsenicals
             produce various skin lesions as well as skin, lung, bladder,
             liver, prostate, and renal cancer. Our laboratory previously
             demonstrated that low-dose 12-O-tetradecanoylphorbol-13-acetate
             (TPA) increased the number of skin papillomas in Tg.AC
             transgenic mice that received sodium arsenite in drinking
             water, an effect dependent on proinflammatory cytokines.
             Because proinflammatory cytokine expression can be modulated
             by free radicals and oxidative stress, we hypothesized that
             oxidative stress contributes to TPA-promoted papilloma
             development in Tg.AC mice exposed to sodium arsenite. To
             evaluate the contribution of oxidative stress to arsenic
             skin carcinogenesis, two free-radical scavengers were tested
             for their ability to suppress papilloma responses (e.g.
             induction, latency, and multiplicity) modulated by arsenite
             in Tg.AC mice. Data indicate that arsenite increased
             papilloma responses in TPA-promoted Tg.AC mice as compared
             to control animals (no arsenite). The antioxidant vitamin E
             or a water-soluble natural antioxidant fraction from spinach
             had no inhibitory effect on TPA-promoted papilloma responses
             following arsenite exposure. Although not conclusively
             defined by our studies, oxidative stress generated by
             arsenic may contribute to skin carcinogenesis; however, it
             is not likely to be the sole or primary mechanism that
             enhances papilloma responses following arsenite exposure and
             TPA promotion. © 2003 Elsevier B.V.},
   Doi = {10.1016/B978-044451441-7/50022-1},
   Key = {fds257822}
}

@article{fds257947,
   Author = {Stanford, JB and Dunson, DB},
   Title = {Effects of sexual intercourse patterns in time to pregnancy
             studies.},
   Journal = {American journal of epidemiology},
   Volume = {165},
   Number = {9},
   Pages = {1088-1095},
   Year = {2007},
   Month = {May},
   ISSN = {0002-9262},
   url = {http://dx.doi.org/10.1093/aje/kwk111},
   Abstract = {Time to pregnancy, typically defined as the number of
             menstrual cycles required to achieve a clinical pregnancy,
             is widely used as a measure of couple fecundity in
             epidemiologic studies. Time to pregnancy studies seldom
             utilize detailed data on the timing and frequency of sexual
             intercourse and the timing of ovulation. However, the
             simulated models in this paper illustrate that intercourse
             behavior can have a large impact on time to pregnancy and,
             likewise, on fecundability ratios, especially under
             conditions of low intercourse frequency or low fecundity.
             Because intercourse patterns in the menstrual cycles may
             vary substantially among groups, it is important to consider
             the effects of sexual behavior. Where relevant and feasible,
             an assessment should be made of the timing and frequency of
             intercourse relative to ovulation. Day-specific
             probabilities of pregnancy can be used to account for the
             effects of intercourse patterns. Depending on the research
             hypothesis, intercourse patterns may be considered as a
             potential confounder, mediator, or outcome.},
   Doi = {10.1093/aje/kwk111},
   Key = {fds257947}
}

@article{fds304008,
   Author = {Banerjee, A and Dunson, DB and Tokdar, ST},
   Title = {Efficient Gaussian process regression for large
             datasets},
   Journal = {Biometrika},
   Volume = {100},
   Number = {1},
   Pages = {75-89},
   Year = {2013},
   url = {http://arxiv.org/abs/1106.5779v1},
   Abstract = {Gaussian processes are widely used in nonparametric
             regression, classification and spatiotemporal modelling,
             facilitated in part by a rich literature on their
             theoretical properties. However, one of their practical
             limitations is expensive computation, typically on the order
             of n^3 where n is the number of data points, in performing
             the necessary matrix inversions. For large datasets, storage
             and processing also lead to computational bottlenecks, and
             numerical stability of the estimates and predicted values
             degrades with increasing n. Various methods have been
             proposed to address these problems, including predictive
             processes in spatial data analysis and the
             subset-of-regressors technique in machine learning. The idea
             underlying these approaches is to use a subset of the data,
             but this raises questions concerning sensitivity to the
             choice of subset and limitations in estimating fine-scale
             structure in regions that are not well covered by the
             subset. Motivated by the literature on compressive sensing,
             we propose an alternative approach that involves linear
             projection of all the data points onto a lower-dimensional
             subspace. We demonstrate the superiority of this approach
             from a theoretical perspective and through simulated and
             real data examples. © 2012 Biometrika Trust.},
   Doi = {10.1093/biomet/ass068},
   Key = {fds304008}
}
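
% Editor's note: a numpy-only sketch of the projection idea above:
% instead of inverting the full n x n kernel matrix, condition on an
% m-dimensional random linear projection Phi @ y of the data. The
% predictive mean follows from standard joint-Gaussian conditioning;
% kernel and noise values are illustrative.

    import numpy as np

    rng = np.random.default_rng(6)
    n, m, sigma2 = 500, 50, 0.09
    x = np.sort(rng.uniform(0, 10, n))
    y = np.sin(x) + 0.3 * rng.standard_normal(n)

    def kern(a, b, ls=1.0):           # squared-exponential kernel
        return np.exp(-0.5 * ((a[:, None] - b[None, :]) / ls) ** 2)

    K = kern(x, x)
    Phi = rng.standard_normal((m, n)) / np.sqrt(m)

    # E[f | Phi y] = K Phi^T (Phi (K + sigma2 I) Phi^T)^{-1} Phi y.
    A = Phi @ (K + sigma2 * np.eye(n)) @ Phi.T
    f_hat = K @ Phi.T @ np.linalg.solve(A, Phi @ y)
    print("rmse to truth:", np.sqrt(np.mean((f_hat - np.sin(x)) ** 2)))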

@article{fds358047,
   Author = {Sen, D and Sachs, M and Lu, J and Dunson, DB},
   Title = {Efficient posterior sampling for high-dimensional imbalanced
             logistic regression.},
   Journal = {Biometrika},
   Volume = {107},
   Number = {4},
   Pages = {1005-1012},
   Year = {2020},
   Month = {December},
   url = {http://dx.doi.org/10.1093/biomet/asaa035},
   Abstract = {Classification with high-dimensional data is of widespread
             interest and often involves dealing with imbalanced data.
             Bayesian classification approaches are hampered by the fact
             that current Markov chain Monte Carlo algorithms for
             posterior computation become inefficient as the number p of
             predictors or the number n of subjects to classify gets
             large, because of the
             increasing computational time per step and worsening mixing
             rates. One strategy is to employ a gradient-based sampler to
             improve mixing while using data subsamples to reduce the
             per-step computational complexity. However, the usual
             subsampling breaks down when applied to imbalanced data.
             Instead, we generalize piecewise-deterministic Markov chain
             Monte Carlo algorithms to include importance-weighted and
             mini-batch subsampling. These maintain the correct
             stationary distribution with arbitrarily small subsamples
             and substantially outperform current competitors. We provide
             theoretical support for the proposed approach and
             demonstrate its performance gains in simulated data examples
             and an application to cancer data.},
   Doi = {10.1093/biomet/asaa035},
   Key = {fds358047}
}
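
% Editor's note: the full method generalizes piecewise-deterministic
% MCMC; this sketch shows only one ingredient, an importance-weighted
% minibatch gradient of the logistic log-likelihood that stays unbiased
% under class imbalance by oversampling the rare class and reweighting.
% numpy assumed; all names and sizes are illustrative.

    import numpy as np

    rng = np.random.default_rng(7)
    n, p, B = 100000, 5, 256
    X = rng.standard_normal((n, p))
    b_true = np.array([2.0, -1.0, 0.0, 0.0, 0.5])
    y = (rng.random(n) < 1 / (1 + np.exp(-(X @ b_true - 6.0)))).astype(float)

    n_pos = max(y.sum(), 1.0)
    q = np.where(y == 1, 0.5 / n_pos, 0.5 / (n - n_pos))
    q = q / q.sum()                   # oversample the rare positives

    def grad_estimate(beta):
        idx = rng.choice(n, size=B, p=q)
        w = 1.0 / (B * q[idx])        # importance weights
        resid = y[idx] - 1 / (1 + np.exp(-X[idx] @ beta))
        return X[idx].T @ (w * resid) # unbiased for the full gradient

    print("rare fraction:", y.mean(), grad_estimate(np.zeros(p)))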

@article{fds374408,
   Author = {Melikechi, O and Dunson, DB},
   Title = {Ellipsoid fitting with the Cayley transform.},
   Journal = {IEEE transactions on signal processing : a publication of
             the IEEE Signal Processing Society},
   Volume = {72},
   Pages = {70-83},
   Year = {2024},
   Month = {January},
   url = {http://dx.doi.org/10.1109/tsp.2023.3332560},
   Abstract = {We introduce Cayley transform ellipsoid fitting (CTEF), an
             algorithm that uses the Cayley transform to fit ellipsoids
             to noisy data in any dimension. Unlike many ellipsoid
             fitting methods, CTEF is ellipsoid specific, meaning it
             always returns elliptic solutions, and can fit arbitrary
             ellipsoids. It also significantly outperforms other fitting
             methods when data are not uniformly distributed over the
             surface of an ellipsoid. Inspired by growing calls for
             interpretable and reproducible methods in machine learning,
             we apply CTEF to dimension reduction, data visualization,
             and clustering in the context of cell cycle and circadian
             rhythm data and several classical toy examples. Since CTEF
             captures global curvature, it extracts nonlinear features in
             data that other machine learning methods fail to identify.
             For example, on the clustering examples CTEF outperforms 10
             popular algorithms.},
   Doi = {10.1109/tsp.2023.3332560},
   Key = {fds374408}
}
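
% Editor's note: not the full CTEF algorithm, only its key device as a
% sketch: the Cayley transform maps unconstrained skew-symmetric
% parameters to a rotation, giving a smooth parametrization of ellipsoid
% orientation, plus the residual a fit would minimize. numpy assumed.

    import numpy as np

    def cayley(s_params, d):
        S = np.zeros((d, d))
        S[np.triu_indices(d, k=1)] = s_params
        S -= S.T                                   # skew-symmetric
        return (np.eye(d) - S) @ np.linalg.inv(np.eye(d) + S)

    def ellipsoid_residuals(pts, center, axes, R):
        # ||diag(1/axes) R^T (x - c)||^2 = 1 on the ellipsoid surface.
        z = (pts - center) @ R / axes
        return (z ** 2).sum(axis=1) - 1.0

    rng = np.random.default_rng(8)
    R = cayley(rng.standard_normal(3), d=3)
    print("orthogonal:", np.allclose(R @ R.T, np.eye(3)))
    pts = rng.standard_normal((5, 3))
    print(ellipsoid_residuals(pts, np.zeros(3), np.array([1., 2., 3.]), R))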

@article{fds376911,
   Author = {Winter, S and Campbell, T and Lin, L and Srivastava, S and Dunson,
             DB},
   Title = {Emerging Directions in Bayesian Computation},
   Journal = {Statistical Science},
   Volume = {39},
   Number = {1},
   Pages = {62-89},
   Year = {2024},
   Month = {January},
   url = {http://dx.doi.org/10.1214/23-STS919},
   Abstract = {Bayesian models are powerful tools for studying complex
             data, allowing the analyst to encode rich hierarchical
             dependencies and leverage prior information. Most
             importantly, they facilitate a complete characterization of
             uncertainty through the posterior distribution. Practical
             posterior computation is commonly performed via MCMC, which
             can be computationally infeasible for high-dimensional
             models with many observations. In this article, we discuss
             the potential to improve posterior computation using ideas
             from machine learning. Concrete directions are explored in
             vignettes on normalizing flows, statistical properties of
             variational approximations, Bayesian coresets and
             distributed Bayesian inference.},
   Doi = {10.1214/23-STS919},
   Key = {fds376911}
}

@article{fds258071,
   Author = {Dunson, DB},
   Title = {Empirical bayes density regression},
   Journal = {Statistica Sinica},
   Volume = {17},
   Number = {2},
   Pages = {481-504},
   Year = {2007},
   Month = {April},
   ISSN = {1017-0405},
   Abstract = {In Bayesian hierarchical modeling, it is often appealing to
             allow the conditional density of an (observable or
             unobservable) random variable Y to change flexibly with
             categorical and continuous predictors X. A mixture of
             regression models is proposed, with the mixture distribution
             varying with X. Treating the smoothing parameters and number
             of mixture components as unknown, the MLE does not exist,
             motivating an empirical Bayes approach. The proposed method
             shrinks the spatially-adaptive mixture distributions to a
             common baseline, while penalizing rapid changes and large
             numbers of components. The discrete form of the mixture
             distribution facilitates flexible classification of
             subjects. A Gibbs sampling algorithm is developed, which
             embeds a Monte Carlo EM-type stage to estimate smoothing and
             hyper-parameters. The method is applied to simulated
             examples and data from an epidemiologic study.},
   Key = {fds258071}
}

@article{fds322559,
   Author = {Scarpa, B and Dunson, DB},
   Title = {Enriched Stick Breaking Processes for Functional
             Data.},
   Journal = {Journal of the American Statistical Association},
   Volume = {109},
   Number = {506},
   Pages = {647-660},
   Year = {2014},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2013.866564},
   Abstract = {In many applications involving functional data, prior
             information is available about the proportion of curves
             having different attributes. It is not straightforward to
             include such information in existing procedures for
             functional data analysis. Generalizing the functional
             Dirichlet process (FDP), we propose a class of
             stick-breaking priors for distributions of functions. These
             priors incorporate functional atoms drawn from constrained
             stochastic processes. The stick-breaking weights are
             specified to allow user-specified prior probabilities for
             curve attributes, with hyperpriors accommodating
             uncertainty. Compared with the FDP, the random distribution
             is enriched for curves having attributes known to be common.
             Theoretical properties are considered, methods are developed
             for posterior computation, and the approach is illustrated
             using data on temperature curves in menstrual
             cycles.},
   Doi = {10.1080/01621459.2013.866564},
   Key = {fds322559}
}
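
% Editor's note: the baseline construction these priors generalize, as a
% sketch: truncated stick-breaking weights V_h ~ Beta(1, alpha), with
% pi_h = V_h times the remaining stick. The enriched version re-tilts
% the breaks to encode prior attribute probabilities. numpy assumed.

    import numpy as np

    rng = np.random.default_rng(9)
    H, alpha = 25, 2.0
    V = rng.beta(1.0, alpha, size=H)
    pi = V * np.concatenate(([1.0], np.cumprod(1.0 - V)[:-1]))
    pi[-1] = 1.0 - pi[:-1].sum()       # close off the truncation
    print(np.round(pi[:10], 3), "sum:", pi.sum())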

@article{fds258025,
   Author = {Hannah, LA and Dunson, DB},
   Title = {Ensemble methods for convex regression with applications to
             geometric programming based circuit design},
   Journal = {Proceedings of the 29th International Conference on Machine
             Learning, ICML 2012},
   Volume = {1},
   Pages = {369-376},
   Year = {2012},
   Month = {October},
   Abstract = {Convex regression is a promising area for bridging
             statistical estimation and deterministic convex
             optimization. New piecewise linear convex regression methods
             (Hannah and Dunson, 2011; Magnani and Boyd, 2009) are fast
             and scalable, but can have instability when used to
             approximate constraints or objective functions for
             optimization. Ensemble methods, like bagging, smearing and
             random partitioning, can alleviate this problem and maintain
             the theoretical properties of the underlying estimator. We
             empirically examine the performance of ensemble methods for
             prediction and optimization, and then apply them to device
             modeling and constraint approximation for geometric
             programming based circuit design. Copyright 2012 by the
             author(s)/owner(s).},
   Key = {fds258025}
}
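
% Editor's note: a sketch of max-affine convex regression in the spirit
% of the fast methods cited above (alternate between assigning points to
% pieces and refitting each affine piece), plus bagging: averaging
% max-affine fits keeps convexity and damps instability. numpy assumed.

    import numpy as np

    def fit_max_affine(X, y, K=4, iters=20, seed=0):
        rng = np.random.default_rng(seed)
        n, d = X.shape
        Z = np.column_stack([X, np.ones(n)])
        assign = rng.integers(K, size=n)
        coefs = np.zeros((K, d + 1))
        for _ in range(iters):
            for k in range(K):
                mask = assign == k
                if mask.sum() > d:
                    coefs[k], *_ = np.linalg.lstsq(Z[mask], y[mask],
                                                   rcond=None)
            assign = (Z @ coefs.T).argmax(axis=1)
        return coefs

    def predict(coefs, X):
        Z = np.column_stack([X, np.ones(len(X))])
        return (Z @ coefs.T).max(axis=1)

    rng = np.random.default_rng(10)
    X = rng.uniform(-2, 2, (400, 1))
    y = X[:, 0] ** 2 + 0.1 * rng.standard_normal(400)

    preds = []                          # bagging over bootstrap refits
    for b in range(10):
        idx = rng.integers(400, size=400)
        preds.append(predict(fit_max_affine(X[idx], y[idx], seed=b), X))
    err = np.mean(preds, 0) - X[:, 0] ** 2
    print("bagged rmse:", np.sqrt(np.mean(err ** 2)))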

@article{fds327282,
   Author = {Moffitt, AB and Ondrejka, SL and McKinney, M and Rempel, RE and Goodlad,
             JR and Teh, CH and Leppa, S and Mannisto, S and Kovanen, PE and Tse, E and Au-Yeung, RKH and Kwong, Y-L and Srivastava, G and Iqbal, J and Yu, J and Naresh, K and Villa, D and Gascoyne, RD and Said, J and Czader, MB and Chadburn, A and Richards, KL and Rajagopalan, D and Davis, NS and Smith,
             EC and Palus, BC and Tzeng, TJ and Healy, JA and Lugar, PL and Datta, J and Love, C and Levy, S and Dunson, DB and Zhuang, Y and Hsi, ED and Dave,
             SS},
   Title = {Enteropathy-associated T cell lymphoma subtypes are
             characterized by loss of function of SETD2.},
   Journal = {J Exp Med},
   Volume = {214},
   Number = {5},
   Pages = {1371-1386},
   Year = {2017},
   Month = {May},
   url = {http://dx.doi.org/10.1084/jem.20160894},
   Abstract = {Enteropathy-associated T cell lymphoma (EATL) is a lethal,
             and the most common, neoplastic complication of celiac
             disease. Here, we defined the genetic landscape of EATL
             through whole-exome sequencing of 69 EATL tumors. SETD2 was
             the most frequently silenced gene in EATL (32% of cases).
             The JAK-STAT pathway was the most frequently mutated
             pathway, with frequent mutations in STAT5B as well as JAK1,
             JAK3, STAT3, and SOCS1. We also identified mutations in
             KRAS, TP53, and TERT. Type I EATL and type II EATL
             (monomorphic epitheliotropic intestinal T cell lymphoma) had
             highly overlapping genetic alterations, indicating shared
             mechanisms
             underlying their pathogenesis. We modeled the effects of
             SETD2 loss in vivo by developing a T cell-specific knockout
             mouse. These mice manifested an expansion of γδ T cells,
             indicating novel roles for SETD2 in T cell development and
             lymphomagenesis. Our data render the most comprehensive
             genetic portrait yet of this uncommon but lethal disease and
             may inform future classification schemes.},
   Doi = {10.1084/jem.20160894},
   Key = {fds327282}
}

@article{fds362826,
   Author = {van den Boom, W and Reeves, G and Dunson, DB},
   Title = {Erratum: Approximating posteriors with high-dimensional
             nuisance parameters via integrated rotated Gaussian
             approximation (Biometrika (2021) 108 (269-282) DOI:
             10.1093/biomet/asaa068)},
   Journal = {Biometrika},
   Volume = {109},
   Number = {1},
   Pages = {275},
   Year = {2022},
   Month = {March},
   url = {http://dx.doi.org/10.1093/biomet/asab019},
   Abstract = {In the main paper, under subsection '3.2. Bayesian
             variable selection', all references to '5.2' should read
             '3.1'. Under subsection '5.2. Bayesian variable selection',
             the reference to '5.3 and 6' should read 'S5.3 and S6'.
             These errors have now been corrected.},
   Doi = {10.1093/biomet/asab019},
   Key = {fds362826}
}

@article{fds258000,
   Author = {Chen, M and Silva, J and Paisley, J and Wang, C and Dunson, D and Carin,
             L},
   Title = {Erratum: Compressive sensing on manifolds using a
             nonparametric mixture of factor analyzers: Algorithm and
             performance bounds (IEEE Transactions on Signal Processing
             (2011) 58(12) (6140-6155))},
   Journal = {IEEE Transactions on Signal Processing},
   Volume = {59},
   Number = {3},
   Pages = {1329},
   Publisher = {Institute of Electrical and Electronics Engineers
             (IEEE)},
   Year = {2011},
   Month = {March},
   ISSN = {1053-587X},
   url = {http://dx.doi.org/10.1109/TSP.2011.2107810},
   Doi = {10.1109/TSP.2011.2107810},
   Key = {fds258000}
}

@article{fds346412,
   Author = {Strawn, N and Armagan, A and Saab, R and Carin, L and Dunson,
             D},
   Title = {Erratum: Finite sample posterior concentration in
             high-dimensional regression (Information and Inference
             (2015) 3 (103-133) DOI: 10.1093/imaiai/iau003)},
   Journal = {Information and Inference},
   Volume = {4},
   Number = {1},
   Pages = {77},
   Year = {2015},
   Month = {March},
   url = {http://dx.doi.org/10.1093/imaiai/iau008},
   Abstract = {Artin Armagan's and Rayan Saab's affiliations were switched
             in the published version of this article. Artin Armagan's
             affiliation should be: SAS Institute, Inc., Raleigh, NC,
             USA; Rayan Saab's affiliation should be: Department of
             Mathematics, University of California, San Diego, CA, USA.
             The Publisher apologizes for this error.},
   Doi = {10.1093/imaiai/iau008},
   Key = {fds346412}
}

@article{fds257899,
   Author = {Nyska, A and Lomnitski, L and Spalding, J and Dunson, DB and Goldsworthy, TL and Ben-Shaul, V and Grossman, S and Bergman, M and Boorman, G},
   Title = {Erratum: Topical and oral administration of the natural
             water-soluble antioxidant from spinach reduces the
             multiplicity of papillomas in the Tg.AC mouse model
             (Toxicology Letters (2001) 122 (33-44) PII:
             S0378427401003459)},
   Journal = {Toxicology Letters},
   Volume = {123},
   Number = {2-3},
   Pages = {237},
   Publisher = {Elsevier BV},
   Year = {2001},
   Month = {September},
   ISSN = {0378-4274},
   url = {http://dx.doi.org/10.1016/S0378-4274(01)00417-9},
   Doi = {10.1016/S0378-4274(01)00417-9},
   Key = {fds257899}
}

@article{fds376096,
   Author = {Talbot, A and Dunson, D and Dzirasa, K and Carlson,
             D},
   Title = {Estimating a brain network predictive of stress and genotype
             with supervised autoencoders.},
   Journal = {J R Stat Soc Ser C Appl Stat},
   Volume = {72},
   Number = {4},
   Pages = {912-936},
   Year = {2023},
   Month = {August},
   url = {http://dx.doi.org/10.1093/jrsssc/qlad035},
   Abstract = {Targeted brain stimulation has the potential to treat mental
             illnesses. We develop an approach to help design protocols
             by identifying relevant multi-region electrical dynamics.
             Our approach models these dynamics as a superposition of
             latent networks, where the latent variables predict a
             relevant outcome. We use supervised autoencoders (SAEs) to
             improve predictive performance in this context, describe the
             conditions where SAEs improve predictions, and provide
             modelling constraints to ensure biological relevance. We
             experimentally validate our approach by finding a network
             associated with stress that aligns with a previous
             stimulation protocol and characterizing a genotype
             associated with bipolar disorder.},
   Doi = {10.1093/jrsssc/qlad035},
   Key = {fds376096}
}

@article{fds351441,
   Author = {Mukhopadhyay, M and Li, D and Dunson, DB},
   Title = {Estimating densities with non-linear support by using
             Fisher-Gaussian kernels.},
   Journal = {Journal of the Royal Statistical Society. Series B,
             Statistical methodology},
   Volume = {82},
   Number = {5},
   Pages = {1249-1271},
   Year = {2020},
   Month = {December},
   url = {http://dx.doi.org/10.1111/rssb.12390},
   Abstract = {Current tools for multivariate density estimation struggle
             when the density is concentrated near a non-linear subspace
             or manifold. Most approaches require the choice of a kernel,
             with the multivariate Gaussian kernel by far the most
             commonly used. Although heavy-tailed and skewed extensions
             have been proposed, such kernels cannot capture curvature in
             the support of the data. This leads to poor performance
             unless the sample size is very large relative to the
             dimension of the data. The paper proposes a novel
             generalization of the Gaussian distribution, which includes
             an additional curvature parameter. We refer to the proposed
             class as Fisher-Gaussian kernels, since they arise by
             sampling from a von Mises-Fisher density on the sphere and
             adding Gaussian noise. The Fisher-Gaussian density has an
             analytic form and is amenable to straightforward
             implementation within Bayesian mixture models by using
             Markov chain Monte Carlo sampling. We provide theory on
             large support and illustrate gains relative to competitors
             in simulated and real data applications.},
   Doi = {10.1111/rssb.12390},
   Key = {fds351441}
}
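
% Editor's note: sampling from a single Fisher-Gaussian kernel as
% described above: a von Mises-Fisher draw on a sphere of radius r plus
% isotropic Gaussian noise, so the kernel hugs curved support. Assumes
% SciPy >= 1.11 for scipy.stats.vonmises_fisher; values illustrative.

    import numpy as np
    from scipy.stats import vonmises_fisher  # SciPy >= 1.11

    rng = np.random.default_rng(11)
    center, radius, sigma = np.zeros(3), 2.0, 0.1
    mu, kappa = np.array([0.0, 0.0, 1.0]), 5.0

    u = vonmises_fisher(mu, kappa).rvs(500, random_state=rng)
    x = center + radius * u + sigma * rng.standard_normal((500, 3))

    # Points concentrate near the upper cap of the radius-2 sphere.
    print("mean norm:", np.linalg.norm(x, axis=1).mean())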

@article{fds257941,
   Author = {Peddada, SD and Dunson, DB and Tan, X},
   Title = {Estimation of order-restricted means from correlated
             data},
   Journal = {Biometrika},
   Volume = {92},
   Number = {3},
   Pages = {703-715},
   Publisher = {Oxford University Press (OUP)},
   Year = {2005},
   Month = {September},
   url = {http://dx.doi.org/10.1093/biomet/92.3.703},
   Abstract = {In many applications, researchers are interested in
             estimating the mean of a multivariate normal random vector
             whose components are subject to order restrictions. Various
             authors have demonstrated that the likelihood-based
             methodology may perform poorly under certain conditions for
             such problems. The problem is much harder when the
             underlying covariance matrix is nondiagonal. In this paper a
             simple iterative algorithm is introduced that can be used
             for estimating the mean of a multivariate normal population
             when the components are subject to any order restriction.
             The proposed methodology is illustrated through an
             application to human reproductive hormone data. © 2005
             Biometrika Trust.},
   Doi = {10.1093/biomet/92.3.703},
   Key = {fds257941}
}

@article{fds329110,
   Author = {Srivastava, S and Engelhardt, BE and Dunson, DB},
   Title = {Expandable factor analysis.},
   Journal = {Biometrika},
   Volume = {104},
   Number = {3},
   Pages = {649-663},
   Year = {2017},
   Month = {September},
   url = {http://dx.doi.org/10.1093/biomet/asx030},
   Abstract = {Bayesian sparse factor models have proven useful for
             characterizing dependence in multivariate data, but scaling
             computation to large numbers of samples and dimensions is
             problematic. We propose expandable factor analysis for
             scalable inference in factor models when the number of
             factors is unknown. The method relies on a continuous
             shrinkage prior for efficient maximum a posteriori
             estimation of a low-rank and sparse loadings matrix. The
             structure of the prior leads to an estimation algorithm that
             accommodates uncertainty in the number of factors. We
             propose an information criterion to select the
             hyperparameters of the prior. Expandable factor analysis has
             better false discovery rates and true positive rates than
             its competitors across diverse simulation settings. We apply
             the proposed approach to a gene expression study of ageing
             in mice, demonstrating superior results relative to four
             competing methods.},
   Doi = {10.1093/biomet/asx030},
   Key = {fds329110}
}

@article{fds371873,
   Author = {Buch, DA and Johndrow, JE and Dunson, DB},
   Title = {Explaining transmission rate variations and forecasting
             epidemic spread in multiple regions with a semiparametric
             mixed effects SIR model.},
   Journal = {Biometrics},
   Volume = {79},
   Number = {4},
   Pages = {2987-2997},
   Year = {2023},
   Month = {December},
   url = {http://dx.doi.org/10.1111/biom.13901},
   Abstract = {The transmission rate is a central parameter in mathematical
             models of infectious disease. Its pivotal role in outbreak
             dynamics makes estimating the current transmission rate and
             uncovering its dependence on relevant covariates a core
             challenge in epidemiological research as well as public
             health policy evaluation. Here, we develop a method for
             flexibly inferring a time-varying transmission rate
             parameter, modeled as a function of covariates and a smooth
             Gaussian process (GP). The transmission rate model is
             further embedded in a hierarchy to allow information
             borrowing across parallel streams of regional incidence
             data. Crucially, the method makes use of optional
             vaccination data as a first step toward modeling of endemic
             infectious diseases. Computational techniques borrowed from
             the Bayesian spatial analysis literature enable fast and
             reliable posterior computation. Simulation studies reveal
             that the method recovers true covariate effects at nominal
             coverage levels. We analyze data from the COVID-19 pandemic
             and validate forecast intervals on held-out data.
             User-friendly software is provided to enable practitioners
             to easily deploy the method in public health research.},
   Doi = {10.1111/biom.13901},
   Key = {fds371873}
}

@article{fds332886,
   Author = {Shang, Y and Dunson, D and Song, JS},
   Title = {Exploiting big data in logistics risk assessment via
             Bayesian nonparametrics},
   Journal = {Operations Research},
   Volume = {65},
   Number = {6},
   Pages = {1574-1588},
   Publisher = {Institute for Operations Research and the Management
             Sciences (INFORMS)},
   Year = {2017},
   Month = {November},
   url = {http://dx.doi.org/10.1287/opre.2017.1612},
   Abstract = {In cargo logistics, a key performance measure is transport
             risk, defined as the deviation of the actual arrival time
             from the planned arrival time. Neither earliness nor
             tardiness is desirable for customers and freight forwarders.
             In this paper, we investigate ways to assess and forecast
             transport risks using a half-year of air cargo data,
             provided by a leading forwarder on 1,336 routes served by 20
             airlines. Interestingly, our preliminary data analysis shows
             a strong multimodal feature in the transport risks, driven
             by unobserved events, such as cargo missing flights. To
             accommodate this feature, we introduce a Bayesian
             nonparametric model, the probit stick-breaking process
             mixture model, for flexible estimation of the conditional
             (i.e., state-dependent) density function of transport risk.
             We demonstrate that using alternative methods can lead to
             misleading inferences. Our model provides a tool for the
             forwarder to offer customized price and service quotes. It
             can also generate baseline airline performance to enable
             fair supplier evaluation. Furthermore, the method allows us
             to separate recurrent risks from disruption risks. This is
             important, because hedging strategies for these two kinds of
             risks are often drastically different.},
   Doi = {10.1287/opre.2017.1612},
   Key = {fds332886}
}
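
A minimal sketch of the probit stick-breaking mixture named in the
abstract above, assuming Gaussian kernels and covariate-independent
weights (the paper makes the weights state-dependent); the truncation
level and all parameter values are illustrative.

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(0)
K = 20                                 # truncation level (assumption)
alpha = rng.normal(size=K)             # latent Gaussian scores
v = norm.cdf(alpha)                    # probit transform -> sticks in (0, 1)
w = v * np.concatenate(([1.0], np.cumprod(1 - v[:-1])))  # mixture weights
mu = rng.normal(scale=3.0, size=K)     # component means
sigma = 0.5                            # common kernel scale (assumption)

def density(y):
    """Mixture density f(y) = sum_k w_k N(y; mu_k, sigma^2)."""
    return np.sum(w * norm.pdf(y, loc=mu, scale=sigma))

print(density(0.0))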

@article{fds367372,
   Author = {Legramanti, S and Rigon, T and Durante, D and Dunson,
             DB},
   Title = {Extended stochastic block models with application to
             criminal networks.},
   Journal = {The Annals of Applied Statistics},
   Volume = {16},
   Number = {4},
   Pages = {2369-2395},
   Year = {2022},
   Month = {December},
   url = {http://dx.doi.org/10.1214/21-aoas1595},
   Abstract = {Reliably learning group structures among nodes in network
             data is challenging in several applications. We are
             particularly motivated by studying covert networks that
             encode relationships among criminals. These data are subject
             to measurement errors, and exhibit a complex combination of
             an unknown number of core-periphery, assortative and
             disassortative structures that may unveil key architectures
             of the criminal organization. The coexistence of these noisy
             block patterns limits the reliability of routinely-used
             community detection algorithms, and requires extensions of
             model-based solutions to realistically characterize the node
             partition process, incorporate information from node
             attributes, and provide improved strategies for estimation
             and uncertainty quantification. To cover these gaps, we
             develop a new class of extended stochastic block models
             (ESBM) that infer groups of nodes having common connectivity
             patterns via Gibbs-type priors on the partition process.
             This choice encompasses many realistic priors for criminal
             networks, covering solutions with fixed, random and infinite
             number of possible groups, and facilitates the inclusion of
             node attributes in a principled manner. Among the new
             alternatives in our class, we focus on the Gnedin process as
             a realistic prior that allows the number of groups to be
             finite, random and subject to a reinforcement process
             coherent with criminal networks. A collapsed Gibbs sampler
             is proposed for the whole ESBM class, and refined strategies
             for estimation, prediction, uncertainty quantification and
             model selection are outlined. The ESBM performance is
             illustrated in realistic simulations and in an application
             to an Italian mafia network, where we unveil key complex
             block structures, mostly hidden from state-of-the-art
             alternatives.},
   Doi = {10.1214/21-aoas1595},
   Key = {fds367372}
}

@article{fds339365,
   Author = {van den Boom, W and Mao, C and Schroeder, RA and Dunson,
             DB},
   Title = {Extrema-weighted feature extraction for functional
             data.},
   Journal = {Bioinformatics},
   Volume = {34},
   Number = {14},
   Pages = {2457-2464},
   Year = {2018},
   Month = {July},
   url = {http://dx.doi.org/10.1093/bioinformatics/bty120},
   Abstract = {MOTIVATION: Although there is a rich literature on methods
             for assessing the impact of functional predictors, the focus
             has been on approaches for dimension reduction that do not
             suit certain applications. Examples of standard approaches
             include functional linear models, functional principal
             components regression and cluster-based approaches, such as
             latent trajectory analysis. This article is motivated by
             applications in which the dynamics in a predictor, across
             times when the value is relatively extreme, are particularly
             informative about the response. For example, physicians are
             interested in relating the dynamics of blood pressure
             changes during surgery to post-surgery adverse outcomes, and
             it is thought that the dynamics are more important when
             blood pressure is significantly elevated or lowered.
             RESULTS: We propose a novel class of extrema-weighted
             feature (XWF) extraction models. Key components in defining
             XWFs include the marginal density of the predictor, a
             function up-weighting values at extreme quantiles of this
             marginal, and functionals characterizing local dynamics.
             Algorithms are proposed for fitting of XWF-based regression
             and classification models, and are compared with current
             methods for functional predictors in simulations and a blood
             pressure during surgery application. XWFs find features of
             intraoperative blood pressure trajectories that are
             predictive of postoperative mortality. By their nature, most
             of these features cannot be found by previous methods.
             AVAILABILITY AND IMPLEMENTATION: The R package 'xwf' is
             available at the CRAN repository: https://cran.r-project.org/package=xwf.
             SUPPLEMENTARY INFORMATION: Supplementary data are available
             at Bioinformatics online.},
   Doi = {10.1093/bioinformatics/bty120},
   Key = {fds339365}
}
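
A minimal sketch of an extrema-weighted feature in the spirit of the
abstract above: local dynamics are up-weighted where the predictor is
extreme under its empirical marginal distribution. The weight function
and exponent k are illustrative assumptions, not the exact definitions
used in the 'xwf' package.

import numpy as np

def xwf(x, k=2):
    """Weight local dynamics |dx/dt| by how extreme x is under its
    empirical CDF, up-weighting values near either tail."""
    x = np.asarray(x, dtype=float)
    ranks = np.argsort(np.argsort(x))
    F = (ranks + 0.5) / len(x)           # empirical CDF values in (0, 1)
    weight = (2 * np.abs(F - 0.5)) ** k  # large near both tails (assumption)
    dynamics = np.abs(np.gradient(x))    # local rate of change
    return np.sum(weight * dynamics) / np.sum(weight)

bp = 120 + 15 * np.sin(np.linspace(0, 10, 200))  # toy blood-pressure curve
print(xwf(bp))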

@article{fds345691,
   Author = {Lin, L and Mu, N and Cheung, P and Dunson, D},
   Title = {Extrinsic Gaussian processes for regression and
             classification on manifolds},
   Journal = {Bayesian Analysis},
   Volume = {14},
   Number = {3},
   Pages = {887-906},
   Year = {2019},
   Month = {January},
   url = {http://dx.doi.org/10.1214/18-BA1135},
   Abstract = {Gaussian processes (GPs) are very widely used for modeling
             of unknown functions or surfaces in applications ranging
             from regression to classification to spatial processes.
             Although there is an increasingly vast literature on
             applications, methods, theory and algorithms related to GPs,
             the overwhelming majority of this literature focuses on the
             case in which the input domain corresponds to a Euclidean
             space. However, particularly in recent years with the
             increasing collection of complex data, it is commonly the
             case that the input domain does not have such a simple form.
             For example, it is common for the inputs to be restricted to
             a non-Euclidean manifold, a case which forms the motivation
             for this article. In particular, we propose a general
             extrinsic framework for GP modeling on manifolds, which
             relies on embedding of the manifold into a Euclidean space
             and then constructing extrinsic kernels for GPs on their
             images. These extrinsic Gaussian processes (eGPs) are used
             as prior distributions for unknown functions in Bayesian
             inferences. Our approach is simple and general, and we show
             that the eGPs inherit fine theoretical properties from GP
             models in Euclidean spaces. We consider applications of our
             models to regression and classification problems with
             predictors lying in a large class of manifolds, including
             spheres, planar shape spaces, a space of positive definite
             matrices, and Grassmannians. Our models can be readily used
             by practitioners in biological sciences for various
             regression and classification problems, such as disease
             diagnosis or detection. Our work is also likely to have
             impact in spatial statistics when spatial locations are on
             the sphere or other geometric spaces.},
   Doi = {10.1214/18-BA1135},
   Key = {fds345691}
}
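
The extrinsic construction described above reduces, for the sphere, to
an ordinary Gaussian process fit on the embedded coordinates. A minimal
sketch for inputs on S^2, assuming a squared-exponential kernel on the
embedding and a toy response; all settings are illustrative.

import numpy as np

def sq_exp_kernel(X, Z, length=0.5):
    d2 = ((X[:, None, :] - Z[None, :, :]) ** 2).sum(-1)
    return np.exp(-d2 / (2 * length ** 2))

rng = np.random.default_rng(1)
X = rng.normal(size=(50, 3))
X /= np.linalg.norm(X, axis=1, keepdims=True)   # inputs on the unit sphere
y = X[:, 2] + 0.1 * rng.normal(size=50)         # toy response (assumption)

K = sq_exp_kernel(X, X) + 1e-2 * np.eye(50)     # kernel plus noise variance
Xnew = np.array([[0.0, 0.0, 1.0]])              # test point on the sphere
mean = sq_exp_kernel(Xnew, X) @ np.linalg.solve(K, y)  # GP posterior mean
print(mean)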

@article{fds326570,
   Author = {Lin, L and St Thomas, B and Zhu, H and Dunson, DB},
   Title = {Extrinsic local regression on manifold-valued
             data.},
   Journal = {Journal of the American Statistical Association},
   Volume = {112},
   Number = {519},
   Pages = {1261-1273},
   Year = {2017},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2016.1208615},
   Abstract = {We propose an extrinsic regression framework for modeling
             data with manifold valued responses and Euclidean
             predictors. Regression with manifold responses has wide
             applications in shape analysis, neuroscience, medical
             imaging and many other areas. Our approach embeds the
             manifold where the responses lie onto a higher dimensional
             Euclidean space, obtains a local regression estimate in that
             space, and then projects this estimate back onto the image
             of the manifold. Outside the regression setting both
             intrinsic and extrinsic approaches have been proposed for
             modeling i.i.d. manifold-valued data. However, to our
             knowledge, our work is the first to take an extrinsic
             approach to the regression problem. The proposed extrinsic
             regression framework is general, computationally efficient
             and theoretically appealing. Asymptotic distributions and
             convergence rates of the extrinsic regression estimates are
             derived and a large class of examples are considered
             indicating the wide applicability of our
             approach.},
   Doi = {10.1080/01621459.2016.1208615},
   Key = {fds326570}
}
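
For a sphere-valued response, the embed-estimate-project recipe in the
abstract above amounts to a kernel-weighted average in the ambient
space followed by projection, which for the sphere is normalization. A
minimal sketch under that assumption; bandwidth and data are
illustrative.

import numpy as np

def extrinsic_nw(x0, X, Y, h=0.3):
    w = np.exp(-((X - x0) ** 2) / (2 * h ** 2))  # Gaussian kernel weights
    m = (w[:, None] * Y).sum(axis=0) / w.sum()   # local average in R^3
    return m / np.linalg.norm(m)                 # project back onto S^2

rng = np.random.default_rng(2)
X = rng.uniform(-1, 1, size=100)                 # Euclidean predictor
theta = np.pi * (X + 1) / 2
Y = np.c_[np.cos(theta), np.sin(theta), 0.1 * rng.normal(size=100)]
Y /= np.linalg.norm(Y, axis=1, keepdims=True)    # responses on the sphere
print(extrinsic_nw(0.0, X, Y))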

@article{fds257890,
   Author = {Dunson, DB and Perreault, SD},
   Title = {Factor analytic models of clustered multivariate data with
             informative censoring.},
   Journal = {Biometrics},
   Volume = {57},
   Number = {1},
   Pages = {302-308},
   Year = {2001},
   Month = {March},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2001.00302.x},
   Abstract = {This article describes a general class of factor analytic
             models for the analysis of clustered multivariate data in
             the presence of informative missingness. We assume that
             there are distinct sets of cluster-level latent variables
             related to the primary outcomes and to the censoring
             process, and we account for dependency between these latent
             variables through a hierarchical model. A linear model is
             used to relate covariates and latent variables to the
             primary outcomes for each subunit. A generalized linear
             model accounts for covariate and latent variable effects on
             the probability of censoring for subunits within each
             cluster. The model accounts for correlation within clusters
             and within subunits through a flexible factor analytic
             framework that allows multiple latent variables and
             covariate effects on the latent variables. The structure of
             the model facilitates implementation of Markov chain Monte
             Carlo methods for posterior estimation. Data from a
             spermatotoxicity study are analyzed to illustrate the
             proposed approach.},
   Doi = {10.1111/j.0006-341x.2001.00302.x},
   Key = {fds257890}
}

@article{fds258009,
   Author = {Dunson, WA and Dunson, DB},
   Title = {Factors influencing growth and survival of the killifish,
             Rivulus marmoratus, held inside enclosures in mangrove
             swamps},
   Journal = {Copeia},
   Volume = {1999},
   Number = {3},
   Pages = {661-668},
   Publisher = {JSTOR},
   Year = {1999},
   Month = {August},
   url = {http://dx.doi.org/10.2307/1447598},
   Abstract = {We measured growth and survival in field enclosures of
             juvenile Rivulus marmoratus under a variety of biotic
             (effects of body mass and intraspecific density) and abiotic
             conditions (seasonal climatic changes, site-specific
             hypoxia). We also tested three different enclosure types:
             surface-floating buckets (0.021 m3) and tubes (0.006 m3)
             positioned at the surface or on the bottom. Growth rate was
             inversely correlated with wet body mass (between 6 and 42
             mg) and density (1-16 fish/0.021 m3 enclosure). However,
             density did not affect survival. Growth was significantly
             lower in tubes placed on the bottom than at the surface.
             There were considerable differences in growth and survival
             among sites. This is likely due to differences in the occurrence
             and persistence of hypoxic events. At the Catfish Creek
             location (a pool surrounded by black mangroves), the bottom
             was routinely hypoxic. At a shallow bay site, hypoxia was
             episodic: on the bottom at dawn, O2 < 2 mg/l occurred on
             nine of 48 days, with values < 1 mg/l on two of 48 days.
             Maximum growth rates (3.5-4%/day) were recorded in February
             to May, in comparison with lower values in December to
             January. However, low growth rates also occurred in the
             spring, probably caused by episodic hypoxia.},
   Doi = {10.2307/1447598},
   Key = {fds258009}
}

@article{fds258040,
   Author = {Wang, L and Dunson, DB},
   Title = {Fast Bayesian inference in Dirichlet process mixture
             models},
   Journal = {Journal of Computational & Graphical Statistics},
   Volume = {20},
   Number = {1},
   Pages = {196-216},
   Publisher = {Informa UK Limited},
   Year = {2011},
   ISSN = {1061-8600},
   url = {http://dx.doi.org/10.1198/jcgs.2010.07081},
   Abstract = {There has been increasing interest in applying Bayesian
             nonparametric methods in large samples and high dimensions.
             As Markov chain Monte Carlo (MCMC) algorithms are often
             infeasible, there is a pressing need for much faster
             algorithms. This article proposes a fast approach for
             inference in Dirichlet process mixture (DPM) models. Viewing
             the partitioning of subjects into clusters as a model
             selection problem, we propose a sequential greedy search
             algorithm for selecting the partition. Then, when conjugate
             priors are chosen, the resulting posterior conditionally on
             the selected partition is available in closed form. This
             approach allows testing of parametric models versus
             nonparametric alternatives based on Bayes factors. We
             evaluate the approach using simulation studies and compare
             it with four other fast nonparametric methods in the
             literature. We apply the proposed approach to three datasets
             including one from a large epidemiologic study. Matlab codes
             for the simulation and data analyses using the proposed
             approach are available online in the supplemental
             materials.},
   Doi = {10.1198/jcgs.2010.07081},
   Key = {fds258040}
}
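
A minimal sketch of the sequential greedy partition search described
above, for a univariate normal model with known variance: each
observation joins the existing cluster (or opens a new one) that
maximizes a CRP-weighted closed-form posterior predictive. All
hyperparameter values are illustrative.

import numpy as np
from scipy.stats import norm

def greedy_dpm(y, alpha=1.0, sigma2=1.0, tau2=4.0):
    counts, sums, labels = [], [], []
    for yi in y:
        scores = []
        for n, s in zip(counts, sums):   # score each existing cluster
            v = sigma2 * tau2 / (n * tau2 + sigma2)
            m = tau2 * s / (n * tau2 + sigma2)
            scores.append(np.log(n) + norm.logpdf(yi, m, np.sqrt(v + sigma2)))
        # score for opening a new cluster
        scores.append(np.log(alpha) +
                      norm.logpdf(yi, 0.0, np.sqrt(tau2 + sigma2)))
        c = int(np.argmax(scores))
        if c == len(counts):             # open a new cluster
            counts.append(0)
            sums.append(0.0)
        counts[c] += 1
        sums[c] += yi
        labels.append(c)
    return labels

y = np.concatenate([np.random.normal(-3, 1, 50), np.random.normal(3, 1, 50)])
print(greedy_dpm(y))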

@article{fds340937,
   Author = {Zhao, S and Engelhardt, BE and Mukherjee, S and Dunson,
             DB},
   Title = {Fast Moment Estimation for Generalized Latent Dirichlet
             Models.},
   Journal = {Journal of the American Statistical Association},
   Volume = {113},
   Number = {524},
   Pages = {1528-1540},
   Year = {2018},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2017.1341839},
   Abstract = {We develop a generalized method of moments (GMM) approach
             for fast parameter estimation in a new class of Dirichlet
             latent variable models with mixed data types. Parameter
             estimation via GMM has computational and statistical
             advantages over alternative methods, such as expectation
             maximization, variational inference, and Markov chain Monte
             Carlo. A key computational advantage of our method, Moment
             Estimation for latent Dirichlet models (MELD), is that
             parameter estimation does not require instantiation of the
             latent variables. Moreover, performance is agnostic to
             distributional assumptions of the observations. We derive
             population moment conditions after marginalizing out the
             sample-specific Dirichlet latent variables. The moment
             conditions only depend on component mean parameters. We
             illustrate the utility of our approach on simulated data,
             comparing results from MELD to alternative methods, and we
             show the promise of our approach through the application to
             several datasets. Supplementary materials for this article
             are available online.},
   Doi = {10.1080/01621459.2017.1341839},
   Key = {fds340937}
}

@article{fds356411,
   Author = {Tam, E and Dunson, D},
   Title = {Fiedler regularization: Learning neural networks with graph
             sparsity},
   Journal = {37th International Conference on Machine Learning, ICML
             2020},
   Volume = {PartF168147-12},
   Pages = {9288-9297},
   Year = {2020},
   Month = {January},
   ISBN = {9781713821120},
   Abstract = {We introduce a novel regularization approach for deep
             learning that incorporates and respects the underlying
             graphical structure of the neural network. Existing
             regularization methods often focus on penalizing weights in
             a global/uniform manner that ignores the connectivity
             structure of the neural network. We propose to use the
             Fiedler value of the neural network's underlying graph as a
             tool for regularization. We provide theoretical support for
             this approach via spectral graph theory. We show several
             useful properties of the Fiedler value that make it suitable
             for regularization. We provide an approximate, variational
             approach for faster computation during training. We provide
             an alternative formulation of this framework in the form of
             a structurally weighted L1 penalty, thus linking our
             approach to sparsity induction. We performed experiments
             comparing Fiedler regularization with
             traditional regularization methods such as Dropout and
             weight decay. Results demonstrate the efficacy of Fiedler
             regularization.},
   Key = {fds356411}
}
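
A minimal sketch of the Fiedler-value penalty described above, for a
one-hidden-layer network (the paper covers general architectures and a
variational approximation for speed): the absolute weights define a
weighted graph, and the second-smallest eigenvalue of its Laplacian is
the penalty. Shapes and the penalty scale are illustrative.

import numpy as np

def fiedler_penalty(W1, W2):
    """Fiedler value of the graph induced by |weights| of a feed-forward
    net with layer sizes d_in -> d_h -> d_out."""
    d_in, d_h = W1.shape
    n = d_in + d_h + W2.shape[1]
    A = np.zeros((n, n))                      # weighted adjacency matrix
    A[:d_in, d_in:d_in + d_h] = np.abs(W1)
    A[d_in:d_in + d_h, d_in + d_h:] = np.abs(W2)
    A = A + A.T
    L = np.diag(A.sum(axis=1)) - A            # graph Laplacian
    return np.linalg.eigvalsh(L)[1]           # second-smallest eigenvalue

rng = np.random.default_rng(3)
W1, W2 = rng.normal(size=(5, 8)), rng.normal(size=(8, 2))
print(0.1 * fiedler_penalty(W1, W2))          # term added to a training loss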

@article{fds346413,
   Author = {Strawn, N and Armagan, A and Saab, R and Carin, L and Dunson,
             D},
   Title = {Finite sample posterior concentration in high-dimensional
             regression},
   Journal = {Information and Inference},
   Volume = {3},
   Number = {2},
   Pages = {103-133},
   Year = {2014},
   Month = {June},
   url = {http://dx.doi.org/10.1093/imaiai/iau003},
   Abstract = {We study the behavior of the posterior distribution in
             high-dimensional Bayesian Gaussian linear regression models
             having p ≫ n, where p is the number of predictors and n is
             the sample size. Our focus is on obtaining quantitative
             finite sample bounds ensuring sufficient posterior
             probability assigned in neighborhoods of the true regression
             coefficient vector (β0) with high probability. We assume
             that β0 is approximately S-sparse and also obtain universal
             bounds, which provide insight into the role of the prior in
             controlling concentration of the posterior. Based on these
             finite sample bounds, we examine the implied asymptotic
             contraction rates for several examples, showing that
             sparsely structured and heavy-tail shrinkage priors exhibit
             rapid contraction rates. We also demonstrate that a stronger
             result holds for the sparsity(S)-Gaussian prior. These
             types of finite sample bounds provide guidelines for
             designing and evaluating priors for high-dimensional
             problems.},
   Doi = {10.1093/imaiai/iau003},
   Key = {fds346413}
}

@article{fds258063,
   Author = {Pennell, ML and Dunson, DB},
   Title = {Fitting semiparametric random effects models to large data
             sets.},
   Journal = {Biostatistics (Oxford, England)},
   Volume = {8},
   Number = {4},
   Pages = {821-834},
   Year = {2007},
   Month = {October},
   ISSN = {1465-4644},
   url = {http://dx.doi.org/10.1093/biostatistics/kxm008},
   Abstract = {For large data sets, it can be difficult or impossible to
             fit models with random effects using standard algorithms due
             to memory limitations or high computational burdens. In
             addition, it would be advantageous to use the abundant
             information to relax assumptions, such as normality of
             random effects. Motivated by data from an epidemiologic
             study of childhood growth, we propose a 2-stage method for
             fitting semiparametric random effects models to longitudinal
             data with many subjects. In the first stage, we use a
             multivariate clustering method to identify G ≪ N groups of
             subjects whose data have no scientifically important
             differences, as defined by subject matter experts. Then, in
             stage 2, group-specific random effects are assumed to come
             from an unknown distribution, which is assigned a Dirichlet
             process prior, further clustering the groups from stage 1.
             We use our approach to model the effects of maternal smoking
             during pregnancy on growth in 17,518 girls.},
   Doi = {10.1093/biostatistics/kxm008},
   Key = {fds258063}
}

@article{fds258067,
   Author = {Kinney, S and Dunson, DB},
   Title = {Fixed and random effects selection in linear and logistic
             models},
   Journal = {Biometrics},
   Volume = {63},
   Number = {3},
   Pages = {690-698},
   Year = {2007},
   ISSN = {0006-341X},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/17403104},
   Abstract = {We address the problem of selecting which variables should
             be included in the fixed and random components of logistic
             mixed effects models for correlated data. A fully Bayesian
             variable selection is implemented using a stochastic search
             Gibbs sampler to estimate the exact model-averaged posterior
             distribution. This approach automatically identifies subsets
             of predictors having nonzero fixed effect coefficients or
             nonzero random effects variance, while allowing uncertainty
             in the model selection process. Default priors are proposed
             for the variance components and an efficient parameter
             expansion Gibbs sampler is developed for posterior
             computation. The approach is illustrated using simulated
             data and an epidemiologic example.},
   Doi = {10.1111/j.1541-0420.2007.00771.x},
   Key = {fds258067}
}

@article{fds258021,
   Author = {Stanford, JB and Dunson, DB},
   Title = {Foreword. Expanding Methodologies for Capturing Day-Specific
             Probabilities of Conception.},
   Journal = {Paediatric and perinatal epidemiology},
   Volume = {20 Suppl 1},
   Pages = {1-2},
   Year = {2006},
   Month = {November},
   ISSN = {0269-5022},
   url = {http://dx.doi.org/10.1111/j.1365-3016.2006.00764.x},
   Doi = {10.1111/j.1365-3016.2006.00764.x},
   Key = {fds258021}
}

@article{fds257834,
   Author = {Rodriguez, A and Dunson, DB},
   Title = {Functional clustering in nested designs: Modeling
             variability in reproductive epidemiology
             studies},
   Journal = {Annals of Applied Statistics},
   Volume = {8},
   Number = {3},
   Pages = {1416-1442},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2014},
   Month = {September},
   ISSN = {1932-6157},
   url = {http://dx.doi.org/10.1214/14-AOAS751},
   Abstract = {We discuss functional clustering procedures for nested
             designs, where multiple curves are collected for each
             subject in the study. We start by considering the
             application of standard functional clustering tools to this
             problem, which leads to groupings based on the average
             profile for each subject. After discussing some of the
             shortcomings of this approach, we present a mixture model
             based on a generalization of the nested Dirichlet process
             that clusters subjects based on the distribution of their
             curves. By using mixtures of generalized Dirichlet
             processes, the model induces a much more flexible prior on
             the partition structure than other popular model-based
             clustering methods, allowing for different rates of
             introduction of new clusters as the number of observations
             increases. The methods are illustrated using hormone
             profiles from multiple menstrual cycles collected for women
             in the Early Pregnancy Study.},
   Doi = {10.1214/14-AOAS751},
   Key = {fds257834}
}

@article{fds363849,
   Author = {Zhang, R and Mak, S and Dunson, D},
   Title = {Gaussian process subspace prediction for model
             reduction},
   Journal = {SIAM Journal on Scientific Computing},
   Volume = {44},
   Number = {3},
   Pages = {A1428-A1449},
   Year = {2022},
   Month = {January},
   url = {http://dx.doi.org/10.1137/21M1432739},
   Abstract = {Subspace-valued functions arise in a wide range of problems,
             including parametric reduced order modeling (PROM),
             parameter reduction, and subspace tracking. In PROM, each
             parameter point can be associated with a subspace, which is
             used for Petrov–Galerkin projections of large system
             matrices. Previous efforts to approximate such functions use
             interpolations on manifolds, which can be inaccurate and
             slow. To tackle this, we propose a novel Bayesian
             nonparametric model for subspace prediction: the Gaussian
             process subspace (GPS) model. This method is extrinsic and
             intrinsic at the same time: with multivariate Gaussian
             distributions on the Euclidean space, it induces a joint
             probability model on the Grassmann manifold, the set of
             fixed-dimensional subspaces. The GPS adopts a simple yet
             general correlation structure, and a principled approach for
             model selection. Its predictive distribution admits an
             analytical form, which allows for efficient subspace
             prediction over the parameter space. For PROM, the GPS
             provides a probabilistic prediction at a new parameter point
             that retains the accuracy of local reduced models, at a
             computational complexity that does not depend on system
             dimension, and thus is suitable for online computation. We
             give four numerical examples to compare our method to
             subspace interpolation, as well as two methods that
             interpolate local reduced models. Overall, GPS is the most
             data efficient, more computationally efficient than subspace
             interpolation, and gives smooth predictions with uncertainty
             quantification.},
   Doi = {10.1137/21M1432739},
   Key = {fds363849}
}

@article{fds257871,
   Author = {Zhu, B and Ashley-Koch, AE and Dunson, DB},
   Title = {Generalized admixture mapping for complex
             traits.},
   Journal = {G3 (Bethesda)},
   Volume = {3},
   Number = {7},
   Pages = {1165-1175},
   Year = {2013},
   Month = {July},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23665878},
   Abstract = {Admixture mapping is a popular tool to identify regions of
             the genome associated with traits in a recently admixed
             population. Existing methods have been developed primarily
             for identification of a single locus influencing a
             dichotomous trait within a case-control study design. We
             propose a generalized admixture mapping (GLEAM) approach, a
             flexible and powerful regression method for both
             quantitative and qualitative traits, which is able to test
             for association between the trait and local ancestries in
             multiple loci simultaneously and adjust for covariates. The
             new method is based on the generalized linear model and uses
             a quadratic normal moment prior to incorporate admixture
             prior information. Through simulation, we demonstrate that
             GLEAM achieves a lower type I error rate and higher power
             than ANCESTRYMAP for qualitative traits and, even more
             markedly, for quantitative traits. We applied GLEAM to
             genome-wide SNP data from the Illumina African American
             panel derived from a cohort of black women participating in
             the Healthy Pregnancy, Healthy Baby study and identified a
             locus on chromosome 2 associated with the averaged maternal
             mean arterial pressure during 24 to 28 weeks of
             pregnancy.},
   Doi = {10.1534/g3.113.006478},
   Key = {fds257871}
}

@article{fds257978,
   Author = {Armagan, A and Dunson, DB and Clyde, MA},
   Title = {Generalized Beta Mixtures of Gaussians},
   Journal = {Advances in Neural Information Processing
             Systems},
   Volume = {24},
   Pages = {523-531},
   Publisher = {Neural Information Processing Systems Foundation,
             Inc},
   Editor = {Shawe-Taylor, J and Zemel, RS and Bartlett, PL},
   Year = {2011},
   ISBN = {9781618395993},
   Abstract = {In recent years, a rich variety of shrinkage priors have
             been proposed that have great promise in addressing massive
             regression problems. In general, these new priors can be
             expressed as scale mixtures of normals, but have more
             complex forms and better properties than traditional Cauchy
             and double exponential priors. We first propose a new class
             of normal scale mixtures through a novel generalized beta
             distribution that encompasses many interesting priors as
             special cases. This encompassing framework should prove
             useful in comparing competing priors, considering properties
             and revealing close connections. We then develop a class of
             variational Bayes approximations through the new hierarchy
             presented that will scale more efficiently to the types of
             truly massive data sets that are now encountered
             routinely.},
   Key = {fds257978}
}

@article{fds257864,
   Author = {Armagan, A and Dunson, DB and Lee, J},
   Title = {Generalized double Pareto shrinkage.},
   Journal = {Statistica Sinica},
   Volume = {23},
   Number = {1},
   Pages = {119-143},
   Year = {2013},
   Month = {January},
   ISSN = {1017-0405},
   url = {http://dx.doi.org/10.5705/ss.2011.048},
   Abstract = {We propose a generalized double Pareto prior for Bayesian
             shrinkage estimation and inferences in linear models. The
             prior can be obtained via a scale mixture of Laplace or
             normal distributions, forming a bridge between the Laplace
             and Normal-Jeffreys' priors. While it has a spike at zero
             like the Laplace density, it also has a Student's
             t-like tail behavior. Bayesian computation is
             straightforward via a simple Gibbs sampling algorithm. We
             investigate the properties of the maximum a posteriori
             estimator, as sparse estimation plays an important role in
             many problems, reveal connections with some well-established
             regularization procedures, and show some asymptotic results.
             The performance of the prior is tested through simulations
             and an application.},
   Doi = {10.5705/ss.2011.048},
   Key = {fds257864}
}
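
The scale-mixture representation mentioned above gives a direct way to
draw from the generalized double Pareto prior. A minimal sketch using
the hierarchy beta | tau ~ N(0, tau), tau | lam ~ Exp(lam^2 / 2),
lam ~ Gamma(alpha, eta); the hyperparameter values are illustrative.

import numpy as np

rng = np.random.default_rng(4)
alpha_, eta = 1.0, 1.0                    # shape and rate hyperparameters
lam = rng.gamma(alpha_, 1.0 / eta, size=10000)
tau = rng.exponential(2.0 / lam ** 2)     # exponential with rate lam^2 / 2
beta = rng.normal(0.0, np.sqrt(tau))      # spike at zero, heavy t-like tails
print(np.percentile(beta, [1, 50, 99]))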

@article{fds257850,
   Author = {Cui, K and Dunson, DB},
   Title = {Generalized Dynamic Factor Models for Mixed-Measurement Time
             Series.},
   Journal = {Journal of Computational and Graphical Statistics},
   Volume = {23},
   Number = {1},
   Pages = {169-191},
   Year = {2014},
   Month = {February},
   ISSN = {1061-8600},
   url = {http://dx.doi.org/10.1080/10618600.2012.729986},
   Abstract = {In this article, we propose generalized Bayesian dynamic
             factor models for jointly modeling mixed-measurement time
             series. The framework allows mixed-scale measurements
             associated with each time series, with different
             measurements having different distributions in the
             exponential family conditionally on time-varying latent
             factor(s). Efficient Bayesian computational algorithms are
             developed for posterior inference on both the latent factors
             and model parameters, based on a Metropolis-Hastings
             algorithm with adaptive proposals. The algorithm relies on a
             Greedy Density Kernel Approximation (GDKA) and parameter
             expansion with latent factor normalization. We tested the
             framework and algorithms in simulated studies and applied
             them to the analysis of intertwined credit and recovery risk
             for Moody's rated firms from 1982-2008, illustrating the
             importance of jointly modeling mixed-measurement time
             series. The article has supplemental materials available
             online.},
   Doi = {10.1080/10618600.2012.729986},
   Key = {fds257850}
}

@article{fds370544,
   Author = {Schiavon, L and Canale, A and Dunson, DB},
   Title = {Generalized infinite factorization models.},
   Journal = {Biometrika},
   Volume = {109},
   Number = {3},
   Pages = {817-835},
   Year = {2022},
   Month = {September},
   url = {http://dx.doi.org/10.1093/biomet/asab056},
   Abstract = {Factorization models express a statistical object of
             interest in terms of a collection of simpler objects. For
             example, a matrix or tensor can be expressed as a sum of
             rank-one components. However, in practice, it can be
             challenging to infer the relative impact of the different
             components as well as the number of components. A popular
             idea is to include infinitely many components having impact
             decreasing with the component index. This article is
             motivated by two limitations of existing methods: (1) lack
             of careful consideration of the within component sparsity
             structure; and (2) no accommodation for grouped variables
             and other non-exchangeable structures. We propose a general
             class of infinite factorization models that address these
             limitations. Theoretical support is provided, practical
             gains are shown in simulation studies, and an ecology
             application focusing on modelling bird species occurrence is
             discussed.},
   Doi = {10.1093/biomet/asab056},
   Key = {fds370544}
}

@article{fds329352,
   Author = {Reddy, A and Zhang, J and Davis, NS and Moffitt, AB and Love, CL and Waldrop, A and Leppa, S and Pasanen, A and Meriranta, L and Karjalainen-Lindsberg, M-L and Nørgaard, P and Pedersen, M and Gang,
             AO and Høgdall, E and Heavican, TB and Lone, W and Iqbal, J and Qin, Q and Li, G and Kim, SY and Healy, J and Richards, KL and Fedoriw, Y and Bernal-Mizrachi, L and Koff, JL and Staton, AD and Flowers, CR and Paltiel, O and Goldschmidt, N and Calaminici, M and Clear, A and Gribben, J and Nguyen, E and Czader, MB and Ondrejka, SL and Collie, A and Hsi, ED and Tse, E and Au-Yeung, RKH and Kwong, Y-L and Srivastava, G and Choi, WWL and Evens, AM and Pilichowska, M and Sengar, M and Reddy, N and Li, S and Chadburn, A and Gordon, LI and Jaffe, ES and Levy, S and Rempel,
             R and Tzeng, T and Happ, LE and Dave, T and Rajagopalan, D and Datta, J and Dunson, DB and Dave, SS},
   Title = {Genetic and Functional Drivers of Diffuse Large B Cell
             Lymphoma.},
   Journal = {Cell},
   Volume = {171},
   Number = {2},
   Pages = {481-494.e15},
   Year = {2017},
   Month = {October},
   url = {http://dx.doi.org/10.1016/j.cell.2017.09.027},
   Abstract = {Diffuse large B cell lymphoma (DLBCL) is the most common
             form of blood cancer and is characterized by a striking
             degree of genetic and clinical heterogeneity. This
             heterogeneity poses a major barrier to understanding the
             genetic basis of the disease and its response to therapy.
             Here, we performed an integrative analysis of whole-exome
             sequencing and transcriptome sequencing in a cohort of 1,001
             DLBCL patients to comprehensively define the landscape of
             150 genetic drivers of the disease. We characterized the
             functional impact of these genes using an unbiased CRISPR
             screen of DLBCL cell lines to define oncogenes that promote
             cell growth. A prognostic model comprising these genetic
             alterations outperformed current established methods: cell
             of origin, the International Prognostic Index comprising
             clinical variables, and dual MYC and BCL2 expression. These
             results comprehensively define the genetic drivers and their
             functional roles in DLBCL to identify new therapeutic
             opportunities in the disease.},
   Doi = {10.1016/j.cell.2017.09.027},
   Key = {fds329352}
}

@article{fds257880,
   Author = {Zhang, J and Grubor, V and Love, CL and Banerjee, A and Richards, KL and Mieczkowski, PA and Dunphy, C and Choi, W and Au, WY and Srivastava, G and Lugar, PL and Rizzieri, DA and Lagoo, AS and Bernal-Mizrachi, L and Mann, KP and Flowers, C and Naresh, K and Evens, A and Gordon, LI and Czader, M and Gill, JI and Hsi, ED and Liu, Q and Fan, A and Walsh, K and Jima, D and Smith, LL and Johnson, AJ and Byrd, JC and Luftig, MA and Ni,
             T and Zhu, J and Chadburn, A and Levy, S and Dunson, D and Dave,
             SS},
   Title = {Genetic heterogeneity of diffuse large B-cell
             lymphoma.},
   Journal = {Proc Natl Acad Sci U S A},
   Volume = {110},
   Number = {4},
   Pages = {1398-1403},
   Year = {2013},
   Month = {January},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23292937},
   Abstract = {Diffuse large B-cell lymphoma (DLBCL) is the most common
             form of lymphoma in adults. The disease exhibits a striking
             heterogeneity in gene expression profiles and clinical
             outcomes, but its genetic causes remain to be fully defined.
             Through whole genome and exome sequencing, we characterized
             the genetic diversity of DLBCL. In all, we sequenced 73
             DLBCL primary tumors (34 with matched normal DNA).
             Separately, we sequenced the exomes of 21 DLBCL cell lines.
             We identified 322 DLBCL cancer genes that were recurrently
             mutated in primary DLBCLs. We identified recurrent mutations
             implicating a number of known and not previously identified
             genes and pathways in DLBCL including those related to
             chromatin modification (ARID1A and MEF2B), NF-κB (CARD11
             and TNFAIP3), PI3 kinase (PIK3CD, PIK3R1, and MTOR), B-cell
             lineage (IRF8, POU2F2, and GNA13), and WNT signaling (WIF1).
             We also experimentally validated a mutation in PIK3CD, a
             gene not previously implicated in lymphomas. The patterns of
             mutation demonstrated a classic long tail distribution with
             substantial variation of mutated genes from patient to
             patient and also between published studies. Thus, our study
             reveals the tremendous genetic heterogeneity that underlies
             lymphomas and highlights the need for personalized medicine
             approaches to treating these patients.},
   Doi = {10.1073/pnas.1205299110},
   Key = {fds257880}
}

@article{fds360020,
   Author = {Liu, M and Zhang, Z and Dunson, DB},
   Title = {Graph auto-encoding brain networks with applications to
             analyzing large-scale brain imaging datasets.},
   Journal = {NeuroImage},
   Volume = {245},
   Pages = {118750},
   Year = {2021},
   Month = {December},
   url = {http://dx.doi.org/10.1016/j.neuroimage.2021.118750},
   Abstract = {There has been a huge interest in studying human brain
             connectomes inferred from different imaging modalities and
             exploring their relationships with human traits, such as
             cognition. Brain connectomes are usually represented as
             networks, with nodes corresponding to different regions of
             interest (ROIs) and edges to connection strengths between
             ROIs. Due to the high-dimensionality and non-Euclidean
             nature of networks, it is challenging to depict their
             population distribution and relate them to human traits.
             Current approaches focus on summarizing the network using
             either pre-specified topological features or principal
             components analysis (PCA). In this paper, building on recent
             advances in deep learning, we develop a nonlinear latent
             factor model to characterize the population distribution of
             brain graphs and infer their relationships to human traits.
             We refer to our method as Graph AuTo-Encoding (GATE). We
             applied GATE to two large-scale brain imaging datasets, the
             Adolescent Brain Cognitive Development (ABCD) study and the
             Human Connectome Project (HCP) for adults, to study the
             structural brain connectome and its relationship with
             cognition. Numerical results demonstrate huge advantages of
             GATE over competitors in terms of prediction accuracy,
             statistical inference, and computing efficiency. We found
             that the structural connectome has a stronger association
             with a wide range of human cognitive traits than was
             apparent using previous approaches.},
   Doi = {10.1016/j.neuroimage.2021.118750},
   Key = {fds360020}
}

@article{fds362555,
   Author = {Dunson, DB and Wu, HT and Wu, N},
   Title = {Graph based Gaussian processes on restricted
             domains},
   Journal = {Journal of the Royal Statistical Society. Series B:
             Statistical Methodology},
   Volume = {84},
   Number = {2},
   Pages = {414-439},
   Year = {2022},
   Month = {April},
   url = {http://dx.doi.org/10.1111/rssb.12486},
   Abstract = {In nonparametric regression, it is common for the inputs to
             fall in a restricted subset of Euclidean space. Typical
             kernel-based methods that do not take into account the
             intrinsic geometry of the domain across which observations
             are collected may produce sub-optimal results. In this
             article, we focus on solving this problem in the context of
             Gaussian process (GP) models, proposing a new class of Graph
             Laplacian based GPs (GL-GPs), which learn a covariance that
             respects the geometry of the input domain. As the heat
             kernel is intractable computationally, we approximate the
             covariance using finitely-many eigenpairs of the Graph
             Laplacian (GL). The GL is constructed from a kernel which
             depends only on the Euclidean coordinates of the inputs.
             Hence, we can benefit from the full knowledge about the
             kernel to extend the covariance structure to newly arriving
             samples by a Nyström type extension. We provide substantial
             theoretical support for the GL-GP methodology, and
             illustrate performance gains in various applications.},
   Doi = {10.1111/rssb.12486},
   Key = {fds362555}
}
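
A minimal sketch of the covariance construction described above: build
a kernel graph on the observed inputs, take a few eigenpairs of its
Laplacian, and form a heat-kernel-type covariance. The unnormalized
Laplacian, bandwidth, diffusion time, and rank are illustrative choices,
not necessarily those of the paper.

import numpy as np

rng = np.random.default_rng(5)
X = rng.uniform(size=(200, 2))                 # inputs on a restricted domain
d2 = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1)
W = np.exp(-d2 / 0.02)                         # Euclidean kernel graph
L = np.diag(W.sum(axis=1)) - W                 # graph Laplacian
evals, evecs = np.linalg.eigh(L)               # eigenpairs, ascending

t, m = 0.05, 20                                # diffusion time and rank
C = (evecs[:, :m] * np.exp(-t * evals[:m])) @ evecs[:, :m].T
print(C.shape)                                 # (200, 200) GP covariance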

@article{fds257982,
   Author = {An, Q and Wang, C and Shterev, I and Wang, E and Carin, L and Dunson,
             DB},
   Title = {Hierarchical kernel stick-breaking process for multi-task
             image analysis},
   Journal = {Proceedings of the 25th International Conference on Machine
             Learning},
   Pages = {17-24},
   Year = {2008},
   Month = {January},
   url = {http://dx.doi.org/10.1145/1390156.1390159},
   Abstract = {The kernel stick-breaking process (KSBP) is employed to
             segment general imagery, imposing the condition that patches
             (small blocks of pixels) that are spatially proximate are
             more likely to be associated with the same cluster
             (segment). The number of clusters is not set a priori and is
             inferred from the hierarchical Bayesian model. Further, KSBP
             is integrated with a shared Dirichlet process prior to
             simultaneously model multiple images, inferring their
             inter-relationships. This latter application may be useful
             for sorting and learning relationships between multiple
             images. The Bayesian inference algorithm is based on a
             hybrid of variational Bayesian analysis and local sampling.
             In addition to providing details on the model and associated
             inference framework, example results are presented for
             several image-analysis problems. Copyright 2008 by the
             author(s)/owner(s).},
   Doi = {10.1145/1390156.1390159},
   Key = {fds257982}
}

@article{fds322562,
   Author = {Fyshe, A and Fox, E and Dunson, D and Mitchell, T},
   Title = {Hierarchical latent dictionaries for models of brain
             activation},
   Journal = {Journal of Machine Learning Research},
   Volume = {22},
   Pages = {409-421},
   Year = {2012},
   Month = {January},
   Abstract = {In this work, we propose a hierarchical latent dictionary
             approach to estimate the time-varying mean and covariance of
             a process for which we have only limited noisy samples. We
             fully leverage the limited sample size and redundancy in
             sensor measurements by transferring knowledge through a
             hierarchy of lower dimensional latent processes. As a case
             study, we utilize Magnetoencephalography (MEG) recordings of
             brain activity to identify the word being viewed by a human
             subject. Specifically, we identify the word category for a
             single noisy MEG recording, when only given limited noisy
             samples on which to train.},
   Key = {fds322562}
}

@article{fds257997,
   Author = {Zhang, XX and Dunson, DB and Carin, L},
   Title = {Hierarchical topic modeling for analysis of time-evolving
             personal choices},
   Journal = {Advances in Neural Information Processing Systems 24: 25th
             Annual Conference on Neural Information Processing Systems
             2011, NIPS 2011},
   Year = {2011},
   Month = {December},
   Abstract = {The nested Chinese restaurant process is extended to design
             a nonparametric topic-model tree for representation of human
             choices. Each tree path corresponds to a type of person, and
             each node (topic) has a corresponding probability vector
             over items that may be selected. The observed data are
             assumed to have associated temporal covariates
             (corresponding to the time at which choices are made), and
             we wish to impose that with increasing time it is more
             probable that topics deeper in the tree are utilized. This
             structure is imposed by developing a new "change point"
             stick-breaking model that is coupled with a Poisson and
             product-of-gammas construction. To share topics across the
             tree nodes, topic distributions are drawn from a Dirichlet
             process. As a demonstration of this concept, we analyze real
             data on course selections of undergraduate students at Duke
             University, with the goal of uncovering and concisely
             representing structure in the curriculum and in the
             characteristics of the student body.},
   Key = {fds257997}
}

@article{fds257909,
   Author = {Baird, DD and Dunson, DB and Hill, MC and Cousins, D and Schectman,
             JM},
   Title = {High cumulative incidence of uterine leiomyoma in black and
             white women: ultrasound evidence.},
   Journal = {American journal of obstetrics and gynecology},
   Volume = {188},
   Number = {1},
   Pages = {100-107},
   Year = {2003},
   Month = {January},
   url = {http://dx.doi.org/10.1067/mob.2003.99},
   Abstract = {OBJECTIVE: Uterine leiomyoma, or fibroid tumors, are
             the leading indication for hysterectomy in the United
             States, but the proportion of women in whom fibroid tumors
             develop is not known. This study screened for fibroid
             tumors, independently of clinical symptoms, to estimate the
             age-specific proportion of black and white women in whom
             fibroid tumors develop. STUDY DESIGN: Randomly
             selected members of an urban health plan who were 35 to 49
             years old participated (n = 1364 women). Medical records and
             self-report were used to assess fibroid status for those
             women who were no longer menstruating (most of whom had had
             hysterectomies). Premenopausal women were screened by
             ultrasonography. We estimated the age-specific cumulative
             incidence of fibroid tumors for black and white
             women. RESULTS: Thirty-five percent of premenopausal
             women had a previous diagnosis of fibroid tumors. Fifty-one
             percent of the premenopausal women who had no previous
             diagnosis had ultrasound evidence of fibroid tumors. The
             estimated cumulative incidence of tumors by age 50 was >80%
             for black women and nearly 70% for white women. The
             difference between the age-specific cumulative incidence
             curves for black and white women was highly significant
             (odds ratio, 2.9; 95% CI, 2.5-3.4; P <.001). CONCLUSION: The
             results of this study suggest that most black and white
             women in the United States develop uterine fibroid tumors
             before menopause and that uterine fibroid tumors develop in
             black women at earlier ages than in white
             women.},
   Doi = {10.1067/mob.2003.99},
   Key = {fds257909}
}

@article{fds323139,
   Author = {Carin, L and Hero, A and Lucas, J and Dunson, D and Chen, M and Henao, R and Tibau-Puig, A and Zaas, A and Woods, CW and Ginsburg,
             GS},
   Title = {High-Dimensional Longitudinal Genomic Data: An analysis used
             for monitoring viral infections.},
   Journal = {IEEE Signal Process Mag},
   Volume = {29},
   Number = {1},
   Pages = {108-123},
   Year = {2012},
   Month = {January},
   url = {http://dx.doi.org/10.1109/MSP.2011.943009},
   Doi = {10.1109/MSP.2011.943009},
   Key = {fds323139}
}

@article{fds258035,
   Author = {Liu, F and Dunson, D and Zou, F},
   Title = {High-dimensional variable selection in meta-analysis for
             censored data.},
   Journal = {Biometrics},
   Volume = {67},
   Number = {2},
   Pages = {504-512},
   Year = {2011},
   Month = {June},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2010.01466.x},
   Abstract = {This article considers the problem of selecting predictors
             of time to an event from a high-dimensional set of candidate
             predictors using data from multiple studies. As an
             alternative to the current multistage testing approaches, we
             propose to model the study-to-study heterogeneity explicitly
             using a hierarchical model to borrow strength. Our method
             incorporates censored data through an accelerated failure
             time model. Using a carefully formulated prior
             specification, we develop a fast approach to predictor
             selection and shrinkage estimation for high-dimensional
             predictors. For model fitting, we develop a Monte Carlo
             expectation maximization (MC-EM) algorithm to accommodate
             censored data. The proposed approach, which is related to
             the relevance vector machine (RVM), relies on maximum a
             posteriori estimation to rapidly obtain a sparse estimate.
             As for the typical RVM, there is an intrinsic thresholding
             property in which unimportant predictors tend to have their
             coefficients shrunk to zero. We compare our method with some
             commonly used procedures through simulation studies. We also
             illustrate the method using the gene expression barcode data
             from three breast cancer studies.},
   Doi = {10.1111/j.1541-0420.2010.01466.x},
   Key = {fds258035}
}

@article{fds329990,
   Author = {Ovaskainen, O and Tikhonov, G and Dunson, D and Grøtan, V and Engen, S and Sæther, B-E and Abrego, N},
   Title = {How are species interactions structured in species-rich
             communities? A new method for analysing time-series
             data.},
   Journal = {Proceedings. Biological sciences},
   Volume = {284},
   Number = {1855},
   Pages = {20170768},
   Year = {2017},
   Month = {May},
   url = {http://dx.doi.org/10.1098/rspb.2017.0768},
   Abstract = {Estimation of intra- and interspecific interactions from
             time-series on species-rich communities is challenging due
             to the high number of potentially interacting species pairs.
             The previously proposed sparse interactions model overcomes
             this challenge by assuming that most species pairs do not
             interact. We propose an alternative model that does not
             assume that any of the interactions are necessarily zero,
             but summarizes the influences of individual species by a
             small number of community-level drivers. The community-level
             drivers are defined as linear combinations of species
             abundances, and they may thus represent e.g. the total
             abundance of all species or the relative proportions of
             different functional groups. We show with simulated and real
             data how our approach can be used to compare different
             hypotheses on community structure. In an empirical example
             using aquatic microorganisms, the community-level drivers
             model clearly outperformed the sparse interactions model in
             predicting independent validation data.},
   Doi = {10.1098/rspb.2017.0768},
   Key = {fds329990}
}
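
% Editorial note: a toy simulation of the community-level drivers idea in the
% entry above. Species respond not to every other species separately but to a
% few drivers defined as linear combinations of abundances (e.g., total
% abundance or a functional group). All dimensions, coefficients, and the
% simple Gompertz-style dynamics below are illustrative assumptions, not the
% paper's fitted model.

import numpy as np

rng = np.random.default_rng(0)
S, D, T = 30, 2, 100                            # species, drivers, time steps
A = np.vstack([np.ones(S) / S,                  # driver 1: mean community abundance
               rng.integers(0, 2, S) / S])      # driver 2: one functional group
b = np.column_stack([-np.abs(rng.normal(0.05, 0.02, S)),   # density dependence
                     rng.normal(0.0, 0.02, S)])            # group effect
r = rng.normal(0.02, 0.01, S)                   # intrinsic growth rates

n = rng.normal(0.0, 0.1, S)                     # log abundances
for _ in range(T):
    drivers = A @ np.exp(n)                     # drivers = linear combos of abundances
    n = n + r + b @ drivers + 0.02 * rng.standard_normal(S)
print("final total abundance:", np.exp(n).sum().round(2))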

@article{fds329991,
   Author = {Ovaskainen, O and Tikhonov, G and Norberg, A and Guillaume Blanchet,
             F and Duan, L and Dunson, D and Roslin, T and Abrego,
             N},
   Title = {How to make more out of community data? A conceptual
             framework and its implementation as models and
             software.},
   Journal = {Ecology letters},
   Volume = {20},
   Number = {5},
   Pages = {561-576},
   Year = {2017},
   Month = {May},
   url = {http://dx.doi.org/10.1111/ele.12757},
   Abstract = {Community ecology aims to understand what factors determine
             the assembly and dynamics of species assemblages at
             different spatiotemporal scales. To facilitate the
             integration between conceptual and statistical approaches in
             community ecology, we propose Hierarchical Modelling of
             Species Communities (HMSC) as a general, flexible framework
             for modern analysis of community data. While
             non-manipulative data allow for only correlative and not
             causal inference, this framework facilitates the formulation
             of data-driven hypotheses regarding the processes that
             structure communities. We model environmental filtering by
             variation and covariation in the responses of individual
             species to the characteristics of their environment, with
             potential contingencies on species traits and phylogenetic
             relationships. We capture biotic assembly rules by
             species-to-species association matrices, which may be
             estimated at multiple spatial or temporal scales. We
             operationalise the HMSC framework as a hierarchical Bayesian
             joint species distribution model, and implement it as R- and
             Matlab-packages which enable computationally efficient
             analyses of large data sets. Armed with this tool, community
             ecologists can make sense of many types of data, including
             spatially explicit data and time-series data. We illustrate
             the use of this framework through a series of diverse
             ecological examples.},
   Doi = {10.1111/ele.12757},
   Key = {fds329991}
}

@article{fds354544,
   Author = {Ferrari, F and Dunson, DB},
   Title = {IDENTIFYING MAIN EFFECTS AND INTERACTIONS AMONG EXPOSURES
             USING GAUSSIAN PROCESSES.},
   Journal = {The annals of applied statistics},
   Volume = {14},
   Number = {4},
   Pages = {1743-1758},
   Year = {2020},
   Month = {December},
   url = {http://dx.doi.org/10.1214/20-aoas1363},
   Abstract = {This article is motivated by the problem of studying the
             joint effect of different chemical exposures on human health
             outcomes. This is essentially a nonparametric regression
             problem, with interest being focused not on a black box for
             prediction but instead on selection of main effects and
             interactions. For interpretability we decompose the expected
             health outcome into a linear main effect, pairwise
             interactions and a nonlinear deviation. Our interest is in
             model selection for these different components, accounting
             for uncertainty and addressing nonidentifiability between
             the linear and nonparametric components of the
             semiparametric model. We propose a Bayesian approach to
             inference, placing variable selection priors on the
             different components, and developing a Markov chain Monte
             Carlo (MCMC) algorithm. A key component of our approach is
             the incorporation of a heredity constraint to only include
             interactions in the presence of main effects, effectively
             reducing dimensionality of the model search. We adapt a
             projection approach developed in the spatial statistics
             literature to enforce identifiability in modeling the
             nonparametric component using a Gaussian process. We also
             employ a dimension reduction strategy to sample the
             nonlinear random effects that aids the mixing of the MCMC
             algorithm. The proposed MixSelect framework is evaluated
             using a simulation study, and is illustrated using data from
             the National Health and Nutrition Examination Survey
             (NHANES). Code is available on GitHub.},
   Doi = {10.1214/20-aoas1363},
   Key = {fds354544}
}
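
% Editorial note: a minimal sketch of the strong heredity constraint mentioned
% in the entry above, under which an interaction (j, k) is searched only when
% both of its main effects are currently included. The dimension and selected
% indices are hypothetical; this is not the MixSelect implementation.

from itertools import combinations

p = 5
main_included = {0, 2, 3}                       # currently selected main effects
allowed = [(j, k) for j, k in combinations(range(p), 2)
           if j in main_included and k in main_included]
print("interactions permitted by heredity:", allowed)   # 3 pairs instead of 10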

@article{fds370898,
   Author = {Mahzarnia, A and Stout, JA and Anderson, RJ and Moon, HS and Yar Han, Z and Beck, K and Browndyke, JN and Dunson, DB and Johnson, KG and O'Brien,
             RJ and Badea, A},
   Title = {Identifying vulnerable brain networks associated with
             Alzheimer's disease risk.},
   Journal = {Cereb Cortex},
   Volume = {33},
   Number = {9},
   Pages = {5307-5322},
   Year = {2023},
   Month = {April},
   url = {http://dx.doi.org/10.1093/cercor/bhac419},
   Abstract = {The selective vulnerability of brain networks in individuals
             at risk for Alzheimer's disease (AD) may help differentiate
             pathological from normal aging at asymptomatic stages,
             allowing the implementation of more effective interventions.
             We used a sample of 72 people across the age span, enriched
             for the APOE4 genotype to reveal vulnerable networks
             associated with a composite AD risk factor including age,
             genotype, and sex. Sparse canonical correlation analysis
             (CCA) revealed a high weight associated with genotype, and
             subgraphs involving the cuneus, temporal, cingulate
             cortices, and cerebellum. Adding cognitive metrics to the
             risk factor revealed the highest cumulative degree of
             connectivity for the pericalcarine cortex, insula, banks of
             the superior sulcus, and the cerebellum. To enable scaling
             up our approach, we extended tensor network principal
             component analysis, introducing CCA components. We developed
             sparse regression predictive models with errors of 17% for
             genotype, 24% for family risk factor for AD, and 5 years for
             age. Age prediction in groups including cognitively impaired
             subjects revealed regions not found using only normal
             subjects, i.e. middle and transverse temporal, paracentral
             and superior banks of temporal sulcus, as well as the
             amygdala and parahippocampal gyrus. These modeling
             approaches represent stepping stones towards single subject
             prediction.},
   Doi = {10.1093/cercor/bhac419},
   Key = {fds370898}
}

@article{fds348078,
   Author = {Badea, A and Wu, W and Shuff, J and Wang, M and Anderson, RJ and Qi, Y and Johnson, GA and Wilson, JG and Koudoro, S and Garyfallidis, E and Colton, CA and Dunson, DB},
   Title = {Identifying Vulnerable Brain Networks in Mouse Models of
             Genetic Risk Factors for Late Onset Alzheimer's
             Disease.},
   Journal = {Front Neuroinform},
   Volume = {13},
   Pages = {72},
   Year = {2019},
   url = {http://dx.doi.org/10.3389/fninf.2019.00072},
   Abstract = {The major genetic risk for late onset Alzheimer's disease
             has been associated with the presence of APOE4 alleles.
             However, the impact of different APOE alleles on the brain
             aging trajectory, and how they interact with the brain local
             environment in a sex specific manner is not entirely clear.
             We sought to identify vulnerable brain circuits in novel
             mouse models with homozygous targeted replacement of the
             mouse ApoE gene with either human APOE3 or APOE4 gene
             alleles. These genes are expressed in mice that also model
             the human immune response to age and disease-associated
             challenges by expressing the human NOS2 gene in place of the
             mouse mNos2 gene. These mice had impaired learning and
             memory when assessed with the Morris water maze (MWM) and
             novel object recognition (NOR) tests. Ex vivo MRI-DTI
             analyses revealed global and local atrophy, and areas of
             reduced fractional anisotropy (FA). Using tensor network
             principal component analyses for structural connectomes, we
             inferred the pairwise connections which best separate APOE4
             from APOE3 carriers. These involved primarily
             interhemispheric connections among regions of olfactory
             areas, the hippocampus, and the cerebellum. Our results also
             suggest that pairwise connections may be subdivided and
             clustered spatially to reveal local changes on a finer
             scale. These analyses revealed not just genotype, but also
             sex specific differences. Identifying vulnerable networks
             may provide targets for interventions, and a means to
             stratify patients.},
   Doi = {10.3389/fninf.2019.00072},
   Key = {fds348078}
}

@article{fds257965,
   Author = {Dzirasa, K and McGarity, DL and Bhattacharya, A and Kumar, S and Takahashi, JS and Dunson, D and McClung, CA and Nicolelis,
             MAL},
   Title = {Impaired limbic gamma oscillatory synchrony during
             anxiety-related behavior in a genetic mouse model of bipolar
             mania.},
   Journal = {J Neurosci},
   Volume = {31},
   Number = {17},
   Pages = {6449-6456},
   Year = {2011},
   Month = {April},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/21525286},
   Abstract = {Alterations in anxiety-related processing are observed
             across many neuropsychiatric disorders, including bipolar
             disorder. Though polymorphisms in a number of circadian
             genes confer risk for this disorder, little is known about
             how changes in circadian gene function disrupt brain
             circuits critical for anxiety-related processing. Here we
             characterize neurophysiological activity simultaneously
             across five limbic brain areas (nucleus accumbens, amygdala,
             prelimbic cortex, ventral hippocampus, and ventral tegmental
             area) as wild-type (WT) mice and mice with a mutation in the
             circadian gene, CLOCK (Clock-Δ19 mice) perform an elevated
             zero maze task. In WT mice, basal limbic gamma oscillatory
             synchrony observed before task performance predicted future
             anxiety-related behaviors. Additionally, dynamic changes in
             limbic gamma oscillatory synchrony were observed based on
             the position of WT mice in the zero maze. Clock-Δ19 mice,
             which displayed an increased propensity to enter the open
             section of the elevated maze, showed profound deficits in
             these anxiety-related circuit processes. Thus, our findings
             link the anxiety-related behavioral deficits observed in
             Clock-Δ19 mice with dysfunctional gamma oscillatory tuning
             across limbic circuits and suggest that alterations in
             limbic oscillatory circuit function induced by circadian
             gene polymorphisms may contribute to the behavioral
             manifestations seen in bipolar mania.},
   Doi = {10.1523/JNEUROSCI.6144-10.2011},
   Key = {fds257965}
}

@article{fds257845,
   Author = {Wade, S and Dunson, DB and Petrone, S and Trippa,
             L},
   Title = {Improving prediction from dirichlet process mixtures via
             enrichment},
   Journal = {Journal of Machine Learning Research},
   Volume = {15},
   Pages = {1041-1071},
   Year = {2014},
   Month = {January},
   ISSN = {1532-4435},
   Abstract = {Flexible covariate-dependent density estimation can be
             achieved by modelling the joint density of the response and
             covariates as a Dirichlet process mixture. An appealing
             aspect of this approach is that computations are relatively
             easy. In this paper, we examine the predictive performance
             of these models with an increasing number of covariates.
             Even for a moderate number of covariates, we find that the
             likelihood for x tends to dominate the posterior of the
             latent random partition, degrading the predictive
             performance of the model. To overcome this, we suggest using
             a different nonparametric prior, namely an enriched
             Dirichlet process. Our proposal maintains a simple
             allocation rule, so that computations remain relatively
             simple. Advantages are shown through both predictive
             equations and examples, including an application to
             diagnosing Alzheimer's disease. © 2014 Sara Wade, David B.
             Dunson, Sonia Petrone and Lorenzo Trippa.},
   Key = {fds257845}
}

@article{fds257907,
   Author = {Zeise, L and Hattis, D and Andersen, M and Bailer, AJ and Bayard, S and Chen, C and Clewell, H and Conolly, R and Crump, K and Dunson, D and Finkel, A and Haber, L and Jarabek, AM and Kodell, R and Krewski, D and Thomas, D and Thorslund, T and Wassell, JT},
   Title = {Improving risk Assessment: Research opportunities in dose
             response modeling to improve risk assessment},
   Journal = {Human and Ecological Risk Assessment},
   Volume = {8},
   Number = {6},
   Pages = {1421-1444},
   Publisher = {Informa UK Limited},
   Year = {2002},
   Month = {October},
   ISSN = {1080-7039},
   url = {http://dx.doi.org/10.1080/20028091057448},
   Abstract = {Substantial improvements in dose response modeling for risk
             assessment may result from recent and continuing advances in
             biological research, biochemical techniques,
             biostatistical/mathematical methods and computational power.
             This report provides a ranked set of recommendations for
             proposed research to advance the state of the art in dose
             response modeling. The report is the result of a meeting of
             invited workgroup participants charged with identifying five
             areas of research in dose response modeling that could be
             incorporated in a national agenda to improve risk assessment
             methods. Leading topics of emphasis are interindividual
             variability, injury risk assessment modeling, and procedures
             to incorporate distributional methods and mechanistic
             considerations into now-standard methods of deriving a
             reference dose (RfD), reference concentration (RfC), minimum
             risk level (MRL) or similar dose-response parameter
             estimates. © 2002 by ASP.},
   Doi = {10.1080/20028091057448},
   Key = {fds257907}
}

@article{fds257914,
   Author = {Dunson, DB},
   Title = {Incorporating heterogeneous intercourse records into time to
             pregnancy models},
   Journal = {Mathematical Population Studies},
   Volume = {10},
   Number = {2},
   Pages = {127-143},
   Publisher = {Informa UK Limited},
   Year = {2003},
   Month = {April},
   ISSN = {0889-8480},
   url = {http://dx.doi.org/10.1080/08898480306714},
   Abstract = {Information on the timing of intercourse relative to
             ovulation can be incorporated into time to pregnancy models
             to improve the power to detect covariate effects, to
             estimate the day-specific conception probabilities, and to
             distinguish between biological and behavioral effects on
             fecundability, that is, the probability of conception
             in a menstrual cycle. In this paper, Bayesian methods are
             proposed for joint modeling of intercourse behavior and
             biologic fecundability. The model accommodates a sterile
             subpopulation of couples, general covariate effects, and
             heterogeneity among fecund couples in menstrual cycle
             viability and in frequency of unprotected intercourse.
             Methods are described for incorporating cycles with varying
             amounts of intercourse information into a single analysis. A
             Markov chain Monte Carlo algorithm is outlined for
             estimation of the posterior distributions of the unknowns.
             The methods are applied to data from a North Carolina study
             of couples attempting pregnancy. Copyright © 2003 Taylor &
             Francis.},
   Doi = {10.1080/08898480306714},
   Key = {fds257914}
}

@article{fds257923,
   Author = {Dunson, DB and Baird, DD and Colombo, B},
   Title = {Increased infertility with age in men and
             women.},
   Journal = {Obstetrics and gynecology},
   Volume = {103},
   Number = {1},
   Pages = {51-56},
   Year = {2004},
   Month = {January},
   url = {http://dx.doi.org/10.1097/01.aog.0000100153.24061.45},
   Abstract = {OBJECTIVE: To estimate the effects of aging on the
             percentage of outwardly healthy couples who are sterile
             (completely unable to conceive without assisted
             reproduction) or infertile (unable to conceive within a year
             of unprotected intercourse). METHODS: A prospective
             fecundability study was conducted in a sample of 782 couples
             recruited from 7 European centers for natural family
             planning. Women aged 18-40 years were eligible. Daily
             intercourse records were used to adjust for timing and
             frequency of intercourse when estimating the
             per-menstrual-cycle probability of conception. The number of
             menstrual cycles required to conceive a clinical pregnancy
             and the probability of sterility and infertility were
             derived from the estimated fecundability distributions for
             men and women of different ages. RESULTS: Sterility
             was estimated at about 1%; this percent did not change with
             age. The percentage infertility was estimated at 8% for
             women aged 19-26 years, 13-14% for women aged 27-34 years
             and 18% for women aged 35-39 years. Starting in the late
             30s, male age was an important factor, with the percentage
             failing to conceive within 12 cycles increasing from an
             estimated 18% to 28% between ages 35 and 40 years. The estimated
             percentage of infertile couples that would be able to
             conceive after an additional 12 cycles of trying varied from
             43% to 63% depending on age. CONCLUSION: Increased
             infertility in older couples is attributable primarily to
             declines in fertility rates rather than to absolute
             sterility. Many infertile couples will conceive if they try
             for an additional year.},
   Doi = {10.1097/01.aog.0000100153.24061.45},
   Key = {fds257923}
}

@article{fds368122,
   Author = {Zito, A and Rigon, T and Dunson, DB},
   Title = {Inferring taxonomic placement from DNA barcoding aiding in
             discovery of new taxa},
   Journal = {Methods in Ecology and Evolution},
   Volume = {14},
   Number = {2},
   Pages = {529-542},
   Year = {2023},
   Month = {February},
   url = {http://dx.doi.org/10.1111/2041-210X.14009},
   Abstract = {Predicting the taxonomic affiliation of DNA sequences
             collected from biological samples is a fundamental step in
             biodiversity assessment. This task is performed by
             leveraging existing databases containing reference DNA
             sequences endowed with a taxonomic identification. However,
             environmental sequences can be from organisms that are
             either unknown to science or for which there are no
             reference sequences available. Thus, taxonomic novelty of a
             sequence needs to be accounted for when doing
             classification. We propose Bayesian nonparametric taxonomic
             classifiers, BayesANT, which use species sampling model
             priors to allow unobserved taxa to be discovered at each
             taxonomic rank. Using a simple product multinomial
             likelihood with conjugate Dirichlet priors at the lowest
             rank, a highly flexible supervised algorithm is developed to
             provide a probabilistic prediction of the taxa placement of
             each sequence at each rank. As an illustration, we run our
             algorithm on a carefully annotated library of Finnish
             arthropods (FinBOL). To assess the ability of BayesANT to
             recognize novelty and to predict known taxonomic
             affiliations correctly, we test it on two training-test
             splitting scenarios, each with a different proportion of
             taxa unobserved in training. We show how our algorithm
             attains accurate predictions and reliably quantifies
             classification uncertainty, especially when many sequences
             in the test set are affiliated to taxa unknown in training.
             By enabling taxonomic predictions for DNA barcodes to
             identify unseen branches, we believe BayesANT will be of
             broad utility as a tool for DNA metabarcoding within
             bioinformatics pipelines.},
   Doi = {10.1111/2041-210X.14009},
   Key = {fds368122}
}
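
% Editorial note: a heavily simplified sketch of the prediction step described
% above: per-taxon, per-position nucleotide probabilities with conjugate
% Dirichlet smoothing, plus a species-sampling prior that reserves mass
% alpha/(alpha + n) for a taxon unobserved in training. The tiny training
% library, alpha, and the uniform likelihood for the novel class are all
% illustrative assumptions, not the BayesANT implementation.

import numpy as np

BASES = "ACGT"
train = {"taxonA": ["ACGT", "ACGA"], "taxonB": ["TTGT", "TTGA"]}
alpha = 1.0
n = sum(len(s) for s in train.values())

def log_lik(seq, seqs):
    """Product-multinomial log likelihood with Dirichlet(1/2) smoothing."""
    ll = 0.0
    for i, base in enumerate(seq):
        counts = np.array([sum(s[i] == c for s in seqs) for c in BASES])
        probs = (counts + 0.5) / (counts.sum() + 2.0)
        ll += np.log(probs[BASES.index(base)])
    return ll

def classify(seq):
    scores = {t: np.log(len(s) / (alpha + n)) + log_lik(seq, s)
              for t, s in train.items()}
    scores["new taxon"] = np.log(alpha / (alpha + n)) + len(seq) * np.log(0.25)
    z = np.array(list(scores.values()))
    post = np.exp(z - z.max())
    return dict(zip(scores, (post / post.sum()).round(3)))

print(classify("ACGT"))   # concentrates on taxonA
print(classify("GGCC"))   # mostly "new taxon"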

@article{fds258005,
   Author = {Dunson, WA and Paradise, CJ and Dunson, DB},
   Title = {Inhibitory effect of low salinity on growth and reproduction
             of the estuarine sheepshead minnow, Cyprinodon
             variegatus},
   Journal = {Copeia},
   Volume = {1998},
   Number = {1},
   Pages = {235-239},
   Publisher = {JSTOR},
   Year = {1998},
   Month = {February},
   url = {http://dx.doi.org/10.2307/1447727},
   Doi = {10.2307/1447727},
   Key = {fds258005}
}

@article{fds342830,
   Author = {Niu, M and Cheung, P and Lin, L and Dai, Z and Lawrence, N and Dunson,
             D},
   Title = {Intrinsic Gaussian processes on complex constrained
             domains},
   Journal = {Journal of the Royal Statistical Society. Series B:
             Statistical Methodology},
   Volume = {81},
   Number = {3},
   Pages = {603-627},
   Year = {2019},
   Month = {July},
   url = {http://dx.doi.org/10.1111/rssb.12320},
   Abstract = {We propose a class of intrinsic Gaussian processes (GPs) for
             interpolation, regression and classification on manifolds
             with a primary focus on complex constrained domains or
             irregularly shaped spaces arising as subsets or submanifolds
             of R, R^2, R^3 and beyond. For example, intrinsic GPs can
             accommodate spatial domains arising as complex subsets of
             Euclidean space. Intrinsic GPs respect the potentially
             complex boundary or interior conditions as well as the
             intrinsic geometry of the spaces. The key novelty of the
             approach proposed is to utilize the relationship between
             heat kernels and the transition density of Brownian motion
             on manifolds for constructing and approximating valid and
             computationally feasible covariance kernels. This enables
             intrinsic GPs to be practically applied in great generality,
             whereas existing approaches for smoothing on constrained
             domains are limited to simple special cases. The broad
             utilities of the intrinsic GP approach are illustrated
             through simulation studies and data examples.},
   Doi = {10.1111/rssb.12320},
   Key = {fds342830}
}
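
% Editorial note: a Monte Carlo sketch of the key idea above, approximating a
% valid covariance on a constrained domain by the transition density of
% Brownian motion that respects the boundary (here, reflection on the unit
% interval). The diffusion time, path count, step size, and bin width are
% illustrative assumptions.

import numpy as np

rng = np.random.default_rng(0)

def reflect(z):
    """Fold a real-valued path back into [0, 1] (reflecting boundary)."""
    z = np.mod(z, 2.0)
    return np.where(z > 1.0, 2.0 - z, z)

def heat_kernel(x, y, t=0.05, n_paths=100_000, n_steps=50, h=0.02):
    """Estimate the transition density p_t(x, y) of reflected Brownian motion."""
    dt = t / n_steps
    paths = np.full(n_paths, float(x))
    for _ in range(n_steps):
        paths = reflect(paths + np.sqrt(dt) * rng.standard_normal(n_paths))
    return np.mean(np.abs(paths - y) < h) / (2.0 * h)

print("p_t(0.5, 0.5) ~", round(heat_kernel(0.5, 0.5), 2))
print("p_t(0.5, 0.1) ~", round(heat_kernel(0.5, 0.1), 2))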

@article{fds257987,
   Author = {Wang, E and Liu, D and Silva, J and Dunson, D and Carin,
             L},
   Title = {Joint analysis of time-evolving binary matrices and
             associated documents},
   Journal = {Advances in Neural Information Processing Systems 23: 24th
             Annual Conference on Neural Information Processing Systems
             2010, NIPS 2010},
   Year = {2010},
   Month = {December},
   Abstract = {We consider problems for which one has incomplete binary
             matrices that evolve with time (e.g., the votes of
             legislators on particular legislation, with each year
             characterized by a different such matrix). An objective of
             such analysis is to infer structure and inter-relationships
             underlying the matrices, here defined by latent features
             associated with each axis of the matrix. In addition, it is
             assumed that documents are available for the entities
             associated with at least one of the matrix axes. By jointly
             analyzing the matrices and documents, one may be used to
             inform the other within the analysis, and the model offers
             the opportunity to predict matrix values (e.g., votes) based
             only on an associated document (e.g., legislation). The
             research presented here merges two areas of machine-learning
             that have previously been investigated separately:
             incomplete-matrix analysis and topic modeling. The analysis
             is performed from a Bayesian perspective, with efficient
             inference constituted via Gibbs sampling. The framework is
             demonstrated by considering all voting data and available
             documents (legislation) during the 220-year lifetime of the
             United States Senate and House of Representatives.},
   Key = {fds257987}
}

@article{fds257829,
   Author = {Lock, EF and Soldano, KL and Garrett, ME and Cope, H and Markunas, CA and Fuchs, H and Grant, G and Dunson, DB and Gregory, SG and Ashley-Koch,
             AE},
   Title = {Joint eQTL assessment of whole blood and dura mater tissue
             from individuals with Chiari type I malformation.},
   Journal = {BMC Genomics},
   Volume = {16},
   Number = {1},
   Pages = {11},
   Year = {2015},
   Month = {January},
   url = {http://dx.doi.org/10.1186/s12864-014-1211-8},
   Abstract = {BACKGROUND: Expression quantitative trait loci (eQTL) play
             an important role in the regulation of gene expression. Gene
             expression levels and eQTLs are expected to vary from tissue
             to tissue, and therefore multi-tissue analyses are necessary
             to fully understand complex genetic conditions in humans.
             Dura mater tissue likely interacts with cranial bone growth
             and thus may play a role in the etiology of Chiari Type I
             Malformation (CMI) and related conditions, but it is often
             inaccessible and its gene expression has not been well
             studied. A genetic basis to CMI has been established;
             however, the specific genetic risk factors are not well
             characterized. RESULTS: We present an assessment of eQTLs
             for whole blood and dura mater tissue from individuals with
             CMI. A joint-tissue analysis identified 239 eQTLs in either
             dura or blood, with 79% of these eQTLs shared by both
             tissues. Several identified eQTLs were novel and these
             implicate genes involved in bone development (IPO8, XYLT1,
             and PRKAR1A), and ribosomal pathways related to marrow and
             bone dysfunction, as potential candidates in the development
             of CMI. CONCLUSIONS: Despite strong overall heterogeneity in
             expression levels between blood and dura, the majority of
             cis-eQTLs are shared by both tissues. The power to detect
             shared eQTLs was improved by using an integrative
             statistical approach. The identified tissue-specific and
             shared eQTLs provide new insight into the genetic basis for
             CMI and related conditions.},
   Doi = {10.1186/s12864-014-1211-8},
   Key = {fds257829}
}

@article{fds258065,
   Author = {Dunson, DB and Park, JH},
   Title = {Kernel stick-breaking processes},
   Journal = {Biometrika},
   Volume = {95},
   Number = {2},
   Pages = {307-323},
   Publisher = {Oxford University Press (OUP)},
   Year = {2008},
   Month = {June},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/asn012},
   Abstract = {We propose a class of kernel stick-breaking processes for
             uncountable collections of dependent random probability
             measures. The process is constructed by first introducing an
             infinite sequence of random locations. Independent random
             probability measures and beta-distributed random weights are
             assigned to each location. Predictor-dependent random
             probability measures are then constructed by mixing over the
             locations, with stick-breaking probabilities expressed as a
             kernel multiplied by the beta weights. Some theoretical
             properties of the process are described, including a
             covariate-dependent prediction rule. A retrospective Markov
             chain Monte Carlo algorithm is developed for posterior
             computation, and the methods are illustrated using a
             simulated example and an epidemiological application. © US
             Government/Department of Health and Human Services
             2008.},
   Doi = {10.1093/biomet/asn012},
   Key = {fds258065}
}
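
% Editorial note: a truncated simulation of the kernel stick-breaking weights
% described in the entry above, pi_h(x) = K(x, Gamma_h) V_h prod_{l<h} (1 -
% K(x, Gamma_l) V_l). The Gaussian kernel, Beta(1, lambda) sticks, and the
% truncation level are illustrative choices, not the paper's defaults.

import numpy as np

rng = np.random.default_rng(0)
H, lam, psi = 50, 1.0, 4.0               # truncation, stick parameter, kernel precision
gamma = rng.uniform(0.0, 1.0, H)         # random locations Gamma_h
V = rng.beta(1.0, lam, H)                # beta-distributed stick weights V_h

def ksb_weights(x):
    K = np.exp(-psi * (x - gamma) ** 2)  # kernel evaluated at each location
    U = K * V
    return U * np.concatenate(([1.0], np.cumprod(1.0 - U)[:-1]))

for x in (0.2, 0.8):                     # nearby x reuse nearby sticks
    pi = ksb_weights(x)
    print(x, "leading weights:", pi[:3].round(3), "total:", pi.sum().round(3))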

@article{fds257837,
   Author = {Kundu, S and Dunson, DB},
   Title = {Latent factor models for density estimation},
   Journal = {Biometrika},
   Volume = {101},
   Number = {3},
   Pages = {641-654},
   Publisher = {Oxford University Press (OUP)},
   Year = {2014},
   Month = {January},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/asu019},
   Abstract = {Although discrete mixture modelling has formed the backbone
             of the literature on Bayesian density estimation, there are
             some well-known disadvantages. As an alternative to discrete
             mixtures, we propose a class of priors based on random
             nonlinear functions of a uniform latent variable with an
             additive residual. The induced prior for the density is
             shown to have desirable properties, including ease of
             centring on an initial guess, large support, posterior
             consistency and straightforward computation via Gibbs
             sampling. Some advantages over discrete mixtures, such as
             Dirichlet process mixtures of Gaussian kernels, are
             discussed and illustrated via simulations and an
             application. © 2014 Biometrika Trust.},
   Doi = {10.1093/biomet/asu019},
   Key = {fds257837}
}

@article{fds348920,
   Author = {Camerlenghi, F and Dunson, DB and Lijoi, A and Prünster, I and Rodríguez, A},
   Title = {Latent Nested Nonparametric Priors (with
             Discussion).},
   Journal = {Bayesian analysis},
   Volume = {14},
   Number = {4},
   Pages = {1303-1356},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2019},
   Month = {December},
   url = {http://dx.doi.org/10.1214/19-ba1169},
   Abstract = {Discrete random structures are important tools in Bayesian
             nonparametrics and the resulting models have proven
             effective in density estimation, clustering, topic modeling
             and prediction, among others. In this paper, we consider
             nested processes and study the dependence structures they
             induce. Dependence ranges between homogeneity, corresponding
             to full exchangeability, and maximum heterogeneity,
             corresponding to (unconditional) independence across
             samples. The popular nested Dirichlet process is shown to
             degenerate to the fully exchangeable case when there are
             ties across samples at the observed or latent level. To
             overcome this drawback, inherent to nesting general discrete
             random measures, we introduce a novel class of latent nested
             processes. These are obtained by adding common and
             group-specific completely random measures and, then,
             normalizing to yield dependent random probability measures.
             We provide results on the partition distributions induced by
             latent nested processes, and develop a Markov Chain Monte
             Carlo sampler for Bayesian inferences. A test for
             distributional homogeneity across groups is obtained as a
             by-product. The results and their inferential implications
             are showcased on synthetic and real data.},
   Doi = {10.1214/19-ba1169},
   Key = {fds348920}
}

@article{fds258037,
   Author = {Rodríguez, A and Dunson, DB and Gelfand, AE},
   Title = {Latent Stick-Breaking Processes.},
   Journal = {Journal of the American Statistical Association},
   Volume = {105},
   Number = {490},
   Pages = {647-659},
   Year = {2010},
   Month = {April},
   ISSN = {0162-1459},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23559690},
   Abstract = {We develop a model for stochastic processes with random
             marginal distributions. Our model relies on a stick-breaking
             construction for the marginal distribution of the process,
             and introduces dependence across locations by using a latent
             Gaussian copula model as the mechanism for selecting the
             atoms. The resulting latent stick-breaking process (LaSBP)
             induces a random partition of the index space, with points
             closer in space having a higher probability of being in the
             same cluster. We develop an efficient and straightforward
             Markov chain Monte Carlo (MCMC) algorithm for computation
             and discuss applications in financial econometrics and
             ecology. This article has supplementary material
             online.},
   Doi = {10.1198/jasa.2010.tm08241},
   Key = {fds258037}
}

@article{fds258001,
   Author = {Carin, L and Baraniuk, RG and Cevher, V and Dunson, D and Jordan, MI and Sapiro, G and Wakin, MB},
   Title = {Learning Low-Dimensional Signal Models: A Bayesian approach
             based on incomplete measurements.},
   Journal = {IEEE signal processing magazine},
   Volume = {28},
   Number = {2},
   Pages = {39-51},
   Year = {2011},
   Month = {March},
   ISSN = {1053-5888},
   url = {http://dx.doi.org/10.1109/msp.2010.939733},
   Abstract = {Sampling, coding, and streaming even the most essential
             data, e.g., in medical imaging and weather-monitoring
             applications, produce a data deluge that severely stresses
             the available analog-to-digital converter, communication
             bandwidth, and digital-storage resources. Surprisingly,
             while the ambient data dimension is large in many problems,
             the relevant information in the data can reside in a much
             lower dimensional space. © 2006 IEEE.},
   Doi = {10.1109/msp.2010.939733},
   Key = {fds258001}
}

@article{fds257852,
   Author = {Kessler, DC and Taylor, JA and Dunson, DB},
   Title = {Learning phenotype densities conditional on many interacting
             predictors.},
   Journal = {Bioinformatics (Oxford, England)},
   Volume = {30},
   Number = {11},
   Pages = {1562-1568},
   Year = {2014},
   Month = {June},
   ISSN = {1367-4803},
   url = {http://dx.doi.org/10.1093/bioinformatics/btu040},
   Abstract = {MOTIVATION: Estimating a phenotype distribution
             conditional on a set of discrete-valued predictors is a
             commonly encountered task. For example, interest may be in
             how the density of a quantitative trait varies with single
             nucleotide polymorphisms and patient characteristics. The
             subset of important predictors is not usually known in
             advance. This becomes more challenging with a
             high-dimensional predictor set when there is the possibility
             of interaction. RESULTS: We demonstrate a novel
             non-parametric Bayes method based on a tensor factorization
             of predictor-dependent weights for Gaussian kernels. The
             method uses multistage predictor selection for dimension
             reduction, providing succinct models for the phenotype
             distribution. The resulting conditional density morphs
             flexibly with the selected predictors. In a simulation study
             and an application to molecular epidemiology data, we
             demonstrate advantages over commonly used
             methods.},
   Doi = {10.1093/bioinformatics/btu040},
   Key = {fds257852}
}

@article{fds257893,
   Author = {Wilcox, AJ and Dunson, DB and Weinberg, CR and Trussell, J and Baird,
             DD},
   Title = {Likelihood of conception with a single act of intercourse:
             providing benchmark rates for assessment of post-coital
             contraceptives.},
   Journal = {Contraception},
   Volume = {63},
   Number = {4},
   Pages = {211-215},
   Year = {2001},
   Month = {April},
   ISSN = {0010-7824},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/11376648},
   Abstract = {Emergency post-coital contraceptives effectively reduce the
             risk of pregnancy, but their degree of efficacy remains
             uncertain. Measurement of efficacy depends on the pregnancy
             rate without treatment, which cannot be measured directly.
             We provide indirect estimates of such pregnancy rates, using
             data from a prospective study of 221 women who were
             attempting to conceive. We previously estimated the
             probability of pregnancy with an act of intercourse relative
             to ovulation. In this article, we extend these data to
             estimate the probability of pregnancy relative to
             intercourse on a given cycle day (counting from onset of
             previous menses). In assessing the efficacy of post-coital
             contraceptives, other approaches have not incorporated
             accurate information on the variability of ovulation. We
             find that the possibility of late ovulation produces a
             persistent risk of pregnancy even into the sixth week of the
             cycle. Post-coital contraceptives may be indicated even when
             intercourse has occurred late in the cycle.},
   Doi = {10.1016/s0010-7824(01)00191-3},
   Key = {fds257893}
}

@article{fds367231,
   Author = {Melikechi, O and Young, AL and Tang, T and Bowman, T and Dunson, D and Johndrow, J},
   Title = {Limits of epidemic prediction using SIR models.},
   Journal = {Journal of mathematical biology},
   Volume = {85},
   Number = {4},
   Pages = {36},
   Year = {2022},
   Month = {September},
   url = {http://dx.doi.org/10.1007/s00285-022-01804-5},
   Abstract = {The Susceptible-Infectious-Recovered (SIR) equations and
             their extensions comprise a commonly utilized set of models
             for understanding and predicting the course of an epidemic.
             In practice, it is of substantial interest to estimate the
             model parameters based on noisy observations early in the
             outbreak, well before the epidemic reaches its peak. This
             allows prediction of the subsequent course of the epidemic
             and design of appropriate interventions. However, accurately
             inferring SIR model parameters in such scenarios is
             problematic. This article provides novel, theoretical
             insight on this issue of practical identifiability of the
             SIR model. Our theory provides new understanding of the
             inferential limits of routinely used epidemic models and
             provides a valuable addition to current simulate-and-check
             methods. We illustrate some practical implications through
             application to a real-world epidemic data
             set.},
   Doi = {10.1007/s00285-022-01804-5},
   Key = {fds367231}
}
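
% Editorial note: a minimal integration of the SIR equations discussed above,
% dS/dt = -beta*S*I/N, dI/dt = beta*S*I/N - gamma*I, dR/dt = gamma*I. The
% parameter values and population size are illustrative, not estimates from
% the paper.

import numpy as np
from scipy.integrate import solve_ivp

beta, gamma, N = 0.3, 0.1, 1_000_000    # transmission rate, recovery rate, population

def sir(t, y):
    S, I, R = y
    new_inf = beta * S * I / N
    return [-new_inf, new_inf - gamma * I, gamma * I]

sol = solve_ivp(sir, (0, 250), [N - 10, 10, 0], t_eval=np.linspace(0, 250, 251))
print(f"R0 = {beta / gamma:.1f}; infections peak near day "
      f"{sol.t[np.argmax(sol.y[1])]:.0f}")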

@article{fds257867,
   Author = {Li, D and Longnecker, MP and Dunson, DB},
   Title = {Lipid adjustment for chemical exposures: accounting for
             concomitant variables.},
   Journal = {Epidemiology (Cambridge, Mass.)},
   Volume = {24},
   Number = {6},
   Pages = {921-928},
   Year = {2013},
   Month = {November},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/24051893},
   Abstract = {BACKGROUND: Some environmental chemical exposures are
             lipophilic and need to be adjusted by serum lipid levels
             before data analyses. There are currently various strategies
             that attempt to account for this problem, but all have their
             drawbacks. To address such concerns, we propose a new method
             that uses Box-Cox transformations and a simple Bayesian
             hierarchical model to adjust for lipophilic chemical
             exposures. METHODS: We compared our Box-Cox method to
             existing methods. We ran simulation studies in which
             increasing levels of lipid-adjusted chemical exposure did
             and did not increase the odds of having a disease, and we
             looked at both single-exposure and multiple-exposure cases.
             We also analyzed an epidemiology dataset that examined the
             effects of various chemical exposures on the risk of birth
             defects. RESULTS: Compared with existing methods, our
             Box-Cox method produced unbiased estimates, good coverage,
             similar power, and lower type I error rates. This was the
             case in both single- and multiple-exposure simulation
             studies. Results from analysis of the birth-defect data
             differed from results using existing methods. CONCLUSION: Our
             Box-Cox method is a novel and intuitive way to account for
             the lipophilic nature of certain chemical exposures. It
             addresses some of the problems with existing methods, is
             easily extendable to multiple exposures, and can be used in
             any analysis that involves concomitant variables.},
   Doi = {10.1097/ede.0b013e3182a671e4},
   Key = {fds257867}
}
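
% Editorial note: a minimal illustration of the Box-Cox step in the entry
% above, using maximum likelihood to pick the transformation exponent for
% skewed serum-lipid values. The simulated data are a stand-in; the paper's
% Bayesian hierarchical model for lipid adjustment is not reproduced here.

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
lipids = rng.lognormal(mean=0.5, sigma=0.4, size=500)   # right-skewed lipid levels
transformed, lam = stats.boxcox(lipids)                 # MLE of the Box-Cox exponent
print(f"estimated lambda = {lam:.2f}")
print("skewness before/after:",
      round(float(stats.skew(lipids)), 2), round(float(stats.skew(transformed)), 2))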

@article{fds257855,
   Author = {Zhu, B and Dunson, DB},
   Title = {Locally Adaptive Bayes Nonparametric Regression via Nested
             Gaussian Processes.},
   Journal = {Journal of the American Statistical Association},
   Volume = {108},
   Number = {504},
   Year = {2013},
   Month = {January},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2013.838568},
   Abstract = {We propose a nested Gaussian process (nGP) as a locally
             adaptive prior for Bayesian nonparametric regression.
             Specified through a set of stochastic differential equations
             (SDEs), the nGP imposes a Gaussian process prior for the
             function's <i>m</i>th-order derivative. The nesting comes in
             through including a local instantaneous mean function, which
             is drawn from another Gaussian process inducing adaptivity
             to locally-varying smoothness. We discuss the support of the
             nGP prior in terms of the closure of a reproducing kernel
             Hilbert space, and consider theoretical properties of the
             posterior. The posterior mean under the nGP prior is shown
             to be equivalent to the minimizer of a nested penalized
             sum-of-squares involving penalties for both the global and
             local roughness of the function. Using highly-efficient
             Markov chain Monte Carlo for posterior inference, the
             proposed method performs well in simulation studies compared
             to several alternatives, and is scalable to massive data,
             illustrated through a proteomics application.},
   Doi = {10.1080/01621459.2013.838568},
   Key = {fds257855}
}
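
% Editorial note: a rough Euler-Maruyama draw from a nested Gaussian process
% prior in the spirit of the entry above: the function's second derivative
% tracks a local instantaneous mean u(t) that is itself driven by white noise.
% The orders (m = 2, k = 1), noise scales, and step size are illustrative
% assumptions.

import numpy as np

rng = np.random.default_rng(0)
T, dt = 1.0, 1e-3
n_steps = int(T / dt)
sig_f, sig_u = 5.0, 20.0

f = df = u = 0.0
path = np.empty(n_steps)
for i in range(n_steps):
    u += sig_u * np.sqrt(dt) * rng.standard_normal()              # SDE for the mean
    df += u * dt + sig_f * np.sqrt(dt) * rng.standard_normal()    # SDE for f'
    f += df * dt
    path[i] = f
print("one nGP sample path simulated; f(T) =", round(path[-1], 4))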

@article{fds257848,
   Author = {Durante, D and Scarpa, B and Dunson, DB},
   Title = {Locally adaptive bayesian multivariate time
             series},
   Journal = {Advances in Neural Information Processing
             Systems},
   Year = {2013},
   Month = {January},
   ISSN = {1049-5258},
   Abstract = {In modeling multivariate time series, it is important to
             allow time-varying smoothness in the mean and covariance
             process. In particular, there may be certain time intervals
             exhibiting rapid changes and others in which changes are
             slow. If such locally adaptive smoothness is not accounted
             for, one can obtain misleading inferences and predictions,
             with over-smoothing across erratic time intervals and
             under-smoothing across times exhibiting slow variation. This
             can lead to miscalibration of predictive intervals, which
             can be substantially too narrow or wide depending on the
             time. We propose a continuous multivariate stochastic
             process for time series having locally varying smoothness in
             both the mean and covariance matrix. This process is
             constructed utilizing latent dictionary functions in time,
             which are given nested Gaussian process priors and linearly
             related to the observed data through a sparse mapping. Using
             a differential equation representation, we bypass usual
             computational bottlenecks in obtaining MCMC and online
             algorithms for approximate Bayesian inference. The
             performance is assessed in simulations and illustrated in a
             financial application.},
   Key = {fds257848}
}

@article{fds329113,
   Author = {Durante, D and Dunson, DB},
   Title = {Locally adaptive dynamic networks},
   Journal = {Annals of Applied Statistics},
   Volume = {10},
   Number = {4},
   Pages = {2203-2232},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2016},
   Month = {December},
   url = {http://dx.doi.org/10.1214/16-AOAS971},
   Abstract = {Our focus is on realistically modeling and forecasting
             dynamic networks of face-to-face contacts among individuals.
             Important aspects of such data that lead to problems with
             current methods include the tendency of the contacts to move
             between periods of slow and rapid changes, and the dynamic
             heterogeneity in the actors’ connectivity behaviors.
             Motivated by this application, we develop a novel method for
             Locally Adaptive DYnamic (LADY) network inference. The
             proposed model relies on a dynamic latent space
             representation in which each actor’s position evolves in
             time via stochastic differential equations. Using a
             state-space representation for these stochastic processes
             and Pólya-gamma data augmentation, we develop an efficient
             MCMC algorithm for posterior inference along with tractable
             procedures for online updating and forecasting of future
             networks. We evaluate performance in simulation studies, and
             consider an application to face-to-face contacts among
             individuals in a primary school.},
   Doi = {10.1214/16-AOAS971},
   Key = {fds329113}
}

@article{fds257842,
   Author = {Durante, D and Scarpa, B and Dunson, DB},
   Title = {Locally adaptive factor processes for multivariate time
             series},
   Journal = {Journal of Machine Learning Research},
   Volume = {15},
   Pages = {1493-1522},
   Year = {2014},
   Month = {January},
   ISSN = {1532-4435},
   Abstract = {In modeling multivariate time series, it is important to
             allow time-varying smoothness in the mean and covariance
             process. In particular, there may be certain time intervals
             exhibiting rapid changes and others in which changes are
             slow. If such time-varying smoothness is not accounted for,
             one can obtain misleading inferences and predictions, with
             over-smoothing across erratic time intervals and
             under-smoothing across times exhibiting slow variation. This
             can lead to mis-calibration of predictive intervals, which
             can be substantially too narrow or wide depending on the
             time. We propose a locally adaptive factor process for
             characterizing multivariate mean-covariance changes in
             continuous time, allowing locally varying smoothness in both
             the mean and covariance matrix. This process is constructed
             utilizing latent dictionary functions evolving in time
             through nested Gaussian processes and linearly related to
             the observed data with a sparse mapping. Using a differential
             equation representation, we bypass usual computational
             bottlenecks in obtaining MCMC and online algorithms for
             approximate Bayesian inference. The performance is assessed
             in simulations and illustrated in a financial application.
             © 2014 Daniele Durante, Bruno Scarpa and David B.
             Dunson.},
   Key = {fds257842}
}

@article{fds348797,
   Author = {Thai, DH and Wu, HT and Dunson, DB},
   Title = {Locally convex kernel mixtures: Bayesian subspace
             learning},
   Journal = {Proceedings - 18th IEEE International Conference on Machine
             Learning and Applications, ICMLA 2019},
   Pages = {272-275},
   Year = {2019},
   Month = {December},
   ISBN = {9781728145495},
   url = {http://dx.doi.org/10.1109/ICMLA.2019.00051},
   Abstract = {Kernel mixture models are routinely used for density
             estimation. However, in multivariate settings, issues arise
             in efficiently approximating lower-dimensional structure in
             the data. For example, it is common to suppose that the
             density is concentrated near a lower-dimensional non-linear
             subspace or manifold. Typical kernels used to locally
             approximate such subspaces are inflexible, so that a large
             number of components are often needed. We propose a novel
             class of LOcally COnvex (LOCO) kernels that are flexible in
             adapting to nonlinear local structure. LOCO kernels are
             induced by introducing random knots within local
             neighborhoods, and generating data as a random convex
             combination of these knots with adaptive weights and
             additive noise. For identifiability, we constrain all
             observations from a particular component to have the same
             mean. For Bayesian inference subject to this constraint, we
             develop a hybrid Gibbs sampler and optimization algorithm
             that incorporates a Lagrange multiplier within a splitting
             method. The resulting LOCO algorithm is shown to
             dramatically outperform typical Gaussian mixture models in
             challenging examples.},
   Doi = {10.1109/ICMLA.2019.00051},
   Key = {fds348797}
}
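
As a rough illustration of the generative mechanism described in this abstract, the sketch below draws random knots in a neighborhood and generates each observation as a random convex combination of the knots plus additive noise. The knot count, Dirichlet concentration, and noise scale are invented for the example; the identifiability constraint and the hybrid Gibbs/Lagrange sampler of the paper are not reproduced.

import numpy as np

rng = np.random.default_rng(1)

def sample_loco_component(center, n_obs, n_knots=5, spread=1.0,
                          alpha=0.5, noise_sd=0.05):
    """Draw observations as convex combinations of random knots plus noise."""
    knots = center + rng.normal(scale=spread, size=(n_knots, center.size))
    # Adaptive convex weights: one Dirichlet draw per observation.
    w = rng.dirichlet(alpha * np.ones(n_knots), size=n_obs)
    x = w @ knots + rng.normal(scale=noise_sd, size=(n_obs, center.size))
    return x, knots

# One 2-D component concentrated near the convex hull of its knots.
x, knots = sample_loco_component(center=np.zeros(2), n_obs=200)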

@article{fds257992,
   Author = {Ren, L and Du, L and Carin, L and Dunson, DB},
   Title = {Logistic Stick-Breaking Process.},
   Journal = {Journal of machine learning research : JMLR},
   Volume = {12},
   Number = {Jan},
   Pages = {203-239},
   Year = {2011},
   Month = {January},
   ISSN = {1532-4435},
   Abstract = {A logistic stick-breaking process (LSBP) is proposed for
             non-parametric clustering of general spatially- or
             temporally-dependent data, imposing the belief that
             proximate data are more likely to be clustered together. The
             sticks in the LSBP are realized via multiple logistic
             regression functions, with shrinkage priors employed to
             favor contiguous and spatially localized segments. The LSBP
             is also extended for the simultaneous processing of multiple
             data sets, yielding a hierarchical logistic stick-breaking
             process (H-LSBP). The model parameters (atoms) within the
             H-LSBP are shared across the multiple learning tasks.
             Efficient variational Bayesian inference is derived, and
             comparisons are made to related techniques in the
             literature. Experimental analysis is performed for audio
             waveforms and images, and it is demonstrated that for
             segmentation applications the LSBP yields generally
             homogeneous segments with sharp boundaries.},
   Key = {fds257992}
}
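
The stick-breaking mechanism of the LSBP can be illustrated directly: each stick is a logistic function of location, so proximate locations receive similar mixture weights. The linear logits below are illustrative assumptions, and the shrinkage priors and variational inference of the paper are omitted.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lsbp_weights(loc, intercepts, slopes):
    """Stick-breaking mixture weights driven by logistic functions of location.

    pi_k(loc) = sigma(a_k + b_k * loc) * prod_{j<k} (1 - sigma(a_j + b_j * loc)),
    with the final stick absorbing the remaining mass.
    """
    sticks = sigmoid(intercepts + slopes * loc)        # break probabilities
    remain = np.concatenate(([1.0], np.cumprod(1.0 - sticks[:-1])))
    w = sticks * remain
    w[-1] = 1.0 - w[:-1].sum()                         # last component takes the rest
    return w

# Three components whose prevalence shifts smoothly across space.
a = np.array([-2.0, 0.0, 10.0])   # large final intercept keeps weights proper
b = np.array([1.0, -0.5, 0.0])
for loc in (-3.0, 0.0, 3.0):
    print(loc, np.round(lsbp_weights(loc, a, b), 3))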

@article{fds258027,
   Author = {Zhou, M and Li, L and Dunson, D and Carin, L},
   Title = {Lognormal and gamma mixed negative binomial
             regression},
   Journal = {Proceedings of the 29th International Conference on Machine
             Learning, ICML 2012},
   Volume = {2},
   Pages = {1343-1350},
   Year = {2012},
   Month = {October},
   url = {http://hdl.handle.net/10161/8954},
   Abstract = {In regression analysis of counts, a lack of simple and
             efficient algorithms for posterior computation has made
             Bayesian approaches appear unattractive and thus
             underdeveloped. We propose a lognormal and gamma mixed
             negative binomial (NB) regression model for counts, and
             present efficient closed-form Bayesian inference; unlike
             conventional Poisson models, the proposed approach has two
             free parameters to include two different kinds of random
             effects, and allows the incorporation of prior information,
             such as sparsity in the regression coefficients. By placing
             a gamma distribution prior on the NB dispersion parameter r,
             and connecting a log-normal distribution prior with the
             logit of the NB probability parameter p, efficient Gibbs
             sampling and variational Bayes inference are both developed.
             The closed-form updates are obtained by exploiting
             conditional conjugacy via both a compound Poisson
             representation and a Polya-Gamma distribution based data
             augmentation approach. The proposed Bayesian inference can
             be implemented routinely, while being easily generalizable
             to more complex settings involving multivariate dependence
             structures. The algorithms are illustrated using real
             examples. Copyright 2012 by the author(s)/owner(s).},
   Key = {fds258027}
}
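
For readers wanting the generative side of this model concretely, the following sketch simulates counts with a gamma prior on the NB dispersion parameter r and a normal (hence lognormal on the odds scale) random effect on the logit of the probability parameter p. It illustrates the likelihood only; the Polya-Gamma and compound-Poisson Gibbs updates are not shown, and the hyperparameter values are arbitrary.

import numpy as np

rng = np.random.default_rng(2)

def simulate_ln_gamma_nb(X, beta, a0=1.0, b0=1.0, tau=0.5):
    """Simulate counts from the mixed NB model sketched here:
    r ~ Gamma(a0, rate b0); logit(p_i) = x_i' beta + eps_i,
    eps_i ~ Normal(0, tau^2); y_i ~ NB(r, p_i) with mean r p_i / (1 - p_i).
    """
    n = X.shape[0]
    r = rng.gamma(a0, 1.0 / b0)
    logit_p = X @ beta + rng.normal(scale=tau, size=n)
    p = 1.0 / (1.0 + np.exp(-logit_p))
    # numpy parameterizes NB by the success probability of the *other* outcome,
    # so pass 1 - p to obtain mean r * p / (1 - p).
    y = rng.negative_binomial(r, 1.0 - p)
    return y, r, p

X = np.column_stack([np.ones(500), rng.normal(size=500)])
y, r, p = simulate_ln_gamma_nb(X, beta=np.array([-1.0, 0.8]))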

@article{fds257943,
   Author = {Baird, DD and Kesner, JS and Dunson, DB},
   Title = {Luteinizing hormone in premenopausal women may stimulate
             uterine leiomyomata development.},
   Journal = {Journal of the Society for Gynecologic Investigation},
   Volume = {13},
   Number = {2},
   Pages = {130-135},
   Year = {2006},
   Month = {February},
   ISSN = {1071-5576},
   url = {http://dx.doi.org/10.1016/j.jsgi.2005.12.001},
   Abstract = {Objective: Human chorionic gonadotropin (hCG) has
             proliferative effects on uterine smooth muscle and leiomyoma
             tissue in vitro. We hypothesized that luteinizing hormone
             (LH) would have the same effect by activating the LH/hCG
             receptor, and it would follow that premenopausal women with
             higher basal LH levels would be more likely to have
             leiomyomata. Methods: Randomly selected women, aged 35
             to 49 years, from a prepaid health plan were screened for
             leiomyomata with pelvic ultrasound. Urine samples collected
             during the first or last 5 days of the menstrual cycle were
             analyzed for LH by immunofluorometric assay, and
             concentrations were corrected for creatinine (n = 523).
             Logistic regression and Bayes analyses were used to evaluate
             the association of LH with presence and size of leiomyomata,
             adjusting for age and other risk factors. Results: Women
             with higher LH were more likely to have leiomyomata
             (adjusted odds ratios for second and third tertiles were 1.7
             and 2.0 compared with lower tertile; 95% confidence
             intervals, 1.0 to 2.7 and 1.2 to 3.4, respectively). The
             association was stronger for large leiomyomata. Bayes
             analyses designed to estimate LH effects on tumor onset
             separately from tumor growth showed significantly
             accelerated tumor onset but little evidence of effects on
             tumor growth. Age, an independent risk factor for
             leiomyomata, was not affected by inclusion of LH in the
             logistic models. Conclusions: As hypothesized, women
             with higher LH were more likely to have leiomyomata, but
             this did not explain the age-related increase in leiomyomata
             during perimenopausal ages. Determining whether LH is causal
             or a marker for susceptibility will require further
             research.},
   Doi = {10.1016/j.jsgi.2005.12.001},
   Key = {fds257943}
}

@article{fds257827,
   Author = {Chabout, J and Sarkar, A and Dunson, DB and Jarvis,
             ED},
   Title = {Male mice song syntax depends on social contexts and
             influences female preferences.},
   Journal = {Front Behav Neurosci},
   Volume = {9},
   Pages = {76},
   Publisher = {FRONTIERS MEDIA SA},
   Year = {2015},
   url = {http://hdl.handle.net/10161/9544},
   Abstract = {In 2005, Holy and Guo advanced the idea that male mice
             produce ultrasonic vocalizations (USV) with some features
             similar to courtship songs of songbirds. Since then, studies
             showed that male mice emit USV songs in different contexts
             (sexual and other) and possess a multisyllabic repertoire.
             Debate still exists for and against plasticity in their
             vocalizations. But the use of a multisyllabic repertoire can
             increase potential flexibility and information, in how
             elements are organized and recombined, namely syntax. In
             many bird species, modulating song syntax has ethological
             relevance for sexual behavior and mate preferences. In this
             study we exposed adult male mice to different social
             contexts and developed a new approach of analyzing their
             USVs based on songbird syntax analysis. We found that male
             mice modify their syntax, including specific sequences,
             length of sequence, repertoire composition, and spectral
             features, according to stimulus and social context. Males
             emit longer and simpler syllables and sequences when singing
             to females, but more complex syllables and sequences in
             response to fresh female urine. Playback experiments show
             that the females prefer the complex songs over the simpler
             ones. We propose that the complex songs serve to lure
             females in, whereas the simpler directed sequences are used
             for direct courtship. These results suggest that although
             mice have a much more limited capacity for song
             modification, they could still serve as animal models for
             understanding some of the vocal communication features for
             which songbirds are used.},
   Doi = {10.3389/fnbeh.2015.00076},
   Key = {fds257827}
}

@article{fds333512,
   Author = {Zhang, Z and Descoteaux, M and Zhang, J and Girard, G and Chamberland,
             M and Dunson, D and Srivastava, A and Zhu, H},
   Title = {Mapping population-based structural connectomes.},
   Journal = {NeuroImage},
   Volume = {172},
   Pages = {130-145},
   Year = {2018},
   Month = {May},
   url = {http://dx.doi.org/10.1016/j.neuroimage.2017.12.064},
   Abstract = {Advances in understanding the structural connectomes of
             human brain require improved approaches for the
             construction, comparison and integration of high-dimensional
             whole-brain tractography data from a large number of
             individuals. This article develops a population-based
             structural connectome (PSC) mapping framework to address
             these challenges. PSC simultaneously characterizes a large
             number of white matter bundles within and across different
             subjects by registering different subjects' brains based on
             coarse cortical parcellations, compressing the bundles of
             each connection, and extracting novel connection weights. A
             robust tractography algorithm and streamline post-processing
             techniques, including dilation of gray matter regions,
             streamline cutting, and outlier streamline removal are
             applied to improve the robustness of the extracted
             structural connectomes. The developed PSC framework can be
             used to reproducibly extract binary networks, weighted
             networks and streamline-based brain connectomes. We apply
             the PSC to Human Connectome Project data to illustrate its
             application in characterizing normal variations and
             heritability of structural connectomes in healthy
             subjects.},
   Doi = {10.1016/j.neuroimage.2017.12.064},
   Key = {fds333512}
}

@article{fds257849,
   Author = {Kessler, DC and Hoff, PD and Dunson, DB},
   Title = {Marginally specified priors for non-parametric Bayesian
             estimation.},
   Journal = {Journal of the Royal Statistical Society. Series B,
             Statistical methodology},
   Volume = {77},
   Number = {1},
   Pages = {35-58},
   Year = {2015},
   Month = {January},
   ISSN = {1369-7412},
   url = {http://dx.doi.org/10.1111/rssb.12059},
   Abstract = {Prior specification for non-parametric Bayesian inference
             involves the difficult task of quantifying prior knowledge
             about a parameter of high, often infinite, dimension. A
             statistician is unlikely to have informed opinions about all
             aspects of such a parameter but will have real information
             about functionals of the parameter, such as the population
             mean or variance. The paper proposes a new framework for
             non-parametric Bayes inference in which the prior
             distribution for a possibly infinite dimensional parameter
             is decomposed into two parts: an informative prior on a
             finite set of functionals, and a non-parametric conditional
             prior for the parameter given the functionals. Such priors
             can be easily constructed from standard non-parametric prior
             distributions in common use and inherit the large support of
             the standard priors on which they are based. Additionally,
             posterior approximations under these informative priors can
             generally be made via minor adjustments to existing Markov
             chain approximation algorithms for standard non-parametric
             prior distributions. We illustrate the use of such priors in
             the context of multivariate density estimation using
             Dirichlet process mixture models, and in the modelling of
             high dimensional sparse contingency tables.},
   Doi = {10.1111/rssb.12059},
   Key = {fds257849}
}

@article{fds257935,
   Author = {Longnecker, MP and Klebanoff, MA and Dunson, DB and Guo, X and Chen, Z and Zhou, H and Brock, JW},
   Title = {Maternal serum level of the DDT metabolite DDE in relation
             to fetal loss in previous pregnancies.},
   Journal = {Environmental research},
   Volume = {97},
   Number = {2},
   Pages = {127-133},
   Year = {2005},
   Month = {February},
   ISSN = {0013-9351},
   url = {http://dx.doi.org/10.1016/s0013-9351(03)00108-7},
   Abstract = {Use of 1,1,1-trichloro-2,2-bis(p-chlorophenyl)ethane (DDT)
             continues in about 25 countries. This use has been justified
             partly by the belief that it has no adverse consequences on
             human health. Evidence has been increasing, however, for
             adverse reproductive effects of DDT, but additional data are
             needed. Pregnant women who enrolled in the Collaborative
             Perinatal Project (United States, 1959-1965) were asked
             about their previous pregnancy history; blood samples were
             drawn and the serum frozen. In 1997-1999, the sera of 1717
             of these women who had previous pregnancies were analyzed
             for 1,1-dichloro-2,2-bis(p-chlorophenyl)ethylene (DDE), the
             major breakdown product of DDT. The odds of previous fetal
             loss were examined in relation to DDE level in logistic
             regression models. Compared with women whose DDE level was
             <15 microg/L, the adjusted odds ratios of fetal loss
             according to category of DDE were as follows: 15-29
             microg/L, 1.1; 30-44 microg/L, 1.4; 45-59 microg/L, 1.6; and
             60+ microg/L, 1.2. The adjusted odds ratio per 60 microg/L
             increase was 1.4 (95% confidence interval 1.1-1.6). The
             results were consistent with an adverse effect of DDE on
             fetal loss, but were inconclusive owing to the possibility
             that previous pregnancies ending in fetal loss decreased
             serum DDE levels less than did those carried to
             term.},
   Doi = {10.1016/s0013-9351(03)00108-7},
   Key = {fds257935}
}

@article{fds257940,
   Author = {Law, DCG and Klebanoff, MA and Brock, JW and Dunson, DB and Longnecker,
             MP},
   Title = {Maternal serum levels of polychlorinated biphenyls and
             1,1-dichloro-2,2-bis(p-chlorophenyl)ethylene (DDE) and time
             to pregnancy.},
   Journal = {American journal of epidemiology},
   Volume = {162},
   Number = {6},
   Pages = {523-532},
   Year = {2005},
   Month = {September},
   url = {http://dx.doi.org/10.1093/aje/kwi240},
   Abstract = {Polychlorinated biphenyls (PCBs), once used widely in
             transformers and other applications, and
             1,1-dichloro-2,2-bis(p-chlorophenyl)ethylene (DDE), the main
             metabolite of the pesticide 1,1,1-trichloro-2,2-bis(p-chlorophenyl)ethane
             (DDT), are hormonally active agents. Changes in menstrual
             cycle functioning associated with PCBs and DDE, and
             increased odds of spontaneous abortion associated with DDE,
             suggest that these compounds could affect fertility. The
             authors investigated the association between PCB and DDE
             exposure and time to pregnancy by using serum levels
             measured in 390 pregnant women in the Collaborative
             Perinatal Project enrolled at 12 study centers in the United
             States from 1959 to 1965. They estimated adjusted
             fecundability odds ratios by using Cox proportional hazards
             modeling for discrete time data. Compared with time to
             pregnancy for women in the lowest exposure category (PCBs <
             1.24 microg/liter, DDE < 14 microg/liter), time to pregnancy
             increased for women in the highest exposure category in
             terms of both PCBs (fecundability odds ratio for PCBs > or =
             5.00 microg/liter = 0.65, 95% confidence interval: 0.36,
             1.18) and DDE (fecundability odds ratio for DDE > or = 60
             microg/liter = 0.65, 95% confidence interval: 0.32, 1.31).
             Overall, time to pregnancy increased with increasing serum
             PCB levels but was less suggestive of an association with
             DDE. Both trends were imprecise and attenuated when
             expressed on a lipid basis. Overall, evidence of an
             association between PCB or DDE exposure and time to
             pregnancy was weak and inconclusive.},
   Doi = {10.1093/aje/kwi240},
   Key = {fds257940}
}

@article{fds359277,
   Author = {Lee, K and Lin, L and Dunson, D},
   Title = {Maximum pairwise bayes factors for covariance structure
             testing},
   Journal = {Electronic Journal of Statistics},
   Volume = {15},
   Number = {2},
   Pages = {4384-4419},
   Year = {2021},
   Month = {January},
   url = {http://dx.doi.org/10.1214/21-EJS1900},
   Abstract = {Hypothesis testing of structure in covariance matrices is of
             significant importance, but faces great challenges in
             high-dimensional settings. Although consistent frequentist
             one-sample covariance tests have been proposed, there is a
             lack of simple, computationally scalable, and theoretically
             sound Bayesian testing methods for large covariance
             matrices. Motivated by this gap and by the need for tests
             that are powerful against sparse alternatives, we propose a
             novel testing framework based on the maximum pairwise Bayes
             factor. Our initial focus is on one-sample covariance
             testing; the proposed test can optimally distinguish null
             and alternative hypotheses in a frequentist asymptotic
             sense. We then propose diagonal tests and a scalable
             covariance graph selection procedure that are shown to be
             consistent. A simulation study evaluates the proposed
             approach relative to competitors. We illustrate advantages
             of our graph selection method on a gene expression data
             set.},
   Doi = {10.1214/21-EJS1900},
   Key = {fds359277}
}
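
The scan structure of the maximum pairwise Bayes factor is simple to sketch: compute a Bayes factor for dependence for every pair of variables and report the maximum. In the illustration below each pairwise Bayes factor is replaced by a crude BIC-style approximation comparing correlated and independent bivariate normal fits; this stand-in is our own simplification, not the calibrated statistic developed in the paper.

import numpy as np

def log_bf_pair(x, y):
    """Approximate log Bayes factor for correlation vs independence via BIC:
    log BF ~ loglik(rho_hat) - loglik(0) - 0.5 * log(n),
    penalizing the one extra parameter in the correlated model.
    """
    n = len(x)
    x = (x - x.mean()) / x.std()
    y = (y - y.mean()) / y.std()
    rho = np.clip(np.mean(x * y), -0.999, 0.999)
    # Difference of bivariate-normal log-likelihoods at rho_hat vs rho = 0.
    ll_diff = (-0.5 * n * np.log(1.0 - rho**2)
               - (np.sum(x**2 + y**2 - 2*rho*x*y) / (2*(1 - rho**2))
                  - 0.5 * np.sum(x**2 + y**2)))
    return ll_diff - 0.5 * np.log(n)

def max_pairwise_log_bf(Z):
    """Scan all pairs of columns and return the maximum pairwise statistic."""
    p = Z.shape[1]
    return max(log_bf_pair(Z[:, i], Z[:, j])
               for i in range(p) for j in range(i + 1, p))

rng = np.random.default_rng(3)
Z = rng.normal(size=(200, 10))
Z[:, 1] = 0.7 * Z[:, 0] + 0.3 * rng.normal(size=200)  # one sparse signal pair
print(max_pairwise_log_bf(Z))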

@article{fds340936,
   Author = {Johndrow, JE and Smith, A and Pillai, N and Dunson,
             DB},
   Title = {MCMC for Imbalanced Categorical Data},
   Journal = {Journal of the American Statistical Association},
   Volume = {114},
   Number = {527},
   Pages = {1394-1403},
   Year = {2019},
   Month = {July},
   url = {http://dx.doi.org/10.1080/01621459.2018.1505626},
   Abstract = {Many modern applications collect highly imbalanced
             categorical data, with some categories relatively rare.
             Bayesian hierarchical models combat data sparsity by
             borrowing information, while also quantifying uncertainty.
             However, posterior computation presents a fundamental
             barrier to routine use; a single class of algorithms does
             not work well in all settings and practitioners waste time
             trying different types of Markov chain Monte Carlo (MCMC)
             approaches. This article was motivated by an application to
             quantitative advertising in which we encountered extremely
             poor computational performance for data augmentation MCMC
             algorithms but obtained excellent performance for adaptive
             Metropolis. To obtain a deeper understanding of this
             behavior, we derive theoretical results on the computational
             complexity of commonly used data augmentation algorithms and
             the Random Walk Metropolis algorithm for highly imbalanced
             binary data. In this regime, our results show that the
             computational complexity of Metropolis is logarithmic in
             sample size, while that of data augmentation is polynomial
             in sample size. The
             root cause of this poor performance of data augmentation is
             a discrepancy between the rates at which the target density
             and MCMC step sizes concentrate. Our methods also show that
             MCMC algorithms that exhibit a similar discrepancy will fail
             in large samples—a result with substantial practical
             impact. Supplementary materials for this article are
             available online.},
   Doi = {10.1080/01621459.2018.1505626},
   Key = {fds340936}
}
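
A minimal random-walk Metropolis sampler for an intercept-only logistic model with rare successes, included to make the Metropolis side of the comparison above concrete. The proposal scale, prior, and chain length are ad hoc choices, and no data-augmentation competitor is implemented.

import numpy as np

rng = np.random.default_rng(4)

# Highly imbalanced binary data: very few successes.
n, true_alpha = 10_000, -6.0
y = rng.uniform(size=n) < 1.0 / (1.0 + np.exp(-true_alpha))

def log_post(alpha):
    """Log posterior for intercept-only logistic regression, N(0, 10^2) prior."""
    ll = alpha * y.sum() - n * np.log1p(np.exp(alpha))
    return ll - alpha**2 / (2 * 10.0**2)

def rw_metropolis(n_iter=5000, step=0.3):
    alpha, lp = 0.0, log_post(0.0)
    draws = np.empty(n_iter)
    for t in range(n_iter):
        prop = alpha + step * rng.normal()
        lp_prop = log_post(prop)
        if np.log(rng.uniform()) < lp_prop - lp:   # accept/reject step
            alpha, lp = prop, lp_prop
        draws[t] = alpha
    return draws

draws = rw_metropolis()
print(draws[2500:].mean())   # posterior mean of the intercept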

@article{fds257836,
   Author = {Wheeler, MW and Dunson, DB and Pandalai, SP and Baker, BA and Herring,
             AH},
   Title = {Mechanistic Hierarchical Gaussian Processes.},
   Journal = {Journal of the American Statistical Association},
   Volume = {109},
   Number = {507},
   Pages = {894-904},
   Year = {2014},
   Month = {July},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2014.899234},
   Abstract = {The statistics literature on functional data analysis
             focuses primarily on flexible black-box approaches, which
             are designed to allow individual curves to have essentially
             any shape while characterizing variability. Such methods
             typically cannot incorporate mechanistic information, which
             is commonly expressed in terms of differential equations.
             Motivated by studies of muscle activation, we propose a
             nonparametric Bayesian approach that takes into account
             mechanistic understanding of muscle physiology. A novel
             class of hierarchical Gaussian processes is defined that
             favors curves consistent with differential equations defined
             on motor, damper, spring systems. A Gibbs sampler is
             proposed to sample from the posterior distribution and
             applied to a study of rats exposed to non-injurious muscle
             activation protocols. Although motivated by muscle force
             data, a parallel approach can be used to include mechanistic
             information in broad functional data analysis
             applications.},
   Doi = {10.1080/01621459.2014.899234},
   Key = {fds257836}
}

@article{fds257826,
   Author = {Wang, X and Peng, P and Dunson, DB},
   Title = {Median selection subset aggregation for parallel
             inference},
   Journal = {Advances in Neural Information Processing
             Systems},
   Volume = {3},
   Number = {January},
   Pages = {2195-2203},
   Year = {2014},
   Month = {January},
   ISSN = {1049-5258},
   Abstract = {For massive data sets, efficient computation commonly relies
             on distributed algorithms that store and process subsets of
             the data on different machines, minimizing communication
             costs. Our focus is on regression and classification
             problems involving many features. A variety of distributed
             algorithms have been proposed in this context, but
             challenges arise in defining an algorithm with low
             communication, theoretical guarantees and excellent
             practical performance in general settings. We propose a
             MEdian Selection Subset AGgregation Estimator (message)
             algorithm, which attempts to solve these problems. The
             algorithm applies feature selection in parallel for each
             subset using Lasso or another method, calculates the
             'median' feature inclusion index, estimates coefficients for
             the selected features in parallel for each subset, and then
             averages these estimates. The algorithm is simple, involves
             very minimal communication, scales efficiently in both
             sample and feature size, and has theoretical guarantees. In
             particular, we show model selection consistency and
             coefficient estimation efficiency. Extensive experiments
             show excellent performance in variable selection,
             estimation, prediction, and computation time relative to
             usual competitors.},
   Key = {fds257826}
}
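
The steps of the message algorithm quoted above translate almost line-for-line into code. The sketch below uses scikit-learn's Lasso as the per-subset selector and runs the "parallel" steps sequentially; the penalty level and toy data are illustrative assumptions, and the communication and theoretical details of the paper are not reproduced.

import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.default_rng(5)

# Toy data: 4 "machines", sparse truth.
n, p, m = 2000, 50, 4
beta = np.zeros(p); beta[:3] = [2.0, -1.5, 1.0]
X = rng.normal(size=(n, p))
y = X @ beta + rng.normal(size=n)
subsets = np.array_split(np.arange(n), m)

# Step 1: feature selection on each subset (in parallel in the paper).
inclusion = np.zeros((m, p))
for k, idx in enumerate(subsets):
    fit = Lasso(alpha=0.1).fit(X[idx], y[idx])
    inclusion[k] = fit.coef_ != 0

# Step 2: 'median' feature inclusion index -- keep features selected by
# at least half of the subsets.
selected = np.median(inclusion, axis=0) >= 0.5

# Steps 3-4: least-squares refit on the selected features per subset,
# then average the coefficient estimates across subsets.
est = np.zeros((m, selected.sum()))
for k, idx in enumerate(subsets):
    est[k], *_ = np.linalg.lstsq(X[idx][:, selected], y[idx], rcond=None)
beta_hat = est.mean(axis=0)
print(np.flatnonzero(selected), beta_hat)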

@article{fds257922,
   Author = {Tingen, C and Stanford, JB and Dunson, DB},
   Title = {Methodologic and statistical approaches to studying human
             fertility and environmental exposure.},
   Journal = {Environmental health perspectives},
   Volume = {112},
   Number = {1},
   Pages = {87-93},
   Year = {2004},
   Month = {January},
   url = {http://dx.doi.org/10.1289/ehp.6263},
   Abstract = {Although there has been growing concern about the effects of
             environmental exposures on human fertility, standard
             epidemiologic study designs may not collect sufficient data
             to identify subtle effects while properly adjusting for
             confounding. In particular, results from conventional time
             to pregnancy studies can be driven by the many sources of
             bias inherent in these studies. By prospectively collecting
             detailed records of menstrual bleeding, occurrences of
             intercourse, and a marker of ovulation day in each menstrual
             cycle, precise information on exposure effects can be
             obtained, adjusting for many of the primary sources of bias.
             This article provides an overview of the different types of
             study designs, focusing on the data required, the practical
             advantages and disadvantages of each design, and the
             statistical methods required to take full advantage of the
             available data. We conclude that detailed prospective
             studies allowing inferences on day-specific probabilities of
             conception should be considered as the gold standard for
             studying the effects of environmental exposures on
             fertility.},
   Doi = {10.1289/ehp.6263},
   Key = {fds257922}
}

@article{fds257883,
   Author = {Dunson, DB and Weinberg, CR},
   Title = {Modeling human fertility in the presence of measurement
             error.},
   Journal = {Biometrics},
   Volume = {56},
   Number = {1},
   Pages = {288-292},
   Year = {2000},
   Month = {March},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2000.00288.x},
   Abstract = {The probability of conception in a given menstrual cycle is
             closely related to the timing of intercourse relative to
             ovulation. Although commonly used markers of time of
             ovulation are known to be error prone, most fertility models
             assume the day of ovulation is measured without error. We
             develop a mixture model that allows the day to be
             misspecified. We assume that the measurement errors are
             i.i.d. across menstrual cycles. Heterogeneity among couples
             in the per cycle likelihood of conception is accounted for
             using a beta mixture model. Bayesian estimation is
             straightforward using Markov chain Monte Carlo techniques.
             The methods are applied to a prospective study of couples at
             risk of pregnancy. In the absence of validation data or
             multiple independent markers of ovulation, the
             identifiability of the measurement error distribution
             depends on the assumed model. Thus, the results of studies
             relating the timing of intercourse to the probability of
             conception should be interpreted cautiously.},
   Doi = {10.1111/j.0006-341x.2000.00288.x},
   Key = {fds257883}
}

@article{fds257891,
   Author = {Dunson, DB},
   Title = {Modeling of changes in tumor burden},
   Journal = {Journal of Agricultural, Biological, and Environmental
             Statistics},
   Volume = {6},
   Number = {1},
   Pages = {38-48},
   Publisher = {Springer Nature},
   Year = {2001},
   Month = {March},
   url = {http://dx.doi.org/10.1198/108571101300325238},
   Abstract = {Skin painting studies on transgenic mice have recently been
             approved by the Food and Drug Administration (FDA) for
             carcinogenicity testing. Data consist of serial skin tumor
             counts on the backs of shaved mice in each of several dose
             groups. Current methods for assessing the tumorigenicity of
             test compounds are based on generalized estimating equations
             and require large samples. This paper proposes a new
             framework for modeling of the change over time in the
             papilloma burden in each mouse. A latent variable underlying
             the observed papilloma response is assumed to follow a
             generalized linear mixed-effects transition model. The model
             accounts for heterogeneity among animals and serial
             dependency in the skin tumor counts. Extensions of existing
             Markov chain Monte Carlo procedures for Bayesian estimation
             in generalized linear mixed models are proposed. The methods
             are applied to data from a National Toxicology Program
             short-term carcinogenicity study of lauric
             acid.},
   Doi = {10.1198/108571101300325238},
   Key = {fds257891}
}

@article{fds257932,
   Author = {Herring, AH and Dunson, DB and Dole, N},
   Title = {Modeling the effects of a bidirectional latent predictor
             from multivariate questionnaire data.},
   Journal = {Biometrics},
   Volume = {60},
   Number = {4},
   Pages = {926-935},
   Year = {2004},
   Month = {December},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2004.00248.x},
   Abstract = {Researchers often measure stress using questionnaire data on
             the occurrence of potentially stress-inducing life events
             and the strength of reaction to these events, characterized
             as negative or positive and assigned an ordinal ranking. In
             studying the health effects of stress, one needs to obtain
             measures of an individual's negative and positive stress
             levels to be used as predictors. Motivated by data of this
             type, we propose a latent variable model, which is
             characterized by event-specific negative and positive
             reaction scores. If the positive reaction score dominates
             the negative reaction score for an event, then the
             individual's reported response to that event will be
             positive, with an ordinal ranking determined by the value of
             the score. Measures of overall positive and negative stress
             can be obtained by summing the reactivity scores across the
             events that occur for an individual. By incorporating these
             measures as predictors in a regression model and fitting the
             stress and outcome models jointly using Bayesian methods,
             inferences can be conducted without the need to assume known
             weights for the different events. We propose an MCMC
             algorithm for posterior computation and apply the approach
             to study the effects of stress on preterm
             delivery.},
   Doi = {10.1111/j.0006-341x.2004.00248.x},
   Key = {fds257932}
}

@article{fds258010,
   Author = {Dunson, DB and Haseman, JK},
   Title = {Modeling tumor onset and multiplicity using transition
             models with latent variables.},
   Journal = {Biometrics},
   Volume = {55},
   Number = {3},
   Pages = {965-970},
   Year = {1999},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.1999.00965.x},
   Abstract = {We describe a method for modeling carcinogenicity from
             animal studies where the data consist of counts of the
             number of tumors present over time. The research is
             motivated by applications to transgenic rodent studies,
             which have emerged as an alternative to chronic bioassays
             for screening possible carcinogens. In transgenic mouse
             studies, the endpoint of interest is frequently skin
             papilloma, with weekly examinations determining how many
             papillomas each animal has at a particular point in time. It
             is assumed that each animal has two unobservable latent
             variables at each time point. The first indicates whether or
             not the tumors are in a multiplying state and the second is
             the potential number of additional tumors if the tumors are
             in a multiplying state. The product of these variables
             follows a zero-inflated Poisson distribution, and the EM
             algorithm can be used to maximize the observed-data
             pseudo-likelihood, based on the latent variables. A
             generalized estimating equations robust variance estimator
             adjusts for dependency among outcomes within individual
             animals. The method is applied to testing for a dose-related
             trend in both tumor incidence and multiplicity in
             carcinogenicity studies.},
   Doi = {10.1111/j.0006-341x.1999.00965.x},
   Key = {fds258010}
}
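
The latent-variable construction in this abstract reduces to a zero-inflated Poisson, which the sketch below makes explicit: simulate the Bernoulli multiplying-state indicator and the Poisson count of potential additional tumors, multiply them, and evaluate the implied likelihood. Parameter values are illustrative; the EM algorithm and the GEE variance adjustment are omitted.

import numpy as np
from scipy import stats

rng = np.random.default_rng(6)

def simulate_zip(n, pi, lam):
    """Counts as the product of the two latent variables described above:
    z_i ~ Bernoulli(pi) (tumors in a multiplying state),
    m_i ~ Poisson(lam)  (potential additional tumors), y_i = z_i * m_i.
    """
    z = rng.uniform(size=n) < pi
    m = rng.poisson(lam, size=n)
    return z * m

def zip_loglik(y, pi, lam):
    """Zero-inflated Poisson log-likelihood of the observed counts."""
    p0 = (1 - pi) + pi * np.exp(-lam)                 # P(y = 0)
    ll = np.where(y == 0, np.log(p0),
                  np.log(pi) + stats.poisson.logpmf(y, lam))
    return ll.sum()

y = simulate_zip(500, pi=0.3, lam=2.5)
print(zip_loglik(y, 0.3, 2.5))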

@article{fds258013,
   Author = {Dunson, DB},
   Title = {Models for papilloma multiplicity and regression:
             Applications to transgenic mouse studies},
   Journal = {Journal of the Royal Statistical Society. Series C: Applied
             Statistics},
   Volume = {49},
   Number = {1},
   Pages = {19-30},
   Publisher = {WILEY},
   Year = {2000},
   Month = {January},
   url = {http://dx.doi.org/10.1111/1467-9876.00176},
   Abstract = {In cancer studies that use transgenic or knockout mice, skin
             tumour counts are recorded over time to measure
             tumorigenicity. In these studies cancer biologists are
             interested in the effect of endogenous and/or exogenous
             factors on papilloma onset, multiplicity and regression. In
             this paper an analysis of data from a study conducted by the
             National Institute of Environmental Health Sciences on the
             effect of genetic factors on skin tumorigenesis is
             presented. Papilloma multiplicity and regression are
             modelled by using Bernoulli, Poisson and binomial latent
             variables, each of which can depend on covariates and
             previous outcomes. An EM algorithm is proposed for parameter
             estimation, and generalized estimating equations adjust for
             extra dependence between outcomes within individual animals.
             A Cox proportional hazards model is used to describe
             covariate effects on the onset of tumours.},
   Doi = {10.1111/1467-9876.00176},
   Key = {fds258013}
}

@article{fds355211,
   Author = {Jauch, M and Hoff, PD and Dunson, DB},
   Title = {Monte Carlo Simulation on the Stiefel Manifold via Polar
             Expansion},
   Journal = {Journal of Computational and Graphical Statistics},
   Volume = {30},
   Number = {3},
   Pages = {622-631},
   Year = {2021},
   Month = {January},
   url = {http://dx.doi.org/10.1080/10618600.2020.1859382},
   Abstract = {Motivated by applications to Bayesian inference for
             statistical models with orthogonal matrix parameters, we
             present polar expansion, a general approach to Monte Carlo
             simulation from probability distributions on the Stiefel
             manifold. To bypass many of the well-established challenges
             of simulating from the distribution of a random orthogonal
             matrix Q, we construct a distribution for an unconstrained
             random matrix X such that Q_X, the orthogonal component of
             the polar decomposition of X, is equal in distribution to
             Q. The distribution of X
             is amenable to Markov chain Monte Carlo (MCMC) simulation
             using standard methods, and an approximation to the
             distribution of Q can be recovered from a Markov chain on
             the unconstrained space. When combined with modern MCMC
             software, polar expansion allows for routine and flexible
             posterior inference in models with orthogonal matrix
             parameters. We find that polar expansion with adaptive
             Hamiltonian Monte Carlo is an order of magnitude more
             efficient than competing MCMC approaches in a benchmark
             protein interaction network application. We also propose a
             new approach to Bayesian functional principal component
             analysis which we illustrate in a meteorological time series
             application. Supplementary materials for this article are
             available online.},
   Doi = {10.1080/10618600.2020.1859382},
   Key = {fds355211}
}
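
The core mapping of polar expansion is one line: take the orthogonal factor of the polar decomposition of an unconstrained matrix. In the sketch below the MCMC chain on X is replaced by a single Gaussian draw, which yields the uniform (Haar) distribution on the Stiefel manifold rather than a general target.

import numpy as np
from scipy.linalg import polar

rng = np.random.default_rng(7)

def polar_orthogonal(X):
    """Orthogonal component Q of the polar decomposition X = Q P."""
    Q, P = polar(X)          # Q has orthonormal columns, P is positive semidefinite
    return Q

# Gaussian X gives the uniform distribution on the Stiefel manifold;
# in polar expansion, an MCMC chain on X targets a general distribution for Q.
n, k = 6, 2
X = rng.normal(size=(n, k))
Q = polar_orthogonal(X)
print(np.allclose(Q.T @ Q, np.eye(k)))   # True: Q lies on the Stiefel manifold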

@article{fds257924,
   Author = {Bigelow, JL and Dunson, DB and Stanford, JB and Ecochard, R and Gnoth,
             C and Colombo, B},
   Title = {Mucus observations in the fertile window: a better predictor
             of conception than timing of intercourse.},
   Journal = {Human reproduction (Oxford, England)},
   Volume = {19},
   Number = {4},
   Pages = {889-892},
   Year = {2004},
   Month = {April},
   url = {http://dx.doi.org/10.1093/humrep/deh173},
   Abstract = {Background: Intercourse results in a pregnancy
             essentially only if it occurs during the 6-day fertile
             interval ending on the day of ovulation. The strong
             association between timing of intercourse within this
             interval and the probability of conception typically is
             attributed to limited sperm and egg life
             times. Methods: A total of 782 women recruited from
             natural family planning centres in Europe contributed
             prospective data on 7288 menstrual cycles. Daily records of
             intercourse, basal body temperature and vaginal discharge of
             cervical mucus were collected. Probabilities of conception
             were estimated according to the timing of intercourse
             relative to ovulation and a 1-4 score of mucus
             quality. Results: There was a strong increasing trend
             in the day-specific probabilities of pregnancy with
             increases in the mucus score. Adjusting for the mucus score,
             the day-specific probabilities had limited variability
             across the fertile interval. Conclusions: Changes in
             mucus quality across the fertile interval predict the
             observed pattern in the day-specific probabilities of
             conception. To maximize the likelihood of conception,
             intercourse should occur on days with optimal mucus quality,
             as observed in vaginal discharge, regardless of the exact
             timing relative to ovulation.},
   Doi = {10.1093/humrep/deh173},
   Key = {fds257924}
}

@article{fds257985,
   Author = {Wang, C and An, Q and Carin, L and Dunson, DB},
   Title = {Multi-task classification with infinite local
             experts},
   Journal = {ICASSP, IEEE International Conference on Acoustics, Speech
             and Signal Processing - Proceedings},
   Pages = {1569-1572},
   Publisher = {IEEE},
   Year = {2009},
   Month = {September},
   ISSN = {1520-6149},
   url = {http://dx.doi.org/10.1109/ICASSP.2009.4959897},
   Abstract = {We propose a multi-task learning (MTL) framework for
             nonlinear classification, based on an infinite set of local
             experts in feature space. The usage of local experts enables
             sharing at the expert-level, encouraging the borrowing of
             information even if tasks are similar only in subregions of
             feature space. A kernel stick-breaking process (KSBP) prior
             is imposed on the underlying distribution of class labels,
             so that the number of experts is inferred in the posterior
             and thus model selection issues are avoided. The MTL is
             implemented by imposing a Dirichlet process (DP) prior on a
             layer above the task-dependent KSBPs. © 2009
             IEEE.},
   Doi = {10.1109/ICASSP.2009.4959897},
   Key = {fds257985}
}

@article{fds257981,
   Author = {Qi, Y and Liu, D and Dunson, D and Carin, L},
   Title = {Multi-task compressive sensing with dirichlet process
             priors},
   Journal = {Proceedings of the 25th International Conference on Machine
             Learning},
   Pages = {768-775},
   Year = {2008},
   Month = {January},
   url = {http://dx.doi.org/10.1145/1390156.1390253},
   Abstract = {Compressive sensing (CS) is an emerging field that, under
             appropriate conditions, can significantly reduce the number
             of measurements required for a given signal. In many
             applications, one is interested in multiple signals that may
             be measured in multiple CS-type measurements, where here
             each signal corresponds to a sensing "task". In this paper
             we propose a novel multitask compressive sensing framework
             based on a Bayesian formalism, where a Dirichlet process
             (DP) prior is employed, yielding a principled means of
             simultaneously inferring the appropriate sharing mechanisms
             as well as CS inversion for each task. A variational
             Bayesian (VB) inference algorithm is employed to estimate
             the full posterior on the model parameters. Copyright 2008
             by the author(s)/owner(s).},
   Doi = {10.1145/1390156.1390253},
   Key = {fds257981}
}

@article{fds258057,
   Author = {Ni, K and Paisley, J and Carin, L and Dunson, D},
   Title = {Multi-task learning for analyzing and sorting large
             databases of sequential data},
   Journal = {IEEE Transactions on Signal Processing},
   Volume = {56},
   Number = {8 II},
   Pages = {3918-3931},
   Publisher = {Institute of Electrical and Electronics Engineers
             (IEEE)},
   Year = {2008},
   Month = {August},
   ISSN = {1053-587X},
   url = {http://dx.doi.org/10.1109/TSP.2008.924798},
   Abstract = {A new hierarchical nonparametric Bayesian framework is
             proposed for the problem of multi-task learning (MTL) with
             sequential data. The models for multiple tasks, each
             characterized by sequential data, are learned jointly, and
             the intertask relationships are obtained simultaneously.
             This MTL setting is used to analyze and sort large databases
             composed of sequential data, such as music clips. Within
             each data set, we represent the sequential data with an
             infinite hidden Markov model (iHMM), avoiding the problem of
             model selection (selecting a number of states). Across the
             data sets, the multiple iHMMs are learned jointly in a MTL
             setting, employing a nested Dirichlet process (nDP). The
             nDP-iHMM MTL method allows simultaneous task-level and
             data-level clustering, with which the individual iHMMs are
             enhanced and the between-task similarities are learned.
             Therefore, in addition to improved learning of each of the
             models via appropriate data sharing, the learned sharing
             mechanisms are used to infer interdata relationships of
             interest for data search. Specifically, the MTL-learned
             task-level sharing mechanisms are used to define the
             affinity matrix in a graph-diffusion sorting framework. To
             speed up the MCMC inference for large databases, the
             nDP-iHMM is truncated to yield a nested Dirichlet-distribution
             based HMM representation, which accommodates fast
             variational Bayesian (VB) analysis for large-scale
             inference, and the effectiveness of the framework is
             demonstrated using a database composed of 2500 digital music
             pieces. © 2008 IEEE.},
   Doi = {10.1109/TSP.2008.924798},
   Key = {fds258057}
}

@article{fds257980,
   Author = {Ni, K and Carin, L and Dunson, D},
   Title = {Multi-task learning for sequential data via iHMMs and the
             nested Dirichlet process},
   Journal = {ACM International Conference Proceeding Series},
   Volume = {227},
   Pages = {689-696},
   Publisher = {ACM Press},
   Year = {2007},
   Month = {August},
   url = {http://dx.doi.org/10.1145/1273496.1273583},
   Abstract = {A new hierarchical nonparametric Bayesian model is proposed
             for the problem of multitask learning (MTL) with sequential
             data. Sequential data are typically modeled with a hidden
             Markov model (HMM), for which one often must choose an
             appropriate model structure (number of states) before
             learning. Here we model sequential data from each task with
             an infinite hidden Markov model (iHMM), avoiding the problem
             of model selection. The MTL for iHMMs is implemented by
             imposing a nested Dirichlet process (nDP) prior on the base
             distributions of the iHMMs. The nDP-iHMM MTL method allows
             us to perform task-level clustering and data-level
             clustering simultaneously, with which the learning for
             individual iHMMs is enhanced and between-task similarities
             are learned. Learning and inference for the nDP-iHMM MTL are
             based on a Gibbs sampler. The effectiveness of the framework
             is demonstrated using synthetic data as well as real music
             data.},
   Doi = {10.1145/1273496.1273583},
   Key = {fds257980}
}

@article{fds257851,
   Author = {Carlson, DE and Vogelstein, JT and Wu, Q and Lian, W and Zhou, M and Stoetzner, CR and Kipke, D and Weber, D and Dunson, DB and Carin,
             L},
   Title = {Multichannel electrophysiological spike sorting via joint
             dictionary learning and mixture modeling},
   Journal = {IEEE Transactions on Biomedical Engineering},
   Volume = {61},
   Number = {1},
   Pages = {41-54},
   Publisher = {IEEE},
   Year = {2013},
   ISSN = {0018-9294},
   url = {http://dx.doi.org/10.1109/tbme.2013.2275751},
   Abstract = {We propose a methodology for joint feature learning and
             clustering of multichannel extracellular
             electrophysiological data, across multiple recording periods
             for action potential detection and classification (sorting).
             Our methodology improves over the previous state of the art
             principally in four ways. First, via sharing information
             across channels, we can better distinguish between
             single-unit spikes and artifacts. Second, our proposed
             "focused mixture model" (FMM) deals with units appearing,
             disappearing, or reappearing over multiple recording days,
             an important consideration for any chronic experiment.
             Third, by jointly learning features and clusters, we improve
             performance over previous attempts that proceeded via a
             two-stage learning process. Fourth, by directly modeling
             spike rate, we improve the detection of sparsely firing
             neurons. Moreover, our Bayesian methodology seamlessly
             handles missing data. We present state-of-the-art
             performance without requiring manual tuning of
             hyperparameters, considering both a public dataset with
             partial ground truth and a new experimental
             dataset.},
   Doi = {10.1109/tbme.2013.2275751},
   Key = {fds257851}
}

@article{fds257876,
   Author = {Fox, EB and Dunson, DB},
   Title = {Multiresolution Gaussian processes},
   Journal = {Advances in Neural Information Processing
             Systems},
   Volume = {1},
   Pages = {737-745},
   Year = {2012},
   Month = {December},
   ISSN = {1049-5258},
   Abstract = {We propose a multiresolution Gaussian process to capture
             long-range, non-Markovian dependencies while allowing for
             abrupt changes and non-stationarity. The multiresolution GP
             hierarchically couples a collection of smooth GPs, each
             defined over an element of a random nested partition.
             Long-range dependencies are captured by the top-level GP
             while the partition points define the abrupt changes. Due to
             the inherent conjugacy of the GPs, one can analytically
             marginalize the GPs and compute the marginal likelihood of
             the observations given the partition tree. This property
             allows for efficient inference of the partition itself, for
             which we employ graph-theoretic techniques. We apply the
             multiresolution GP to the analysis of magnetoencephalography
             (MEG) recordings of brain activity.},
   Key = {fds257876}
}
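
A compact way to see the multiresolution GP construction: draw a smooth top-level GP for long-range dependence, then add independent finer-scale GPs on the elements of a random partition, so abrupt changes occur at the partition points. The kernels and the uniform partition mechanism below are illustrative stand-ins for the paper's random nested partition and conjugate marginalization.

import numpy as np

rng = np.random.default_rng(8)

def rbf_cov(t, ls, var=1.0, jitter=1e-8):
    """Squared-exponential covariance matrix on a grid of inputs."""
    d = t[:, None] - t[None, :]
    return var * np.exp(-0.5 * (d / ls) ** 2) + jitter * np.eye(len(t))

t = np.linspace(0, 1, 400)
# Top-level GP: long length-scale, captures long-range dependence.
f = rng.multivariate_normal(np.zeros(len(t)), rbf_cov(t, ls=0.3))

# Random partition points induce abrupt changes between smooth pieces.
cuts = np.sort(rng.uniform(size=3))
labels = np.searchsorted(cuts, t)
for seg in np.unique(labels):
    idx = labels == seg
    # Independent finer-scale GP on each partition element.
    f[idx] += rng.multivariate_normal(np.zeros(idx.sum()),
                                      rbf_cov(t[idx], ls=0.05, var=0.3))
# f is now smooth within segments, with jumps at the partition points.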

@article{fds329118,
   Author = {Canale, A and Dunson, DB},
   Title = {Multiscale bernstein polynomials for densities},
   Journal = {Statistica Sinica},
   Volume = {26},
   Number = {3},
   Pages = {1175-1195},
   Publisher = {Institute of Statistical Science},
   Year = {2016},
   Month = {July},
   url = {http://dx.doi.org/10.5705/ss.202015.0163},
   Abstract = {Our focus is on constructing a multiscale nonparametric
             prior for densities. The Bayes density estimation literature
             is dominated by single scale methods, with the exception of
             Polya trees, which favor overly-spiky densities even when
             the truth is smooth. We propose a multiscale Bernstein
             polynomial family of priors, which produce smooth
             realizations that do not rely on hard partitioning of the
             support. At each level in an infinitely-deep binary tree, we
             place a beta dictionary density; within a scale the
             densities are equivalent to Bernstein polynomials. Using a
             stick-breaking characterization, stochastically decreasing
             weights are allocated to the finer scale dictionary
             elements. A slice sampler is used for posterior computation,
             and properties are described. The method characterizes
             densities with locally-varying smoothness, and can produce a
             sequence of coarse to fine density estimates. An extension
             for Bayesian testing of group differences is introduced and
             applied to DNA methylation array data.},
   Doi = {10.5705/ss.202015.0163},
   Key = {fds329118}
}
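
To make the multiscale construction concrete, the sketch below draws a random density of the form f(x) = sum_s w_s sum_k v_{s,k} Beta(x; k, s-k+1), with stick-breaking weights w_s decaying stochastically toward finer scales. The within-scale Dirichlet weights are a simplification of the paper's tree-structured allocation, and the slice sampler is not shown.

import numpy as np
from scipy import stats

rng = np.random.default_rng(9)

def random_multiscale_bernstein(S=6, kappa=1.0):
    """Draw a random density f(x) = sum_s w_s sum_k v_{s,k} Beta(x; k, s-k+1).

    Scale weights w_s come from stick-breaking, so mass decays stochastically
    toward finer scales; within-scale weights are Dirichlet here for
    simplicity.
    """
    sticks = rng.beta(1.0, kappa, size=S)
    w = sticks * np.concatenate(([1.0], np.cumprod(1.0 - sticks[:-1])))
    w /= w.sum()
    within = [rng.dirichlet(np.ones(s)) for s in range(1, S + 1)]

    def density(x):
        f = np.zeros_like(x, dtype=float)
        for s in range(1, S + 1):
            for k in range(1, s + 1):   # Bernstein basis at scale s
                f += w[s - 1] * within[s - 1][k - 1] * stats.beta.pdf(x, k, s - k + 1)
        return f

    return density

f = random_multiscale_bernstein()
x = np.linspace(0.01, 0.99, 99)
print(np.trapz(f(x), x))   # close to 1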

@article{fds257847,
   Author = {Petralia, F and Vogelstein, J and Dunson, DB},
   Title = {Multiscale dictionary learning for estimating conditional
             distributions},
   Journal = {Advances in Neural Information Processing
             Systems},
   Year = {2013},
   Month = {January},
   ISSN = {1049-5258},
   Abstract = {Nonparametric estimation of the conditional distribution of
             a response given high-dimensional features is a challenging
             problem. It is important to allow not only the mean but also
             the variance and shape of the response density to change
             flexibly with features, which are massive-dimensional. We
             propose a multiscale dictionary learning model, which
             expresses the conditional response density as a convex
             combination of dictionary densities, with the densities used
             and their weights dependent on the path through a tree
             decomposition of the feature space. A fast graph
             partitioning algorithm is applied to obtain the tree
             decomposition, with Bayesian methods then used to adaptively
             prune and average over different sub-trees in a soft
             probabilistic manner. The algorithm scales efficiently to
             approximately one million features. State of the art
             predictive performance is demonstrated for toy examples and
             two neuroscience applications including up to a million
             features.},
   Key = {fds257847}
}

@article{fds258028,
   Author = {Ji, S and Dunson, D and Carin, L},
   Title = {Multitask compressive sensing},
   Journal = {IEEE Transactions on Signal Processing},
   Volume = {57},
   Number = {1},
   Pages = {92-106},
   Publisher = {Institute of Electrical and Electronics Engineers
             (IEEE)},
   Year = {2009},
   Month = {January},
   ISSN = {1053-587X},
   url = {http://dx.doi.org/10.1109/TSP.2008.2005866},
   Abstract = {Compressive sensing (CS) is a framework whereby one performs
             N nonadaptive measurements to constitute a vector v ∈ ℝ^N,
             with v used to recover an approximation û ∈ ℝ^M to a
             desired signal u ∈ ℝ^M with N ≪ M; this is performed under
             the assumption that u is sparse in the basis represented by
             the matrix Ψ ∈ ℝ^{M×M}. It has been demonstrated that with
             appropriate design of the compressive measurements used to
             define v, the decompressive mapping v → û may be performed
             with error ‖u − û‖₂² having asymptotic properties analogous
             to those of the best transform-coding algorithm applied in
             the basis Ψ. The mapping v → û constitutes an inverse
             problem, often solved using ℓ1 regularization or related
             techniques. In most previous research, if L > 1 sets of
             compressive measurements {v_i}, i = 1, ..., L, are
             performed, each of the associated signals {u_i} is
             recovered one at a time, independently. In many
             applications the "tasks" defined by the mappings v_i → u_i
             are not statistically independent, and it may be possible
             to improve the performance of the inversion if statistical
             interrelationships are exploited. In this paper, we address
             this problem within a multitask learning setting, wherein
             the mapping v_i → u_i for each task corresponds to
             inferring the parameters (here, wavelet coefficients)
             associated with the desired signal u_i, and a shared prior
             is placed across all of the L tasks. Under this
             hierarchical Bayesian modeling, data from all L tasks
             contribute toward inferring a posterior on the
             hyperparameters, and once the shared prior is thereby
             inferred, the data from each of the L individual tasks are
             then employed to estimate the task-dependent wavelet
             coefficients. An empirical Bayesian procedure for the
             estimation of hyperparameters is considered; two fast
             inference algorithms extending the relevance vector machine
             (RVM) are developed. Example results on several data sets
             demonstrate the effectiveness and robustness of the
             proposed algorithms. © 2008 IEEE.},
   Doi = {10.1109/TSP.2008.2005866},
   Key = {fds258028}
}
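
The hierarchical scheme described above pools data from all L tasks to learn shared hyperparameters by empirical Bayes. A minimal Python sketch under simplifying assumptions: known noise variance and a crude RVM-style fixed-point update for shared prior precisions; all dimensions and constants are toy choices, not values from the paper:

    import numpy as np

    rng = np.random.default_rng(1)
    L, n, m = 5, 40, 60                      # tasks, measurements, signal length
    Phi = [rng.normal(size=(n, m)) for _ in range(L)]
    u = rng.normal(size=m) * (rng.random(m) < 0.1)   # shared sparse support
    v = [P @ (u + 0.05 * rng.normal(size=m)) for P in Phi]

    alpha = np.ones(m)      # prior precisions shared across all L tasks
    s2 = 0.01               # noise variance, assumed known in this sketch
    for _ in range(30):
        mus, Sigmas = [], []
        for P, vi in zip(Phi, v):
            Sigma = np.linalg.inv(np.diag(alpha) + P.T @ P / s2)
            mus.append(Sigma @ P.T @ vi / s2)
            Sigmas.append(Sigma)
        # Empirical Bayes fixed point pools evidence from all tasks.
        num = sum(1 - alpha * np.diag(S) for S in Sigmas)
        den = sum(mu_i ** 2 for mu_i in mus)
        alpha = np.clip(num / (den + 1e-12), 1e-6, 1e6)
    print(np.sum(alpha < 10))   # rough size of the inferred shared support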

@article{fds257859,
   Author = {Hannah, LA and Dunson, DB},
   Title = {Multivariate convex regression with adaptive
             partitioning},
   Journal = {Journal of Machine Learning Research},
   Volume = {14},
   Pages = {3153-3188},
   Publisher = {MICROTOME PUBL},
   Year = {2013},
   Month = {November},
   ISSN = {1532-4435},
   Abstract = {We propose a new, nonparametric method for multivariate
             regression subject to convexity or concavity constraints on
             the response function. Convexity constraints are common in
             economics, statistics, operations research, financial
             engineering and optimization, but there is currently no
             multivariate method that is stable and computationally
             feasible for more than a few thousand observations. We
             introduce convex adaptive partitioning (CAP), which creates
             a globally convex regression model from locally linear
             estimates fit on adaptively selected covariate partitions.
             CAP is a computationally efficient, consistent method for
             convex regression. We demonstrate empirical performance by
             comparing the performance of CAP to other shape-constrained
             and unconstrained regression methods for predicting weekly
             wages and value function approximation for pricing American
             basket options. © 2013 Lauren A. Hannah and David B.
             Dunson.},
   Key = {fds257859}
}
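
One classical way to turn locally linear fits into a globally convex estimate is to take the pointwise maximum of the fitted hyperplanes. A minimal Python sketch in that spirit; the crude quantile split below is a stand-in for CAP's adaptive partition selection:

    import numpy as np

    rng = np.random.default_rng(2)
    X = rng.uniform(-2, 2, (500, 2))
    y = (X ** 2).sum(axis=1) + rng.normal(0, 0.1, 500)  # convex truth + noise

    # Crude cells along the first coordinate (CAP selects these adaptively).
    cells = np.digitize(X[:, 0], np.quantile(X[:, 0], [0.25, 0.5, 0.75]))

    planes = []
    for c in np.unique(cells):
        idx = cells == c
        A = np.column_stack([np.ones(idx.sum()), X[idx]])
        coef, *_ = np.linalg.lstsq(A, y[idx], rcond=None)  # local linear fit
        planes.append(coef)
    planes = np.array(planes)

    def cap_predict(Xnew):
        A = np.column_stack([np.ones(len(Xnew)), Xnew])
        return (A @ planes.T).max(axis=1)  # max of hyperplanes is convex

    print(cap_predict(np.array([[0.0, 0.0], [1.5, -1.0]])))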

@article{fds258046,
   Author = {Dunson, DB},
   Title = {MULTIVARIATE KERNEL PARTITION PROCESS MIXTURES.},
   Journal = {Statistica Sinica},
   Volume = {20},
   Number = {4},
   Pages = {1395-1422},
   Year = {2010},
   Month = {October},
   ISSN = {1017-0405},
   Abstract = {Mixtures provide a useful approach for relaxing parametric
             assumptions. Discrete mixture models induce clusters,
             typically with the same cluster allocation for each
             parameter in multivariate cases. As a more flexible approach
             that facilitates sparse nonparametric modeling of
             multivariate random effects distributions, this article
             proposes a kernel partition process (KPP) in which the
             cluster allocation varies for different parameters. The KPP
             is shown to be the driving measure for a multivariate
             ordered Chinese restaurant process that induces a
             highly-flexible dependence structure in local clustering.
             This structure allows the relative locations of the random
             effects to inform the clustering process, with
             spatially-proximal random effects likely to be assigned the
             same cluster index. An exact block Gibbs sampler is
             developed for posterior computation, avoiding truncation of
             the infinite measure. The methods are applied to hormone
             curve data, and a dependent KPP is proposed for
             classification from functional predictors.},
   Key = {fds258046}
}

@article{fds365275,
   Author = {Russo, M and Singer, BH and Dunson, DB},
   Title = {MULTIVARIATE MIXED MEMBERSHIP MODELING: INFERRING
             DOMAIN-SPECIFIC RISK PROFILES.},
   Journal = {The annals of applied statistics},
   Volume = {16},
   Number = {1},
   Pages = {391-413},
   Year = {2022},
   Month = {March},
   url = {http://dx.doi.org/10.1214/21-aoas1496},
   Abstract = {Characterizing the shared memberships of individuals in a
             classification scheme poses severe interpretability issues,
             even when using a moderate number of classes (say 4). Mixed
             membership models quantify this phenomenon, but they
             typically focus on goodness-of-fit more than on
             interpretable inference. To achieve a good numerical fit,
             these models may in fact require many extreme profiles,
             making the results difficult to interpret. We introduce a
             new class of multivariate mixed membership models that, when
             variables can be partitioned into subject-matter based
             domains, can provide a good fit to the data using fewer
             profiles than standard formulations. The proposed model
             explicitly accounts for the blocks of variables
             corresponding to the distinct domains along with a
             cross-domain correlation structure, which provides new
             information about shared membership of individuals in a
             complex classification scheme. We specify a multivariate
             logistic normal distribution for the membership vectors,
             which allows easy introduction of auxiliary information
             leveraging a latent multivariate logistic regression. A
             Bayesian approach to inference, relying on Pólya gamma data
             augmentation, facilitates efficient posterior computation
             via Markov Chain Monte Carlo. We apply this methodology to a
             spatially explicit study of malaria risk over time on the
             Brazilian Amazon frontier.},
   Doi = {10.1214/21-aoas1496},
   Key = {fds365275}
}
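
The Pólya-gamma augmentation used above yields conditionally Gaussian updates for logistic-type models. A minimal Python sketch of the idea in the simpler setting of Bayesian logistic regression, with a crude truncated-series PG draw (exact samplers exist); every setting below is a toy choice:

    import numpy as np

    rng = np.random.default_rng(3)

    def rpolyagamma(b, c, trunc=200):
        """Approximate PG(b, c) draw via its truncated infinite-sum form."""
        k = np.arange(1, trunc + 1)
        g = rng.gamma(b, 1.0, trunc)
        return np.sum(g / ((k - 0.5) ** 2 + (c / (2 * np.pi)) ** 2)) / (2 * np.pi ** 2)

    X = rng.normal(size=(100, 3))
    beta_true = np.array([1.0, -0.5, 0.25])
    y = rng.binomial(1, 1 / (1 + np.exp(-X @ beta_true)))

    beta, B0_inv = np.zeros(3), np.eye(3)   # prior: beta ~ N(0, I)
    for _ in range(200):                    # Gibbs sweeps
        omega = np.array([rpolyagamma(1, xi @ beta) for xi in X])
        V = np.linalg.inv(X.T @ (omega[:, None] * X) + B0_inv)
        m = V @ (X.T @ (y - 0.5))
        beta = rng.multivariate_normal(m, V)
    print(beta)   # final draw; should sit near beta_true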

@article{fds257984,
   Author = {Ren, L and Dunson, DB and Lindroth, S and Carin, L},
   Title = {Music analysis with a Bayesian dynamic model},
   Journal = {ICASSP, IEEE International Conference on Acoustics, Speech
             and Signal Processing - Proceedings},
   Pages = {1681-1684},
   Publisher = {IEEE},
   Year = {2009},
   Month = {September},
   ISSN = {1520-6149},
   url = {http://dx.doi.org/10.1109/ICASSP.2009.4959925},
   Abstract = {A Bayesian dynamic model is developed to model complex
             sequential data, with a focus on audio signals from music.
             The music is represented in terms of a sequence of discrete
             observations, and the sequence is modeled using a hidden
             Markov model (HMM) with time-evolving parameters. The model
             imposes the belief that observations that are temporally
             proximate are more likely to be drawn from HMMs with similar
             parameters, while also allowing for "innovation" associated
             with abrupt changes in the music texture. Segmentation of a
             given musical piece is constituted via the model inference
             and the results are compared with other models and also to a
             conventional music-theoretic analysis. ©2009
             IEEE.},
   Doi = {10.1109/ICASSP.2009.4959925},
   Key = {fds257984}
}
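
The segmentation above is driven by HMM likelihood computations, whose core is the forward recursion. A minimal Python sketch for a fixed-parameter, discrete-output HMM; the paper instead lets the parameters evolve in time, and the toy matrices below are our own:

    import numpy as np

    def forward_loglik(obs, pi0, A, B):
        """Log-likelihood of a discrete-output HMM via the scaled forward pass."""
        alpha = pi0 * B[:, obs[0]]
        ll = np.log(alpha.sum())
        alpha = alpha / alpha.sum()
        for o in obs[1:]:
            alpha = (alpha @ A) * B[:, o]
            ll += np.log(alpha.sum())
            alpha = alpha / alpha.sum()
        return ll

    # Toy 2-state, 3-symbol model; audio would first be vector-quantized
    # into such a discrete observation sequence.
    pi0 = np.array([0.6, 0.4])
    A = np.array([[0.9, 0.1], [0.2, 0.8]])
    B = np.array([[0.7, 0.2, 0.1], [0.1, 0.3, 0.6]])
    print(forward_loglik([0, 1, 2, 2, 0], pi0, A, B))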

@article{fds257905,
   Author = {Dollé, MET and Snyder, WK and Dunson, DB and Vijg,
             J},
   Title = {Mutational fingerprints of aging.},
   Journal = {Nucleic acids research},
   Volume = {30},
   Number = {2},
   Pages = {545-549},
   Year = {2002},
   Month = {January},
   ISSN = {0305-1048},
   url = {http://dx.doi.org/10.1093/nar/30.2.545},
   Abstract = {Using a lacZ plasmid transgenic mouse model, spectra of
             spontaneous point mutations were determined in brain, heart,
             liver, spleen and small intestine in young and old mice.
             While similar at a young age, the mutation spectra among
             these organs were significantly different in old age. In
             brain and heart G:C-->A:T transitions at CpG sites were the
             predominant mutation, suggesting that oxidative damage is
             not a major mutagenic event in these tissues. Other base
             changes, especially those affecting A:T base pairs,
             positively correlated with increasing proliferative activity
             of the different tissues. A relatively high percentage of
             base changes at A:T base pairs and compound mutants were
             found in both spleen and spontaneous lymphoma, suggesting a
             possible role of the hypermutation process in splenocytes in
             carcinogenesis. The similar mutant spectra observed at a
             young age may reflect a common mutation mechanism for all
             tissues that could be driven by the rapid cell division that
             takes place during development. However, the spectra of the
             young tissues did not resemble that of the most
             proliferative aged tissue, implying that replicative history
             per se is not the underlying causal factor of age-related
             organ-specific differences in mutation spectra. Rather,
             differences in organ function, possibly in association with
             replicative history, may explain the divergence in mutation
             spectra during aging.},
   Doi = {10.1093/nar/30.2.545},
   Key = {fds257905}
}

@article{fds370635,
   Author = {Young, AL and van den Boom, W and Schroeder, RA and Krishnamoorthy,
             V and Raghunathan, K and Wu, H-T and Dunson, DB},
   Title = {Mutual information: Measuring nonlinear dependence in
             longitudinal epidemiological data.},
   Journal = {PLoS One},
   Volume = {18},
   Number = {4},
   Pages = {e0284904},
   Year = {2023},
   url = {http://dx.doi.org/10.1371/journal.pone.0284904},
   Abstract = {Given a large clinical database of longitudinal patient
             information including many covariates, it is computationally
             prohibitive to consider all types of interdependence between
             patient variables of interest. This challenge motivates the
             use of mutual information (MI), a statistical summary of
             data interdependence with appealing properties that make it
             a suitable alternative or addition to correlation for
             identifying relationships in data. MI: (i) captures all
             types of dependence, both linear and nonlinear, (ii) is zero
             only when random variables are independent, (iii) serves as
             a measure of relationship strength (similar to but more
general than R²), and (iv) is interpreted the same way for
             numerical and categorical data. Unfortunately, MI typically
             receives little to no attention in introductory statistics
             courses and is more difficult than correlation to estimate
             from data. In this article, we motivate the use of MI in the
             analyses of epidemiologic data, while providing a general
             introduction to estimation and interpretation. We illustrate
             its utility through a retrospective study relating
             intraoperative heart rate (HR) and mean arterial pressure
             (MAP). We: (i) show postoperative mortality is associated
             with decreased MI between HR and MAP and (ii) improve
             existing postoperative mortality risk assessment by
             including MI and additional hemodynamic statistics.},
   Doi = {10.1371/journal.pone.0284904},
   Key = {fds370635}
}
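
A plug-in estimate of MI discretizes the data and applies the definition directly; the article treats estimation with more care, so the sketch below (with an arbitrary bin count) is only illustrative:

    import numpy as np

    def mutual_information(x, y, bins=20):
        """Histogram plug-in MI estimate, in nats."""
        pxy, _, _ = np.histogram2d(x, y, bins=bins)
        pxy = pxy / pxy.sum()
        px = pxy.sum(axis=1, keepdims=True)
        py = pxy.sum(axis=0, keepdims=True)
        nz = pxy > 0
        return float(np.sum(pxy[nz] * np.log(pxy[nz] / (px @ py)[nz])))

    rng = np.random.default_rng(4)
    x = rng.normal(size=5000)
    y_indep = rng.normal(size=5000)
    y_dep = x ** 2 + rng.normal(0, 0.1, 5000)  # near-zero Pearson correlation
    print(mutual_information(x, y_indep), mutual_information(x, y_dep))

The second call returns a clearly positive value even though the linear correlation between x and x² is close to zero, illustrating property (i) above.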

@article{fds257901,
   Author = {Wilcox, AJ and Baird, DD and Dunson, D and McChesney, R and Weinberg,
             CR},
   Title = {Natural limits of pregnancy testing in relation to the
             expected menstrual period.},
   Journal = {JAMA},
   Volume = {286},
   Number = {14},
   Pages = {1759-1761},
   Year = {2001},
   Month = {October},
   ISSN = {0098-7484},
   url = {http://dx.doi.org/10.1001/jama.286.14.1759},
   Abstract = {Context: Pregnancy test kits routinely recommend testing
             "as early as the first day of the missed period." However, a
             pregnancy cannot be detected before the blastocyst implants.
             Due to natural variability in the timing of ovulation,
             implantation does not necessarily occur before the expected
             onset of next menses. Objective: To estimate the maximum
             screening sensitivity of pregnancy tests when used on the
             first day of the expected period, taking into account the
             natural variability of ovulation and implantation. Design
             and setting: Community-based prospective cohort study
             conducted in North Carolina between 1982 and 1986.
             Participants: Two hundred twenty-one healthy women 21 to 42
             years of age who were planning to conceive. Main outcome
             measures: Day of implantation, defined by the serial assay
             of first morning urine samples using an extremely sensitive
             immunoradiometric assay for human chorionic gonadotropin
             (hCG), relative to the first day of the missed period,
             defined as the day on which women expected their next menses
             to begin, based on self-reported usual cycle length.
             Results: Data were available for 136 clinical pregnancies
             conceived during the study, 14 (10%) of which had not yet
             implanted by the first day of the missed period. The highest
             possible screening sensitivity for an hCG-based pregnancy
             test therefore is estimated to be 90% (95% confidence
             interval [CI], 84%-94%) on the first day of the missed
             period. By 1 week after the first day of the missed period,
             the highest possible screening sensitivity is estimated to
             be 97% (95% CI, 94%-99%). Conclusions: In this study, using
             an extremely sensitive assay for hCG, 10% of clinical
             pregnancies were undetectable on the first day of missed
             menses. In practice, an even larger percentage of clinical
             pregnancies may be undetected by current test kits on this
             day, given their reported assay properties and other
             practical limitations.},
   Doi = {10.1001/jama.286.14.1759},
   Key = {fds257901}
}

@article{fds322545,
   Author = {Wang, X and Dunson, D and Leng, C},
   Title = {No penalty no tears: Least squares in high-dimensional
             linear models},
   Journal = {33rd International Conference on Machine Learning, ICML
             2016},
   Volume = {4},
   Pages = {2685-2706},
   Year = {2016},
   Month = {January},
   ISBN = {9781510829008},
   Abstract = {Ordinary least squares (OLS) is the default method for
             fitting linear models, but is not applicable for problems
             with dimensionality larger than the sample size. For these
             problems, we advocate the use of a generalized version of
             OLS motivated by ridge regression, and propose two novel
             three-step algorithms involving least squares fitting and
             hard thresholding. The algorithms are methodologically
             simple to understand intuitively, computationally easy to
             implement efficiently, and theoretically appealing for
             choosing models consistently. Numerical exercises comparing
             our methods with penalization-based approaches in
             simulations and data analyses illustrate the great potential
             of the proposed algorithms.},
   Key = {fds322545}
}
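
A minimal Python sketch of the three-step recipe: a ridge-motivated generalized OLS estimate that remains well defined when p exceeds n, hard thresholding, then a least squares refit. The tuning constants r and k are arbitrary choices for illustration:

    import numpy as np

    rng = np.random.default_rng(5)
    n, p, s = 100, 1000, 5
    X = rng.normal(size=(n, p))
    beta = np.zeros(p); beta[:s] = 3.0
    y = X @ beta + rng.normal(size=n)

    # Step 1: generalized OLS via the ridge form (an n x n solve, not p x p).
    r = 10.0
    b = X.T @ np.linalg.solve(X @ X.T + r * np.eye(n), y)

    # Step 2: hard-threshold, keeping the k largest coefficients.
    k = 10
    keep = np.argsort(-np.abs(b))[:k]

    # Step 3: refit ordinary least squares on the retained submodel.
    b_refit, *_ = np.linalg.lstsq(X[:, keep], y, rcond=None)
    print(sorted(keep[np.abs(b_refit) > 0.5]))  # typically recovers 0..4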

@article{fds257974,
   Author = {Bhattacharya, A and Dunson, D},
   Title = {Nonparametric Bayes classification and hypothesis testing on
             manifolds},
   Journal = {Journal of Multivariate Analysis},
   Volume = {111},
   Pages = {1-19},
   Publisher = {Elsevier BV},
   Year = {2012},
   Month = {October},
   ISSN = {0047-259X},
   url = {http://dx.doi.org/10.1016/j.jmva.2012.02.020},
   Abstract = {Our first focus is prediction of a categorical response
             variable using features that lie on a general manifold. For
             example, the manifold may correspond to the surface of a
             hypersphere. We propose a general kernel mixture model for
             the joint distribution of the response and predictors, with
             the kernel expressed in product form and dependence induced
             through the unknown mixing measure. We provide simple
             sufficient conditions for large support and weak and strong
             posterior consistency in estimating both the joint
             distribution of the response and predictors and the
             conditional distribution of the response. Focusing on a
             Dirichlet process prior for the mixing measure, these
             conditions hold using von Mises-Fisher kernels when the
             manifold is the unit hypersphere. In this case, Bayesian
             methods are developed for efficient posterior computation
             using slice sampling. Next we develop Bayesian nonparametric
             methods for testing whether there is a difference in
             distributions between groups of observations on the manifold
             having unknown densities. We prove consistency of the Bayes
             factor and develop efficient computational methods for its
             calculation. The proposed classification and testing methods
             are evaluated using simulation examples and applied to
             spherical data applications. © 2012 Elsevier
             Inc.},
   Doi = {10.1016/j.jmva.2012.02.020},
   Key = {fds257974}
}

@article{fds258044,
   Author = {Chung, Y and Dunson, DB},
   Title = {Nonparametric Bayes Conditional Distribution Modeling With
             Variable Selection.},
   Journal = {Journal of the American Statistical Association},
   Volume = {104},
   Number = {488},
   Pages = {1646-1660},
   Year = {2009},
   Month = {December},
   ISSN = {0162-1459},
   url = {http://hdl.handle.net/10161/4398},
   Abstract = {This article considers a methodology for flexibly
             characterizing the relationship between a response and
             multiple predictors. Goals are (1) to estimate the
             conditional response distribution addressing the
             distributional changes across the predictor space, and (2)
             to identify important predictors for the response
             distribution change both within local regions and globally.
             We first introduce the probit stick-breaking process (PSBP)
             as a prior for an uncountable collection of
             predictor-dependent random distributions and propose a PSBP
             mixture (PSBPM) of normal regressions for modeling the
             conditional distributions. A global variable selection
             structure is incorporated to discard unimportant predictors,
             while allowing estimation of posterior inclusion
             probabilities. Local variable selection is conducted relying
             on the conditional distribution estimates at different
             predictor points. An efficient stochastic search sampling
             algorithm is proposed for posterior computation. The methods
             are illustrated through simulation and applied to an
             epidemiologic study.},
   Doi = {10.1198/jasa.2009.tm08302},
   Key = {fds258044}
}
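
The PSBP builds mixture weights by pushing normal random variables through a probit link inside a stick-breaking construction. A minimal Python sketch of one truncated draw of the weights; in the regression version the underlying normals depend on predictors, and the truncation level here is arbitrary:

    import numpy as np
    from scipy.stats import norm

    rng = np.random.default_rng(6)

    def psbp_weights(alpha):
        """w_h = Phi(alpha_h) * prod_{l<h} (1 - Phi(alpha_l))."""
        V = norm.cdf(alpha)
        return V * np.cumprod(np.concatenate(([1.0], 1 - V[:-1])))

    alpha = rng.normal(0, 1, 25)  # latent normals, i.i.d. here for simplicity
    w = psbp_weights(alpha)
    print(w.sum())                # < 1; leftover mass sits in the tail sticks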

@article{fds322556,
   Author = {Durante, D and Dunson, DB},
   Title = {Nonparametric Bayes dynamic modelling of relational
             data},
   Journal = {Biometrika},
   Volume = {101},
   Number = {4},
   Pages = {883-898},
   Publisher = {Oxford University Press (OUP)},
   Year = {2014},
   Month = {December},
   url = {http://dx.doi.org/10.1093/biomet/asu040},
   Abstract = {Symmetric binary matrices representing relations are
             collected in many areas. Our focus is on dynamically
             evolving binary relational matrices, with interest being on
             inference on the relationship structure and prediction. We
             propose a nonparametric Bayesian dynamic model, which
             reduces dimensionality in characterizing the binary matrix
             through a lower-dimensional latent space representation,
             with the latent coordinates evolving in continuous time via
             Gaussian processes. By using a logistic mapping function
             from the link probability matrix space to the latent
             relational space, we obtain a flexible and computationally
tractable formulation. Employing Pólya-gamma data
             augmentation, an efficient Gibbs sampler is developed for
             posterior computation, with the dimension of the latent
             space automatically inferred. We provide theoretical results
             on flexibility of the model, and illustrate its performance
via simulation experiments. We also consider an application
             to co-movements in world financial markets.},
   Doi = {10.1093/biomet/asu040},
   Key = {fds322556}
}

@article{fds322543,
   Author = {Kunihama, T and Dunson, DB},
   Title = {Nonparametric Bayes inference on conditional
             independence},
   Journal = {Biometrika},
   Volume = {103},
   Number = {1},
   Pages = {35-47},
   Publisher = {Oxford University Press (OUP)},
   Year = {2015},
   Month = {January},
   url = {http://dx.doi.org/10.1093/biomet/asv060},
   Abstract = {In many application areas, a primary focus is on assessing
             evidence in the data refuting the assumption of independence
             of Y and X conditionally on Z, with Y response variables, X
             predictors of interest, and Z covariates. Ideally, one would
             have methods available that avoid parametric assumptions,
             allow Y, X, Z to be random variables on arbitrary spaces
             with arbitrary dimension, and accommodate rapid
             consideration of different candidate predictors. As a formal
             decision-theoretic approach has clear disadvantages in this
             context, we instead rely on an encompassing nonparametric
             Bayes model for the joint distribution of Y, X and Z, with
             conditional mutual information used as a summary of the
             strength of conditional dependence. We construct a
             functional of the encompassing model and empirical measure
             for estimation of conditional mutual information. The
             implementation relies on a single Markov chain Monte Carlo
             run under the encompassing model, with conditional mutual
             information for candidate models calculated as a byproduct.
             We provide an asymptotic theory supporting the approach, and
             apply the method to variable selection. The methods are
             illustrated through simulations and criminology
             applications.},
   Doi = {10.1093/biomet/asv060},
   Key = {fds322543}
}

@article{fds258056,
   Author = {MacLehose, RF and Dunson, DB},
   Title = {Nonparametric Bayes kernel-based priors for functional data
             analysis},
   Journal = {Statistica Sinica},
   Volume = {19},
   Number = {2},
   Pages = {611-629},
   Year = {2009},
   Month = {April},
   ISSN = {1017-0405},
   Abstract = {We focus on developing nonparametric Bayes methods for
             collections of dependent random functions, allowing
             individual curves to vary flexibly while adaptively
             borrowing information. A prior is proposed, which is
             expressed as a hierarchical mixture of weighted kernels
             placed at unknown locations. The induced prior for any
             individual function is shown to fall within a reproducing
             kernel Hilbert space. We allow flexible borrowing of
             information through the use of a hierarchical Dirichlet
             process prior for the random locations, along with a
             functional Dirichlet process for the weights. Theoretical
             properties are considered and an efficient MCMC algorithm is
             developed, relying on stick-breaking truncations. The
             methods are illustrated using simulation examples and an
             application to reproductive hormone data.},
   Key = {fds258056}
}

@article{fds258054,
   Author = {Dunson, DB},
   Title = {Nonparametric Bayes local partition models for random
             effects.},
   Journal = {Biometrika},
   Volume = {96},
   Number = {2},
   Pages = {249-262},
   Year = {2009},
   Month = {January},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/asp021},
   Abstract = {This paper focuses on the problem of choosing a prior for an
             unknown random effects distribution within a Bayesian
             hierarchical model. The goal is to obtain a sparse
             representation by allowing a combination of global and local
             borrowing of information. A local partition process prior is
             proposed, which induces dependent local clustering. Subjects
             can be clustered together for a subset of their parameters,
             and one learns about similarities between subjects
             increasingly as parameters are added. Some basic properties
             are described, including simple two-parameter expressions
             for marginal and conditional clustering probabilities. A
             slice sampler is developed which bypasses the need to
             approximate the countably infinite random measure in
             performing posterior computation. The methods are
             illustrated using simulation examples, and an application to
             hormone trajectory data.},
   Doi = {10.1093/biomet/asp021},
   Key = {fds258054}
}

@article{fds322541,
   Author = {Zhou, J and Herring, AH and Bhattacharya, A and Olshan, AF and Dunson,
             DB and National Birth Defects Prevention Study},
   Title = {Nonparametric Bayes modeling for case control studies with
             many predictors.},
   Journal = {Biometrics},
   Volume = {72},
   Number = {1},
   Pages = {184-192},
   Year = {2016},
   Month = {March},
   url = {http://dx.doi.org/10.1111/biom.12411},
   Abstract = {It is common in biomedical research to run case-control
             studies involving high-dimensional predictors, with the main
             goal being detection of the sparse subset of predictors
             having a significant association with disease. Usual
             analyses rely on independent screening, considering each
             predictor one at a time, or in some cases on logistic
             regression assuming no interactions. We propose a
             fundamentally different approach based on a nonparametric
             Bayesian low rank tensor factorization model for the
             retrospective likelihood. Our model allows a very flexible
             structure in characterizing the distribution of multivariate
             variables as unknown and without any linear assumptions as
             in logistic regression. Predictors are excluded only if they
             have no impact on disease risk, either directly or through
             interactions with other predictors. Hence, we obtain an
             omnibus approach for screening for important predictors.
             Computation relies on an efficient Gibbs sampler. The
             methods are shown to have high power and low false discovery
             rates in simulation studies, and we consider an application
             to an epidemiology study of birth defects.},
   Doi = {10.1111/biom.12411},
   Key = {fds322541}
}

@article{fds258045,
   Author = {Dunson, DB and Xing, C},
   Title = {Nonparametric Bayes Modeling of Multivariate Categorical
             Data.},
   Journal = {Journal of the American Statistical Association},
   Volume = {104},
   Number = {487},
   Pages = {1042-1051},
   Year = {2012},
   Month = {January},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/jasa.2009.tm08439},
   Abstract = {Modeling of multivariate unordered categorical (nominal)
             data is a challenging problem, particularly in high
             dimensions and cases in which one wishes to avoid strong
             assumptions about the dependence structure. Commonly used
             approaches rely on the incorporation of latent Gaussian
             random variables or parametric latent class models. The goal
             of this article is to develop a nonparametric Bayes
             approach, which defines a prior with full support on the
             space of distributions for multiple unordered categorical
             variables. This support condition ensures that we are not
             restricting the dependence structure a priori. We show this
             can be accomplished through a Dirichlet process mixture of
             product multinomial distributions, which is also a
             convenient form for posterior computation. Methods for
             nonparametric testing of violations of independence are
             proposed, and the methods are applied to model positional
             dependence within transcription factor binding
             motifs.},
   Doi = {10.1198/jasa.2009.tm08439},
   Key = {fds258045}
}
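
Generatively, the model draws a latent class from stick-breaking weights and then each categorical variable independently from a class- and variable-specific multinomial. A minimal simulation sketch; all dimensions and hyperparameters are toy choices:

    import numpy as np

    rng = np.random.default_rng(7)
    p, d, K = 5, 4, 10   # categorical variables, levels, truncation level

    V = rng.beta(1, 1.0, K)   # stick-breaking with DP concentration 1
    w = V * np.cumprod(np.concatenate(([1.0], 1 - V[:-1])))
    w = w / w.sum()           # renormalize the truncated weights

    psi = rng.dirichlet(np.ones(d), size=(K, p))  # class x variable multinomials

    def draw(n):
        z = rng.choice(K, size=n, p=w)            # latent class per subject
        return np.array([[rng.choice(d, p=psi[zi, j]) for j in range(p)]
                         for zi in z])

    print(draw(3))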

@article{fds327388,
   Author = {Durante, D and Dunson, DB and Vogelstein, JT},
   Title = {Nonparametric Bayes Modeling of Populations of
             Networks},
   Journal = {Journal of the American Statistical Association},
   Volume = {112},
   Number = {520},
   Pages = {1516-1530},
   Publisher = {Informa UK Limited},
   Year = {2017},
   Month = {October},
   url = {http://dx.doi.org/10.1080/01621459.2016.1219260},
   Abstract = {Replicated network data are increasingly available in many
             research fields. For example, in connectomic applications,
             interconnections among brain regions are collected for each
             patient under study, motivating statistical models which can
             flexibly characterize the probabilistic generative mechanism
             underlying these network-valued data. Available models for a
             single network are not designed specifically for inference
             on the entire probability mass function of a network-valued
             random variable and therefore lack flexibility in
             characterizing the distribution of relevant topological
             structures. We propose a flexible Bayesian nonparametric
             approach for modeling the population distribution of
             network-valued data. The joint distribution of the edges is
             defined via a mixture model that reduces dimensionality and
             efficiently incorporates network information within each
             mixture component by leveraging latent space
             representations. The formulation leads to an efficient Gibbs
             sampler and provides simple and coherent strategies for
             inference and goodness-of-fit assessments. We provide
             theoretical results on the flexibility of our model and
             illustrate improved performance—compared to
             state-of-the-art models—in simulations and application to
             human brain networks. Supplementary materials for this
             article are available online.},
   Doi = {10.1080/01621459.2016.1219260},
   Key = {fds327388}
}
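
A generative caricature of the mixture formulation: each replicated network picks a component, and edges form independently with probabilities driven by that component's latent space. A minimal Python sketch; the logistic link matches the spirit of such models, but all dimensions and values are our own toy choices:

    import numpy as np

    rng = np.random.default_rng(8)
    Vn, R, H = 20, 3, 2               # nodes, latent dimension, components

    w = rng.dirichlet(np.ones(H))     # mixture weights over network "types"
    Z = rng.normal(0, 1, (H, Vn, R))  # latent coordinates per component
    nu = -1.0                         # baseline log-odds of an edge

    def draw_network():
        h = rng.choice(H, p=w)
        P = 1 / (1 + np.exp(-(nu + Z[h] @ Z[h].T)))
        A = rng.binomial(1, P)
        A = np.triu(A, 1)
        return A + A.T, h             # symmetric adjacency, no self-loops

    A, h = draw_network()
    print(h, A.sum() // 2)            # component label and edge count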

@article{fds322536,
   Author = {Kunihama, T and Herring, AH and Halpern, CT and Dunson,
             DB},
   Title = {Nonparametric Bayes modeling with sample survey
             weights.},
   Journal = {Statistics & probability letters},
   Volume = {113},
   Pages = {41-48},
   Publisher = {Elsevier BV},
   Year = {2016},
   Month = {June},
   url = {http://dx.doi.org/10.1016/j.spl.2016.02.009},
   Abstract = {In population studies, it is standard to sample data via
             designs in which the population is divided into strata, with
             the different strata assigned different probabilities of
             inclusion. Although there have been some proposals for
             including sample survey weights into Bayesian analyses,
             existing methods require complex models or ignore the
             stratified design underlying the survey weights. We propose
             a simple approach based on modeling the distribution of the
             selected sample as a mixture, with the mixture weights
             appropriately adjusted, while accounting for uncertainty in
             the adjustment. We focus for simplicity on Dirichlet process
             mixtures but the proposed approach can be applied more
             broadly. We sketch a simple Markov chain Monte Carlo
             algorithm for computation, and assess the approach via
             simulations and an application.},
   Doi = {10.1016/j.spl.2016.02.009},
   Key = {fds322536}
}

@article{fds257861,
   Author = {Canale, A and Dunson, DB},
   Title = {Nonparametric Bayes modelling of count processes},
   Journal = {Biometrika},
   Volume = {100},
   Number = {4},
   Pages = {801-816},
   Publisher = {Oxford University Press (OUP)},
   Year = {2013},
   Month = {December},
   ISSN = {0006-3444},
   url = {http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcApp=PARTNER_APP&SrcAuth=LinksAMR&KeyUT=WOS:000327714200002&DestLinkType=FullRecord&DestApp=ALL_WOS&UsrCustomerID=47d3190e77e5a3a53558812f597b0b92},
   Abstract = {Data on count processes arise in a variety of applications,
             including longitudinal, spatial and imaging studies
             measuring count responses. The literature on statistical
             models for dependent count data is dominated by models built
             from hierarchical Poisson components. The Poisson assumption
             is not warranted in many applied contexts, and hierarchical
             Poisson models make restrictive assumptions about
             overdispersion in marginal distributions. In this article we
             propose a class of nonparametric Bayes count process models,
             constructed through rounding real-valued underlying
             processes. The proposed class of models accommodates
             situations in which separate count-valued functional data
             are observed for each subject under study. Theoretical
             results on large support and posterior consistency are
             established, and computational algorithms are developed
             based on Markov chain Monte Carlo simulation. The methods
             are evaluated via simulation and illustrated by application
             to longitudinal tumour counts and to asthma inhaler usage.
             © 2013 Biometrika Trust.},
   Doi = {10.1093/biomet/ast037},
   Key = {fds257861}
}
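
The rounding construction maps a real-valued latent process to counts through fixed thresholds. A minimal Python sketch with a Gaussian-process latent path and a floor-type rounding function; the kernel, thresholds, and values are toy choices:

    import numpy as np

    rng = np.random.default_rng(9)
    t = np.linspace(0, 1, 50)

    # Squared-exponential covariance for the latent real-valued process z(t).
    K = np.exp(-0.5 * ((t[:, None] - t[None, :]) / 0.1) ** 2)
    z = rng.multivariate_normal(np.full(50, 1.0), K + 1e-8 * np.eye(50))

    # Rounding: thresholds at the integers, negative values mapped to zero.
    y = np.clip(np.floor(z), 0, None).astype(int)
    print(y)   # a count-valued functional observation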

@article{fds342829,
   Author = {Zhang, Z and Descoteaux, M and Dunson, DB},
   Title = {Nonparametric Bayes Models of Fiber Curves Connecting Brain
             Regions.},
   Journal = {Journal of the American Statistical Association},
   Volume = {114},
   Number = {528},
   Pages = {1505-1517},
   Year = {2019},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2019.1574582},
   Abstract = {In studying structural inter-connections in the human brain,
             it is common to first estimate fiber bundles connecting
             different regions relying on diffusion MRI. These fiber
             bundles act as highways for neural activity. Current
             statistical methods reduce the rich information into an
             adjacency matrix, with the elements containing a count of
             fibers or a mean diffusion feature along the fibers. The
             goal of this article is to avoid discarding the rich
             geometric information of fibers, developing flexible models
             for characterizing the population distribution of fibers
             between brain regions of interest within and across
             different individuals. We start by decomposing each fiber
             into a rotation matrix, shape and translation from a global
             reference curve. These components are viewed as data lying
             on a product space composed of different Euclidean spaces
             and manifolds. To nonparametrically model the distribution
             within and across individuals, we rely on a hierarchical
             mixture of product kernels specific to the component spaces.
             Taking a Bayesian approach to inference, we develop
             efficient methods for posterior sampling. The approach
             automatically produces clusters of fibers within and across
             individuals. Applying the method to Human Connectome Project
             data, we find interesting relationships between brain fiber
             geometry and reading ability. Supplementary materials for
             this article, including a standardized description of the
             materials available for reproducing the work, are available
             as an online supplement.},
   Doi = {10.1080/01621459.2019.1574582},
   Key = {fds342829}
}

@article{fds376095,
   Author = {Datta, J and Banerjee, S and Dunson, DB},
   Title = {Nonparametric Bayes multiresolution testing for
             high-dimensional rare events},
   Journal = {Journal of Nonparametric Statistics},
   Year = {2024},
   Month = {January},
   url = {http://dx.doi.org/10.1080/10485252.2024.2309978},
   Abstract = {In a variety of application areas, there is interest in
             assessing evidence of differences in the intensity of event
             realizations between groups. For example, in cancer genomic
             studies collecting data on rare variants, the focus is on
             assessing whether and how the variant profile changes with
             the disease subtype. Motivated by this application, we
             develop multiresolution nonparametric Bayes tests for
             differential mutation rates across groups. The
             multiresolution approach yields fast and accurate detection
             of spatial clusters of rare variants, and our nonparametric
             Bayes framework provides great flexibility for modelling the
             intensities of rare variants. Some theoretical properties
             are also assessed, including weak consistency of our
             Dirichlet Process-Poisson-Gamma mixture over multiple
             resolutions. Simulation studies illustrate excellent small
             sample properties relative to competitors, and we apply the
             method to detect rare variants related to common variable
             immunodeficiency from whole exome sequencing data on 215
             patients and over 60,027 control subjects.},
   Doi = {10.1080/10485252.2024.2309978},
   Key = {fds376095}
}

@article{fds257962,
   Author = {Yang, H and O'Brien, S and Dunson, DB},
   Title = {Nonparametric Bayes Stochastically Ordered Latent Class
             Models.},
   Journal = {J Am Stat Assoc},
   Volume = {106},
   Number = {495},
   Pages = {807-817},
   Year = {2011},
   Month = {September},
   ISSN = {0162-1459},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/22505787},
   Abstract = {Latent class models (LCMs) are used increasingly for
             addressing a broad variety of problems, including sparse
             modeling of multivariate and longitudinal data, model-based
             clustering, and flexible inferences on predictor effects.
             Typical frequentist LCMs require estimation of a single
             finite number of classes, which does not increase with the
             sample size, and have a well-known sensitivity to parametric
             assumptions on the distributions within a class. Bayesian
             nonparametric methods have been developed to allow an
             infinite number of classes in the general population, with
             the number represented in a sample increasing with sample
             size. In this article, we propose a new nonparametric Bayes
             model that allows predictors to flexibly impact the
             allocation to latent classes, while limiting sensitivity to
             parametric assumptions by allowing class-specific
             distributions to be unknown subject to a stochastic ordering
             constraint. An efficient MCMC algorithm is developed for
             posterior computation. The methods are validated using
             simulation studies and applied to the problem of ranking
             medical procedures in terms of the distribution of patient
             morbidity.},
   Doi = {10.1198/jasa.2011.ap10058},
   Key = {fds257962}
}

@article{fds258064,
   Author = {Pennell, ML and Dunson, DB},
   Title = {Nonparametric bayes testing of changes in a response
             distribution with an ordinal predictor.},
   Journal = {Biometrics},
   Volume = {64},
   Number = {2},
   Pages = {413-423},
   Year = {2008},
   Month = {June},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2007.00885.x},
   Abstract = {In certain biomedical studies, one may anticipate changes in
             the shape of a response distribution across the levels of an
             ordinal predictor. For instance, in toxicology studies,
             skewness and modality might change as dose increases. To
             address this issue, we propose a Bayesian nonparametric
             method for testing for distribution changes across an
             ordinal predictor. Using a dynamic mixture of Dirichlet
             processes, we allow the response distribution to change
             flexibly at each level of the predictor. In addition, by
             assigning mixture priors to the hyperparameters, we can
             obtain posterior probabilities of no effect of the predictor
             and identify the lowest dose level for which there is an
             appreciable change in distribution. The method also provides
             a natural framework for performing tests across multiple
             outcomes. We apply our method to data from a genotoxicity
             experiment.},
   Doi = {10.1111/j.1541-0420.2007.00885.x},
   Key = {fds258064}
}

@article{fds258036,
   Author = {Bhattacharya, A and Dunson, DB},
   Title = {Nonparametric Bayesian density estimation on manifolds with
             applications to planar shapes.},
   Journal = {Biometrika},
   Volume = {97},
   Number = {4},
   Pages = {851-865},
   Year = {2010},
   Month = {December},
   ISSN = {0006-3444},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/22822255},
   Abstract = {Statistical analysis on landmark-based shape spaces has
             diverse applications in morphometrics, medical diagnostics,
             machine vision and other areas. These shape spaces are
             non-Euclidean quotient manifolds. To conduct nonparametric
             inferences, one may define notions of centre and spread on
             this manifold and work with their estimates. However, it is
             useful to consider full likelihood-based methods, which
             allow nonparametric estimation of the probability density.
             This article proposes a broad class of mixture models
             constructed using suitable kernels on a general compact
             metric space and then on the planar shape space in
             particular. Following a Bayesian approach with a
             nonparametric prior on the mixing distribution, conditions
             are obtained under which the Kullback-Leibler property
             holds, implying large support and weak posterior
             consistency. Gibbs sampling methods are developed for
             posterior computation, and the methods are applied to
             problems in density estimation and classification with
             shape-based predictors. Simulation studies show improved
             estimation performance relative to existing
             approaches.},
   Doi = {10.1093/biomet/asq044},
   Key = {fds258036}
}

@article{fds258003,
   Author = {Zhou, M and Chen, H and Paisley, J and Ren, L and Li, L and Xing, Z and Dunson, D and Sapiro, G and Carin, L},
   Title = {Nonparametric Bayesian dictionary learning for analysis of
             noisy and incomplete images.},
   Journal = {IEEE transactions on image processing : a publication of the
             IEEE Signal Processing Society},
   Volume = {21},
   Number = {1},
   Pages = {130-144},
   Year = {2012},
   Month = {January},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/21693421},
   Abstract = {Nonparametric Bayesian methods are considered for recovery
             of imagery based upon compressive, incomplete, and/or noisy
             measurements. A truncated beta-Bernoulli process is employed
             to infer an appropriate dictionary for the data under test
             and also for image recovery. In the context of compressive
             sensing, significant improvements in image recovery are
             manifested using learned dictionaries, relative to using
             standard orthonormal image expansions. The
             compressive-measurement projections are also optimized for
             the learned dictionary. Additionally, we consider simpler
             (incomplete) measurements, defined by measuring a subset of
             image pixels, uniformly selected at random. Spatial
             interrelationships within imagery are exploited through use
             of the Dirichlet and probit stick-breaking processes.
             Several example results are presented, with comparisons to
             other methods in the literature.},
   Doi = {10.1109/tip.2011.2160072},
   Key = {fds258003}
}
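
On the generative side, such models pair a truncated beta-Bernoulli process, which selects a sparse set of dictionary atoms per image patch, with Gaussian weights. A minimal sketch of that prior draw; all dimensions and hyperparameters are toy choices, and inference is not shown:

    import numpy as np

    rng = np.random.default_rng(10)
    P, K, N = 64, 128, 500    # patch size (8x8), dictionary size, patches

    a, b = 1.0, 1.0
    pi = rng.beta(a / K, b * (K - 1) / K, K)   # atom inclusion probabilities
    Z = rng.binomial(1, pi, (N, K))            # sparse binary atom usage
    W = rng.normal(0, 1, (N, K))               # atom weights
    D = rng.normal(0, 1 / np.sqrt(P), (P, K))  # dictionary atoms

    X = (Z * W) @ D.T + rng.normal(0, 0.01, (N, P))  # noisy patches
    print(Z.sum(axis=1).mean())   # average number of atoms per patch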

@article{fds257986,
   Author = {Zhou, M and Wang, C and Chen, M and Paisley, J and Dunson, D and Carin,
             L},
   Title = {Nonparametric bayesian matrix completion},
   Journal = {2010 IEEE Sensor Array and Multichannel Signal Processing
             Workshop, SAM 2010},
   Pages = {213-216},
   Publisher = {IEEE},
   Year = {2010},
   Month = {December},
   url = {http://dx.doi.org/10.1109/SAM.2010.5606741},
   Abstract = {The Beta-Binomial processes are considered for inferring
             missing values in matrices. The model moves beyond the
             low-rank assumption, modeling the matrix columns as residing
             in a nonlinear subspace. Large-scale problems are considered
             via efficient Gibbs sampling, yielding predictions as well
             as a measure of confidence in each prediction. Algorithm
             performance is considered for several datasets, with
             encouraging performance relative to existing approaches. ©
             2010 IEEE.},
   Doi = {10.1109/SAM.2010.5606741},
   Key = {fds257986}
}

@article{fds257960,
   Author = {Rodríguez, A and Dunson, DB},
   Title = {Nonparametric Bayesian models through probit stick-breaking
             processes.},
   Journal = {Bayesian analysis},
   Volume = {6},
   Number = {1},
   Pages = {145-178},
   Year = {2011},
   Month = {March},
   ISSN = {1936-0975},
   url = {http://dx.doi.org/10.1214/11-ba605},
   Abstract = {We describe a novel class of Bayesian nonparametric priors
             based on stick-breaking constructions where the weights of
             the process are constructed as probit transformations of
             normal random variables. We show that these priors are
             extremely flexible, allowing us to generate a great variety
             of models while preserving computational simplicity.
             Particular emphasis is placed on the construction of rich
             temporal and spatial processes, which are applied to two
             problems in finance and ecology.},
   Doi = {10.1214/11-ba605},
   Key = {fds257960}
}

@article{fds257878,
   Author = {Ding, M and He, L and Dunson, D and Carin, L},
   Title = {Nonparametric Bayesian Segmentation of a Multivariate
             Inhomogeneous Space-Time Poisson Process.},
   Journal = {Bayesian analysis},
   Volume = {7},
   Number = {4},
   Pages = {813-840},
   Year = {2012},
   Month = {December},
   ISSN = {1931-6690},
   url = {http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcApp=PARTNER_APP&SrcAuth=LinksAMR&KeyUT=WOS:000311975100005&DestLinkType=FullRecord&DestApp=ALL_WOS&UsrCustomerID=47d3190e77e5a3a53558812f597b0b92},
   Abstract = {A nonparametric Bayesian model is proposed for segmenting
             time-evolving multivariate spatial point process data. An
             inhomogeneous Poisson process is assumed, with a logistic
             stick-breaking process (LSBP) used to encourage
             piecewise-constant spatial Poisson intensities. The LSBP
             explicitly favors spatially contiguous segments, and infers
             the number of segments based on the observed data. The
             temporal dynamics of the segmentation and of the Poisson
             intensities are modeled with exponential correlation in
             time, implemented in the form of a first-order
             autoregressive model for uniformly sampled discrete data,
             and via a Gaussian process with an exponential kernel for
             general temporal sampling. We consider and compare two
             different inference techniques: a Markov chain Monte Carlo
             sampler, which has relatively high computational complexity;
             and an approximate and efficient variational Bayesian
             analysis. The model is demonstrated with a simulated example
             and a real example of space-time crime events in Cincinnati,
             Ohio, USA.},
   Doi = {10.1214/12-ba727},
   Key = {fds257878}
}

@article{fds258050,
   Author = {Rodriguez, A and Dunson, DB and Gelfand, AE},
   Title = {Nonparametric functional data analysis through Bayesian
             density estimation},
   Journal = {Biometrika},
   Volume = {96},
   Pages = {149-162},
   Year = {2008},
   Key = {fds258050}
}

@article{fds358025,
   Author = {Roy, A and Dunson, DB},
   Title = {Nonparametric graphical model for counts.},
   Journal = {Journal of machine learning research : JMLR},
   Volume = {21},
   Pages = {229},
   Year = {2020},
   Month = {December},
   Abstract = {Although multivariate count data are routinely collected in
             many application areas, there is surprisingly little work
             developing flexible models for characterizing their
             dependence structure. This is particularly true when
             interest focuses on inferring the conditional independence
             graph. In this article, we propose a new class of pairwise
             Markov random field-type models for the joint distribution
             of a multivariate count vector. By employing a novel type of
             transformation, we avoid restricting to non-negative
             dependence structures or inducing other restrictions through
             truncations. Taking a Bayesian approach to inference, we
             choose a Dirichlet process prior for the distribution of a
             random effect to induce great flexibility in the
             specification. An efficient Markov chain Monte Carlo (MCMC)
             algorithm is developed for posterior computation. We prove
             various theoretical properties, including posterior
             consistency, and show that our COunt Nonparametric Graphical
             Analysis (CONGA) approach has good performance relative to
             competitors in simulation studies. The methods are motivated
             by an application to neuron spike count data in
             mice.},
   Key = {fds358025}
}

@article{fds344442,
   Author = {Li, C and Lin, L and Dunson, DB},
   Title = {On posterior consistency of tail index for Bayesian kernel
             mixture models},
   Journal = {Bernoulli},
   Volume = {25},
   Number = {3},
   Pages = {1999-2028},
   Publisher = {Bernoulli Society for Mathematical Statistics and
             Probability},
   Year = {2019},
   Month = {August},
   url = {http://dx.doi.org/10.3150/18-bej1043},
   Doi = {10.3150/18-bej1043},
   Key = {fds344442}
}

@article{fds322554,
   Author = {Wang, X and Leng, C and Dunson, DB},
   Title = {On the consistency theory of high dimensional variable
             screening},
   Journal = {Advances in Neural Information Processing
             Systems},
   Volume = {2015-January},
   Pages = {2431-2439},
   Year = {2015},
   Month = {January},
   Abstract = {Variable screening is a fast dimension reduction technique
             for assisting high dimensional feature selection. As a
             preselection method, it selects a moderate size subset of
             candidate variables for further refining via feature
             selection to produce the final model. The performance of
             variable screening depends on both computational efficiency
             and the ability to dramatically reduce the number of
             variables without discarding the important ones. When the
             data dimension p is substantially larger than the sample
             size n, variable screening becomes crucial as 1) Faster
             feature selection algorithms are needed; 2) Conditions
             guaranteeing selection consistency might fail to hold. This
             article studies a class of linear screening methods and
             establishes consistency theory for this special class. In
             particular, we prove the restricted diagonally dominant
             (RDD) condition is a necessary and sufficient condition for
             strong screening consistency. As concrete examples, we show
             two screening methods SIS and HOLP are both strong screening
             consistent (subject to additional constraints) with large
probability if n > O((ρs + σ/τ)² log p) under random designs.
             In addition, we relate the RDD condition to the
             irrepresentable condition, and highlight limitations of
             SIS.},
   Key = {fds322554}
}
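
The two screening rules studied above admit one-line implementations: SIS ranks variables by absolute marginal correlation with the response, while HOLP ranks them by the minimum-norm least squares solution. A minimal sketch; dimensions and the preselection size d are arbitrary:

    import numpy as np

    rng = np.random.default_rng(11)
    n, p, s = 100, 2000, 4
    X = rng.normal(size=(n, p))
    beta = np.zeros(p); beta[:s] = 2.0
    y = X @ beta + rng.normal(size=n)

    sis_scores = np.abs(X.T @ y)                             # marginal screening
    holp_scores = np.abs(X.T @ np.linalg.solve(X @ X.T, y))  # HOLP

    d = 20   # moderate preselection set for downstream feature selection
    top_sis = set(np.argsort(-sis_scores)[:d])
    top_holp = set(np.argsort(-holp_scores)[:d])
    print(set(range(s)) <= top_sis, set(range(s)) <= top_holp)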

@article{fds257929,
   Author = {Wilcox, AJ and Baird, DD and Dunson, DB and McConnaughey, DR and Kesner,
             JS and Weinberg, CR},
   Title = {On the frequency of intercourse around ovulation: evidence
             for biological influences.},
   Journal = {Human reproduction (Oxford, England)},
   Volume = {19},
   Number = {7},
   Pages = {1539-1543},
   Year = {2004},
   Month = {July},
   ISSN = {0268-1161},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/15190016},
   Abstract = {<h4>Background</h4>Intercourse in mammals is often
             coordinated with ovulation, for example through fluctuations
             in libido or by the acceleration of ovulation with
             intercourse. Such coordination has not been established in
             humans. We explored this possibility by examining patterns
             of sexual intercourse in relation to ovulation.<h4>Methods</h4>Sixty-eight
             sexually active North Carolina women with either an
             intrauterine device or tubal ligation provided data for up
             to three menstrual cycles. These women collected daily urine
             specimens and kept daily diaries of intercourse and
             menstrual bleeding. Major estrogen and progesterone
             metabolites excreted in urine were used to identify the day
             of ovulation. The fertile days of the cycle were defined as
             the 6 consecutive days ending with ovulation. Women
             contributed a total of 171 ovulatory cycles. Menstrual
             bleeding days were excluded from analysis. Results: The
             frequency of intercourse rose during the follicular phase,
             peaking at ovulation and declining abruptly thereafter. The
             6 consecutive days with most frequent intercourse
             corresponded with the 6 fertile days of the menstrual cycle.
             Intercourse was 24% more frequent during the 6 fertile days
             than during the remaining non-bleeding days (P <
             0.001). Conclusions: There apparently are biological
             factors that promote intercourse during a woman's 6 fertile
             days.},
   Doi = {10.1093/humrep/deh305},
   Key = {fds257929}
}

@article{fds322539,
   Author = {Kabisa, S and Dunson, DB and Morris, JS},
   Title = {Online Variational Bayes Inference for High-Dimensional
             Correlated Data},
   Journal = {Journal of Computational and Graphical Statistics},
   Volume = {25},
   Number = {2},
   Pages = {426-444},
   Publisher = {Informa UK Limited},
   Year = {2016},
   Month = {April},
   url = {http://dx.doi.org/10.1080/10618600.2014.998336},
   Abstract = {High-dimensional data with hundreds of thousands of
             observations are becoming commonplace in many disciplines.
             The analysis of such data poses many computational
             challenges, especially when the observations are correlated
             over time and/or across space. In this article, we propose
             flexible hierarchical regression models for analyzing such
             data that accommodate serial and/or spatial correlation. We
             address the computational challenges involved in fitting
             these models by adopting an approximate inference framework.
             We develop an online variational Bayes algorithm that works
             by incrementally reading the data into memory one portion at
             a time. The performance of the method is assessed through
             simulation studies. The methodology is applied to analyze
             signal intensity in MRI images of subjects with knee
             osteoarthritis, using data from the Osteoarthritis
             Initiative. Supplementary materials for this article are
             available online.},
   Doi = {10.1080/10618600.2014.998336},
   Key = {fds322539}
}
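
% The streaming idea above can be illustrated with a deliberately
% simplified conjugate model; the paper's models additionally handle
% serial/spatial correlation and use variational approximations. A
% sketch, assuming Gaussian linear regression with known noise variance
% so the chunk-by-chunk update is exact:
%
%   import numpy as np
%
%   rng = np.random.default_rng(0)
%   p, sigma2 = 5, 1.0
%   beta_true = rng.standard_normal(p)
%   prec = np.eye(p)         # prior precision, N(0, I) prior
%   shift = np.zeros(p)      # prior precision times prior mean
%   for _ in range(200):     # read 200 chunks, one portion at a time
%       X = rng.standard_normal((50, p))
%       y = X @ beta_true + rng.standard_normal(50)
%       prec += X.T @ X / sigma2
%       shift += X.T @ y / sigma2
%   post_mean = np.linalg.solve(prec, shift)
%   print(np.round(post_mean - beta_true, 2))   # near zero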

@article{fds331654,
   Author = {Johndrow, JE and Mattingly, JC and Mukherjee, S and Dunson,
             D},
   Title = {Optimal approximating Markov chains for Bayesian
             inference},
   Year = {2015},
   Month = {August},
   Abstract = {The Markov Chain Monte Carlo method is the dominant paradigm
             for posterior computation in Bayesian analysis. It is common
             to control computation time by making approximations to the
             Markov transition kernel. Comparatively little attention has
             been paid to computational optimality in these approximating
             Markov Chains, or when such approximations are justified
             relative to obtaining shorter paths from the exact kernel.
             We give simple, sharp bounds for uniform approximations of
             uniformly mixing Markov chains. We then suggest a notion of
             optimality that incorporates computation time and
             approximation error, and use our bounds to make
             generalizations about properties of good approximations in
             the uniformly mixing setting. The relevance of these
             properties is demonstrated in applications to a
             minibatching-based approximate MCMC algorithm for large $n$
             logistic regression and low-rank approximations for Gaussian
             processes.},
   Key = {fds331654}
}
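
% One approximating kernel of the kind analyzed above is minibatch
% Metropolis-Hastings, where the log-likelihood ratio is estimated from
% a random subsample rescaled by N/m; the chain is cheaper per step but
% only approximately targets the posterior. A sketch (mine, with
% illustrative tuning constants) for large-n logistic regression with a
% single coefficient:
%
%   import numpy as np
%
%   rng = np.random.default_rng(0)
%   N, theta_true = 100_000, 0.7
%   x = rng.standard_normal(N)
%   y = rng.random(N) < 1.0 / (1.0 + np.exp(-theta_true * x))
%
%   def loglik(theta, idx):
%       # minibatch log-likelihood, rescaled by N / len(idx)
%       eta = theta * x[idx]
%       ll = np.where(y[idx], -np.log1p(np.exp(-eta)),
%                     -np.log1p(np.exp(eta)))
%       return ll.sum() * N / len(idx)
%
%   theta, m, chain = 0.0, 1000, []
%   for _ in range(2000):
%       prop = theta + 0.05 * rng.standard_normal()
%       idx = rng.integers(N, size=m)   # shared minibatch for both states
%       if np.log(rng.random()) < loglik(prop, idx) - loglik(theta, idx):
%           theta = prop
%       chain.append(theta)
%   print(np.mean(chain[500:]))         # close to 0.7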

@article{fds365798,
   Author = {Dey, P and Zhang, Z and Dunson, DB},
   Title = {Outlier detection for multi-network data.},
   Journal = {Bioinformatics (Oxford, England)},
   Volume = {38},
   Number = {16},
   Pages = {4011-4018},
   Year = {2022},
   Month = {August},
   url = {http://dx.doi.org/10.1093/bioinformatics/btac431},
   Abstract = {Motivation: It has become routine in neuroscience
             studies to measure brain networks for different individuals
             using neuroimaging. These networks are typically expressed
             as adjacency matrices, with each cell containing a summary
             of connectivity between a pair of brain regions. There is an
             emerging statistical literature describing methods for the
             analysis of such multi-network data in which nodes are
             common across networks but the edges vary. However, there
             has been essentially no consideration of the important
             problem of outlier detection. In particular, for certain
             subjects, the neuroimaging data are so poor quality that the
             network cannot be reliably reconstructed. For such subjects,
             the resulting adjacency matrix may be mostly zero or exhibit
             a bizarre pattern not consistent with a functioning brain.
             These outlying networks may serve as influential points,
             contaminating subsequent statistical analyses. We propose a
             simple Outlier DetectIon for Networks (ODIN) method relying
             on an influence measure under a hierarchical generalized
             linear model for the adjacency matrices. An efficient
             computational algorithm is described, and ODIN is
             illustrated through simulations and an application to data
             from the UK Biobank. Results: ODIN was successful in
             identifying moderate to extreme outliers. Removing such
             outliers can significantly change inferences in downstream
             applications. Availability and implementation: ODIN
             has been implemented in both Python and R and these
             implementations along with other code are publicly available
             at github.com/pritamdey/ODIN-python and github.com/pritamdey/ODIN-r,
             respectively. Supplementary information: Supplementary
             data are available at Bioinformatics online.},
   Doi = {10.1093/bioinformatics/btac431},
   Key = {fds365798}
}
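
% The reference implementations are at the repositories linked above.
% As a crude illustration of the idea (not ODIN's actual influence
% measure), one can score each subject's adjacency matrix by its
% Bernoulli deviance under edgewise probabilities pooled across
% subjects, then flag robust-z outliers:
%
%   import numpy as np
%
%   def outlier_scores(A):
%       # A: (n_subjects, V, V) binary adjacency matrices
%       p = A.mean(axis=0).clip(1e-6, 1 - 1e-6)   # pooled edge probs
%       dev = -(A * np.log(p) + (1 - A) * np.log1p(-p)).sum(axis=(1, 2))
%       med = np.median(dev)
%       mad = np.median(np.abs(dev - med)) + 1e-12
%       return (dev - med) / (1.4826 * mad)       # robust z-scores
%
%   rng = np.random.default_rng(0)
%   A = (rng.random((50, 20, 20)) < 0.3).astype(float)
%   A[0] = 0.0                                    # a "mostly zero" bad scan
%   print(np.argmax(np.abs(outlier_scores(A))))   # flags subject 0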

@article{fds322029,
   Author = {Wang, X and Guo, F and Heller, KA and Dunson, DB},
   Title = {Parallelizing MCMC with random partition
             trees},
   Journal = {Advances in Neural Information Processing
             Systems},
   Volume = {2015-January},
   Pages = {451-459},
   Year = {2015},
   Month = {January},
   Abstract = {The modern scale of data has brought new challenges to
             Bayesian inference. In particular, conventional MCMC
             algorithms are computationally very expensive for large data
             sets. A promising approach to solve this problem is
             embarrassingly parallel MCMC (EP-MCMC), which first
             partitions the data into multiple subsets and runs
             independent sampling algorithms on each subset. The subset
             posterior draws are then aggregated via some combining rules
             to obtain the final approximation. Existing EP-MCMC
             algorithms are limited by approximation accuracy and
             difficulty in resampling. In this article, we propose a new
             EP-MCMC algorithm PART that solves these problems. The new
             algorithm applies random partition trees to combine the
             subset posterior draws, which is distribution-free, easy to
             resample from and can adapt to multiple scales. We provide
             theoretical justification and extensive experiments
             illustrating empirical performance.},
   Key = {fds322029}
}
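
% A sketch of the embarrassingly parallel recipe described above, with
% the combination step simplified to a Gaussian (precision-weighted)
% product rule rather than PART's random partition trees; the model and
% tuning choices are illustrative:
%
%   import numpy as np
%
%   rng = np.random.default_rng(0)
%   K, n = 10, 1000
%   data = rng.normal(2.0, 1.0, size=K * n).reshape(K, n)
%
%   def subset_sampler(y, draws=5000):
%       # random-walk MH on the subset posterior N(mean(y), 1/len(y))
%       theta, out = 0.0, []
%       for _ in range(draws):
%           prop = theta + 0.1 * rng.standard_normal()
%           logr = 0.5 * len(y) * ((theta - y.mean()) ** 2
%                                  - (prop - y.mean()) ** 2)
%           if np.log(rng.random()) < logr:
%               theta = prop
%           out.append(theta)
%       return np.array(out[1000:])
%
%   sub = [subset_sampler(data[k]) for k in range(K)]  # parallel in practice
%   prec = np.array([1.0 / s.var() for s in sub])
%   combined = (prec * [s.mean() for s in sub]).sum() / prec.sum()
%   print(combined, data.mean())    # both near 2.0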

@article{fds258020,
   Author = {O'Brien, SM and Kupper, LL and Dunson, DB},
   Title = {Performance of tests of association in misspecified
             generalized linear models},
   Journal = {Journal of Statistical Planning and Inference},
   Volume = {136},
   Number = {9},
   Pages = {3090-3100},
   Publisher = {Elsevier BV},
   Year = {2006},
   Month = {September},
   ISSN = {0378-3758},
   url = {http://dx.doi.org/10.1016/j.jspi.2004.12.004},
   Abstract = {We examine the effects of modelling errors, such as
             underfitting and overfitting, on the asymptotic power of
             tests of association between an explanatory variable x and
             an outcome in the setting of generalized linear models. The
             regression function for x is approximated by a polynomial or
             another simple function, and a chi-square statistic is used
             to test whether the coefficients of the approximation are
             simultaneously equal to zero. Adding terms to the
             approximation increases asymptotic power if and only if the
             fit of the model increases by a certain quantifiable amount.
             Although a high degree of freedom approximation offers
             robustness to the shape of the unknown regression function,
             a low degree of freedom approximation can yield much higher
             asymptotic power even when the approximation is very poor.
             In practice, it is useful to compute the power of competing
             test statistics across the range of alternatives that are
             plausible a priori. This approach is illustrated through an
             application in epidemiology. © 2006 Elsevier B.V. All
             rights reserved.},
   Doi = {10.1016/j.jspi.2004.12.004},
   Key = {fds258020}
}
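
% A worked example of the chi-square test described above: approximate
% the unknown regression function by a quadratic and test whether its
% coefficients are jointly zero via a likelihood-ratio statistic (data,
% polynomial degree, and the helper function are illustrative):
%
%   import numpy as np
%   from scipy import stats
%
%   rng = np.random.default_rng(0)
%   n = 500
%   x = rng.uniform(-2, 2, n)
%   p = 1 / (1 + np.exp(-(0.4 * x + 0.3 * x ** 2)))
%   y = (rng.random(n) < p).astype(float)
%
%   def logit_loglik(X, y):
%       # Newton/IRLS fit of logistic regression; returns max log-lik
%       b = np.zeros(X.shape[1])
%       for _ in range(25):
%           mu = 1 / (1 + np.exp(-X @ b))
%           W = mu * (1 - mu)
%           b += np.linalg.solve(X.T @ (W[:, None] * X), X.T @ (y - mu))
%       mu = 1 / (1 + np.exp(-X @ b))
%       return np.sum(y * np.log(mu) + (1 - y) * np.log(1 - mu))
%
%   X0 = np.ones((n, 1))                           # null: no association
%   X1 = np.column_stack([np.ones(n), x, x ** 2])  # quadratic approx.
%   lr = 2 * (logit_loglik(X1, y) - logit_loglik(X0, y))
%   print("p-value:", stats.chi2.sf(lr, df=2))     # 2 added coefficients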

@article{fds329116,
   Author = {Li, D and Heyer, L and Jennings, VH and Smith, CA and Dunson,
             DB},
   Title = {Personalised estimation of a woman's most fertile
             days.},
   Journal = {The European journal of contraception & reproductive health
             care : the official journal of the European Society of
             Contraception},
   Volume = {21},
   Number = {4},
   Pages = {323-328},
   Year = {2016},
   Month = {August},
   url = {http://dx.doi.org/10.1080/13625187.2016.1196485},
   Abstract = {Objectives: We propose a new, personalised approach
             of estimating a woman's most fertile days that only requires
             recording the first day of menses and can use a smartphone
             to convey this information to the user so that she can plan
             or prevent pregnancy. Methods: We performed a
             retrospective analysis of two cohort studies (a North
             Carolina-based study and the Early Pregnancy Study [EPS])
             and a prospective multicentre trial (World Health
             Organization [WHO] study). The North Carolina study
             consisted of 68 sexually active women with either an
             intrauterine device or tubal ligation. The EPS comprised 221
             women who planned to become pregnant and had no known
             fertility problems. The WHO study consisted of 706 women
             from five geographically and culturally diverse settings.
             Bayesian statistical methods were used to design our
             proposed method, Dynamic Optimal Timing (DOT). Simulation
             studies were used to estimate the cumulative pregnancy
             risk. Results: For the proposed method, simulation
             analyses indicated a 4.4% cumulative probability of
             pregnancy over 13 cycles with correct use. After a
             calibration window, this method flagged between 11 and 13
             days per cycle on which unprotected intercourse should be
             avoided. Eligible women should have cycle lengths between 20
             and 40 days with a variability range less than or equal to 9
             days. Conclusions: DOT can easily be implemented by
             computer or smartphone applications, allowing for women to
             make more informed decisions about their fertility. This
             approach is already incorporated into a patent-pending
             system and is available for free download on iPhones and
             Androids.},
   Doi = {10.1080/13625187.2016.1196485},
   Key = {fds329116}
}

@article{fds362585,
   Author = {Roy, A and Lavine, I and Herring, AH and Dunson, DB},
   Title = {PERTURBED FACTOR ANALYSIS: ACCOUNTING FOR GROUP DIFFERENCES
             IN EXPOSURE PROFILES.},
   Journal = {The annals of applied statistics},
   Volume = {15},
   Number = {3},
   Pages = {1386-1404},
   Year = {2021},
   Month = {September},
   url = {http://dx.doi.org/10.1214/20-aoas1435},
   Abstract = {In this article we investigate group differences in
             phthalate exposure profiles using NHANES data. Phthalates
             are a family of industrial chemicals used in plastics and as
             solvents. There is increasing evidence of adverse health
             effects of exposure to phthalates on reproduction and
             neurodevelopment and concern about racial disparities in
             exposure. We would like to identify a single set of
             low-dimensional factors summarizing exposure to different
             chemicals, while allowing differences across groups.
             Improving on current multigroup additive factor models, we
             propose a class of Perturbed Factor Analysis (PFA) models
             that assume a common factor structure after perturbing the
             data via multiplication by a group-specific matrix. Bayesian
             inference algorithms are defined using a matrix normal
             hierarchical model for the perturbation matrices. The
             resulting model is just as flexible as current approaches in
             allowing arbitrarily large differences across groups but has
             substantial advantages that we illustrate in simulation
             studies. Applying PFA to NHANES data, we learn common
             factors summarizing exposures to phthalates, while showing
             clear differences across groups.},
   Doi = {10.1214/20-aoas1435},
   Key = {fds362585}
}
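
% The assumed structure can be written generatively: group g observes a
% shared factor model premultiplied by a group-specific perturbation
% matrix close to the identity. A simulation-only sketch (the paper's
% matrix normal priors and inference algorithms are not reproduced):
%
%   import numpy as np
%
%   rng = np.random.default_rng(0)
%   p, k, n = 10, 3, 200
%   Lambda = rng.standard_normal((p, k))          # shared loadings
%   for g in range(3):
%       Q_g = np.eye(p) + 0.1 * rng.standard_normal((p, p))
%       eta = rng.standard_normal((k, n))         # latent factors
%       X_g = Q_g @ (Lambda @ eta + 0.3 * rng.standard_normal((p, n)))
%       print(g, X_g.shape)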

@article{fds370378,
   Author = {Xu, J and Li, Y and Yang, H and Dunson, D and Daubechies,
             I},
   Title = {PiPs: A kernel-based optimization scheme for analyzing
             non-stationary 1D signals},
   Journal = {Applied and Computational Harmonic Analysis},
   Volume = {66},
   Pages = {1-17},
   Year = {2023},
   Month = {September},
   url = {http://dx.doi.org/10.1016/j.acha.2023.04.002},
   Abstract = {This paper proposes a novel kernel-based optimization scheme
             to handle tasks in the analysis of 1D non-stationary
             oscillatory data, e.g., signal spectral estimation and
             single-channel source separation. The key insight of our
             optimization scheme for reconstructing the time-frequency
             information is that when a nonparametric regression is
             applied on some input values, the output regressed points
             would lie near the oscillatory pattern of the oscillatory 1D
             signal only if these input values are a good approximation
             of the ground-truth phase function. In this work, Gaussian
             Process (GP) is chosen to conduct this nonparametric
             regression: the oscillatory pattern is encoded as the
             Pattern-inducing Points (PiPs) which act as the training
             data points in the GP regression; while the targeted phase
             function is fed in to compute the correlation kernels,
             acting as the testing input. A better-approximated phase
             function generates more precise kernels, thus resulting in
             smaller optimization loss error when comparing the
             kernel-based regression output with the original signals. To
             the best of our knowledge, this is the first algorithm that
             can satisfactorily handle fully non-stationary oscillatory
             data, close and crossover frequencies, and general
             oscillatory patterns. Even in the example of a signal
             produced by slow variation in the parameters of a
             trigonometric expansion, we show that PiPs admits
             competitive or better performance in terms of accuracy and
             robustness than existing state-of-the-art
             algorithms.},
   Doi = {10.1016/j.acha.2023.04.002},
   Key = {fds370378}
}

@article{fds372789,
   Author = {Sachs, M and Sen, D and Lu, J and Dunson, D},
   Title = {Posterior Computation with the Gibbs Zig-Zag
             Sampler},
   Journal = {Bayesian Analysis},
   Volume = {18},
   Number = {3},
   Pages = {909-927},
   Year = {2023},
   Month = {January},
   url = {http://dx.doi.org/10.1214/22-BA1319},
   Abstract = {An intriguing new class of piecewise deterministic Markov
             processes (PDMPs) has recently been proposed as an
             alternative to Markov chain Monte Carlo (MCMC). We propose a
             new class of PDMPs termed Gibbs zig-zag samplers, which
             allow parameters to be updated in blocks with a zig-zag
             sampler applied to certain parameters and traditional
             MCMC-style updates to others. We demonstrate the flexibility
             of this framework on posterior sampling for logistic models
             with shrinkage priors for high-dimensional regression and
             random effects, and provide conditions for geometric
             ergodicity and the validity of a central limit
             theorem.},
   Doi = {10.1214/22-BA1319},
   Key = {fds372789}
}
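
% The zig-zag building block used above admits an exact simulation in
% one dimension. A sketch for a standard normal target (the Gibbs
% zig-zag additionally alternates such moves with MCMC-style block
% updates, not reproduced here): the event rate is max(0, v*x), so the
% next switching time solves an explicit quadratic.
%
%   import numpy as np
%
%   rng = np.random.default_rng(0)
%   x, v, t, T = 0.0, 1.0, 0.0, 10000.0
%   dt, next_grid, samples = 0.1, 0.0, []
%   while t < T:
%       a = v * x
%       E = rng.exponential()
%       tau = -a + np.sqrt(max(a, 0.0) ** 2 + 2.0 * E)  # exact event time
%       while next_grid < t + tau and next_grid < T:
%           samples.append(x + v * (next_grid - t))     # sample trajectory
%           next_grid += dt
%       x, t, v = x + v * tau, t + tau, -v              # flip velocity
%   print(np.mean(samples), np.var(samples))            # near 0 and 1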

@article{fds257877,
   Author = {Pati, D and Dunson, DB and Tokdar, ST},
   Title = {Posterior consistency in conditional distribution
             estimation.},
   Journal = {Journal of multivariate analysis},
   Volume = {116},
   Pages = {456-472},
   Year = {2013},
   Month = {April},
   ISSN = {0047-259X},
   url = {http://dx.doi.org/10.1016/j.jmva.2013.01.011},
   Abstract = {A wide variety of priors have been proposed for
             nonparametric Bayesian estimation of conditional
             distributions, and there is a clear need for theorems
             providing conditions on the prior for large support, as well
             as posterior consistency. Estimation of an uncountable
             collection of conditional distributions across different
             regions of the predictor space is a challenging problem,
             which differs in some important ways from density and mean
             regression estimation problems. Defining various topologies
             on the space of conditional distributions, we provide
             sufficient conditions for posterior consistency focusing on
             a broad class of priors formulated as predictor-dependent
             mixtures of Gaussian kernels. This theory is illustrated by
             showing that the conditions are satisfied for a class of
             generalized stick-breaking process mixtures in which the
             stick-breaking lengths are monotone, differentiable
             functions of a continuous stochastic process. We also
             provide a set of sufficient conditions for the case where
             stick-breaking lengths are predictor independent, such as
             those arising from a fixed Dirichlet process
             prior.},
   Doi = {10.1016/j.jmva.2013.01.011},
   Key = {fds257877}
}

@article{fds257860,
   Author = {Armagan, A and Dunson, DB and Lee, J and Bajwa, WU and Strawn,
             N},
   Title = {Posterior consistency in linear models under shrinkage
             priors},
   Journal = {Biometrika},
   Volume = {100},
   Number = {4},
   Pages = {1011-1018},
   Publisher = {Oxford University Press (OUP)},
   Year = {2013},
   Month = {December},
   ISSN = {0006-3444},
   url = {http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcApp=PARTNER_APP&SrcAuth=LinksAMR&KeyUT=WOS:000327714200017&DestLinkType=FullRecord&DestApp=ALL_WOS&UsrCustomerID=47d3190e77e5a3a53558812f597b0b92},
   Abstract = {We investigate the asymptotic behaviour of posterior
             distributions of regression coefficients in high-dimensional
             linear models as the number of dimensions grows with the
             number of observations. We show that the posterior
             distribution concentrates in neighbourhoods of the true
             parameter under simple sufficient conditions. These
             conditions hold under popular shrinkage priors given some
             sparsity assumptions. © 2013 Biometrika
             Trust.},
   Doi = {10.1093/biomet/ast028},
   Key = {fds257860}
}

@article{fds257840,
   Author = {Pati, D and Bhattacharya, A and Pillai, NS and Dunson,
             D},
   Title = {Posterior contraction in sparse bayesian factor models for
             massive covariance matrices},
   Journal = {Annals of Statistics},
   Volume = {42},
   Number = {3},
   Pages = {1102-1130},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2014},
   Month = {January},
   ISSN = {0090-5364},
   url = {http://dx.doi.org/10.1214/14-AOS1215},
   Abstract = {Sparse Bayesian factor models are routinely implemented for
             parsimonious dependence modeling and dimensionality
             reduction in high-dimensional applications. We provide
             theoretical understanding of such Bayesian procedures in
             terms of posterior convergence rates in inferring
             high-dimensional covariance matrices where the dimension can
             be larger than the sample size. Under relevant sparsity
             assumptions on the true covariance matrix, we show that
             commonly-used point mass mixture priors on the factor
             loadings lead to consistent estimation in the operator norm
             even when p ≫ n. One of our major contributions is to develop a
             new class of continuous shrinkage priors and provide
             insights into their concentration around sparse vectors.
             Using such priors for the factor loadings, we obtain similar
             rate of convergence as obtained with point mass mixture
             priors. To obtain the convergence rates, we construct test
             functions to separate points in the space of
             high-dimensional covariance matrices using insights from
             random matrix theory; the tools developed may be of
             independent interest. We also derive minimax rates and show
             that the Bayesian posterior rates of convergence coincide
             with the minimax rates up to a √(log n) term.},
   Doi = {10.1214/14-AOS1215},
   Key = {fds257840}
}

@article{fds258033,
   Author = {Crandell, JL and Dunson, DB},
   Title = {Posterior simulation across nonparametric models for
             functional clustering},
   Journal = {Sankhya B},
   Volume = {73},
   Number = {1},
   Pages = {42-61},
   Publisher = {Springer Nature},
   Year = {2011},
   Month = {May},
   ISSN = {0972-7671},
   url = {http://dx.doi.org/10.1007/s13571-011-0014-z},
   Abstract = {By choosing a species sampling random probability measure
             for the distribution of the basis coefficients, a general
             class of nonparametric Bayesian methods for clustering of
             functional data is developed. Allowing the basis functions
             to be unknown, one faces the problem of posterior simulation
             over a high-dimensional space of semiparametric models. To
             address this problem, we propose a novel Metropolis-Hastings
             algorithm for moving between models, with a nested
             generalized collapsed Gibbs sampler for updating the model
             parameters. Focusing on Dirichlet process priors for the
             distribution of the basis coefficients in multivariate
             linear spline models, we apply the approach to the problem
             of clustering of hormone trajectories. This approach allows
             the number of clusters and the shape of the trajectories
             within each cluster to be unknown. The methodology can be
             applied broadly to allow uncertainty in variable selection
             in semiparametric Bayes hierarchical models.},
   Doi = {10.1007/s13571-011-0014-z},
   Key = {fds258033}
}

@article{fds362554,
   Author = {Joubert, BR and Kioumourtzoglou, M-A and Chamberlain, T and Chen, HY and Gennings, C and Turyk, ME and Miranda, ML and Webster, TF and Ensor, KB and Dunson, DB and Coull, BA},
   Title = {Powering Research through Innovative Methods for Mixtures in
             Epidemiology (PRIME) Program: Novel and Expanded Statistical
             Methods.},
   Journal = {International journal of environmental research and public
             health},
   Volume = {19},
   Number = {3},
   Pages = {1378},
   Year = {2022},
   Month = {January},
   url = {http://dx.doi.org/10.3390/ijerph19031378},
   Abstract = {Humans are exposed to a diverse mixture of chemical and
             non-chemical exposures across their lifetimes. Well-designed
             epidemiology studies as well as sophisticated exposure
             science and related technologies enable the investigation of
             the health impacts of mixtures. While existing statistical
             methods can address the most basic questions related to the
             association between environmental mixtures and health
             endpoints, there were gaps in our ability to learn from
             mixtures data in several common epidemiologic scenarios,
             including high correlation among health and exposure
             measures in space and/or time, the presence of missing
             observations, the violation of important modeling
             assumptions, and the presence of computational challenges
             incurred by current implementations. To address these and
             other challenges, NIEHS initiated the Powering Research
             through Innovative methods for Mixtures in Epidemiology
             (PRIME) program, to support work on the development and
             expansion of statistical methods for mixtures. Six
             independent projects supported by PRIME have been highly
             productive but their methods have not yet been described
             collectively in a way that would inform application. We
             review 37 new methods from PRIME projects and summarize the
             work across previously published research questions, to
             inform methods selection and increase awareness of these new
             methods. We highlight important statistical advancements
             considering data science strategies, exposure-response
             estimation, timing of exposures, epidemiological methods,
             the incorporation of toxicity/chemical information,
             spatiotemporal data, risk assessment, and model performance,
             efficiency, and interpretation. Importantly, we link to
             software to encourage application and testing on other
             datasets. This review can enable more informed analyses of
             environmental mixtures. We stress training for early career
             scientists as well as innovation in statistical methodology
             as an ongoing need. Ultimately, we direct efforts to the
             common goal of reducing harmful exposures to improve public
             health.},
   Doi = {10.3390/ijerph19031378},
   Key = {fds362554}
}

@article{fds371472,
   Author = {Liu, R and Li, M and Dunson, DB},
   Title = {PPA: Principal parcellation analysis for brain connectomes
             and multiple traits.},
   Journal = {NeuroImage},
   Volume = {276},
   Pages = {120214},
   Year = {2023},
   Month = {August},
   url = {http://dx.doi.org/10.1016/j.neuroimage.2023.120214},
   Abstract = {Our understanding of the structure of the brain and its
             relationships with human traits is largely determined by how
             we represent the structural connectome. Standard practice
             divides the brain into regions of interest (ROIs) and
             represents the connectome as an adjacency matrix having
             cells measuring connectivity between pairs of ROIs.
             Statistical analyses are then heavily driven by the (largely
             arbitrary) choice of ROIs. In this article, we propose a
             human trait prediction framework utilizing a
             tractography-based representation of the brain connectome,
             which clusters fiber endpoints to define a data-driven white
             matter parcellation targeted to explain variation among
             individuals and predict human traits. This leads to
             Principal Parcellation Analysis (PPA), representing
             individual brain connectomes by compositional vectors
             building on a basis system of fiber bundles that captures
             the connectivity at the population level. PPA eliminates the
             need to choose atlases and ROIs a priori, and provides a
             simpler, vector-valued representation that facilitates
             easier statistical analysis compared to the complex graph
             structures encountered in classical connectome analyses. We
             illustrate the proposed approach through applications to
             data from the Human Connectome Project (HCP) and show that
             PPA connectomes improve power in predicting human traits
             over state-of-the-art methods based on classical
             connectomes, while dramatically improving parsimony and
             maintaining interpretability. Our PPA package is publicly
             available on GitHub, and can be implemented routinely for
             diffusion image data.},
   Doi = {10.1016/j.neuroimage.2023.120214},
   Key = {fds371472}
}
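
% The PPA package linked above is the reference implementation. The
% core representation can be imitated in a few lines: cluster pooled
% fiber endpoints, then describe each subject by the composition of its
% endpoints over clusters (toy random "endpoints" stand in for real
% tractography here):
%
%   import numpy as np
%   from sklearn.cluster import KMeans
%
%   rng = np.random.default_rng(0)
%   subjects = [rng.standard_normal((1000, 3)) for _ in range(5)]
%   km = KMeans(n_clusters=50, n_init=10,
%               random_state=0).fit(np.vstack(subjects))
%   comp = np.array([np.bincount(km.predict(s), minlength=50) / len(s)
%                    for s in subjects])
%   print(comp.shape)   # 5 subjects x 50-dim compositional vectors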

@article{fds362990,
   Author = {Guha, S and Jung, R and Dunson, D},
   Title = {Predicting phenotypes from brain connection
             structure},
   Journal = {Journal of the Royal Statistical Society. Series C: Applied
             Statistics},
   Volume = {71},
   Number = {3},
   Pages = {639-668},
   Year = {2022},
   Month = {June},
   url = {http://dx.doi.org/10.1111/rssc.12549},
   Abstract = {This article focuses on the problem of predicting a response
             variable based on a network-valued predictor. Our motivation
             is the development of interpretable and accurate predictive
             models for cognitive traits and neuro-psychiatric disorders
             based on an individual's brain connection network
             (connectome). Current methods reduce the complex,
             high-dimensional brain network into low-dimensional
             pre-specified features prior to applying standard predictive
             algorithms. These methods are sensitive to feature choice
             and inevitably discard important information. Instead, we
             propose a nonparametric Bayes class of models that utilize
             the entire adjacency matrix defining brain region
             connections to adaptively detect predictive algorithms,
             while maintaining interpretability. The Bayesian
             Connectomics (BaCon) model class utilizes
             Poisson–Dirichlet processes to find a lower dimensional,
             bidirectional (covariate, subject) pattern in the adjacency
             matrix. The small n, large p problem is transformed into a
             ‘small n, small q’ problem, facilitating an effective
             stochastic search of the predictors. A spike-and-slab prior
             for the cluster predictors strikes a balance between
             regression model parsimony and flexibility, resulting in
             improved inferences and test case predictions. We describe
             basic properties of the BaCon model and develop efficient
             algorithms for posterior computation. The resulting methods
             are found to outperform existing approaches and applied to a
             creative reasoning dataset.},
   Doi = {10.1111/rssc.12549},
   Key = {fds362990}
}

@article{fds258002,
   Author = {Chen, M and Zaas, A and Woods, C and Ginsburg, GS and Lucas, J and Dunson,
             D and Carin, L},
   Title = {Predicting Viral Infection From High-Dimensional Biomarker
             Trajectories.},
   Journal = {J Am Stat Assoc},
   Volume = {106},
   Number = {496},
   Pages = {1259-1279},
   Year = {2011},
   Month = {January},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/jasa.2011.ap10611},
   Abstract = {There is often interest in predicting an individual's latent
             health status based on high-dimensional biomarkers that vary
             over time. Motivated by time-course gene expression array
             data that we have collected in two influenza challenge
             studies performed with healthy human volunteers, we develop
             a novel time-aligned Bayesian dynamic factor analysis
             methodology. The time course trajectories in the gene
             expressions are related to a relatively low-dimensional
             vector of latent factors, which vary dynamically starting at
             the latent initiation time of infection. Using a
             nonparametric cure rate model for the latent initiation
             times, we allow selection of the genes in the viral response
             pathway, variability among individuals in infection times,
             and a subset of individuals who are not infected. As we
             demonstrate using held-out data, this statistical framework
             allows accurate predictions of infected individuals in
             advance of the development of clinical symptoms, without
             labeled data and even when the number of biomarkers vastly
             exceeds the number of individuals under study. Biological
             interpretation of several of the inferred pathways (factors)
             is provided.},
   Doi = {10.1198/jasa.2011.ap10611},
   Key = {fds258002}
}

@article{fds257957,
   Author = {Gordon, GJ and Dunson, D},
   Title = {Preface to the proceedings of AISTATS 2011},
   Journal = {Journal of Machine Learning Research},
   Volume = {15},
   Pages = {1-2},
   Year = {2011},
   Month = {December},
   ISSN = {1532-4435},
   Key = {fds257957}
}

@article{fds322555,
   Author = {Wang, Y and Dunson, D},
   Title = {Probabilistic curve learning: Coulomb repulsion and the
             electrostatic Gaussian process},
   Journal = {Advances in Neural Information Processing
             Systems},
   Volume = {2015-January},
   Pages = {1738-1746},
   Year = {2015},
   Month = {January},
   Abstract = {Learning of low dimensional structure in multidimensional
             data is a canonical problem in machine learning. One common
             approach is to suppose that the observed data are close to a
             lower-dimensional smooth manifold. There are a rich variety
             of manifold learning methods available, which allow mapping
             of data points to the manifold. However, there is a clear
             lack of probabilistic methods that allow learning of the
             manifold along with the generative distribution of the
             observed data. The best attempt is the Gaussian process
             latent variable model (GP-LVM), but identifiability issues
             lead to poor performance. We solve these issues by proposing
             a novel Coulomb repulsive process (Corp) for locations of
             points on the manifold, inspired by physical models of
             electrostatic interactions among particles. Combining this
             process with a GP prior for the mapping function yields a
             novel electrostatic GP (electroGP) process. Focusing on the
             simple case of a one-dimensional manifold, we develop
             efficient inference algorithms, and illustrate substantially
             improved performance in a variety of experiments including
             filling in missing frames in video.},
   Key = {fds322555}
}

@article{fds257988,
   Author = {Blei, D and Carin, L and Dunson, D},
   Title = {Probabilistic Topic Models: A focus on graphical model
             design and applications to document and image
             analysis.},
   Journal = {IEEE signal processing magazine},
   Volume = {27},
   Number = {6},
   Pages = {55-65},
   Year = {2010},
   Month = {November},
   ISSN = {1053-5888},
   url = {http://dx.doi.org/10.1109/msp.2010.938079},
   Abstract = {In this article, we review probabilistic topic models:
             graphical models that can be used to summarize a large
             collection of documents with a smaller number of
             distributions over words. Those distributions are called
             "topics" because, when fit to data, they capture the
             salient themes that run through the collection. We describe
             both finite-dimensional parametric topic models and their
             Bayesian nonparametric counterparts, which are based on the
             hierarchical Dirichlet process (HDP). We discuss two
             extensions of topic models to time-series data: one that
             lets the topics slowly change over time and one that lets
             the assumed prevalence of the topics change. Finally, we
             illustrate the application of topic models to nontext data,
             summarizing some recent research results in image analysis.
             © 2010 IEEE.},
   Doi = {10.1109/msp.2010.938079},
   Key = {fds257988}
}
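
% A quick way to try a (finite, parametric) topic model of the kind
% reviewed above is scikit-learn's LDA; note the nonparametric HDP
% variant discussed in the article is not in scikit-learn. The toy
% corpus is an illustrative assumption:
%
%   from sklearn.feature_extraction.text import CountVectorizer
%   from sklearn.decomposition import LatentDirichletAllocation
%
%   docs = ["the cat chased the mouse",
%           "dogs and cats make good pets",
%           "stocks fell as markets slid",
%           "investors sold bank shares"]
%   vec = CountVectorizer(stop_words="english")
%   X = vec.fit_transform(docs)
%   lda = LatentDirichletAllocation(n_components=2, random_state=0).fit(X)
%   words = vec.get_feature_names_out()
%   for k, topic in enumerate(lda.components_):
%       print(k, [words[i] for i in topic.argsort()[-4:]])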

@article{fds350128,
   Author = {Aliverti, E and Tilson, JL and Filer, DL and Babcock, B and Colaneri, A and Ocasio, J and Gershon, TR and Wilhelmsen, KC and Dunson,
             DB},
   Title = {Projected t-SNE for batch correction.},
   Journal = {Bioinformatics (Oxford, England)},
   Volume = {36},
   Number = {11},
   Pages = {3522-3527},
   Year = {2020},
   Month = {June},
   url = {http://dx.doi.org/10.1093/bioinformatics/btaa189},
   Abstract = {Motivation: Low-dimensional representations of
             high-dimensional data are routinely employed in biomedical
             research to visualize, interpret and communicate results
             from different pipelines. In this article, we propose a
             novel procedure to directly estimate t-SNE embeddings that
             are not driven by batch effects. Without correction,
             interesting structure in the data can be obscured by batch
             effects. The proposed algorithm can therefore significantly
             aid visualization of high-dimensional data. Results: The
             proposed methods are based on linear algebra and constrained
             optimization, leading to efficient algorithms and fast
             computation in many high-dimensional settings. Results on
             artificial single-cell transcription profiling data show
             that the proposed procedure successfully removes multiple
             batch effects from t-SNE embeddings, while retaining
             fundamental information on cell types. When applied to
             single-cell gene expression data to investigate mouse
             medulloblastoma, the proposed method successfully removes
             batches related with mice identifiers and the date of the
             experiment, while preserving clusters of oligodendrocytes,
             astrocytes, and endothelial cells and microglia, which are
             expected to lie in the stroma within or adjacent to the
             tumours. Availability and implementation: Source code
             implementing the proposed approach is available as an R
             package at https://github.com/emanuelealiverti/BC_tSNE,
             including a tutorial to reproduce the simulation
             studies. Contact: aliverti@stat.unipd.it.},
   Doi = {10.1093/bioinformatics/btaa189},
   Key = {fds350128}
}
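
% The R package linked above implements the constrained optimization.
% A naive linear stand-in conveys the goal: residualize the features on
% batch indicators before embedding, so batch means cannot drive the
% t-SNE map (the paper instead constrains the embedding itself):
%
%   import numpy as np
%   from sklearn.manifold import TSNE
%
%   rng = np.random.default_rng(0)
%   n, p = 300, 50
%   batch = rng.integers(0, 3, n)                   # three batches
%   X = rng.standard_normal((n, p))
%   X += batch[:, None] * rng.standard_normal(p)    # additive batch effect
%   B = np.eye(3)[batch]                            # one-hot batch design
%   X_adj = X - B @ np.linalg.lstsq(B, X, rcond=None)[0]
%   emb = TSNE(n_components=2, random_state=0).fit_transform(X_adj)
%   print(emb.shape)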

@article{fds257950,
   Author = {Elliott, L and Henderson, J and Northstone, K and Chiu, GY and Dunson,
             D and London, SJ},
   Title = {Prospective study of breast-feeding in relation to wheeze,
             atopy, and bronchial hyperresponsiveness in the Avon
             Longitudinal Study of Parents and Children
             (ALSPAC).},
   Journal = {The Journal of allergy and clinical immunology},
   Volume = {122},
   Number = {1},
   Pages = {49-54.e3},
   Year = {2008},
   Month = {July},
   ISSN = {0091-6749},
   url = {http://dx.doi.org/10.1016/j.jaci.2008.04.001},
   Abstract = {Background: Breast-feeding clearly protects against
             early wheezing, but recent data suggest that it might
             increase later risk of atopic disease and
             asthma. Objective: We sought to examine the
             relationship between breast-feeding and later asthma and
             allergy outcomes by using data from the Avon Longitudinal
             Study of Parents and Children, a large birth cohort in the
             United Kingdom. Methods: We used adjusted logistic
             regression models to evaluate the association between
             breast-feeding and atopy at age 7 years, bronchial
             responsiveness to methacholine at age 8 years, and wheeze at
             ages 3 and 7 1/2 years. Bayesian methods were used to assess
             the possibility of bias caused by an influence of early
             wheezing on the duration of breast-feeding, as well as
             selection bias. Results: Breast-feeding was protective
             for wheeze in the first 3 years of life (odds ratio [OR] of
             0.80 [95% CI, 0.70-0.90] for ≥ 6 months relative to
             never) but not wheeze (OR, 0.98; 95% CI, 0.79-1.22), atopy
             (OR, 1.12; 95% CI, 0.92-1.35), or bronchial
             hyperresponsiveness (OR, 1.07; 95% CI, 0.82-1.40) at ages 7
             to 8 years. Bayesian models adjusting for the longer
             duration of breast-feeding among children with wheezing in
             early infancy produced virtually identical
             results. Conclusions: We did not find consistent
             evidence for either a deleterious effect or a protective
             effect of breast-feeding on later risk of allergic disease
             in a large prospective birth cohort of children with
             objective outcome measures and extensive data on potential
             confounders and effect modifiers. Neither reverse causation
             nor loss to follow-up appears to have materially biased our
             results.},
   Doi = {10.1016/j.jaci.2008.04.001},
   Key = {fds257950}
}

@article{fds322544,
   Author = {Van Den Boom, W and Dunson, D and Reeves, G},
   Title = {Quantifying uncertainty in variable selection with arbitrary
             matrices},
   Journal = {2015 IEEE 6th International Workshop on Computational
             Advances in Multi-Sensor Adaptive Processing, CAMSAP
             2015},
   Pages = {385-388},
   Year = {2015},
   Month = {January},
   ISBN = {9781479919635},
   url = {http://dx.doi.org/10.1109/CAMSAP.2015.7383817},
   Abstract = {Probabilistically quantifying uncertainty in parameters,
             predictions and decisions is a crucial component of broad
             scientific and engineering applications. This is however
             difficult if the number of parameters far exceeds the sample
             size. Although there are currently many methods which have
             guarantees for problems characterized by large random
             matrices, there is often a gap between theory and practice
             when it comes to measures of statistical significance for
             matrices encountered in real-world applications. This paper
             proposes a scalable framework that utilizes state-of-the-art
             methods to provide approximations to the marginal posterior
             distributions. This framework is used to approximate
             marginal posterior inclusion probabilities for Bayesian
             variable selection.},
   Doi = {10.1109/CAMSAP.2015.7383817},
   Key = {fds322544}
}

@article{fds257921,
   Author = {Chen, Z and Dunson, DB},
   Title = {Random effects selection in linear mixed
             models.},
   Journal = {Biometrics},
   Volume = {59},
   Number = {4},
   Pages = {762-769},
   Year = {2003},
   Month = {December},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2003.00089.x},
   Abstract = {We address the important practical problem of how to select
             the random effects component in a linear mixed model. A
             hierarchical Bayesian model is used to identify any random
             effect with zero variance. The proposed approach
             reparameterizes the mixed model so that functions of the
             covariance parameters of the random effects distribution are
             incorporated as regression coefficients on standard normal
             latent variables. We allow random effects to effectively
             drop out of the model by choosing mixture priors with point
             mass at zero for the random effects variances. Due to the
             reparameterization, the model enjoys a conditionally linear
             structure that facilitates the use of normal conjugate
             priors. We demonstrate that posterior computation can
             proceed via a simple and efficient Markov chain Monte Carlo
             algorithm. The methods are illustrated using simulated data
             and real data from a study relating prenatal exposure to
             polychlorinated biphenyls and psychomotor development of
             children.},
   Doi = {10.1111/j.0006-341x.2003.00089.x},
   Key = {fds257921}
}

@article{fds349191,
   Author = {Jauch, M and Hoff, PD and Dunson, DB},
   Title = {Random orthogonal matrices and the Cayley
             transform},
   Journal = {Bernoulli},
   Volume = {26},
   Number = {2},
   Pages = {1560-1586},
   Year = {2020},
   Month = {January},
   url = {http://dx.doi.org/10.3150/19-BEJ1176},
   Abstract = {Random orthogonal matrices play an important role in
             probability and statistics, arising in multivariate
             analysis, directional statistics, and models of physical
             systems, among other areas. Calculations involving random
             orthogonal matrices are complicated by their constrained
             support. Accordingly, we parametrize the Stiefel and
             Grassmann manifolds, represented as subsets of orthogonal
             matrices, in terms of Euclidean parameters using the Cayley
             transform. We derive the necessary Jacobian terms for change
             of variables formulas. Given a density defined on the
             Stiefel or Grassmann manifold, these allow us to specify the
             corresponding density for the Euclidean parameters, and vice
             versa. As an application, we present a Markov chain Monte
             Carlo approach to simulating from distributions on the
             Stiefel and Grassmann manifolds. Finally, we establish that
             the Euclidean parameters corresponding to a uniform
             orthogonal matrix can be approximated asymptotically by
             independent normals. This result contributes to the growing
             literature on normal approximations to the entries of random
             orthogonal matrices or transformations thereof.},
   Doi = {10.3150/19-BEJ1176},
   Key = {fds349191}
}
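
% The Cayley transform is easy to verify numerically. A sketch, using
% the convention Q = (I - A)(I + A)^{-1} for skew-symmetric A (the
% paper works on Stiefel and Grassmann manifolds; this is the square
% orthogonal case only):
%
%   import numpy as np
%
%   rng = np.random.default_rng(0)
%   n = 4
%   Z = rng.standard_normal((n, n))
%   A = (Z - Z.T) / 2                        # skew-symmetric parameters
%   Q = (np.eye(n) - A) @ np.linalg.inv(np.eye(n) + A)
%   print(np.allclose(Q.T @ Q, np.eye(n)))   # True: Q is orthogonal
%   A_back = np.linalg.inv(np.eye(n) + Q) @ (np.eye(n) - Q)
%   print(np.allclose(A_back, A))            # True: the map inverts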

@article{fds326919,
   Author = {Schaich Borg, J and Srivastava, S and Lin, L and Heffner, J and Dunson,
             D and Dzirasa, K and de Lecea, L},
   Title = {Rat intersubjective decisions are encoded by
             frequency-specific oscillatory contexts.},
   Journal = {Brain Behav},
   Volume = {7},
   Number = {6},
   Pages = {e00710},
   Year = {2017},
   Month = {June},
   url = {http://dx.doi.org/10.1002/brb3.710},
   Abstract = {INTRODUCTION: It is unknown how the brain coordinates
             decisions to withstand personal costs in order to prevent
             other individuals' distress. Here we test whether local
             field potential (LFP) oscillations between brain regions
             create "neural contexts" that select specific brain
             functions and encode the outcomes of these types of
             intersubjective decisions. METHODS: Rats participated in an
             "Intersubjective Avoidance Test" (IAT) that tested rats'
             willingness to enter an innately aversive chamber to prevent
             another rat from getting shocked. c-Fos immunoreactivity was
             used to screen for brain regions involved in IAT
             performance. Multi-site local field potential (LFP)
             recordings were collected simultaneously and bilaterally
             from five brain regions implicated in the c-Fos studies
             while rats made decisions in the IAT. Local field potential
             recordings were analyzed using an elastic net penalized
             regression framework. RESULTS: Rats voluntarily entered an
             innately aversive chamber to prevent another rat from
             getting shocked, and c-Fos immunoreactivity in brain regions
             known to be involved in human empathy-including the anterior
             cingulate, insula, orbital frontal cortex, and
             amygdala-correlated with the magnitude of "intersubjective
             avoidance" each rat displayed. Local field potential
             recordings revealed that optimal accounts of rats'
             performance in the task require specific frequencies of LFP
             oscillations between brain regions in addition to specific
             frequencies of LFP oscillations within brain regions. Alpha
             and low gamma coherence between spatially distributed brain
             regions predicts more intersubjective avoidance, while theta
             and high gamma coherence between a separate subset of brain
             regions predicts less intersubjective avoidance. Phase
             relationship analyses indicated that choice-relevant
             coherence in the alpha range reflects information passed
             from the amygdala to cortical structures, while coherence in
             the theta range reflects information passed in the reverse
             direction. CONCLUSION: These results indicate that the
             frequency-specific "neural context" surrounding brain
             regions involved in social cognition encodes outcomes of
             decisions that affect others, above and beyond signals from
             any set of brain regions in isolation.},
   Doi = {10.1002/brb3.710},
   Key = {fds326919}
}

@article{fds354543,
   Author = {Nishimura, A and Dunson, D},
   Title = {Recycling Intermediate Steps to Improve Hamiltonian Monte
             Carlo},
   Journal = {Bayesian Analysis},
   Volume = {15},
   Number = {4},
   Pages = {1087-1108},
   Year = {2020},
   Month = {January},
   url = {http://dx.doi.org/10.1214/19-BA1171},
   Abstract = {Hamiltonian Monte Carlo (HMC) and related algorithms have
             become routinely used in Bayesian computation. In this
             article, we present a simple and provably accurate method to
             improve the efficiency of HMC and related algorithms with
             essentially no extra computational cost. This is achieved by
             recycling the intermediate states along simulated
             trajectories of Hamiltonian dynamics. Standard algorithms
             use only the end points of trajectories, wastefully
             discarding all the intermediate states. Compared to the
             alternative methods for utilizing the intermediate states,
             our algorithm is simpler to apply in practice and requires
             little programming effort beyond the usual implementations
             of HMC and related algorithms. Our algorithm applies
             straightforwardly to the no-U-turn sampler, arguably the
             most popular variant of HMC. Through a variety of
             experiments, we demonstrate that our recycling algorithm
             yields substantial computational efficiency
             gains.},
   Doi = {10.1214/19-BA1171},
   Key = {fds354543}
}

@article{fds258074,
   Author = {Dunson, DB and Bigelow, JL and Colombo, B},
   Title = {Reduced fertilization rates in older men when cervical mucus
             is suboptimal.},
   Journal = {Obstetrics and gynecology},
   Volume = {105},
   Number = {4},
   Pages = {788-793},
   Year = {2005},
   Month = {April},
   ISSN = {0029-7844},
   url = {http://dx.doi.org/10.1097/01.aog.0000154155.20366.ee},
   Abstract = {Objective: Cervical mucus is vital in the regulation
             of sperm survival and transport through the reproductive
             tract. The goal of this study is to assess whether the
             lowered fertility for men in their late 30s and early 40s is
             related to the nature of cervical mucus on the day of
             intercourse. Methods: In a prospective study of 7
             European family planning centers, 782 couples not using
             birth control recorded daily observations of intercourse and
             the nature of cervical mucus. Using data from 1,459
             menstrual cycles, 342 ending in pregnancy, we estimate
             day-specific conception probabilities in relation to mucus
             and male and female age. Results: On days where
             cervical mucus was not evident, intercourse for men in their
             late 30s and early 40s was 50% less likely to result in a
             clinical pregnancy, adjusting for intercourse timing and
             female age. As secretions become more conducive to sperm
             transport, the effect of male age diminishes steadily from
             21% on days with damp secretions, to 11% on days with thick
             mucus, to only 4% on days with most fertile-type
             mucus. Conclusion: The effect of male age on
             fecundability can be minimized by timing intercourse on days
             with optimal secretions. Level of evidence: II-2.},
   Doi = {10.1097/01.aog.0000154155.20366.ee},
   Key = {fds258074}
}

@article{fds332379,
   Author = {Durante, D and Dunson, DB and Vogelstein, JT},
   Title = {Rejoinder: Nonparametric Bayes Modeling of Populations of
             Networks},
   Journal = {Journal of the American Statistical Association},
   Volume = {112},
   Number = {520},
   Pages = {1547-1552},
   Publisher = {Informa UK Limited},
   Year = {2017},
   Month = {October},
   url = {http://dx.doi.org/10.1080/01621459.2017.1395643},
   Doi = {10.1080/01621459.2017.1395643},
   Key = {fds332379}
}

@article{fds321837,
   Author = {Yin, R and Cornelis, B and Fodor, G and Ocon, N and Dunson, D and Daubechies, I},
   Title = {Removing cradle artifacts in X-ray images of
             paintings},
   Journal = {SIAM Journal on Imaging Sciences},
   Volume = {9},
   Number = {3},
   Pages = {1247-1272},
   Publisher = {Society for Industrial & Applied Mathematics
             (SIAM)},
   Year = {2016},
   Month = {August},
   url = {http://dx.doi.org/10.1137/15M1053554},
   Abstract = {We propose an algorithm that removes the visually unpleasant
             effects of cradling in X-ray images of panel paintings, with
             the goal of improving the X-ray image readability by art
             experts. The algorithm consists of three stages. In the
             first stage the location of the cradle is detected
             automatically and the grayscale inconsistency, caused by the
             thickness of the cradle, is corrected. In a second stage we
             use a method called morphological component analysis to
             separate the X-ray image into a so-called cartoon part and a
             texture part, where the latter contains mostly the wood
             grain from both the panel and the cradling. The algorithm
             next learns a Bayesian factor model that distinguishes
             between the texture patterns that originate from the cradle
             and those from other components such as the panel and/or the
             painting on the panel surface, and finally uses this to
             remove the textures associated with the cradle. We apply the
             algorithm to a number of historically important paintings on
             panel. We also show how it can be used to digitally remove
             stretcher artifacts from X-rays of paintings on canvas. We
             compare our results with those obtained manually by best
             current practices in art conservation as well as on a ground
             truth dataset, consisting of X-ray images of a painting
             before and after removal of the physically attached
             cradle.},
   Doi = {10.1137/15M1053554},
   Key = {fds321837}
}

@article{fds357659,
   Author = {Aliverti, E and Lum, K and Johndrow, JE and Dunson,
             DB},
   Title = {Removing the influence of group variables in
             high-dimensional predictive modelling.},
   Journal = {Journal of the Royal Statistical Society. Series A
             (Statistics in Society)},
   Volume = {184},
   Number = {3},
   Pages = {791-811},
   Year = {2021},
   Month = {July},
   url = {http://dx.doi.org/10.1111/rssa.12613},
   Abstract = {In many application areas, predictive models are used to
             support or make important decisions. There is increasing
             awareness that these models may contain spurious or
             otherwise undesirable correlations. Such correlations may
             arise from a variety of sources, including batch effects,
             systematic measurement errors, or sampling bias. Without
             explicit adjustment, machine learning algorithms trained
             using these data can produce poor out-of-sample predictions
             which propagate these undesirable correlations. We propose a
             method to pre-process the training data, producing an
             adjusted dataset that is statistically independent of the
             nuisance variables with minimum information loss. We develop
             a conceptually simple approach for creating an adjusted
             dataset in high-dimensional settings based on a constrained
             form of matrix decomposition. The resulting dataset can then
             be used in any predictive algorithm with the guarantee that
             predictions will be statistically independent of the group
             variable. We develop a scalable algorithm for implementing
             the method, along with theory support in the form of
             independence guarantees and optimality. The method is
             illustrated on some simulation examples and applied to two
             case studies: removing machine-specific correlations from
             brain scan data, and removing race and ethnicity information
             from a dataset used to predict recidivism. That the
             motivation for removing undesirable correlations is quite
             different in the two applications illustrates the broad
             applicability of our approach.},
   Doi = {10.1111/rssa.12613},
   Key = {fds357659}
}
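
A rough illustration of the pre-processing idea described in the abstract
above: the sketch below removes group-specific column means, a much simpler
operation than the paper's constrained matrix decomposition, and all names
and details are illustrative rather than taken from the paper.

    import numpy as np

    def remove_group_means(X, groups):
        # Center each column of X within each group. This leaves every
        # column uncorrelated with (though not necessarily independent of)
        # the group labels; the paper's constrained decomposition targets
        # the stronger guarantee of statistical independence.
        X_adj = np.asarray(X, dtype=float).copy()
        for g in np.unique(groups):
            mask = groups == g
            X_adj[mask] -= X_adj[mask].mean(axis=0)
        return X_adj

    rng = np.random.default_rng(0)
    groups = rng.integers(0, 2, size=200)
    X = rng.normal(size=(200, 5)) + groups[:, None]  # columns shifted by group
    X_adj = remove_group_means(X, groups)            # group-mean signal removed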

@article{fds341600,
   Author = {Dunson, D and Fryzlewicz, P},
   Title = {Report of the editors-2016},
   Journal = {Journal of the Royal Statistical Society. Series B:
             Statistical Methodology},
   Volume = {79},
   Number = {1},
   Pages = {3-4},
   Year = {2017},
   Month = {January},
   url = {http://dx.doi.org/10.1111/rssb.12220},
   Doi = {10.1111/rssb.12220},
   Key = {fds341600}
}

@article{fds371715,
   Author = {Dunson, D and Wood, S},
   Title = {Report of the Editors—2018},
   Journal = {Journal of the Royal Statistical Society. Series B:
             Statistical Methodology},
   Volume = {81},
   Number = {1},
   Pages = {3-4},
   Year = {2019},
   Month = {January},
   url = {http://dx.doi.org/10.1111/RSSB.12306},
   Doi = {10.1111/RSSB.12306},
   Key = {fds371715}
}

@article{fds257875,
   Author = {Petralia, F and Rao, V and Dunson, DB},
   Title = {Repulsive mixtures},
   Journal = {Advances in Neural Information Processing
             Systems},
   Volume = {3},
   Pages = {1889-1897},
   Year = {2012},
   Month = {December},
   ISSN = {1049-5258},
   Abstract = {Discrete mixtures are used routinely in broad sweeping
             applications ranging from unsupervised settings to fully
             supervised multi-task learning. Indeed, finite mixtures and
             infinite mixtures, relying on Dirichlet processes and
             modifications, have become a standard tool. One important
             issue that arises in using discrete mixtures is low
             separation in the components; in particular, different
             components can be introduced that are very similar and hence
             redundant. Such redundancy leads to too many clusters that
             are too similar, degrading performance in unsupervised
             learning and leading to computational problems and an
             unnecessarily complex model in supervised settings.
             Redundancy can arise in the absence of a penalty on
             components placed close together even when a Bayesian
             approach is used to learn the number of components. To solve
             this problem, we propose a novel prior that generates
             components from a repulsive process, automatically
             penalizing redundant components. We characterize this
             repulsive prior theoretically and propose a Markov chain
             Monte Carlo sampling algorithm for posterior computation.
             The methods are illustrated using synthetic examples and an
             iris data set.},
   Key = {fds257875}
}
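
A minimal sketch of a repulsive prior term of the kind proposed above: the
prior on component locations includes a pairwise factor that vanishes as two
components coincide. The particular repulsion function and tau below are
illustrative choices, not the paper's.

    import numpy as np
    from itertools import combinations

    def repulsive_log_prior(locations, tau=1.0):
        # log prod_{i<j} (1 - exp(-tau * ||theta_i - theta_j||^2)):
        # nearly coincident components drive the prior toward zero,
        # automatically penalizing redundant mixture components.
        logp = 0.0
        for a, b in combinations(np.asarray(locations, dtype=float), 2):
            d2 = np.sum((a - b) ** 2)
            logp += np.log1p(-np.exp(-tau * d2))
        return logp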

@article{fds332378,
   Author = {Minsker, S and Srivastava, S and Lin, L and Dunson,
             DB},
   Title = {Robust and scalable Bayes via a median of subset posterior
             measures},
   Journal = {Journal of Machine Learning Research},
   Volume = {18},
   Pages = {1-40},
   Year = {2017},
   Month = {December},
   Abstract = {We propose a novel approach to Bayesian analysis that is
             provably robust to outliers in the data and often has
             computational advantages over standard methods. Our
             technique is based on splitting the data into
             non-overlapping subgroups, evaluating the posterior
             distribution given each independent subgroup, and then
             combining the resulting measures. The main novelty of our
             approach is the proposed aggregation step, which is based on
             the evaluation of a median in the space of probability
             measures equipped with a suitable collection of distances
             that can be quickly and efficiently evaluated in practice.
             We present both theoretical and numerical evidence
             illustrating the improvements achieved by our
             method.},
   Key = {fds332378}
}
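
The aggregation step above takes a median in a space of probability measures;
as a crude finite-dimensional surrogate, the sketch below runs Weiszfeld's
algorithm for the geometric median of point summaries (say, subset posterior
mean vectors). It is not the paper's measure-valued construction.

    import numpy as np

    def geometric_median(points, n_iter=100, eps=1e-8):
        # Weiszfeld iterations: re-weight points by inverse distance to
        # the current estimate; the fixed point is the geometric median.
        points = np.asarray(points, dtype=float)
        m = points.mean(axis=0)
        for _ in range(n_iter):
            d = np.maximum(np.linalg.norm(points - m, axis=1), eps)
            w = 1.0 / d
            m_new = (w[:, None] * points).sum(axis=0) / w.sum()
            if np.linalg.norm(m_new - m) < eps:
                break
            m = m_new
        return m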

@article{fds337687,
   Author = {Miller, JW and Dunson, DB},
   Title = {Robust Bayesian inference via coarsening.},
   Journal = {Journal of the American Statistical Association},
   Volume = {114},
   Number = {527},
   Pages = {1113-1125},
   Publisher = {Informa UK Limited},
   Year = {2019},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2018.1469995},
   Abstract = {The standard approach to Bayesian inference is based on the
             assumption that the distribution of the data belongs to the
             chosen model class. However, even a small violation of this
             assumption can have a large impact on the outcome of a
             Bayesian procedure. We introduce a novel approach to
             Bayesian inference that improves robustness to small
             departures from the model: rather than conditioning on the
             event that the observed data are generated by the model, one
             conditions on the event that the model generates data close
             to the observed data, in a distributional sense. When
             closeness is defined in terms of relative entropy, the
             resulting "coarsened" posterior can be approximated by
             simply tempering the likelihood-that is, by raising the
             likelihood to a fractional power-thus, inference can usually
             be implemented via standard algorithms, and one can even
             obtain analytical solutions when using conjugate priors.
             Some theoretical properties are derived, and we illustrate
             the approach with real and simulated data using mixture
             models and autoregressive models of unknown
             order.},
   Doi = {10.1080/01621459.2018.1469995},
   Key = {fds337687}
}
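
Because the relative-entropy coarsened posterior amounts to a power
likelihood, conjugate cases remain in closed form. A minimal sketch in a
Beta-Bernoulli model (the model, prior, and value of alpha are illustrative;
how the coarsening neighborhood determines the tempering fraction is
discussed in the paper and not derived here):

    import numpy as np
    from scipy import stats

    def coarsened_beta_posterior(y, a=1.0, b=1.0, alpha=0.5):
        # Raising the Bernoulli likelihood to the fractional power alpha
        # turns the Beta(a, b) update into Beta(a + alpha*s, b + alpha*(n-s));
        # alpha = 1 recovers the standard posterior.
        y = np.asarray(y)
        n, s = y.size, y.sum()
        return stats.beta(a + alpha * s, b + alpha * (n - s))

    post = coarsened_beta_posterior(np.array([1, 0, 1, 1, 0, 1]), alpha=0.5)
    print(post.mean(), post.interval(0.95))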

@article{fds257831,
   Author = {Minsker, S and Srivastava, S and Lin, L and Dunson,
             DB},
   Title = {Scalable and robust Bayesian inference via the median
             posterior},
   Journal = {31st International Conference on Machine Learning, ICML
             2014},
   Volume = {5},
   Pages = {3629-3639},
   Year = {2014},
   Month = {January},
   ISBN = {9781634393973},
   Abstract = {Many Bayesian learning methods for massive data benefit from
             working with small subsets of observations. In particular,
             significant progress has been made in scalable Bayesian
             learning via stochastic approximation. However, Bayesian
             learning methods in distributed computing environments are
             often problem- or distribution-specific and use ad hoc
             techniques. We propose a novel general approach to Bayesian
             inference that is scalable and robust to corruption in the
             data. Our technique is based on the idea of splitting the
             data into several non-overlapping subgroups, evaluating the
             posterior distribution given each independent subgroup, and
             then combining the results. Our main contribution is the
             proposed aggregation step which is based on finding the
             geometric median of subset posterior distributions.
             Presented theoretical and numerical results confirm the
             advantages of our approach.},
   Key = {fds257831}
}

@article{fds338057,
   Author = {Srivastava, S and Li, C and Dunson, DB},
   Title = {Scalable Bayes via barycenter in Wasserstein
             space},
   Journal = {Journal of Machine Learning Research},
   Volume = {19},
   Pages = {1-35},
   Year = {2018},
   Month = {August},
   Abstract = {Divide-and-conquer based methods for Bayesian inference
             provide a general approach for tractable posterior inference
             when the sample size is large. These methods divide the data
             into smaller subsets, sample from the posterior distribution
             of parameters in parallel on all the subsets, and combine
             posterior samples from all the subsets to approximate the
             full data posterior distribution. The smaller size of any
             subset compared to the full data implies that posterior
             sampling on any subset is computationally more efficient
             than sampling from the true posterior distribution. Since
             the combination step takes negligible time relative to
             sampling, posterior computations can be scaled to massive
             data by dividing the full data into a sufficiently large
             number of data subsets. One such approach relies on the
             geometry of posterior distributions estimated across
             different subsets and combines them through their barycenter
             in a Wasserstein space of probability measures. We provide
             theoretical guarantees on the accuracy of approximation that
             are valid in many applications. We show that the geometric
             method approximates the full data posterior distribution
             better than its competitors across diverse simulations and
             reproduces known results when applied to a movie ratings
             database.},
   Key = {fds338057}
}
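
In one dimension the 2-Wasserstein barycenter is obtained by averaging
quantile functions, which yields a quick sketch of the combination step for a
scalar parameter; the multivariate algorithm in the paper is more involved.

    import numpy as np

    def barycenter_1d(subset_samples, grid_size=1000):
        # Average the subsets' empirical quantile functions on a common
        # probability grid; the output represents draws from the
        # one-dimensional Wasserstein barycenter.
        qs = np.linspace(0.005, 0.995, grid_size)
        quantiles = np.stack([np.quantile(s, qs) for s in subset_samples])
        return quantiles.mean(axis=0)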

@article{fds257824,
   Author = {Rai, P and Wang, Y and Guo, S and Chen, G and Dunson, D and Carin,
             L},
   Title = {Scalable Bayesian low-rank decomposition of incomplete
             multiway tensors},
   Journal = {31st International Conference on Machine Learning, ICML
             2014},
   Volume = {5},
   Pages = {3810-3820},
   Year = {2014},
   Month = {January},
   ISBN = {9781634393973},
   Abstract = {We present a scalable Bayesian framework for low-rank
             decomposition of multiway tensor data with missing
             observations. The key issue of pre-specifying the rank of
             the decomposition is sidestepped in a principled manner
             using a multiplicative gamma process prior. Both continuous
             and binary data can be analyzed under the framework, in a
             coherent way using fully conjugate Bayesian analysis. In
             particular, the analysis in the non-conjugate binary case is
             facilitated via the use of the Pólya-Gamma sampling
             strategy which elicits closed-form Gibbs sampling updates.
             The resulting samplers are efficient and enable us to apply
             our framework to large-scale problems, with time-complexity
             that is linear in the number of observed entries in the
             tensor. This is especially attractive in analyzing very
             large but sparsely observed tensors with very few known
             entries. Moreover, our method admits easy extension to the
             supervised setting where entities in one or more tensor
             modes have labels. Our method outperforms several
             state-of-the-art tensor decomposition methods on various
             synthetic and benchmark real-world datasets.},
   Key = {fds257824}
}
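
For reference, the low-rank structure being estimated is the CP/PARAFAC
form; below is a sketch of reconstructing a 3-way tensor from its factor
matrices. The paper additionally infers the rank through a multiplicative
gamma process prior and handles missing and binary entries, none of which is
shown here.

    import numpy as np

    def cp_reconstruct(A, B, C):
        # Entry (i, j, k) is sum_r A[i, r] * B[j, r] * C[k, r], the rank-R
        # CP/PARAFAC form, with A, B, C of shapes (I, R), (J, R), (K, R).
        return np.einsum('ir,jr,kr->ijk', A, B, C)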

@article{fds344776,
   Author = {Wang, Y and Canale, A and Dunson, D},
   Title = {Scalable geometric density estimation},
   Journal = {Proceedings of the 19th International Conference on
             Artificial Intelligence and Statistics, AISTATS
             2016},
   Pages = {857-865},
   Year = {2016},
   Month = {January},
   Abstract = {It is standard to assume a low-dimensional structure in
             estimating a high-dimensional density. However, popular
             methods, such as probabilistic principal component analysis,
             scale poorly computationally. We introduce a novel empirical
             Bayes method that we term geometric density estimation
             (GEODE) and show that, with mild conditions and among all
             d-dimensional linear subspaces, the span of the d leading
             principal axes of the data maximizes the model posterior.
             With these axes pre-computed using fast singular value
             decomposition, GEODE easily scales to high dimensional
             problems while providing uncertainty characterization. The
             model is also capable of imputing missing data and
             dynamically deleting redundant dimensions. Finally, we
             generalize GEODE by mixing it across a dyadic clustering
             tree. Both simulation studies and real world data
             applications show superior performance of GEODE in terms of
             robustness and computational efficiency.},
   Key = {fds344776}
}
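
The pre-computation referred to above is a thin SVD of the centered data; a
sketch of extracting the d leading principal axes whose span, per the paper,
maximizes the GEODE model posterior. Uncertainty characterization and the
dyadic-tree extension are not shown.

    import numpy as np

    def leading_axes(X, d):
        # Thin SVD of the centered data matrix; the first d right singular
        # vectors span the d-dimensional subspace used by GEODE.
        Xc = X - X.mean(axis=0)
        _, _, Vt = np.linalg.svd(Xc, full_matrices=False)
        return Vt[:d].T  # columns are the leading principal axes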

@article{fds340499,
   Author = {Duan, LL and Johndrow, JE and Dunson, DB},
   Title = {Scaling up data augmentation MCMC via calibration},
   Journal = {Journal of Machine Learning Research},
   Volume = {19},
   Year = {2018},
   Month = {October},
   Abstract = {There has been considerable interest in making Bayesian
             inference more scalable. In big data settings, most of the
             focus has been on reducing the computing time per iteration
             rather than reducing the number of iterations needed in
             Markov chain Monte Carlo (MCMC). This article considers data
             augmentation MCMC (DA-MCMC), a widely used technique.
             DA-MCMC samples tend to become highly autocorrelated in
             large samples, due to a mis-calibration problem in which
             conditional posterior distributions given augmented data are
             too concentrated. This makes it necessary to collect very
             long MCMC paths to obtain acceptably low MC error. To combat
             this inefficiency, we propose a family of calibrated data
             augmentation algorithms, which appropriately adjust the
             variance of conditional posterior distributions. A
             Metropolis-Hastings step is used to eliminate bias in the
             stationary distribution of the resulting sampler. Compared
             to existing alternatives, this approach can dramatically
             reduce MC error by reducing autocorrelation and increasing
             the effective number of DA-MCMC samples per unit of
             computing time. The approach is simple and applicable to a
             broad variety of existing data augmentation algorithms. We
             focus on three popular generalized linear models: probit,
             logistic and Poisson log-linear. Dramatic gains in
             computational efficiency are shown in applications.},
   Key = {fds340499}
}
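
For context, below is a standard (uncalibrated) data augmentation Gibbs
sampler for probit regression in the style of Albert and Chib, the kind of
algorithm the paper accelerates. The calibration that rescales the
conditional variances and the Metropolis-Hastings correction are the paper's
contribution and are omitted; a flat prior on the coefficients is assumed
for brevity.

    import numpy as np
    from scipy import stats

    def probit_da_gibbs(X, y, n_iter=500, seed=0):
        rng = np.random.default_rng(seed)
        n, p = X.shape
        V = np.linalg.inv(X.T @ X)  # conditional covariance under a flat prior
        L = np.linalg.cholesky(V)
        beta = np.zeros(p)
        draws = np.empty((n_iter, p))
        for t in range(n_iter):
            mu = X @ beta
            # Sample z_i ~ N(mu_i, 1) truncated to z_i > 0 when y_i = 1 and
            # z_i < 0 when y_i = 0, by inverting the truncated normal CDF.
            u = rng.uniform(size=n)
            lo = stats.norm.cdf(-mu)
            z = mu + stats.norm.ppf(np.where(y == 1, lo + u * (1 - lo), u * lo))
            # Conjugate update: beta | z ~ N(V X'z, V).
            beta = V @ (X.T @ z) + L @ rng.normal(size=p)
            draws[t] = beta
        return draws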

@article{fds257927,
   Author = {Dunson, DB and Chen, Z},
   Title = {Selecting factors predictive of heterogeneity in
             multivariate event time data.},
   Journal = {Biometrics},
   Volume = {60},
   Number = {2},
   Pages = {352-358},
   Year = {2004},
   Month = {June},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2004.00179.x},
   Abstract = {In multivariate survival analysis, investigators are often
             interested in testing for heterogeneity among clusters, both
             overall and within specific classes. We represent different
             hypotheses about the heterogeneity structure using a
             sequence of gamma frailty models, ranging from a null model
             with no random effects to a full model having random effects
             for each class. Following a Bayesian approach, we define
             prior distributions for the frailty variances consisting of
             mixtures of point masses at zero and inverse-gamma
             densities. Since frailties with zero variance effectively
             drop out of the model, this prior allocates probability to
             each model in the sequence, including the overall null
             hypothesis of homogeneity. Using a counting process
             formulation, the conditional posterior distributions of the
             frailties and proportional hazards regression coefficients
             have simple forms. Posterior computation proceeds via a data
             augmentation Gibbs sampling algorithm, a single run of which
             can be used to obtain model-averaged estimates of the
             population parameters and posterior model probabilities for
             testing hypotheses about the heterogeneity structure. The
             methods are illustrated using data from a lung cancer
             trial.},
   Doi = {10.1111/j.0006-341x.2004.00179.x},
   Key = {fds257927}
}

@article{fds257838,
   Author = {Hannah, LA and Powell, WB and Dunson, DB},
   Title = {Semiconvex regression for metamodeling-based
             optimization},
   Journal = {SIAM Journal on Optimization},
   Volume = {24},
   Number = {2},
   Pages = {573-597},
   Publisher = {Society for Industrial & Applied Mathematics
             (SIAM)},
   Year = {2014},
   Month = {January},
   ISSN = {1052-6234},
   url = {http://dx.doi.org/10.1137/130907070},
   Abstract = {Stochastic search involves finding a set of controllable
             parameters that minimizes an unknown objective function
             using a set of noisy observations. We consider the case when
             the unknown function is convex and a metamodel is used as a
             surrogate objective function. Often the data are non-i.i.d.
             and include an observable state variable, such as applicant
             information in a loan rate decision problem. State
             information is difficult to incorporate into convex models.
             We propose a new semiconvex regression method that is used
             to produce a convex metamodel in the presence of a state
             variable. We show consistency for this method. We
             demonstrate its effectiveness for metamodeling on a set of
             synthetic inventory management problems and a large
             real-life auto loan dataset. © 2014 Society for Industrial
             and Applied Mathematics.},
   Doi = {10.1137/130907070},
   Key = {fds257838}
}

@article{fds257959,
   Author = {Yang, M and Dunson, DB and Baird, D},
   Title = {Semiparametric Bayes hierarchical models with mean and
             variance constraints.},
   Journal = {Computational statistics & data analysis},
   Volume = {54},
   Number = {9},
   Pages = {2172-2186},
   Year = {2010},
   Month = {September},
   ISSN = {0167-9473},
   url = {http://dx.doi.org/10.1016/j.csda.2010.03.025},
   Abstract = {In parametric hierarchical models, it is standard practice
             to place mean and variance constraints on the latent
             variable distributions for the sake of identifiability and
             interpretability. Because incorporation of such constraints
             is challenging in semiparametric models that allow latent
             variable distributions to be unknown, previous methods
             either constrain the median or avoid constraints. In this
             article, we propose a centered stick-breaking process
             (CSBP), which induces mean and variance constraints on an
             unknown distribution in a hierarchical model. This is
             accomplished by viewing an unconstrained stick-breaking
             process as a parameter-expanded version of a CSBP. An
             efficient blocked Gibbs sampler is developed for approximate
             posterior computation. The methods are illustrated through a
             simulated example and an epidemiologic application.},
   Doi = {10.1016/j.csda.2010.03.025},
   Key = {fds257959}
}
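
The unconstrained building block of the CSBP is ordinary stick-breaking; a
sketch of drawing truncated stick-breaking weights follows. The recentering
that imposes the mean and variance constraints, which is the substance of the
paper, is not reproduced here.

    import numpy as np

    def stick_breaking_weights(alpha, K, rng):
        # v_h ~ Beta(1, alpha); w_h = v_h * prod_{l<h} (1 - v_l).
        # Setting v_K = 1 truncates the process so the weights sum to one.
        v = rng.beta(1.0, alpha, size=K)
        v[-1] = 1.0
        return v * np.concatenate(([1.0], np.cumprod(1.0 - v[:-1])))

    w = stick_breaking_weights(alpha=2.0, K=25, rng=np.random.default_rng(1))
    assert np.isclose(w.sum(), 1.0)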

@article{fds257863,
   Author = {Hua, Z and Zhu, H and Dunson, DB},
   Title = {Semiparametric Bayes local additive models for longitudinal
             data.},
   Journal = {Statistics in biosciences},
   Volume = {7},
   Number = {1},
   Pages = {90-107},
   Year = {2015},
   Month = {May},
   ISSN = {1867-1764},
   url = {http://dx.doi.org/10.1007/s12561-013-9104-y},
   Abstract = {In longitudinal data analysis, there is great interest in
             assessing the impact of predictors on the time-varying
             trajectory in a response variable. In such settings, an
             important issue is to account for heterogeneity in the shape
             of the trajectory among subjects, while allowing the impact
             of the predictors to vary across subjects. We propose a
             flexible semiparametric Bayes approach for addressing this
             issue relying on a local partition process prior, which
             allows flexible local borrowing of information across
             subjects. Local hypothesis testing and credible bands are
             developed for the identification of time windows across
             which a predictor has a significant impact, while adjusting
             for multiple comparisons. Posterior computation proceeds via
             an efficient MCMC algorithm using the exact block Gibbs
             sampler. The methods are assessed using simulation studies
             and applied to a yeast cell-cycle gene expression data
             set.},
   Doi = {10.1007/s12561-013-9104-y},
   Key = {fds257863}
}

@article{fds258039,
   Author = {Wang, L and Dunson, DB},
   Title = {Semiparametric Bayes multiple testing: Applications to tumor
             data.},
   Journal = {Biometrics},
   Volume = {66},
   Number = {2},
   Pages = {493-501},
   Year = {2009},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2009.01301.x},
   Abstract = {In National Toxicology Program (NTP) studies, investigators
             want to assess whether a test agent is carcinogenic overall
             and specific to certain tumor types, while estimating the
             dose-response profiles. Because there are potentially
             correlations among the tumors, a joint inference is
             preferred to separate univariate analyses for each tumor
             type. In this regard, we propose a random effect logistic
             model with a matrix of coefficients representing log-odds
             ratios for the adjacent dose groups for tumors at different
             sites. We propose appropriate nonparametric priors for these
             coefficients to characterize the correlations and to allow
             borrowing of information across different dose groups and
             tumor types. Global and local hypotheses can be easily
             evaluated by summarizing the output of a single Markov chain
             Monte Carlo (MCMC) run. Two multiple testing procedures are
             applied for testing local hypotheses based on the posterior
             probabilities of local alternatives. Simulation studies are
             conducted and an NTP tumor data set is analyzed illustrating
             the proposed approach.},
   Doi = {10.1111/j.1541-0420.2009.01301.x},
   Key = {fds258039}
}

@article{fds257970,
   Author = {Wang, L and Dunson, DB},
   Title = {Semiparametric Bayes' proportional odds models for current
             status data with underreporting.},
   Journal = {Biometrics},
   Volume = {67},
   Number = {3},
   Pages = {1111-1118},
   Year = {2011},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2010.01532.x},
   Abstract = {Current status data are a type of interval-censored event
             time data in which all the individuals are either left or
             right censored. For example, our motivation is drawn from a
             cross-sectional study, which measured whether or not fibroid
             onset had occurred by the age of an ultrasound exam for each
             woman. We propose a semiparametric Bayesian proportional
             odds model in which the baseline event time distribution is
             estimated nonparametrically by using adaptive monotone
             splines in a logistic regression model and the potential
             risk factors are included in the parametric part of the mean
             structure. The proposed approach has the advantage of being
             straightforward to implement using a simple and efficient
             Gibbs sampler, whereas alternative semiparametric Bayes'
             event time models encounter problems for current status
             data. The model is generalized to allow systematic
             underreporting in a subset of the data, and the methods are
             applied to an epidemiologic study of uterine
             fibroids.},
   Doi = {10.1111/j.1541-0420.2010.01532.x},
   Key = {fds257970}
}

@article{fds257977,
   Author = {Hua, Z and Dunson, DB and Gilmore, JH and Styner, MA and Zhu,
             H},
   Title = {Semiparametric Bayesian local functional models for
             diffusion tensor tract statistics.},
   Journal = {NeuroImage},
   Volume = {63},
   Number = {1},
   Pages = {460-474},
   Year = {2012},
   Month = {October},
   ISSN = {1053-8119},
   url = {http://dx.doi.org/10.1016/j.neuroimage.2012.06.027},
   Abstract = {We propose a semiparametric Bayesian local functional model
             (BFM) for the analysis of multiple diffusion properties
             (e.g., fractional anisotropy) along white matter fiber
             bundles with a set of covariates of interest, such as age
             and gender. BFM accounts for heterogeneity in the shape of
             the fiber bundle diffusion properties among subjects, while
             allowing the impact of the covariates to vary across
             subjects. A nonparametric Bayesian LPP2 prior facilitates
             global and local borrowings of information among subjects,
             while an infinite factor model flexibly represents
             low-dimensional structure. Local hypothesis testing and
             credible bands are developed to identify fiber segments,
             along which multiple diffusion properties are significantly
             associated with covariates of interest, while controlling
             for multiple comparisons. Moreover, BFM naturally groups
             subjects into more homogeneous clusters. Posterior
             computation proceeds via an efficient Markov chain Monte
             Carlo algorithm. A simulation study is performed to evaluate
             the finite sample performance of BFM. We apply BFM to
             investigate the development of white matter diffusivities
             along the splenium of the corpus callosum tract and the
             right internal capsule tract in a clinical study of
             neurodevelopment in new born infants.},
   Doi = {10.1016/j.neuroimage.2012.06.027},
   Key = {fds257977}
}

@article{fds322548,
   Author = {Lock, EF and Dunson, DB},
   Title = {Shared kernel Bayesian screening.},
   Journal = {Biometrika},
   Volume = {102},
   Number = {4},
   Pages = {829-842},
   Year = {2015},
   Month = {December},
   url = {http://dx.doi.org/10.1093/biomet/asv032},
   Abstract = {This article concerns testing for equality of distribution
             between groups. We focus on screening variables with shared
             distributional features such as common support, modes and
             patterns of skewness. We propose a Bayesian testing method
             using kernel mixtures, which improves performance by
             borrowing information across the different variables and
             groups through shared kernels and a common probability of
             group differences. The inclusion of shared kernels in a
             finite mixture, with Dirichlet priors on the weights, leads
             to a simple framework for testing that scales well for
             high-dimensional data. We provide closed asymptotic forms
             for the posterior probability of equivalence in two groups
             and prove consistency under model misspecification. The
             method is applied to DNA methylation array data from a
             breast cancer study, and compares favourably to competitors
             when Type I error is estimated via permutation.},
   Doi = {10.1093/biomet/asv032},
   Key = {fds322548}
}

@article{fds329109,
   Author = {Li, C and Srivastava, S and Dunson, DB},
   Title = {Simple, scalable and accurate posterior interval
             estimation},
   Journal = {Biometrika},
   Volume = {104},
   Number = {3},
   Pages = {665-680},
   Publisher = {Oxford University Press (OUP)},
   Year = {2017},
   Month = {September},
   url = {http://dx.doi.org/10.1093/biomet/asx033},
   Abstract = {Standard posterior sampling algorithms, such as Markov chain
             Monte Carlo procedures, face major challenges in scaling up
             to massive datasets. We propose a simple and general
             posterior interval estimation algorithm to rapidly and
             accurately estimate quantiles of the posterior distributions
             for one-dimensional functionals. Our algorithm runs Markov
             chain Monte Carlo in parallel for subsets of the data, and
             then averages quantiles estimated from each subset. We
             provide strong theoretical guarantees and show that the
             credible intervals from our algorithm asymptotically
             approximate those from the full posterior in the leading
             parametric order. Our algorithm has a better balance of
             accuracy and efficiency than its competitors across a
             variety of simulations and a real-data example.},
   Doi = {10.1093/biomet/asx033},
   Key = {fds329109}
}
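
A sketch of the combination rule described above, taking the subset MCMC
draws as given arrays; running the subset chains, the expensive but
embarrassingly parallel step, and the subset-level likelihood adjustment
used in the paper are not shown.

    import numpy as np

    def averaged_credible_interval(subset_draws, level=0.95):
        # Estimate the endpoint quantiles of a scalar functional on each
        # subset, then average endpoint-wise across subsets.
        lo, hi = (1 - level) / 2, 1 - (1 - level) / 2
        ends = np.array([np.quantile(d, [lo, hi]) for d in subset_draws])
        return ends.mean(axis=0)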

@article{fds257973,
   Author = {Bhattacharya, A and Dunson, DB},
   Title = {Simplex Factor Models for Multivariate Unordered Categorical
             Data.},
   Journal = {Journal of the American Statistical Association},
   Volume = {107},
   Number = {497},
   Pages = {362-377},
   Year = {2012},
   Month = {March},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2011.646934},
   Abstract = {Gaussian latent factor models are routinely used for
             modeling of dependence in continuous, binary, and ordered
             categorical data. For unordered categorical variables,
             Gaussian latent factor models lead to challenging
             computation and complex modeling structures. As an
             alternative, we propose a novel class of simplex factor
             models. In the single-factor case, the model treats the
             different categorical outcomes as independent with unknown
             marginals. The model can characterize flexible dependence
             structures parsimoniously with few factors, and as factors
             are added, any multivariate categorical data distribution
             can be accurately approximated. Using a Bayesian approach
             for computation and inferences, a Markov chain Monte Carlo
             (MCMC) algorithm is proposed that scales well with
             increasing dimension, with the number of factors treated as
             unknown. We develop an efficient proposal for updating the
             base probability vector in hierarchical Dirichlet models.
             Theoretical properties are described, and we evaluate the
             approach through simulation examples. Applications are
             described for modeling dependence in nucleotide sequences
             and prediction from high-dimensional categorical
             features.},
   Doi = {10.1080/01621459.2011.646934},
   Key = {fds257973}
}

@article{fds362586,
   Author = {Papadogeorgou, G and Zhang, Z and Dunson, DB},
   Title = {Soft tensor regression},
   Journal = {Journal of Machine Learning Research},
   Volume = {22},
   Pages = {1-53},
   Year = {2021},
   Month = {January},
   Abstract = {Statistical methods relating tensor predictors to scalar
             outcomes in a regression model generally vectorize the
             tensor predictor and estimate the coefficients of its
             entries employing some form of regularization, use summaries
             of the tensor covariate, or use a low dimensional
             approximation of the coefficient tensor. However, low rank
             approximations of the coefficient tensor can suffer if the
             true rank is not small. We propose a tensor regression
             framework which assumes a soft version of the parallel
             factors (PARAFAC) approximation. In contrast to classic
             PARAFAC where each entry of the coefficient tensor is the
             sum of products of row-specific contributions across the
             tensor modes, the soft tensor regression (Softer) framework
             allows the row-specific contributions to vary around an
             overall mean. We follow a Bayesian approach to inference,
             and show that softening the PARAFAC increases model
             flexibility, leads to improved estimation of coefficient
             tensors, more accurate identification of important predictor
             entries, and more precise predictions, even for a low
             approximation rank. From a theoretical perspective, we show
             that employing Softer leads to a weakly consistent posterior
             distribution of the coefficient tensor, irrespective of the
             true or approximation tensor rank, a result that is not true
             when employing the classic PARAFAC for tensor regression. In
             the context of our motivating application, we adapt Softer
             to symmetric and semi-symmetric tensor predictors and
             analyze the relationship between brain network
             characteristics and human traits.},
   Key = {fds362586}
}

@article{fds257884,
   Author = {Weinberg, CR and Dunson, DB},
   Title = {Some Issues in Assessing Human Fertility},
   Journal = {Journal of the American Statistical Association},
   Volume = {95},
   Number = {449},
   Pages = {300-303},
   Booktitle = {Statistics in the 21st Century},
   Publisher = {Informa UK Limited},
   Year = {2000},
   Month = {March},
   ISBN = {9781420035391},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2000.10473928},
   Abstract = {© 2002 by American Statistical Association. One of the
             pleasures of working as an applied statistician is the
             awareness it brings of the wide diversity of scientific
             fields to which our profession contributes critical concepts
             and methods. My own awareness was enhanced by accepting the
             invitation from the editors of JASA to serve as guest editor
             for this section of vignettes celebrating the significant
             contributions made by statisticians to the life and medical
             sciences in the 20th century. The goal of the project was
             not an encyclopedic catalog of all the major developments,
             but rather a sampling of some of the most interesting work.
             Of the 12 vignettes, 10 focus on particular areas of
             application: environmetrics, wildlife populations, animal
             breeding, human fertility, toxicology, medical diagnosis,
             clinical trials, environmental epidemiology, statistical
             genetics, and molecular biology. The two vignettes that
             begin the series focus more on methods that have had, or
             promise to have, impact across a range of subject matter
             areas: survival analysis and causal analysis.},
   Doi = {10.1080/01621459.2000.10473928},
   Key = {fds257884}
}

@article{fds257966,
   Author = {Bhattacharya, A and Dunson, DB},
   Title = {Sparse Bayesian infinite factor models.},
   Journal = {Biometrika},
   Volume = {98},
   Number = {2},
   Pages = {291-306},
   Year = {2011},
   Month = {June},
   ISSN = {0006-3444},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23049129},
   Abstract = {We focus on sparse modelling of high-dimensional covariance
             matrices using Bayesian latent factor models. We propose a
             multiplicative gamma process shrinkage prior on the factor
             loadings which allows introduction of infinitely many
             factors, with the loadings increasingly shrunk towards zero
             as the column index increases. We use our prior on a
             parameter-expanded loading matrix to avoid the order
             dependence typical in factor analysis models and develop an
             efficient Gibbs sampler that scales well as data
             dimensionality increases. The gain in efficiency is achieved
             by the joint conjugacy property of the proposed prior, which
             allows block updating of the loadings matrix. We propose an
             adaptive Gibbs sampler for automatically truncating the
             infinite loading matrix through selection of the number of
             important factors. Theoretical results are provided on the
             support of the prior and truncation approximation bounds. A
             fast algorithm is proposed to produce approximate Bayes
             estimates. Latent factor regression methods are developed
             for prediction and variable selection in applications with
             high-dimensional correlated predictors. Operating
             characteristics are assessed through simulation studies, and
             the approach is applied to predict survival times from gene
             expression data.},
   Doi = {10.1093/biomet/asr013},
   Key = {fds257966}
}
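
A sketch of drawing a loadings matrix under the multiplicative gamma process
shrinkage prior described above, with columns increasingly shrunk toward zero
as the column index grows. The local precision mixing and the adaptive
truncation from the paper are omitted, and the hyperparameter values are
illustrative.

    import numpy as np

    def sample_mgp_loadings(p, H, a1=2.0, a2=3.0, rng=None):
        # delta_1 ~ Gamma(a1, 1), delta_h ~ Gamma(a2, 1) for h > 1;
        # tau_h = prod_{l <= h} delta_l; lambda_{jh} ~ N(0, 1 / tau_h).
        # With a2 > 1 the precisions tau_h grow stochastically in h.
        rng = rng or np.random.default_rng()
        delta = np.concatenate(([rng.gamma(a1)], rng.gamma(a2, size=H - 1)))
        tau = np.cumprod(delta)
        return rng.normal(scale=1.0 / np.sqrt(tau), size=(p, H))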

@article{fds258031,
   Author = {Armagan, A and Dunson, DB},
   Title = {Sparse variational analysis of large longitudinal data
             sets},
   Journal = {Statistics & Probability Letters},
   Year = {2009},
   Key = {fds258031}
}

@article{fds257969,
   Author = {Armagan, A and Dunson, D},
   Title = {Sparse variational analysis of linear mixed models for large
             data sets},
   Journal = {Statistics and Probability Letters},
   Volume = {81},
   Number = {8},
   Pages = {1056-1062},
   Publisher = {Elsevier BV},
   Year = {2011},
   Month = {August},
   ISSN = {0167-7152},
   url = {http://dx.doi.org/10.1016/j.spl.2011.02.029},
   Abstract = {It is increasingly common to be faced with longitudinal or
             multi-level data sets that have large numbers of predictors
             and/or a large sample size. Current methods of fitting and
             inference for mixed effects models tend to perform poorly in
             such settings. When there are many variables, it is
             appealing to allow uncertainty in subset selection and to
             obtain a sparse characterization of the data. Bayesian
             methods are available to address these goals using Markov
             chain Monte Carlo (MCMC), but MCMC is very computationally
             expensive and can be infeasible in large p and/or large n
             problems. As a fast approximate Bayes solution, we recommend
             a novel approximation to the posterior relying on
             variational methods. Variational methods are used to
             approximate the posterior of the parameters in a
             decomposition of the variance components, with priors chosen
             to obtain a sparse solution that allows selection of random
             effects. The method is evaluated through a simulation study,
             and applied to an epidemiological application. © 2011
             Elsevier B.V.},
   Doi = {10.1016/j.spl.2011.02.029},
   Key = {fds257969}
}

@article{fds362728,
   Author = {Peruzzi, M and Dunson, DB},
   Title = {Spatial Multivariate Trees for Big Data Bayesian
             Regression.},
   Journal = {Journal of machine learning research : JMLR},
   Volume = {23},
   Pages = {17},
   Year = {2022},
   Month = {January},
   Abstract = {High resolution geospatial data are challenging because
             standard geostatistical models based on Gaussian processes
             are known to not scale to large data sizes. While progress
             has been made towards methods that can be computed more
             efficiently, considerably less attention has been devoted to
             methods for large scale data that allow the description of
             complex relationships between several outcomes recorded at
             high resolutions by different sensors. Our Bayesian
             multivariate regression models based on spatial multivariate
             trees (SpamTrees) achieve scalability via conditional
             independence assumptions on latent random effects following
             a treed directed acyclic graph. Information-theoretic
             arguments and considerations on computational efficiency
             guide the construction of the tree and the related efficient
             sampling algorithms in imbalanced multivariate settings. In
             addition to simulated data examples, we illustrate SpamTrees
             using a large climate data set which combines satellite data
             with land-based station data. Software and source code are
             available on CRAN at https://CRAN.R-project.org/package=spamtree.},
   Key = {fds362728}
}

@article{fds257879,
   Author = {Wang, E and Salazar, E and Dunson, D and Carin, L},
   Title = {Spatio-temporal modeling of legislation and
             votes},
   Journal = {Bayesian Analysis},
   Volume = {8},
   Number = {1},
   Pages = {233-268},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2013},
   Month = {March},
   ISSN = {1936-0975},
   url = {http://dx.doi.org/10.1214/13-BA810},
   Abstract = {A model is presented for analysis of multivariate binary
             data with spatio-temporal dependencies, and applied to
             congressional roll call data from the United States House of
             Representatives and Senate. The model considers each
             legislator's constituency (location), the congressional
             session (time) of each vote, and the details (text) of each
             piece of legislation. The model can predict votes of new
             legislation from only text, while imposing smooth temporal
             evolution of legislator latent features, and correlation of
             legislators with adjacent constituencies. Additionally, the
             model estimates the number of latent dimensions required to
             represent the data. A Gibbs sampler is developed for
             posterior inference. The model is demonstrated as an
             exploratory tool of legislation and it performs well in
             quantitative comparisons to a traditional ideal-point model.
             © 2013 International Society for Bayesian
             Analysis.},
   Doi = {10.1214/13-BA810},
   Key = {fds257879}
}

@article{fds258018,
   Author = {Dunson, DB},
   Title = {Special issue of statistical methods in medical research on
             reproductive studies},
   Journal = {Statistical Methods in Medical Research},
   Volume = {15},
   Number = {2},
   Pages = {91-92},
   Publisher = {SAGE Publications},
   Year = {2006},
   Month = {April},
   ISSN = {0962-2802},
   url = {http://dx.doi.org/10.1191/0962280206sm432ed},
   Doi = {10.1191/0962280206sm432ed},
   Key = {fds258018}
}

@article{fds257858,
   Author = {Chen, CWS and Dunson, D and Frühwirth-Schnatter, S and Walker,
             SG},
   Title = {Special issue on Bayesian computing, methods and
             applications},
   Journal = {Computational Statistics and Data Analysis},
   Volume = {71},
   Pages = {273},
   Publisher = {Elsevier BV},
   Year = {2014},
   Month = {January},
   ISSN = {0167-9473},
   url = {http://dx.doi.org/10.1016/j.csda.2013.10.011},
   Doi = {10.1016/j.csda.2013.10.011},
   Key = {fds257858}
}

@article{fds357954,
   Author = {Dunson, DB and Wu, HT and Wu, N},
   Title = {Spectral convergence of graph Laplacian and heat kernel
             reconstruction in L∞ from random samples},
   Journal = {Applied and Computational Harmonic Analysis},
   Volume = {55},
   Pages = {282-336},
   Year = {2021},
   Month = {November},
   url = {http://dx.doi.org/10.1016/j.acha.2021.06.002},
   Abstract = {In the manifold setting, we provide a series of spectral
             convergence results quantifying how the eigenvectors and
             eigenvalues of the graph Laplacian converge to the
             eigenfunctions and eigenvalues of the Laplace-Beltrami
             operator in the L∞ sense. Based on these results,
             convergence of the proposed heat kernel approximation
             algorithm, as well as the convergence rate, to the exact
             heat kernel is guaranteed. To our knowledge, this is the
             first work exploring the spectral convergence in the L∞
             sense and providing a numerical heat kernel reconstruction
             from the point cloud with theoretical guarantees.},
   Doi = {10.1016/j.acha.2021.06.002},
   Key = {fds357954}
}
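
A bare-bones numerical illustration of the objects studied above: build a
kernelized graph Laplacian from a point cloud and approximate the heat kernel
through its spectral decomposition. The paper's construction involves careful
normalizations and bandwidth choices that this sketch ignores.

    import numpy as np

    def heat_kernel_approximation(X, t=0.1, eps=0.5):
        # Gaussian affinities -> unnormalized graph Laplacian L = D - W,
        # then e^{-tL} is assembled from the eigendecomposition of L.
        D2 = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1)
        W = np.exp(-D2 / eps)
        L = np.diag(W.sum(axis=1)) - W
        evals, evecs = np.linalg.eigh(L)
        return evecs @ np.diag(np.exp(-t * evals)) @ evecs.T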

@article{fds257882,
   Author = {Dunson, DB and Haseman, JK and van Birgelen, AP and Stasiewicz, S and Tennant, RW},
   Title = {Statistical analysis of skin tumor data from Tg.AC mouse
             bioassays.},
   Journal = {Toxicological sciences : an official journal of the Society
             of Toxicology},
   Volume = {55},
   Number = {2},
   Pages = {293-302},
   Year = {2000},
   Month = {June},
   url = {http://dx.doi.org/10.1093/toxsci/55.2.293},
   Abstract = {New strategies for identifying chemical carcinogens and
             assessing risk have been proposed based on the Tg.AC
             (zetaglobin promoted v-Ha-ras) transgenic mouse. Preliminary
             studies suggest that the Tg. AC mouse bioassay may be an
             effective means of quickly evaluating the carcinogenic
             potential of a test agent. The skin of the Tg.AC mouse is
             genetically initiated, and the induction of epidermal
             papillomas in response to dermal or oral exposure to a
             chemical agent acts as a reporter phenotype of the activity
             of the test chemical. In Tg.AC mouse bioassays, the test
             agent is typically applied topically for up to 26 weeks, and
             the number of papillomas in the treated area is counted
             weekly. Statistical analyses are complicated by
             within-animal and serial dependency in the papilloma counts,
             survival differences between animals, and missing data. In
             this paper, we describe a statistical model for the analysis
             of skin tumor data from a Tg.AC mouse bioassay. The model
             separates effects on papilloma latency and multiplicity and
             accommodates important features of the data, including
             variability in expression of the transgene and dependency in
             the tumor counts. Methods are described for carcinogenicity
             testing and risk assessment. We illustrate our approach
             using data from a study of the effect of 2,3,7,
             8-tetrachlorodibenzo-p-dioxin (TCDD) exposure on
             tumorigenesis.},
   Doi = {10.1093/toxsci/55.2.293},
   Key = {fds257882}
}

@article{fds371512,
   Author = {Plummer, S and Zhou, S and Bhattacharya, A and Dunson, D and Pati,
             D},
   Title = {Statistical Guarantees for Transformation Based Models with
             Applications to Implicit Variational Inference},
   Journal = {Proceedings of Machine Learning Research},
   Volume = {130},
   Pages = {2449-2457},
   Year = {2021},
   Month = {January},
   Abstract = {Transformation-based methods have been an attractive
             approach in non-parametric inference for problems such as
             unconditional and conditional density estimation due to
             their unique hierarchical structure that models the data as
             flexible transformation of a set of common latent variables.
             More recently, transformation-based models have been used in
             variational inference (VI) to construct flexible implicit
             families of variational distributions. However, their use in
             both nonparametric inference and variational inference lacks
             theoretical justification. We provide theoretical
             justification for the use of non-linear latent variable
             models (NL-LVMs) in non-parametric inference by showing that
             the support of the transformation induced prior in the space
             of densities is sufficiently large in the L1 sense. We also
             show that, when a Gaussian process (GP) prior is placed on
             the transformation function, the posterior concentrates at
             the optimal rate up to a logarithmic factor. Adopting the
             flexibility demonstrated in the non-parametric setting, we
             use the NL-LVM to construct an implicit family of
             variational distributions, deemed GP-IVI. We delineate
             sufficient conditions under which GP-IVI achieves optimal
             risk bounds and approximates the true posterior in the sense
             of the Kullback-Leibler divergence. To the best of our
             knowledge, this is the first work on providing theoretical
             guarantees for implicit variational inference.},
   Key = {fds371512}
}

@article{fds333225,
   Author = {Dunson, DB},
   Title = {Statistics in the big data era: Failures of the
             machine},
   Journal = {Statistics and Probability Letters},
   Volume = {136},
   Pages = {4-9},
   Publisher = {Elsevier BV},
   Year = {2018},
   Month = {May},
   url = {http://dx.doi.org/10.1016/j.spl.2018.02.028},
   Abstract = {There is vast interest in automated methods for complex data
             analysis. However, there is a lack of consideration of (1)
             interpretability, (2) uncertainty quantification, (3)
             applications with limited training data, and (4) selection
             bias. Statistical methods can achieve (1)-(4) with a change
             in focus.},
   Doi = {10.1016/j.spl.2018.02.028},
   Key = {fds333225}
}

@article{fds258032,
   Author = {Bornkamp, B and Ickstadt, K and Dunson, D},
   Title = {Stochastically ordered multiple regression.},
   Journal = {Biostatistics (Oxford, England)},
   Volume = {11},
   Number = {3},
   Pages = {419-431},
   Year = {2010},
   Month = {July},
   ISSN = {1465-4644},
   url = {http://dx.doi.org/10.1093/biostatistics/kxq001},
   Abstract = {In various application areas, prior information is available
             about the direction of the effects of multiple predictors on
             the conditional response distribution. For example, in
             epidemiology studies of potentially adverse exposures and
             continuous health responses, one can typically assume a
             priori that increasing the level of an exposure does not
             lead to an improvement in the health response. Such an
             assumption can be formalized through a stochastic ordering
             assumption in each of the exposures, leading to a
             potentially large improvement in efficiency in nonparametric
             modeling of the conditional response distribution. This
             article proposes a Bayesian nonparametric approach to this
             problem based on characterizing the conditional response
             density as a Gaussian mixture, with the locations of the
             Gaussian means varying flexibly with predictors subject to
             minimal constraints to ensure stochastic ordering.
             Theoretical properties are considered and Markov chain Monte
             Carlo methods are developed for posterior computation. The
             methods are illustrated using simulation examples and a
             reproductive epidemiology application.},
   Doi = {10.1093/biostatistics/kxq001},
   Key = {fds258032}
}

@article{fds257975,
   Author = {Bhattacharya, A and Dunson, DB},
   Title = {Strong consistency of nonparametric Bayes density estimation
             on compact metric spaces with applications to specific
             manifolds.},
   Journal = {Annals of the Institute of Statistical Mathematics},
   Volume = {64},
   Number = {4},
   Pages = {687-714},
   Year = {2012},
   Month = {August},
   ISSN = {0020-3157},
   url = {http://dx.doi.org/10.1007/s10463-011-0341-x},
   Abstract = {This article considers a broad class of kernel mixture
             density models on compact metric spaces and manifolds.
             Following a Bayesian approach with a nonparametric prior on
             the location mixing distribution, sufficient conditions are
             obtained on the kernel, prior and the underlying space for
             strong posterior consistency at any continuous density. The
             prior is also allowed to depend on the sample size n and
             sufficient conditions are obtained for weak and strong
             consistency. These conditions are verified on compact
             Euclidean spaces using multivariate Gaussian kernels, on the
             hypersphere using a von Mises-Fisher kernel and on the
             planar shape space using complex Watson kernels.},
   Doi = {10.1007/s10463-011-0341-x},
   Key = {fds257975}
}

@article{fds257928,
   Author = {Slama, R and Ducot, B and Keiding, N and Bouyer, J},
   Title = {Studying human fertility and environmental
             exposures.},
   Journal = {Environmental health perspectives},
   Volume = {112},
   Number = {11},
   Pages = {A604},
   Year = {2004},
   Month = {August},
   ISSN = {0091-6765},
   url = {http://dx.doi.org/10.1289/ehp.112-1247502},
   Doi = {10.1289/ehp.112-1247502},
   Key = {fds257928}
}

@article{fds329112,
   Author = {Bhattacharya, A and Dunson, DB and Pati, D and Pillai,
             NS},
   Title = {Sub-optimality of some continuous shrinkage
             priors},
   Journal = {Stochastic Processes and their Applications},
   Volume = {126},
   Number = {12},
   Pages = {3828-3842},
   Publisher = {Elsevier BV},
   Year = {2016},
   Month = {December},
   url = {http://dx.doi.org/10.1016/j.spa.2016.08.007},
   Abstract = {Two-component mixture priors provide a traditional way to
             induce sparsity in high-dimensional Bayes models. However,
             several aspects of such a prior, including computational
             complexities in high-dimensions, interpretation of exact
             zeros and non-sparse posterior summaries under standard loss
             functions, have motivated an amazing variety of continuous
             shrinkage priors, which can be expressed as global–local
             scale mixtures of Gaussians. Interestingly, we demonstrate
             that many commonly used shrinkage priors, including the
             Bayesian Lasso, do not have adequate posterior concentration
             in high-dimensional settings.},
   Doi = {10.1016/j.spa.2016.08.007},
   Key = {fds329112}
}
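
The "global-local scale mixtures of Gaussians" form mentioned in this abstract is easy
to simulate. A minimal sketch (my own; tau and the scale distributions are assumptions):
beta_j ~ N(0, tau^2 lambda_j^2), with exponential lambda_j^2 giving the Bayesian lasso
and half-Cauchy lambda_j giving the horseshoe's heavier tails and sharper spike at zero.

import numpy as np

rng = np.random.default_rng(1)
p, tau = 10000, 0.1

lam2_lasso = rng.exponential(1.0, size=p)        # Bayesian lasso: exponential local variances
lam_hs = np.abs(rng.standard_cauchy(size=p))     # horseshoe: half-Cauchy local scales

beta_lasso = rng.normal(0.0, tau * np.sqrt(lam2_lasso))
beta_hs = rng.normal(0.0, tau * lam_hs)

for name, b in [("lasso", beta_lasso), ("horseshoe", beta_hs)]:
    print(f"{name:9s} frac |beta| < 0.01: {np.mean(np.abs(b) < 0.01):.2f}"
          f"   max |beta|: {np.abs(b).max():.1f}")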

@article{fds322542,
   Author = {Tang, K and Dunson, DB and Su, Z and Liu, R and Zhang, J and Dong,
             J},
   Title = {Subspace segmentation by dense block and sparse
             representation.},
   Journal = {Neural networks : the official journal of the International
             Neural Network Society},
   Volume = {75},
   Pages = {66-76},
   Year = {2016},
   Month = {March},
   url = {http://dx.doi.org/10.1016/j.neunet.2015.11.011},
   Abstract = {Subspace segmentation is a fundamental topic in computer
             vision and machine learning. However, the success of many
             popular methods is about independent subspace segmentation
             instead of the more flexible and realistic disjoint subspace
             segmentation. Focusing on the disjoint subspaces, we provide
             theoretical and empirical evidence of inferior performance
             for popular algorithms such as LRR. To solve these problems,
             we propose a novel dense block and sparse representation
             (DBSR) for subspace segmentation and provide related
             theoretical results. DBSR minimizes a combination of the
              ℓ1,1-norm and maximum singular value of the representation
             matrix, leading to a combination of dense block and
             sparsity. We provide experimental results for synthetic and
             benchmark data showing that our method can outperform the
             state-of-the-art.},
   Doi = {10.1016/j.neunet.2015.11.011},
   Key = {fds322542}
}

@article{fds258008,
   Author = {Dunson, DB and Weinberg, CR and Perreault, SD and Chapin,
             RE},
   Title = {Summarizing the motion of self-propelled cells: applications
             to sperm motility.},
   Journal = {Biometrics},
   Volume = {55},
   Number = {2},
   Pages = {537-543},
   Year = {1999},
   Month = {June},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.1999.00537.x},
   Abstract = {Proper characterization of the motion of spermatozoa is an
             important prerequisite for interpreting differences in sperm
             motility that might arise from exposure to toxicants.
             Patterns of sperm movement can be extremely complex. On the
             basis of an exponential model that relates the discretely
             approximated curvilinear velocity to the tracking rate, we
             develop a statistic that indexes the predictability of the
             path for individual sperm. We summarize the path of each
             sperm using this and two other statistics: (1) the path
             displacement velocity and (2) linearity of movement. We
             apply the method to a set of rat sperm tracks representative
             of both normal and abnormal motion characteristics.},
   Doi = {10.1111/j.0006-341x.1999.00537.x},
   Key = {fds258008}
}
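
Two of the track summaries named above are straightforward to compute from a digitized
track. A minimal sketch under my own assumptions (positions sampled at a fixed rate;
summaries defined as net displacement per unit time, and displacement divided by total
path length):

import numpy as np

rng = np.random.default_rng(2)

def track_summaries(xy, dt):
    """xy: (T, 2) array of head positions sampled every dt seconds."""
    steps = np.diff(xy, axis=0)
    path_len = np.linalg.norm(steps, axis=1).sum()   # total curvilinear path length
    net_disp = np.linalg.norm(xy[-1] - xy[0])        # straight-line displacement
    duration = dt * (len(xy) - 1)
    return {"displacement_velocity": net_disp / duration,
            "linearity": net_disp / path_len}

# A fake two-second track sampled at 30 Hz: steady drift plus jitter.
t = np.arange(61)[:, None]
xy = 0.5 * t * np.array([[1.0, 0.2]]) + rng.normal(0.0, 1.0, size=(61, 2))
print(track_summaries(xy, dt=1 / 30))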

@article{fds354254,
   Author = {Talbot, A and Dunson, D and Dzirasa, K and Carlson,
             D},
   Title = {Supervised Autoencoders Learn Robust Joint Factor Models of
             Neural Activity},
   Journal = {arXiv preprint arXiv:2004.05209},
   Volume = {abs/2004.05209},
   Year = {2020},
   Key = {fds354254}
}

@article{fds362761,
   Author = {Durante, D and Dunson, DB},
   Title = {Supplementary Material For “Bayesian Inference And Testing
             Of Group Differences In Brain Networks”},
   Journal = {Bayesian Analysis},
   Volume = {13},
   Number = {1},
   Pages = {1-2},
   Year = {2018},
   Month = {January},
   url = {http://dx.doi.org/10.1214/16-BA1030SUPP},
   Abstract = {The supplementary materials contain proofs of Propositions
              1, 2 and 3, providing theoretical support for the
              methodology developed in the article “Bayesian Inference
              and Testing of Group Differences in Brain
              Networks”.},
   Doi = {10.1214/16-BA1030SUPP},
   Key = {fds362761}
}

@article{fds342197,
   Author = {Wang, L and Zhang, Z and Dunson, D},
   Title = {Symmetric Bilinear Regression for Signal Subgraph
             Estimation.},
   Journal = {IEEE transactions on signal processing : a publication of
             the IEEE Signal Processing Society},
   Volume = {67},
   Number = {7},
   Pages = {1929-1940},
   Year = {2019},
   Month = {April},
   url = {http://dx.doi.org/10.1109/tsp.2019.2899818},
   Abstract = {There is an increasing interest in learning a set of small
             outcome-relevant subgraphs in network-predictor regression.
             The extracted signal subgraphs can greatly improve the
             interpretation of the association between the network
             predictor and the response. In brain connectomics, the brain
             network for an individual corresponds to a set of
             interconnections among brain regions and there is a strong
             interest in linking the brain connectome to human cognitive
             traits. Modern neuroimaging technology allows a very fine
             segmentation of the brain, producing very large structural
             brain networks. Therefore, accurate and efficient methods
             for identifying a set of small predictive subgraphs become
             crucial, leading to discovery of key interconnected brain
             regions related to the trait and important insights on the
             mechanism of variation in human cognitive traits. We propose
             a symmetric bilinear model with <i>L</i><sub>1</sub> penalty
             to search for small clique subgraphs that contain useful
             information about the response. A coordinate descent
             algorithm is developed to estimate the model where we derive
             analytical solutions for a sequence of conditional convex
             optimizations. Application of this method on human
             connectome and language comprehension data shows interesting
             discovery of relevant interconnections among several small
             sets of brain regions and better predictive performance than
             competitors.},
   Doi = {10.1109/tsp.2019.2899818},
   Key = {fds342197}
}
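
A deliberately simplified sketch of the model class above, assuming a rank-1 symmetric
bilinear predictor y ~ b'Ab and using plain proximal gradient descent rather than the
paper's coordinate descent with analytical conditional solutions; the soft-threshold
step plays the role of the L1 penalty.

import numpy as np

rng = np.random.default_rng(8)
V, n = 20, 300                                   # nodes, networks

A = rng.normal(size=(n, V, V))
A = (A + A.transpose(0, 2, 1)) / 2               # symmetric adjacency matrices
b_true = np.zeros(V)
b_true[[2, 5, 9]] = 1.0                          # signal clique {2, 5, 9}
y = np.einsum('nij,i,j->n', A, b_true, b_true) + 0.1 * rng.normal(size=n)

b = rng.normal(0.0, 0.1, size=V)                 # initialize near zero
lam, lr = 0.05, 0.01
for _ in range(800):
    resid = np.einsum('nij,i,j->n', A, b, b) - y
    grad = 4.0 * np.einsum('n,nij,j->i', resid, A, b) / n
    b -= lr * grad
    b = np.sign(b) * np.maximum(np.abs(b) - lr * lam, 0.0)  # soft-threshold (L1)
print("largest |b| coordinates:", np.argsort(-np.abs(b))[:3])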

@article{fds347354,
   Author = {Mukhopadhyay, M and Dunson, DB},
   Title = {Targeted Random Projection for Prediction From
             High-Dimensional Features},
   Journal = {Journal of the American Statistical Association},
   Volume = {115},
   Number = {532},
   Pages = {1998-2010},
   Year = {2020},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2019.1677240},
   Abstract = {We consider the problem of computationally efficient
             prediction with high dimensional and highly correlated
             predictors when accurate variable selection is effectively
             impossible. Direct application of penalization or Bayesian
             methods implemented with Markov chain Monte Carlo can be
             computationally daunting and unstable. A common solution is
             first stage dimension reduction through screening or
             projecting the design matrix to a lower dimensional
             hyper-plane. Screening is highly sensitive to threshold
             choice, while projections often have poor performance in
             very high-dimensions. We propose targeted random projection
             (TARP) to combine positive aspects of both strategies. TARP
             uses screening to order the inclusion probabilities of the
             features in the projection matrix used for dimension
             reduction, leading to data-informed sparsity. We provide
             theoretical support for a Bayesian predictive algorithm
             based on TARP, including statistical and computational
             complexity guarantees. Examples for simulated and real data
             applications illustrate gains relative to a variety of
             competitors. Supplementary materials for this article are
             available online.},
   Doi = {10.1080/01621459.2019.1677240},
   Key = {fds347354}
}
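
A minimal sketch of the TARP recipe as described above (my reading, not the authors'
code): a marginal screening statistic sets data-informed inclusion probabilities for
the features entering a sparse random projection, and a simple ridge fit is then done
in the projected space.

import numpy as np

rng = np.random.default_rng(3)
n, p, m = 200, 5000, 30                  # samples, features, projected dimension

X = rng.normal(size=(n, p))
beta = np.zeros(p)
beta[:10] = 2.0                          # only ten truly active features
y = X @ beta + rng.normal(size=n)

score = np.abs(X.T @ y) / n              # marginal screening statistic
incl_prob = score / score.max()          # data-informed inclusion probabilities

# Sparse projection: a feature enters column k only if its coin flip succeeds.
mask = rng.random((p, m)) < incl_prob[:, None]
R = mask * rng.choice([-1.0, 1.0], size=(p, m))

Z = X @ R                                # projected design, n x m
coef = np.linalg.solve(Z.T @ Z + np.eye(m), Z.T @ y)   # ridge fit in low dimension
print("train RMSE:", round(float(np.sqrt(np.mean((Z @ coef - y) ** 2))), 3))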

@article{fds325339,
   Author = {Johndrow, JE and Bhattacharya, A and Dunson, DB},
   Title = {TENSOR DECOMPOSITIONS AND SPARSE LOG-LINEAR
             MODELS.},
   Journal = {Annals of statistics},
   Volume = {45},
   Number = {1},
   Pages = {1-38},
   Year = {2017},
   Month = {January},
   url = {http://dx.doi.org/10.1214/15-aos1414},
   Abstract = {Contingency table analysis routinely relies on log-linear
             models, with latent structure analysis providing a common
             alternative. Latent structure models lead to a reduced rank
             tensor factorization of the probability mass function for
             multivariate categorical data, while log-linear models
             achieve dimensionality reduction through sparsity. Little is
             known about the relationship between these notions of
             dimensionality reduction in the two paradigms. We derive
             several results relating the support of a log-linear model
             to nonnegative ranks of the associated probability tensor.
             Motivated by these findings, we propose a new collapsed
             Tucker class of tensor decompositions, which bridge existing
             PARAFAC and Tucker decompositions, providing a more flexible
             framework for parsimoniously characterizing multivariate
             categorical data. Taking a Bayesian approach to inference,
             we illustrate empirical advantages of the new
             decompositions.},
   Doi = {10.1214/15-aos1414},
   Key = {fds325339}
}
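
The latent structure (PARAFAC) factorization of a categorical probability mass
function that the abstract refers to can be written in a few lines; a minimal sketch
with arbitrary dimensions:

import numpy as np

rng = np.random.default_rng(12)
H, levels = 3, (2, 3, 4)                   # latent classes; category counts per variable

nu = rng.dirichlet(np.ones(H))             # latent class probabilities
lam = [rng.dirichlet(np.ones(c), size=H) for c in levels]  # per-class pmfs, H x c_j

# Rank-H nonnegative PARAFAC: p(a,b,c) = sum_h nu_h lam1[h,a] lam2[h,b] lam3[h,c]
P = np.einsum('h,ha,hb,hc->abc', nu, lam[0], lam[1], lam[2])
print(P.shape, round(float(P.sum()), 6))   # (2, 3, 4), sums to 1.0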

@article{fds342827,
   Author = {Zhang, Z and Allen, GI and Zhu, H and Dunson, D},
   Title = {Tensor network factorizations: Relationships between brain
             structural connectomes and traits.},
   Journal = {NeuroImage},
   Volume = {197},
   Pages = {330-343},
   Year = {2019},
   Month = {August},
   url = {http://dx.doi.org/10.1016/j.neuroimage.2019.04.027},
   Abstract = {Advanced brain imaging techniques make it possible to
             measure individuals' structural connectomes in large cohort
             studies non-invasively. Given the availability of large
             scale data sets, it is extremely interesting and important
             to build a set of advanced tools for structural connectome
             extraction and statistical analysis that emphasize both
             interpretability and predictive power. In this paper, we
             developed and integrated a set of toolboxes, including an
             advanced structural connectome extraction pipeline and a
             novel tensor network principal components analysis (TN-PCA)
             method, to study relationships between structural
             connectomes and various human traits such as alcohol and
             drug use, cognition and motion abilities. The structural
             connectome extraction pipeline produces a set of connectome
             features for each subject that can be organized as a tensor
             network, and TN-PCA maps the high-dimensional tensor network
             data to a lower-dimensional Euclidean space. Combined with
             classical hypothesis testing, canonical correlation analysis
             and linear discriminant analysis techniques, we analyzed
             over 1100 scans of 1076 subjects from the Human Connectome
             Project (HCP) and the Sherbrooke test-retest data set, as
             well as 175 human traits measuring different domains
             including cognition, substance use, motor, sensory and
             emotion. The test-retest data validated the developed
             algorithms. With the HCP data, we found that structural
             connectomes are associated with a wide range of traits,
             e.g., fluid intelligence, language comprehension, and motor
             skills are associated with increased cortical-cortical brain
             structural connectivity, while the use of alcohol, tobacco,
             and marijuana are associated with decreased
             cortical-cortical connectivity. We also demonstrated that
             our extracted structural connectomes and analysis method can
             give superior prediction accuracies compared with
             alternative connectome constructions and other tensor and
             network regression methods.},
   Doi = {10.1016/j.neuroimage.2019.04.027},
   Key = {fds342827}
}

@article{fds258019,
   Author = {Chen, Z and Dunson, DB},
   Title = {The authors replied as follows [2]},
   Journal = {Biometrics},
   Volume = {62},
   Number = {2},
   Pages = {623-624},
   Publisher = {WILEY},
   Year = {2006},
   Month = {January},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2006.00586_2.x},
   Doi = {10.1111/j.1541-0420.2006.00586_2.x},
   Key = {fds258019}
}

@article{fds257983,
   Author = {Ren, L and Dunson, DB and Carin, L},
   Title = {The dynamic hierarchical Dirichlet process},
   Journal = {Proceedings of the 25th International Conference on Machine
             Learning},
   Pages = {824-831},
   Year = {2008},
   Month = {January},
   url = {http://dx.doi.org/10.1145/1390156.1390260},
   Abstract = {The dynamic hierarchical Dirichlet process (dHDP) is
             developed to model the time-evolving statistical properties
             of sequential data sets. The data collected at any time
             point are represented via a mixture associated with an
             appropriate underlying model, in the framework of HDP. The
             statistical properties of data collected at consecutive time
             points are linked via a random parameter that controls their
             probabilistic similarity. The sharing mechanisms of the
             time-evolving data are derived, and a relatively simple
             Markov Chain Monte Carlo sampler is developed. Experimental
             results are presented to demonstrate the model. Copyright
             2008 by the author(s)/owner(s).},
   Doi = {10.1145/1390156.1390260},
   Key = {fds257983}
}

@article{fds326037,
   Author = {McKinney, M and Moffitt, AB and Gaulard, P and Travert, M and De Leval,
             L and Nicolae, A and Raffeld, M and Jaffe, ES and Pittaluga, S and Xi, L and Heavican, T and Iqbal, J and Belhadj, K and Delfau-Larue, MH and Fataccioli, V and Czader, MB and Lossos, IS and Chapman-Fredricks,
             JR and Richards, KL and Fedoriw, Y and Ondrejka, SL and Hsi, ED and Low, L and Weisenburger, D and Chan, WC and Mehta-Shah, N and Horwitz, S and Bernal-Mizrachi, L and Flowers, CR and Beaven, AW and Parihar, M and Baseggio, L and Parrens, M and Moreau, A and Sujobert, P and Pilichowska, M and Evens, AM and Chadburn, A and Au-Yeung, RKH and Srivastava, G and Choi, WWL and Goodlad, JR and Aurer, I and Basic-Kinda, S and Gascoyne, RD and Davis, NS and Li, G and Zhang, J and Rajagopalan, D and Reddy, A and Love, C and Levy, S and Zhuang, Y and Datta, J and Dunson, DB and Davé, SS},
   Title = {The Genetic Basis of Hepatosplenic T-cell
             Lymphoma.},
   Journal = {Cancer Discov},
   Volume = {7},
   Number = {4},
   Pages = {369-379},
   Year = {2017},
   Month = {April},
   url = {http://dx.doi.org/10.1158/2159-8290.CD-16-0330},
   Abstract = {Hepatosplenic T-cell lymphoma (HSTL) is a rare and lethal
             lymphoma; the genetic drivers of this disease are unknown.
             Through whole-exome sequencing of 68 HSTLs, we define
             recurrently mutated driver genes and copy-number alterations
             in the disease. Chromatin-modifying genes, including SETD2,
             INO80, and ARID1B, were commonly mutated in HSTL, affecting
             62% of cases. HSTLs manifest frequent mutations in STAT5B
             (31%), STAT3 (9%), and PIK3CD (9%), for which there
             currently exist potential targeted therapies. In addition,
              we noted less frequent events in EZH2, KRAS, and TP53. SETD2
             was the most frequently silenced gene in HSTL. We
             experimentally demonstrated that SETD2 acts as a tumor
             suppressor gene. In addition, we found that mutations in
             STAT5B and PIK3CD activate critical signaling pathways
             important to cell survival in HSTL. Our work thus defines
             the genetic landscape of HSTL and implicates gene mutations
             linked to HSTL pathogenesis and potential treatment
             targets.Significance: We report the first systematic
             application of whole-exome sequencing to define the genetic
             basis of HSTL, a rare but lethal disease. Our work defines
             SETD2 as a tumor suppressor gene in HSTL and implicates
             genes including INO80 and PIK3CD in the disease. Cancer
              Discov; 7(4); 369-79. ©2017 AACR. See related commentary by
              Yoshida and Weinstock, p. 352. This article is highlighted
              in the In This Issue feature, p. 339.},
   Doi = {10.1158/2159-8290.CD-16-0330},
   Key = {fds326037}
}

@article{fds257881,
   Author = {Love, C and Sun, Z and Jima, D and Li, G and Zhang, J and Miles, R and Richards, KL and Dunphy, CH and Choi, WWL and Srivastava, G and Lugar,
             PL and Rizzieri, DA and Lagoo, AS and Bernal-Mizrachi, L and Mann, KP and Flowers, CR and Naresh, KN and Evens, AM and Chadburn, A and Gordon, LI and Czader, MB and Gill, JI and Hsi, ED and Greenough, A and Moffitt, AB and McKinney, M and Banerjee, A and Grubor, V and Levy, S and Dunson, DB and Dave, SS},
   Title = {The genetic landscape of mutations in Burkitt
             lymphoma.},
   Journal = {Nat Genet},
   Volume = {44},
   Number = {12},
   Pages = {1321-1325},
   Year = {2012},
   Month = {December},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23143597},
   Abstract = {Burkitt lymphoma is characterized by deregulation of MYC,
             but the contribution of other genetic mutations to the
             disease is largely unknown. Here, we describe the first
             completely sequenced genome from a Burkitt lymphoma tumor
             and germline DNA from the same affected individual. We
             further sequenced the exomes of 59 Burkitt lymphoma tumors
             and compared them to sequenced exomes from 94 diffuse large
             B-cell lymphoma (DLBCL) tumors. We identified 70 genes that
             were recurrently mutated in Burkitt lymphomas, including
             ID3, GNA13, RET, PIK3R1 and the SWI/SNF genes ARID1A and
             SMARCA4. Our data implicate a number of genes in cancer for
             the first time, including CCT6B, SALL3, FTCD and PC. ID3
             mutations occurred in 34% of Burkitt lymphomas and not in
             DLBCLs. We show experimentally that ID3 mutations promote
             cell cycle progression and proliferation. Our work thus
             elucidates commonly occurring gene-coding mutations in
             Burkitt lymphoma and implicates ID3 as a new tumor
             suppressor gene.},
   Doi = {10.1038/ng.2468},
   Key = {fds257881}
}

@article{fds257839,
   Author = {Zhang, J and Jima, D and Moffitt, AB and Liu, Q and Czader, M and Hsi, ED and Fedoriw, Y and Dunphy, CH and Richards, KL and Gill, JI and Sun, Z and Love, C and Scotland, P and Lock, E and Levy, S and Hsu, DS and Dunson, D and Dave, SS},
   Title = {The genomic landscape of mantle cell lymphoma is related to
             the epigenetically determined chromatin state of normal B
             cells.},
   Journal = {Blood},
   Volume = {123},
   Number = {19},
   Pages = {2988-2996},
   Year = {2014},
   Month = {May},
   ISSN = {0006-4971},
   url = {http://dx.doi.org/10.1182/blood-2013-07-517177},
   Abstract = {In this study, we define the genetic landscape of mantle
             cell lymphoma (MCL) through exome sequencing of 56 cases of
             MCL. We identified recurrent mutations in ATM, CCND1, MLL2,
             and TP53. We further identified a number of novel genes
             recurrently mutated in patients with MCL including RB1,
             WHSC1, POT1, and SMARCA4. We noted that MCLs have a distinct
             mutational profile compared with lymphomas from other B-cell
             stages. The ENCODE project has defined the chromatin
             structure of many cell types. However, a similar
             characterization of primary human mature B cells has been
             lacking. We defined, for the first time, the chromatin
             structure of primary human naïve, germinal center, and
             memory B cells through chromatin immunoprecipitation and
             sequencing for H3K4me1, H3K4me3, H3Ac, H3K36me3, H3K27me3,
             and PolII. We found that somatic mutations that occur more
             frequently in either MCLs or Burkitt lymphomas were
             associated with open chromatin in their respective B cells
             of origin, naïve B cells, and germinal center B cells. Our
             work thus elucidates the landscape of gene-coding mutations
             in MCL and the critical interplay between epigenetic
             alterations associated with B-cell differentiation and the
             acquisition of somatic mutations in cancer.},
   Doi = {10.1182/blood-2013-07-517177},
   Key = {fds257839}
}

@article{fds348918,
   Author = {Dunson, DB and Johndrow, JE},
   Title = {The Hastings algorithm at fifty},
   Journal = {Biometrika},
   Volume = {107},
   Number = {1},
   Pages = {1-23},
   Year = {2020},
   Month = {March},
   url = {http://dx.doi.org/10.1093/biomet/asz066},
   Abstract = {In a 1970 Biometrika paper, W. K. Hastings developed a broad
             class of Markov chain algorithms for sampling from
             probability distributions that are difficult to sample from
             directly. The algorithm draws a candidate value from a
             proposal distribution and accepts the candidate with a
             probability that can be computed using only the unnormalized
             density of the target distribution, allowing one to sample
             from distributions known only up to a constant of
             proportionality. The stationary distribution of the
             corresponding Markov chain is the target distribution one is
             attempting to sample from. The Hastings algorithm
             generalizes the Metropolis algorithm to allow a much broader
             class of proposal distributions instead of just symmetric
             cases.An important class of applications for the Hastings
             algorithm corresponds to sampling from Bayesian posterior
             distributions, which have densities given by a prior density
             multiplied by a likelihood function and divided by a
             normalizing constant equal to the marginal likelihood. The
             marginal likelihood is typically intractable, presenting a
             fundamental barrier to implementation in Bayesian
             statistics. This barrier can be overcome by Markov chain
             Monte Carlo sampling algorithms. Amazingly, even after 50
             years, the majority of algorithms used in practice today
             involve the Hastings algorithm. This article provides a
             brief celebration of the continuing impact of this ingenious
             algorithm on the 50th anniversary of its
             publication.},
   Doi = {10.1093/biomet/asz066},
   Key = {fds348918}
}
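
The accept/reject step described above is compact enough to state in full. A textbook
random-walk implementation, for which the Hastings ratio reduces to the Metropolis
ratio because the proposal is symmetric; only the unnormalized target density is
needed.

import numpy as np

rng = np.random.default_rng(4)

def log_target(x):
    # Unnormalized log density: mixture of Gaussians at -2 and 2,
    # normalizing constant deliberately unknown.
    return np.logaddexp(-0.5 * (x - 2.0) ** 2, -0.5 * (x + 2.0) ** 2)

def metropolis_hastings(n_iter=50000, step=1.0):
    x = 0.0
    chain = np.empty(n_iter)
    for i in range(n_iter):
        prop = x + step * rng.normal()          # symmetric random-walk proposal
        if np.log(rng.random()) < log_target(prop) - log_target(x):
            x = prop                            # accept
        chain[i] = x
    return chain

chain = metropolis_hastings()
print("chain mean:", round(float(chain.mean()), 3), "(target is symmetric about 0)")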

@article{fds257994,
   Author = {Chen, B and Polatkan, G and Sapiro, G and Dunson, DB and Carin,
             L},
   Title = {The hierarchical beta process for convolutional factor
             analysis and deep learning},
   Journal = {Proceedings of the 28th International Conference on Machine
             Learning, ICML 2011},
   Pages = {361-368},
   Year = {2011},
   Month = {October},
   Abstract = {A convolutional factor-analysis model is developed, with the
             number of filters (factors) inferred via the beta process
             (BP) and hierarchical BP, for single-task and multi-task
             learning, respectively. The computation of the model
             parameters is implemented within a Bayesian setting,
             employing Gibbs sampling; we explicitly exploit the
             convolutional nature of the expansion to accelerate
             computations. The model is used in a multi-level ("deep")
             analysis of general data, with specific results presented
             for image-processing data sets, e.g., classification.
             Copyright 2011 by the author(s)/owner(s).},
   Key = {fds257994}
}

@article{fds257998,
   Author = {Ren, L and Wang, Y and Dunson, D and Carin, L},
   Title = {The kernel beta process},
   Journal = {Advances in Neural Information Processing Systems 24: 25th
             Annual Conference on Neural Information Processing Systems
             2011, NIPS 2011},
   Year = {2011},
   Month = {December},
   Abstract = {A new Lévy process prior is proposed for an uncountable
             collection of covariate-dependent feature-learning measures;
             the model is called the kernel beta process (KBP). Available
             covariates are handled efficiently via the kernel
             construction, with covariates assumed observed with each
             data sample ("customer"), and latent covariates learned for
             each feature ("dish"). Each customer selects dishes from an
             infinite buffet, in a manner analogous to the beta process,
             with the added constraint that a customer first decides
             probabilistically whether to "consider" a dish, based on the
             distance in covariate space between the customer and dish.
             If a customer does consider a particular dish, that dish is
             then selected probabilistically as in the beta process. The
             beta process is recovered as a limiting case of the KBP. An
             efficient Gibbs sampler is developed for computations, and
             state-of-the-art results are presented for image processing
             and music analysis tasks.},
   Key = {fds257998}
}
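
A toy rendering (mine, not the paper's sampler) of the customer/dish mechanism
described above: a customer first "considers" a dish with probability given by a
kernel of the customer-dish distance in covariate space, and a considered dish is then
selected as in the ordinary beta process.

import numpy as np

rng = np.random.default_rng(13)
n_dishes = 12
pi = rng.beta(1.0, 4.0, size=n_dishes)       # beta-process dish probabilities
dish_loc = rng.random(n_dishes)              # latent dish covariates in [0, 1]

def select_dishes(cust_loc, bandwidth=0.2):
    """Consider via a kernel of covariate distance, then select as in the BP."""
    k = np.exp(-((cust_loc - dish_loc) / bandwidth) ** 2)
    chosen = (rng.random(n_dishes) < k) & (rng.random(n_dishes) < pi)
    return np.nonzero(chosen)[0]

print("dishes for customer at x=0.3:", select_dishes(0.3))
print("dishes for customer at x=0.9:", select_dishes(0.9))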

@article{fds258055,
   Author = {Chung, Y and Dunson, DB},
   Title = {The local Dirichlet process.},
   Journal = {Annals of the Institute of Statistical Mathematics},
   Volume = {63},
   Number = {1},
   Pages = {59-80},
   Year = {2011},
   Month = {February},
   ISSN = {0020-3157},
   url = {http://dx.doi.org/10.1007/s10463-008-0218-9},
   Abstract = {As a generalization of the Dirichlet process (DP) to allow
             predictor dependence, we propose a local Dirichlet process
             (lDP). The lDP provides a prior distribution for a
             collection of random probability measures indexed by
             predictors. This is accomplished by assigning stick-breaking
             weights and atoms to random locations in a predictor space.
             The probability measure at a given predictor value is then
             formulated using the weights and atoms located in a
             neighborhood about that predictor value. This construction
             results in a marginal DP prior for the random measure at any
             specific predictor value. Dependence is induced through
             local sharing of random components. Theoretical properties
             are considered and a blocked Gibbs sampler is proposed for
             posterior computation in lDP mixture models. The methods are
             illustrated using simulated examples and an epidemiologic
             application.},
   Doi = {10.1007/s10463-008-0218-9},
   Key = {fds258055}
}
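
A loose sketch of the locality idea described above (the paper's exact construction
differs; in particular the marginal-DP property requires more care than the simple
renormalization used here): stick-breaking weights and atoms sit at random locations
in predictor space, and the measure at x is built from the components falling in a
neighborhood of x.

import numpy as np

rng = np.random.default_rng(5)

N, alpha, radius = 200, 1.0, 0.15
v = rng.beta(1.0, alpha, size=N)
w = v * np.cumprod(np.concatenate(([1.0], 1 - v[:-1])))  # stick-breaking weights
atoms = rng.normal(0.0, 1.0, size=N)                     # atoms of the global stick
locs = rng.random(N)                                     # random locations in [0, 1]

def measure_at(x):
    """Atoms and (renormalized) weights of the random measure at predictor x."""
    near = np.abs(locs - x) < radius
    probs = w[near] / w[near].sum()
    return atoms[near], probs

a, p = measure_at(0.3)
print(f"{len(a)} local atoms; mean of measure at x=0.3: {np.dot(a, p):.3f}")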

@article{fds257979,
   Author = {Xue, Y and Dunson, D and Carin, L},
   Title = {The matrix stick-breaking process for flexible multi-task
             learning},
   Journal = {ACM International Conference Proceeding Series},
   Volume = {227},
   Pages = {1063-1070},
   Publisher = {ACM Press},
   Year = {2007},
   Month = {August},
   url = {http://dx.doi.org/10.1145/1273496.1273630},
   Abstract = {In multi-task learning our goal is to design regression or
             classification models for each of the tasks and
             appropriately share information between tasks. A Dirichlet
             process (DP) prior can be used to encourage task clustering.
             However, the DP prior does not allow local clustering of
             tasks with respect to a subset of the feature vector without
             making independence assumptions. Motivated by this problem,
             we develop a new multitask-learning prior, termed the matrix
             stick-breaking process (MSBP), which encourages cross-task
             sharing of data. However, the MSBP allows separate
             clustering and borrowing of information for the different
             feature components. This is important when tasks are more
             closely related for certain features than for others.
             Bayesian inference proceeds by a Gibbs sampling algorithm
             and the approach is illustrated using a simulated example
             and a multi-national application.},
   Doi = {10.1145/1273496.1273630},
   Key = {fds257979}
}

@article{fds258066,
   Author = {Dunson, DB and Xue, Y and Carin, L},
   Title = {The matrix stick-breaking process: Flexible Bayes
             meta-analysis},
   Journal = {Journal of the American Statistical Association},
   Volume = {103},
   Number = {481},
   Pages = {317-327},
   Publisher = {Informa UK Limited},
   Year = {2008},
   Month = {March},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214507000001364},
   Abstract = {In analyzing data from multiple related studies, it often is
             of interest to borrow information across studies and to
             cluster similar studies. Although parametric hierarchical
             models are commonly used, of concern is sensitivity to the
             form chosen for the random-effects distribution. A Dirichlet
             process (DP) prior can allow the distribution to be unknown,
             while clustering studies; however, the DP does not allow
             local clustering of studies with respect to a subset of the
             coefficients without making independence assumptions.
             Motivated by this problem, we propose a matrix
             stick-breaking process (MSBP) as a prior for a matrix of
             random probability measures. Properties of the MSBP are
             considered, and methods are developed for posterior
             computation using Markov chain Monte Carlo. Using the MSBP
             as a prior for a matrix of study-specific regression
             coefficients, we demonstrate advantages over parametric
             modeling in simulated examples. The methods are further
             illustrated using a multinational uterotrophic bioassay
             study.},
   Doi = {10.1198/016214507000001364},
   Key = {fds258066}
}

@article{fds257951,
   Author = {Rodríguez, A and Dunson, DB and Gelfand, AE},
   Title = {The nested dirichlet process},
   Journal = {Journal of the American Statistical Association},
   Volume = {103},
   Number = {483},
   Pages = {1131-1154},
   Publisher = {Informa UK Limited},
   Year = {2008},
   Month = {January},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214508000000553},
   Abstract = {In multicenter studies, subjects in different centers may
             have different outcome distributions. This article is
             motivated by the problem of nonparametric modeling of these
             distributions, borrowing information across centers while
             also allowing centers to be clustered. Starting with a
             stick-breaking representation of the Dirichlet process (DP),
             we replace the random atoms with random probability measures
             drawn from a DP. This results in a nested DP prior, which
             can be placed on the collection of distributions for the
             different centers, with centers drawn from the same DP
             component automatically clustered together. Theoretical
             properties are discussed, and an efficient Markov chain
             Monte Carlo algorithm is developed for computation. The
             methods are illustrated using a simulation study and an
             application to quality of care in U.S. hospitals.},
   Doi = {10.1198/016214508000000553},
   Key = {fds257951}
}
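
A minimal sketch (not the authors' code) of the nesting described above: a top-level
truncated stick-breaking measure whose atoms are themselves truncated stick-breaking
distributions, so centers that draw the same atom are automatically clustered.

import numpy as np

rng = np.random.default_rng(6)

def stick_breaking(alpha, K):
    """Truncated stick-breaking weights, renormalized to sum to one."""
    v = rng.beta(1.0, alpha, size=K)
    w = v * np.cumprod(np.concatenate(([1.0], 1 - v[:-1])))
    return w / w.sum()

K_top, K_bot, n_centers = 20, 50, 8
top_w = stick_breaking(2.0, K_top)                   # weights over distributions
bot_w = [stick_breaking(1.0, K_bot) for _ in range(K_top)]
bot_atoms = [rng.normal(0.0, 2.0, size=K_bot) for _ in range(K_top)]

labels = rng.choice(K_top, size=n_centers, p=top_w)  # one distribution per center
print("center cluster labels (ties = clustered centers):", labels)

k = labels[0]                                        # sample from center 0's law
print("three draws for center 0:", bot_atoms[k][rng.choice(K_bot, 3, p=bot_w[k])])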

@article{fds258058,
   Author = {Rodriguez, A and Dunson, DB and Gelfand, AE},
   Title = {The nested Dirichlet process (with discussion)},
   Journal = {Journal of the American Statistical Association},
   Year = {2008},
   Key = {fds258058}
}

@article{fds257952,
   Author = {Rodríguez, A and Dunson, DB and Gelfand, AE},
   Title = {The nested Dirichlet process: Rejoinder},
   Journal = {Journal of the American Statistical Association},
   Volume = {103},
   Number = {483},
   Pages = {1153-1154},
   Publisher = {Informa UK Limited},
   Year = {2008},
   Month = {September},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214508000000616},
   Doi = {10.1198/016214508000000616},
   Key = {fds257952}
}

@article{fds257888,
   Author = {Dunson, DB and Sinai, I and Colombo, B},
   Title = {The relationship between cervical secretions and the daily
             probabilities of pregnancy: effectiveness of the TwoDay
             Algorithm.},
   Journal = {Human reproduction (Oxford, England)},
   Volume = {16},
   Number = {11},
   Pages = {2278-2282},
   Year = {2001},
   Month = {November},
   ISSN = {0268-1161},
   url = {http://dx.doi.org/10.1093/humrep/16.11.2278},
   Abstract = {<h4>Background</h4>The TwoDay Algorithm is a simple method
             for identifying the fertile window. It classifies a day as
             fertile if cervical secretions are present on that day or
             were present on the day before. This approach may be an
             effective alternative to the ovulation and symptothermal
             methods for populations and programmes that find current
             natural family planning methods difficult to
             implement.<h4>Methods</h4>We used data on secretions from a
             large multinational European fecundability study to assess
             the relationship between the days predicted to be
             potentially fertile by the TwoDay Algorithm and the
             day-specific probabilities of pregnancy based on intercourse
             patterns in 434 conception cycles from the
             study.<h4>Results</h4>The days around ovulation that had the
             highest fecundability were the days most likely to be
             classified as fertile by the TwoDay Algorithm. In addition,
             intercourse on a particular day in the fertile interval was
             twice as likely to result in a pregnancy if cervical
             secretions were present on that day or the day
             before.<h4>Conclusions</h4>The TwoDay Algorithm is
             effective, both in identifying the fertile days of the cycle
             and in predicting days within the fertile interval that have
             a high pregnancy rate. Our data provide the first direct
             evidence that cervical secretions are associated with higher
             fecundability within the fertile window.},
   Doi = {10.1093/humrep/16.11.2278},
   Key = {fds257888}
}
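
The TwoDay Algorithm quoted above is simple enough to state exactly as code: a day is
classified fertile if cervical secretions are observed on that day or on the day
before.

def two_day_algorithm(secretions):
    """secretions: booleans, one per cycle day (True = secretions observed)."""
    return [today or (i > 0 and secretions[i - 1])
            for i, today in enumerate(secretions)]

obs = [False, False, True, True, False, False, True, False]
print(two_day_algorithm(obs))
# -> [False, False, True, True, True, False, True, True]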

@article{fds257887,
   Author = {Wilcox, AJ and Dunson, D and Baird, DD},
   Title = {The timing of the "fertile window" in the menstrual cycle:
             day specific estimates from a prospective
             study.},
   Journal = {BMJ (Clinical research ed.)},
   Volume = {321},
   Number = {7271},
   Pages = {1259-1262},
   Year = {2000},
   Month = {November},
   ISSN = {0959-8146},
   url = {http://dx.doi.org/10.1136/bmj.321.7271.1259},
   Abstract = {<h4>Objectives</h4>To provide specific estimates of the
             likely occurrence of the six fertile days (the "fertile
             window") during the menstrual cycle.<h4>Design</h4>Prospective
             cohort study.<h4>Participants</h4>221 healthy women who were
             planning a pregnancy.<h4>Main outcome measures</h4>The
             timing of ovulation in 696 menstrual cycles, estimated using
             urinary metabolites of oestrogen and progesterone.<h4>Results</h4>The
             fertile window occurred during a broad range of days in the
             menstrual cycle. On every day between days 6 and 21, women
             had at minimum a 10% probability of being in their fertile
             window. Women cannot predict a sporadic late ovulation; 4-6%
             of women whose cycles had not yet resumed were potentially
             fertile in the fifth week of their cycle.<h4>Conclusions</h4>In
             only about 30% of women is the fertile window entirely
             within the days of the menstrual cycle identified by
             clinical guidelines-that is, between days 10 and 17. Most
             women reach their fertile window earlier and others much
             later. Women should be advised that the timing of their
             fertile window can be highly unpredictable, even if their
             cycles are usually regular.},
   Doi = {10.1136/bmj.321.7271.1259},
   Key = {fds257887}
}

@article{fds349871,
   Author = {Panea, RI and Love, CL and Shingleton, JR and Reddy, A and Bailey, JA and Moormann, AM and Otieno, JA and Ong'echa, JM and Oduor, CI and Schroeder, KMS and Masalu, N and Chao, NJ and Agajanian, M and Major,
             MB and Fedoriw, Y and Richards, KL and Rymkiewicz, G and Miles, RR and Alobeid, B and Bhagat, G and Flowers, CR and Ondrejka, SL and Hsi, ED and Choi, WWL and Au-Yeung, RKH and Hartmann, W and Lenz, G and Meyerson, H and Lin, Y-Y and Zhuang, Y and Luftig, MA and Waldrop, A and Dave, T and Thakkar, D and Sahay, H and Li, G and Palus, BC and Seshadri, V and Kim,
             SY and Gascoyne, RD and Levy, S and Mukhopadyay, M and Dunson, DB and Dave,
             SS},
   Title = {The whole-genome landscape of Burkitt lymphoma
             subtypes.},
   Journal = {Blood},
   Volume = {134},
   Number = {19},
   Pages = {1598-1607},
   Year = {2019},
   Month = {November},
   url = {http://dx.doi.org/10.1182/blood.2019001880},
   Abstract = {Burkitt lymphoma (BL) is an aggressive, MYC-driven lymphoma
             comprising 3 distinct clinical subtypes: sporadic BLs that
             occur worldwide, endemic BLs that occur predominantly in
             sub-Saharan Africa, and immunodeficiency-associated BLs that
             occur primarily in the setting of HIV. In this study, we
             comprehensively delineated the genomic basis of BL through
             whole-genome sequencing (WGS) of 101 tumors representing all
             3 subtypes of BL to identify 72 driver genes. These data
             were additionally informed by CRISPR screens in BL cell
             lines to functionally annotate the role of oncogenic
             drivers. Nearly every driver gene was found to have both
             coding and non-coding mutations, highlighting the importance
             of WGS for identifying driver events. Our data implicate
             coding and non-coding mutations in IGLL5, BACH2, SIN3A, and
             DNMT1. Epstein-Barr virus (EBV) infection was associated
             with higher mutation load, with type 1 EBV showing a higher
             mutational burden than type 2 EBV. Although sporadic and
             immunodeficiency-associated BLs had similar genetic
             profiles, endemic BLs manifested more frequent mutations in
             BCL7A and BCL6 and fewer genetic alterations in DNMT1,
             SNTB2, and CTCF. Silencing mutations in ID3 were a common
             feature of all 3 subtypes of BL. In vitro, mass
             spectrometry-based proteomics demonstrated that the ID3
             protein binds primarily to TCF3 and TCF4. In vivo knockout
             of ID3 potentiated the effects of MYC, leading to rapid
             tumorigenesis and tumor phenotypes consistent with those
             observed in the human disease.},
   Doi = {10.1182/blood.2019001880},
   Key = {fds349871}
}

@article{fds335795,
   Author = {Johndrow, JE and Lum, K and Dunson, DB},
   Title = {Theoretical limits of microclustering for record
             linkage.},
   Journal = {Biometrika},
   Volume = {105},
   Number = {2},
   Pages = {431-446},
   Year = {2018},
   Month = {June},
   url = {http://dx.doi.org/10.1093/biomet/asy003},
   Abstract = {There has been substantial recent interest in record
             linkage, where one attempts to group the records pertaining
             to the same entities from one or more large databases that
             lack unique identifiers. This can be viewed as a type of
             microclustering, with few observations per cluster and a
             very large number of clusters. We show that the problem is
             fundamentally hard from a theoretical perspective and, even
             in idealized cases, accurate entity resolution is
             effectively impossible unless the number of entities is
             small relative to the number of records and/or the
             separation between records from different entities is
             extremely large. These results suggest conservatism in
             interpretation of the results of record linkage, support
             collection of additional data to more accurately
             disambiguate the entities, and motivate a focus on coarser
             inference. For example, results from a simulation study
             suggest that sometimes one may obtain accurate results for
             population size estimation even when fine-scale entity
             resolution is inaccurate.},
   Doi = {10.1093/biomet/asy003},
   Key = {fds335795}
}

@article{fds257995,
   Author = {Chen, H and Dunson, DB and Carin, L},
   Title = {Topic Modeling with Nonparametric Markov
             Tree.},
   Journal = {Proceedings of the ... International Conference on Machine
             Learning. International Conference on Machine
             Learning},
   Volume = {2011},
   Pages = {377-384},
   Year = {2011},
   Month = {January},
   Abstract = {A new hierarchical tree-based topic model is developed,
             based on nonparametric Bayesian techniques. The model has
             two unique attributes: (<i>i</i>) a child node in the tree
             may have more than one parent, with the goal of eliminating
             redundant sub-topics deep in the tree; and (<i>ii</i>)
             parsimonious sub-topics are manifested, by removing
             redundant usage of words at multiple scales. The depth and
             width of the tree are unbounded within the prior, with a
             retrospective sampler employed to adaptively infer the
             appropriate tree size based upon the corpus under study.
             Excellent quantitative results are manifested on five
             standard data sets, and the inferred tree structure is also
             found to be highly interpretable.},
   Key = {fds257995}
}

@article{fds257895,
   Author = {Nyska, A and Lomnitski, L and Spalding, J and Dunson, DB and Goldsworthy, TL and Ben-Shaul, V and Grossman, S and Bergman, M and Boorman, G},
   Title = {Topical and oral administration of the natural water-soluble
             antioxidant from spinach reduces the multiplicity of
             papillomas in the Tg.AC mouse model.},
   Journal = {Toxicology letters},
   Volume = {122},
   Number = {1},
   Pages = {33-44},
   Year = {2001},
   Month = {May},
   ISSN = {0378-4274},
   url = {http://dx.doi.org/10.1016/s0378-4274(01)00345-9},
   Abstract = {The Tg.AC mouse carrying the v-Ha-ras structural gene is a
             useful model for the study of chemical carcinogens,
             especially those acting via non-genotoxic mechanisms. This
             study evaluated the efficacy of the non-toxic, water-soluble
             antioxidant from spinach, natural antioxidant (NAO), in
             reducing skin papilloma induction in female hemizygous Tg.AC
             mice treated dermally five times over 2.5 weeks with 2.5
             microg 12-O-tetradecanoylphorbol-13-acetate (TPA). The
             TPA-only group was considered as a control; the other two
             groups received, additionally, NAO topically (2 mg) or
             orally (100 mg/kg), 5 days/week for 5 weeks. Papilloma
             counts made macroscopically during the clinical observations
             showed a significant decrease in multiplicity (P<0.01) in
             the NAO topically treated group. According to histological
              criteria, papilloma multiplicity was lower in both
             topical-NAO and oral-NAO groups, but significantly so only
             in the oral-NAO mice (P<0.01). The beneficial effect of NAO
             in the Tg.AC mouse is reported.},
   Doi = {10.1016/s0378-4274(01)00345-9},
   Key = {fds257895}
}

@article{fds326219,
   Author = {Dunson, DB},
   Title = {Toward automated prior choice},
   Journal = {Statistical Science},
   Volume = {32},
   Number = {1},
   Pages = {41-43},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2017},
   Month = {February},
   url = {http://dx.doi.org/10.1214/16-STS607},
   Doi = {10.1214/16-STS607},
   Key = {fds326219}
}

@article{fds373933,
   Author = {Li, D and Nguyen, P and Zhang, Z and Dunson, D},
   Title = {Tree representations of brain structural connectivity via
             persistent homology.},
   Journal = {Frontiers in neuroscience},
   Volume = {17},
   Pages = {1200373},
   Year = {2023},
   Month = {January},
   url = {http://dx.doi.org/10.3389/fnins.2023.1200373},
   Abstract = {The brain structural connectome is generated by a collection
             of white matter fiber bundles constructed from diffusion
             weighted MRI (dMRI), acting as highways for neural activity.
             There has been abundant interest in studying how the
             structural connectome varies across individuals in relation
             to their traits, ranging from age and gender to
             neuropsychiatric outcomes. After applying tractography to
             dMRI to get white matter fiber bundles, a key question is
             how to represent the brain connectome to facilitate
             statistical analyses relating connectomes to traits. The
             current standard divides the brain into regions of interest
             (ROIs), and then relies on an <i>adjacency matrix</i> (AM)
             representation. Each cell in the AM is a measure of
             connectivity, e.g., number of fiber curves, between a pair
             of ROIs. Although the AM representation is intuitive, a
             disadvantage is the high-dimensionality due to the large
             number of cells in the matrix. This article proposes a
             simpler tree representation of the brain connectome, which
             is motivated by ideas in computational topology and takes
             topological and biological information on the cortical
             surface into consideration. We demonstrate that our tree
             representation preserves useful information and
             interpretability, while reducing dimensionality to improve
             statistical and computational efficiency. Applications to
             data from the Human Connectome Project (HCP) are considered
             and code is provided for reproducing our
             analyses.},
   Doi = {10.3389/fnins.2023.1200373},
   Key = {fds373933}
}
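
A rough sketch of the tree idea, using the standard correspondence between
zero-dimensional persistent homology and single-linkage clustering (my illustration on
a random connectivity matrix; the paper's construction additionally uses topological
and biological information from the cortical surface):

import numpy as np
from scipy.cluster.hierarchy import linkage

rng = np.random.default_rng(11)
n_roi = 8
W = rng.random((n_roi, n_roi))
W = (W + W.T) / 2                      # symmetric "connectivity" between ROIs
np.fill_diagonal(W, 1.0)
D = 1.0 - W                            # dissimilarity; diagonal is zero

iu = np.triu_indices(n_roi, k=1)       # condensed distance vector for scipy
tree = linkage(D[iu], method='single') # merge heights = 0-dim persistence values
print(np.round(tree, 2))               # rows: (cluster i, cluster j, height, size)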

@article{fds257996,
   Author = {Zhang, X and Dunson, DB and Carin, L},
   Title = {Tree-Structured Infinite Sparse Factor Model.},
   Journal = {Proceedings of the ... International Conference on Machine
             Learning. International Conference on Machine
             Learning},
   Volume = {2011},
   Pages = {785-792},
   Year = {2011},
   Month = {January},
   Abstract = {A tree-structured multiplicative gamma process (TMGP) is
             developed, for inferring the depth of a tree-based
             factor-analysis model. This new model is coupled with the
             nested Chinese restaurant process, to nonparametrically
             infer the depth and width (structure) of the tree. In
             addition to developing the model, theoretical properties of
             the TMGP are addressed, and a novel MCMC sampler is
             developed. The structure of the inferred tree is used to
             learn relationships between high-dimensional data, and the
             model is also applied to compressive sensing and
             interpolation of incomplete images.},
   Key = {fds257996}
}
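
The multiplicative gamma process underlying the TMGP can be stated in a few lines. A
generic sketch (the shape parameters are my assumptions): precisions are cumulative
products of gamma variables, so factor variances shrink with depth, which is the
device that lets depth be inferred rather than fixed in advance.

import numpy as np

rng = np.random.default_rng(10)
depth = 10
delta = rng.gamma(3.0, 1.0, size=depth)   # gamma shocks with mean 3 (> 1)
tau = np.cumprod(delta)                   # precision at level h: product of first h shocks
print("per-level factor std devs:", np.round(1.0 / np.sqrt(tau), 4))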

@article{fds257956,
   Author = {Mitra, R and Dunson, D},
   Title = {Two-level stochastic search variable selection in GLMs with
             missing predictors.},
   Journal = {The international journal of biostatistics},
   Volume = {6},
   Number = {1},
   Pages = {Article-33},
   Year = {2010},
   Month = {January},
   ISSN = {1557-4679},
   url = {http://dx.doi.org/10.2202/1557-4679.1173},
   Abstract = {Stochastic search variable selection (SSVS) algorithms
             provide an appealing and widely used approach for searching
             for good subsets of predictors while simultaneously
             estimating posterior model probabilities and model-averaged
             predictive distributions. This article proposes a two-level
             generalization of SSVS to account for missing predictors
             while accommodating uncertainty in the relationships between
             these predictors. Bayesian approaches for allowing
             predictors that are missing at random require a model on the
             joint distribution of the predictors. We show that
             predictive performance can be improved by allowing
             uncertainty in the specification of predictor relationships
             in this model. The methods are illustrated through
             simulation studies and analysis of an epidemiologic data
             set.},
   Doi = {10.2202/1557-4679.1173},
   Key = {fds257956}
}

@article{fds257900,
   Author = {Mikolajczyk, R},
   Title = {TwoDay Algorithm in predicting fertile time.},
   Journal = {Human reproduction (Oxford, England)},
   Volume = {17},
   Number = {7},
   Pages = {1925},
   Year = {2002},
   Month = {July},
   ISSN = {0268-1161},
   url = {http://dx.doi.org/10.1093/humrep/17.7.1925},
   Doi = {10.1093/humrep/17.7.1925},
   Key = {fds257900}
}

@article{fds322552,
   Author = {Guo, F and Dunson, DB},
   Title = {Uncovering systematic bias in ratings across categories: A
             Bayesian approach},
   Journal = {RecSys 2015 - Proceedings of the 9th ACM Conference on
             Recommender Systems},
   Pages = {317-320},
   Year = {2015},
   Month = {September},
   ISBN = {9781450336925},
   url = {http://dx.doi.org/10.1145/2792838.2799683},
   Abstract = {Recommender systems are routinely equipped with standardized
             taxonomy that associates each item with one or more
             categories or genres. Although such information does not
              directly imply the quality of an item, the distribution of
              ratings varies greatly across categories, e.g. animation
              movies may generally receive higher ratings than action
              movies. While it is a natural outcome given the diversity
              and heterogeneity of both users and items, it makes directly
              aggregated ratings, which are commonly used to guide users'
              choice by reflecting the overall quality of an item,
              incomparable across categories and hence prone to fairness
              and diversity issues. This paper aims to uncover and
              calibrate systematic category-wise biases for
              discrete-valued ratings. We propose a novel Bayesian
              multiplicative probit model that treats the inflation or
              deflation of mean rating for a combination of categories as
             multiplicatively contributed from category-specific
             parameters. The posterior distribution of those parameters,
             as inferred from data, can capture the bias for all possible
             combinations of categories, thus enabling statistically
             efficient estimation and principled rating
             calibration.},
   Doi = {10.1145/2792838.2799683},
   Key = {fds322552}
}
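
A toy simulation (mine, much cruder than the paper's model) of the phenomenon being
calibrated: two items of identical latent quality receive different average ratings
because a category effect acts multiplicatively on the latent score before it is
discretized. The category factors below are hypothetical.

import numpy as np

rng = np.random.default_rng(9)
cuts = np.array([-1.5, -0.5, 0.5, 1.5])        # thresholds mapping scores to 1..5
factor = {"animation": 1.3, "action": 0.8}     # hypothetical category effects

def ratings(quality, category, n=20000):
    z = rng.normal(quality * factor[category], 1.0, size=n)  # latent score
    return 1 + np.digitize(z, cuts)                          # discrete rating 1..5

for cat in factor:
    print(cat, "mean rating for identical quality:",
          round(float(ratings(1.0, cat).mean()), 2))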

@article{fds329992,
   Author = {Tikhonov, G and Abrego, N and Dunson, D and Ovaskainen,
             O},
   Title = {Using joint species distribution models for evaluating how
             species-to-species associations depend on the environmental
             context},
   Journal = {Methods in Ecology and Evolution},
   Volume = {8},
   Number = {4},
   Pages = {443-452},
   Publisher = {WILEY},
   Editor = {Warton, D},
   Year = {2017},
   Month = {April},
   url = {http://dx.doi.org/10.1111/2041-210X.12723},
   Abstract = {Joint species distribution models (JSDM) are increasingly
             used to analyse community ecology data. Recent progress with
             JSDMs has provided ecologists with new tools for estimating
             species associations (residual co-occurrence patterns after
             accounting for environmental niches) from large data sets,
             as well as for increasing the predictive power of species
             distribution models (SDMs) by accounting for such
             associations. Yet, one critical limitation of JSDMs
             developed thus far is that they assume constant species
             associations. However, in real ecological communities, the
             direction and strength of interspecific interactions are
             likely to be different under different environmental
             conditions. In this paper, we overcome this shortcoming of
             present JSDMs by allowing species associations to covary with
             measured environmental covariates. To estimate
             environment-dependent species associations, we utilize a
             latent variable structure, where the factor loadings are
             modelled as a linear regression on environmental covariates.
             We illustrate the performance of the statistical framework
             with both simulated and real data. Our results show that
             JSDMs perform substantially better in inferring
             environment-dependent species associations than single
             SDMs, especially with sparse data. Furthermore, JSDMs
             consistently outperform SDMs in predictive power,
             generating predictions that account for
             environment-dependent biotic associations. We implemented
             the statistical framework as a MATLAB package, which
             includes tools both for model parameterization as well as
             for post-processing of results, particularly for addressing
             whether and how species associations depend on the
             environmental conditions. Our statistical framework provides
             a new tool for ecologists who wish to use non-manipulative
             observational community data to investigate how
             interspecific interactions depend on environmental context. Our
             method can be applied to answer fundamental questions in
             community ecology about how species’ interactions shift in
             changing environmental conditions, as well as to predict
             future changes of species’ interactions in response to
             global change.},
   Doi = {10.1111/2041-210X.12723},
   Key = {fds329992}
}
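
A minimal sketch of the latent-variable construction described above, under
assumed toy dimensions: letting the factor loadings depend linearly on an
environmental covariate makes the residual association matrix itself a
function of the environment. This is illustrative Python, not the authors'
MATLAB package.

import numpy as np

rng = np.random.default_rng(1)
n_species, n_factors = 6, 2
Lambda0 = rng.standard_normal((n_species, n_factors))  # baseline loadings
Lambda1 = rng.standard_normal((n_species, n_factors))  # covariate effect

def association_matrix(x):
    """Residual species-to-species correlations at covariate value x."""
    L = Lambda0 + x * Lambda1        # loadings as a linear function of x
    S = L @ L.T + np.eye(n_species)  # covariance: shared factors + unit noise
    d = np.sqrt(np.diag(S))
    return S / np.outer(d, d)        # standardize to a correlation matrix

# The implied association between two species changes along the gradient:
print(round(association_matrix(-1.0)[0, 1], 2),
      round(association_matrix(+1.0)[0, 1], 2))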

@article{fds329994,
   Author = {Ovaskainen, O and Abrego, N and Halme, P and Dunson,
             D},
   Title = {Using latent variable models to identify large networks of
             species-to-species associations at different spatial
             scales},
   Journal = {Methods in Ecology and Evolution},
   Volume = {7},
   Number = {5},
   Pages = {549-555},
   Publisher = {WILEY},
   Editor = {Warton, D},
   Year = {2016},
   Month = {May},
   url = {http://dx.doi.org/10.1111/2041-210X.12501},
   Abstract = {We present a hierarchical latent variable model that
             partitions variation in species occurrences and
             co-occurrences simultaneously at multiple spatial scales. We
             illustrate how the parameterized model can be used to
             predict the occurrences of a species by using as predictors
             not only the environmental covariates, but also the
             occurrences of all other species, at all spatial scales. We
             leverage recent progress in Bayesian latent variable models
             to implement a computationally effective algorithm that
             enables one to consider large communities and extensive
             sampling schemes. We exemplify the framework with a
             community of 98 fungal species sampled in c. 22 500 dead
             wood units in 230 plots in 29 beech forests. The networks
             identified by correlations and partial correlations were
             consistent, as were networks for natural and managed
             forests, but networks at different spatial scales were
             dissimilar. Accounting for the occurrences of the other
             species roughly doubled the predictive powers of the models
             compared to accounting for environmental covariates
             only.},
   Doi = {10.1111/2041-210X.12501},
   Key = {fds329994}
}
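
The networks mentioned in the abstract can be read off a fitted residual
covariance in two ways; the short Python sketch below, on hypothetical
loadings rather than the fungal data, contrasts marginal correlations with
partial correlations obtained by standardizing the negated precision matrix.

import numpy as np

rng = np.random.default_rng(2)
L = rng.standard_normal((5, 2))   # latent-factor loadings for 5 species
S = L @ L.T + np.eye(5)           # residual covariance implied by the model

d = np.sqrt(np.diag(S))
corr = S / np.outer(d, d)         # marginal co-occurrence network

P = np.linalg.inv(S)              # precision matrix
dp = np.sqrt(np.diag(P))
pcorr = -P / np.outer(dp, dp)     # partial correlations (off-diagonal)
np.fill_diagonal(pcorr, 1.0)

print(np.round(corr, 2))
print(np.round(pcorr, 2))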

@article{fds257955,
   Author = {Baird, DD and Travlos, G and Wilson, R and Dunson, DB and Hill, MC and D'Aloisio, AA and London, SJ and Schectman, JM},
   Title = {Uterine leiomyomata in relation to insulin-like growth
             factor-I, insulin, and diabetes.},
   Journal = {Epidemiology (Cambridge, Mass.)},
   Volume = {20},
   Number = {4},
   Pages = {604-610},
   Year = {2009},
   Month = {July},
   ISSN = {1044-3983},
   url = {http://dx.doi.org/10.1097/ede.0b013e31819d8d3f},
   Abstract = {Background: Insulin-like growth factor-I (IGF-I) and
             insulin stimulate cell proliferation in uterine leiomyoma
             (fibroid) tissue. We hypothesized that circulating levels of
             these proteins would be associated with increased prevalence
             and size of uterine fibroids. Methods: Participants
             were 35-49-year-old, randomly selected members of an urban
             health plan who were enrolled in the study in 1996-1999.
             Premenopausal participants were screened for fibroids with
             ultrasound. Fasting blood samples were collected.
             Associations between fibroids and diabetes, plasma IGF-I,
             IGF binding protein 3 (BP3), and insulin were evaluated for
             blacks (n = 585) and whites (n = 403) by using multiple
             logistic regression. Results: IGF-I showed no
             association with fibroids in blacks, but in whites the
             adjusted odds ratios (aORs) for both mid and upper tertiles
             compared with the lowest tertile were 0.6 (95% confidence
             intervals [CI] = 0.3-1.0 and 0.4-1.1, respectively). Insulin
             and diabetes both tended to be inversely associated with
             fibroids in blacks. The insulin association was with large
             fibroids; aOR for the upper insulin tertile relative to the
             lowest was 0.4 (0.2-0.9). The aOR for diabetes was 0.5
             (0.2-1.0). Associations of insulin and diabetes with
             fibroids were weak for whites. Binding protein 3 showed no
             association with fibroids. Conclusions: Contrary to
             our hypothesis, high circulating IGF-I and insulin were not
             related to increased fibroid prevalence. Instead, there was
             suggestion of the opposite. The inverse association with
             diabetes, although based on small numbers, is consistent
             with previously reported findings. Future studies might
             investigate vascular dysfunction as a mediator between
             hyperinsulinemia or diabetes and possible reduced risk of
             fibroids.},
   Doi = {10.1097/ede.0b013e31819d8d3f},
   Key = {fds257955}
}

@article{fds344777,
   Author = {Han, S and Liao, X and Dunson, DB and Carin, L},
   Title = {Variational Gaussian copula inference},
   Journal = {Proceedings of the 19th International Conference on
             Artificial Intelligence and Statistics, AISTATS
             2016},
   Pages = {829-838},
   Year = {2016},
   Month = {January},
   Abstract = {We utilize copulas to constitute a unified framework for
             constructing and optimizing variational proposals in
             hierarchical Bayesian models. For models with continuous and
             non-Gaussian hidden variables, we propose a semiparametric
             and automated variational Gaussian copula approach, in which
             the parametric Gaussian copula family is able to preserve
             multivariate posterior dependence, and the nonparametric
             transformations based on Bernstein polynomials provide ample
             flexibility in characterizing the univariate marginal
             posteriors.},
   Key = {fds344777}
}
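
A minimal sketch of the copula construction described above: sampling from a
Gaussian-copula proposal couples arbitrary marginals through a correlation
matrix, preserving dependence between hidden variables. For brevity the
sketch uses fixed gamma marginals where the paper uses Bernstein-polynomial
transformations; all values are illustrative assumptions.

import numpy as np
from scipy import stats

rng = np.random.default_rng(3)
C = np.array([[1.0, 0.7],
              [0.7, 1.0]])                # copula correlation (dependence)

z = rng.multivariate_normal(np.zeros(2), C, size=10_000)
u = stats.norm.cdf(z)                     # Gaussian copula: uniform marginals
theta1 = stats.gamma(a=2.0).ppf(u[:, 0])  # non-Gaussian marginal for theta_1
theta2 = stats.gamma(a=5.0).ppf(u[:, 1])  # non-Gaussian marginal for theta_2

# The marginals are gamma, yet the dependence encoded in C survives:
print(round(np.corrcoef(theta1, theta2)[0, 1], 2))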

@article{fds257915,
   Author = {Stanford, JB and Smith, KR and Dunson, DB},
   Title = {Vulvar mucus observations and the probability of
             pregnancy.},
   Journal = {Obstetrics and gynecology},
   Volume = {101},
   Number = {6},
   Pages = {1285-1293},
   Year = {2003},
   Month = {June},
   url = {http://dx.doi.org/10.1016/s0029-7844(03)00358-2},
   Abstract = {Objective: To assess the day-specific and
             cycle-specific probabilities of conception leading to
             clinical pregnancy, in relation to the timing of intercourse
             and vulvar mucus observations. Methods: This was a
             retrospective cohort study of women beginning use of the
             Creighton Model Fertility Care System in Missouri, Nebraska,
             Kansas, and California. Data were abstracted from Creighton
             Model Fertility Care System records, including women's daily
             standardized vulvar observations of cervical mucus
             discharge, days of intercourse, and clinically evident
             pregnancy (conception). Established statistical models were
             used to estimate day-specific probabilities of
             conception. Results: Data were analyzed from 1681
             cycles with 81 conceptions from 309 normally fertile couples
             (initially seeking to avoid pregnancy) and from 373 cycles
             with 30 conceptions from 117 subfertile couples (who were
             initially trying to achieve pregnancy). The highest
             probability of pregnancy occurred on the peak day of vulvar
             mucus observation (0.38 for normally fertile couples and 0.14
             for subfertile couples). The probability of pregnancy was
             greater than 0.05 for normally fertile couples from 3 days
             before to 2 days after the peak, and for subfertile couples
             from 1 day before to 1 day after the peak. The
             cycle-specific probability of conception correlated with the
             quality of mucus discharge in normally fertile couples but
             not in subfertile couples. Conclusion: Standardized
             vulvar observations of vaginal mucus discharge identify the
             days with the greatest likelihood of conception from
             intercourse in normal fertility and subfertility and provide
             an indicator of the overall potential for conception in a
             given menstrual cycle in normal fertility.},
   Doi = {10.1016/s0029-7844(03)00358-2},
   Key = {fds257915}
}

@article{fds322553,
   Author = {Srivastava, S and Cevher, V and Tran-Dinh, Q and Dunson,
             DB},
   Title = {WASP: Scalable Bayes via barycenters of subset
             posteriors},
   Journal = {Journal of Machine Learning Research},
   Volume = {38},
   Pages = {912-920},
   Year = {2015},
   Month = {January},
   Abstract = {The promise of Bayesian methods for big data sets has not
             fully been realized due to the lack of scalable
             computational algorithms. For massive data, it is necessary
             to store and process subsets on different machines in a
             distributed manner. We propose a simple, general, and highly
             efficient approach, which first runs a posterior sampling
             algorithm in parallel on different machines for subsets of a
             large data set. To combine these subset posteriors, we
             calculate the Wasserstein barycenter via a highly efficient
             linear program. The resulting estimate for the Wasserstein
             posterior (WASP) has an atomic form, facilitating
             straightforward estimation of posterior summaries of
             functionals of interest. The WASP approach allows posterior
             sampling algorithms for smaller data sets to be trivially
             scaled to huge data. We provide theoretical justification in
             terms of posterior consistency and algorithm efficiency.
             Examples are provided in complex settings including Gaussian
             process regression and nonparametric Bayes mixture
             models.},
   Key = {fds322553}
}
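
A minimal sketch of the combination step described above, using the
one-dimensional special case rather than the paper's general linear program:
for a scalar functional, the Wasserstein barycenter of subset posteriors is
obtained by averaging their quantile functions, so combining parallel MCMC
runs reduces to averaging quantiles. The draws below are hypothetical
stand-ins for subset-posterior samples.

import numpy as np

rng = np.random.default_rng(4)
n_subsets, n_draws = 10, 2_000
# Stand-ins for MCMC draws of one parameter from each data subset:
subset_draws = [rng.normal(loc=rng.normal(1.0, 0.1), scale=0.5, size=n_draws)
                for _ in range(n_subsets)]

grid = np.linspace(0.005, 0.995, 199)              # common quantile levels
quantiles = np.stack([np.quantile(d, grid) for d in subset_draws])
wasp_quantiles = quantiles.mean(axis=0)            # barycenter quantile fn

# Posterior summaries are read off the barycenter's quantile function:
median, lo95, hi95 = np.interp([0.5, 0.025, 0.975], grid, wasp_quantiles)
print("WASP median:", round(median, 3),
      "95% interval:", round(lo95, 3), round(hi95, 3))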

@article{fds257913,
   Author = {Baird, DD and Dunson, DB},
   Title = {Why is parity protective for uterine fibroids?},
   Journal = {Epidemiology (Cambridge, Mass.)},
   Volume = {14},
   Number = {2},
   Pages = {247-250},
   Year = {2003},
   Month = {March},
   url = {http://dx.doi.org/10.1097/01.ede.0000054360.61254.27},
   Abstract = {Uterine fibroids are benign tumors, the etiology of which is
             not understood. Symptoms can be debilitating, and the
             primary treatment is surgery, usually hysterectomy.
             Epidemiologic data show that pregnancy is associated with
             reduced risk of fibroids. We hypothesize that this
             association is attributable to a protective effect of
             postpartum involution of the uterus. After each pregnancy
             the uterus rapidly returns to prepregnancy size by dramatic
             remodeling of the tissue. We hypothesize that small fibroids
             are eliminated during this process. We present preliminary
             epidemiologic evidence that is consistent with this
             hypothesis. If the hypothesis is supported by more direct
             evidence, it may have broader implications, supporting the
             idea that tissue remodeling may be a general mechanism for
             limiting tumor development.},
   Doi = {10.1097/01.ede.0000054360.61254.27},
   Key = {fds257913}
}

@article{fds333226,
   Author = {Abrego, N and Dunson, D and Halme, P and Salcedo, I and Ovaskainen,
             O},
   Title = {Wood-inhabiting fungi with tight associations with other
             species have declined as a response to forest
             management},
   Journal = {Oikos},
   Volume = {126},
   Number = {2},
   Publisher = {WILEY},
   Year = {2017},
   Month = {February},
   url = {http://dx.doi.org/10.1111/oik.03674},
   Abstract = {Research on mutualistic and antagonistic networks, such as
             plant–pollinator and host–parasite networks, has shown
             that species interactions can influence and be influenced by
             the responses of species to environmental perturbations.
             Here we examine whether results obtained for directly
             observable networks generalize to more complex networks in
             which species interactions cannot be observed directly. As a
             case study, we consider data on the occurrences of 98
             wood-inhabiting fungal species in managed and natural
             forests. We specifically ask if and how much the positions
             of wood-inhabiting fungal species within the interaction
             networks influence their responses to forest management. For
             this, we utilize a joint species distribution model that
             partitions variation in species occurrences among
             environmental (i.e. resource availability) and biotic (i.e.
             species-to-species associations) predictors. Our results
             indicate that in addition to the direct loss of
             resource-specialised species, forest management has indirect
             effects mediated through interactive associations. In
             particular, species with strong associative links to other
             species are especially sensitive to forest
             management.},
   Doi = {10.1111/oik.03674},
   Key = {fds333226}
}


%% Papers Submitted   
@article{fds70581,
   Author = {Wang, L and Dunson, DB},
   Title = {Bayesian isotonic density regression},
   Year = {2007},
   Key = {fds70581}
}

@article{fds151355,
   Author = {Mitra, R and Dunson, DB},
   Title = {Two level stochastic search variable selection in GLMs with
             missing predictors},
   Year = {2008},
   Key = {fds151355}
}

@article{fds70573,
   Author = {Cai, B and Dunson, DB},
   Title = {Variable selection in nonparametric random effects
             models},
   Year = {2007},
   Key = {fds70573}
}


%% Chapters   
@misc{fds365019,
   Author = {Dunson, DB},
   Title = {Nonparametric Bayes},
   Pages = {281-291},
   Booktitle = {Past, Present, and Future of Statistical
             Science},
   Year = {2014},
   Month = {January},
   ISBN = {9781482204964},
   Abstract = {I reflect on the past, present, and future of nonparametric
             Bayesian statistics. Current nonparametric Bayes research
             tends to be split between theoretical studies, seeking to
             understand relatively simple models, and machine learning,
             defining new models and computational algorithms motivated
             by practical performance. I comment on the current
             landscape, open problems and promising future directions in
             modern big data applications.},
   Key = {fds365019}
}

@misc{fds257825,
   Author = {Dunson, DB and Bhattacharya, A and Griffin, JE},
   Title = {Nonparametric Bayes Regression and Classification Through
             Mixtures of Product Kernels},
   Volume = {9780199694587},
   Pages = {145-164},
   Booktitle = {Bayesian Statistics 9},
   Publisher = {Oxford University Press},
   Year = {2012},
   Month = {January},
   ISBN = {9780199694587},
   url = {http://dx.doi.org/10.1093/acprof:oso/9780199694587.003.0005},
   Abstract = {It is routine in many fields to collect data having a
             variety of measurement scales and supports. For example, in
             biomedical studies for each patient one may collect
             functional data on a biomarker over time, gene expression
             values normalized to lie on a hypersphere to remove
             artifacts, clinical and demographic covariates and a health
             outcome. A common interest focuses on building predictive
             models, with parametric assumptions seldom supported by
             prior knowledge. Hence, it is most appropriate to define a
             prior with large support allowing the conditional
             distribution of the response given predictors to be unknown
             and changing flexibly across the predictor space not just in
             the mean but also in the variance and shape. Building on
             earlier work on Dirichlet process mixtures, we describe a
             simple and general strategy for inducing models for
             conditional distributions through discrete mixtures of
             product kernel models for joint distributions of predictors
             and response variables. Computation is straightforward and
             the approach can easily accommodate combining of widely
             disparate data types, including vector data in a Euclidean
             space, categorical observations, functions, images and
             manifold data.},
   Doi = {10.1093/acprof:oso/9780199694587.003.0005},
   Key = {fds257825}
}
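
The chapter above induces conditional models from joint mixtures of product
kernels. The Python sketch below illustrates the mechanism with a fixed
two-component toy (a Gaussian kernel for a continuous predictor and a
multinomial kernel for a categorical response) rather than a Dirichlet
process mixture; every number here is an illustrative assumption.

import numpy as np
from scipy import stats

weights = np.array([0.6, 0.4])                   # mixture weights
mus = np.array([-1.0, 2.0])                      # Gaussian kernel means for x
sds = np.array([1.0, 0.7])                       # Gaussian kernel scales
pys = np.array([[0.8, 0.2],                      # multinomial kernel for y,
                [0.1, 0.9]])                     # one row per component

def p_y_given_x(x):
    """Conditional class probabilities induced by the joint mixture."""
    fx = weights * stats.norm.pdf(x, mus, sds)   # weight * kernel(x) per comp.
    joint = fx[:, None] * pys                    # product kernel: x and y parts
    return joint.sum(axis=0) / joint.sum()       # Bayes' rule: normalize over y

print(np.round(p_y_given_x(-1.0), 3))            # near component 1: y=0 likely
print(np.round(p_y_given_x(2.0), 3))             # near component 2: y=1 likely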

@misc{fds340365,
   Author = {Weinberg, CR and Dunson, DB},
   Title = {Some issues in assessing human fertility},
   Pages = {42-49},
   Booktitle = {Statistics in the 21st Century},
   Year = {2001},
   Month = {January},
   ISBN = {9781584882725},
   Abstract = {One of the pleasures of working as an applied statistician
             is the awareness it brings of the wide diversity of
             scientific fields to which our profession contributes
             critical concepts and methods. My own awareness was enhanced
             by accepting the invitation from the editors of JASA to
             serve as guest editor for this section of vignettes
             celebrating the significant contributions made by
             statisticians to the life and medical sciences in the 20th
             century. The goal of the project was not an encyclopedic
             catalog of all the major developments, but rather a sampling
             of some of the most interesting work. Of the 12 vignettes,
             10 focus on particular areas of application: environmetrics,
             wildlife populations, animal breeding, human fertility,
             toxicology, medical diagnosis, clinical trials,
             environmental epidemiology, statistical genetics, and
             molecular biology. The two vignettes that begin the series
             focus more on methods that have had, or promise to have,
             impact across a range of subject matter areas: survival
             analysis and causal analysis.},
   Key = {fds340365}
}

 

dept@math.duke.edu
ph: 919.660.2800
fax: 919.660.2821

Mathematics Department
Duke University, Box 90320
Durham, NC 27708-0320